+of the tsearch2 module for PostgreSQL.
+a companion document to this one.
+for the current version.
+but for different purposes.
+and can also remember the position of each word.
+if the option is omitted, then the current configuration is used.
+read the next section on Configurations.
+ together with their positions in the document.
+ in the accompanying tsearch2 Guide.
+ about where in the document each lexeme appeared.
+ it will usually be much smaller.
+ and as such is usually not displayed).
+ to be weighted differently by ranking functions.
+ in the two vectors given as arguments.
+ are retained intact during the concatenation.
+ This has at least two uses.
+ and concatenate the resulting vectors into one.
+ that assigns different weights to positions with different labels.
+ Returns the number of lexemes stored in the vector.
+ with whatever positions and position weights you choose to specify.
+ section in the Guide for details.
+ which can be grouped using parentheses.
+ or specified configuration.
+ This might return a textual representation of the given query.
+ with whatever positions and position weight flags you choose to specify.
+ section in the Guide for details.
+and the dictionaries which then transform each token into a lexeme.
+uses a configuration to perform its processing.
+ which neither discards any stop words nor alters them.
+Those tokens whose types are not listed are discarded.
+or discarding the token if no dictionary returns a lexeme for it.
+for the reference of users.
+and are only of interest to someone writing a parser of their own.
+which is suitable for parsing most plain text and HTML documents.
+the current parser is used when this argument is omitted.
+ are called without a parser as an argument.
+ each kind of token the parser may produce as output.
+ one for each token produced by parsing.
+and return lexemes which are usually some reduced form of the token.
+ before returning the word.
+ to a single recognizable form.
+serve as unique identifiers for the dictionary.
+it specifies a file from which stop words should be read.
+useful only to developers trying to implement their own dictionaries.
+if omitted then the current dictionary is used.
+ that do not select a dictionary explicitly.
+ Reduces a single word to a lexeme.
+ from which an inflected form could arise.
+and whether different search terms occur near each other.
+that specifies whether a document's length should impact its rank.
+is probably more relevant than a thousand-word document with five instances.
+ depending on how you have classified them.
+
+ {D-weight, C-weight, B-weight, A-weight}
+ If no weights are provided, then these defaults are used:
+ {0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document,
+ like the title or an initial abstract,
+ and make them more or less important than words in the document body.
+
+ CREATE FUNCTION rank_cd(
+ [ K int4, ]
+ vector tsvector, query tsquery,
+ [ normalization int4 ]
+ ) RETURNS float4
+
+ This function computes the cover density ranking
+ for the given document vector and query,
+ as described in Clarke, Cormack, and Tudhope's
--- /dev/null
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+psql:tsearch2.sql:13: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
+psql:tsearch2.sql:145: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
+psql:tsearch2.sql:244: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
+psql:tsearch2.sql:251: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
+psql:tsearch2.sql:339: NOTICE: ProcedureCreate: type tsvector is not yet defined
+psql:tsearch2.sql:344: NOTICE: Argument type "tsvector" is only a shell
+psql:tsearch2.sql:398: NOTICE: ProcedureCreate: type tsquery is not yet defined
+psql:tsearch2.sql:403: NOTICE: Argument type "tsquery" is only a shell
+psql:tsearch2.sql:545: NOTICE: ProcedureCreate: type gtsvector is not yet defined
+psql:tsearch2.sql:550: NOTICE: Argument type "gtsvector" is only a shell
+--tsvector
+SELECT '1'::tsvector;
+ tsvector
+----------
+ '1'
+(1 row)
+
+SELECT '1 '::tsvector;
+ tsvector
+----------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsvector;
+ tsvector
+----------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsvector;
+ tsvector
+----------
+ '1'
+(1 row)
+
+SELECT '1 2'::tsvector;
+ tsvector
+----------
+ '1' '2'
+(1 row)
+
+SELECT '\'1 2\''::tsvector;
+ tsvector
+----------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsvector;
+ tsvector
+----------
+ '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\'3'::tsvector;
+ tsvector
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' 3'::tsvector;
+ tsvector
+-------------
+ '3' '1 \'2'
+(1 row)
+
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+ tsvector
+------------------
+ '4' ' 3' '1 \'2'
+(1 row)
+
+select '\'w\':4A,3B,2C,1D,5 a:8';
+ ?column?
+-----------------------
+ 'w':4A,3B,2C,1D,5 a:8
+(1 row)
+
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+ ?column?
+----------------------------
+ 'a':3A,4B 'b':2A 'ba':1237
+(1 row)
+
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+ setweight
+----------------------------------------------------------
+ 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+(1 row)
+
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+ strip
+---------------
+ 'a' 'w' 'asd'
+(1 row)
+
+--tsquery
+SELECT '1'::tsquery;
+ tsquery
+---------
+ '1'
+(1 row)
+
+SELECT '1 '::tsquery;
+ tsquery
+---------
+ '1'
+(1 row)
+
+SELECT ' 1'::tsquery;
+ tsquery
+---------
+ '1'
+(1 row)
+
+SELECT ' 1 '::tsquery;
+ tsquery
+---------
+ '1'
+(1 row)
+
+SELECT '\'1 2\''::tsquery;
+ tsquery
+---------
+ '1 2'
+(1 row)
+
+SELECT '\'1 \\\'2\''::tsquery;
+ tsquery
+---------
+ '1 \'2'
+(1 row)
+
+SELECT '!1'::tsquery;
+ tsquery
+---------
+ !'1'
+(1 row)
+
+SELECT '1|2'::tsquery;
+ tsquery
+-----------
+ '1' | '2'
+(1 row)
+
+SELECT '1|!2'::tsquery;
+ tsquery
+------------
+ '1' | !'2'
+(1 row)
+
+SELECT '!1|2'::tsquery;
+ tsquery
+------------
+ !'1' | '2'
+(1 row)
+
+SELECT '!1|!2'::tsquery;
+ tsquery
+-------------
+ !'1' | !'2'
+(1 row)
+
+SELECT '!(!1|!2)'::tsquery;
+ tsquery
+------------------
+ !( !'1' | !'2' )
+(1 row)
+
+SELECT '!(!1|2)'::tsquery;
+ tsquery
+-----------------
+ !( !'1' | '2' )
+(1 row)
+
+SELECT '!(1|!2)'::tsquery;
+ tsquery
+-----------------
+ !( '1' | !'2' )
+(1 row)
+
+SELECT '!(1|2)'::tsquery;
+ tsquery
+----------------
+ !( '1' | '2' )
+(1 row)
+
+SELECT '1&2'::tsquery;
+ tsquery
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!1&2'::tsquery;
+ tsquery
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '1&!2'::tsquery;
+ tsquery
+------------
+ '1' & !'2'
+(1 row)
+
+SELECT '!1&!2'::tsquery;
+ tsquery
+-------------
+ !'1' & !'2'
+(1 row)
+
+SELECT '(1&2)'::tsquery;
+ tsquery
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '1&(2)'::tsquery;
+ tsquery
+-----------
+ '1' & '2'
+(1 row)
+
+SELECT '!(1)&2'::tsquery;
+ tsquery
+------------
+ !'1' & '2'
+(1 row)
+
+SELECT '!(1&2)'::tsquery;
+ tsquery
+----------------
+ !( '1' & '2' )
+(1 row)
+
+SELECT '1|2&3'::tsquery;
+ tsquery
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '1|(2&3)'::tsquery;
+ tsquery
+-----------------
+ '1' | '2' & '3'
+(1 row)
+
+SELECT '(1|2)&3'::tsquery;
+ tsquery
+---------------------
+ ( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|2&!3'::tsquery;
+ tsquery
+------------------
+ '1' | '2' & !'3'
+(1 row)
+
+SELECT '1|!2&3'::tsquery;
+ tsquery
+------------------
+ '1' | !'2' & '3'
+(1 row)
+
+SELECT '!1|2&3'::tsquery;
+ tsquery
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!1|(2&3)'::tsquery;
+ tsquery
+------------------
+ !'1' | '2' & '3'
+(1 row)
+
+SELECT '!(1|2)&3'::tsquery;
+ tsquery
+----------------------
+ !( '1' | '2' ) & '3'
+(1 row)
+
+SELECT '(!1|2)&3'::tsquery;
+ tsquery
+----------------------
+ ( !'1' | '2' ) & '3'
+(1 row)
+
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+ tsquery
+-----------------------------------------
+ '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+(1 row)
+
+SELECT '1|2|4|5|6'::tsquery;
+ tsquery
+-----------------------------------------
+ ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+(1 row)
+
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+ tsquery
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&2&4&5&6'::tsquery;
+ tsquery
+-----------------------------
+ '1' & '2' & '4' & '5' & '6'
+(1 row)
+
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+ tsquery
+---------------------------------
+ '1' & '2' & '4' & ( '5' | '6' )
+(1 row)
+
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+ tsquery
+----------------------------------
+ '1' & '2' & '4' & ( '5' | !'6' )
+(1 row)
+
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+ tsquery
+------------------------------------------
+ '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
+(1 row)
+
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+ ?column?
+------------------------------------------
+ 'the wether':dc & ' sKies ':BC & a:d b:a
+(1 row)
+
+select lexize('simple', 'ASD56 hsdkf');
+ lexize
+-----------------
+ {"asd56 hsdkf"}
+(1 row)
+
+select lexize('en_stem', 'SKIES Problems identity');
+ lexize
+--------------------------
+ {"skies problems ident"}
+(1 row)
+
+select * from token_type('default');
+ tokid | alias | descr
+-------+--------------+-----------------------------------
+ 1 | lword | Latin word
+ 2 | nlword | Non-latin word
+ 3 | word | Word
+ 4 | email | Email
+ 5 | url | URL
+ 6 | host | Host
+ 7 | sfloat | Scientific notation
+ 8 | version | VERSION
+ 9 | part_hword | Part of hyphenated word
+ 10 | nlpart_hword | Non-latin part of hyphenated word
+ 11 | lpart_hword | Latin part of hyphenated word
+ 12 | blank | Space symbols
+ 13 | tag | HTML Tag
+ 14 | http | HTTP head
+ 15 | hword | Hyphenated word
+ 16 | lhword | Latin hyphenated word
+ 17 | nlhword | Non-latin hyphenated word
+ 18 | uri | URI
+ 19 | file | File or path name
+ 20 | float | Decimal notation
+ 21 | int | Signed integer
+ 22 | uint | Unsigned integer
+ 23 | entity | HTML Entity
+(23 rows)
+
+select * from parse('default', '345
[email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005
[email protected] qwe-wer asdf
qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
+ wow < jqw <> qwerty');
+ tokid | token
+-------+--------------------------------------
+ 22 | 345
+ 12 |
+ 12 |
+ 12 | '
+ 12 |
+ 14 | http://
+ 6 | www.com
+ 12 | /
+ 12 |
+ 14 | http://
+ 5 | aew.werc.ewr/?ad=qwe&dw
+ 6 | aew.werc.ewr
+ 18 | /?ad=qwe&dw
+ 12 |
+ 5 | 1aew.werc.ewr/?ad=qwe&dw
+ 6 | 1aew.werc.ewr
+ 18 | /?ad=qwe&dw
+ 12 |
+ 6 | 2aew.werc.ewr
+ 12 |
+ 14 | http://
+ 5 | 3aew.werc.ewr/?ad=qwe&dw
+ 6 | 3aew.werc.ewr
+ 18 | /?ad=qwe&dw
+ 12 |
+ 14 | http://
+ 6 | 4aew.werc.ewr
+ 12 |
+ 14 | http://
+ 5 | 5aew.werc.ewr:8100/?
+ 6 | 5aew.werc.ewr
+ 18 | :8100/?
+ 12 |
+ 1 | ad
+ 12 | =
+ 1 | qwe
+ 12 | &
+ 1 | dw
+ 12 |
+ 5 | 6aew.werc.ewr:8100/?ad=qwe&dw
+ 6 | 6aew.werc.ewr
+ 18 | :8100/?ad=qwe&dw
+ 12 |
+ 5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
+ 6 | 7aew.werc.ewr
+ 18 | :8100/?ad=qwe&dw=%20%32
+ 12 |
+ 7 | +4.0e-10
+ 12 |
+ 1 | qwe
+ 12 |
+ 1 | qwe
+ 12 |
+ 1 | qwqwe
+ 12 |
+ 20 | 234.435
+ 12 |
+ 22 | 455
+ 12 |
+ 20 | 5.005
+ 12 |
+ 12 |
+ 16 | qwe-wer
+ 11 | qwe
+ 12 | -
+ 11 | wer
+ 12 |
+ 1 | asdf
+ 12 |
+ 13 |
+ 1 | qwer
+ 12 |
+ 1 | jf
+ 12 |
+ 1 | sdjk
+ 13 |
+ 12 |
+ 3 | ewr1
+ 12 | >
+ 12 |
+ 3 | ewri2
+ 12 |
+ 13 |
+ 12 |
+
+ 19 | /usr/local/fff
+ 12 |
+ 19 | /awdf/dwqe/4325
+ 12 |
+ 19 | rewt/ewr
+ 12 |
+ 1 | wefjn
+ 12 |
+ 19 | /wqe-324/ewr
+ 12 |
+ 6 | gist.h
+ 12 |
+ 6 | gist.h.c
+ 12 |
+ 6 | gist.c
+ 12 | .
+ 12 |
+ 1 | readline
+ 12 |
+ 20 | 4.2
+ 12 |
+ 20 | 4.2
+ 12 | .
+ 12 |
+ 20 | 4.2
+ 12 | ,
+ 12 |
+ 15 | readline-4
+ 11 | readline
+ 12 | -
+ 20 | 4.2
+ 12 |
+ 15 | readline-4
+ 11 | readline
+ 12 | -
+ 20 | 4.2
+ 12 | .
+ 12 |
+ 22 | 234
+ 12 |
+
+ 13 |
+ 12 |
+ 1 | wow
+ 12 |
+ 12 | <
+ 12 |
+ 1 | jqw
+ 12 |
+ 12 | <
+ 12 | >
+ 12 |
+ 1 | qwerty
+(138 rows)
+
+SELECT to_tsvector('default', '345
[email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005
[email protected] qwe-wer asdf
qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
+ wow < jqw <> qwerty');
+ to_tsvector
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 '
[email protected]':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 '
[email protected]':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+(1 row)
+
+SELECT length(to_tsvector('default', '345 qw'));
+ length
+--------
+ 2
+(1 row)
+
+SELECT length(to_tsvector('default', '345
[email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005
[email protected] qwe-wer asdf
qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
+ wow < jqw <> qwerty'));
+ length
+--------
+ 53
+(1 row)
+
+select to_tsquery('default', 'qwe & sKies ');
+ to_tsquery
+---------------
+ 'qwe' & 'sky'
+(1 row)
+
+select to_tsquery('simple', 'qwe & sKies ');
+ to_tsquery
+-----------------
+ 'qwe' & 'skies'
+(1 row)
+
+select to_tsquery('default', '\'the wether\':dc & \' sKies \':BC ');
+ to_tsquery
+------------------------
+ 'wether':CD & 'sky':BC
+(1 row)
+
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+ ?column?
+----------
+ t
+(1 row)
+
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+ ?column?
+----------
+ t
+(1 row)
+
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+ ?column?
+----------
+ t
+(1 row)
+
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+ ?column?
+----------
+ f
+(1 row)
+
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+ ?column?
+----------
+ t
+(1 row)
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+\copy test_tsvector from 'data/test_tsearch.data'
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count
+-------
+ 158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count
+-------
+ 17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count
+-------
+ 6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count
+-------
+ 98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count
+-------
+ 23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count
+-------
+ 39
+(1 row)
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count
+-------
+ 158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count
+-------
+ 17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count
+-------
+ 6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count
+-------
+ 98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count
+-------
+ 23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count
+-------
+ 39
+(1 row)
+
+select set_curcfg('default');
+ set_curcfg
+------------
+
+(1 row)
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count
+-------
+ 0
+(1 row)
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count
+-------
+ 1
+(1 row)
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count
+-------
+ 0
+(1 row)
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+ count
+-------
+ 1
+(1 row)
+
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+ count
+-------
+ 1
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+ rank
+------
+ 0.28
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+ rank
+------
+ 0.46
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+ rank
+------
+ 0.19
+(1 row)
+
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+ rank
+----------
+ 0.140153
+(1 row)
+
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+ rank
+----------
+ 0.198206
+(1 row)
+
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+ rank
+-----------
+ 0.0991032
+(1 row)
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+ word | ndoc | nentry
+-----------+------+--------
+ qq | 109 | 109
+ qt | 102 | 102
+ qe | 100 | 100
+ qh | 98 | 98
+ qw | 98 | 98
+ qa | 97 | 97
+ ql | 94 | 94
+ qs | 94 | 94
+ qi | 92 | 92
+ qr | 92 | 92
+ qj | 91 | 91
+ qd | 87 | 87
+ qz | 87 | 87
+ qc | 86 | 86
+ qn | 86 | 86
+ qv | 85 | 85
+ qo | 84 | 84
+ qy | 84 | 84
+ wp | 84 | 84
+ qf | 81 | 81
+ qk | 80 | 80
+ wt | 80 | 80
+ qu | 79 | 79
+ qg | 78 | 78
+ wb | 78 | 78
+ qx | 77 | 77
+ wr | 77 | 77
+ ws | 73 | 73
+ wy | 73 | 73
+ wa | 72 | 72
+ wf | 70 | 70
+ wg | 70 | 70
+ wi | 70 | 70
+ wu | 70 | 70
+ wc | 69 | 69
+ wj | 69 | 69
+ qp | 68 | 68
+ wh | 68 | 68
+ wv | 68 | 68
+ qb | 66 | 66
+ eu | 65 | 65
+ we | 65 | 65
+ wl | 65 | 65
+ wq | 65 | 65
+ wk | 64 | 64
+ ee | 63 | 63
+ eo | 63 | 63
+ qm | 63 | 63
+ wn | 63 | 63
+ ef | 62 | 62
+ eh | 62 | 62
+ ex | 62 | 62
+ re | 62 | 62
+ rl | 62 | 62
+ rr | 62 | 62
+ eb | 61 | 61
+ ek | 61 | 61
+ ww | 61 | 61
+ ea | 60 | 60
+ ei | 60 | 60
+ em | 60 | 60
+ eq | 60 | 60
+ ew | 60 | 60
+ ro | 60 | 60
+ rw | 60 | 60
+ tl | 60 | 60
+ eg | 59 | 59
+ en | 59 | 59
+ ez | 59 | 59
+ rj | 59 | 59
+ ry | 59 | 59
+ tw | 59 | 59
+ tx | 59 | 59
+ ej | 58 | 58
+ es | 58 | 58
+ ra | 58 | 58
+ rd | 58 | 58
+ rg | 58 | 58
+ rx | 58 | 58
+ tb | 58 | 58
+ wd | 58 | 58
+ ed | 57 | 57
+ tc | 57 | 57
+ wx | 57 | 57
+ er | 56 | 56
+ wm | 56 | 56
+ wo | 56 | 56
+ yw | 56 | 56
+ ep | 55 | 55
+ rk | 55 | 55
+ rp | 55 | 55
+ rz | 55 | 55
+ ta | 55 | 55
+ rq | 54 | 54
+ yn | 54 | 54
+ ec | 53 | 53
+ el | 53 | 53
+ ru | 53 | 53
+ rv | 53 | 53
+ tz | 53 | 53
+ un | 53 | 53
+ wz | 53 | 53
+ ys | 53 | 53
+ oe | 52 | 52
+ tn | 52 | 52
+ tq | 52 | 52
+ ty | 52 | 52
+ uq | 52 | 52
+ yg | 52 | 52
+ ym | 52 | 52
+ oi | 51 | 51
+ to | 51 | 51
+ yi | 51 | 51
+ pn | 50 | 50
+ rb | 50 | 50
+ ri | 50 | 50
+ rn | 50 | 50
+ ti | 50 | 50
+ tv | 50 | 50
+ um | 50 | 50
+ ut | 50 | 50
+ ya | 50 | 50
+ et | 49 | 49
+ ix | 49 | 49
+ ox | 49 | 49
+ q3 | 49 | 49
+ yf | 49 | 49
+ yl | 49 | 49
+ yo | 49 | 49
+ yr | 49 | 49
+ ev | 48 | 48
+ ey | 48 | 48
+ ot | 48 | 48
+ rc | 48 | 48
+ rm | 48 | 48
+ th | 48 | 48
+ uo | 48 | 48
+ ia | 47 | 47
+ q1 | 47 | 47
+ rh | 47 | 47
+ yq | 47 | 47
+ yz | 47 | 47
+ av | 46 | 46
+ im | 46 | 46
+ os | 46 | 46
+ tk | 46 | 46
+ yy | 46 | 46
+ ir | 45 | 45
+ iv | 45 | 45
+ iw | 45 | 45
+ oj | 45 | 45
+ pl | 45 | 45
+ pv | 45 | 45
+ te | 45 | 45
+ tu | 45 | 45
+ uv | 45 | 45
+ ux | 45 | 45
+ yd | 45 | 45
+ yx | 45 | 45
+ ij | 44 | 44
+ pa | 44 | 44
+ se | 44 | 44
+ tg | 44 | 44
+ ue | 44 | 44
+ yb | 44 | 44
+ yt | 44 | 44
+ if | 43 | 43
+ ik | 43 | 43
+ in | 43 | 43
+ ph | 43 | 43
+ pj | 43 | 43
+ q5 | 43 | 43
+ rt | 43 | 43
+ ub | 43 | 43
+ ud | 43 | 43
+ uh | 43 | 43
+ uj | 43 | 43
+ w7 | 43 | 43
+ ye | 43 | 43
+ yv | 43 | 43
+ db | 42 | 42
+ do | 42 | 42
+ id | 42 | 42
+ ie | 42 | 42
+ ii | 42 | 42
+ of | 42 | 42
+ pr | 42 | 42
+ q4 | 42 | 42
+ rf | 42 | 42
+ td | 42 | 42
+ uk | 42 | 42
+ up | 42 | 42
+ yh | 42 | 42
+ yk | 42 | 42
+ io | 41 | 41
+ it | 41 | 41
+ pb | 41 | 41
+ q0 | 41 | 41
+ q7 | 41 | 41
+ rs | 41 | 41
+ tj | 41 | 41
+ ur | 41 | 41
+ ig | 40 | 40
+ iu | 40 | 40
+ iy | 40 | 40
+ od | 40 | 40
+ q6 | 40 | 40
+ tt | 40 | 40
+ ug | 40 | 40
+ ul | 40 | 40
+ us | 40 | 40
+ uu | 40 | 40
+ uz | 40 | 40
+ ah | 39 | 39
+ ar | 39 | 39
+ as | 39 | 39
+ dl | 39 | 39
+ dt | 39 | 39
+ hk | 39 | 39
+ iq | 39 | 39
+ is | 39 | 39
+ oc | 39 | 39
+ ov | 39 | 39
+ oy | 39 | 39
+ uf | 39 | 39
+ ui | 39 | 39
+ aa | 38 | 38
+ ad | 38 | 38
+ fh | 38 | 38
+ gm | 38 | 38
+ ic | 38 | 38
+ jd | 38 | 38
+ om | 38 | 38
+ or | 38 | 38
+ oz | 38 | 38
+ pm | 38 | 38
+ q8 | 38 | 38
+ sf | 38 | 38
+ sm | 38 | 38
+ sv | 38 | 38
+ uc | 38 | 38
+ ak | 37 | 37
+ aq | 37 | 37
+ di | 37 | 37
+ e4 | 37 | 37
+ fi | 37 | 37
+ fx | 37 | 37
+ ha | 37 | 37
+ hp | 37 | 37
+ ih | 37 | 37
+ og | 37 | 37
+ po | 37 | 37
+ pw | 37 | 37
+ sn | 37 | 37
+ su | 37 | 37
+ sw | 37 | 37
+ w6 | 37 | 37
+ yj | 37 | 37
+ yu | 37 | 37
+ ag | 36 | 36
+ am | 36 | 36
+ at | 36 | 36
+ e1 | 36 | 36
+ ff | 36 | 36
+ gx | 36 | 36
+ he | 36 | 36
+ hj | 36 | 36
+ ib | 36 | 36
+ iz | 36 | 36
+ lm | 36 | 36
+ ok | 36 | 36
+ pk | 36 | 36
+ pp | 36 | 36
+ pu | 36 | 36
+ sp | 36 | 36
+ tf | 36 | 36
+ tm | 36 | 36
+ ay | 35 | 35
+ dy | 35 | 35
+ fu | 35 | 35
+ ku | 35 | 35
+ lh | 35 | 35
+ lq | 35 | 35
+ o6 | 35 | 35
+ ob | 35 | 35
+ on | 35 | 35
+ op | 35 | 35
+ pd | 35 | 35
+ ps | 35 | 35
+ si | 35 | 35
+ sl | 35 | 35
+ sx | 35 | 35
+ tp | 35 | 35
+ tr | 35 | 35
+ w3 | 35 | 35
+ y1 | 35 | 35
+ al | 34 | 34
+ ap | 34 | 34
+ az | 34 | 34
+ dc | 34 | 34
+ dd | 34 | 34
+ dz | 34 | 34
+ e0 | 34 | 34
+ fj | 34 | 34
+ fp | 34 | 34
+ gd | 34 | 34
+ gg | 34 | 34
+ gk | 34 | 34
+ go | 34 | 34
+ ho | 34 | 34
+ jc | 34 | 34
+ oa | 34 | 34
+ oh | 34 | 34
+ oo | 34 | 34
+ pe | 34 | 34
+ px | 34 | 34
+ sd | 34 | 34
+ sq | 34 | 34
+ sy | 34 | 34
+ ab | 33 | 33
+ ae | 33 | 33
+ af | 33 | 33
+ aw | 33 | 33
+ e5 | 33 | 33
+ fk | 33 | 33
+ gu | 33 | 33
+ gy | 33 | 33
+ hb | 33 | 33
+ hm | 33 | 33
+ hy | 33 | 33
+ jl | 33 | 33
+ jr | 33 | 33
+ ls | 33 | 33
+ oq | 33 | 33
+ pt | 33 | 33
+ sa | 33 | 33
+ sh | 33 | 33
+ sj | 33 | 33
+ so | 33 | 33
+ sz | 33 | 33
+ t7 | 33 | 33
+ uw | 33 | 33
+ w8 | 33 | 33
+ y0 | 33 | 33
+ yp | 33 | 33
+ dh | 32 | 32
+ dp | 32 | 32
+ dq | 32 | 32
+ e7 | 32 | 32
+ fn | 32 | 32
+ fo | 32 | 32
+ fr | 32 | 32
+ ga | 32 | 32
+ gq | 32 | 32
+ hh | 32 | 32
+ il | 32 | 32
+ ip | 32 | 32
+ jv | 32 | 32
+ lc | 32 | 32
+ ol | 32 | 32
+ pc | 32 | 32
+ q9 | 32 | 32
+ ds | 31 | 31
+ e9 | 31 | 31
+ fd | 31 | 31
+ fe | 31 | 31
+ ft | 31 | 31
+ gs | 31 | 31
+ hl | 31 | 31
+ hs | 31 | 31
+ jb | 31 | 31
+ kc | 31 | 31
+ kw | 31 | 31
+ mj | 31 | 31
+ q2 | 31 | 31
+ r3 | 31 | 31
+ sb | 31 | 31
+ sk | 31 | 31
+ ts | 31 | 31
+ ua | 31 | 31
+ yc | 31 | 31
+ zw | 31 | 31
+ ao | 30 | 30
+ du | 30 | 30
+ fw | 30 | 30
+ gj | 30 | 30
+ hu | 30 | 30
+ kh | 30 | 30
+ kl | 30 | 30
+ kv | 30 | 30
+ ld | 30 | 30
+ lf | 30 | 30
+ pq | 30 | 30
+ py | 30 | 30
+ sc | 30 | 30
+ sr | 30 | 30
+ uy | 30 | 30
+ vg | 30 | 30
+ w2 | 30 | 30
+ xg | 30 | 30
+ xo | 30 | 30
+ au | 29 | 29
+ cx | 29 | 29
+ fv | 29 | 29
+ gh | 29 | 29
+ gl | 29 | 29
+ gt | 29 | 29
+ hw | 29 | 29
+ ji | 29 | 29
+ km | 29 | 29
+ la | 29 | 29
+ ou | 29 | 29
+ r0 | 29 | 29
+ w0 | 29 | 29
+ y9 | 29 | 29
+ zm | 29 | 29
+ zs | 29 | 29
+ zy | 29 | 29
+ ax | 28 | 28
+ cd | 28 | 28
+ dj | 28 | 28
+ dn | 28 | 28
+ dr | 28 | 28
+ ht | 28 | 28
+ jf | 28 | 28
+ lo | 28 | 28
+ lr | 28 | 28
+ na | 28 | 28
+ ng | 28 | 28
+ r8 | 28 | 28
+ ss | 28 | 28
+ xt | 28 | 28
+ y6 | 28 | 28
+ aj | 27 | 27
+ ca | 27 | 27
+ cg | 27 | 27
+ df | 27 | 27
+ dg | 27 | 27
+ dv | 27 | 27
+ gc | 27 | 27
+ gn | 27 | 27
+ gr | 27 | 27
+ hd | 27 | 27
+ i8 | 27 | 27
+ jn | 27 | 27
+ jt | 27 | 27
+ lp | 27 | 27
+ o9 | 27 | 27
+ ow | 27 | 27
+ r9 | 27 | 27
+ t8 | 27 | 27
+ u5 | 27 | 27
+ w4 | 27 | 27
+ xm | 27 | 27
+ zz | 27 | 27
+ a2 | 26 | 26
+ ac | 26 | 26
+ ai | 26 | 26
+ cm | 26 | 26
+ cu | 26 | 26
+ cw | 26 | 26
+ dk | 26 | 26
+ e2 | 26 | 26
+ fc | 26 | 26
+ fg | 26 | 26
+ fl | 26 | 26
+ fs | 26 | 26
+ ge | 26 | 26
+ gv | 26 | 26
+ hc | 26 | 26
+ hi | 26 | 26
+ hx | 26 | 26
+ jj | 26 | 26
+ jm | 26 | 26
+ kg | 26 | 26
+ kk | 26 | 26
+ kn | 26 | 26
+ ko | 26 | 26
+ kt | 26 | 26
+ ln | 26 | 26
+ mx | 26 | 26
+ pg | 26 | 26
+ r4 | 26 | 26
+ t6 | 26 | 26
+ u1 | 26 | 26
+ u4 | 26 | 26
+ vi | 26 | 26
+ vr | 26 | 26
+ w1 | 26 | 26
+ w9 | 26 | 26
+ xk | 26 | 26
+ xs | 26 | 26
+ zf | 26 | 26
+ bb | 25 | 25
+ dm | 25 | 25
+ dw | 25 | 25
+ e8 | 25 | 25
+ fb | 25 | 25
+ gw | 25 | 25
+ h8 | 25 | 25
+ hf | 25 | 25
+ hg | 25 | 25
+ hn | 25 | 25
+ hv | 25 | 25
+ i0 | 25 | 25
+ i3 | 25 | 25
+ jg | 25 | 25
+ jo | 25 | 25
+ jx | 25 | 25
+ kq | 25 | 25
+ lw | 25 | 25
+ lx | 25 | 25
+ o3 | 25 | 25
+ p7 | 25 | 25
+ pf | 25 | 25
+ pi | 25 | 25
+ pz | 25 | 25
+ r2 | 25 | 25
+ r5 | 25 | 25
+ t9 | 25 | 25
+ u7 | 25 | 25
+ ve | 25 | 25
+ vu | 25 | 25
+ y5 | 25 | 25
+ y8 | 25 | 25
+ zt | 25 | 25
+ an | 24 | 24
+ bj | 24 | 24
+ dx | 24 | 24
+ fm | 24 | 24
+ fz | 24 | 24
+ gb | 24 | 24
+ gi | 24 | 24
+ gp | 24 | 24
+ hr | 24 | 24
+ hz | 24 | 24
+ i5 | 24 | 24
+ jq | 24 | 24
+ kb | 24 | 24
+ ke | 24 | 24
+ kf | 24 | 24
+ kp | 24 | 24
+ lv | 24 | 24
+ lz | 24 | 24
+ o8 | 24 | 24
+ r1 | 24 | 24
+ s7 | 24 | 24
+ sg | 24 | 24
+ u3 | 24 | 24
+ vj | 24 | 24
+ vt | 24 | 24
+ w5 | 24 | 24
+ zj | 24 | 24
+ be | 23 | 23
+ bi | 23 | 23
+ bn | 23 | 23
+ cn | 23 | 23
+ cy | 23 | 23
+ da | 23 | 23
+ e6 | 23 | 23
+ fa | 23 | 23
+ js | 23 | 23
+ ki | 23 | 23
+ kz | 23 | 23
+ li | 23 | 23
+ mt | 23 | 23
+ mz | 23 | 23
+ nu | 23 | 23
+ o2 | 23 | 23
+ p5 | 23 | 23
+ p8 | 23 | 23
+ r7 | 23 | 23
+ t0 | 23 | 23
+ t1 | 23 | 23
+ t3 | 23 | 23
+ vm | 23 | 23
+ xh | 23 | 23
+ xx | 23 | 23
+ zp | 23 | 23
+ zr | 23 | 23
+ a3 | 22 | 22
+ bg | 22 | 22
+ de | 22 | 22
+ e3 | 22 | 22
+ fq | 22 | 22
+ i2 | 22 | 22
+ i7 | 22 | 22
+ ja | 22 | 22
+ jk | 22 | 22
+ jy | 22 | 22
+ kr | 22 | 22
+ kx | 22 | 22
+ ly | 22 | 22
+ nb | 22 | 22
+ nh | 22 | 22
+ ns | 22 | 22
+ s3 | 22 | 22
+ u2 | 22 | 22
+ vn | 22 | 22
+ xe | 22 | 22
+ y4 | 22 | 22
+ zh | 22 | 22
+ zo | 22 | 22
+ zq | 22 | 22
+ a1 | 21 | 21
+ bl | 21 | 21
+ bo | 21 | 21
+ cb | 21 | 21
+ ch | 21 | 21
+ co | 21 | 21
+ cq | 21 | 21
+ cv | 21 | 21
+ d7 | 21 | 21
+ g8 | 21 | 21
+ je | 21 | 21
+ jp | 21 | 21
+ jz | 21 | 21
+ lg | 21 | 21
+ me | 21 | 21
+ nc | 21 | 21
+ p4 | 21 | 21
+ st | 21 | 21
+ vb | 21 | 21
+ vw | 21 | 21
+ vz | 21 | 21
+ xj | 21 | 21
+ xq | 21 | 21
+ xu | 21 | 21
+ xy | 21 | 21
+ zb | 21 | 21
+ bv | 20 | 20
+ bz | 20 | 20
+ cj | 20 | 20
+ cp | 20 | 20
+ cs | 20 | 20
+ d8 | 20 | 20
+ ju | 20 | 20
+ k0 | 20 | 20
+ ks | 20 | 20
+ ky | 20 | 20
+ l1 | 20 | 20
+ lb | 20 | 20
+ lj | 20 | 20
+ lu | 20 | 20
+ nm | 20 | 20
+ nw | 20 | 20
+ nz | 20 | 20
+ o7 | 20 | 20
+ p6 | 20 | 20
+ vh | 20 | 20
+ vp | 20 | 20
+ vs | 20 | 20
+ xb | 20 | 20
+ xr | 20 | 20
+ z3 | 20 | 20
+ zv | 20 | 20
+ bq | 19 | 19
+ br | 19 | 19
+ by | 19 | 19
+ cl | 19 | 19
+ d2 | 19 | 19
+ f1 | 19 | 19
+ f4 | 19 | 19
+ gf | 19 | 19
+ hq | 19 | 19
+ k9 | 19 | 19
+ ka | 19 | 19
+ kd | 19 | 19
+ kj | 19 | 19
+ md | 19 | 19
+ mi | 19 | 19
+ ml | 19 | 19
+ my | 19 | 19
+ nj | 19 | 19
+ ny | 19 | 19
+ o1 | 19 | 19
+ s4 | 19 | 19
+ s8 | 19 | 19
+ t5 | 19 | 19
+ u0 | 19 | 19
+ xl | 19 | 19
+ zg | 19 | 19
+ zi | 19 | 19
+ a5 | 18 | 18
+ b9 | 18 | 18
+ bh | 18 | 18
+ bx | 18 | 18
+ d3 | 18 | 18
+ fy | 18 | 18
+ g2 | 18 | 18
+ i4 | 18 | 18
+ i6 | 18 | 18
+ i9 | 18 | 18
+ jw | 18 | 18
+ lk | 18 | 18
+ mb | 18 | 18
+ mv | 18 | 18
+ nd | 18 | 18
+ nr | 18 | 18
+ nt | 18 | 18
+ t2 | 18 | 18
+ xf | 18 | 18
+ xv | 18 | 18
+ zc | 18 | 18
+ zd | 18 | 18
+ a7 | 17 | 17
+ bc | 17 | 17
+ bd | 17 | 17
+ ce | 17 | 17
+ cf | 17 | 17
+ cr | 17 | 17
+ g9 | 17 | 17
+ j0 | 17 | 17
+ j5 | 17 | 17
+ mp | 17 | 17
+ mr | 17 | 17
+ mw | 17 | 17
+ nk | 17 | 17
+ no | 17 | 17
+ o0 | 17 | 17
+ o4 | 17 | 17
+ s0 | 17 | 17
+ s1 | 17 | 17
+ t4 | 17 | 17
+ u9 | 17 | 17
+ vf | 17 | 17
+ vx | 17 | 17
+ x3 | 17 | 17
+ xi | 17 | 17
+ xn | 17 | 17
+ xz | 17 | 17
+ zl | 17 | 17
+ zn | 17 | 17
+ a0 | 16 | 16
+ bu | 16 | 16
+ bw | 16 | 16
+ ci | 16 | 16
+ ck | 16 | 16
+ d0 | 16 | 16
+ d4 | 16 | 16
+ d6 | 16 | 16
+ f5 | 16 | 16
+ g1 | 16 | 16
+ gz | 16 | 16
+ h4 | 16 | 16
+ jh | 16 | 16
+ l4 | 16 | 16
+ lt | 16 | 16
+ mg | 16 | 16
+ mh | 16 | 16
+ mo | 16 | 16
+ ni | 16 | 16
+ nl | 16 | 16
+ nq | 16 | 16
+ p2 | 16 | 16
+ u8 | 16 | 16
+ v9 | 16 | 16
+ vl | 16 | 16
+ vo | 16 | 16
+ xp | 16 | 16
+ y3 | 16 | 16
+ y7 | 16 | 16
+ z7 | 16 | 16
+ za | 16 | 16
+ zx | 16 | 16
+ bf | 15 | 15
+ bp | 15 | 15
+ cc | 15 | 15
+ g0 | 15 | 15
+ j2 | 15 | 15
+ j9 | 15 | 15
+ l6 | 15 | 15
+ le | 15 | 15
+ ll | 15 | 15
+ m8 | 15 | 15
+ ma | 15 | 15
+ mu | 15 | 15
+ nf | 15 | 15
+ r6 | 15 | 15
+ s5 | 15 | 15
+ vd | 15 | 15
+ vk | 15 | 15
+ xa | 15 | 15
+ xw | 15 | 15
+ y2 | 15 | 15
+ z8 | 15 | 15
+ ze | 15 | 15
+ zu | 15 | 15
+ a6 | 14 | 14
+ bk | 14 | 14
+ bt | 14 | 14
+ c0 | 14 | 14
+ f8 | 14 | 14
+ g3 | 14 | 14
+ g4 | 14 | 14
+ g7 | 14 | 14
+ h6 | 14 | 14
+ h7 | 14 | 14
+ h9 | 14 | 14
+ i1 | 14 | 14
+ k1 | 14 | 14
+ k2 | 14 | 14
+ k6 | 14 | 14
+ k7 | 14 | 14
+ mc | 14 | 14
+ nn | 14 | 14
+ p9 | 14 | 14
+ u6 | 14 | 14
+ xd | 14 | 14
+ z6 | 14 | 14
+ zk | 14 | 14
+ a4 | 13 | 13
+ a9 | 13 | 13
+ bm | 13 | 13
+ cz | 13 | 13
+ f2 | 13 | 13
+ f3 | 13 | 13
+ f6 | 13 | 13
+ g6 | 13 | 13
+ h2 | 13 | 13
+ j1 | 13 | 13
+ k5 | 13 | 13
+ m1 | 13 | 13
+ mf | 13 | 13
+ mq | 13 | 13
+ np | 13 | 13
+ nx | 13 | 13
+ o5 | 13 | 13
+ p0 | 13 | 13
+ p1 | 13 | 13
+ s6 | 13 | 13
+ s9 | 13 | 13
+ v6 | 13 | 13
+ va | 13 | 13
+ vc | 13 | 13
+ xc | 13 | 13
+ z0 | 13 | 13
+ c9 | 12 | 12
+ d1 | 12 | 12
+ h0 | 12 | 12
+ h1 | 12 | 12
+ j8 | 12 | 12
+ k4 | 12 | 12
+ l5 | 12 | 12
+ l9 | 12 | 12
+ m2 | 12 | 12
+ m6 | 12 | 12
+ m9 | 12 | 12
+ n7 | 12 | 12
+ nv | 12 | 12
+ p3 | 12 | 12
+ vq | 12 | 12
+ vy | 12 | 12
+ x1 | 12 | 12
+ x2 | 12 | 12
+ z5 | 12 | 12
+ c1 | 11 | 11
+ c3 | 11 | 11
+ ct | 11 | 11
+ f9 | 11 | 11
+ g5 | 11 | 11
+ j6 | 11 | 11
+ l8 | 11 | 11
+ n1 | 11 | 11
+ v7 | 11 | 11
+ vv | 11 | 11
+ x5 | 11 | 11
+ x8 | 11 | 11
+ z2 | 11 | 11
+ b0 | 10 | 10
+ b2 | 10 | 10
+ b8 | 10 | 10
+ c6 | 10 | 10
+ f0 | 10 | 10
+ f7 | 10 | 10
+ h5 | 10 | 10
+ j3 | 10 | 10
+ j4 | 10 | 10
+ j7 | 10 | 10
+ l7 | 10 | 10
+ m0 | 10 | 10
+ m7 | 10 | 10
+ mm | 10 | 10
+ mn | 10 | 10
+ n8 | 10 | 10
+ v1 | 10 | 10
+ x0 | 10 | 10
+ x6 | 10 | 10
+ x7 | 10 | 10
+ x9 | 10 | 10
+ a8 | 9 | 9
+ b1 | 9 | 9
+ b4 | 9 | 9
+ b5 | 9 | 9
+ b6 | 9 | 9
+ ba | 9 | 9
+ bs | 9 | 9
+ c5 | 9 | 9
+ d5 | 9 | 9
+ k8 | 9 | 9
+ l0 | 9 | 9
+ m5 | 9 | 9
+ mk | 9 | 9
+ ms | 9 | 9
+ n3 | 9 | 9
+ n4 | 9 | 9
+ n6 | 9 | 9
+ ne | 9 | 9
+ v0 | 9 | 9
+ v3 | 9 | 9
+ v5 | 9 | 9
+ v8 | 9 | 9
+ b3 | 8 | 8
+ b7 | 8 | 8
+ c2 | 8 | 8
+ c7 | 8 | 8
+ c8 | 8 | 8
+ d9 | 8 | 8
+ k3 | 8 | 8
+ l3 | 8 | 8
+ m3 | 8 | 8
+ m4 | 8 | 8
+ n0 | 8 | 8
+ n5 | 8 | 8
+ v4 | 8 | 8
+ x4 | 8 | 8
+ z1 | 8 | 8
+ z9 | 8 | 8
+ l2 | 7 | 7
+ s2 | 7 | 7
+ z4 | 7 | 7
+ 1l | 6 | 6
+ 1o | 6 | 6
+ 1t | 6 | 6
+ 2e | 6 | 6
+ 2o | 6 | 6
+ c4 | 6 | 6
+ h3 | 6 | 6
+ n2 | 6 | 6
+ n9 | 6 | 6
+ v2 | 6 | 6
+ 2l | 5 | 5
+ 2u | 5 | 5
+ 3k | 5 | 5
+ 4p | 5 | 5
+ 18 | 4 | 4
+ 1a | 4 | 4
+ 1i | 4 | 4
+ 2s | 4 | 4
+ 3q | 4 | 4
+ 3y | 4 | 4
+ 5y | 4 | 4
+ 1f | 3 | 3
+ 1h | 3 | 3
+ 1m | 3 | 3
+ 1p | 3 | 3
+ 1s | 3 | 3
+ 1v | 3 | 3
+ 1x | 3 | 3
+ 27 | 3 | 3
+ 2a | 3 | 3
+ 2b | 3 | 3
+ 2h | 3 | 3
+ 2n | 3 | 3
+ 2p | 3 | 3
+ 2v | 3 | 3
+ 2y | 3 | 3
+ 3d | 3 | 3
+ 3w | 3 | 3
+ 3z | 3 | 3
+ 4a | 3 | 3
+ 4d | 3 | 3
+ 4v | 3 | 3
+ 4z | 3 | 3
+ 5e | 3 | 3
+ 5i | 3 | 3
+ 5k | 3 | 3
+ 5o | 3 | 3
+ 5t | 3 | 3
+ 6b | 3 | 3
+ 6d | 3 | 3
+ 6o | 3 | 3
+ 6w | 3 | 3
+ 7a | 3 | 3
+ 7h | 3 | 3
+ 7r | 3 | 3
+ 93 | 3 | 3
+ 10 | 2 | 2
+ 12 | 2 | 2
+ 15 | 2 | 2
+ 16 | 2 | 2
+ 19 | 2 | 2
+ 1b | 2 | 2
+ 1d | 2 | 2
+ 1g | 2 | 2
+ 1j | 2 | 2
+ 1n | 2 | 2
+ 1r | 2 | 2
+ 1u | 2 | 2
+ 1w | 2 | 2
+ 1y | 2 | 2
+ 20 | 2 | 2
+ 25 | 2 | 2
+ 2d | 2 | 2
+ 2i | 2 | 2
+ 2j | 2 | 2
+ 2k | 2 | 2
+ 2q | 2 | 2
+ 2r | 2 | 2
+ 2t | 2 | 2
+ 2w | 2 | 2
+ 2z | 2 | 2
+ 3b | 2 | 2
+ 3f | 2 | 2
+ 3h | 2 | 2
+ 3o | 2 | 2
+ 3p | 2 | 2
+ 3r | 2 | 2
+ 3s | 2 | 2
+ 3v | 2 | 2
+ 42 | 2 | 2
+ 43 | 2 | 2
+ 4f | 2 | 2
+ 4g | 2 | 2
+ 4h | 2 | 2
+ 4j | 2 | 2
+ 4m | 2 | 2
+ 4r | 2 | 2
+ 4s | 2 | 2
+ 4t | 2 | 2
+ 4u | 2 | 2
+ 5c | 2 | 2
+ 5f | 2 | 2
+ 5h | 2 | 2
+ 5p | 2 | 2
+ 5q | 2 | 2
+ 5z | 2 | 2
+ 6a | 2 | 2
+ 6h | 2 | 2
+ 6q | 2 | 2
+ 6r | 2 | 2
+ 6t | 2 | 2
+ 6y | 2 | 2
+ 70 | 2 | 2
+ 7c | 2 | 2
+ 7g | 2 | 2
+ 7k | 2 | 2
+ 7o | 2 | 2
+ 7u | 2 | 2
+ 8j | 2 | 2
+ 8w | 2 | 2
+ 9f | 2 | 2
+ 9y | 2 | 2
+ copyright | 2 | 2
+ foo | 1 | 3
+ bar | 1 | 2
+ 0e | 1 | 1
+ 0h | 1 | 1
+ 0p | 1 | 1
+ 0w | 1 | 1
+ 0z | 1 | 1
+ 11 | 1 | 1
+ 13 | 1 | 1
+ 14 | 1 | 1
+ 17 | 1 | 1
+ 1k | 1 | 1
+ 1q | 1 | 1
+ 1z | 1 | 1
+ 24 | 1 | 1
+ 26 | 1 | 1
+ 28 | 1 | 1
+ 2f | 1 | 1
+ 30 | 1 | 1
+ 345 | 1 | 1
+ 37 | 1 | 1
+ 39 | 1 | 1
+ 3a | 1 | 1
+ 3e | 1 | 1
+ 3g | 1 | 1
+ 3i | 1 | 1
+ 3m | 1 | 1
+ 3t | 1 | 1
+ 3u | 1 | 1
+ 40 | 1 | 1
+ 41 | 1 | 1
+ 44 | 1 | 1
+ 45 | 1 | 1
+ 48 | 1 | 1
+ 4b | 1 | 1
+ 4c | 1 | 1
+ 4i | 1 | 1
+ 4k | 1 | 1
+ 4n | 1 | 1
+ 4o | 1 | 1
+ 4q | 1 | 1
+ 4w | 1 | 1
+ 4y | 1 | 1
+ 51 | 1 | 1
+ 55 | 1 | 1
+ 56 | 1 | 1
+ 5a | 1 | 1
+ 5d | 1 | 1
+ 5g | 1 | 1
+ 5j | 1 | 1
+ 5l | 1 | 1
+ 5s | 1 | 1
+ 5u | 1 | 1
+ 5x | 1 | 1
+ 64 | 1 | 1
+ 68 | 1 | 1
+ 6c | 1 | 1
+ 6f | 1 | 1
+ 6g | 1 | 1
+ 6i | 1 | 1
+ 6k | 1 | 1
+ 6n | 1 | 1
+ 6p | 1 | 1
+ 6s | 1 | 1
+ 6u | 1 | 1
+ 6x | 1 | 1
+ 72 | 1 | 1
+ 7f | 1 | 1
+ 7j | 1 | 1
+ 7n | 1 | 1
+ 7p | 1 | 1
+ 7w | 1 | 1
+ 7y | 1 | 1
+ 7z | 1 | 1
+ 80 | 1 | 1
+ 82 | 1 | 1
+ 85 | 1 | 1
+ 8d | 1 | 1
+ 8i | 1 | 1
+ 8l | 1 | 1
+ 8n | 1 | 1
+ 8p | 1 | 1
+ 8t | 1 | 1
+ 8x | 1 | 1
+ 95 | 1 | 1
+ 97 | 1 | 1
+ 9a | 1 | 1
+ 9e | 1 | 1
+ 9h | 1 | 1
+ 9r | 1 | 1
+ 9w | 1 | 1
+ qwerti | 1 | 1
+(1146 rows)
+
+select reset_tsearch();
+NOTICE: TSearch cache cleaned
+ reset_tsearch
+---------------
+
+(1 row)
+
+select to_tsquery('default', 'skies & books');
+ to_tsquery
+----------------
+ 'sky' & 'book'
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ rank_cd
+---------
+ 1.2
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('granite&sea'));
+ rank_cd
+----------
+ 0.880303
+(1 row)
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('sea'));
+ rank_cd
+---------
+ 2
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+ get_covers
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('granite&sea'));
+ get_covers
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964
+(1 row)
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('sea'));
+ get_covers
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964
+(1 row)
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+', to_tsquery('sea&thousand&years'));
+ headline
+-----------------------------------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+', to_tsquery('granite&sea'));
+ headline
+----------------------------------------------------------------------------------------------
+ sea an hour one night
+An hour of storm to place
+The sculpture of these granite
+(1 row)
+
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+', to_tsquery('sea'));
+ headline
+-------------------------------------------------------------------------------------------
+ sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+(1 row)
+
--- /dev/null
+# Makefile for a gendict-generated tsearch2 dictionary module.
+# config.sh fills in the directory, module name and object-file placeholders
+# and rewrites the (initially empty) PG_CPPFLAGS line.
+subdir = contrib/CFG_DIR
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+MODULE_big = dict_CFG_MODNAME
+OBJS = CFG_OFILE
+DATA_built = dict_CFG_MODNAME.sql
+DOCS = README.CFG_MODNAME
+PG_CPPFLAGS =
+# The dictionary links against the static tsearch2 library.
+SHLIB_LINK = ../tsearch2/libtsearch2.a
+
+include $(top_srcdir)/contrib/contrib-global.mk
--- /dev/null
+Gendict - generate dictionary templates for contrib/tsearch2 module.
+
+This utility helps people create dictionaries for the contrib/tsearch2
+module. In particular, it has built-in support for Snowball stemmers.
+
+The programming API for tsearch2 dictionaries is described in the tsearch2
+documentation.
+
+
+Prerequisites:
+
+* PostgreSQL 7.3 and above.
+
+* You need tsearch2 module sources already compiled
+
+* Rights to install contrib modules
+
+Usage:
+
+ run config.sh without parameters to see options and arguments
+
+Usage:
+./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
+ -v - be verbose
+ -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
+ -C COMMENT - dictionary comment
+Generate Snowball stemmer:
+./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+ -s - generate Snowball wrapper
+ -p - prefix of Snowball's function, (default DICTNAME)
+Generate template dictionary:
+./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
+ -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
+ These files will be used in Makefile.
+ -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
+ These files will be used in Makefile and subinclude.h
+ -i - dictionary has init method
+
+
+Example 1:
+
+ Create Portuguese stemmer
+
+ 0. cd PGSQL_SRC/contrib/tsearch2/gendict
+
+ 1. Obtain stem.{c,h} files for Portuguese
+
+ wget http://snowball.tartarus.org/portuguese/stem.c
+ wget http://snowball.tartarus.org/portuguese/stem.h
+
+ 2. Create template files for Portuguese
+
+ ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
+
+ Note that the argument of the -p option must be *the same* as the name of the
+ stemming function in stem.c (without the _stem suffix).
+
+ A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
+ directory.
+
+ 3. Compile and install dictionary
+
+ cd PGSQL_SRC/contrib/dict_pt
+ make
+ make install
+
+ 4. Test it
+
+ Sample portuguese words with the stemmed forms are available
+ from http://snowball.tartarus.org/portuguese/stemmer.html
+
+ createdb testdict
+ psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
+ psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
+ psql -d testdict -c "select lexize('pt','bobagem');"
+ lexize
+ ---------
+ {bobag}
+ (1 row)
+
+ Here is what I have in pg_ts_dict table
+
+ psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
+ dict_name | dict_init | dict_initoption | dict_lexize | dict_comment
+ -----------+-----------+-----------------+-------------+---------------------------------
+ pt | 7177806 | | 7159330 | Snowball stemmer for Portuguese
+ (1 row)
+
+
+ Note that the dictionary and its corresponding entry in the tsearch2
+ configuration are now installed, and you may modify the entry using
+ plain SQL commands, for example to specify stop words.
+
+Example 2:
+
+ a) Simple template dictionary with init method
+
+ ./config.sh -n wow -v -i -C WOW
+
+ b) Create simple template dict (without init method):
+ ./config.sh -n wow -v -C WOW
+
+ The same as above, but the dictionary will not have an init method
+
+ Dictionaries obtained in a) and b) are fully working and ready
+ for use:
+ a) lowercase input word and remove it if it is a stop word
+ b) recognizes any word
+
+ c) Simple template dictionary with source files (with init method):
+
+ ./config.sh -n wow -v -i -c a.c -h a.h -C WOW
+
+ Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
+ These files will be used in Makefile.
+
+ Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
+ These files will be used in Makefile and subinclude.h
+
+ d) Simple template dictionary with source files (without init method):
+
+ ./config.sh -n wow -v -c a.c -h a.h -C WOW
+
+ The same as above, but the dictionary will not have an init method
+
+ After that you have sources in PGSQL_SRC/contrib/dict_wow and
+ you may edit them to create actual dictionary.
+
+ Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
+ for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file
--- /dev/null
+#!/bin/sh
+
+# Print the command-line synopsis and terminate the script with status 1.
+# Called when no dictionary name was given or getopts rejects an option.
+usage () {
+	echo Usage:
+	echo "$0 -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]"
+	echo ' -v - be verbose'
+	# The source tree root is called PGSQL_SRC everywhere else in the docs.
+	echo ' -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)'
+	echo ' -C COMMENT - dictionary comment'
+	echo Generate Snowball stemmer:
+	echo "$0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]"
+	echo ' -s - generate Snowball wrapper'
+	echo " -p - prefix of Snowball's function, (default DICTNAME)"
+	echo Generate template dictionary:
+	echo "$0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]"
+	echo ' -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
+	echo ' These files will be used in Makefile.'
+	echo ' -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+	echo ' These files will be used in Makefile and subinclude.h'
+	echo ' -i - dictionary has init method'
+	exit 1
+}
+
+# Settings filled in from the command line; an empty value or "no" means
+# the corresponding option was not given.
+dictname=
+stemmode=no
+verbose=no
+cfile=
+hfile=
+dir=
+hasinit=no
+comment=
+prefix=
+
+# Options n, c, C, h, d and p take an argument; v, i and s are plain flags.
+# Anything getopts rejects prints the usage text and exits.
+while getopts n:c:C:h:d:p:vis opt
+do
+ case "$opt" in
+ v) verbose=yes;;
+ s) stemmode=yes;;
+ i) hasinit=yes;;
+ n) dictname="$OPTARG";;
+ c) cfile="$OPTARG";;
+ h) hfile="$OPTARG";;
+ d) dir="$OPTARG";;
+ C) comment="$OPTARG";;
+ p) prefix="$OPTARG";;
+ \?) usage;;
+ esac
+done
+
+# A dictionary name is mandatory.
+[ -z "$dictname" ] && usage
+
+# The name is used in lower case from here on.
+dictname=`echo "$dictname" | tr '[:upper:]' '[:lower:]'`
+
+# A Snowball wrapper always has an init method and is always built from
+# stem.c / stem.h; its function prefix defaults to the dictionary name.
+if [ "$stemmode" = "yes" ] ; then
+	[ -z "$prefix" ] && prefix=$dictname
+	hasinit=yes
+	cfile="stem.c"
+	hfile="stem.h"
+fi
+
+# Default target directory is dict_<name>.
+[ -z "$dir" ] && dir="dict_$dictname"
+
+# The comment is substituted verbatim into SQL: either a quoted literal
+# or the keyword null.
+if [ -z "$comment" ]; then
+	comment=null
+else
+	comment="'$comment'"
+fi
+
+# Derive the object file list from the source file list.  Only a real
+# ".c" extension is rewritten; previously any name merely ending in the
+# letter "c" had its last character replaced.
+ofile=
+for f in $cfile
+do
+	f=`echo "$f" | sed 's#\.c$#.o#'`
+	ofile="$ofile $f"
+done
+
+# Add the object file of the generated wrapper/template itself.
+if [ "$stemmode" = "yes" ] ; then
+	ofile="$ofile dict_snowball.o"
+else
+	ofile="$ofile dict_tmpl.o"
+fi
+
+# In verbose mode, show the effective settings before generating anything.
+if [ $verbose = "yes" ]; then
+	echo "Dictname:" "'"$dictname"'"
+	echo "Snowball stemmer:" $stemmode
+	echo "Has init method:" $hasinit
+	if [ $stemmode = "yes" ]; then
+		echo "Function prefix:" $prefix
+	fi
+	echo "Source files:" $cfile
+	echo "Header files:" $hfile
+	echo "Object files:" $ofile
+	echo "Comment:" $comment
+	echo "Directory:" ../../$dir
+fi
+
+
+# Create the target directory under contrib/ unless it already exists.
+if [ $verbose = "yes" ]; then
+	echo -n 'Build directory... '
+fi
+if [ ! -d ../../$dir ] && ! mkdir ../../$dir ; then
+	echo "Can't create directory ../../$dir"
+	exit 1
+fi
+if [ $verbose = "yes" ]; then
+	echo ok
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build Makefile... '
+# Fill the CFG_DIR, CFG_MODNAME and CFG_OFILE placeholders of Makefile.IN
+# (first occurrence on each line) into a temporary file.
+sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp
+# Replace the PG_CPPFLAGS line; a Snowball wrapper additionally gets the
+# tsearch2/snowball include directory.
+if [ $stemmode = "yes" ] ; then
+ sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp > ../../$dir/Makefile
+else
+ sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp > ../../$dir/Makefile
+fi
+rm ../../$dir/Makefile.tmp
+[ $verbose = "yes" ] && echo ok
+
+
+[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in... '
+# Lines of sql.IN are tagged at line start with HASINIT/NOINIT and
+# ISSNOWBALL/NOSNOWBALL.  For each pair the tag of the selected variant is
+# stripped (keeping the line) and lines of the other variant are blanked.
+if [ $hasinit = "yes" ]; then
+ sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp
+ if [ $stemmode = "yes" ] ; then
+ sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+ else
+ sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+ fi
+ rm ../../$dir/dict_$dictname.sql.in.tmp
+else
+ # Without an init method the non-Snowball variant is always selected.
+ sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+
+if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then
+ [ $verbose = "yes" ] && echo -n 'Copy source and header files... '
+ if [ ${#cfile} -ne 0 ] ; then
+ if ! cp $cfile ../../$dir ; then
+ echo "Cant cp all or one of files: $cfile"
+ exit 1
+ fi
+ fi
+ if [ ${#hfile} -ne 0 ] ; then
+ if ! cp $hfile ../../$dir ; then
+ echo "Cant cp all or one of files: $hfile"
+ exit 1
+ fi
+ fi
+ [ $verbose = "yes" ] && echo ok
+fi
+
+
+[ $verbose = "yes" ] && echo -n 'Build sub-include header... '
+# Create/truncate the header with ':' instead of 'echo -n': handling of -n
+# by echo is not portable, and a literal "-n" line would end up in the
+# generated C header.
+: > ../../$dir/subinclude.h
+# One #include line per user-supplied header file.
+for i in $hfile
+do
+	echo "#include \"$i\"" >> ../../$dir/subinclude.h
+done
+[ $verbose = "yes" ] && echo ok
+
+
+# Generate the C source of the dictionary from the matching template.
+if [ $stemmode = "yes" ] ; then
+	[ $verbose = "yes" ] && echo -n 'Build Snowball stemmer... '
+	sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c
+else
+	# Progress message previously misspelled "dictionary".
+	[ $verbose = "yes" ] && echo -n 'Build dictionary... '
+	sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
+	# Keep the lines tagged for the selected variant (tag stripped) and
+	# blank out the lines tagged for the other one.
+	if [ $hasinit = "yes" ]; then
+		sed s#^HASINIT## < ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c
+	else
+		sed s#^HASINIT.*\$## < ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c
+	fi
+	rm ../../$dir/dict_tmpl.c.tmp
+fi
+[ $verbose = "yes" ] && echo ok
+
+
+# Write a one-line README into the target directory and finish.
+[ $verbose = "yes" ] && echo -n "Build README.$dictname... "
+if [ $stemmode = "yes" ] ; then
+	readme_text="Autogenerated Snowball's wrapper for $prefix"
+else
+	readme_text="Autogenerated template for $dictname"
+fi
+echo "$readme_text" > ../../$dir/README.$dictname
+[ $verbose = "yes" ] && echo ok
+
+echo All is done
+
--- /dev/null
+/*
+ * example of Snowball dictionary
+ * http://snowball.tartarus.org/
+ * Teodor Sigaev
+ */
+#include
+#include
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+#include "snowball/header.h"
+#include "subinclude.h"
+
+/* State of one Snowball dictionary, allocated by the init method. */
+typedef struct {
+ struct SN_env *z; /* Snowball environment created by the init method */
+ StopList stoplist; /* stop words; left empty when no init argument is given */
+ int (*stem)(struct SN_env * z); /* stemming function for this language */
+} DictSnowball;
+
+
+PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+/*
+ * Init method of the Snowball dictionary.
+ *
+ * Allocates a DictSnowball with malloc(), loads and sorts the stop list
+ * when argument 0 is non-NULL, creates the Snowball environment and
+ * returns a pointer to the structure.  Raises an error if memory for the
+ * structure or the environment cannot be obtained.
+ */
+Datum
+dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+	DictSnowball	*d = (DictSnowball*)malloc( sizeof(DictSnowball) );
+
+	if ( !d )
+		elog(ERROR, "No memory");
+	memset(d,0,sizeof(DictSnowball));
+	d->stoplist.wordop=lowerstr;
+
+	if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+		text	   *in = PG_GETARG_TEXT_P(0);
+		readstoplist(in, &(d->stoplist));
+		sortstoplist(&(d->stoplist));
+		PG_FREE_IF_COPY(in, 0);
+	}
+
+	d->z = CFG_PREFIX_create_env();
+	if (!d->z) {
+		freestoplist(&(d->stoplist));
+		/* d came from malloc(), so it must be released by hand before erroring out */
+		free(d);
+		elog(ERROR,"No memory");
+	}
+	d->stem=CFG_PREFIX_stem;
+
+	PG_RETURN_POINTER(d);
+}
+
+
--- /dev/null
+/*
+ * example of dictionary
+ * Teodor Sigaev
+ */
+#include
+#include
+#include
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+#include "subinclude.h"
+
+HASINIT /* State of the example dictionary: only a stop-word list. */
+HASINIT typedef struct {
+HASINIT StopList stoplist;
+HASINIT } DictExample;
+
+
+HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
+HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
+
+HASINIT /*
+HASINIT  * Init method: allocates the dictionary state with malloc(), loads and
+HASINIT  * sorts the stop list when argument 0 is non-NULL, and returns a pointer
+HASINIT  * to the state.  Raises an error if the allocation fails.
+HASINIT  */
+HASINIT Datum
+HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
+HASINIT
+HASINIT if ( !d )
+HASINIT elog(ERROR, "No memory");
+HASINIT memset(d,0,sizeof(DictExample));
+HASINIT
+HASINIT d->stoplist.wordop=lowerstr;
+HASINIT
+HASINIT /* Your INIT code */
+HASINIT
+HASINIT if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
+HASINIT text *in = PG_GETARG_TEXT_P(0);
+HASINIT readstoplist(in, &(d->stoplist));
+HASINIT sortstoplist(&(d->stoplist));
+HASINIT PG_FREE_IF_COPY(in, 0);
+HASINIT }
+HASINIT
+HASINIT PG_RETURN_POINTER(d);
+HASINIT }
+
+PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
+Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
+/*
+ * Lexize method: argument 1 is the input word, argument 2 its length
+ * (argument 0 is the dictionary state in the variant with an init method).
+ * Returns a NULL-terminated palloc'ed array holding at most one lexeme,
+ * a copy of the input word.  In the variant with an init method the array
+ * is empty when the word is empty or found in the stop list.
+ */
+Datum
+dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
+HASINIT DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
+ char *in = (char*)PG_GETARG_POINTER(1);
+ char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+ char **res=palloc(sizeof(char*)*2);
+
+ /* Your lexize code */
+HASINIT if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
+HASINIT pfree(txt);
+HASINIT res[0]=NULL;
+HASINIT } else
+ res[0]=txt;
+ res[1]=NULL;
+
+ PG_RETURN_POINTER(res);
+}
--- /dev/null
+-- Registers the generated dictionary: creates its C functions and adds a
+-- row to pg_ts_dict.  Lines carrying a leading tag are kept or blanked by
+-- config.sh depending on the dictionary variant being generated.
+SET search_path = public;
+BEGIN;
+
+HASINIT create function dinit_CFG_MODNAME(text)
+HASINIT returns internal
+HASINIT as 'MODULE_PATHNAME'
+HASINIT language 'C';
+
+NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
+NOSNOWBALL returns internal
+NOSNOWBALL as 'MODULE_PATHNAME'
+NOSNOWBALL language 'C'
+NOSNOWBALL with (isstrict);
+
+-- Columns are named explicitly so the statement does not depend on the
+-- physical column order of pg_ts_dict.
+insert into pg_ts_dict (dict_name, dict_init, dict_initoption, dict_lexize, dict_comment)
+select
+ 'CFG_MODNAME',
+HASINIT (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
+NOINIT null,
+ null,
+ISSNOWBALL (select oid from pg_proc where proname='snb_lexize'),
+NOSNOWBALL (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
+ CFG_COMMENT
+;
+
+
+END;
--- /dev/null
+#include "postgres.h"
+
+#include
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "gistidx.h"
+#include "crc32.h"
+
+PG_FUNCTION_INFO_V1(gtsvector_in);
+Datum gtsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_out);
+Datum gtsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_compress);
+Datum gtsvector_compress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_decompress);
+Datum gtsvector_decompress(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_consistent);
+Datum gtsvector_consistent(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_union);
+Datum gtsvector_union(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_same);
+Datum gtsvector_same(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_penalty);
+Datum gtsvector_penalty(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(gtsvector_picksplit);
+Datum gtsvector_picksplit(PG_FUNCTION_ARGS);
+
+/* Key of the pos'th GISTENTRY stored in the data area of varlena 'vec'. */
+#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
+/* Sum of GETBITBYTE(val, n) for n = 0..7; presumably the number of set bits in one byte. */
+#define SUMBIT(val) ( \
+ GETBITBYTE(val,0) + \
+ GETBITBYTE(val,1) + \
+ GETBITBYTE(val,2) + \
+ GETBITBYTE(val,3) + \
+ GETBITBYTE(val,4) + \
+ GETBITBYTE(val,5) + \
+ GETBITBYTE(val,6) + \
+ GETBITBYTE(val,7) \
+)
+
+
+/* Input function of the gtsvector type: not supported, always raises an error. */
+Datum
+gtsvector_in(PG_FUNCTION_ARGS)
+{
+ elog(ERROR, "Not implemented");
+ PG_RETURN_DATUM(0);
+}
+
+/* Output function of the gtsvector type: not supported, always raises an error. */
+Datum
+gtsvector_out(PG_FUNCTION_ARGS)
+{
+ elog(ERROR, "Not implemented");
+ PG_RETURN_DATUM(0);
+}
+
+/*
+ * qsort() comparator: orders two int4 values ascending.
+ */
+static int
+compareint(const void *a, const void *b)
+{
+	int4		lhs = *((int4 *) a);
+	int4		rhs = *((int4 *) b);
+
+	if (lhs > rhs)
+		return 1;
+	return (lhs < rhs) ? -1 : 0;
+}
+
+/*
+ * Sort the l values in a[] ascending, in place, and squeeze out
+ * duplicates.  Returns the number of distinct values left at the front
+ * of the array.
+ */
+static int
+uniqueint(int4 *a, int4 l)
+{
+	int4	   *ptr,
+			   *res;
+
+	/*
+	 * Zero or one element: nothing to sort or squeeze.  Testing only for
+	 * l == 1 made an empty array fall through and be reported as holding
+	 * one (uninitialised) element.
+	 */
+	if (l <= 1)
+		return l;
+
+	ptr = res = a;
+
+	qsort((void *) a, l, sizeof(int4), compareint);
+
+	/* res points at the last value kept so far, ptr scans the sorted input */
+	while (ptr - a < l)
+		if (*ptr != *res)
+			*(++res) = *ptr++;
+		else
+			ptr++;
+	return res + 1 - a;
+}
+
+/*
+ * Build a signature from an array key: clear the whole bit vector, then
+ * apply HASH() for every value stored in the array.
+ */
+static void
+makesign(BITVECP sign, GISTTYPE * a)
+{
+	int4	   *values = GETARR(a);
+	int4		nvalues = ARRNELEM(a);
+	int4		idx;
+
+	MemSet((void *) sign, 0, sizeof(BITVEC));
+	for (idx = 0; idx < nvalues; idx++)
+		HASH(sign, values[idx]);
+}
+
+/*
+ * GiST compress method.
+ *
+ * For a leaf key (a tsvector) a new entry is built: an ARRKEY holding the
+ * sorted, de-duplicated crc32 of every word, replaced by a SIGNKEY
+ * signature when the array is longer than TOAST_INDEX_TARGET.
+ * For a non-leaf signature key whose bytes are all 0xff, a new entry with
+ * the shorter SIGNKEY | ALLISTRUE key is built.
+ * In every other case the incoming entry is returned unchanged.
+ */
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GISTENTRY *retval = entry;
+
+ if (entry->leafkey)
+ { /* tsvector */
+ GISTTYPE *res;
+ tsvector *toastedval = (tsvector *) DatumGetPointer(entry->key);
+ tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+ int4 len;
+ int4 *arr;
+ WordEntry *ptr = ARRPTR(val);
+ char *words = STRPTR(val);
+
+ /* array key with room for one hash per word */
+ len = CALCGTSIZE(ARRKEY, val->size);
+ res = (GISTTYPE *) palloc(len);
+ res->len = len;
+ res->flag = ARRKEY;
+ arr = GETARR(res);
+ len = val->size;
+ while (len--)
+ {
+ *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
+ arr++;
+ ptr++;
+ }
+
+ /* sort the hashes and drop duplicates */
+ len = uniqueint(GETARR(res), val->size);
+ if (len != val->size)
+ {
+ /*
+ * there is a collision of hash-function; len is always less
+ * than val->size
+ */
+ len = CALCGTSIZE(ARRKEY, len);
+ res = (GISTTYPE *) repalloc((void *) res, len);
+ res->len = len;
+ }
+ /* free the detoasted copy, if detoasting made one */
+ if (val != toastedval)
+ pfree(val);
+
+ /* make signature, if array is too long */
+ if (res->len > TOAST_INDEX_TARGET)
+ {
+ GISTTYPE *ressign;
+
+ len = CALCGTSIZE(SIGNKEY, 0);
+ ressign = (GISTTYPE *) palloc(len);
+ ressign->len = len;
+ ressign->flag = SIGNKEY;
+ makesign(GETSIGN(ressign), res);
+ pfree(res);
+ res = ressign;
+ }
+
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(res),
+ entry->rel, entry->page,
+ entry->offset, res->len, FALSE);
+ }
+ else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+ !ISALLTRUE(DatumGetPointer(entry->key)))
+ {
+ int4 i,
+ len;
+ GISTTYPE *res;
+ BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
+
+ /* keep the entry as it is unless every signature byte is 0xff */
+ LOOPBYTE(
+ if ((sign[i] & 0xff) != 0xff)
+ PG_RETURN_POINTER(retval);
+ );
+
+ /* signature is saturated: store the ALLISTRUE form instead */
+ len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+ res = (GISTTYPE *) palloc(len);
+ res->len = len;
+ res->flag = SIGNKEY | ALLISTRUE;
+
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(res),
+ entry->rel, entry->page,
+ entry->offset, res->len, FALSE);
+ }
+ PG_RETURN_POINTER(retval);
+}
+
+/*
+ * GiST decompress method: detoast the key.  The incoming entry is
+ * returned as is when detoasting did not produce a new copy; otherwise a
+ * new entry pointing at the detoasted key is returned.
+ */
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+	GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
+	GISTENTRY  *retval;
+
+	/* same pointer back: nothing was detoasted */
+	if (key == (GISTTYPE *) DatumGetPointer(entry->key))
+		PG_RETURN_POINTER(entry);
+
+	retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+	gistentryinit(*retval, PointerGetDatum(key),
+				  entry->rel, entry->page,
+				  entry->offset, key->len, FALSE);
+
+	PG_RETURN_POINTER(retval);
+}
+
+/* Half-open range [arrb, arre) of int4 values searched by checkcondition_arr(). */
+typedef struct
+{
+ int4 *arrb; /* first element */
+ int4 *arre; /* one past the last element */
+} CHKVAL;
+
+/*
+ * Binary search: is val->val present in the array described by the
+ * CHKVAL passed as checkval?
+ */
+static bool
+checkcondition_arr(void *checkval, ITEM * val)
+{
+	CHKVAL	   *range = (CHKVAL *) checkval;
+	int4	   *lo = range->arrb;
+	int4	   *hi = range->arre;
+
+	/* the value, if present, always lies in [lo, hi) */
+	while (lo < hi)
+	{
+		int4	   *mid = lo + (hi - lo) / 2;
+
+		if (*mid < val->val)
+			lo = mid + 1;
+		else if (*mid == val->val)
+			return (true);
+		else
+			hi = mid;
+	}
+
+	return (false);
+}
+
+/*
+ * Test the bit selected by HASHVAL(val->val) in the signature passed as
+ * checkval.
+ */
+static bool
+checkcondition_bit(void *checkval, ITEM * val)
+{
+ return GETBIT(checkval, HASHVAL(val->val));
+}
+
+/*
+ * GiST consistent method.  An empty query never matches.  An array key
+ * is checked by executing the query against its values; a signature key
+ * matches unconditionally when it is ALLISTRUE and is otherwise checked
+ * by executing the query against the signature bits.
+ */
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+	QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
+	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+	GISTTYPE   *key = (GISTTYPE *) DatumGetPointer(entry->key);
+
+	if (!query->size)
+		PG_RETURN_BOOL(false);
+
+	if (!ISSIGNKEY(key))
+	{							/* only leaf pages */
+		CHKVAL		chkval;
+
+		chkval.arrb = GETARR(key);
+		chkval.arre = chkval.arrb + ARRNELEM(key);
+		PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+								  (void *) &chkval, true,
+								  checkcondition_arr));
+	}
+
+	if (ISALLTRUE(key))
+		PG_RETURN_BOOL(true);
+
+	PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+							  (void *) GETSIGN(key), false,
+							  checkcondition_bit));
+}
+
+/*
+ * Merge key 'add' into signature 'sbase'.  Returns 1, leaving sbase
+ * untouched, when 'add' is an ALLISTRUE signature; returns 0 otherwise.
+ */
+static int4
+unionkey(BITVECP sbase, GISTTYPE * add)
+{
+	int4		i;
+
+	if (!ISSIGNKEY(add))
+	{
+		/* array key: hash every value into the signature */
+		int4	   *values = GETARR(add);
+
+		for (i = 0; i < ARRNELEM(add); i++)
+			HASH(sbase, values[i]);
+		return 0;
+	}
+
+	if (ISALLTRUE(add))
+		return 1;
+	else
+	{
+		/* plain signature: OR it in byte by byte */
+		BITVECP		sadd = GETSIGN(add);
+
+		LOOPBYTE(
+			sbase[i] |= sadd[i];
+		);
+	}
+	return 0;
+}
+
+
+/*
+ * GiST union method: merge all keys of the entry vector into a single
+ * signature key.  The result is SIGNKEY | ALLISTRUE (no bits stored) as
+ * soon as one input is ALLISTRUE; otherwise it carries the merged bits.
+ * The size of the result is stored through the second argument.
+ */
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+ bytea *entryvec = (bytea *) PG_GETARG_POINTER(0);
+ int *size = (int *) PG_GETARG_POINTER(1);
+ BITVEC base;
+ int4 len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
+ int4 i;
+ int4 flag = 0;
+ GISTTYPE *result;
+
+ MemSet((void *) base, 0, sizeof(BITVEC));
+ for (i = 0; i < len; i++)
+ {
+ /* unionkey() reports an ALLISTRUE input; nothing more to merge then */
+ if (unionkey(base, GETENTRY(entryvec, i)))
+ {
+ flag = ALLISTRUE;
+ break;
+ }
+ }
+
+ flag |= SIGNKEY;
+ len = CALCGTSIZE(flag, 0);
+ result = (GISTTYPE *) palloc(len);
+ *size = result->len = len;
+ result->flag = flag;
+ if (!ISALLTRUE(result))
+ memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * GiST same method: store through the third argument whether keys a and
+ * b are equal.  Which comparison is used is decided by the kind of a.
+ */
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+	GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
+	GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
+	bool	   *result = (bool *) PG_GETARG_POINTER(2);
+	int4		i;
+
+	if (!ISSIGNKEY(a))
+	{							/* a and b ISARRKEY */
+		int4		lena = ARRNELEM(a),
+					lenb = ARRNELEM(b);
+
+		if (lena != lenb)
+			*result = false;
+		else
+		{
+			int4	   *ptra = GETARR(a),
+					   *ptrb = GETARR(b);
+
+			/* equal length: equal unless some position differs */
+			*result = true;
+			for (i = 0; *result && i < lena; i++)
+				if (ptra[i] != ptrb[i])
+					*result = false;
+		}
+	}
+	else if (ISALLTRUE(a) || ISALLTRUE(b))
+	{							/* then b also ISSIGNKEY */
+		/* equal only when both are ALLISTRUE */
+		*result = (ISALLTRUE(a) && ISALLTRUE(b)) ? true : false;
+	}
+	else
+	{
+		BITVECP		sa = GETSIGN(a),
+					sb = GETSIGN(b);
+
+		/* two plain signatures: compare byte by byte */
+		*result = true;
+		LOOPBYTE(
+			if (sa[i] != sb[i])
+			{
+				*result = false;
+				break;
+			}
+		);
+	}
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Add up SUMBIT() over every byte of the signature.
+ */
+static int4
+sizebitvec(BITVECP sign)
+{
+	int4		total = 0,
+				i;
+	char	   *cur = (char *) sign;
+
+	LOOPBYTE(
+		total += SUMBIT(*cur);
+		cur++;
+	);
+	return total;
+}
+
+/*
+ * Number of bit positions at which signatures a and b differ.
+ */
+static int
+hemdistsign(BITVECP a, BITVECP b) {
+	int			i;
+	int			ndiff = 0;
+
+	LOOPBIT(
+		ndiff += ( GETBIT(a,i) != GETBIT(b,i) ) ? 1 : 0;
+	);
+	return ndiff;
+}
+
+/*
+ * Distance between two signature keys.  An ALLISTRUE key carries no
+ * bits, so against it the distance is SIGLENBIT minus sizebitvec() of
+ * the other key (0 when both are ALLISTRUE).
+ */
+static int
+hemdist(GISTTYPE *a, GISTTYPE *b) {
+	if ( ISALLTRUE(a) && ISALLTRUE(b) )
+		return 0;
+	if ( ISALLTRUE(a) )
+		return SIGLENBIT-sizebitvec(GETSIGN(b));
+	if ( ISALLTRUE(b) )
+		return SIGLENBIT-sizebitvec(GETSIGN(a));
+
+	return hemdistsign( GETSIGN(a), GETSIGN(b) );
+}
+
+/*
+ * GiST penalty method: store through the third argument the cost of
+ * adding the new entry below the original entry, computed from the
+ * distance between their signatures.  An array key is converted to a
+ * signature first.
+ */
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);	/* always ISSIGNKEY */
+	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+	float	   *penalty = (float *) PG_GETARG_POINTER(2);
+	GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
+	GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
+
+	*penalty = 0.0;
+
+	if (!ISARRKEY(newval))
+		*penalty=hemdist(origval,newval);
+	else
+	{
+		BITVEC		newsign;
+
+		makesign(newsign, newval);
+
+		if ( ISALLTRUE(origval) )
+			*penalty=((float)(SIGLENBIT-sizebitvec(newsign)))/(float)(SIGLENBIT+1);
+		else
+			*penalty=hemdistsign(newsign,GETSIGN(origval));
+	}
+	PG_RETURN_POINTER(penalty);
+}
+
+/*
+ * Scratch record holding one entry in signature form while a page is split.
+ * Either allistrue is set (the entry stands for "every bit set" and sign is
+ * not filled in), or sign holds the entry's signature bits.
+ */
+typedef struct
+{
+ bool allistrue;
+ BITVEC sign;
+} CACHESIGN;
+
+/*
+ * Fill a CACHESIGN from an index key: array keys are hashed into a signature
+ * with makesign(), ALLISTRUE keys only set the flag, and ordinary signature
+ * keys have their bits copied.
+ */
+static void
+fillcache(CACHESIGN * item, GISTTYPE * key)
+{
+    if (ISARRKEY(key))
+    {
+        item->allistrue = false;
+        makesign(item->sign, key);
+        return;
+    }
+
+    if (ISALLTRUE(key))
+    {
+        item->allistrue = true;
+        return;
+    }
+
+    item->allistrue = false;
+    memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+}
+
+/*
+ * WISH_F(a,b,c) evaluates to -((a-b)^3) * c as a double.  The split routine
+ * passes the current left/right entry counts as a and b, so the term grows
+ * in favour of whichever side currently holds fewer entries.
+ */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/* Sort record used by the split routine: pos is an entry offset, cost its sort key. */
+typedef struct
+{
+ OffsetNumber pos;
+ int4 cost;
+} SPLITCOST;
+
+/* qsort comparator: orders SPLITCOST records by ascending cost. */
+static int
+comparecost(const void *a, const void *b)
+{
+    int4        lhs = ((SPLITCOST *) a)->cost;
+    int4        rhs = ((SPLITCOST *) b)->cost;
+
+    if (lhs > rhs)
+        return 1;
+    if (lhs < rhs)
+        return -1;
+    return 0;
+}
+
+
+/*
+ * Same distance as hemdist(), but computed on cached signatures: an
+ * allistrue entry counts as a signature with every bit set.
+ */
+static int
+hemdistcache(CACHESIGN *a, CACHESIGN *b)
+{
+    if (a->allistrue && b->allistrue)
+        return 0;
+    if (a->allistrue)
+        return SIGLENBIT-sizebitvec(b->sign);
+    if (b->allistrue)
+        return SIGLENBIT-sizebitvec(a->sign);
+
+    return hemdistsign(a->sign, b->sign);
+}
+
+/*
+ * GiST "picksplit" support function.
+ *
+ * Steps:
+ *   1. convert every entry to a cached signature (fillcache);
+ *   2. pick as seeds the pair of entries with the largest mutual distance;
+ *   3. start the left/right union keys from the two seeds;
+ *   4. sort all entries by |dist(seed_1) - dist(seed_2)| and hand each one to
+ *      the side whose union key is closer, with WISH_F biasing the choice
+ *      toward the side that currently has fewer entries;
+ *   5. store the offset lists and union keys in the GIST_SPLITVEC.
+ *
+ * Fix relative to the previous version: when the union key is ALLISTRUE and
+ * the entry is not, the entry's bit count was taken from
+ * GETSIGN(cache[j].sign).  cache[j].sign is already the raw signature, so
+ * GETSIGN() skipped GTHDRSIZE bytes of it and read past its end.  The
+ * signature is now passed directly.
+ */
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+ bytea *entryvec = (bytea *) PG_GETARG_POINTER(0);
+ GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ OffsetNumber k,
+ j;
+ GISTTYPE *datum_l,
+ *datum_r;
+ BITVECP union_l,
+ union_r;
+ int4 size_alpha,
+ size_beta;
+ int4 size_waste,
+ waste = -1;
+ int4 nbytes;
+ OffsetNumber seed_1 = 0,
+ seed_2 = 0;
+ OffsetNumber *left,
+ *right;
+ OffsetNumber maxoff;
+ BITVECP ptr;
+ int i;
+ CACHESIGN *cache;
+ SPLITCOST *costvector;
+
+ maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
+ nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+ v->spl_left = (OffsetNumber *) palloc(nbytes);
+ v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+ cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+ fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+
+ /* seed selection: most distant pair; the cache is filled on the first pass */
+ for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
+ for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+ if (k == FirstOffsetNumber)
+ fillcache(&cache[j], GETENTRY(entryvec, j));
+
+ size_waste=hemdistcache(&(cache[j]),&(cache[k]));
+ if (size_waste > waste) {
+ waste = size_waste;
+ seed_1 = k;
+ seed_2 = j;
+ }
+ }
+ }
+
+ left = v->spl_left;
+ v->spl_nleft = 0;
+ right = v->spl_right;
+ v->spl_nright = 0;
+
+ /* no pair was examined: fall back to the first two offsets */
+ if (seed_1 == 0 || seed_2 == 0) {
+ seed_1 = 1;
+ seed_2 = 2;
+ }
+
+ /* form initial union keys from the seeds */
+ if (cache[seed_1].allistrue) {
+ datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+ datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+ datum_l->flag = SIGNKEY | ALLISTRUE;
+ } else {
+ datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+ datum_l->len = CALCGTSIZE(SIGNKEY, 0);
+ datum_l->flag = SIGNKEY;
+ memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
+ }
+ if (cache[seed_2].allistrue) {
+ datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
+ datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
+ datum_r->flag = SIGNKEY | ALLISTRUE;
+ } else {
+ datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
+ datum_r->len = CALCGTSIZE(SIGNKEY, 0);
+ datum_r->flag = SIGNKEY;
+ memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
+ }
+
+ union_l=GETSIGN(datum_l);
+ union_r=GETSIGN(datum_r);
+ /* from here on maxoff also covers the one entry beyond the seed-selection range */
+ maxoff = OffsetNumberNext(maxoff);
+ fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+ /* sort entries by how differently they relate to the two seeds */
+ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+ for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+ costvector[j - 1].pos = j;
+ size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
+ size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
+ costvector[j - 1].cost = abs(size_alpha - size_beta);
+ }
+ qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+ for (k = 0; k < maxoff; k++) {
+ j = costvector[k].pos;
+ /* the seeds go to their own sides unconditionally */
+ if (j == seed_1) {
+ *left++ = j;
+ v->spl_nleft++;
+ continue;
+ } else if (j == seed_2) {
+ *right++ = j;
+ v->spl_nright++;
+ continue;
+ }
+
+ /* distance from entry j to the current left union */
+ if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+ if ( ISALLTRUE(datum_l) && cache[j].allistrue )
+ size_alpha=0;
+ else
+ size_alpha = SIGLENBIT-sizebitvec(
+ ( cache[j].allistrue ) ? GETSIGN(datum_l) : cache[j].sign
+ );
+ } else {
+ size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l));
+ }
+
+ /* distance from entry j to the current right union */
+ if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+ if ( ISALLTRUE(datum_r) && cache[j].allistrue )
+ size_beta=0;
+ else
+ size_beta = SIGLENBIT-sizebitvec(
+ ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign
+ );
+ } else {
+ size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r));
+ }
+
+ if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) {
+ /* add to the left side and widen its union key */
+ if (ISALLTRUE(datum_l) || cache[j].allistrue) {
+ if (! ISALLTRUE(datum_l) )
+ MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+ } else {
+ ptr=cache[j].sign;
+ LOOPBYTE(
+ union_l[i] |= ptr[i];
+ );
+ }
+ *left++ = j;
+ v->spl_nleft++;
+ } else {
+ /* add to the right side and widen its union key */
+ if (ISALLTRUE(datum_r) || cache[j].allistrue) {
+ if (! ISALLTRUE(datum_r) )
+ MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+ } else {
+ ptr=cache[j].sign;
+ LOOPBYTE(
+ union_r[i] |= ptr[i];
+ );
+ }
+ *right++ = j;
+ v->spl_nright++;
+ }
+ }
+
+ /* write FirstOffsetNumber into the slot after the last entry of each list */
+ *right = *left = FirstOffsetNumber;
+ pfree(costvector);
+ pfree(cache);
+ v->spl_ldatum = PointerGetDatum(datum_l);
+ v->spl_rdatum = PointerGetDatum(datum_r);
+
+ PG_RETURN_POINTER(v);
+}
--- /dev/null
+#ifndef __GISTIDX_H__
+#define __GISTIDX_H__
+
+/*
+#define GISTIDX_DEBUG
+*/
+
+/*
+ * signature defines
+ *
+ * A signature is a fixed-size bit array: SIGLENINT int4 words, i.e. SIGLEN
+ * bytes or SIGLENBIT bits.  BITVEC is the in-place array type, BITVECP a
+ * pointer to its first byte.
+ */
+
+#define BITBYTE 8
+#define SIGLENINT 63 /* >121 => key will toast, so it will not
+ * work !!! */
+#define SIGLEN ( sizeof(int4)*SIGLENINT )
+#define SIGLENBIT (SIGLEN*BITBYTE)
+
+typedef char BITVEC[SIGLEN];
+typedef char *BITVECP;
+
+/*
+ * Iteration helpers.  Both expect an integer variable named i to be declared
+ * by the caller and execute the statement(s) `a` once per index:
+ *   LOOPBYTE - i runs over the SIGLEN bytes of a signature
+ *   LOOPBIT  - i runs over the SIGLENBIT bits of a signature
+ * (The loop headers had been cut off after "i"; they are restored here.)
+ */
+#define LOOPBYTE(a) \
+ for(i=0;i<SIGLEN;i++) {\
+ a;\
+ }
+#define LOOPBIT(a) \
+ for(i=0;i<SIGLENBIT;i++) {\
+ a;\
+ }
+
+/*
+ * Bit access on a signature x at bit index i:
+ *   GETBYTE    - the byte that contains bit i (usable as an lvalue)
+ *   GETBITBYTE - bit i (0..7) of the single byte value x
+ *   CLRBIT / SETBIT / GETBIT - clear, set, read bit i of signature x
+ * Every macro argument and every expansion is parenthesized so the macros
+ * stay correct when given compound expressions.
+ */
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
+#define GETBITBYTE(x,i) ( (((char)(x)) >> (i)) & 0x01 )
+#define CLRBIT(x,i) ( GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) )
+#define SETBIT(x,i) ( GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
+
+/* NOTE(review): these shadow any abs()/min() the C library may provide. */
+#define abs(a) ((a) < (0) ? -(a) : (a))
+#define min(a,b) ((a) < (b) ? (a) : (b))
+/* HASHVAL maps a value to a bit index; HASH sets that bit in sign. */
+#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
+#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+
+
+/*
+ * type of index key
+ *
+ * len is the total size in bytes including the two-int4 header; flag is a
+ * combination of ARRKEY / SIGNKEY / ALLISTRUE; data is the payload: an int4
+ * array for ARRKEY, a signature for SIGNKEY, nothing when ALLISTRUE is set
+ * (see CALCGTSIZE).
+ */
+typedef struct
+{
+ int4 len;
+ int4 flag;
+ char data[1];
+} GISTTYPE;
+
+#define ARRKEY 0x01
+#define SIGNKEY 0x02
+#define ALLISTRUE 0x04
+
+/* flag tests; the argument is parenthesized so any pointer expression works */
+#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x) ( ((GISTTYPE*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x) ( ((GISTTYPE*)(x))->flag & ALLISTRUE )
+
+#define GTHDRSIZE ( sizeof(int4)*2 )
+/* bytes needed for a key with the given flag; len is the element count for ARRKEY */
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+
+/* payload accessors: x must point at a GISTTYPE header, not at a bare signature */
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETARR(x) ( (int4*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( ((GISTTYPE*)(x))->len - GTHDRSIZE )/sizeof(int4) )
+
+#endif
--- /dev/null
+#include
+#include
+#include
+#include
+
+#include "postgres.h"
+
+#include "spell.h"
+
+/* Longest word (in bytes) NormalizeWord will process; also sizes its work buffers. */
+#define MAXNORMLEN 56
+
+/* Case-insensitive test of whether x starts with y (0 means it does). */
+#define STRNCASECMP(x,y) (strncasecmp(x,y,strlen(y)))
+
+/* qsort comparator: orders SPELL entries by their word, using strcmp. */
+static int
+cmpspell(const void *s1, const void *s2)
+{
+    const SPELL *lhs = (const SPELL *) s1;
+    const SPELL *rhs = (const SPELL *) s2;
+
+    return strcmp(lhs->word, rhs->word);
+}
+
+/* Lower-case a NUL-terminated string in place, one byte at a time. */
+static void
+strlower(char *str)
+{
+    unsigned char *cur;
+
+    for (cur = (unsigned char *) str; *cur != '\0'; cur++)
+        *cur = tolower(*cur);
+}
+
+/*
+ * Backward string compare for suffix-tree operations: compares the two
+ * strings byte by byte starting from their last characters.  If one string
+ * is exhausted first, the shorter one sorts lower.  Returns -1, 0 or 1.
+ */
+static int
+strbcmp(const char *s1, const char *s2)
+{
+    int         rest1 = strlen(s1);
+    int         rest2 = strlen(s2);
+
+    while (rest1 > 0 && rest2 > 0)
+    {
+        char        c1 = s1[--rest1];
+        char        c2 = s2[--rest2];
+
+        if (c1 != c2)
+            return (c1 < c2) ? -1 : 1;
+    }
+
+    if (rest1 == rest2)
+        return 0;
+    return (rest1 < rest2) ? -1 : 1;
+}
+/*
+ * Like strbcmp(), but compares at most `count` trailing bytes.  Returns 0
+ * as soon as `count` bytes have matched; if a string runs out earlier, the
+ * shorter one sorts lower.
+ */
+static int
+strbncmp(const char *s1, const char *s2, size_t count)
+{
+    int         rest1 = strlen(s1);
+    int         rest2 = strlen(s2);
+    int         budget = count;
+
+    for (; rest1 > 0 && rest2 > 0 && budget > 0; budget--)
+    {
+        char        c1 = s1[--rest1];
+        char        c2 = s2[--rest2];
+
+        if (c1 != c2)
+            return (c1 < c2) ? -1 : 1;
+    }
+
+    if (budget == 0 || rest1 == rest2)
+        return 0;
+    return (rest1 < rest2) ? -1 : 1;
+}
+
+/*
+ * qsort comparator for AFFIX entries: first by type, then by replacement
+ * string - forward (strcmp) for type 'p', backward (strbcmp) otherwise.
+ */
+static int
+cmpaffix(const void *s1, const void *s2)
+{
+    const AFFIX *lhs = (const AFFIX *) s1;
+    const AFFIX *rhs = (const AFFIX *) s2;
+
+    if (lhs->type != rhs->type)
+        return (lhs->type < rhs->type) ? -1 : 1;
+
+    return (lhs->type == 'p') ? strcmp(lhs->repl, rhs->repl)
+                              : strbcmp(lhs->repl, rhs->repl);
+}
+
+/*
+ * Append one word with its affix flags to the dictionary.
+ *
+ * The Spell array grows in steps of 1024*20 entries.  The word is duplicated
+ * with strdup(); the flags are copied into the fixed-size flag field and are
+ * always NUL-terminated - flags longer than the field are truncated, because
+ * FindWord() scans the field with strchr().
+ *
+ * Always returns 0; out-of-memory is reported through elog(ERROR).
+ */
+int
+AddSpell(IspellDict * Conf,const char * word,const char *flag){
+    SPELL      *entry;
+
+    if(Conf->nspell>=Conf->mspell){
+        int         newcap;
+        SPELL      *grown;
+
+        if(Conf->mspell){
+            newcap=Conf->mspell+1024*20;
+            grown=(SPELL *)realloc(Conf->Spell,newcap*sizeof(SPELL));
+        }else{
+            newcap=1024*20;
+            grown=(SPELL *)malloc(newcap*sizeof(SPELL));
+        }
+        /* on failure the old array and capacity are left untouched */
+        if ( grown == NULL )
+            elog(ERROR,"No memory for AddSpell");
+        Conf->Spell=grown;
+        Conf->mspell=newcap;
+    }
+
+    entry=&Conf->Spell[Conf->nspell];
+    entry->word=strdup(word);
+    if ( !entry->word )
+        elog(ERROR,"No memory for AddSpell");
+    strncpy(entry->flag,flag,sizeof(entry->flag)-1);
+    entry->flag[sizeof(entry->flag)-1]='\0';
+    Conf->nspell++;
+    return(0);
+}
+
+
+/*
+ * Read a word list from `filename` and add every line to the dictionary.
+ *
+ * Each line is "word" or "word/FLAGS".  FLAGS is the run of ASCII letters
+ * following the slash.  The word part is lower-cased and cut at the first
+ * CR or LF before being handed to AddSpell().
+ *
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int
+ImportDictionary(IspellDict * Conf,const char *filename){
+    unsigned char line[BUFSIZ];
+    FILE       *fp;
+
+    fp=fopen(filename,"r");
+    if(fp==NULL)
+        return(1);
+
+    while(fgets(line,sizeof(line),fp)){
+        const unsigned char *flag = "";
+        unsigned char *cur;
+
+        cur=strchr(line,'/');
+        if(cur){
+            /* split the line: word before the slash, flags after it */
+            *cur=0;
+            cur++;
+            flag=cur;
+            for(;*cur;cur++){
+                int is_upper = (*cur>='A')&&(*cur<='Z');
+                int is_lower = (*cur>='a')&&(*cur<='z');
+
+                if(!(is_upper||is_lower)){
+                    *cur=0;
+                    break;
+                }
+            }
+        }
+
+        /* only the word is lower-cased: it now ends at the former slash */
+        strlower(line);
+
+        /* strip line terminators */
+        for(cur=line;*cur;cur++){
+            if(*cur=='\r'||*cur=='\n')
+                *cur=0;
+        }
+        AddSpell(Conf,line,flag);
+    }
+    fclose(fp);
+    return(0);
+}
+
+
+/*
+ * Look `word` up in the sorted dictionary.
+ *
+ * The search is limited to the index range recorded in SpellTree for the
+ * word's first byte.  On every round the middle, lowest and highest entries
+ * of the range are probed (in that order); then the range shrinks from both
+ * ends, and additionally jumps past the middle when the middle entry
+ * compares unequal.  An entry matches when its word is equal and either
+ * affixflag is 0 or the entry's flag string contains affixflag.
+ *
+ * Returns the matching entry, or NULL.
+ */
+static SPELL *
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+    int         bucket = (int)(*word) & 255;
+    int         lo = Conf->SpellTree.Left[bucket];
+    int         hi = Conf->SpellTree.Right[bucket];
+
+    if (lo == -1)
+        return (NULL);
+
+    while (lo <= hi) {
+        int         probe[3];
+        int         midcmp = 0;
+        int         k;
+
+        probe[0] = (lo + hi) >> 1;
+        probe[1] = lo;
+        probe[2] = hi;
+
+        for (k = 0; k < 3; k++) {
+            SPELL      *cand = &Conf->Spell[probe[k]];
+            int         cmp = strcmp(cand->word, word);
+
+            if (k == 0)
+                midcmp = cmp;
+            if (cmp == 0 &&
+                (affixflag == 0 || strchr(cand->flag, affixflag) != NULL))
+                return(cand);
+        }
+
+        if (midcmp < 0) {
+            lo = probe[0] + 1;
+            hi--;
+        } else if (midcmp > 0) {
+            hi = probe[0] - 1;
+            lo++;
+        } else {
+            lo++;
+            hi--;
+        }
+    }
+    return(NULL);
+}
+
+/*
+ * Append one affix rule to the dictionary.
+ *
+ *   flag - affix flag character the rule belongs to
+ *   mask - regular expression for the stem; stored anchored with a trailing
+ *          '$' for type 's' and a leading '^' otherwise
+ *   find - text put in place of `repl` when the affix is removed
+ *   repl - affix text as it appears on the inflected word
+ *   type - 's' for suffix; any other value is stored with the '^' anchor
+ *
+ * The AFFIX fields are fixed-size, so over-long strings are rejected with
+ * elog(ERROR) before anything is copied.  Previously they were copied
+ * unchecked and could overflow the fields.
+ *
+ * Always returns 0; errors are reported through elog(ERROR).
+ */
+int
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+    AFFIX      *slot;
+
+    /* mask needs room for the anchor character and the terminating NUL */
+    if (strlen(mask) + 2 > sizeof(slot->mask))
+        elog(ERROR,"Affix mask is too long");
+    if (strlen(find) + 1 > sizeof(slot->find))
+        elog(ERROR,"Affix find string is too long");
+    if (strlen(repl) + 1 > sizeof(slot->repl))
+        elog(ERROR,"Affix replace string is too long");
+
+    if(Conf->naffixes>=Conf->maffixes){
+        if(Conf->maffixes){
+            Conf->maffixes+=16;
+            Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+        }else{
+            Conf->maffixes=16;
+            Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+        }
+        if ( Conf->Affix == NULL )
+            elog(ERROR,"No memory for AddAffix");
+    }
+
+    slot = &Conf->Affix[Conf->naffixes];
+    if (type=='s') {
+        sprintf(slot->mask,"%s$",mask);
+    } else {
+        sprintf(slot->mask,"^%s",mask);
+    }
+    slot->compile = 1;      /* regex is compiled lazily on first use */
+    slot->flag=flag;
+    slot->type=type;
+
+    strcpy(slot->find,find);
+    strcpy(slot->repl,repl);
+    slot->replen=strlen(repl);
+    Conf->naffixes++;
+    return(0);
+}
+
+/*
+ * Copy `src` to `dist`, dropping every space, tab and '-' character.
+ * `dist` is NUL-terminated and returned.
+ */
+static char *
+remove_spaces(char *dist,char *src){
+    char       *out = dist;
+    char       *in;
+
+    for (in = src; *in; in++) {
+        switch (*in) {
+            case ' ':
+            case '-':
+            case '\t':
+                break;
+            default:
+                *out++ = *in;
+                break;
+        }
+    }
+    *out = 0;
+    return(dist);
+}
+
+
+/*
+ * Read an affix file and register every rule with AddAffix().
+ *
+ * Recognized lines (keywords are matched case-insensitively at line start):
+ *   "suffixes" / "prefixes" - select the type of the rules that follow
+ *   "flag X"                - set the flag for the rules that follow;
+ *                             '*' and spaces before X are skipped
+ *   "mask > find,repl" or "mask > repl" - a rule; '#' starts a comment
+ * Rule lines are lower-cased, and spaces, tabs and '-' are removed from
+ * each part.
+ *
+ * Fix relative to the previous version: the flag scan now stops at the end
+ * of the string.  strchr() also matches the terminating NUL, so a "flag"
+ * line with nothing but '*' or spaces before end of string used to walk
+ * past the buffer contents.
+ *
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int
+ImportAffixes(IspellDict * Conf,const char *filename){
+ unsigned char str[BUFSIZ];
+ unsigned char flag=0;
+ unsigned char mask[BUFSIZ]="";
+ unsigned char find[BUFSIZ]="";
+ unsigned char repl[BUFSIZ]="";
+ unsigned char *s;
+ int i;
+ int suffixes=0;
+ int prefixes=0;
+ FILE *affix;
+
+ if(!(affix=fopen(filename,"r")))
+ return(1);
+
+ while(fgets(str,sizeof(str),affix)){
+ if(!STRNCASECMP(str,"suffixes")){
+ suffixes=1;
+ prefixes=0;
+ continue;
+ }
+ if(!STRNCASECMP(str,"prefixes")){
+ suffixes=0;
+ prefixes=1;
+ continue;
+ }
+ if(!STRNCASECMP(str,"flag ")){
+ s=str+5;
+ /* test *s first: strchr() would otherwise match the terminator */
+ while(*s && strchr("* ",*s))
+ s++;
+ flag=*s;
+ continue;
+ }
+ /* rules before the first section header are ignored */
+ if((!suffixes)&&(!prefixes))continue;
+ if((s=strchr(str,'#')))*s=0;
+ if(!*str)continue;
+ strlower(str);
+ strcpy(mask,"");
+ strcpy(find,"");
+ strcpy(repl,"");
+ i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl);
+ /* str has been parsed already, so it is reused as scratch space */
+ remove_spaces(str,repl);strcpy(repl,str);
+ remove_spaces(str,find);strcpy(find,str);
+ remove_spaces(str,mask);strcpy(mask,str);
+ switch(i){
+ case 3:
+ break;
+ case 2:
+ /* "mask > repl": the single field is the replacement */
+ if(*find != '\0'){
+ strcpy(repl,find);
+ strcpy(find,"");
+ }
+ break;
+ default:
+ continue;
+ }
+
+ AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p');
+
+ }
+ fclose(affix);
+
+ return(0);
+}
+
+/*
+ * Sort the word list and build SpellTree: for every first byte that occurs,
+ * Left/Right receive the first and last index of the words starting with
+ * it.  Left is -1 for bytes that start no word.
+ */
+void
+SortDictionary(IspellDict * Conf){
+    size_t      idx;
+    int         prev = -1;
+
+    qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell);
+
+    for(idx = 0; idx < 256 ; idx++ )
+        Conf->SpellTree.Left[idx] = -1;
+
+    for(idx = 0; idx < Conf->nspell; idx++) {
+        int         first = (int)(*(Conf->Spell[idx].word)) & 255;
+
+        /* a new first byte opens a new range */
+        if (first != prev) {
+            Conf->SpellTree.Left[first] = idx;
+            prev = first;
+        }
+        Conf->SpellTree.Right[first] = idx;
+    }
+}
+
+/*
+ * Sort the affix table and build the prefix and suffix lookup trees.
+ *
+ * Type 'p' rules are indexed by the first byte of their replacement string,
+ * all other rules by its last byte (0 when the string is empty).  For each
+ * key byte, Left/Right receive the first and last index of the matching
+ * rules; both are -1 when there are none.
+ */
+void
+SortAffixes(IspellDict * Conf) {
+    int         lastkey_p = -1;
+    int         lastkey_s = -1;
+    size_t      idx;
+
+    if (Conf->naffixes > 1)
+        qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix);
+
+    for(idx = 0; idx < 256; idx++) {
+        Conf->PrefixTree.Left[idx] = Conf->PrefixTree.Right[idx] = -1;
+        Conf->SuffixTree.Left[idx] = Conf->SuffixTree.Right[idx] = -1;
+    }
+
+    for(idx = 0; idx < Conf->naffixes; idx++) {
+        AFFIX      *rule = &Conf->Affix[idx];
+        Tree_struct *tree;
+        int        *lastkey;
+        int         key;
+
+        if (rule->type == 'p') {
+            key = (int)(*(rule->repl)) & 255;
+            tree = &Conf->PrefixTree;
+            lastkey = &lastkey_p;
+        } else {
+            key = (rule->replen) ? (int)(rule->repl[rule->replen-1]) & 255 : 0;
+            tree = &Conf->SuffixTree;
+            lastkey = &lastkey_s;
+        }
+
+        if (*lastkey != key) {
+            tree->Left[key] = idx;
+            *lastkey = key;
+        }
+        tree->Right[key] = idx;
+    }
+}
+
+/*
+ * Try to undo one suffix rule on `word` (of length `len`).
+ *
+ * *res receives the backward comparison of the word's tail with the rule's
+ * replacement string; if it is non-zero the rule does not apply.  Otherwise
+ * the tail is replaced by the rule's `find` text, the candidate is matched
+ * against the rule's mask (compiled on first use), and it must be present
+ * in the dictionary with the rule's flag.
+ *
+ * Returns a palloc'd copy of the candidate, or NULL.
+ */
+static char *
+CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) {
+    regmatch_t  match[2];   /* workaround for apache&linux */
+    char        candidate[2*MAXNORMLEN] = "";
+    int         rc;
+
+    *res = strbncmp(word, Affix->repl, Affix->replen);
+    if (*res != 0)
+        return NULL;
+
+    /* word with its last replen bytes replaced by the find text */
+    strcpy(candidate, word);
+    strcpy(candidate+len-Affix->replen, Affix->find);
+
+    if (Affix->compile) {
+        rc = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+        if (rc) {
+            /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+            regfree(&(Affix->reg));
+            return(NULL);
+        }
+        Affix->compile = 0;
+    }
+
+    rc = regexec(&(Affix->reg),candidate,1,match,0);
+    if (rc == 0 && FindWord(Conf, candidate, Affix->flag))
+        return pstrdup(candidate);
+
+    return NULL;
+}
+
+/* NS sizes the regmatch_t scratch array; MAX_NORM is the capacity of the forms array. */
+#define NS 1
+#define MAX_NORM 512
+/*
+ * Try to undo one prefix rule on `word`.
+ *
+ * Returns the strncmp() result of the word against the rule's replacement
+ * string when they differ, so the caller can steer its search.  Returns 0
+ * when the prefix matched, and also when the mask fails to compile.
+ *
+ * On a prefix match the prefix is replaced by the rule's `find` text.  If
+ * the result matches the rule's mask, then:
+ *   - it is appended to the forms list when the dictionary has it with the
+ *     rule's flag;
+ *   - the first suffix rule registered for key `pi` is tried on it as well,
+ *     and a resulting form is appended too.
+ * `forms` is the start of the result array and `*cur` its write cursor; the
+ * array is kept NULL-terminated and never grows beyond MAX_NORM-1 entries.
+ */
+static int
+CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
+ char **forms, char ***cur ) {
+ regmatch_t subs[NS*2];
+ char newword[2*MAXNORMLEN] = "";
+ int err, ls, res, lres;
+ size_t newlen;
+ AFFIX *CAffix = Conf->Affix;
+
+ res = strncmp(word, Affix->repl, Affix->replen);
+ if (res != 0) {
+ return res;
+ }
+ /* newword = find text + the word without its prefix */
+ strcpy(newword, Affix->find);
+ strcat(newword, word+Affix->replen);
+
+ /* the mask is compiled lazily; compile stays 1 until that succeeds */
+ if (Affix->compile) {
+ err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
+ if(err){
+ /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
+ regfree(&(Affix->reg));
+ return (0);
+ }
+ Affix->compile = 0;
+ }
+ if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
+ SPELL * curspell;
+
+ if((curspell=FindWord(Conf, newword, Affix->flag))){
+ if ((*cur - forms) < (MAX_NORM-1)) {
+ **cur = pstrdup(newword);
+ (*cur)++; **cur = NULL;
+ }
+ }
+ newlen = strlen(newword);
+ /* additionally try the first suffix rule recorded for key pi */
+ ls = Conf->SuffixTree.Left[pi];
+ if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
+ **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
+ if (**cur) {
+ (*cur)++; **cur = NULL;
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * Return the normal (dictionary) forms of `word`.
+ *
+ * `word` is lower-cased in place.  The result is a palloc'd, NULL-terminated
+ * array of palloc'd strings holding at most MAX_NORM-1 forms, or NULL when
+ * no form was found.  NULL is also returned for an empty word and for words
+ * longer than MAXNORMLEN.
+ *
+ * Candidates are collected from three sources:
+ *   - the word itself, if it is in the dictionary;
+ *   - prefix rules indexed under the word's first byte (CheckPrefix);
+ *   - suffix rules indexed under key 0 and under the word's last byte
+ *     (CheckSuffix); the outer loop runs once for each of these two keys.
+ *
+ * Changes relative to the previous version: an empty word is rejected up
+ * front (it used to read word[-1]); the array element size is spelled
+ * sizeof(char *); unused locals are gone.
+ */
+char **
+NormalizeWord(IspellDict * Conf,char *word){
+    size_t      len;
+    char      **forms;
+    char      **cur;
+    AFFIX      *Affix;
+    int         ri, pi, ipi, lp, rp, cp, ls, rs;
+    int         lres, rres, cres = 0;
+
+    len=strlen(word);
+    if (len == 0 || len > MAXNORMLEN)
+        return(NULL);
+
+    strlower(word);
+
+    forms=(char **) palloc(MAX_NORM*sizeof(char *));
+    cur=forms;*cur=NULL;
+
+    ri = (int)(*word) & 255;          /* key for prefix rules: first byte */
+    pi = (int)(word[len-1]) & 255;    /* key for suffix rules: last byte */
+    Affix=(AFFIX*)Conf->Affix;
+
+    /* Check that the word itself is normal form */
+    if(FindWord(Conf, word, 0)){
+        *cur=pstrdup(word);
+        cur++;*cur=NULL;
+    }
+
+    /* Find all other NORMAL forms of the 'word' */
+
+    /* two passes: ipi == 0, then ipi == pi (pi is non-zero here) */
+    for (ipi = 0; ipi <= pi; ipi += pi) {
+
+        /* check prefix */
+        lp = Conf->PrefixTree.Left[ri];
+        rp = Conf->PrefixTree.Right[ri];
+        while (lp >= 0 && lp <= rp) {
+            cp = (lp + rp) >> 1;
+            cres = 0;
+            if ((cur - forms) < (MAX_NORM-1)) {
+                cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
+            }
+            if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+                (void) CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
+            }
+            if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) {
+                (void) CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
+            }
+            /* shrink from both ends; skip past the middle when it compared unequal */
+            if (cres < 0) {
+                rp = cp - 1;
+                lp++;
+            } else if (cres > 0) {
+                lp = cp + 1;
+                rp--;
+            } else {
+                lp++;
+                rp--;
+            }
+        }
+
+        /* check suffix: every rule in the range is tried, from both ends inward */
+        ls = Conf->SuffixTree.Left[ipi];
+        rs = Conf->SuffixTree.Right[ipi];
+        while (ls >= 0 && ls <= rs) {
+            if ( ((cur - forms) < (MAX_NORM-1)) ) {
+                *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
+                if (*cur) {
+                    cur++; *cur = NULL;
+                }
+            }
+            if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) {
+                *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
+                if (*cur) {
+                    cur++; *cur = NULL;
+                }
+            }
+            ls++;
+            rs--;
+        } /* end while */
+
+    } /* for ipi */
+
+    if(cur==forms){
+        pfree(forms);
+        return(NULL);
+    }
+    return(forms);
+}
+
+/*
+ * Release everything owned by the dictionary and reset it to all zeroes:
+ * the compiled regular expressions of the affix rules, every duplicated
+ * word, and the two arrays themselves.
+ *
+ * Fix relative to the previous version: the word-freeing loop was bounded
+ * by naffixes instead of nspell, so it leaked words when there were more
+ * words than affixes and read past the Spell array in the opposite case.
+ */
+void
+FreeIspell (IspellDict *Conf) {
+    int         i;
+    AFFIX      *Affix = (AFFIX *)Conf->Affix;
+
+    for (i = 0; i < Conf->naffixes; i++) {
+        /* compile == 0 marks a rule whose regex was compiled */
+        if (Affix[i].compile == 0) {
+            regfree(&(Affix[i].reg));
+        }
+    }
+    for (i = 0; i < Conf->nspell; i++) {
+        free( Conf->Spell[i].word );
+    }
+    free(Conf->Affix);
+    free(Conf->Spell);
+    memset( (void*)Conf, 0, sizeof(IspellDict) );
+    return;
+}
--- /dev/null
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include
+#include
+
+/* One dictionary word: heap-allocated text plus its affix flag characters. */
+typedef struct spell_struct {
+ char * word;
+ char flag[10];
+} SPELL;
+
+/*
+ * One affix rule.
+ *   flag    - affix flag character the rule belongs to
+ *   type    - rule kind character ('p' or 's' as passed to AddAffix)
+ *   mask    - regular expression text for the stem
+ *   find    - text restored in place of repl when the affix is removed
+ *   repl    - affix text as found on the inflected word
+ *   reg     - compiled form of mask
+ *   replen  - length of repl
+ *   compile - non-zero while reg still has to be compiled
+ */
+typedef struct aff_struct {
+ char flag;
+ char type;
+ char mask[33];
+ char find[16];
+ char repl[16];
+ regex_t reg;
+ size_t replen;
+ char compile;
+} AFFIX;
+
+/* Lookup table keyed by a byte value: first (Left) and last (Right) array index per key. */
+typedef struct Tree_struct {
+ int Left[256], Right[256];
+} Tree_struct;
+
+/*
+ * A loaded ispell dictionary.
+ *   maffixes / naffixes - allocated / used entries of Affix
+ *   nspell / mspell     - used / allocated entries of Spell
+ *   SpellTree, PrefixTree, SuffixTree - byte-keyed index ranges into Spell
+ *                                       and Affix
+ */
+typedef struct {
+ int maffixes;
+ int naffixes;
+ AFFIX * Affix;
+
+ int nspell;
+ int mspell;
+ SPELL *Spell;
+ Tree_struct SpellTree;
+ Tree_struct PrefixTree;
+ Tree_struct SuffixTree;
+
+} IspellDict;
+
+/* Lookup: returns a NULL-terminated array of normal forms, or NULL. */
+char ** NormalizeWord(IspellDict * Conf,char *word);
+/* Loading from files: both return 1 if the file cannot be opened, else 0. */
+int ImportAffixes(IspellDict * Conf, const char *filename);
+int ImportDictionary(IspellDict * Conf,const char *filename);
+
+/* Building blocks: add single entries, then sort before the first lookup. */
+int AddSpell(IspellDict * Conf,const char * word,const char *flag);
+int AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
+void SortDictionary(IspellDict * Conf);
+void SortAffixes(IspellDict * Conf);
+/* Releases all memory held by the dictionary and zeroes the structure. */
+void FreeIspell (IspellDict *Conf);
+
+#endif
--- /dev/null
+/*
+ * Simple config parser
+ * Teodor Sigaev
+ */
+#include
+#include
+#include
+
+#include "postgres.h"
+
+#include "dict.h"
+#include "common.h"
+
+/* States of the "key = value, key = value" parser. */
+#define CS_WAITKEY 0 /* before a key */
+#define CS_INKEY 1 /* inside a key */
+#define CS_WAITEQ 2 /* key finished, '=' expected */
+#define CS_WAITVALUE 3 /* after '=', before the value */
+#define CS_INVALUE 4 /* inside a double-quoted value */
+#define CS_IN2VALUE 5 /* inside an unquoted value */
+#define CS_WAITDELIM 6 /* value finished, ',' expected */
+#define CS_INESC 7 /* after a backslash; returns to CS_INVALUE */
+#define CS_IN2ESC 8 /* after a backslash; returns to CS_IN2VALUE */
+
+/*
+ * Return a palloc'd, NUL-terminated copy of the first `len` bytes of `ptr`
+ * with backslash escapes resolved: a backslash is dropped and the byte
+ * after it is kept as is.
+ *
+ * A backslash that is the very last byte has nothing to escape and is kept
+ * literally.  Previously that case stepped over the terminator and went on
+ * scanning memory behind the buffer.
+ */
+static char *
+nstrdup(char *ptr, int len) {
+    char       *res = palloc(len+1);
+    char       *rd;
+    char       *wr;
+
+    memcpy(res,ptr,len);
+    res[len]='\0';
+
+    /* unescape in place: wr never gets ahead of rd */
+    rd = wr = res;
+    while(*rd) {
+        if ( *rd == '\\' && *(rd+1) != '\0' )
+            rd++;
+        *wr=*rd; rd++; wr++;
+    }
+    *wr='\0';
+
+    return res;
+}
+
+/*
+ * Parse a configuration string of the form
+ *     key = value, key = "quoted value", ...
+ * into a palloc'd array of Map entries stored in *m.  The array is
+ * zero-filled, so the entry after the last parsed pair has NULL members.
+ *
+ * Keys consist of alphabetic characters.  Values are either double-quoted
+ * or run up to the next whitespace or comma; a backslash escapes the
+ * following character in both forms.  Syntax errors are reported through
+ * elog(ERROR).
+ *
+ * Changes relative to the previous version:
+ *   - the comma counter that sizes the array is an int; as a char it could
+ *     wrap around on long inputs and under-allocate the array;
+ *   - a backslash inside an unquoted value now enters CS_IN2ESC, so parsing
+ *     resumes in the unquoted-value state instead of the quoted one;
+ *   - characters are cast to unsigned char for the ctype tests, and
+ *     positions to int for the "%d" format.
+ */
+void
+parse_cfgdict(text *in, Map **m) {
+    Map        *mptr;
+    char       *ptr=VARDATA(in), *begin=NULL;
+    int         num=0;
+    int         state=CS_WAITKEY;
+
+    /* every pair but the last is followed by a comma: commas+1 bounds the pair count */
+    while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+        if ( *ptr==',' ) num++;
+        ptr++;
+    }
+
+    *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
+    memset(mptr, 0, sizeof(Map)*(num+2) );
+    ptr=VARDATA(in);
+    while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
+        if (state==CS_WAITKEY) {
+            if (isalpha((unsigned char) *ptr)) {
+                begin=ptr;
+                state=CS_INKEY;
+            } else if ( !isspace((unsigned char) *ptr) )
+                elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+        } else if (state==CS_INKEY) {
+            if ( isspace((unsigned char) *ptr) ) {
+                mptr->key=nstrdup(begin, ptr-begin);
+                state=CS_WAITEQ;
+            } else if ( *ptr=='=' ) {
+                mptr->key=nstrdup(begin, ptr-begin);
+                state=CS_WAITVALUE;
+            } else if ( !isalpha((unsigned char) *ptr) )
+                elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+        } else if ( state==CS_WAITEQ ) {
+            if ( *ptr=='=' )
+                state=CS_WAITVALUE;
+            else if ( !isspace((unsigned char) *ptr) )
+                elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+        } else if ( state==CS_WAITVALUE ) {
+            if ( *ptr=='"' ) {
+                begin=ptr+1;
+                state=CS_INVALUE;
+            } else if ( !isspace((unsigned char) *ptr) ) {
+                begin=ptr;
+                state=CS_IN2VALUE;
+            }
+        } else if ( state==CS_INVALUE ) {
+            if ( *ptr=='"' ) {
+                mptr->value = nstrdup(begin, ptr-begin);
+                mptr++;
+                state=CS_WAITDELIM;
+            } else if ( *ptr=='\\' )
+                state=CS_INESC;
+        } else if ( state==CS_IN2VALUE ) {
+            if ( isspace((unsigned char) *ptr) || *ptr==',' ) {
+                mptr->value = nstrdup(begin, ptr-begin);
+                mptr++;
+                state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
+            } else if ( *ptr=='\\' )
+                state=CS_IN2ESC;
+        } else if ( state==CS_WAITDELIM ) {
+            if ( *ptr==',' )
+                state=CS_WAITKEY;
+            else if ( !isspace((unsigned char) *ptr) )
+                elog(ERROR,"Syntax error in position %d near '%c'", (int) (ptr-VARDATA(in)), *ptr);
+        } else if ( state == CS_INESC ) {
+            /* the escaped character is taken as is */
+            state=CS_INVALUE;
+        } else if ( state == CS_IN2ESC ) {
+            state=CS_IN2VALUE;
+        } else
+            elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, (int) (ptr-VARDATA(in)), *ptr);
+        ptr++;
+    }
+
+    /* an unquoted value may be ended by the end of the input */
+    if (state==CS_IN2VALUE) {
+        mptr->value = nstrdup(begin, ptr-begin);
+        mptr++;
+    } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) )
+        elog(ERROR,"Unexpected end of line");
+}
+
+
--- /dev/null
+/*
+ * IO definitions for tsquery and mtsquery. These types
+ * are identical, but mtsquery is parsed with the text parser
+ * and morphology is applied as well.
+ * Internal structure:
+ * query tree, then string with original value.
+ * The query tree is stored in a plain (flat) layout. This means that in the
+ * array of nodes the right child is always the next item and the left child
+ * is at position item+item->left
+ * Teodor Sigaev
+ */
+#include "postgres.h"
+
+#include
+#include
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "ts_cfg.h"
+#include "tsvector.h"
+#include "crc32.h"
+#include "query.h"
+#include "rewrite.h"
+#include "common.h"
+
+
+/* Function-manager declarations for the SQL-callable entry points of this file. */
+PG_FUNCTION_INFO_V1(tsquery_in);
+Datum tsquery_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquery_out);
+Datum tsquery_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(exectsq);
+Datum exectsq(PG_FUNCTION_ARGS);
+
+/* same as exectsq with the two arguments swapped */
+PG_FUNCTION_INFO_V1(rexectsq);
+Datum rexectsq(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsquerytree);
+Datum tsquerytree(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery);
+Datum to_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_name);
+Datum to_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsquery_current);
+Datum to_tsquery_current(PG_FUNCTION_ARGS);
+
+/* token / node types produced by the query tokenizer and stored in the tree */
+#define END 0 /* end of input */
+#define ERR 1 /* syntax error */
+#define VAL 2 /* operand */
+#define OPR 3 /* operator: '!', '&' or '|' */
+#define OPEN 4 /* '(' */
+#define CLOSE 5 /* ')' */
+#define VALTRUE 6 /* for stop words */
+#define VALFALSE 7
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR 2
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ *
+ * For operands, val is the CRC32 of the operand text, and distance/length
+ * locate that text inside the operand buffer.  For operators, val is the
+ * operator character.  next links the nodes into a list, newest first.
+ */
+typedef struct NODE
+{
+ int2 weight;
+ int2 type;
+ int4 val;
+ int2 distance;
+ int2 length;
+ struct NODE *next;
+} NODE;
+
+/* Working state of the query parser. */
+typedef struct
+{
+ /* current read position in the query text */
+ char *buf;
+ /* WAITOPERAND or WAITOPERATOR */
+ int4 state;
+ /* number of currently open parentheses */
+ int4 count;
+ /* reverse polish notation in list (for temporary usage) */
+ NODE *str;
+ /* number in str */
+ int4 num;
+
+ /* user-friendly operand */
+ /* op: buffer of NUL-separated operand texts; lenop: its allocated size;
+  * curop: write position in it; sumlen: bytes stored so far */
+ int4 lenop;
+ int4 sumlen;
+ char *op;
+ char *curop;
+
+ /* state for value's parser */
+ TI_IN_STATE valstate;
+
+ /* tscfg */
+ int cfg_id;
+} QPRS_STATE;
+
+/*
+ * Parse an optional ":ABCD" weight suffix at `buf`.
+ *
+ * *weight receives a bit mask: bit 3 for A, bit 2 for B, bit 1 for C and
+ * bit 0 for D (letters are case-insensitive), or 0 when there is no suffix.
+ * Returns the position of the first character after the suffix.
+ *
+ * The character is cast to unsigned char before tolower(): passing a
+ * negative plain char to the ctype functions is undefined.
+ */
+static char*
+get_weight(char *buf, int2 *weight) {
+    *weight = 0;
+
+    if ( *buf != ':' )
+        return buf;
+
+    buf++;
+    while( *buf ) {
+        switch(tolower((unsigned char) *buf)) {
+            case 'a': *weight |= 1<<3; break;
+            case 'b': *weight |= 1<<2; break;
+            case 'c': *weight |= 1<<1; break;
+            case 'd': *weight |= 1; break;
+            default: return buf;
+        }
+        buf++;
+    }
+
+    return buf;
+}
+
+/*
+ * get token from query string
+ *
+ * Returns the token type (VAL, OPR, OPEN, CLOSE, END or ERR) and advances
+ * state->buf past the token.  For OPR, *val is the operator character.
+ * For VAL, *strval / *lenval describe the operand text and *weight holds
+ * the mask parsed from an optional ":ABCD" suffix.  Spaces between tokens
+ * are skipped.
+ */
+static int4
+gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+ while (1)
+ {
+ switch (state->state)
+ {
+ case WAITOPERAND:
+ /* '!' leaves the state unchanged: an operand is still expected */
+ if (*(state->buf) == '!')
+ {
+ (state->buf)++;
+ *val = (int4) '!';
+ return OPR;
+ }
+ else if (*(state->buf) == '(')
+ {
+ state->count++;
+ (state->buf)++;
+ return OPEN;
+ } else if ( *(state->buf) == ':' ) {
+ elog(ERROR,"Error at start of operand");
+ } else if (*(state->buf) != ' ') {
+ /* the operand text itself is read by the tsvector value parser */
+ state->valstate.prsbuf = state->buf;
+ state->state = WAITOPERATOR;
+ if (gettoken_tsvector(&(state->valstate)))
+ {
+ *strval = state->valstate.word;
+ *lenval = state->valstate.curpos - state->valstate.word;
+ state->buf = get_weight(state->valstate.prsbuf, weight);
+ return VAL;
+ }
+ else
+ elog(ERROR, "No operand");
+ }
+ break;
+ case WAITOPERATOR:
+ if (*(state->buf) == '&' || *(state->buf) == '|')
+ {
+ state->state = WAITOPERAND;
+ *val = (int4) *(state->buf);
+ (state->buf)++;
+ return OPR;
+ }
+ else if (*(state->buf) == ')')
+ {
+ (state->buf)++;
+ state->count--;
+ /* more ')' than '(' */
+ return (state->count < 0) ? ERR : CLOSE;
+ }
+ else if (*(state->buf) == '\0')
+ /* end of input with a '(' still open is an error */
+ return (state->count) ? ERR : END;
+ else if (*(state->buf) != ' ')
+ return ERR;
+ break;
+ default:
+ return ERR;
+ break;
+ }
+ /* only reached for a skipped space */
+ (state->buf)++;
+ }
+ return END;
+}
+
+/*
+ * Prepend a new node to the reverse polish notation list.
+ *
+ * distance and lenval must be below MAXSTRPOS and MAXSTRLEN respectively;
+ * otherwise an error is raised.
+ */
+static void
+pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+    NODE       *node;
+
+    if (distance >= MAXSTRPOS)
+        elog(ERROR, "Value is too big");
+    if (lenval >= MAXSTRLEN)
+        elog(ERROR, "Operand is too long");
+
+    node = (NODE *) palloc(sizeof(NODE));
+    node->type = type;
+    node->val = val;
+    node->weight = weight;
+    node->distance = distance;
+    node->length = lenval;
+
+    /* link in at the head of the list */
+    node->next = state->str;
+    state->str = node;
+    state->num++;
+}
+
+/*
+ * Operand handler used for plain tsquery parsing: pushes a node for the
+ * operand (val = CRC32 of its text) and appends the text itself, followed
+ * by a NUL byte, to the operand buffer, growing the buffer as needed.
+ */
+static void
+pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
+{
+    int4        used;
+
+    if (lenval >= MAXSTRLEN)
+        elog(ERROR, "Word is too long");
+
+    /* offset at which this operand's text will be stored */
+    used = state->curop - state->op;
+
+    pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
+              used, lenval, weight);
+
+    if (used + lenval + 1 >= state->lenop)
+    {
+        /* double the buffer until text plus terminator fit */
+        do
+        {
+            state->lenop *= 2;
+        } while (used + lenval + 1 >= state->lenop);
+
+        state->op = (char *) repalloc((void *) state->op, state->lenop);
+        state->curop = state->op + used;
+    }
+
+    memcpy((void *) state->curop, (void *) strval, lenval);
+    state->curop[lenval] = '\0';
+    state->curop += lenval + 1;
+    state->sumlen += lenval + 1;
+}
+
+/*
+ * Operand handler used for morphological parsing: the operand text is run
+ * through the text parser of the configuration state->cfg_id.  Every
+ * resulting word is pushed as an operand, and the words are joined with
+ * '&' operators.  When the parser yields no word at all, a VALTRUE node is
+ * pushed instead.
+ *
+ * The loop header had been cut off after "count"; it is restored to iterate
+ * over the prs.curwords parsed words.
+ */
+static void
+pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
+{
+    int4        count = 0;
+    PRSTEXT     prs;
+
+    prs.lenwords = 32;
+    prs.curwords = 0;
+    prs.pos = 0;
+    prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+    parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
+
+    for(count=0;count<prs.curwords;count++) {
+        pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+        pfree( prs.words[count].word );
+        /* every word after the first is AND-ed with what precedes it */
+        if (count)
+            pushquery(state, OPR, (int4) '&', 0, 0, 0 );
+    }
+    pfree(prs.words);
+
+    /* XXX */
+    if ( prs.curwords==0 )
+        pushval_asis(state, VALTRUE, 0, 0, 0);
+}
+
+/* capacity of the pending-operator stack used by each makepol() invocation */
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ *
+ * Reads tokens until END or a closing parenthesis and pushes nodes in
+ * reverse polish order.  Operands are handed to the pushval callback.
+ * Operators wait on a local stack until their operand has been pushed.
+ * A parenthesized group is handled by a recursive call.
+ * Returns END on success; a syntax error is raised through elog(ERROR).
+ */
+static int4
+makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
+{
+ int4 val,
+ type;
+ int4 lenval;
+ char *strval;
+ int4 stack[STACKDEPTH];
+ int4 lenstack = 0;
+ int2 weight;
+
+ while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+ {
+ switch (type)
+ {
+ case VAL:
+ (*pushval) (state, VAL, strval, lenval, weight);
+ /* the operand completes any pending '&' and '!' operators */
+ while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+ stack[lenstack - 1] == (int4) '!'))
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ }
+ break;
+ case OPR:
+ /* '|' is emitted at once when operators are pending; otherwise stacked */
+ if (lenstack && val == (int4) '|')
+ pushquery(state, OPR, val, 0, 0, 0);
+ else
+ {
+ if (lenstack == STACKDEPTH)
+ elog(ERROR, "Stack too short");
+ stack[lenstack] = val;
+ lenstack++;
+ }
+ break;
+ case OPEN:
+ /* parse the parenthesized group recursively; it then acts as one operand */
+ if (makepol(state, pushval) == ERR)
+ return ERR;
+ if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+ stack[lenstack - 1] == (int4) '!'))
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ }
+ break;
+ case CLOSE:
+ /* end of this group: flush the remaining operators */
+ while (lenstack)
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ };
+ return END;
+ break;
+ case ERR:
+ default:
+ elog(ERROR, "Syntax error");
+ return ERR;
+
+ }
+ }
+ /* end of input: flush the remaining operators */
+ while (lenstack)
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ };
+ return END;
+}
+
+/*
+ * Context handed to the condition checker when a query is run against a
+ * tsvector: arrb/arre delimit the vector's WordEntry array (arre is one
+ * past the last entry), values is the vector's string area, operand the
+ * query's operand text area.
+ */
+typedef struct
+{
+ WordEntry *arrb;
+ WordEntry *arre;
+ char *values;
+ char *operand;
+} CHKVAL;
+
+/*
+ * Compare a vector entry with a query operand: shorter strings sort first,
+ * strings of equal length are ordered by strncmp().
+ */
+static int4
+ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
+{
+    if (ptr->len != item->length)
+        return (ptr->len > item->length) ? 1 : -1;
+
+    return strncmp(&(chkval->values[ptr->pos]),
+                   &(chkval->operand[item->distance]),
+                   item->length);
+}
+
+/*
+ * check weight info
+ *
+ * Returns true if at least one position of the vector entry `val` carries
+ * a weight selected by the operand's weight mask.  The positions follow
+ * the entry's (short-aligned) text in the string area: a uint16 count and
+ * then that many WordEntryPos items.
+ *
+ * The mask test had been cut off after "1<"; it is restored to test the
+ * bit (1 << ptr->weight) of the operand's mask.
+ */
+static bool
+checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
+    WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
+    uint16      len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
+
+    while (len--) {
+        if ( item->weight & ( 1<<ptr->weight ) )
+            return true;
+        ptr++;
+    }
+    return false;
+}
+
+/*
+ * Is the operand `val` present in the vector described by checkval?
+ *
+ * Binary search over the vector's entries, ordered as by ValCompare().
+ * When the operand carries a weight mask and the matching entry has
+ * positions, the weights are checked as well.
+ */
+static bool
+checkcondition_str(void *checkval, ITEM * val)
+{
+    CHKVAL     *ctx = (CHKVAL *) checkval;
+    WordEntry  *lo = ctx->arrb;
+    WordEntry  *hi = ctx->arre;
+
+    /* the candidate range is [lo, hi) */
+    while (lo < hi)
+    {
+        WordEntry  *mid = lo + (hi - lo) / 2;
+        int         cmp = ValCompare(ctx, mid, val);
+
+        if (cmp < 0)
+            lo = mid + 1;
+        else if (cmp > 0)
+            hi = mid;
+        else
+        {
+            if (val->weight && mid->haspos)
+                return checkclass_str(ctx, mid, val);
+            return true;
+        }
+    }
+
+    return (false);
+}
+
+/*
+ * Evaluate the query tree rooted at curitem.
+ *
+ * VAL nodes are decided by the chkcond callback.  For operator nodes the
+ * right operand is the next item and the left operand is at
+ * curitem + curitem->left.  '&' and '|' evaluate the left operand first and
+ * the right one only if needed.  When calcnot is false, a '!' node yields
+ * true without evaluating its operand.
+ */
+bool
+TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
+{
+    if (curitem->type == VAL)
+        return (*chkcond) (checkval, curitem);
+
+    switch (curitem->val)
+    {
+        case (int4) '!':
+            if (!calcnot)
+                return true;
+            if (TS_execute(curitem + 1, checkval, calcnot, chkcond))
+                return false;
+            return true;
+
+        case (int4) '&':
+            if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+                return false;
+            return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+
+        default:
+            /* |-operator */
+            if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
+                return true;
+            return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+    }
+}
+
+/*
+ * boolean operations
+ *
+ * rexectsq is exectsq with its two arguments swapped.
+ */
+Datum
+rexectsq(PG_FUNCTION_ARGS)
+{
+    Datum first = PG_GETARG_DATUM(1);
+    Datum second = PG_GETARG_DATUM(0);
+
+    return DirectFunctionCall2(exectsq, first, second);
+}
+
+/*
+ * Match a tsvector (argument 0) against a query (argument 1).
+ * An empty vector or an empty query never matches.
+ */
+Datum
+exectsq(PG_FUNCTION_ARGS)
+{
+    tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+    QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+    bool result = false;
+
+    if (val->size && query->size)
+    {
+        CHKVAL chkval;
+
+        chkval.arrb = ARRPTR(val);
+        chkval.arre = chkval.arrb + val->size;
+        chkval.values = STRPTR(val);
+        chkval.operand = GETOPERAND(query);
+
+        /* calcnot = true: negations are really evaluated here */
+        result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
+    }
+
+    PG_FREE_IF_COPY(val, 0);
+    PG_FREE_IF_COPY(query, 1);
+    PG_RETURN_BOOL(result);
+}
+
+/*
+ * find left operand in polish notation view
+ *
+ * Walks the item array starting at ptr[*pos] and fills in the 'left'
+ * field of every item in that subtree; *pos is advanced past the subtree.
+ * Values get left = 0, '!' gets left = 1, and a binary operator gets the
+ * distance from itself to the operand that follows its first subtree.
+ */
+static void
+findoprnd(ITEM * ptr, int4 *pos)
+{
+#ifdef BS_DEBUG
+ elog(DEBUG3, (ptr[*pos].type == OPR) ?
+ "%d %c" : "%d %d ", *pos, ptr[*pos].val);
+#endif
+ if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
+ {
+ ptr[*pos].left = 0;
+ (*pos)++;
+ }
+ else if (ptr[*pos].val == (int4) '!')
+ {
+ ptr[*pos].left = 1;
+ (*pos)++;
+ findoprnd(ptr, pos);
+ }
+ else
+ {
+ /* binary operator: remember where it is before descending */
+ ITEM *curitem = &ptr[*pos];
+ int4 tmp = *pos;
+
+ (*pos)++;
+ /* first subtree starts right after the operator */
+ findoprnd(ptr, pos);
+ /* *pos now indexes the start of the second subtree */
+ curitem->left = *pos - tmp;
+ findoprnd(ptr, pos);
+ }
+}
+
+
+/*
+ * input
+ *
+ * Parse the query text in 'buf' and build a palloc'd QUERYTYPE.
+ *   buf     - query text to parse
+ *   pushval - callback handed to makepol() for each parsed value
+ *   cfg_id  - stored in the parser state (state.cfg_id)
+ * Raises ERROR "Empty query" if parsing produced no items.
+ */
+static QUERYTYPE *
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+{
+ QPRS_STATE state;
+ int4 i;
+ QUERYTYPE *query;
+ int4 commonlen;
+ ITEM *ptr;
+ NODE *tmp;
+ int4 pos = 0;
+
+#ifdef BS_DEBUG
+ char pbuf[16384],
+ *cur;
+#endif
+
+ /* init state */
+ state.buf = buf;
+ state.state = WAITOPERAND;
+ state.count = 0;
+ state.num = 0;
+ state.str = NULL;
+ state.cfg_id=cfg_id;
+
+ /* init value parser's state */
+ state.valstate.oprisdelim = true;
+ state.valstate.len = 32;
+ state.valstate.word = (char *) palloc(state.valstate.len);
+
+ /* init list of operand */
+ state.sumlen = 0;
+ state.lenop = 64;
+ state.curop = state.op = (char *) palloc(state.lenop);
+ *(state.curop) = '\0';
+
+ /* parse query & make polish notation (postfix, but in reverse order) */
+ makepol(&state, pushval);
+ pfree(state.valstate.word);
+ if (!state.num)
+ elog(ERROR, "Empty query");
+
+ /* make finish struct */
+ commonlen = COMPUTESIZE(state.num, state.sumlen);
+ query = (QUERYTYPE *) palloc(commonlen);
+ query->len = commonlen;
+ query->size = state.num;
+ ptr = GETQUERY(query);
+
+ /* set item in polish notation */
+ /* copies each list node into the array and frees the node as it goes */
+ for (i = 0; i < state.num; i++)
+ {
+ ptr[i].weight = state.str->weight;
+ ptr[i].type = state.str->type;
+ ptr[i].val = state.str->val;
+ ptr[i].distance = state.str->distance;
+ ptr[i].length = state.str->length;
+ tmp = state.str->next;
+ pfree(state.str);
+ state.str = tmp;
+ }
+
+ /* set user friendly-operand view */
+ memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+ pfree(state.op);
+
+ /* set left operand's position for every operator */
+ pos = 0;
+ findoprnd(ptr, &pos);
+
+#ifdef BS_DEBUG
+ /* NOTE(review): pbuf is a fixed 16384-byte buffer written with sprintf */
+ cur = pbuf;
+ *cur = '\0';
+ for (i = 0; i < query->size; i++)
+ {
+ if (ptr[i].type == OPR)
+ sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
+ else
+ sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
+ cur = strchr(cur, '\0');
+ }
+ elog(DEBUG3, "POR: %s", pbuf);
+#endif
+
+ return query;
+}
+
+/*
+ * in without morphology
+ *
+ * Parses argument 0 with queryin(), using pushval_asis as the value
+ * callback and 0 as the configuration id.
+ */
+Datum
+tsquery_in(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0));
+}
+
+/*
+ * out function
+ *
+ * State used by infix() while printing a query.
+ */
+typedef struct
+{
+ /* next query item to print; advanced as items are consumed */
+ ITEM *curpol;
+ /* start of the growable output buffer */
+ char *buf;
+ /* current write position inside buf */
+ char *cur;
+ /* base of the query's operand strings (indexed by ITEM.distance) */
+ char *op;
+ /* allocated size of buf in bytes */
+ int4 buflen;
+} INFIX;
+
+/*
+ * Grow inf->buf (doubling buflen) until 'addsize' more bytes plus a
+ * terminator fit after inf->cur.  inf->cur is re-based after each
+ * repalloc.  Arguments are parenthesized so any expression can be passed.
+ */
+#define RESIZEBUF(inf,addsize) \
+while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
+{ \
+ int4 len = (inf)->cur - (inf)->buf; \
+ (inf)->buflen *= 2; \
+ (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
+ (inf)->cur = (inf)->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ *
+ * Prints the subtree at in->curpol into in->buf and advances in->curpol
+ * past it.  'first' suppresses the parentheses that are otherwise put
+ * around a '|' expression.
+ */
+static void
+infix(INFIX * in, bool first)
+{
+ if (in->curpol->type == VAL)
+ {
+ char *op = in->op + in->curpol->distance;
+
+ /* room for every char escaped, two quotes, and ':' plus four weight letters */
+ RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
+ *(in->cur) = '\'';
+ in->cur++;
+ while (*op)
+ {
+ /* a quote inside the operand is escaped with a backslash */
+ if (*op == '\'')
+ {
+ *(in->cur) = '\\';
+ in->cur++;
+ }
+ *(in->cur) = *op;
+ op++;
+ in->cur++;
+ }
+ *(in->cur) = '\'';
+ in->cur++;
+ /* weight mask is printed as letters: bit 3 = A, bit 2 = B, bit 1 = C, bit 0 = D */
+ if ( in->curpol->weight ) {
+ *(in->cur) = ':'; in->cur++;
+ if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
+ if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
+ if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
+ if ( in->curpol->weight & 1 ) { *(in->cur) = 'D'; in->cur++; }
+ }
+ *(in->cur) = '\0';
+ in->curpol++;
+ }
+ else if (in->curpol->val == (int4) '!')
+ {
+ bool isopr = false;
+
+ RESIZEBUF(in, 1);
+ *(in->cur) = '!';
+ in->cur++;
+ *(in->cur) = '\0';
+ in->curpol++;
+ /* an operator operand of '!' is wrapped in parentheses */
+ if (in->curpol->type == OPR)
+ {
+ isopr = true;
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+ infix(in, isopr);
+ if (isopr)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+ else
+ {
+ int4 op = in->curpol->val;
+ INFIX nrm;
+
+ in->curpol++;
+ if (op == (int4) '|' && !first)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+
+ /* the right operand is stored first, so it goes to a scratch buffer */
+ nrm.curpol = in->curpol;
+ nrm.op = in->op;
+ nrm.buflen = 16;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+ /* get right operand */
+ infix(&nrm, false);
+
+ /* get & print left operand */
+ in->curpol = nrm.curpol;
+ infix(in, false);
+
+ /* print operator & right operand */
+ RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+ sprintf(in->cur, " %c %s", op, nrm.buf);
+ in->cur = strchr(in->cur, '\0');
+ pfree(nrm.buf);
+
+ if (op == (int4) '|' && !first)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+}
+
+
+/*
+ * Output function: render the query (argument 0) as an infix string.
+ * An empty query is rendered as the empty string.
+ */
+Datum
+tsquery_out(PG_FUNCTION_ARGS)
+{
+    QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+    INFIX nrm;
+
+    if (query->size == 0)
+    {
+        char *b = palloc(1);
+
+        *b = '\0';
+        /* release the detoasted copy on this path too, as exectsq does */
+        PG_FREE_IF_COPY(query, 0);
+        PG_RETURN_POINTER(b);
+    }
+    nrm.curpol = GETQUERY(query);
+    nrm.buflen = 32;
+    nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+    *(nrm.cur) = '\0';
+    nrm.op = GETOPERAND(query);
+    infix(&nrm, true);
+
+    PG_FREE_IF_COPY(query, 0);
+    PG_RETURN_POINTER(nrm.buf);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns, as text, the infix form of the query after clean_NOT_v2() has
+ * removed the negations; "T" when nothing is left, and empty text for an
+ * empty query.
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+    QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
+    INFIX nrm;
+    text *res;
+    ITEM *q;
+    int4 len;
+
+    if (query->size == 0)
+    {
+        res = (text *) palloc(VARHDRSZ);
+        VARATT_SIZEP(res) = VARHDRSZ;
+        /* release the detoasted copy on the early-exit path as well */
+        PG_FREE_IF_COPY(query, 0);
+        PG_RETURN_POINTER(res);
+    }
+
+    q = clean_NOT_v2(GETQUERY(query), &len);
+
+    if (!q)
+    {
+        res = (text *) palloc(1 + VARHDRSZ);
+        VARATT_SIZEP(res) = 1 + VARHDRSZ;
+        *((char *) VARDATA(res)) = 'T';
+    }
+    else
+    {
+        nrm.curpol = q;
+        nrm.buflen = 32;
+        nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+        *(nrm.cur) = '\0';
+        nrm.op = GETOPERAND(query);
+        infix(&nrm, true);
+
+        res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
+        VARATT_SIZEP(res) = nrm.cur - nrm.buf + VARHDRSZ;
+        /* text payload carries no terminator, so copy exactly the printed bytes */
+        strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
+        /* the scratch buffer is no longer needed once copied into res */
+        pfree(nrm.buf);
+        pfree(q);
+    }
+
+    PG_FREE_IF_COPY(query, 0);
+
+    PG_RETURN_POINTER(res);
+}
+
+/*
+ * Build a query from text (argument 1) using the configuration id given
+ * as argument 0; values are pushed through pushval_morph.  The parsed
+ * query is then passed through clean_fakeval_v2(); if nothing remains an
+ * empty query (size 0) is returned.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS) {
+ text *in = PG_GETARG_TEXT_P(1);
+ char *str;
+ QUERYTYPE *query;
+ ITEM *res;
+ int4 len;
+
+ /* NOTE(review): str is never freed here - presumably palloc'd by text2char; confirm */
+ str=text2char(in);
+ PG_FREE_IF_COPY(in,1);
+
+ query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+ res = clean_fakeval_v2(GETQUERY(query), &len);
+ if (!res)
+ {
+ query->len = HDRSIZEQT;
+ query->size = 0;
+ PG_RETURN_POINTER(query);
+ }
+ /*
+  * The cleaned items overwrite the start of the item array in place.
+  * NOTE(review): query->size and query->len are left at their pre-cleaning
+  * values even though only 'len' items were copied - confirm intended.
+  */
+ memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
+ pfree(res);
+ PG_RETURN_POINTER(query);
+}
+
+/*
+ * to_tsquery() variant taking the configuration by name (argument 0)
+ * instead of by id; argument 1 is the query text.
+ */
+Datum
+to_tsquery_name(PG_FUNCTION_ARGS) {
+    text *name=PG_GETARG_TEXT_P(0);
+    Datum res= DirectFunctionCall2(
+        to_tsquery,
+        Int32GetDatum( name2id_cfg(name) ),
+        PG_GETARG_DATUM(1)
+    );
+
+    /*
+     * 'name' was fetched from argument 0, so it must be compared against
+     * argument 0.  Index 1 made the "is it a copy" test always true, which
+     * freed the caller's datum whenever no detoasted copy had been made.
+     */
+    PG_FREE_IF_COPY(name,0);
+    PG_RETURN_DATUM(res);
+}
+
+/*
+ * to_tsquery() variant that uses the configuration returned by
+ * get_currcfg(); argument 0 is the query text.
+ */
+Datum
+to_tsquery_current(PG_FUNCTION_ARGS) {
+    Datum cfg = Int32GetDatum( get_currcfg() );
+    Datum res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));
+
+    PG_RETURN_DATUM(res);
+}
+
+
--- /dev/null
+#ifndef __QUERY_H__
+#define __QUERY_H__
+/*
+#define BS_DEBUG
+*/
+
+
+/*
+ * item in polish notation with back link
+ * to left operand
+ */
+typedef struct ITEM
+{
+ /* item kind: one of the VAL / OPR / VALTRUE ... constants defined in this header */
+ int8 type;
+ /* bit mask of accepted weight labels (bit 3 = A ... bit 0 = D); 0 = no restriction */
+ int8 weight;
+ /* offset, in items, from an operator to its other operand; 0 for values */
+ int2 left;
+ /* for operators: the operator character ('!', '&' or '|') */
+ int4 val;
+ /* user-friendly value, must correlate with WordEntry */
+ uint32
+ unused:1,
+ /* length of the operand string */
+ length:11,
+ /* offset of the operand string inside the operand area */
+ distance:20;
+} ITEM;
+
+/*
+ *Storage:
+ * (len)(size)(array of ITEM)(array of operand in user-friendly form)
+ */
+typedef struct
+{
+ /* total size of the stored query in bytes (see COMPUTESIZE) */
+ int4 len;
+ /* number of ITEMs in the array */
+ int4 size;
+ /* start of the variable-length part: items, then operand strings */
+ char data[1];
+} QUERYTYPE;
+
+/*
+ * Layout helpers for QUERYTYPE.  Macro arguments and results are fully
+ * parenthesized so they expand correctly inside larger expressions.
+ */
+/* size of the fixed header: the len and size fields */
+#define HDRSIZEQT ( 2*sizeof(int4) )
+/* total bytes for 'size' items plus 'lenofoperand' bytes of operand text */
+#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
+/* pointer to the first ITEM */
+#define GETQUERY(x) ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
+/* pointer to the operand text stored right after the item array */
+#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
+
+/* true for the characters that have syntactic meaning in a query */
+#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
+
+#define END 0
+#define ERR 1
+#define VAL 2
+#define OPR 3
+#define OPEN 4
+#define CLOSE 5
+#define VALTRUE 6 /* for stop words */
+#define VALFALSE 7
+
+bool TS_execute(ITEM * curitem, void *checkval,
+ bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
+
+#endif
--- /dev/null
+/*
+ * Relevance ranking
+ * Teodor Sigaev
+ */
+#include "postgres.h"
+#include <math.h>
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/array.h"
+
+#include "tsvector.h"
+#include "query.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(rank);
+Datum rank(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_def);
+Datum rank_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd);
+Datum rank_cd(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(rank_cd_def);
+Datum rank_cd_def(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(get_covers);
+Datum get_covers(PG_FUNCTION_ARGS);
+
+/*
+ * Default per-label weights, indexed by WordEntryPos.weight; used by
+ * rank_def() and as the fallback for negative entries passed to rank().
+ */
+static float weights[]={0.1, 0.2, 0.4, 1.0};
+
+/* weight of one position; relies on a local array named 'w' being in scope */
+#define wpos(wep) ( w[ ((WordEntryPos*)(wep))->weight ] )
+
+/* normalization method used when the caller does not pass one (0 = none) */
+#define DEF_NORM_METHOD 0
+
+/*
+ * Weight of a pair of words that are 'w' positions apart.
+ * Anything further than 100 positions gets a negligible weight.
+ */
+static float4 word_distance ( int4 w ) {
+    double exponent;
+
+    if ( w>100 )
+        return 1e-30;
+
+    exponent = ((float4)w)/1.5-2;
+    return 1.0/(1.005+0.05*exp( exponent ) );
+}
+
+/*
+ * Length of a vector for normalization: the sum of the position counts of
+ * all entries, where an entry with no positions counts as one.
+ */
+static int
+cnt_length( tsvector *t ) {
+    WordEntry *entry;
+    WordEntry *stop=(WordEntry*)STRPTR(t);
+    int total = 0;
+
+    for (entry = ARRPTR(t); entry < stop; entry++) {
+        int npos = POSDATALEN(t, entry);
+
+        total += (npos == 0) ? 1 : npos;
+    }
+
+    return total;
+}
+
+/*
+ * Order a vector entry against a query item: by length first, then by
+ * strncmp() when the lengths are equal.  'eval' and 'qval' are the bases
+ * of the vector's and the query's string areas.
+ */
+static int4
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
+    if (ptr->len != item->length)
+        return (ptr->len > item->length) ? 1 : -1;
+
+    return strncmp(eval + ptr->pos, qval + item->distance, item->length);
+}
+
+/*
+ * Binary-search vector 't' for the entry matching query item 'item'.
+ * Returns the entry, or NULL when the word is not in the vector.
+ */
+static WordEntry*
+find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
+    WordEntry *lo = ARRPTR(t);
+    WordEntry *hi = (WordEntry*)STRPTR(t);
+
+    /* search window is the half-open range [lo, hi) */
+    while (lo < hi)
+    {
+        WordEntry *mid = lo + (hi - lo) / 2;
+        int cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);
+
+        if (cmp == 0)
+            return mid;
+        if (cmp < 0)
+            lo = mid + 1;
+        else
+            hi = mid;
+    }
+
+    return NULL;
+}
+
+/*
+ * Stand-in position list for entries stored without positions.  Callers
+ * overwrite the first element with a uint16 count (lengthof(POSNULL)-1)
+ * and use the second element as the single fake position.
+ * NOTE(review): initializer order assumes WordEntryPos is {weight, pos} -
+ * confirm against tsvector.h.
+ */
+static WordEntryPos POSNULL[]={
+ {0,0},
+ {0,MAXENTRYPOS-1}
+};
+
+/*
+ * Rank for a query whose root is '&'.
+ *
+ * For every pair of query words found in the vector, every pair of their
+ * positions contributes sqrt(weight1 * weight2 * word_distance(dist)); the
+ * contributions are combined as 1 - (1-res)*(1-curw).  Returns a negative
+ * value when no pair contributed.
+ *
+ * The loop headers were truncated in the source and are restored here.
+ */
+static float
+calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
+    uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
+    int i,k,l,p;
+    WordEntry *entry;
+    WordEntryPos *post,*ct;
+    int4 dimt,lenct,dist;
+    float res=-1.0;
+    ITEM *item=GETQUERY(q);
+
+    memset(pos,0,sizeof(uint16*) * q->size);
+    /* store the element count in front of the fake position list */
+    *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+    for(i=0; i<q->size; i++) {
+
+        if ( item[i].type != VAL )
+            continue;
+
+        entry=find_wordentry(t,q,&(item[i]));
+        if ( !entry )
+            continue;
+
+        if ( entry->haspos )
+            pos[i] = (uint16*)_POSDATAPTR(t,entry);
+        else
+            pos[i] = (uint16*)POSNULL;
+
+        /* pos[i] points at a uint16 count followed by the positions */
+        dimt = *(uint16*)(pos[i]);
+        post = (WordEntryPos*)(pos[i]+1);
+
+        /* pair this word with every earlier query word that was found */
+        for( k=0; k<i; k++ ) {
+            if ( !pos[k] ) continue;
+            lenct = *(uint16*)(pos[k]);
+            ct = (WordEntryPos*)(pos[k]+1);
+            for(l=0; l<dimt; l++) {
+                for(p=0; p<lenct; p++) {
+                    dist = abs( post[l].pos - ct[p].pos );
+                    /* identical positions only count when one side is the fake list */
+                    if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
+                        float curw;
+                        if ( !dist ) dist=MAXENTRYPOS;
+                        curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
+                        res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                    }
+                }
+            }
+        }
+    }
+    pfree(pos);
+    return res;
+}
+
+/*
+ * Rank for a query whose root is not '&'.
+ *
+ * Every position of every query word found in the vector contributes its
+ * weight; contributions are combined as 1 - (1-res)*(1-weight).  Returns a
+ * negative value when no query word was found.
+ *
+ * The loop headers were truncated in the source and are restored here.
+ */
+static float
+calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
+    WordEntry *entry;
+    WordEntryPos *post;
+    int4 dimt,j,i;
+    float res=-1.0;
+    ITEM *item=GETQUERY(q);
+
+    /* store the element count in front of the fake position list */
+    *(uint16*)POSNULL = lengthof(POSNULL)-1;
+
+    for(i=0; i<q->size; i++) {
+        if ( item[i].type != VAL )
+            continue;
+
+        entry=find_wordentry(t,q,&(item[i]));
+        if ( !entry )
+            continue;
+
+        if ( entry->haspos ) {
+            dimt = POSDATALEN(t,entry);
+            post = POSDATAPTR(t,entry);
+        } else {
+            dimt = *(uint16*)POSNULL;
+            post = POSNULL+1;
+        }
+
+        for(j=0;j<dimt;j++) {
+            if ( res < 0 )
+                res = wpos( &(post[j]) );
+            else
+                res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+        }
+    }
+    return res;
+}
+
+/*
+ * Compute the rank of vector 't' for query 'q'.
+ *   w      - weight per position label
+ *   method - normalization: 0 none, 1 divide by log(length),
+ *            2 divide by length; anything else raises ERROR
+ * Returns 0 for an empty vector or query, and 1e-20 when the helper found
+ * nothing to score.
+ */
+static float
+calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
+    ITEM *item = GETQUERY(q);
+    float res=0.0;
+
+    if (!t->size || !q->size)
+        return 0.0;
+
+    res = ( item->type != VAL && item->val == (int4) '&' ) ?
+        calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+
+    if ( res < 0 )
+        res = 1e-20;
+
+    switch(method) {
+        case 0: break;
+        case 1: {
+            double lognorm = log((float)cnt_length(t));
+
+            /*
+             * A length of 1 gives log() == 0; dividing by it would turn
+             * the rank into infinity, so the rank is left unnormalized.
+             */
+            if ( lognorm > 0 )
+                res /= lognorm;
+            break;
+        }
+        case 2: res /= (float)cnt_length(t); break;
+        default:
+            elog(ERROR,"Unknown normalization method: %d",method);
+    }
+
+    return res;
+}
+
+/*
+ * rank(weights float4[], tsvector, query [, method])
+ *
+ * The weight array must be one-dimensional with at least
+ * lengthof(weights) elements.  A negative element selects the built-in
+ * default for that label; an element above 1.0 raises ERROR.
+ *
+ * The loop header was truncated in the source and is restored here.
+ */
+Datum
+rank(PG_FUNCTION_ARGS) {
+    ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+    tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+    QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+    int method=DEF_NORM_METHOD;
+    float res=0.0;
+    float ws[ lengthof(weights) ];
+    int i;
+
+    if ( ARR_NDIM(win) != 1 )
+        elog(ERROR,"Array of weight is not one dimentional");
+    if ( ARRNELEMS(win) < lengthof(weights) )
+        elog(ERROR,"Array of weight is too short");
+
+    for(i=0;i<lengthof(weights);i++) {
+        ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
+        if ( ws[ i ] > 1.0 )
+            elog(ERROR,"Weight out of range");
+    }
+
+    if ( PG_NARGS() == 4 )
+        method=PG_GETARG_INT32(3);
+
+    res=calc_rank(ws, txt, query, method);
+
+    PG_FREE_IF_COPY(win, 0);
+    PG_FREE_IF_COPY(txt, 1);
+    PG_FREE_IF_COPY(query, 2);
+    PG_RETURN_FLOAT4(res);
+}
+
+/*
+ * rank(tsvector, query [, method]) using the built-in default weights.
+ */
+Datum
+rank_def(PG_FUNCTION_ARGS) {
+    tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+    QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+    int method = ( PG_NARGS() == 3 ) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
+    float res = calc_rank(weights, txt, query, method);
+
+    PG_FREE_IF_COPY(txt, 0);
+    PG_FREE_IF_COPY(query, 1);
+    PG_RETURN_FLOAT4(res);
+}
+
+
+/* One occurrence of a query word in the document (built by get_docrep). */
+typedef struct {
+ /* the query item that matched */
+ ITEM *item;
+ /* position of the occurrence in the document */
+ int32 pos;
+} DocRepresentation;
+
+/*
+ * qsort comparator ordering DocRepresentation records by position.
+ * Equal positions must compare as 0: reporting them as "greater" in both
+ * directions breaks the ordering contract qsort relies on.
+ */
+static int
+compareDocR(const void *a, const void *b) {
+    int32 apos = ((const DocRepresentation *) a)->pos;
+    int32 bpos = ((const DocRepresentation *) b)->pos;
+
+    if ( apos == bpos )
+        return 0;
+    return ( apos > bpos ) ? 1 : -1;
+}
+
+
+/* Window of DocRepresentation records handed to checkcondition_DR(). */
+typedef struct {
+ /* first record of the window */
+ DocRepresentation *doc;
+ /* number of records in the window */
+ int len;
+} ChkDocR;
+
+/*
+ * TS_execute callback: true when query item 'val' occurs anywhere in the
+ * window described by the ChkDocR passed as 'checkval'.
+ */
+static bool
+checkcondition_DR(void *checkval, ITEM *val) {
+    ChkDocR *window = (ChkDocR*)checkval;
+    int i;
+
+    for (i = 0; i < window->len; i++) {
+        if ( window->doc[i].item == val )
+            return true;
+    }
+
+    return false;
+}
+
+
+/*
+ * Find the next cover: a span of document positions [*p, *q] holding
+ * occurrences of query words for which the query evaluates to true.
+ *   doc, len - sorted occurrences of query words (see get_docrep)
+ *   pos      - in/out: index in doc where the search starts / resumes
+ *   p, q     - out: first and last position of the cover found;
+ *              *q also carries the previous cover's end on entry
+ * Returns true when a cover was found, false when there are no more.
+ *
+ * The loop headers were truncated in the source and are restored here.
+ */
+static bool
+Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
+    int i;
+    DocRepresentation *ptr,*f=(DocRepresentation*)0xffffffff;
+    ITEM *item=GETQUERY(query);
+    int lastpos=*pos;
+    int oldq=*q;
+
+    *p=0x7fffffff;
+    *q=0;
+
+    /* right edge: the furthest "first occurrence at or after *pos" of any query word */
+    for(i=0; i<query->size; i++) {
+        if ( item->type != VAL ) {
+            item++;
+            continue;
+        }
+        ptr = doc + *pos;
+
+        while(ptr-doc<len) {
+            if ( ptr->item == item ) {
+                if ( ptr->pos > *q ) {
+                    *q = ptr->pos;
+                    lastpos= ptr - doc;
+                }
+                break;
+            }
+            ptr++;
+        }
+
+        item++;
+    }
+
+    if (*q==0 )
+        return false;
+
+    if (*q==oldq) { /* already check this pos */
+        (*pos)++;
+        return Cover(doc, len, query, pos,p,q);
+    }
+
+    /* left edge: scanning back from lastpos, the nearest occurrence of each word */
+    item=GETQUERY(query);
+    for(i=0; i<query->size; i++) {
+        if ( item->type != VAL ) {
+            item++;
+            continue;
+        }
+        ptr = doc + lastpos;
+
+        while(ptr>=doc+*pos) {
+            if ( ptr->item == item ) {
+                if ( ptr->pos < *p ) {
+                    *p = ptr->pos;
+                    f=ptr;
+                }
+                break;
+            }
+            ptr--;
+        }
+        item++;
+    }
+
+    if ( *p<=*q ) {
+        /* evaluate the query over just the occurrences inside the span */
+        ChkDocR ch = { f, (doc + lastpos)-f+1 };
+        *pos = f-doc+1;
+        if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
+            /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
+            return true;
+        } else
+            return Cover(doc, len, query, pos,p,q);
+    }
+
+    return false;
+}
+
+/*
+ * Collect every occurrence of every query word in 'txt' into a palloc'd
+ * array sorted by position.  *doclen receives the number of records.
+ * Returns NULL (and frees the array) when no query word occurs.
+ *
+ * The loop headers were truncated in the source and are restored here.
+ */
+static DocRepresentation*
+get_docrep(tsvector *txt, QUERYTYPE *query, int *doclen) {
+    ITEM *item=GETQUERY(query);
+    WordEntry *entry;
+    WordEntryPos *post;
+    int4 dimt,j,i;
+    int len=query->size*4,cur=0;
+    DocRepresentation *doc;
+
+    /* store the element count in front of the fake position list */
+    *(uint16*)POSNULL = lengthof(POSNULL)-1;
+    doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
+    for(i=0; i<query->size; i++) {
+        if ( item[i].type != VAL )
+            continue;
+
+        entry=find_wordentry(txt,query,&(item[i]));
+        if ( !entry )
+            continue;
+
+        if ( entry->haspos ) {
+            dimt = POSDATALEN(txt,entry);
+            post = POSDATAPTR(txt,entry);
+        } else {
+            dimt = *(uint16*)POSNULL;
+            post = POSNULL+1;
+        }
+
+        /* grow the array until this word's positions fit */
+        while( cur+dimt >= len ) {
+            len*=2;
+            doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+        }
+
+        for(j=0;j<dimt;j++) {
+            doc[cur].item=&(item[i]);
+            doc[cur].pos=post[j].pos;
+            cur++;
+        }
+    }
+
+    *doclen=cur;
+
+    if ( cur>0 ) {
+        if ( cur>1 )
+            qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+        return doc;
+    }
+
+    pfree(doc);
+    return NULL;
+}
+
+
+/*
+ * rank_cd(K int4, tsvector, query [, method])
+ *
+ * Cover-density rank: each cover found by Cover() adds 1.0, or K/length
+ * when the cover is longer than K positions.  K <= 0 selects K = 4.
+ * 'method' selects the same normalization as in calc_rank().
+ * Returns 0 when no query word occurs in the vector.
+ */
+Datum
+rank_cd(PG_FUNCTION_ARGS) {
+    int K = PG_GETARG_INT32(0);
+    tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+    QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
+    int method=DEF_NORM_METHOD;
+    DocRepresentation *doc;
+    float res=0.0;
+    int p=0,q=0,len,cur;
+
+    doc = get_docrep(txt, query, &len);
+    if ( !doc ) {
+        PG_FREE_IF_COPY(txt, 1);
+        PG_FREE_IF_COPY(query, 2);
+        PG_RETURN_FLOAT4(0.0);
+    }
+
+    cur=0;
+    if (K<=0)
+        K=4;
+    while( Cover(doc, len, query, &cur, &p, &q) )
+        res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+
+    if ( PG_NARGS() == 4 )
+        method=PG_GETARG_INT32(3);
+
+    switch(method) {
+        case 0: break;
+        case 1: {
+            double lognorm = log((float)cnt_length(txt));
+
+            /*
+             * A length of 1 gives log() == 0; dividing by it would turn
+             * the rank into infinity, so the rank is left unnormalized.
+             */
+            if ( lognorm > 0 )
+                res /= lognorm;
+            break;
+        }
+        case 2: res /= (float)cnt_length(txt); break;
+        default:
+            elog(ERROR,"Unknown normalization method: %d",method);
+    }
+
+    pfree(doc);
+    PG_FREE_IF_COPY(txt, 1);
+    PG_FREE_IF_COPY(query, 2);
+
+    PG_RETURN_FLOAT4(res);
+}
+
+
+/*
+ * rank_cd(tsvector, query [, method]): calls rank_cd() with K = -1, which
+ * makes rank_cd() fall back to its default K.
+ */
+Datum
+rank_cd_def(PG_FUNCTION_ARGS) {
+    Datum method = ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);
+    Datum res = DirectFunctionCall4(
+        rank_cd,
+        Int32GetDatum(-1),
+        PG_GETARG_DATUM(0),
+        PG_GETARG_DATUM(1),
+        method
+    );
+
+    PG_RETURN_DATUM(res);
+}
+
+/**************debug*************/
+
+/* One word occurrence of the document, used by get_covers() for printing. */
+typedef struct {
+ /* pointer to the lexeme text inside the vector (not NUL-terminated) */
+ char *w;
+ /* length of the lexeme in bytes */
+ int2 len;
+ /* position of this occurrence in the document */
+ int2 pos;
+ /* number of the cover that starts at this word, 0 if none */
+ int2 start;
+ /* number of the cover that ends at this word, 0 if none */
+ int2 finish;
+} DocWord;
+
+/*
+ * qsort comparator ordering DocWord records by position.
+ * Equal positions must compare as 0: reporting them as "greater" in both
+ * directions breaks the ordering contract qsort relies on.
+ */
+static int
+compareDocWord(const void *a, const void *b) {
+    int2 apos = ((const DocWord *) a)->pos;
+    int2 bpos = ((const DocWord *) b)->pos;
+
+    if ( apos == bpos )
+        return 0;
+    return ( apos > bpos ) ? 1 : -1;
+}
+
+
+/*
+ * get_covers(tsvector, query) - debug helper.
+ *
+ * Returns the document's words in position order as text, with "{N "
+ * before the first word and "}N " after the last word of the N-th cover.
+ * Returns empty text when no query word occurs in the vector, and raises
+ * ERROR "No pos info" when an entry has no positions.
+ *
+ * The loop headers were truncated in the source and are restored here;
+ * the two scanning loops now test the bound before reading dwptr->pos.
+ */
+Datum
+get_covers(PG_FUNCTION_ARGS) {
+    tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+    QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+    WordEntry *pptr=ARRPTR(txt);
+    int i,dlen=0,j,cur=0,len=0,rlen;
+    DocWord *dw,*dwptr;
+    text *out;
+    char *cptr;
+    DocRepresentation *doc;
+    int pos=0,p,q,olddwpos=0;
+    int ncover=1;
+
+    doc = get_docrep(txt, query, &rlen);
+
+    if ( !doc ) {
+        out=palloc(VARHDRSZ);
+        VARATT_SIZEP(out) = VARHDRSZ;
+        PG_FREE_IF_COPY(txt,0);
+        PG_FREE_IF_COPY(query,1);
+        PG_RETURN_POINTER(out);
+    }
+
+    /* count all word occurrences in the document */
+    for(i=0;i<txt->size;i++) {
+        if (!pptr[i].haspos)
+            elog(ERROR,"No pos info");
+        dlen += POSDATALEN(txt,&(pptr[i]));
+    }
+
+    dwptr=dw=palloc(sizeof(DocWord)*dlen);
+    memset(dw,0,sizeof(DocWord)*dlen);
+
+    /* one DocWord per occurrence; 'len' accumulates the output size */
+    for(i=0;i<txt->size;i++) {
+        WordEntryPos *posdata = POSDATAPTR(txt,&(pptr[i]));
+        for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
+            dw[cur].w=STRPTR(txt)+pptr[i].pos;
+            dw[cur].len=pptr[i].len;
+            dw[cur].pos=posdata[j].pos;
+            cur++;
+        }
+        len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+    }
+    qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
+
+    /* mark the first and last word of every cover */
+    while( Cover(doc, rlen, query, &pos, &p, &q) ) {
+        dwptr=dw+olddwpos;
+        while(dwptr-dw<dlen && dwptr->pos < p)
+            dwptr++;
+        olddwpos=dwptr-dw;
+        dwptr->start=ncover;
+        while(dwptr-dw<dlen && dwptr->pos < q+1)
+            dwptr++;
+        (dwptr-1)->finish=ncover;
+        len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
+        ncover++;
+    }
+
+    out=palloc(VARHDRSZ+len);
+    cptr=((char*)out)+VARHDRSZ;
+    dwptr=dw;
+
+    while( dwptr-dw < dlen) {
+        if ( dwptr->start ) {
+            sprintf(cptr,"{%d ",dwptr->start);
+            cptr=strchr(cptr,'\0');
+        }
+        memcpy(cptr,dwptr->w,dwptr->len);
+        cptr+=dwptr->len;
+        *cptr=' ';
+        cptr++;
+        if ( dwptr->finish ) {
+            sprintf(cptr,"}%d ",dwptr->finish);
+            cptr=strchr(cptr,'\0');
+        }
+        dwptr++;
+    }
+
+    VARATT_SIZEP(out) = cptr - ((char*)out);
+
+    pfree(dw);
+    pfree(doc);
+
+    PG_FREE_IF_COPY(txt,0);
+    PG_FREE_IF_COPY(query,1);
+    PG_RETURN_POINTER(out);
+}
+
--- /dev/null
+/*
+ * Rewrite routines of query tree
+ * Teodor Sigaev
+ */
+
+#include "postgres.h"
+
+#include
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "access/rtree.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+#include "query.h"
+#include "rewrite.h"
+
+/* Binary-tree form of a query, built by maketree() from the ITEM array. */
+typedef struct NODE
+{
+ /* subtree built from in + in->left; NULL for values and for '!' */
+ struct NODE *left;
+ /* subtree built from in + 1; NULL for values */
+ struct NODE *right;
+ /* the query item this node stands for (not owned by the node) */
+ ITEM *valnode;
+} NODE;
+
+/*
+ * Build a NODE tree from the flat item array starting at 'in'.
+ * Operators get a right child from in + 1 and, unless the operator is
+ * '!', a left child from in + in->left.
+ */
+static NODE *
+maketree(ITEM * in)
+{
+    NODE *node = (NODE *) palloc(sizeof(NODE));
+
+    node->valnode = in;
+    node->left = NULL;
+    node->right = NULL;
+
+    if (in->type != OPR)
+        return node;
+
+    node->right = maketree(in + 1);
+    if (in->val != (int4) '!')
+        node->left = maketree(in + in->left);
+
+    return node;
+}
+
+/* Growable ITEM array filled by plainnode(). */
+typedef struct
+{
+ /* the array being filled */
+ ITEM *ptr;
+ /* allocated capacity, in items */
+ int4 len;
+ /* number of items written so far */
+ int4 cur;
+} PLAINTREE;
+
+/*
+ * Append the subtree at 'node' to state->ptr in flat form and free the
+ * NODEs as they are written.  Operator 'left' offsets are recomputed for
+ * the new layout.
+ */
+static void
+plainnode(PLAINTREE * state, NODE * node)
+{
+ /* double the array when it is full */
+ if (state->cur == state->len)
+ {
+ state->len *= 2;
+ state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
+ }
+ memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM));
+ if (node->valnode->type == VAL)
+ state->cur++;
+ else if (node->valnode->val == (int4) '!')
+ {
+ state->ptr[state->cur].left = 1;
+ state->cur++;
+ plainnode(state, node->right);
+ }
+ else
+ {
+ /* remember the operator's slot; state->ptr may move during recursion */
+ int4 cur = state->cur;
+
+ state->cur++;
+ plainnode(state, node->right);
+ state->ptr[cur].left = state->cur - cur;
+ plainnode(state, node->left);
+ }
+ pfree(node);
+}
+
+/*
+ * Flatten a NODE tree back into a palloc'd ITEM array.
+ * *len receives the item count.  Returns NULL (with *len = 0) when root
+ * is NULL or its item is neither a value nor an operator.
+ */
+static ITEM *
+plaintree(NODE * root, int4 *len)
+{
+    PLAINTREE pl;
+
+    pl.ptr = NULL;
+    pl.cur = 0;
+    pl.len = 16;
+
+    if (root != NULL &&
+        (root->valnode->type == VAL || root->valnode->type == OPR))
+    {
+        pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
+        plainnode(&pl, root);
+    }
+
+    *len = pl.cur;
+    return pl.ptr;
+}
+
+/*
+ * Free a NODE tree: both subtrees first, then the node itself.
+ * A NULL tree is accepted.
+ */
+static void
+freetree(NODE * node)
+{
+    if (node == NULL)
+        return;
+
+    /* the recursive call tolerates NULL children */
+    freetree(node->left);
+    freetree(node->right);
+    pfree(node);
+}
+
+/*
+ * Clean the tree of ! operators.
+ * This is useful for debugging, but otherwise
+ * such a view is used for searching in the index.
+ * Operator ! always returns TRUE.
+ *
+ * Returns the cleaned subtree, or NULL when the subtree reduces to
+ * "always true"; subtrees that are dropped are freed.
+ */
+static NODE *
+clean_NOT_intree(NODE * node)
+{
+ if (node->valnode->type == VAL)
+ return node;
+
+ if (node->valnode->val == (int4) '!')
+ {
+ freetree(node);
+ return NULL;
+ }
+
+ /* operator & or | */
+ if (node->valnode->val == (int4) '|')
+ {
+ /* '|' with an always-true side is itself always true */
+ if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+ (node->right = clean_NOT_intree(node->right)) == NULL)
+ {
+ freetree(node);
+ return NULL;
+ }
+ }
+ else
+ {
+ /* '&': an always-true side is dropped and the other side takes its place */
+ NODE *res = node;
+
+ node->left = clean_NOT_intree(node->left);
+ node->right = clean_NOT_intree(node->right);
+ if (node->left == NULL && node->right == NULL)
+ {
+ pfree(node);
+ res = NULL;
+ }
+ else if (node->left == NULL)
+ {
+ res = node->right;
+ pfree(node);
+ }
+ else if (node->right == NULL)
+ {
+ res = node->left;
+ pfree(node);
+ }
+ return res;
+ }
+ return node;
+}
+
+/*
+ * Return a flat copy of the query at 'ptr' with the ! operators cleaned
+ * out by clean_NOT_intree().  *len receives the item count; the result is
+ * NULL when nothing remains.
+ */
+ITEM *
+clean_NOT_v2(ITEM * ptr, int4 *len)
+{
+    NODE *cleaned = clean_NOT_intree(maketree(ptr));
+
+    return plaintree(cleaned, len);
+}
+
+/* Result codes reported by clean_fakeval_intree() for a removed subtree. */
+/* the subtree was kept; its value depends on the document */
+#define V_UNKNOWN 0
+/* the subtree was removed and is always true */
+#define V_TRUE 1
+/* the subtree was removed and is always false */
+#define V_FALSE 2
+
+/*
+ * Clean the query tree of values which are always in the
+ * text (stopwords)
+ *
+ * Returns the cleaned subtree.  When the whole subtree folds to a
+ * constant it is freed, NULL is returned and *result is set to V_TRUE or
+ * V_FALSE; otherwise *result is left untouched.
+ */
+static NODE *
+clean_fakeval_intree(NODE * node, char *result)
+{
+ char lresult = V_UNKNOWN,
+ rresult = V_UNKNOWN;
+
+ if (node->valnode->type == VAL)
+ return node;
+ else if (node->valnode->type == VALTRUE)
+ {
+ pfree(node);
+ *result = V_TRUE;
+ return NULL;
+ }
+
+
+ if (node->valnode->val == (int4) '!')
+ {
+ node->right = clean_fakeval_intree(node->right, &rresult);
+ if (!node->right)
+ {
+ /* negation of a constant is the opposite constant */
+ *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
+ freetree(node);
+ return NULL;
+ }
+ }
+ else if (node->valnode->val == (int4) '|')
+ {
+ NODE *res = node;
+
+ node->left = clean_fakeval_intree(node->left, &lresult);
+ node->right = clean_fakeval_intree(node->right, &rresult);
+ /* true | x is true; false | false is false; false | x is x */
+ if (lresult == V_TRUE || rresult == V_TRUE)
+ {
+ freetree(node);
+ *result = V_TRUE;
+ return NULL;
+ }
+ else if (lresult == V_FALSE && rresult == V_FALSE)
+ {
+ freetree(node);
+ *result = V_FALSE;
+ return NULL;
+ }
+ else if (lresult == V_FALSE)
+ {
+ res = node->right;
+ pfree(node);
+ }
+ else if (rresult == V_FALSE)
+ {
+ res = node->left;
+ pfree(node);
+ }
+ return res;
+ }
+ else
+ {
+ NODE *res = node;
+
+ node->left = clean_fakeval_intree(node->left, &lresult);
+ node->right = clean_fakeval_intree(node->right, &rresult);
+ /* false & x is false; true & true is true; true & x is x */
+ if (lresult == V_FALSE || rresult == V_FALSE)
+ {
+ freetree(node);
+ *result = V_FALSE;
+ return NULL;
+ }
+ else if (lresult == V_TRUE && rresult == V_TRUE)
+ {
+ freetree(node);
+ *result = V_TRUE;
+ return NULL;
+ }
+ else if (lresult == V_TRUE)
+ {
+ res = node->right;
+ pfree(node);
+ }
+ else if (rresult == V_TRUE)
+ {
+ res = node->left;
+ pfree(node);
+ }
+ return res;
+ }
+ return node;
+}
+
+/*
+ * Return a flat copy of the query at 'ptr' with the always-true values
+ * folded away by clean_fakeval_intree().  When the whole query folds to a
+ * constant, a NOTICE is emitted, *len is set to 0 and NULL is returned.
+ */
+ITEM *
+clean_fakeval_v2(ITEM * ptr, int4 *len)
+{
+    char result = V_UNKNOWN;
+    NODE *resroot = clean_fakeval_intree(maketree(ptr), &result);
+
+    if (result == V_UNKNOWN)
+        return plaintree(resroot, len);
+
+    elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
+    *len = 0;
+    return NULL;
+}
--- /dev/null
+#ifndef __REWRITE_H__
+#define __REWRITE_H__
+
+/*
+ * Both functions take the first item of a flat query and return a newly
+ * allocated flat item array, storing its item count in *len; they return
+ * NULL when nothing is left of the query.
+ */
+
+/* removes the ! operators */
+ITEM *clean_NOT_v2(ITEM * ptr, int4 *len);
+/* removes the VALTRUE values */
+ITEM *clean_fakeval_v2(ITEM * ptr, int4 *len);
+
+#endif
--- /dev/null
+/*
+ * simple but fast map from str to Oid
+ * Teodor Sigaev
+ */
+#include
+#include
+#include
+
+#include "postgres.h"
+#include "snmap.h"
+#include "common.h"
+
+/* qsort/bsearch comparator: orders SNMapEntry records by key with strcmp(). */
+static int
+compareSNMapEntry(const void *a, const void *b) {
+    const SNMapEntry *lhs = (SNMapEntry*)a;
+    const SNMapEntry *rhs = (SNMapEntry*)b;
+
+    return strcmp( lhs->key, rhs->key );
+}
+
+/*
+ * Add key -> value to the map.  The key is duplicated with strdup() and
+ * the list is re-sorted after every insertion so findSNMap() can use
+ * bsearch().  Raises ERROR "No memory" when an allocation fails.
+ */
+void
+addSNMap( SNMap *map, char *key, Oid value ) {
+    SNMapEntry *slot;
+
+    /* grow the list: 16 entries at first, then doubling */
+    if (map->len>=map->reallen) {
+        int newcap = (map->reallen) ? 2*map->reallen : 16;
+        SNMapEntry *grown = (SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * newcap);
+
+        if ( !grown )
+            elog(ERROR, "No memory");
+        map->reallen=newcap;
+        map->list=grown;
+    }
+
+    slot = &(map->list[ map->len ]);
+    slot->key = strdup(key);
+    if ( ! slot->key )
+        elog(ERROR, "No memory");
+    slot->value=value;
+    map->len++;
+
+    if ( map->len>1 )
+        qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+}
+
+/* addSNMap() for a text key: converts with text2char(), adds, frees the copy. */
+void
+addSNMap_t( SNMap *map, text *key, Oid value ) {
+    char *cstr;
+
+    cstr = text2char( key );
+    addSNMap(map, cstr, value);
+    pfree(cstr);
+}
+
+/*
+ * Look up 'key' in the map.  Returns the stored value, or 0 when the map
+ * is empty or the key is not present.
+ */
+Oid
+findSNMap( SNMap *map, char *key ) {
+    SNMapEntry probe = {key, 0};
+    SNMapEntry *hit;
+
+    if ( map->len==0 || !map->list )
+        return 0;
+
+    hit = (SNMapEntry*) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
+    if ( !hit )
+        return 0;
+    return hit->value;
+}
+
+/*
+ * findSNMap() for a text key: converts with text2char(), looks the key
+ * up, frees the copy.  Returns the stored value or 0 when not found.
+ */
+Oid
+findSNMap_t( SNMap *map, text *key ) {
+    char *k=text2char(key);
+    /* keep the result as Oid; a signed int cannot hold every Oid value */
+    Oid res;
+
+    res= findSNMap(map, k);
+    pfree(k);
+    return res;
+}
+
+/*
+ * Free every duplicated key and the list itself, then reset the map to
+ * all zeroes so it can be reused.
+ */
+void freeSNMap( SNMap *map ) {
+    if ( map->list ) {
+        int i;
+        int count = map->len;
+
+        for (i = 0; i < count; i++) {
+            if ( map->list[i].key )
+                free(map->list[i].key);
+        }
+        free( map->list );
+    }
+    memset(map,0,sizeof(SNMap));
+}
+
+
--- /dev/null
+#ifndef __SNMAP_H__
+#define __SNMAP_H__
+
+#include "postgres.h"
+
+/* One key -> value pair of the map. */
+typedef struct {
+ /* malloc'd copy of the key string */
+ char *key;
+ /* value associated with the key */
+ Oid value;
+} SNMapEntry;
+
+/* Sorted array of entries; an all-zero SNMap is a valid empty map. */
+typedef struct {
+ /* number of entries in use */
+ int len;
+ /* allocated capacity of list, in entries */
+ int reallen;
+ /* entries, kept sorted by key */
+ SNMapEntry *list;
+} SNMap;
+
+/* add a pair; the key string is copied */
+void addSNMap( SNMap *map, char *key, Oid value );
+/* same, with the key given as text */
+void addSNMap_t( SNMap *map, text *key, Oid value );
+/* look a key up; returns 0 when it is not present */
+Oid findSNMap( SNMap *map, char *key );
+/* same, with the key given as text */
+Oid findSNMap_t( SNMap *map, text *key );
+/* release all memory and reset the map to empty */
+void freeSNMap( SNMap *map );
+
+#endif
--- /dev/null
+
+#include "header.h"
+
+/*
+ * Allocate a stemmer environment with S_size string slots, I_size integer
+ * slots and B_size symbol (boolean) slots.
+ * Returns the new environment, or NULL when an allocation fails; nothing
+ * is leaked in that case.
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{
+    struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+
+    if (z == NULL) return NULL;
+
+    z->p = create_s();
+    if (z->p == NULL) goto error;
+
+    if (S_size)
+    {
+        int i;
+
+        z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+        for (i = 0; i < S_size; i++)
+        {
+            z->S[i] = create_s();
+            if (z->S[i] == NULL) goto error;
+        }
+        z->S_size = S_size;
+    }
+
+    if (I_size)
+    {
+        z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+        z->I_size = I_size;
+    }
+
+    if (B_size)
+    {
+        z->B = (symbol *) calloc(B_size, sizeof(symbol));
+        if (z->B == NULL) goto error;
+        z->B_size = B_size;
+    }
+
+    return z;
+
+error:
+    /* calloc zeroed the struct, so pointers never set are still NULL */
+    if (z->S)
+    {
+        int i;
+
+        for (i = 0; i < S_size; i++)
+            if (z->S[i]) lose_s(z->S[i]);
+        free(z->S);
+    }
+    free(z->I);
+    free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+    return NULL;
+}
+
+/*
+ * Release an environment created by SN_create_env(): the string slots,
+ * the integer and symbol arrays, the working string and the struct.
+ */
+extern void SN_close_env(struct SN_env * z)
+{
+    if (z->S_size)
+    {
+        int slot = 0;
+
+        while (slot < z->S_size)
+        {
+            lose_s(z->S[slot]);
+            slot++;
+        }
+        free(z->S);
+    }
+
+    if (z->I_size)
+        free(z->I);
+
+    if (z->B_size)
+        free(z->B);
+
+    if (z->p)
+        lose_s(z->p);
+
+    free(z);
+}
+
+/*
+ * Replace the range [0, z->l) of the environment's working string with
+ * the 'size' symbols at 's', then reset z->c to 0.
+ */
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    const int start = 0;
+
+    replace_s(z, start, z->l, size, s);
+    z->c = start;
+}
+
--- /dev/null
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+ More precisely, replace 'char' with whatever type guarantees the
+ character width you need. Note however that sizeof(symbol) should divide
+ HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+ there is an alignment problem. In the unlikely event of a problem here,
+ consult Martin Porter.
+
+*/
+
+/* Stemmer environment; created by SN_create_env, freed by SN_close_env. */
+struct SN_env {
+ /* working string (allocated with create_s) */
+ symbol * p;
+ /* c is reset to 0 and l is used as the end of the replaced range by
+    SN_set_current; presumably cursor/limit state - TODO confirm the rest */
+ int c; int a; int l; int lb; int bra; int ket;
+ /* element counts of the S, I and B arrays below */
+ int S_size; int I_size; int B_size;
+ /* S_size strings, each allocated with create_s */
+ symbol * * S;
+ /* I_size integers, zero-initialized */
+ int * I;
+ /* B_size symbols, zero-initialized */
+ symbol * B;
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z);
+
+extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
+
--- /dev/null
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int english_stem(struct SN_env * z);
+static int r_exception2(struct SN_env * z);
+static int r_exception1(struct SN_env * z);
+static int r_Step_5(struct SN_env * z);
+static int r_Step_4(struct SN_env * z);
+static int r_Step_3(struct SN_env * z);
+static int r_Step_2(struct SN_env * z);
+static int r_Step_1c(struct SN_env * z);
+static int r_Step_1b(struct SN_env * z);
+static int r_Step_1a(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_R1(struct SN_env * z);
+static int r_shortv(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+static int r_postlude(struct SN_env * z);
+static int r_prelude(struct SN_env * z);
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
+
+static struct among a_0[1] = /* Searched forwards by r_mark_regions(): the single prefix "gener". Entry fields follow struct among: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 5, s_0_0, -1, -1, 0}
+};
+
+static symbol s_1_0[3] = { 'i', 'e', 'd' };
+static symbol s_1_1[1] = { 's' };
+static symbol s_1_2[3] = { 'i', 'e', 's' };
+static symbol s_1_3[4] = { 's', 's', 'e', 's' };
+static symbol s_1_4[2] = { 's', 's' };
+static symbol s_1_5[2] = { 'u', 's' };
+
+static struct among a_1[6] = /* Suffixes searched backwards by r_Step_1a(); the result field selects the case of its switch (-1 has no case, so nothing is changed). Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 3, s_1_0, -1, 2, 0},
+/* 1 */ { 1, s_1_1, -1, 3, 0},
+/* 2 */ { 3, s_1_2, 1, 2, 0},
+/* 3 */ { 4, s_1_3, 1, 1, 0},
+/* 4 */ { 2, s_1_4, 1, -1, 0},
+/* 5 */ { 2, s_1_5, 1, -1, 0}
+};
+
+static symbol s_2_1[2] = { 'b', 'b' };
+static symbol s_2_2[2] = { 'd', 'd' };
+static symbol s_2_3[2] = { 'f', 'f' };
+static symbol s_2_4[2] = { 'g', 'g' };
+static symbol s_2_5[2] = { 'b', 'l' };
+static symbol s_2_6[2] = { 'm', 'm' };
+static symbol s_2_7[2] = { 'n', 'n' };
+static symbol s_2_8[2] = { 'p', 'p' };
+static symbol s_2_9[2] = { 'r', 'r' };
+static symbol s_2_10[2] = { 'a', 't' };
+static symbol s_2_11[2] = { 't', 't' };
+static symbol s_2_12[2] = { 'i', 'z' };
+
+static struct among a_2[13] = /* Endings examined by r_Step_1b() after it has deleted a suffix: result 1 = "bl"/"at"/"iz", result 2 = a doubled letter, result 3 = the empty string of entry 0. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 0, 0, -1, 3, 0},
+/* 1 */ { 2, s_2_1, 0, 2, 0},
+/* 2 */ { 2, s_2_2, 0, 2, 0},
+/* 3 */ { 2, s_2_3, 0, 2, 0},
+/* 4 */ { 2, s_2_4, 0, 2, 0},
+/* 5 */ { 2, s_2_5, 0, 1, 0},
+/* 6 */ { 2, s_2_6, 0, 2, 0},
+/* 7 */ { 2, s_2_7, 0, 2, 0},
+/* 8 */ { 2, s_2_8, 0, 2, 0},
+/* 9 */ { 2, s_2_9, 0, 2, 0},
+/* 10 */ { 2, s_2_10, 0, 1, 0},
+/* 11 */ { 2, s_2_11, 0, 2, 0},
+/* 12 */ { 2, s_2_12, 0, 1, 0}
+};
+
+static symbol s_3_0[2] = { 'e', 'd' };
+static symbol s_3_1[3] = { 'e', 'e', 'd' };
+static symbol s_3_2[3] = { 'i', 'n', 'g' };
+static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
+static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
+static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
+
+static struct among a_3[6] = /* Suffixes searched backwards first by r_Step_1b(): result 1 = "eed"/"eedly", result 2 = "ed"/"ing"/"edly"/"ingly". Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 2, s_3_0, -1, 2, 0},
+/* 1 */ { 3, s_3_1, 0, 1, 0},
+/* 2 */ { 3, s_3_2, -1, 2, 0},
+/* 3 */ { 4, s_3_3, -1, 2, 0},
+/* 4 */ { 5, s_3_4, 3, 1, 0},
+/* 5 */ { 5, s_3_5, -1, 2, 0}
+};
+
+static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
+static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
+static symbol s_4_2[3] = { 'o', 'g', 'i' };
+static symbol s_4_3[2] = { 'l', 'i' };
+static symbol s_4_4[3] = { 'b', 'l', 'i' };
+static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
+static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
+static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
+static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
+static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
+static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
+static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
+static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
+static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
+static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
+static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
+static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
+static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
+static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
+static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
+static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
+
+static struct among a_4[24] = /* Suffixes searched backwards by r_Step_2(); the result field (1..16) selects the replacement case in its switch. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 4, s_4_0, -1, 3, 0},
+/* 1 */ { 4, s_4_1, -1, 2, 0},
+/* 2 */ { 3, s_4_2, -1, 13, 0},
+/* 3 */ { 2, s_4_3, -1, 16, 0},
+/* 4 */ { 3, s_4_4, 3, 12, 0},
+/* 5 */ { 4, s_4_5, 4, 4, 0},
+/* 6 */ { 4, s_4_6, 3, 8, 0},
+/* 7 */ { 5, s_4_7, 3, 14, 0},
+/* 8 */ { 6, s_4_8, 3, 15, 0},
+/* 9 */ { 5, s_4_9, 3, 10, 0},
+/* 10 */ { 5, s_4_10, 3, 5, 0},
+/* 11 */ { 5, s_4_11, -1, 8, 0},
+/* 12 */ { 6, s_4_12, -1, 12, 0},
+/* 13 */ { 5, s_4_13, -1, 11, 0},
+/* 14 */ { 6, s_4_14, -1, 1, 0},
+/* 15 */ { 7, s_4_15, 14, 7, 0},
+/* 16 */ { 5, s_4_16, -1, 8, 0},
+/* 17 */ { 5, s_4_17, -1, 7, 0},
+/* 18 */ { 7, s_4_18, 17, 6, 0},
+/* 19 */ { 4, s_4_19, -1, 6, 0},
+/* 20 */ { 4, s_4_20, -1, 7, 0},
+/* 21 */ { 7, s_4_21, -1, 11, 0},
+/* 22 */ { 7, s_4_22, -1, 9, 0},
+/* 23 */ { 7, s_4_23, -1, 10, 0}
+};
+
+static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
+static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
+static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
+static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
+static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
+static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
+static symbol s_5_7[3] = { 'f', 'u', 'l' };
+static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
+
+static struct among a_5[9] = /* Suffixes searched backwards by r_Step_3(); the result field (1..6) selects the case in its switch. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 5, s_5_0, -1, 4, 0},
+/* 1 */ { 5, s_5_1, -1, 6, 0},
+/* 2 */ { 5, s_5_2, -1, 3, 0},
+/* 3 */ { 5, s_5_3, -1, 4, 0},
+/* 4 */ { 4, s_5_4, -1, 4, 0},
+/* 5 */ { 6, s_5_5, -1, 1, 0},
+/* 6 */ { 7, s_5_6, 5, 2, 0},
+/* 7 */ { 3, s_5_7, -1, 5, 0},
+/* 8 */ { 4, s_5_8, -1, 5, 0}
+};
+
+static symbol s_6_0[2] = { 'i', 'c' };
+static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
+static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
+static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
+static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
+static symbol s_6_5[3] = { 'a', 't', 'e' };
+static symbol s_6_6[3] = { 'i', 'v', 'e' };
+static symbol s_6_7[3] = { 'i', 'z', 'e' };
+static symbol s_6_8[3] = { 'i', 't', 'i' };
+static symbol s_6_9[2] = { 'a', 'l' };
+static symbol s_6_10[3] = { 'i', 's', 'm' };
+static symbol s_6_11[3] = { 'i', 'o', 'n' };
+static symbol s_6_12[2] = { 'e', 'r' };
+static symbol s_6_13[3] = { 'o', 'u', 's' };
+static symbol s_6_14[3] = { 'a', 'n', 't' };
+static symbol s_6_15[3] = { 'e', 'n', 't' };
+static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
+static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
+
+static struct among a_6[18] = /* Suffixes searched backwards by r_Step_4(): result 1 = plain deletion, result 2 = "ion", which has an extra condition there. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 2, s_6_0, -1, 1, 0},
+/* 1 */ { 4, s_6_1, -1, 1, 0},
+/* 2 */ { 4, s_6_2, -1, 1, 0},
+/* 3 */ { 4, s_6_3, -1, 1, 0},
+/* 4 */ { 4, s_6_4, -1, 1, 0},
+/* 5 */ { 3, s_6_5, -1, 1, 0},
+/* 6 */ { 3, s_6_6, -1, 1, 0},
+/* 7 */ { 3, s_6_7, -1, 1, 0},
+/* 8 */ { 3, s_6_8, -1, 1, 0},
+/* 9 */ { 2, s_6_9, -1, 1, 0},
+/* 10 */ { 3, s_6_10, -1, 1, 0},
+/* 11 */ { 3, s_6_11, -1, 2, 0},
+/* 12 */ { 2, s_6_12, -1, 1, 0},
+/* 13 */ { 3, s_6_13, -1, 1, 0},
+/* 14 */ { 3, s_6_14, -1, 1, 0},
+/* 15 */ { 3, s_6_15, -1, 1, 0},
+/* 16 */ { 4, s_6_16, 15, 1, 0},
+/* 17 */ { 5, s_6_17, 16, 1, 0}
+};
+
+static symbol s_7_0[1] = { 'e' };
+static symbol s_7_1[1] = { 'l' };
+
+static struct among a_7[2] = /* Final letters searched backwards by r_Step_5(): result 1 = "e", result 2 = "l". Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 1, s_7_0, -1, 1, 0},
+/* 1 */ { 1, s_7_1, -1, 2, 0}
+};
+
+static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
+static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
+static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
+static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
+static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
+static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
+
+static struct among a_8[8] = /* Whole words searched backwards by r_exception2(); all carry result -1 because only the fact of a match is used. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 7, s_8_0, -1, -1, 0},
+/* 1 */ { 7, s_8_1, -1, -1, 0},
+/* 2 */ { 6, s_8_2, -1, -1, 0},
+/* 3 */ { 7, s_8_3, -1, -1, 0},
+/* 4 */ { 6, s_8_4, -1, -1, 0},
+/* 5 */ { 7, s_8_5, -1, -1, 0},
+/* 6 */ { 7, s_8_6, -1, -1, 0},
+/* 7 */ { 6, s_8_7, -1, -1, 0}
+};
+
+static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
+static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
+static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
+static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
+static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
+static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
+static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
+static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
+static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
+static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
+static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
+static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
+static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
+static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
+static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
+static symbol s_9_15[3] = { 's', 'k', 'y' };
+static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
+static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
+
+static struct among a_9[18] = /* Whole words searched forwards by r_exception1(): results 1..11 select a fixed replacement there, -1 has no case so the word is kept as it is. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 5, s_9_0, -1, -1, 0},
+/* 1 */ { 5, s_9_1, -1, -1, 0},
+/* 2 */ { 4, s_9_2, -1, -1, 0},
+/* 3 */ { 6, s_9_3, -1, -1, 0},
+/* 4 */ { 5, s_9_4, -1, 3, 0},
+/* 5 */ { 5, s_9_5, -1, 9, 0},
+/* 6 */ { 6, s_9_6, -1, 7, 0},
+/* 7 */ { 4, s_9_7, -1, -1, 0},
+/* 8 */ { 4, s_9_8, -1, 6, 0},
+/* 9 */ { 5, s_9_9, -1, 4, 0},
+/* 10 */ { 4, s_9_10, -1, -1, 0},
+/* 11 */ { 4, s_9_11, -1, 10, 0},
+/* 12 */ { 6, s_9_12, -1, 11, 0},
+/* 13 */ { 5, s_9_13, -1, 2, 0},
+/* 14 */ { 4, s_9_14, -1, 1, 0},
+/* 15 */ { 3, s_9_15, -1, -1, 0},
+/* 16 */ { 5, s_9_16, -1, 5, 0},
+/* 17 */ { 4, s_9_17, -1, 8, 0}
+};
+
+static unsigned char g_v[] = { 17, 65, 16, 1 }; /* grouping passed to in_grouping/out_grouping with bounds 97..121 ('a'..'y'); presumably a bitmap of the vowels a e i o u y -- confirm against in_grouping() */
+
+static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; /* grouping used by r_shortv() with bounds 89..121 ('Y'..'y'); presumably the vowels plus w, x and Y -- confirm */
+
+static unsigned char g_valid_LI[] = { 55, 141, 2 }; /* grouping used by r_Step_2() with bounds 99..116 ('c'..'t'): the letters that may precede a deleted "li" */
+
+static symbol s_0[] = { 'y' }; /* s_0 .. s_48: literal strings handed to eq_s/eq_s_b, slice_from_s and insert_s by the routines below, always together with an explicit length */
+static symbol s_1[] = { 'Y' };
+static symbol s_2[] = { 'y' };
+static symbol s_3[] = { 'Y' };
+static symbol s_4[] = { 's', 's' };
+static symbol s_5[] = { 'i', 'e' };
+static symbol s_6[] = { 'i' };
+static symbol s_7[] = { 'e', 'e' };
+static symbol s_8[] = { 'e' };
+static symbol s_9[] = { 'e' };
+static symbol s_10[] = { 'y' };
+static symbol s_11[] = { 'Y' };
+static symbol s_12[] = { 'i' };
+static symbol s_13[] = { 't', 'i', 'o', 'n' }; /* s_13 .. s_28: used by r_Step_2() */
+static symbol s_14[] = { 'e', 'n', 'c', 'e' };
+static symbol s_15[] = { 'a', 'n', 'c', 'e' };
+static symbol s_16[] = { 'a', 'b', 'l', 'e' };
+static symbol s_17[] = { 'e', 'n', 't' };
+static symbol s_18[] = { 'i', 'z', 'e' };
+static symbol s_19[] = { 'a', 't', 'e' };
+static symbol s_20[] = { 'a', 'l' };
+static symbol s_21[] = { 'f', 'u', 'l' };
+static symbol s_22[] = { 'o', 'u', 's' };
+static symbol s_23[] = { 'i', 'v', 'e' };
+static symbol s_24[] = { 'b', 'l', 'e' };
+static symbol s_25[] = { 'l' };
+static symbol s_26[] = { 'o', 'g' };
+static symbol s_27[] = { 'f', 'u', 'l' };
+static symbol s_28[] = { 'l', 'e', 's', 's' };
+static symbol s_29[] = { 't', 'i', 'o', 'n' }; /* s_29 .. s_32: replacements used by r_Step_3() */
+static symbol s_30[] = { 'a', 't', 'e' };
+static symbol s_31[] = { 'a', 'l' };
+static symbol s_32[] = { 'i', 'c' };
+static symbol s_33[] = { 's' };
+static symbol s_34[] = { 't' };
+static symbol s_35[] = { 'l' };
+static symbol s_36[] = { 's', 'k', 'i' }; /* s_36 .. s_46: replacements used by r_exception1() */
+static symbol s_37[] = { 's', 'k', 'y' };
+static symbol s_38[] = { 'd', 'i', 'e' };
+static symbol s_39[] = { 'l', 'i', 'e' };
+static symbol s_40[] = { 't', 'i', 'e' };
+static symbol s_41[] = { 'i', 'd', 'l' };
+static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
+static symbol s_43[] = { 'u', 'g', 'l', 'i' };
+static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
+static symbol s_45[] = { 'o', 'n', 'l', 'i' };
+static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
+static symbol s_47[] = { 'Y' }; /* s_47, s_48: used by r_postlude() to turn 'Y' back into 'y' */
+static symbol s_48[] = { 'y' };
+
+static int r_prelude(struct SN_env * z) { /* Prelude: rewrite selected 'y' symbols to 'Y' and record in B[0]
+ * (Y_found) whether any rewrite happened.  Two cases are handled: a 'y' at
+ * the starting cursor that is followed by a g_v symbol, and every 'y' that
+ * directly follows a g_v symbol.  The cursor is restored afterwards and the
+ * routine always returns 1. */
+ z->B[0] = 0; /* unset Y_found, line 24 */
+ { int c = z->c; /* do, line 25 */
+ z->bra = z->c; /* [, line 25 */
+ if (!(eq_s(z, 1, s_0))) goto lab0; /* need 'y' at the cursor */
+ z->ket = z->c; /* ], line 25 */
+ if (!(in_grouping(z, g_v, 97, 121))) goto lab0; /* ...followed by a g_v symbol */
+ slice_from_s(z, 1, s_1); /* <-, line 25 */
+ z->B[0] = 1; /* set Y_found, line 25 */
+ lab0:
+ z->c = c; /* "do" never fails: restore the cursor either way */
+ }
+ { int c = z->c; /* do, line 26 */
+ while(1) { /* repeat, line 26 */
+ int c = z->c;
+ while(1) { /* goto, line 26 */
+ int c = z->c; /* candidate position; restored before each retry */
+ if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+ z->bra = z->c; /* [, line 26 */
+ if (!(eq_s(z, 1, s_2))) goto lab3;
+ z->ket = z->c; /* ], line 26 */
+ z->c = c;
+ break;
+ lab3:
+ z->c = c;
+ if (z->c >= z->l) goto lab2; /* reached the limit: no further match, leave the repeat */
+ z->c++;
+ }
+ slice_from_s(z, 1, s_3); /* <-, line 26 */
+ z->B[0] = 1; /* set Y_found, line 26 */
+ continue;
+ lab2:
+ z->c = c;
+ break;
+ }
+ lab1: /* emitted by the generator; no goto in this routine targets it */
+ z->c = c;
+ }
+ return 1;
+}
+
+static int r_mark_regions(struct SN_env * z) { /* Compute the region marks p1 (I[0]) and p2 (I[1]).
+ * Both start at the limit z->l.  p1 becomes the position after the "gener"
+ * prefix (table a_0) or, failing that, after the first g_v symbol followed
+ * by a non-g_v symbol; p2 is found by repeating the g_v / non-g_v search
+ * from p1.  If the limit is reached during a search the remaining marks
+ * keep the value z->l.  The cursor is restored; the result is always 1. */
+ z->I[0] = z->l;
+ z->I[1] = z->l;
+ { int c = z->c; /* do, line 32 */
+ { int c = z->c; /* or, line 36 */
+ if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */
+ goto lab1;
+ lab2:
+ z->c = c; /* first alternative failed: retry from the saved cursor */
+ while(1) { /* gopast, line 36 */
+ if (!(in_grouping(z, g_v, 97, 121))) goto lab3;
+ break;
+ lab3:
+ if (z->c >= z->l) goto lab0;
+ z->c++;
+ }
+ while(1) { /* gopast, line 36 */
+ if (!(out_grouping(z, g_v, 97, 121))) goto lab4;
+ break;
+ lab4:
+ if (z->c >= z->l) goto lab0;
+ z->c++;
+ }
+ }
+ lab1:
+ z->I[0] = z->c; /* setmark p1, line 37 */
+ while(1) { /* gopast, line 38 */
+ if (!(in_grouping(z, g_v, 97, 121))) goto lab5;
+ break;
+ lab5:
+ if (z->c >= z->l) goto lab0;
+ z->c++;
+ }
+ while(1) { /* gopast, line 38 */
+ if (!(out_grouping(z, g_v, 97, 121))) goto lab6;
+ break;
+ lab6:
+ if (z->c >= z->l) goto lab0;
+ z->c++;
+ }
+ z->I[1] = z->c; /* setmark p2, line 38 */
+ lab0:
+ z->c = c;
+ }
+ return 1;
+}
+
+static int r_shortv(struct SN_env * z) { /* Backward test used by r_Step_1b() and r_Step_5().
+ * Returns 1 when, working backwards from the cursor, either
+ *   (a) a symbol outside g_v_WXY, a g_v symbol and a symbol outside g_v, or
+ *   (b) a symbol outside g_v and a g_v symbol ending at the backward limit
+ * is matched; returns 0 otherwise.  The cursor is not restored here;
+ * both callers save and restore it themselves. */
+ { int m = z->l - z->c; /* or, line 46 */
+ if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1;
+ if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1;
+ if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1;
+ goto lab0;
+ lab1:
+ z->c = z->l - m; /* rewind to the saved position before trying alternative (b) */
+ if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+ if (!(in_grouping_b(z, g_v, 97, 121))) return 0;
+ if (z->c > z->lb) return 0; /* atlimit, line 47 */
+ }
+lab0:
+ return 1;
+}
+
+/* Region test R1: 1 when the cursor is at or beyond mark p1 (I[0]), else 0. */
+static int r_R1(struct SN_env * z) {
+    const int p1 = z->I[0];
+
+    return (z->c >= p1) ? 1 : 0;
+}
+
+/* Region test R2: 1 when the cursor is at or beyond mark p2 (I[1]), else 0. */
+static int r_R2(struct SN_env * z) {
+    const int p2 = z->I[1];
+
+    return (z->c >= p2) ? 1 : 0;
+}
+
+static int r_Step_1a(struct SN_env * z) { /* Step 1a: handle the suffixes of table a_1 (backward mode).
+ * "sses" becomes "ss"; "ied"/"ies" becomes "ie" when exactly one symbol
+ * precedes it and "i" otherwise; "s" is deleted when a g_v symbol occurs
+ * somewhere before the symbol preceding it.  Entries with result -1
+ * ("ss", "us") have no case, so the word is left unchanged.
+ * Returns 0 when no suffix matches or a condition fails, 1 otherwise.
+ * NOTE(review): assumes find_among_b() returns the matched entry's result
+ * field and 0 for no match -- confirm in the runtime. */
+ int among_var;
+ z->ket = z->c; /* [, line 54 */
+ among_var = find_among_b(z, a_1, 6); /* substring, line 54 */
+ if (!(among_var)) return 0;
+ z->bra = z->c; /* ], line 54 */
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ slice_from_s(z, 2, s_4); /* <-, line 55 */
+ break;
+ case 2:
+ { int m = z->l - z->c; /* or, line 57 */
+ if (z->c <= z->lb) goto lab1; /* nothing precedes the suffix */
+ z->c--; /* next, line 57 */
+ if (z->c > z->lb) goto lab1; /* atlimit, line 57 */
+ slice_from_s(z, 2, s_5); /* <-, line 57 */
+ goto lab0;
+ lab1:
+ z->c = z->l - m;
+ slice_from_s(z, 1, s_6); /* <-, line 57 */
+ }
+ lab0:
+ break;
+ case 3:
+ if (z->c <= z->lb) return 0;
+ z->c--; /* next, line 58 */
+ while(1) { /* gopast, line 58 */
+ if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2;
+ break;
+ lab2:
+ if (z->c <= z->lb) return 0; /* ran out of symbols without meeting a g_v symbol */
+ z->c--;
+ }
+ slice_del(z); /* delete, line 58 */
+ break;
+ }
+ return 1;
+}
+
+static int r_Step_1b(struct SN_env * z) { /* Step 1b: handle the suffixes of table a_3 (backward mode).
+ * Result 1 ("eed", "eedly"): replaced by "ee" when the suffix lies in R1.
+ * Result 2 ("ed", "ing", "edly", "ingly"): deleted when a g_v symbol
+ * occurs before it; the new ending is then looked up in table a_2 and
+ *   1 ("bl", "at", "iz"): an "e" is inserted at the cursor;
+ *   2 (doubled letter):   the last symbol is deleted;
+ *   3 (anything else):    an "e" is inserted when the cursor is at p1 and
+ *                         r_shortv() succeeds.
+ * Returns 0 when nothing matches or a condition fails, 1 otherwise. */
+ int among_var;
+ z->ket = z->c; /* [, line 64 */
+ among_var = find_among_b(z, a_3, 6); /* substring, line 64 */
+ if (!(among_var)) return 0;
+ z->bra = z->c; /* ], line 64 */
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ if (!r_R1(z)) return 0; /* call R1, line 66 */
+ slice_from_s(z, 2, s_7); /* <-, line 66 */
+ break;
+ case 2:
+ { int m_test = z->l - z->c; /* test, line 69 */
+ while(1) { /* gopast, line 69 */
+ if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0;
+ break;
+ lab0:
+ if (z->c <= z->lb) return 0;
+ z->c--;
+ }
+ z->c = z->l - m_test; /* a test only looks ahead: put the cursor back */
+ }
+ slice_del(z); /* delete, line 69 */
+ { int m_test = z->l - z->c; /* test, line 70 */
+ among_var = find_among_b(z, a_2, 13); /* substring, line 70 */
+ if (!(among_var)) return 0;
+ z->c = z->l - m_test;
+ }
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ { int c = z->c;
+ insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */
+ z->c = c; /* keep the cursor where it was before the insertion */
+ }
+ break;
+ case 2:
+ z->ket = z->c; /* [, line 75 */
+ if (z->c <= z->lb) return 0;
+ z->c--; /* next, line 75 */
+ z->bra = z->c; /* ], line 75 */
+ slice_del(z); /* delete, line 75 */
+ break;
+ case 3:
+ if (z->c != z->I[0]) return 0; /* atmark, line 76 */
+ { int m_test = z->l - z->c; /* test, line 76 */
+ if (!r_shortv(z)) return 0; /* call shortv, line 76 */
+ z->c = z->l - m_test;
+ }
+ { int c = z->c;
+ insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */
+ z->c = c;
+ }
+ break;
+ }
+ break;
+ }
+ return 1;
+}
+
+static int r_Step_1c(struct SN_env * z) { /* Step 1c (backward mode): replace a final 'y' or 'Y' by 'i' when
+ * it is preceded by a symbol outside g_v and that symbol is not the first
+ * one of the word (the cursor must not be at the backward limit after
+ * consuming it).  Returns 1 after the replacement, 0 otherwise. */
+ z->ket = z->c; /* [, line 83 */
+ { int m = z->l - z->c; /* or, line 83 */
+ if (!(eq_s_b(z, 1, s_10))) goto lab1;
+ goto lab0;
+ lab1:
+ z->c = z->l - m;
+ if (!(eq_s_b(z, 1, s_11))) return 0;
+ }
+lab0:
+ z->bra = z->c; /* ], line 83 */
+ if (!(out_grouping_b(z, g_v, 97, 121))) return 0;
+ { int m = z->l - z->c; /* not, line 84 */
+ if (z->c > z->lb) goto lab2; /* atlimit, line 84 */
+ return 0; /* at the limit: the negated test fails */
+ lab2:
+ z->c = z->l - m;
+ }
+ slice_from_s(z, 1, s_12); /* <-, line 85 */
+ return 1;
+}
+
+static int r_Step_2(struct SN_env * z) { /* Step 2 (backward mode): find the longest suffix from table a_4
+ * and, provided it lies in R1, rewrite it as selected by the entry's
+ * result field.  Returns 0 when nothing matches, the suffix is not in R1
+ * or a case-specific condition fails; 1 otherwise. */
+ int among_var;
+ z->ket = z->c; /* [, line 89 */
+ among_var = find_among_b(z, a_4, 24); /* substring, line 89 */
+ if (!(among_var)) return 0;
+ z->bra = z->c; /* ], line 89 */
+ if (!r_R1(z)) return 0; /* call R1, line 89 */
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ slice_from_s(z, 4, s_13); /* <-, line 90 */ /* "tional" -> "tion" */
+ break;
+ case 2:
+ slice_from_s(z, 4, s_14); /* <-, line 91 */ /* "enci" -> "ence" */
+ break;
+ case 3:
+ slice_from_s(z, 4, s_15); /* <-, line 92 */ /* "anci" -> "ance" */
+ break;
+ case 4:
+ slice_from_s(z, 4, s_16); /* <-, line 93 */ /* "abli" -> "able" */
+ break;
+ case 5:
+ slice_from_s(z, 3, s_17); /* <-, line 94 */ /* "entli" -> "ent" */
+ break;
+ case 6:
+ slice_from_s(z, 3, s_18); /* <-, line 96 */ /* "izer", "ization" -> "ize" */
+ break;
+ case 7:
+ slice_from_s(z, 3, s_19); /* <-, line 98 */ /* "ational", "ation", "ator" -> "ate" */
+ break;
+ case 8:
+ slice_from_s(z, 2, s_20); /* <-, line 100 */ /* "alism", "aliti", "alli" -> "al" */
+ break;
+ case 9:
+ slice_from_s(z, 3, s_21); /* <-, line 101 */ /* "fulness" -> "ful" */
+ break;
+ case 10:
+ slice_from_s(z, 3, s_22); /* <-, line 103 */ /* "ousli", "ousness" -> "ous" */
+ break;
+ case 11:
+ slice_from_s(z, 3, s_23); /* <-, line 105 */ /* "iveness", "iviti" -> "ive" */
+ break;
+ case 12:
+ slice_from_s(z, 3, s_24); /* <-, line 107 */ /* "biliti", "bli" -> "ble" */
+ break;
+ case 13:
+ if (!(eq_s_b(z, 1, s_25))) return 0; /* "ogi" only when preceded by 'l' */
+ slice_from_s(z, 2, s_26); /* <-, line 108 */ /* "ogi" -> "og" */
+ break;
+ case 14:
+ slice_from_s(z, 3, s_27); /* <-, line 109 */ /* "fulli" -> "ful" */
+ break;
+ case 15:
+ slice_from_s(z, 4, s_28); /* <-, line 110 */ /* "lessli" -> "less" */
+ break;
+ case 16:
+ if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0; /* "li" only after a g_valid_LI symbol */
+ slice_del(z); /* delete, line 111 */
+ break;
+ }
+ return 1;
+}
+
+static int r_Step_3(struct SN_env * z) { /* Step 3 (backward mode): find the longest suffix from table a_5
+ * and, provided it lies in R1, rewrite or delete it as selected by the
+ * entry's result field.  Returns 0 when nothing matches, the suffix is not
+ * in R1, or (case 6) it is not in R2; 1 otherwise. */
+ int among_var;
+ z->ket = z->c; /* [, line 116 */
+ among_var = find_among_b(z, a_5, 9); /* substring, line 116 */
+ if (!(among_var)) return 0;
+ z->bra = z->c; /* ], line 116 */
+ if (!r_R1(z)) return 0; /* call R1, line 116 */
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ slice_from_s(z, 4, s_29); /* <-, line 117 */ /* "tional" -> "tion" */
+ break;
+ case 2:
+ slice_from_s(z, 3, s_30); /* <-, line 118 */ /* "ational" -> "ate" */
+ break;
+ case 3:
+ slice_from_s(z, 2, s_31); /* <-, line 119 */ /* "alize" -> "al" */
+ break;
+ case 4:
+ slice_from_s(z, 2, s_32); /* <-, line 121 */ /* "icate", "iciti", "ical" -> "ic" */
+ break;
+ case 5:
+ slice_del(z); /* delete, line 123 */ /* "ful", "ness" */
+ break;
+ case 6:
+ if (!r_R2(z)) return 0; /* call R2, line 125 */
+ slice_del(z); /* delete, line 125 */ /* "ative" */
+ break;
+ }
+ return 1;
+}
+
+static int r_Step_4(struct SN_env * z) { /* Step 4 (backward mode): delete the longest suffix from table a_6
+ * when it lies in R2.  "ion" (result 2) is deleted only when it is
+ * preceded by 's' or 't'.  Returns 0 when nothing matches or a condition
+ * fails, 1 after a deletion. */
+ int among_var;
+ z->ket = z->c; /* [, line 130 */
+ among_var = find_among_b(z, a_6, 18); /* substring, line 130 */
+ if (!(among_var)) return 0;
+ z->bra = z->c; /* ], line 130 */
+ if (!r_R2(z)) return 0; /* call R2, line 130 */
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ slice_del(z); /* delete, line 133 */
+ break;
+ case 2:
+ { int m = z->l - z->c; /* or, line 134 */
+ if (!(eq_s_b(z, 1, s_33))) goto lab1; /* preceded by 's'? */
+ goto lab0;
+ lab1:
+ z->c = z->l - m;
+ if (!(eq_s_b(z, 1, s_34))) return 0; /* otherwise it must be preceded by 't' */
+ }
+ lab0:
+ slice_del(z); /* delete, line 134 */
+ break;
+ }
+ return 1;
+}
+
+static int r_Step_5(struct SN_env * z) { /* Step 5 (backward mode): handle a final 'e' or 'l' (table a_7).
+ * 'e' is deleted when it lies in R2, or when it lies in R1 and r_shortv()
+ * fails for the text before it.  'l' is deleted when it lies in R2 and is
+ * preceded by another 'l'.  Returns 0 when nothing matches or a condition
+ * fails, 1 after a deletion. */
+ int among_var;
+ z->ket = z->c; /* [, line 139 */
+ among_var = find_among_b(z, a_7, 2); /* substring, line 139 */
+ if (!(among_var)) return 0;
+ z->bra = z->c; /* ], line 139 */
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ { int m = z->l - z->c; /* or, line 140 */
+ if (!r_R2(z)) goto lab1; /* call R2, line 140 */
+ goto lab0;
+ lab1:
+ z->c = z->l - m;
+ if (!r_R1(z)) return 0; /* call R1, line 140 */
+ { int m = z->l - z->c; /* not, line 140 */
+ if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */
+ return 0; /* shortv matched, so the negated test fails */
+ lab2:
+ z->c = z->l - m;
+ }
+ }
+ lab0:
+ slice_del(z); /* delete, line 140 */
+ break;
+ case 2:
+ if (!r_R2(z)) return 0; /* call R2, line 141 */
+ if (!(eq_s_b(z, 1, s_35))) return 0; /* must be preceded by 'l' */
+ slice_del(z); /* delete, line 141 */
+ break;
+ }
+ return 1;
+}
+
+/*
+ * Second exception list (backward mode): succeeds when the text ending at
+ * the cursor matches an entry of table a_8 and the match reaches the
+ * backward limit.  Sets ket before the search and bra after a match.
+ * Returns 1 on such a match and 0 otherwise.
+ */
+static int r_exception2(struct SN_env * z) {
+    int found;
+
+    z->ket = z->c; /* [, line 147 */
+    found = find_among_b(z, a_8, 8); /* substring, line 147 */
+    if (found == 0) return 0;
+    z->bra = z->c; /* ], line 147 */
+
+    /* atlimit, line 147: the cursor must have reached the backward limit */
+    return (z->c <= z->lb) ? 1 : 0;
+}
+
+static int r_exception1(struct SN_env * z) { /* First exception list (forward mode): when the text from the
+ * cursor matches an entry of table a_9 and the match ends at the limit,
+ * replace it by a fixed string (results 1..11) or keep it unchanged
+ * (result -1, which has no case).  Returns 1 in both situations and 0 when
+ * there is no such match. */
+ int among_var;
+ z->bra = z->c; /* [, line 159 */
+ among_var = find_among(z, a_9, 18); /* substring, line 159 */
+ if (!(among_var)) return 0;
+ z->ket = z->c; /* ], line 159 */
+ if (z->c < z->l) return 0; /* atlimit, line 159 */
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ slice_from_s(z, 3, s_36); /* <-, line 163 */ /* "skis" -> "ski" */
+ break;
+ case 2:
+ slice_from_s(z, 3, s_37); /* <-, line 164 */ /* "skies" -> "sky" */
+ break;
+ case 3:
+ slice_from_s(z, 3, s_38); /* <-, line 165 */ /* "dying" -> "die" */
+ break;
+ case 4:
+ slice_from_s(z, 3, s_39); /* <-, line 166 */ /* "lying" -> "lie" */
+ break;
+ case 5:
+ slice_from_s(z, 3, s_40); /* <-, line 167 */ /* "tying" -> "tie" */
+ break;
+ case 6:
+ slice_from_s(z, 3, s_41); /* <-, line 171 */ /* "idly" -> "idl" */
+ break;
+ case 7:
+ slice_from_s(z, 5, s_42); /* <-, line 172 */ /* "gently" -> "gentl" */
+ break;
+ case 8:
+ slice_from_s(z, 4, s_43); /* <-, line 173 */ /* "ugly" -> "ugli" */
+ break;
+ case 9:
+ slice_from_s(z, 5, s_44); /* <-, line 174 */ /* "early" -> "earli" */
+ break;
+ case 10:
+ slice_from_s(z, 4, s_45); /* <-, line 175 */ /* "only" -> "onli" */
+ break;
+ case 11:
+ slice_from_s(z, 5, s_46); /* <-, line 176 */ /* "singly" -> "singl" */
+ break;
+ }
+ return 1;
+}
+
+static int r_postlude(struct SN_env * z) { /* Postlude: when B[0] (Y_found) is set, turn every 'Y' from the
+ * cursor onwards back into 'y'.  Returns 0 immediately when the flag is
+ * clear and 1 after the scan; the cursor is restored to where the last
+ * search started. */
+ if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */
+ while(1) { /* repeat, line 192 */
+ int c = z->c;
+ while(1) { /* goto, line 192 */
+ int c = z->c;
+ z->bra = z->c; /* [, line 192 */
+ if (!(eq_s(z, 1, s_47))) goto lab1;
+ z->ket = z->c; /* ], line 192 */
+ z->c = c;
+ break;
+ lab1:
+ z->c = c;
+ if (z->c >= z->l) goto lab0; /* limit reached: no more 'Y' to rewrite */
+ z->c++;
+ }
+ slice_from_s(z, 1, s_48); /* <-, line 192 */
+ continue;
+ lab0:
+ z->c = c;
+ break;
+ }
+ return 1;
+}
+
+extern int english_stem(struct SN_env * z) { /* Entry point: stem the current string of z in place.
+ * Order of work: r_exception1() first; if it fails, the word must hold at
+ * least 3 symbols from the cursor (otherwise 0 is returned and nothing is
+ * changed), then r_prelude() and r_mark_regions() run forwards, the
+ * direction is switched to backwards for r_Step_1a() and then either
+ * r_exception2() or the sequence Step_1b, 1c, 2, 3, 4, 5, and finally
+ * r_postlude() runs forwards again.  Each "do" block ignores the callee's
+ * result and restores the cursor.  Returns 1 in every other case. */
+ { int c = z->c; /* or, line 196 */
+ if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */
+ goto lab0;
+ lab1:
+ z->c = c;
+ { int c_test = z->c; /* test, line 198 */
+ { int c = z->c + 3;
+ if (0 > c || c > z->l) return 0; /* fewer than 3 symbols available: give up */
+ z->c = c; /* hop, line 198 */
+ }
+ z->c = c_test;
+ }
+ { int c = z->c; /* do, line 199 */
+ if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */
+ lab2:
+ z->c = c;
+ }
+ { int c = z->c; /* do, line 200 */
+ if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */
+ lab3:
+ z->c = c;
+ }
+ z->lb = z->c; z->c = z->l; /* backwards, line 201 */
+
+ { int m = z->l - z->c; /* do, line 203 */
+ if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */
+ lab4:
+ z->c = z->l - m; /* backward cursors are saved as a distance from the limit */
+ }
+ { int m = z->l - z->c; /* or, line 205 */
+ if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */
+ goto lab5;
+ lab6:
+ z->c = z->l - m;
+ { int m = z->l - z->c; /* do, line 207 */
+ if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */
+ lab7:
+ z->c = z->l - m;
+ }
+ { int m = z->l - z->c; /* do, line 208 */
+ if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */
+ lab8:
+ z->c = z->l - m;
+ }
+ { int m = z->l - z->c; /* do, line 210 */
+ if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */
+ lab9:
+ z->c = z->l - m;
+ }
+ { int m = z->l - z->c; /* do, line 211 */
+ if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */
+ lab10:
+ z->c = z->l - m;
+ }
+ { int m = z->l - z->c; /* do, line 212 */
+ if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */
+ lab11:
+ z->c = z->l - m;
+ }
+ { int m = z->l - z->c; /* do, line 214 */
+ if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */
+ lab12:
+ z->c = z->l - m;
+ }
+ }
+ lab5:
+ z->c = z->lb; /* back to forward mode: the cursor returns to the saved start */
+ { int c = z->c; /* do, line 217 */
+ if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */
+ lab13:
+ z->c = c;
+ }
+ }
+lab0:
+ return 1;
+}
+
+/*
+ * Create an environment sized for the English stemmer: no string slots,
+ * two integer slots (the marks I[0] and I[1]) and one flag slot (B[0]).
+ */
+extern struct SN_env * english_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 1);
+
+    return env;
+}
+
+/* Release an environment created by english_create_env(). */
+extern void english_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+
--- /dev/null
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+extern struct SN_env * english_create_env(void);
+extern void english_close_env(struct SN_env * z);
+
+extern int english_stem(struct SN_env * z);
+
--- /dev/null
+
+#include <limits.h>
+
+#include "api.h"
+
+/* Largest and smallest int values, taken from <limits.h>. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/*
+ * Size in bytes of the two-int header that SIZE() and CAPACITY() read in
+ * front of a symbol string.  Parenthesised so the macro behaves as a single
+ * operand in any expression (for example x % HEAD or x / HEAD).
+ */
+#define HEAD (2*sizeof(int))
+
+/*
+ * Accessors for the ints stored immediately before a symbol string p:
+ * index -1 holds the size, index -2 the capacity.
+ */
+#define SIZE(p) ((int *)(p))[-1]
+#define SET_SIZE(p, n) ((int *)(p))[-1] = (n)
+#define CAPACITY(p) ((int *)(p))[-2]
+
+struct among /* One entry of a search table handed to find_among() / find_among_b(). */
+{ int s_size; /* number of chars in string */
+ symbol * s; /* search string */
+ int substring_i;/* index to longest matching substring */
+ int result; /* result of the lookup */
+ int (* function)(struct SN_env *); /* optional routine; every table visible in this chunk stores 0 here -- NOTE(review): presumably an extra match condition, confirm in find_among() */
+};
+
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
+
+extern int in_range(struct SN_env * z, int min, int max);
+extern int in_range_b(struct SN_env * z, int min, int max);
+extern int out_range(struct SN_env * z, int min, int max);
+extern int out_range_b(struct SN_env * z, int min, int max);
+
+extern int eq_s(struct SN_env * z, int s_size, symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
+extern int eq_v(struct SN_env * z, symbol * p);
+extern int eq_v_b(struct SN_env * z, symbol * p);
+
+extern int find_among(struct SN_env * z, struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
+
+extern symbol * increase_size(symbol * p, int n);
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
+extern void slice_from_v(struct SN_env * z, symbol * p);
+extern void slice_del(struct SN_env * z);
+
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+
--- /dev/null
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "header.h"
+
+extern int russian_stem(struct SN_env * z);
+static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z);
+static int r_noun(struct SN_env * z);
+static int r_verb(struct SN_env * z);
+static int r_reflexive(struct SN_env * z);
+static int r_adjectival(struct SN_env * z);
+static int r_adjective(struct SN_env * z);
+static int r_perfective_gerund(struct SN_env * z);
+static int r_R2(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+
+static symbol s_0_0[3] = { 215, 219, 201 }; /* values above 127: presumably KOI8-R encoded Cyrillic -- confirm the encoding with the caller */
+static symbol s_0_1[4] = { 201, 215, 219, 201 };
+static symbol s_0_2[4] = { 217, 215, 219, 201 };
+static symbol s_0_3[1] = { 215 };
+static symbol s_0_4[2] = { 201, 215 };
+static symbol s_0_5[2] = { 217, 215 };
+static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
+static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
+static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
+
+static struct among a_0[9] = /* Russian search table 0; the routine that searches it lies outside this chunk. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 3, s_0_0, -1, 1, 0},
+/* 1 */ { 4, s_0_1, 0, 2, 0},
+/* 2 */ { 4, s_0_2, 0, 2, 0},
+/* 3 */ { 1, s_0_3, -1, 1, 0},
+/* 4 */ { 2, s_0_4, 3, 2, 0},
+/* 5 */ { 2, s_0_5, 3, 2, 0},
+/* 6 */ { 5, s_0_6, -1, 1, 0},
+/* 7 */ { 6, s_0_7, 6, 2, 0},
+/* 8 */ { 6, s_0_8, 6, 2, 0}
+};
+
+static symbol s_1_0[2] = { 192, 192 }; /* values above 127: presumably KOI8-R encoded Cyrillic -- confirm the encoding with the caller */
+static symbol s_1_1[2] = { 197, 192 };
+static symbol s_1_2[2] = { 207, 192 };
+static symbol s_1_3[2] = { 213, 192 };
+static symbol s_1_4[2] = { 197, 197 };
+static symbol s_1_5[2] = { 201, 197 };
+static symbol s_1_6[2] = { 207, 197 };
+static symbol s_1_7[2] = { 217, 197 };
+static symbol s_1_8[2] = { 201, 200 };
+static symbol s_1_9[2] = { 217, 200 };
+static symbol s_1_10[3] = { 201, 205, 201 };
+static symbol s_1_11[3] = { 217, 205, 201 };
+static symbol s_1_12[2] = { 197, 202 };
+static symbol s_1_13[2] = { 201, 202 };
+static symbol s_1_14[2] = { 207, 202 };
+static symbol s_1_15[2] = { 217, 202 };
+static symbol s_1_16[2] = { 197, 205 };
+static symbol s_1_17[2] = { 201, 205 };
+static symbol s_1_18[2] = { 207, 205 };
+static symbol s_1_19[2] = { 217, 205 };
+static symbol s_1_20[3] = { 197, 199, 207 };
+static symbol s_1_21[3] = { 207, 199, 207 };
+static symbol s_1_22[2] = { 193, 209 };
+static symbol s_1_23[2] = { 209, 209 };
+static symbol s_1_24[3] = { 197, 205, 213 };
+static symbol s_1_25[3] = { 207, 205, 213 };
+
+static struct among a_1[26] = /* Russian search table 1; every entry carries result 1. The routine that searches it lies outside this chunk. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 2, s_1_0, -1, 1, 0},
+/* 1 */ { 2, s_1_1, -1, 1, 0},
+/* 2 */ { 2, s_1_2, -1, 1, 0},
+/* 3 */ { 2, s_1_3, -1, 1, 0},
+/* 4 */ { 2, s_1_4, -1, 1, 0},
+/* 5 */ { 2, s_1_5, -1, 1, 0},
+/* 6 */ { 2, s_1_6, -1, 1, 0},
+/* 7 */ { 2, s_1_7, -1, 1, 0},
+/* 8 */ { 2, s_1_8, -1, 1, 0},
+/* 9 */ { 2, s_1_9, -1, 1, 0},
+/* 10 */ { 3, s_1_10, -1, 1, 0},
+/* 11 */ { 3, s_1_11, -1, 1, 0},
+/* 12 */ { 2, s_1_12, -1, 1, 0},
+/* 13 */ { 2, s_1_13, -1, 1, 0},
+/* 14 */ { 2, s_1_14, -1, 1, 0},
+/* 15 */ { 2, s_1_15, -1, 1, 0},
+/* 16 */ { 2, s_1_16, -1, 1, 0},
+/* 17 */ { 2, s_1_17, -1, 1, 0},
+/* 18 */ { 2, s_1_18, -1, 1, 0},
+/* 19 */ { 2, s_1_19, -1, 1, 0},
+/* 20 */ { 3, s_1_20, -1, 1, 0},
+/* 21 */ { 3, s_1_21, -1, 1, 0},
+/* 22 */ { 2, s_1_22, -1, 1, 0},
+/* 23 */ { 2, s_1_23, -1, 1, 0},
+/* 24 */ { 3, s_1_24, -1, 1, 0},
+/* 25 */ { 3, s_1_25, -1, 1, 0}
+};
+
+static symbol s_2_0[2] = { 197, 205 }; /* values above 127: presumably KOI8-R encoded Cyrillic -- confirm the encoding with the caller */
+static symbol s_2_1[2] = { 206, 206 };
+static symbol s_2_2[2] = { 215, 219 };
+static symbol s_2_3[3] = { 201, 215, 219 };
+static symbol s_2_4[3] = { 217, 215, 219 };
+static symbol s_2_5[1] = { 221 };
+static symbol s_2_6[2] = { 192, 221 };
+static symbol s_2_7[3] = { 213, 192, 221 };
+
+static struct among a_2[8] = /* Russian search table 2; the routine that searches it lies outside this chunk. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 2, s_2_0, -1, 1, 0},
+/* 1 */ { 2, s_2_1, -1, 1, 0},
+/* 2 */ { 2, s_2_2, -1, 1, 0},
+/* 3 */ { 3, s_2_3, 2, 2, 0},
+/* 4 */ { 3, s_2_4, 2, 2, 0},
+/* 5 */ { 1, s_2_5, -1, 1, 0},
+/* 6 */ { 2, s_2_6, 5, 1, 0},
+/* 7 */ { 3, s_2_7, 6, 2, 0}
+};
+
+static symbol s_3_0[2] = { 211, 209 }; /* values above 127: presumably KOI8-R encoded Cyrillic -- confirm the encoding with the caller */
+static symbol s_3_1[2] = { 211, 216 };
+
+static struct among a_3[2] = /* Russian search table 3; both entries carry result 1. The routine that searches it lies outside this chunk. Fields: s_size, s, substring_i, result, function. */
+{
+/* 0 */ { 2, s_3_0, -1, 1, 0},
+/* 1 */ { 2, s_3_1, -1, 1, 0}
+};
+/*
+ * Among table 4: endings looked up by r_verb() via
+ * find_among_b(z, a_4, 46).  Result 1 entries are only removed by
+ * r_verb() when preceded by symbol 193 or 209; result 2 entries are
+ * removed unconditionally.  The third field of an entry is the index of
+ * a shorter entry that find_among_b() falls back to (-1 = none).
+ */
+static symbol s_4_0[1] = { 192 };
+static symbol s_4_1[2] = { 213, 192 };
+static symbol s_4_2[2] = { 204, 193 };
+static symbol s_4_3[3] = { 201, 204, 193 };
+static symbol s_4_4[3] = { 217, 204, 193 };
+static symbol s_4_5[2] = { 206, 193 };
+static symbol s_4_6[3] = { 197, 206, 193 };
+static symbol s_4_7[3] = { 197, 212, 197 };
+static symbol s_4_8[3] = { 201, 212, 197 };
+static symbol s_4_9[3] = { 202, 212, 197 };
+static symbol s_4_10[4] = { 197, 202, 212, 197 };
+static symbol s_4_11[4] = { 213, 202, 212, 197 };
+static symbol s_4_12[2] = { 204, 201 };
+static symbol s_4_13[3] = { 201, 204, 201 };
+static symbol s_4_14[3] = { 217, 204, 201 };
+static symbol s_4_15[1] = { 202 };
+static symbol s_4_16[2] = { 197, 202 };
+static symbol s_4_17[2] = { 213, 202 };
+static symbol s_4_18[1] = { 204 };
+static symbol s_4_19[2] = { 201, 204 };
+static symbol s_4_20[2] = { 217, 204 };
+static symbol s_4_21[2] = { 197, 205 };
+static symbol s_4_22[2] = { 201, 205 };
+static symbol s_4_23[2] = { 217, 205 };
+static symbol s_4_24[1] = { 206 };
+static symbol s_4_25[2] = { 197, 206 };
+static symbol s_4_26[2] = { 204, 207 };
+static symbol s_4_27[3] = { 201, 204, 207 };
+static symbol s_4_28[3] = { 217, 204, 207 };
+static symbol s_4_29[2] = { 206, 207 };
+static symbol s_4_30[3] = { 197, 206, 207 };
+static symbol s_4_31[3] = { 206, 206, 207 };
+static symbol s_4_32[2] = { 192, 212 };
+static symbol s_4_33[3] = { 213, 192, 212 };
+static symbol s_4_34[2] = { 197, 212 };
+static symbol s_4_35[3] = { 213, 197, 212 };
+static symbol s_4_36[2] = { 201, 212 };
+static symbol s_4_37[2] = { 209, 212 };
+static symbol s_4_38[2] = { 217, 212 };
+static symbol s_4_39[2] = { 212, 216 };
+static symbol s_4_40[3] = { 201, 212, 216 };
+static symbol s_4_41[3] = { 217, 212, 216 };
+static symbol s_4_42[3] = { 197, 219, 216 };
+static symbol s_4_43[3] = { 201, 219, 216 };
+static symbol s_4_44[2] = { 206, 217 };
+static symbol s_4_45[3] = { 197, 206, 217 };
+
+static struct among a_4[46] =
+{
+/* 0 */ { 1, s_4_0, -1, 2, 0},
+/* 1 */ { 2, s_4_1, 0, 2, 0},
+/* 2 */ { 2, s_4_2, -1, 1, 0},
+/* 3 */ { 3, s_4_3, 2, 2, 0},
+/* 4 */ { 3, s_4_4, 2, 2, 0},
+/* 5 */ { 2, s_4_5, -1, 1, 0},
+/* 6 */ { 3, s_4_6, 5, 2, 0},
+/* 7 */ { 3, s_4_7, -1, 1, 0},
+/* 8 */ { 3, s_4_8, -1, 2, 0},
+/* 9 */ { 3, s_4_9, -1, 1, 0},
+/* 10 */ { 4, s_4_10, 9, 2, 0},
+/* 11 */ { 4, s_4_11, 9, 2, 0},
+/* 12 */ { 2, s_4_12, -1, 1, 0},
+/* 13 */ { 3, s_4_13, 12, 2, 0},
+/* 14 */ { 3, s_4_14, 12, 2, 0},
+/* 15 */ { 1, s_4_15, -1, 1, 0},
+/* 16 */ { 2, s_4_16, 15, 2, 0},
+/* 17 */ { 2, s_4_17, 15, 2, 0},
+/* 18 */ { 1, s_4_18, -1, 1, 0},
+/* 19 */ { 2, s_4_19, 18, 2, 0},
+/* 20 */ { 2, s_4_20, 18, 2, 0},
+/* 21 */ { 2, s_4_21, -1, 1, 0},
+/* 22 */ { 2, s_4_22, -1, 2, 0},
+/* 23 */ { 2, s_4_23, -1, 2, 0},
+/* 24 */ { 1, s_4_24, -1, 1, 0},
+/* 25 */ { 2, s_4_25, 24, 2, 0},
+/* 26 */ { 2, s_4_26, -1, 1, 0},
+/* 27 */ { 3, s_4_27, 26, 2, 0},
+/* 28 */ { 3, s_4_28, 26, 2, 0},
+/* 29 */ { 2, s_4_29, -1, 1, 0},
+/* 30 */ { 3, s_4_30, 29, 2, 0},
+/* 31 */ { 3, s_4_31, 29, 1, 0},
+/* 32 */ { 2, s_4_32, -1, 1, 0},
+/* 33 */ { 3, s_4_33, 32, 2, 0},
+/* 34 */ { 2, s_4_34, -1, 1, 0},
+/* 35 */ { 3, s_4_35, 34, 2, 0},
+/* 36 */ { 2, s_4_36, -1, 2, 0},
+/* 37 */ { 2, s_4_37, -1, 2, 0},
+/* 38 */ { 2, s_4_38, -1, 2, 0},
+/* 39 */ { 2, s_4_39, -1, 1, 0},
+/* 40 */ { 3, s_4_40, 39, 2, 0},
+/* 41 */ { 3, s_4_41, 39, 2, 0},
+/* 42 */ { 3, s_4_42, -1, 1, 0},
+/* 43 */ { 3, s_4_43, -1, 2, 0},
+/* 44 */ { 2, s_4_44, -1, 1, 0},
+/* 45 */ { 3, s_4_45, 44, 2, 0}
+};
+/*
+ * Among table 5: endings looked up by r_noun() via
+ * find_among_b(z, a_5, 36).  Every entry yields result 1, which r_noun()
+ * handles by deleting the matched slice.  The third field of an entry is
+ * the index of a shorter entry that find_among_b() falls back to
+ * (-1 = none).
+ */
+static symbol s_5_0[1] = { 192 };
+static symbol s_5_1[2] = { 201, 192 };
+static symbol s_5_2[2] = { 216, 192 };
+static symbol s_5_3[1] = { 193 };
+static symbol s_5_4[1] = { 197 };
+static symbol s_5_5[2] = { 201, 197 };
+static symbol s_5_6[2] = { 216, 197 };
+static symbol s_5_7[2] = { 193, 200 };
+static symbol s_5_8[2] = { 209, 200 };
+static symbol s_5_9[3] = { 201, 209, 200 };
+static symbol s_5_10[1] = { 201 };
+static symbol s_5_11[2] = { 197, 201 };
+static symbol s_5_12[2] = { 201, 201 };
+static symbol s_5_13[3] = { 193, 205, 201 };
+static symbol s_5_14[3] = { 209, 205, 201 };
+static symbol s_5_15[4] = { 201, 209, 205, 201 };
+static symbol s_5_16[1] = { 202 };
+static symbol s_5_17[2] = { 197, 202 };
+static symbol s_5_18[3] = { 201, 197, 202 };
+static symbol s_5_19[2] = { 201, 202 };
+static symbol s_5_20[2] = { 207, 202 };
+static symbol s_5_21[2] = { 193, 205 };
+static symbol s_5_22[2] = { 197, 205 };
+static symbol s_5_23[3] = { 201, 197, 205 };
+static symbol s_5_24[2] = { 207, 205 };
+static symbol s_5_25[2] = { 209, 205 };
+static symbol s_5_26[3] = { 201, 209, 205 };
+static symbol s_5_27[1] = { 207 };
+static symbol s_5_28[1] = { 209 };
+static symbol s_5_29[2] = { 201, 209 };
+static symbol s_5_30[2] = { 216, 209 };
+static symbol s_5_31[1] = { 213 };
+static symbol s_5_32[2] = { 197, 215 };
+static symbol s_5_33[2] = { 207, 215 };
+static symbol s_5_34[1] = { 216 };
+static symbol s_5_35[1] = { 217 };
+
+static struct among a_5[36] =
+{
+/* 0 */ { 1, s_5_0, -1, 1, 0},
+/* 1 */ { 2, s_5_1, 0, 1, 0},
+/* 2 */ { 2, s_5_2, 0, 1, 0},
+/* 3 */ { 1, s_5_3, -1, 1, 0},
+/* 4 */ { 1, s_5_4, -1, 1, 0},
+/* 5 */ { 2, s_5_5, 4, 1, 0},
+/* 6 */ { 2, s_5_6, 4, 1, 0},
+/* 7 */ { 2, s_5_7, -1, 1, 0},
+/* 8 */ { 2, s_5_8, -1, 1, 0},
+/* 9 */ { 3, s_5_9, 8, 1, 0},
+/* 10 */ { 1, s_5_10, -1, 1, 0},
+/* 11 */ { 2, s_5_11, 10, 1, 0},
+/* 12 */ { 2, s_5_12, 10, 1, 0},
+/* 13 */ { 3, s_5_13, 10, 1, 0},
+/* 14 */ { 3, s_5_14, 10, 1, 0},
+/* 15 */ { 4, s_5_15, 14, 1, 0},
+/* 16 */ { 1, s_5_16, -1, 1, 0},
+/* 17 */ { 2, s_5_17, 16, 1, 0},
+/* 18 */ { 3, s_5_18, 17, 1, 0},
+/* 19 */ { 2, s_5_19, 16, 1, 0},
+/* 20 */ { 2, s_5_20, 16, 1, 0},
+/* 21 */ { 2, s_5_21, -1, 1, 0},
+/* 22 */ { 2, s_5_22, -1, 1, 0},
+/* 23 */ { 3, s_5_23, 22, 1, 0},
+/* 24 */ { 2, s_5_24, -1, 1, 0},
+/* 25 */ { 2, s_5_25, -1, 1, 0},
+/* 26 */ { 3, s_5_26, 25, 1, 0},
+/* 27 */ { 1, s_5_27, -1, 1, 0},
+/* 28 */ { 1, s_5_28, -1, 1, 0},
+/* 29 */ { 2, s_5_29, 28, 1, 0},
+/* 30 */ { 2, s_5_30, 28, 1, 0},
+/* 31 */ { 1, s_5_31, -1, 1, 0},
+/* 32 */ { 2, s_5_32, -1, 1, 0},
+/* 33 */ { 2, s_5_33, -1, 1, 0},
+/* 34 */ { 1, s_5_34, -1, 1, 0},
+/* 35 */ { 1, s_5_35, -1, 1, 0}
+};
+/*
+ * Among table 6: the two endings looked up by r_derivational() via
+ * find_among_b(z, a_6, 2).  A match is only deleted when r_R2() also
+ * succeeds.
+ */
+static symbol s_6_0[3] = { 207, 211, 212 };
+static symbol s_6_1[4] = { 207, 211, 212, 216 };
+
+static struct among a_6[2] =
+{
+/* 0 */ { 3, s_6_0, -1, 1, 0},
+/* 1 */ { 4, s_6_1, -1, 1, 0}
+};
+/*
+ * Among table 7: endings looked up by r_tidy_up() via
+ * find_among_b(z, a_7, 4).  The result value (1, 2 or 3) selects which
+ * of the three clean-up actions r_tidy_up() performs.
+ */
+static symbol s_7_0[4] = { 197, 202, 219, 197 };
+static symbol s_7_1[1] = { 206 };
+static symbol s_7_2[1] = { 216 };
+static symbol s_7_3[3] = { 197, 202, 219 };
+
+static struct among a_7[4] =
+{
+/* 0 */ { 4, s_7_0, -1, 1, 0},
+/* 1 */ { 1, s_7_1, -1, 2, 0},
+/* 2 */ { 1, s_7_2, -1, 3, 0},
+/* 3 */ { 3, s_7_3, -1, 1, 0}
+};
+/*
+ * Grouping bitmap passed to in_grouping()/out_grouping() together with
+ * the range 192..220: bit (ch - 192) is set when symbol ch belongs to the
+ * group.  Presumably the vowel set (the mark it produces is called pV in
+ * r_mark_regions) -- confirm against the Snowball source.
+ */
+static unsigned char g_v[] = { 35, 130, 34, 18 };
+/*
+ * Single-symbol literals matched with eq_s_b() by the routines below.
+ * Several hold the same value because each use site has its own literal.
+ */
+static symbol s_0[] = { 193 };
+static symbol s_1[] = { 209 };
+static symbol s_2[] = { 193 };
+static symbol s_3[] = { 209 };
+static symbol s_4[] = { 193 };
+static symbol s_5[] = { 209 };
+static symbol s_6[] = { 206 };
+static symbol s_7[] = { 206 };
+static symbol s_8[] = { 206 };
+static symbol s_9[] = { 201 };
+/*
+ * Compute the two region marks used by the stemmer and store them in
+ * z->I[0] (pV) and z->I[1] (p2).
+ *
+ * Both marks start at z->l (end of input).  Scanning forward from the
+ * cursor, I[0] becomes the position just after the first symbol that is
+ * in g_v; I[1] becomes the position reached after then passing a symbol
+ * outside g_v, another symbol in g_v and another symbol outside g_v.
+ * If the input ends before a step completes, the marks not yet set keep
+ * the value z->l.  The cursor is restored before returning.
+ *
+ * Always returns 1.
+ */
+static int r_mark_regions(struct SN_env * z) {
+ z->I[0] = z->l;
+ z->I[1] = z->l;
+ { int c = z->c; /* do, line 100 */
+ while(1) { /* gopast, line 101 */
+ if (!(in_grouping(z, g_v, 192, 220))) goto lab1;
+ break;
+ lab1:
+ if (z->c >= z->l) goto lab0; /* ran out of input: keep default marks */
+ z->c++;
+ }
+ z->I[0] = z->c; /* setmark pV, line 101 */
+ while(1) { /* gopast, line 101 */
+ if (!(out_grouping(z, g_v, 192, 220))) goto lab2;
+ break;
+ lab2:
+ if (z->c >= z->l) goto lab0;
+ z->c++;
+ }
+ while(1) { /* gopast, line 102 */
+ if (!(in_grouping(z, g_v, 192, 220))) goto lab3;
+ break;
+ lab3:
+ if (z->c >= z->l) goto lab0;
+ z->c++;
+ }
+ while(1) { /* gopast, line 102 */
+ if (!(out_grouping(z, g_v, 192, 220))) goto lab4;
+ break;
+ lab4:
+ if (z->c >= z->l) goto lab0;
+ z->c++;
+ }
+ z->I[1] = z->c; /* setmark p2, line 102 */
+ lab0:
+ z->c = c; /* the scan must not move the caller's cursor */
+ }
+ return 1;
+}
+
+/*
+ * Region test: succeeds (returns 1) when the cursor is at or beyond the
+ * p2 mark stored in z->I[1], otherwise returns 0.
+ */
+static int r_R2(struct SN_env * z) {
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+/*
+ * Try to remove an ending listed in among table a_0 from the text that
+ * precedes the cursor.
+ *
+ * Result 1 endings are removed only when the symbol before them is s_0
+ * or s_1; result 2 endings are removed unconditionally.
+ * Returns 1 when an ending was accepted, 0 otherwise.
+ */
+static int r_perfective_gerund(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_0, 9);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_0)) {
+            /* first alternative failed: rewind and try the second one */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_1)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/*
+ * Remove an ending listed in among table a_1 from the text that precedes
+ * the cursor.  Returns 1 when an ending matched, 0 otherwise.
+ */
+static int r_adjective(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_1, 26);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Remove an adjective ending (r_adjective) and then, optionally, one of
+ * the endings in among table a_2 that may precede it.
+ *
+ * Returns 0 when r_adjective() fails, 1 otherwise; the second removal is
+ * a "try" and never makes the routine fail.
+ *
+ * The two cursor marks have distinct names on purpose: with both called
+ * "m", the failure path of the inner "or" restored the cursor to the
+ * inner mark instead of to the start of the "try".
+ */
+static int r_adjectival(struct SN_env * z) {
+    int among_var;
+    if (!r_adjective(z)) return 0; /* call adjective, line 141 */
+    {   int m_try = z->l - z->c; /* try, line 148 */
+        z->ket = z->c; /* [, line 149 */
+        among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
+        if (!(among_var)) { z->c = z->l - m_try; goto lab0; }
+        z->bra = z->c; /* ], line 149 */
+        switch(among_var) {
+            case 0: { z->c = z->l - m_try; goto lab0; }
+            case 1:
+                {   int m_or = z->l - z->c; /* or, line 154 */
+                    if (!(eq_s_b(z, 1, s_2))) goto lab2;
+                    goto lab1;
+                lab2:
+                    z->c = z->l - m_or;
+                    /* both alternatives failed: abandon the whole try */
+                    if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_try; goto lab0; }
+                }
+            lab1:
+                slice_del(z); /* delete, line 154 */
+                break;
+            case 2:
+                slice_del(z); /* delete, line 161 */
+                break;
+        }
+    lab0:
+        ;
+    }
+    return 1;
+}
+
+/*
+ * Remove an ending listed in among table a_3 from the text that precedes
+ * the cursor.  Returns 1 when an ending matched, 0 otherwise.
+ */
+static int r_reflexive(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_3, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Try to remove an ending listed in among table a_4 from the text that
+ * precedes the cursor.
+ *
+ * Result 1 endings are removed only when the symbol before them is s_4
+ * or s_5; result 2 endings are removed unconditionally.
+ * Returns 1 when an ending was accepted, 0 otherwise.
+ */
+static int r_verb(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_4, 46);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        int mark = z->l - z->c;
+        if (!eq_s_b(z, 1, s_4)) {
+            /* first alternative failed: rewind and try the second one */
+            z->c = z->l - mark;
+            if (!eq_s_b(z, 1, s_5)) return 0;
+        }
+        slice_del(z);
+    } else if (among_var == 2) {
+        slice_del(z);
+    }
+    return 1;
+}
+
+/*
+ * Remove an ending listed in among table a_5 from the text that precedes
+ * the cursor.  Returns 1 when an ending matched, 0 otherwise.
+ */
+static int r_noun(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_5, 36);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Remove an ending listed in among table a_6, but only when the ending
+ * starts at or after the p2 mark (checked by r_R2).
+ * Returns 1 when an ending matched and passed the region test, else 0.
+ */
+static int r_derivational(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_6, 2);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (!r_R2(z)) return 0;
+    if (among_var == 1) slice_del(z);
+    return 1;
+}
+
+/*
+ * Final clean-up driven by among table a_7:
+ *   result 1 - delete the ending, then require two further s_6/s_7
+ *              symbols and delete the one nearer the end of the word;
+ *   result 2 - delete the matched symbol when another s_8 precedes it;
+ *   result 3 - delete the matched symbol.
+ * Returns 0 when nothing matched or a required symbol is missing (a
+ * deletion already performed for result 1 is not undone), 1 otherwise.
+ */
+static int r_tidy_up(struct SN_env * z) {
+    int among_var;
+
+    z->ket = z->c;
+    among_var = find_among_b(z, a_7, 4);
+    if (among_var == 0) return 0;
+    z->bra = z->c;
+
+    if (among_var == 1) {
+        slice_del(z);
+        z->ket = z->c;
+        if (!eq_s_b(z, 1, s_6)) return 0;
+        z->bra = z->c;
+        if (!eq_s_b(z, 1, s_7)) return 0;
+        slice_del(z);
+    } else if (among_var == 2) {
+        if (!eq_s_b(z, 1, s_8)) return 0;
+        slice_del(z);
+    } else if (among_var == 3) {
+        slice_del(z);
+    }
+    return 1;
+}
+/*
+ * Entry point: stem the word held in z in place.
+ *
+ * Steps, in order:
+ *   1. r_mark_regions() computes the marks z->I[0] and z->I[1].
+ *   2. Processing switches to backward mode (cursor at z->l) and the
+ *      backward limit z->lb is temporarily raised to z->I[0], so no
+ *      routine can consume symbols before that mark.
+ *   3. Either r_perfective_gerund() succeeds, or r_reflexive() is tried
+ *      and then the first of r_adjectival(), r_verb(), r_noun() that
+ *      succeeds is applied.
+ *   4. A trailing s_9 symbol is deleted if present.
+ *   5. r_derivational() and r_tidy_up() are applied; their failure is
+ *      ignored.
+ *
+ * Note that the local name "m" is re-declared in each nested scope; every
+ * "z->c = z->l - m" refers to the innermost enclosing declaration.
+ *
+ * Returns 0 if the cursor is before z->I[0] when the limit is set,
+ * otherwise 1.
+ */
+extern int russian_stem(struct SN_env * z) {
+ { int c = z->c; /* do, line 240 */
+ if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
+ lab0:
+ z->c = c;
+ }
+ z->lb = z->c; z->c = z->l; /* backwards, line 241 */
+
+ { int m = z->l - z->c; /* setlimit, line 241 */
+ int m3;
+ if (z->c < z->I[0]) return 0;
+ z->c = z->I[0]; /* tomark, line 241 */
+ m3 = z->lb; z->lb = z->c; /* m3 keeps the old limit; restored below */
+ z->c = z->l - m;
+ { int m = z->l - z->c; /* do, line 242 */
+ { int m = z->l - z->c; /* or, line 243 */
+ if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
+ goto lab2;
+ lab3:
+ z->c = z->l - m;
+ { int m = z->l - z->c; /* try, line 244 */
+ if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
+ lab4:
+ ;
+ }
+ { int m = z->l - z->c; /* or, line 245 */
+ if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
+ goto lab5;
+ lab6:
+ z->c = z->l - m;
+ if (!r_verb(z)) goto lab7; /* call verb, line 245 */
+ goto lab5;
+ lab7:
+ z->c = z->l - m;
+ if (!r_noun(z)) goto lab1; /* call noun, line 245 */
+ }
+ lab5:
+ ;
+ }
+ lab2:
+ lab1:
+ z->c = z->l - m; /* "do" always restores the cursor */
+ }
+ { int m = z->l - z->c; /* try, line 248 */
+ z->ket = z->c; /* [, line 248 */
+ if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
+ z->bra = z->c; /* ], line 248 */
+ slice_del(z); /* delete, line 248 */
+ lab8:
+ ;
+ }
+ { int m = z->l - z->c; /* do, line 251 */
+ if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
+ lab9:
+ z->c = z->l - m;
+ }
+ { int m = z->l - z->c; /* do, line 252 */
+ if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
+ lab10:
+ z->c = z->l - m;
+ }
+ z->lb = m3;
+ }
+ z->c = z->lb;
+ return 1;
+}
+
+/*
+ * Allocate a stemmer environment for the Russian stemmer by calling
+ * SN_create_env(0, 2, 0); the 2 matches the two z->I[] marks it uses.
+ */
+extern struct SN_env * russian_create_env(void)
+{
+    struct SN_env * env = SN_create_env(0, 2, 0);
+    return env;
+}
+
+/* Release an environment obtained from russian_create_env(). */
+extern void russian_close_env(struct SN_env * z)
+{
+    SN_close_env(z);
+}
+
--- /dev/null
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/* Create and destroy the environment that russian_stem() operates on. */
+extern struct SN_env * russian_create_env(void);
+extern void russian_close_env(struct SN_env * z);
+/* Stem the word held in z in place; returns 1 on success, 0 otherwise. */
+extern int russian_stem(struct SN_env * z);
+
--- /dev/null
+
+#include
+#include
+#include
+
+#include "header.h"
+
+#define unless(C) if(!(C))
+
+#define CREATE_SIZE 1
+/*
+ * Allocate a new symbol string with room for CREATE_SIZE symbols (plus
+ * one spare) behind a HEAD-byte header, and record CREATE_SIZE as both
+ * its capacity and its size.  Release it with lose_s().
+ *
+ * NOTE(review): the malloc() result is not checked, so an allocation
+ * failure is dereferenced by the CAPACITY() store below.
+ */
+extern symbol * create_s(void)
+{ symbol * p = (symbol *) (HEAD + (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)));
+ CAPACITY(p) = CREATE_SIZE;
+ SET_SIZE(p, CREATE_SIZE);
+ return p;
+}
+
+/*
+ * Free a symbol string.  p points HEAD bytes past the start of the
+ * allocation, so the header offset is undone before calling free().
+ */
+extern void lose_s(symbol * p)
+{
+    char * base = (char *) p - HEAD;
+    free(base);
+}
+
+/*
+ * Forward test: if the symbol at the cursor lies in [min, max] and its
+ * bit is set in the bitmap s, advance the cursor by one and return 1.
+ * Otherwise (or at the end of input) leave the cursor alone and return 0.
+ */
+extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch > max || ch < min) return 0;
+    ch -= min;
+    if (!(s[ch >> 3] & (0X1 << (ch & 0X7)))) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward test: if the symbol just before the cursor lies in [min, max]
+ * and its bit is set in the bitmap s, move the cursor back by one and
+ * return 1.  Otherwise (or at the backward limit) return 0.
+ */
+extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch > max || ch < min) return 0;
+    ch -= min;
+    if (!(s[ch >> 3] & (0X1 << (ch & 0X7)))) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Forward test: if the symbol at the cursor is NOT in the group (outside
+ * [min, max] or its bit is clear in s), advance the cursor by one and
+ * return 1.  Returns 0 for a group member or at the end of input.
+ */
+extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch <= max && ch >= min) {
+        int bit = ch - min;
+        if (s[bit >> 3] & (0X1 << (bit & 0X7))) return 0;
+    }
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward test: if the symbol just before the cursor is NOT in the
+ * group (outside [min, max] or its bit is clear in s), move the cursor
+ * back by one and return 1.  Returns 0 for a group member or at the
+ * backward limit.
+ */
+extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch <= max && ch >= min) {
+        int bit = ch - min;
+        if (s[bit >> 3] & (0X1 << (bit & 0X7))) return 0;
+    }
+    z->c--;
+    return 1;
+}
+
+
+/*
+ * Forward test: consume the symbol at the cursor when it lies in
+ * [min, max].  Returns 1 on success, 0 otherwise or at the end of input.
+ */
+extern int in_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch < min || ch > max) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward test: consume the symbol just before the cursor when it lies
+ * in [min, max].  Returns 1 on success, 0 otherwise or at the backward
+ * limit.
+ */
+extern int in_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch < min || ch > max) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Forward test: consume the symbol at the cursor when it lies outside
+ * [min, max].  Returns 1 on success, 0 otherwise or at the end of input.
+ */
+extern int out_range(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c >= z->l) return 0;
+    ch = z->p[z->c];
+    if (ch >= min && ch <= max) return 0;
+    z->c++;
+    return 1;
+}
+
+/*
+ * Backward test: consume the symbol just before the cursor when it lies
+ * outside [min, max].  Returns 1 on success, 0 otherwise or at the
+ * backward limit.
+ */
+extern int out_range_b(struct SN_env * z, int min, int max)
+{
+    int ch;
+
+    if (z->c <= z->lb) return 0;
+    ch = z->p[z->c - 1];
+    if (ch >= min && ch <= max) return 0;
+    z->c--;
+    return 1;
+}
+
+/*
+ * Forward literal match: when the s_size symbols at the cursor equal s,
+ * advance the cursor past them and return 1; otherwise return 0.
+ */
+extern int eq_s(struct SN_env * z, int s_size, symbol * s)
+{
+    int remaining = z->l - z->c;
+
+    if (remaining < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/*
+ * Backward literal match: when the s_size symbols that end at the cursor
+ * equal s, move the cursor back over them and return 1; otherwise
+ * return 0.
+ */
+extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
+{
+    int available = z->c - z->lb;
+
+    if (available < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* Forward match against a whole symbol string p (length taken from SIZE(p)). */
+extern int eq_v(struct SN_env * z, symbol * p)
+{
+    int p_size = SIZE(p);
+    return eq_s(z, p_size, p);
+}
+
+/* Backward match against a whole symbol string p (length taken from SIZE(p)). */
+extern int eq_v_b(struct SN_env * z, symbol * p)
+{
+    int p_size = SIZE(p);
+    return eq_s_b(z, p_size, p);
+}
+/*
+ * Forward search of the among table v (v_size entries) for an entry
+ * whose string matches the input at the cursor.
+ *
+ * A binary search narrows the table to entry i; common_i / common_j
+ * remember how many leading symbols are already known to agree with the
+ * lower / upper bound so they are not compared again.  The second loop
+ * then walks from entry i along the substring_i links and accepts the
+ * first entry that matched in full and whose optional function (if any)
+ * returns non-zero.
+ *
+ * On success the cursor is placed just after the matched string and the
+ * entry's result is returned; 0 is returned when no entry qualifies.
+ */
+extern int find_among(struct SN_env * z, struct among * v, int v_size)
+{
+ int i = 0;
+ int j = v_size;
+
+ int c = z->c; int l = z->l;
+ symbol * q = z->p + c;
+
+ struct among * w;
+
+ int common_i = 0;
+ int common_j = 0;
+
+ int first_key_inspected = 0;
+
+ while(1)
+ { int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j; /* smaller */
+ w = v + k;
+ { int i; for (i = common; i < w->s_size; i++)
+ { if (c + common == l) { diff = -1; break; } /* input exhausted */
+ diff = q[common] - w->s[i];
+ if (diff != 0) break;
+ common++;
+ }
+ }
+ if (diff < 0) { j = k; common_j = common; }
+ else { i = k; common_i = common; }
+ if (j - i <= 1)
+ { if (i > 0) break; /* v->s has been inspected */
+ if (j == i) break; /* only one item in v */
+
+ /* - but now we need to go round once more to get
+ v->s inspected. This looks messy, but is actually
+ the optimal approach. */
+
+ if (first_key_inspected) break;
+ first_key_inspected = 1;
+ }
+ }
+ while(1)
+ { w = v + i;
+ if (common_i >= w->s_size) /* the whole of this entry matched */
+ { z->c = c + w->s_size;
+ if (w->function == 0) return w->result;
+ { int res = w->function(z);
+ z->c = c + w->s_size; /* undo any cursor movement by the function */
+ if (res) return w->result;
+ }
+ }
+ i = w->substring_i;
+ if (i < 0) return 0;
+ }
+}
+
+/* find_among_b is for backwards processing: it searches the among table v
+   (v_size entries) for an entry whose string matches the input that ends
+   at the cursor, comparing from the last symbol of each entry towards its
+   first and never reading before the backward limit z->lb.
+
+   A binary search narrows the table to entry i; the second loop then walks
+   from entry i along the substring_i links and accepts the first entry
+   that matched in full and whose optional function (if any) returns
+   non-zero.  On success the cursor is placed just before the matched
+   string and the entry's result is returned; 0 is returned when no entry
+   qualifies. */
+
+extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
+{
+ int i = 0;
+ int j = v_size;
+
+ int c = z->c; int lb = z->lb;
+ symbol * q = z->p + c - 1;
+
+ struct among * w;
+
+ int common_i = 0;
+ int common_j = 0;
+
+ int first_key_inspected = 0;
+
+ while(1)
+ { int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j;
+ w = v + k;
+ { int i; for (i = w->s_size - 1 - common; i >= 0; i--)
+ { if (c - common == lb) { diff = -1; break; } /* reached the backward limit */
+ diff = q[- common] - w->s[i];
+ if (diff != 0) break;
+ common++;
+ }
+ }
+ if (diff < 0) { j = k; common_j = common; }
+ else { i = k; common_i = common; }
+ if (j - i <= 1)
+ { if (i > 0) break;
+ if (j == i) break;
+ if (first_key_inspected) break; /* entry 0 gets one extra pass */
+ first_key_inspected = 1;
+ }
+ }
+ while(1)
+ { w = v + i;
+ if (common_i >= w->s_size) /* the whole of this entry matched */
+ { z->c = c - w->s_size;
+ if (w->function == 0) return w->result;
+ { int res = w->function(z);
+ z->c = c - w->s_size; /* undo any cursor movement by the function */
+ if (res) return w->result;
+ }
+ }
+ i = w->substring_i;
+ if (i < 0) return 0;
+ }
+}
+
+/*
+ * Replace the symbol string p by a larger one with capacity n + 20:
+ * allocate the new buffer, copy CAPACITY(p) symbols into it, free p and
+ * return the new string.  The caller must use the returned pointer.
+ *
+ * Only the capacity of the new string is set here; the callers in this
+ * file (replace_s, slice_to, assign_to) call SET_SIZE() afterwards.
+ *
+ * NOTE(review): the malloc() result is not checked.
+ */
+extern symbol * increase_size(symbol * p, int n)
+{ int new_size = n + 20;
+ symbol * q = (symbol *) (HEAD + (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol)));
+ CAPACITY(q) = new_size;
+ memmove(q, p, CAPACITY(p) * sizeof(symbol)); lose_s(p); return q;
+}
+
+/* Replace the symbols between c_bra and c_ket in z->p by the s_size
+   symbols at s.
+
+   When the replacement has a different length, the buffer is grown if
+   necessary (z->p may then point to a new allocation), the tail of the
+   text is shifted, and SIZE(z->p), z->l and the cursor z->c are adjusted:
+   a cursor at or after c_ket moves with the tail, a cursor inside the
+   replaced span is moved to c_bra.
+
+   Returns the change in length (new length minus old length).
+*/
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
+{ int adjustment = s_size - (c_ket - c_bra);
+ int len = SIZE(z->p);
+ if (adjustment != 0)
+ { if (adjustment + len > CAPACITY(z->p)) z->p = increase_size(z->p, adjustment + len);
+ memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
+ SET_SIZE(z->p, adjustment + len);
+ z->l += adjustment;
+ if (z->c >= c_ket) z->c += adjustment; else
+ if (z->c > c_bra) z->c = c_bra;
+ }
+ unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); /* s may be 0 when s_size is 0 */
+ return adjustment;
+}
+
+/*
+ * Sanity-check the current slice: 0 <= bra <= ket <= l <= SIZE(p).
+ * On violation, report on stderr, dump the environment with debug() and
+ * terminate the process with exit(1).
+ */
+static void slice_check(struct SN_env * z)
+{
+    int slice_ok = 0 <= z->bra &&
+                   z->bra <= z->ket &&
+                   z->ket <= z->l &&
+                   z->l <= SIZE(z->p);
+
+    if (slice_ok) return;
+
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+    exit(1);
+}
+
+/*
+ * Replace the current slice [z->bra, z->ket) by the s_size symbols at s,
+ * after validating the slice bounds.
+ */
+extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
+{
+    slice_check(z);
+    (void) replace_s(z, z->bra, z->ket, s_size, s);
+}
+
+/* Replace the current slice by the whole symbol string p. */
+extern void slice_from_v(struct SN_env * z, symbol * p)
+{
+    int p_size = SIZE(p);
+    slice_from_s(z, p_size, p);
+}
+
+/* Delete the current slice, i.e. replace it by an empty string. */
+extern void slice_del(struct SN_env * z)
+{
+    symbol * nothing = 0;
+    slice_from_s(z, 0, nothing);
+}
+
+/*
+ * Replace the symbols between bra and ket by the s_size symbols at s and
+ * shift the slice marks z->bra / z->ket that lie at or after bra by the
+ * resulting change in length.
+ */
+extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
+{
+    int shift = replace_s(z, bra, ket, s_size, s);
+
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/*
+ * Same as insert_s(), with the replacement text given as a whole symbol
+ * string p whose length is SIZE(p).
+ */
+extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
+{
+    int shift = replace_s(z, bra, ket, SIZE(p), p);
+
+    if (z->bra >= bra) z->bra += shift;
+    if (z->ket >= bra) z->ket += shift;
+}
+
+/*
+ * Copy the current slice [z->bra, z->ket) into the symbol string p,
+ * growing p when it is too small.  Returns the (possibly reallocated)
+ * string, which the caller must use in place of p.
+ */
+extern symbol * slice_to(struct SN_env * z, symbol * p)
+{
+    int count;
+
+    slice_check(z);
+    count = z->ket - z->bra;
+    if (CAPACITY(p) < count) p = increase_size(p, count);
+    memmove(p, z->p + z->bra, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/*
+ * Copy the first z->l symbols of z->p into the symbol string p, growing
+ * p when it is too small.  Returns the (possibly reallocated) string,
+ * which the caller must use in place of p.
+ */
+extern symbol * assign_to(struct SN_env * z, symbol * p)
+{
+    int count = z->l;
+
+    if (CAPACITY(p) < count) p = increase_size(p, count);
+    memmove(p, z->p, count * sizeof(symbol));
+    SET_SIZE(p, count);
+    return p;
+}
+
+/*
+ * Print the text of z on stdout with the environment positions marked:
+ * '{' at lb, '[' at bra, '|' at the cursor, ']' at ket and '}' at l.
+ * Zero symbols are shown as '#'.  When number is non-negative a prefix
+ * with number, line_count and the text size is printed first.
+ */
+extern void debug(struct SN_env * z, int number, int line_count)
+{
+    int limit = SIZE(z->p);
+    int pos = 0;
+
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    while (pos <= limit) {
+        if (z->lb == pos) printf("{");
+        if (z->bra == pos) printf("[");
+        if (z->c == pos) printf("|");
+        if (z->ket == pos) printf("]");
+        if (z->l == pos) printf("}");
+        if (pos < limit) {
+            int ch = z->p[pos];
+            printf("%c", ch == 0 ? '#' : ch);
+        }
+        pos++;
+    }
+    printf("'\n");
+}
--- /dev/null
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i tsearch2.sql
+\set ECHO all
+
+--tsvector
+SELECT '1'::tsvector;
+SELECT '1 '::tsvector;
+SELECT ' 1'::tsvector;
+SELECT ' 1 '::tsvector;
+SELECT '1 2'::tsvector;
+SELECT '\'1 2\''::tsvector;
+SELECT '\'1 \\\'2\''::tsvector;
+SELECT '\'1 \\\'2\'3'::tsvector;
+SELECT '\'1 \\\'2\' 3'::tsvector;
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
+select '\'w\':4A,3B,2C,1D,5 a:8';
+select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
+select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+
+
+--tsquery
+SELECT '1'::tsquery;
+SELECT '1 '::tsquery;
+SELECT ' 1'::tsquery;
+SELECT ' 1 '::tsquery;
+SELECT '\'1 2\''::tsquery;
+SELECT '\'1 \\\'2\''::tsquery;
+SELECT '!1'::tsquery;
+SELECT '1|2'::tsquery;
+SELECT '1|!2'::tsquery;
+SELECT '!1|2'::tsquery;
+SELECT '!1|!2'::tsquery;
+SELECT '!(!1|!2)'::tsquery;
+SELECT '!(!1|2)'::tsquery;
+SELECT '!(1|!2)'::tsquery;
+SELECT '!(1|2)'::tsquery;
+SELECT '1&2'::tsquery;
+SELECT '!1&2'::tsquery;
+SELECT '1&!2'::tsquery;
+SELECT '!1&!2'::tsquery;
+SELECT '(1&2)'::tsquery;
+SELECT '1&(2)'::tsquery;
+SELECT '!(1)&2'::tsquery;
+SELECT '!(1&2)'::tsquery;
+SELECT '1|2&3'::tsquery;
+SELECT '1|(2&3)'::tsquery;
+SELECT '(1|2)&3'::tsquery;
+SELECT '1|2&!3'::tsquery;
+SELECT '1|!2&3'::tsquery;
+SELECT '!1|2&3'::tsquery;
+SELECT '!1|(2&3)'::tsquery;
+SELECT '!(1|2)&3'::tsquery;
+SELECT '(!1|2)&3'::tsquery;
+SELECT '1|(2|(4|(5|6)))'::tsquery;
+SELECT '1|2|4|5|6'::tsquery;
+SELECT '1&(2&(4&(5&6)))'::tsquery;
+SELECT '1&2&4&5&6'::tsquery;
+SELECT '1&(2&(4&(5|6)))'::tsquery;
+SELECT '1&(2&(4&(5|!6)))'::tsquery;
+SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
+SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
+
+select lexize('simple', 'ASD56 hsdkf');
+select lexize('en_stem', 'SKIES Problems identity');
+
+select * from token_type('default');
+select * from parse('default', '345
[email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005
[email protected] qwe-wer asdf
qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
+ wow < jqw <> qwerty');
+
+SELECT to_tsvector('default', '345
[email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005
[email protected] qwe-wer asdf
qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
+ wow < jqw <> qwerty');
+
+SELECT length(to_tsvector('default', '345 qw'));
+
+SELECT length(to_tsvector('default', '345
[email protected] \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005
[email protected] qwe-wer asdf
qwer jf sdjk ewr1> ewri2 ">
+/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
+ wow < jqw <> qwerty'));
+
+
+select to_tsquery('default', 'qwe & sKies ');
+select to_tsquery('simple', 'qwe & sKies ');
+select to_tsquery('default', '\'the wether\':dc & \' sKies \':BC ');
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
+select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
+
+CREATE TABLE test_tsvector( t text, a tsvector );
+
+\copy test_tsvector from 'data/test_tsearch.data'
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+create index wowidx on test_tsvector using gist (a);
+set enable_seqscan=off;
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+
+select set_curcfg('default');
+
+CREATE TRIGGER tsvectorupdate
+BEFORE UPDATE OR INSERT ON test_tsvector
+FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+
+drop trigger tsvectorupdate on test_tsvector;
+create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
+create trigger tsvectorupdate before update or insert on test_tsvector
+for each row execute procedure tsearch2(a, wow, t);
+insert into test_tsvector (t) values ('345 qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
+select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
+
+select rank(' a:1 s:2C d g'::tsvector, 'a | s');
+select rank(' a:1 s:2B d g'::tsvector, 'a | s');
+select rank(' a:1 s:2 d g'::tsvector, 'a | s');
+select rank(' a:1 s:2C d g'::tsvector, 'a & s');
+select rank(' a:1 s:2B d g'::tsvector, 'a & s');
+select rank(' a:1 s:2 d g'::tsvector, 'a & s');
+
+insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+
+select reset_tsearch();
+select to_tsquery('default', 'skies & books');
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('sea&thousand&years'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('granite&sea'));
+
+select get_covers(to_tsvector('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+'), to_tsquery('sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+', to_tsquery('sea&thousand&years'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+', to_tsquery('granite&sea'));
+
+select headline('Erosion It took the sea a thousand years,
+A thousand years to trace
+The granite features of this cliff
+In crag and scarp and base.
+It took the sea an hour one night
+An hour of storm to place
+The sculpture of these granite seams,
+Upon a woman s face. E. J. Pratt (1882 1964)
+', to_tsquery('sea'));
+
--- /dev/null
+/*
+ * stopword library
+ * Teodor Sigaev
+ */
+#include
+#include
+#include
+#include
+
+#include "postgres.h"
+#include "common.h"
+#include "dict.h"
+
+#define STOPBUFLEN 4096
+
+/*
+ * lowerstr: lower-case a NUL-terminated string in place.
+ * Every byte is handed to tolower() as an unsigned char.
+ * Returns the same pointer it was given.
+ */
+char*
+lowerstr(char *str) {
+    char *cur;
+
+    for (cur = str; *cur != '\0'; cur++)
+        *cur = tolower(*(unsigned char*)cur);
+
+    return str;
+}
+
+/*
+ * freestoplist: free every word stored in a StopList, then the pointer
+ * array itself, and finally zero the whole structure.
+ *
+ * The array is released once, after the loop; freeing it inside the loop
+ * would release it once per word and leave the loop reading freed memory.
+ */
+void
+freestoplist(StopList *s) {
+    char **ptr=s->stop;
+
+    if ( ptr ) {
+        /* check the counter first so no slot past the stored words is read */
+        while( s->len >0 && *ptr ) {
+            free(*ptr);
+            ptr++; s->len--;
+        }
+        free(s->stop);
+    }
+    memset(s,0,sizeof(StopList));
+}
+
+/*
+ * readstoplist: load a stop-word file into a StopList.
+ *
+ * in - text value holding the file name; NULL or zero-length means there is
+ *      no file and the list ends up empty (s->stop = NULL, s->len = 0)
+ * s  - list to fill; when s->wordop is set it is applied to each word
+ *
+ * The file holds one word per line; empty lines are skipped.  Words are
+ * strdup'ed into a realloc'ed array that doubles in size starting at 16.
+ * Raises elog(ERROR) when the file cannot be opened or memory runs out; in
+ * the latter case everything collected so far is released first.
+ */
+void
+readstoplist(text *in, StopList *s) {
+    char **stop=NULL;
+
+    s->len=0;
+    if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
+        char *filename=text2char(in);
+        FILE *hin=NULL;
+        char buf[STOPBUFLEN];
+        int reallen=0;
+        bool nomem=false;
+
+        if ( (hin=fopen(filename,"r")) == NULL )
+            elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
+        while( fgets(buf,STOPBUFLEN,hin) ) {
+            size_t blen=strlen(buf);
+
+            /*
+             * Strip only a real line terminator: a last line without a
+             * newline must keep its final character.
+             */
+            while ( blen > 0 && ( buf[blen-1]=='\n' || buf[blen-1]=='\r' ) )
+                buf[--blen] = '\0';
+            if ( blen==0 ) continue;
+
+            if ( s->len>= reallen ) {
+                char **tmp;
+                reallen=(reallen) ? reallen*2 : 16;
+                tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
+                if (!tmp) {
+                    nomem=true;
+                    break;
+                }
+                stop=tmp;
+            }
+
+            stop[s->len]=strdup(buf);
+            if ( !stop[s->len] ) {
+                nomem=true;
+                break;
+            }
+            if ( s->wordop )
+                stop[s->len]=(s->wordop)(stop[s->len]);
+
+            (s->len)++;
+        }
+        fclose(hin);
+
+        if ( nomem ) {
+            /* the words live only in the local array so far: free them here */
+            while ( s->len > 0 )
+                free(stop[--(s->len)]);
+            if ( stop )
+                free(stop);
+            s->stop=NULL;
+            elog(ERROR,"Not enough memory");
+        }
+        pfree(filename);
+    }
+    s->stop=stop;
+}
+
+/*
+ * comparestr: qsort()/bsearch() comparator for arrays of char*.
+ * a and b each point at one array element; ordering is that of strcmp().
+ */
+static int
+comparestr(const void *a, const void *b) {
+    char *left = *(char**)a;
+    char *right = *(char**)b;
+
+    return strcmp( left, right );
+}
+
+/*
+ * sortstoplist: sort the words of a StopList with comparestr().
+ * Does nothing when the list has no array or no entries.
+ */
+void
+sortstoplist(StopList *s) {
+    if ( !s->stop || !(s->len>0) )
+        return;
+
+    qsort(s->stop, s->len, sizeof(char*), comparestr);
+}
+
+/*
+ * searchstoplist: report whether key is present in the StopList.
+ *
+ * When s->wordop is set, key is first passed through it and the returned
+ * pointer is what gets looked up.  The lookup is a bsearch() with
+ * comparestr(), so the list must already be ordered that way.
+ */
+bool
+searchstoplist(StopList *s, char *key) {
+    if ( s->wordop )
+        key=(*(s->wordop))(key);
+
+    if ( !s->stop || !(s->len>0) )
+        return false;
+
+    if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
+        return true;
+    return false;
+}
+
+
--- /dev/null
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+her
+hers
+herself
+it
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+should
+now
+
--- /dev/null
+É
+×
+×Ï
+ÎÅ
+ÞÔÏ
+ÏÎ
+ÎÁ
+Ñ
+Ó
+ÓÏ
+ËÁË
+Á
+ÔÏ
+×ÓÅ
+ÏÎÁ
+ÔÁË
+ÅÇÏ
+ÎÏ
+ÄÁ
+ÔÙ
+Ë
+Õ
+ÖÅ
+×Ù
+ÚÁ
+ÂÙ
+ÐÏ
+ÔÏÌØËÏ
+ÅÅ
+ÍÎÅ
+ÂÙÌÏ
+×ÏÔ
+ÏÔ
+ÍÅÎÑ
+ÅÝÅ
+ÎÅÔ
+Ï
+ÉÚ
+ÅÍÕ
+ÔÅÐÅÒØ
+ËÏÇÄÁ
+ÄÁÖÅ
+ÎÕ
+×ÄÒÕÇ
+ÌÉ
+ÅÓÌÉ
+ÕÖÅ
+ÉÌÉ
+ÎÉ
+ÂÙÔØ
+ÂÙÌ
+ÎÅÇÏ
+ÄÏ
+×ÁÓ
+ÎÉÂÕÄØ
+ÏÐÑÔØ
+ÕÖ
+×ÁÍ
+×ÅÄØ
+ÔÁÍ
+ÐÏÔÏÍ
+ÓÅÂÑ
+ÎÉÞÅÇÏ
+ÅÊ
+ÍÏÖÅÔ
+ÏÎÉ
+ÔÕÔ
+ÇÄÅ
+ÅÓÔØ
+ÎÁÄÏ
+ÎÅÊ
+ÄÌÑ
+ÍÙ
+ÔÅÂÑ
+ÉÈ
+ÞÅÍ
+ÂÙÌÁ
+ÓÁÍ
+ÞÔÏÂ
+ÂÅÚ
+ÂÕÄÔÏ
+ÞÅÇÏ
+ÒÁÚ
+ÔÏÖÅ
+ÓÅÂÅ
+ÐÏÄ
+ÂÕÄÅÔ
+Ö
+ÔÏÇÄÁ
+ËÔÏ
+ÜÔÏÔ
+ÔÏÇÏ
+ÐÏÔÏÍÕ
+ÜÔÏÇÏ
+ËÁËÏÊ
+ÓÏ×ÓÅÍ
+ÎÉÍ
+ÚÄÅÓØ
+ÜÔÏÍ
+ÏÄÉÎ
+ÐÏÞÔÉ
+ÍÏÊ
+ÔÅÍ
+ÞÔÏÂÙ
+ÎÅÅ
+ÓÅÊÞÁÓ
+ÂÙÌÉ
+ËÕÄÁ
+ÚÁÞÅÍ
+×ÓÅÈ
+ÎÉËÏÇÄÁ
+ÍÏÖÎÏ
+ÐÒÉ
+ÎÁËÏÎÅÃ
+Ä×Á
+ÏÂ
+ÄÒÕÇÏÊ
+ÈÏÔØ
+ÐÏÓÌÅ
+ÎÁÄ
+ÂÏÌØÛÅ
+ÔÏÔ
+ÞÅÒÅÚ
+ÜÔÉ
+ÎÁÓ
+ÐÒÏ
+×ÓÅÇÏ
+ÎÉÈ
+ËÁËÁÑ
+ÍÎÏÇÏ
+ÒÁÚ×Å
+ÔÒÉ
+ÜÔÕ
+ÍÏÑ
+×ÐÒÏÞÅÍ
+ÈÏÒÏÛÏ
+Ó×ÏÀ
+ÜÔÏÊ
+ÐÅÒÅÄ
+ÉÎÏÇÄÁ
+ÌÕÞÛÅ
+ÞÕÔØ
+ÔÏÍ
+ÎÅÌØÚÑ
+ÔÁËÏÊ
+ÉÍ
+ÂÏÌÅÅ
+×ÓÅÇÄÁ
+ËÏÎÅÞÎÏ
+×ÓÀ
+ÍÅÖÄÕ
--- /dev/null
+/*
+ * interface functions to tscfg
+ * Teodor Sigaev
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * init_cfg: fill *cfg for the configuration whose pg_ts_cfg row has oid = id.
+ *
+ * Through SPI it reads the parser name from pg_ts_cfg and the
+ * token type -> dictionary names map from pg_ts_cfgmap.  After SPI_finish()
+ * the parser name and every dictionary name are converted to ids with
+ * name2id_prs() / name2id_dict().
+ *
+ * cfg->map and each dict_id array are malloc'ed.  The temporary copies of the
+ * names are made in TopMemoryContext, used after SPI_finish(), and pfree'd
+ * once converted.  All failures are reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+    Oid arg[2]={ OIDOID, OIDOID };
+    bool isnull;
+    Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+    int stat,i,j;
+    text *ptr;
+    text *prsname=NULL;
+    MemoryContext oldcontext;
+
+    memset(cfg,0,sizeof(TSCfgInfo));
+    SPI_connect();
+    if ( !plan_getcfg ) {
+        plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+        if ( !plan_getcfg )
+            ts_error(ERROR, "SPI_prepare() failed");
+    }
+
+    stat = SPI_execp(plan_getcfg, pars, " ", 1);
+    if ( stat < 0 )
+        ts_error (ERROR, "SPI_execp return %d", stat);
+    if ( SPI_processed > 0 ) {
+        prsname = (text*) DatumGetPointer(
+            SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+        );
+        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+        prsname = ptextdup( prsname );
+        MemoryContextSwitchTo(oldcontext);
+
+        cfg->id=id;
+    } else
+        ts_error(ERROR, "No tsearch cfg with id %d", id);
+
+    /* second query takes (parser name, cfg oid) */
+    arg[0]=TEXTOID;
+    if ( !plan_getmap ) {
+        plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+        if ( !plan_getmap )
+            ts_error(ERROR, "SPI_prepare() failed");
+    }
+
+    pars[0]=PointerGetDatum( prsname );
+    stat = SPI_execp(plan_getmap, pars, " ", 0);
+    if ( stat < 0 )
+        ts_error (ERROR, "SPI_execp return %d", stat);
+    if ( SPI_processed <= 0 )
+        ts_error(ERROR, "No parser with id %d", id);
+
+    for(i=0;i<SPI_processed;i++) {
+        int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+        ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+        ArrayType *a;
+
+        /*
+         * Rows arrive ordered by tokid descending, so the first row carries
+         * the largest token id and fixes the size of the map.
+         */
+        if ( !cfg->map ) {
+            cfg->len=lexid+1;
+            cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+            if ( !cfg->map )
+                ts_error(ERROR,"No memory");
+            memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+        }
+
+        /* isnull now describes the dict_name column */
+        if (isnull)
+            continue;
+
+        a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+
+        if ( ARR_NDIM(a) != 1 )
+            ts_error(ERROR,"Wrong dimension");
+        if ( ARRNELEMS(a) < 1 )
+            continue;
+
+        cfg->map[lexid].len=ARRNELEMS(a);
+        cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+        if ( !cfg->map[lexid].dict_id )
+            ts_error(ERROR,"No memory");
+        memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+        ptr=(text*)ARR_DATA_PTR(a);
+        /* for now the slots hold copies of the dictionary names */
+        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+        for(j=0;j<cfg->map[lexid].len;j++) {
+            cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+            ptr=NEXTVAL(ptr);
+        }
+        MemoryContextSwitchTo(oldcontext);
+
+        if ( a != toasted_a )
+            pfree(a);
+    }
+
+    SPI_finish();
+    cfg->prs_id = name2id_prs( prsname );
+    pfree(prsname);
+    /* replace every stored dictionary name by the dictionary's id */
+    for(i=0;i<cfg->len;i++) {
+        for(j=0;j<cfg->map[i].len;j++) {
+            ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+            cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+            pfree(ptr);
+        }
+    }
+}
+
+/*
+ * Cache of loaded configurations.
+ *   last_cfg    - entry returned by the most recent findcfg() lookup
+ *   len         - number of entries of list in use
+ *   reallen     - number of entries list has room for
+ *   list        - array of configurations; findcfg() keeps it sorted with
+ *                 comparecfg() so it can be searched with bsearch()
+ *   name2id_map - name -> id map consulted and filled by name2id_cfg()
+ */
+typedef struct {
+ TSCfgInfo *last_cfg;
+ int len;
+ int reallen;
+ TSCfgInfo *list;
+ SNMap name2id_map;
+} CFGList;
+
+/* the single cache instance; starts out empty */
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * reset_cfg: drop the whole configuration cache.
+ *
+ * Frees the name -> id map, every per-token dict_id array, every map and
+ * the list itself, then zeroes CList so the next findcfg() starts afresh.
+ */
+void
+reset_cfg(void) {
+    freeSNMap( &(CList.name2id_map) );
+    if ( CList.list ) {
+        int i,j;
+        for(i=0;i<CList.len;i++)
+            if ( CList.list[i].map ) {
+                for(j=0;j<CList.list[i].len;j++)
+                    if ( CList.list[i].map[j].dict_id )
+                        free(CList.list[i].map[j].dict_id);
+                free( CList.list[i].map );
+            }
+        free(CList.list);
+    }
+    memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * comparecfg: qsort()/bsearch() comparator ordering TSCfgInfo by id.
+ *
+ * The ids are compared explicitly instead of being subtracted: Oid is an
+ * unsigned type, and squeezing an unsigned difference into an int flips the
+ * sign once two ids are far enough apart.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+    Oid ida = ((TSCfgInfo*)a)->id;
+    Oid idb = ((TSCfgInfo*)b)->id;
+
+    if ( ida == idb )
+        return 0;
+    return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * findcfg: return the cached TSCfgInfo for configuration id, loading it
+ * with init_cfg() on first use.
+ *
+ * Lookup order: the entry returned last time, then a bsearch() over the
+ * sorted cache, then a fresh load appended to the cache.  The cache array
+ * doubles in size (starting at 16) when it is full.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+    /* same configuration as on the previous call */
+    if ( CList.last_cfg && CList.last_cfg->id==id )
+        return CList.last_cfg;
+
+    /* one of the configurations loaded earlier */
+    if ( CList.len != 0 ) {
+        TSCfgInfo probe;
+
+        probe.id=id;
+        CList.last_cfg = bsearch(&probe, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+        if ( CList.last_cfg != NULL )
+            return CList.last_cfg;
+    }
+
+    /* not cached yet: make room if needed, then load it */
+    if ( CList.len==CList.reallen ) {
+        int newsize = ( CList.reallen ) ? 2*CList.reallen : 16;
+        TSCfgInfo *grown = (TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*newsize);
+
+        if ( !grown )
+            ts_error(ERROR,"No memory");
+        CList.reallen=newsize;
+        CList.list=grown;
+    }
+    CList.last_cfg=&(CList.list[CList.len]);
+    init_cfg(id, CList.last_cfg);
+    CList.len++;
+    qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+    /* sorting may have moved the new entry, so look it up again */
+    return findcfg(id);
+}
+
+
+/*
+ * name2id_cfg: translate a configuration name into the oid of its
+ * pg_ts_cfg row.
+ *
+ * A name already present in CList.name2id_map is answered from the map;
+ * otherwise the oid is fetched through SPI and stored in the map.
+ * Raises elog(ERROR) when the plan cannot be prepared, the query fails,
+ * no row matches, or the fetched oid is null.
+ */
+Oid
+name2id_cfg(text *name) {
+    Oid argtypes[1]={ TEXTOID };
+    Datum values[1]={ PointerGetDatum(name) };
+    bool isnull;
+    int rc;
+    Oid id;
+
+    id=findSNMap_t( &(CList.name2id_map), name );
+    if ( id )
+        return id;
+
+    SPI_connect();
+    if ( !plan_name2id ) {
+        plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+        if ( !plan_name2id )
+            elog(ERROR, "SPI_prepare() failed");
+    }
+
+    rc = SPI_execp(plan_name2id, values, " ", 1);
+    if ( rc < 0 )
+        elog (ERROR, "SPI_execp return %d", rc);
+
+    if ( !(SPI_processed > 0) )
+        elog(ERROR, "No tsearch config");
+    else {
+        id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+        if ( isnull )
+            elog(ERROR, "Null id for tsearch config");
+    }
+    SPI_finish();
+
+    /* remember the answer for later calls */
+    addSNMap_t( &(CList.name2id_map), name, id );
+    return id;
+}
+
+
+/*
+ * parsetext_v2: split buf into tokens with the configuration's parser and
+ * append the normalized lexemes to prs->words.
+ *
+ * cfg    - configuration supplying the parser and the per-token-type list
+ *          of dictionaries
+ * prs    - output; words grows by doubling, pos counts recognized tokens
+ * buf    - text to parse
+ * buflen - number of bytes of buf
+ *
+ * For each token the dictionaries mapped to its type are tried in order and
+ * the first one returning a non-NULL array decides the outcome: every entry
+ * of that array becomes a word sharing the same position (an array with no
+ * entries adds nothing).  Token types at or beyond cfg->len are skipped.
+ * Raises elog(ERROR) for a token of MAXSTRLEN bytes or more.
+ */
+void
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+    int type, lenlemm, i;
+    char *lemm=NULL;
+    WParserInfo *prsobj = findprs(cfg->prs_id);
+
+    prsobj->prs=(void*)DatumGetPointer(
+        FunctionCall2(
+            &(prsobj->start_info),
+            PointerGetDatum(buf),
+            Int32GetDatum(buflen)
+        )
+    );
+
+    /* the parser returns token type 0 when the text is exhausted */
+    while( ( type=DatumGetInt32(FunctionCall3(
+            &(prsobj->getlexeme_info),
+            PointerGetDatum(prsobj->prs),
+            PointerGetDatum(&lemm),
+            PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+        if ( lenlemm >= MAXSTRLEN )
+            elog(ERROR, "Word is too long");
+
+
+        if ( type >= cfg->len ) /* skip this type of lexem */
+            continue;
+
+        for(i=0;i<cfg->map[type].len;i++) {
+            DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+            char **norms, **ptr;
+
+            /* the length is an integer argument, so pass it as one */
+            norms = ptr = (char**)DatumGetPointer(
+                FunctionCall3(
+                    &(dict->lexize_info),
+                    PointerGetDatum(dict->dictionary),
+                    PointerGetDatum(lemm),
+                    Int32GetDatum(lenlemm)
+                )
+            );
+            if ( !norms ) /* dictionary doesn't know this lexem */
+                continue;
+
+            prs->pos++; /*set pos*/
+
+            while( *ptr ) {
+                if (prs->curwords == prs->lenwords) {
+                    prs->lenwords *= 2;
+                    prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+                }
+
+                /* the string itself is kept; only the array is freed below */
+                prs->words[prs->curwords].len = strlen(*ptr);
+                prs->words[prs->curwords].word = *ptr;
+                prs->words[prs->curwords].alen = 0;
+                prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+                ptr++;
+                prs->curwords++;
+            }
+            pfree(norms);
+            break; /* lexem already normalized or is stop word*/
+        }
+    }
+
+    FunctionCall1(
+        &(prsobj->end_info),
+        PointerGetDatum(prsobj->prs)
+    );
+}
+
+/*
+ * hladdword: append one token to prs->words for headline generation.
+ * The array is doubled until there is room; the new entry is zeroed and
+ * receives the token type, its length and a palloc'ed copy of buflen bytes.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+    HLWORD *slot;
+
+    while (prs->curwords >= prs->lenwords) {
+        prs->lenwords *= 2;
+        prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+    }
+
+    slot = &(prs->words[prs->curwords]);
+    memset( slot, 0, sizeof(HLWORD) );
+    slot->type = (uint8)type;
+    slot->len = buflen;
+    slot->word = palloc(buflen);
+    memcpy(slot->word, buf, buflen);
+
+    prs->curwords++;
+}
+
+/*
+ * hlfinditem: link the most recently added headline word to the query
+ * operands that equal the lexeme buf (buflen bytes).
+ *
+ * The first matching VAL item is stored in the word itself.  For each
+ * further match a copy of the word is appended, pointing at that item and
+ * flagged as repeated.  Room for query->size extra words is reserved first.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+    int i;
+    ITEM *item=GETQUERY(query);
+    HLWORD *word;
+
+    while (prs->curwords + query->size >= prs->lenwords) {
+        prs->lenwords *= 2;
+        prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+    }
+
+    /* taken only after growing: repalloc() may move prs->words */
+    word=&( prs->words[prs->curwords-1] );
+
+    for(i=0; i<query->size; i++) {
+        if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+            if ( word->item ) {
+                memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+                prs->words[prs->curwords].item=item;
+                prs->words[prs->curwords].repeated=1;
+                prs->curwords++;
+            } else
+                word->item=item;
+        }
+        item++;
+    }
+}
+
+/*
+ * hlparsetext: parse buf for headline generation.
+ *
+ * Every token the parser yields is stored in prs->words by hladdword().
+ * Tokens whose type has dictionaries are normalized by the first dictionary
+ * that returns a non-NULL array, and each resulting lexeme is matched
+ * against the query with hlfinditem().  The lexemes and the array are freed
+ * afterwards.  Raises elog(ERROR) for a token of MAXSTRLEN bytes or more.
+ */
+void
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+    int type, lenlemm, i;
+    char *lemm=NULL;
+    WParserInfo *prsobj = findprs(cfg->prs_id);
+
+    prsobj->prs=(void*)DatumGetPointer(
+        FunctionCall2(
+            &(prsobj->start_info),
+            PointerGetDatum(buf),
+            Int32GetDatum(buflen)
+        )
+    );
+
+    /* the parser returns token type 0 when the text is exhausted */
+    while( ( type=DatumGetInt32(FunctionCall3(
+            &(prsobj->getlexeme_info),
+            PointerGetDatum(prsobj->prs),
+            PointerGetDatum(&lemm),
+            PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+        if ( lenlemm >= MAXSTRLEN )
+            elog(ERROR, "Word is too long");
+
+        /* unlike parsetext_v2(), every token is kept, mapped or not */
+        hladdword(prs,lemm,lenlemm,type);
+
+        if ( type >= cfg->len )
+            continue;
+
+        for(i=0;i<cfg->map[type].len;i++) {
+            DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+            char **norms, **ptr;
+
+            /* the length is an integer argument, so pass it as one */
+            norms = ptr = (char**)DatumGetPointer(
+                FunctionCall3(
+                    &(dict->lexize_info),
+                    PointerGetDatum(dict->dictionary),
+                    PointerGetDatum(lemm),
+                    Int32GetDatum(lenlemm)
+                )
+            );
+            if ( !norms ) /* dictionary doesn't know this lexem */
+                continue;
+
+            while( *ptr ) {
+                hlfinditem(prs,query,*ptr,strlen(*ptr));
+                pfree(*ptr);
+                ptr++;
+            }
+            pfree(norms);
+            break; /* lexem already normalized or is stop word*/
+        }
+    }
+
+    FunctionCall1(
+        &(prsobj->end_info),
+        PointerGetDatum(prsobj->prs)
+    );
+}
+
+/*
+ * genhl: assemble the headline text from the words collected in prs.
+ *
+ * Only words flagged "in" and neither "skip" nor "repeated" are emitted:
+ * a "replace" word becomes a single space, any other word is copied and,
+ * when "selected", wrapped in prs->startsel / prs->stopsel.
+ * The word buffers of all non-repeated entries are pfree'd on the way.
+ * Returns a palloc'ed text value.
+ */
+text*
+genhl(HLPRSTEXT * prs) {
+    text *out;
+    int len=128;
+    char *ptr;
+    HLWORD *wrd;
+
+    out = (text*)palloc( len );
+    ptr=((char*)out) + VARHDRSZ;
+
+    for ( wrd=prs->words; wrd - prs->words < prs->curwords; wrd++ ) {
+        /* double the buffer until the word plus both markers fit */
+        while ( wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
+            int used = ptr - ((char*)out);
+
+            len*= 2;
+            out = (text *) repalloc(out, len);
+            ptr=((char*)out) + used;
+        }
+
+        if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+            if ( wrd->replace ) {
+                *ptr++ = ' ';
+            } else if ( wrd->selected ) {
+                memcpy(ptr,prs->startsel,prs->startsellen);
+                ptr+=prs->startsellen;
+                memcpy(ptr,wrd->word,wrd->len);
+                ptr+=wrd->len;
+                memcpy(ptr,prs->stopsel,prs->stopsellen);
+                ptr+=prs->stopsellen;
+            } else {
+                memcpy(ptr,wrd->word,wrd->len);
+                ptr+=wrd->len;
+            }
+        }
+
+        /* repeated entries are copies sharing the original's buffer */
+        if ( !wrd->repeated )
+            pfree(wrd->word);
+    }
+
+    VARATT_SIZEP(out)=ptr - ((char*)out);
+    return out;
+}
+
+/*
+ * get_currcfg: return the id of the current configuration.
+ *
+ * A value already stored in current_cfg_id is returned as is.  Otherwise
+ * the pg_ts_cfg row whose locale equals the process's LC_CTYPE setting is
+ * looked up through SPI and its oid is remembered in current_cfg_id.
+ * Raises elog(ERROR) when the plan cannot be prepared, the query fails or
+ * no row matches the locale.
+ */
+int
+get_currcfg(void) {
+    Oid argtypes[1]={ TEXTOID };
+    Datum values[1];
+    const char *locname;
+    bool isnull;
+    int rc;
+
+    if ( current_cfg_id > 0 )
+        return current_cfg_id;
+
+    SPI_connect();
+    if ( !plan_getcfg_bylocale ) {
+        plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+        if ( !plan_getcfg_bylocale )
+            elog(ERROR, "SPI_prepare() failed");
+    }
+
+    /* a NULL second argument only queries the current setting */
+    locname = setlocale(LC_CTYPE, NULL);
+    values[0] = PointerGetDatum( char2text((char*)locname) );
+    rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+
+    if ( rc < 0 )
+        elog (ERROR, "SPI_execp return %d", rc);
+    if ( !(SPI_processed > 0) )
+        elog(ERROR,"Can't find tsearch config by locale");
+    else
+        current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+    pfree(DatumGetPointer(values[0]));
+    SPI_finish();
+    return current_cfg_id;
+}
+
+/*
+ * set_curcfg(oid): make the given configuration the current one.
+ * findcfg() is called first, so the configuration is loaded (and any load
+ * error raised) before current_cfg_id is changed.
+ */
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+ findcfg(PG_GETARG_OID(0));
+ current_cfg_id=PG_GETARG_OID(0);
+ PG_RETURN_VOID();
+}
+
+/*
+ * set_curcfg(text): make the configuration with the given name current.
+ * The name is resolved with name2id_cfg() and the work is delegated to
+ * set_curcfg().
+ */
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+ text *name=PG_GETARG_TEXT_P(0);
+
+ DirectFunctionCall1(
+ set_curcfg,
+ ObjectIdGetDatum( name2id_cfg(name) )
+ );
+ /* release the detoasted copy of the argument, if one was made */
+ PG_FREE_IF_COPY(name, 0);
+ PG_RETURN_VOID();
+}
+
+/*
+ * show_curcfg(): return the id of the current configuration, as computed
+ * by get_currcfg().
+ */
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+ PG_RETURN_OID( get_currcfg() );
+}
+
+/*
+ * reset_tsearch(): report "TSearch cache cleaned" as a NOTICE.
+ * NOTE(review): nothing is reset in this body itself; presumably
+ * ts_error() clears the caches as a side effect - confirm in its definition.
+ */
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+ ts_error(NOTICE,"TSearch cache cleaned");
+ PG_RETURN_VOID();
+}
--- /dev/null
+/*
+ * Declarations for tsearch configurations: the cached configuration
+ * structures and the word arrays filled by the text parsing routines.
+ */
+#ifndef __TS_CFG_H__
+#define __TS_CFG_H__
+#include "postgres.h"
+#include "query.h"
+
+/* dictionaries assigned to one token type: len entries in dict_id */
+typedef struct {
+ int len;
+ Datum *dict_id;
+} ListDictionary;
+
+/*
+ * One loaded configuration: its own id, the id of its parser, and a map
+ * of len entries indexed by token type.
+ */
+typedef struct {
+ Oid id;
+ Oid prs_id;
+ int len;
+ ListDictionary *map;
+} TSCfgInfo;
+
+/* configuration lookup and cache maintenance */
+Oid name2id_cfg(text *name);
+TSCfgInfo * findcfg(Oid id);
+void init_cfg(Oid id, TSCfgInfo *cfg);
+void reset_cfg(void);
+
+/*
+ * One lexeme produced by parsetext_v2(): its length, its position (a
+ * single value in pos.pos, or a pointer in pos.apos), the string itself
+ * and alen, which parsetext_v2() sets to 0.
+ */
+typedef struct {
+ uint16 len;
+ union {
+ uint16 pos;
+ uint16 *apos;
+ } pos;
+ char *word;
+ uint32 alen;
+} WORD;
+
+/*
+ * Growable array of WORDs: lenwords entries of room, curwords in use;
+ * pos counts the tokens recognized so far.
+ */
+typedef struct {
+ WORD *words;
+ int4 lenwords;
+ int4 curwords;
+ int4 pos;
+} PRSTEXT;
+
+/*
+ * One token kept for headline generation.  genhl() reads the flags:
+ * a word is emitted when "in" is set and "skip"/"repeated" are not;
+ * "replace" turns it into a single space and "selected" wraps it in the
+ * start/stop markers.  item points at the matching query operand, if any.
+ */
+typedef struct {
+ uint16 len;
+ uint8 selected:1,
+ in:1,
+ skip:1,
+ replace:1,
+ repeated:1;
+ uint8 type;
+ char *word;
+ ITEM *item;
+} HLWORD;
+
+/*
+ * Growable array of HLWORDs plus the marker strings (and their lengths)
+ * put around selected words.
+ */
+typedef struct {
+ HLWORD *words;
+ int4 lenwords;
+ int4 curwords;
+ char *startsel;
+ char *stopsel;
+ int2 startsellen;
+ int2 stopsellen;
+} HLPRSTEXT;
+
+/* headline support */
+void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
+text* genhl(HLPRSTEXT * prs);
+
+/* document parsing and current-configuration lookup */
+void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
+int get_currcfg(void);
+
+#endif
--- /dev/null
+/*
+ * stat functions
+ */
+
+#include "tsvector.h"
+#include "ts_stat.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "common.h"
+
+/*
+ * tsstat_in: input function of the tsstat type.
+ * The arguments are not examined; the result is always an empty statistic
+ * (header only, zero entries).
+ */
+PG_FUNCTION_INFO_V1(tsstat_in);
+Datum tsstat_in(PG_FUNCTION_ARGS);
+Datum
+tsstat_in(PG_FUNCTION_ARGS) {
+ tsstat *stat=palloc(STATHDRSIZE);
+ stat->len=STATHDRSIZE;
+ stat->size=0;
+ PG_RETURN_POINTER(stat);
+}
+
+/*
+ * tsstat_out: output function of the tsstat type.
+ * Not implemented: it always raises elog(ERROR).
+ */
+PG_FUNCTION_INFO_V1(tsstat_out);
+Datum tsstat_out(PG_FUNCTION_ARGS);
+Datum
+tsstat_out(PG_FUNCTION_ARGS) {
+ elog(ERROR,"Unimplemented");
+ PG_RETURN_NULL();
+}
+
+/*
+ * SEI_realloc: grow an array of WordEntry pointers.
+ * With no array yet (or a zero length) 8 slots are allocated; otherwise the
+ * length is doubled.  *len is updated and the array to use is returned.
+ */
+static WordEntry**
+SEI_realloc( WordEntry** in, uint32 *len ) {
+    if ( in!=NULL && *len!=0 ) {
+        *len *= 2;
+        return repalloc( in, sizeof(WordEntry*)* (*len) );
+    }
+
+    *len=8;
+    return palloc( sizeof(WordEntry*)* (*len) );
+}
+
+/*
+ * compareStatWord: order a statistic entry against a tsvector entry.
+ * Shorter strings sort first; strings of equal length are compared with
+ * strncmp() over that length.
+ */
+static int
+compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
+    if ( a->len != b->len )
+        return ( a->len > b->len ) ? 1 : -1;
+
+    return strncmp(
+        STATSTRPTR(stat) + a->pos,
+        STRPTR(txt) + b->pos,
+        a->len
+    );
+}
+
+/*
+ * formstat: build a new statistic holding the entries of stat plus the
+ * len new words listed in entry (pointers into txt).
+ *
+ * The string area of stat is copied first and the new strings are appended
+ * after it.  The entry array is produced in compareStatWord() order: for a
+ * single new word its place is found by binary search, otherwise the old
+ * entries and the new words are merged.  Each new entry gets ndoc = 1 and
+ * nentry = number of positions of the word (at least 1).
+ * Returns the palloc'ed result; stat itself is left untouched.
+ */
+static tsstat*
+formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
+    tsstat *newstat;
+    uint32 totallen, nentry;
+    uint32 slen=0;
+    WordEntry **ptr=entry;
+    char *curptr;
+    StatEntry *sptr,*nptr;
+
+    /* bytes needed for the strings of the new words */
+    while(ptr-entry<len) {
+        slen += (*ptr)->len;
+        ptr++;
+    }
+
+    nentry=stat->size + len;
+    slen+=STATSTRSIZE(stat);
+    totallen=CALCSTATSIZE(nentry,slen);
+    newstat=palloc(totallen);
+    newstat->len=totallen;
+    newstat->size=nentry;
+
+    memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
+    curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
+
+    ptr=entry;
+    sptr=STATPTR(stat);
+    nptr=STATPTR(newstat);
+
+    if ( len == 1 ) {
+        StatEntry *StopLow = STATPTR(stat);
+        StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+
+        /* binary search for the insertion point of the single new word */
+        while (StopLow < StopHigh) {
+            sptr=StopLow + (StopHigh - StopLow) / 2;
+            if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
+                StopLow = sptr + 1;
+            else
+                StopHigh = sptr;
+        }
+        nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
+        /* entries before the insertion point */
+        memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
+        nptr->nentry=POSDATALEN(txt,*ptr);
+        if ( nptr->nentry==0 )
+            nptr->nentry=1;
+        nptr->ndoc=1;
+        nptr->len=(*ptr)->len;
+        memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+        nptr->pos = curptr - STATSTRPTR(newstat);
+        /* entries after the insertion point */
+        memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
+    } else {
+        /* merge old entries and new words while both are left */
+        while( sptr-STATPTR(stat) < stat->size && ptr-entry<len ) {
+            if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
+                memcpy(nptr, sptr, sizeof(StatEntry));
+                sptr++;
+            } else {
+                nptr->nentry=POSDATALEN(txt,*ptr);
+                if ( nptr->nentry==0 )
+                    nptr->nentry=1;
+                nptr->ndoc=1;
+                nptr->len=(*ptr)->len;
+                memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+                nptr->pos = curptr - STATSTRPTR(newstat);
+                curptr += nptr->len;
+                ptr++;
+            }
+            nptr++;
+        }
+
+        /* whatever old entries remain */
+        memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) );
+
+        /* whatever new words remain */
+        while(ptr-entry<len) {
+            nptr->nentry=POSDATALEN(txt,*ptr);
+            if ( nptr->nentry==0 )
+                nptr->nentry=1;
+            nptr->ndoc=1;
+            nptr->len=(*ptr)->len;
+            memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+            nptr->pos = curptr - STATSTRPTR(newstat);
+            curptr += nptr->len;
+            ptr++; nptr++;
+        }
+    }
+
+    return newstat;
+}
+
+/*
+ * ts_accum(tsstat, tsvector): fold one tsvector into the running statistic.
+ *
+ * Words already present get ndoc incremented and nentry increased by the
+ * number of positions (at least 1).  Words not yet present are collected
+ * and added by formstat(), which returns a new statistic.
+ * A NULL/absent statistic is treated as empty; a NULL or empty tsvector
+ * leaves the statistic unchanged.
+ *
+ * Existing words are located by a merge pass when the statistic is smaller
+ * than 100 times the tsvector, and by binary search per word otherwise.
+ */
+PG_FUNCTION_INFO_V1(ts_accum);
+Datum ts_accum(PG_FUNCTION_ARGS);
+Datum
+ts_accum(PG_FUNCTION_ARGS) {
+    tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
+    /* a NULL argument must not be handed to the detoaster */
+    tsvector *txt = PG_ARGISNULL(1) ? NULL : (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+    WordEntry **newentry=NULL;
+    uint32 len=0, cur=0;
+    StatEntry *sptr;
+    WordEntry *wptr;
+
+    if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */
+        stat=palloc(STATHDRSIZE);
+        stat->len=STATHDRSIZE;
+        stat->size=0;
+    }
+
+    /* simple check of correctness */
+    if ( txt==NULL || txt->size==0 ) {
+        PG_FREE_IF_COPY(txt,1);
+        PG_RETURN_POINTER(stat);
+    }
+
+    sptr=STATPTR(stat);
+    wptr=ARRPTR(txt);
+
+    if ( stat->size < 100*txt->size ) { /* merge */
+        while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
+            int cmp = compareStatWord(sptr,wptr,stat,txt);
+            if ( cmp<0 ) {
+                sptr++;
+            } else if ( cmp==0 ) {
+                int n=POSDATALEN(txt,wptr);
+
+                if (n==0) n=1;
+                sptr->ndoc++;
+                sptr->nentry +=n ;
+                sptr++; wptr++;
+            } else {
+                /* word absent from the statistic: remember it */
+                if ( cur==len )
+                    newentry=SEI_realloc(newentry, &len);
+                newentry[cur]=wptr;
+                wptr++; cur++;
+            }
+        }
+
+        /* words sorting after the last statistic entry are all new */
+        while( wptr-ARRPTR(txt) < txt->size ) {
+            if ( cur==len )
+                newentry=SEI_realloc(newentry, &len);
+            newentry[cur]=wptr;
+            wptr++; cur++;
+        }
+    } else { /* search */
+        while( wptr-ARRPTR(txt) < txt->size ) {
+            StatEntry *StopLow = STATPTR(stat);
+            StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
+            int cmp;
+
+            while (StopLow < StopHigh) {
+                sptr=StopLow + (StopHigh - StopLow) / 2;
+                cmp = compareStatWord(sptr,wptr,stat,txt);
+                if (cmp==0) {
+                    int n=POSDATALEN(txt,wptr);
+                    if (n==0) n=1;
+                    sptr->ndoc++;
+                    sptr->nentry +=n ;
+                    break;
+                } else if ( cmp < 0 )
+                    StopLow = sptr + 1;
+                else
+                    StopHigh = sptr;
+            }
+
+            /* a hit leaves the loop by break with StopLow < StopHigh */
+            if ( StopLow >= StopHigh ) { /* not found */
+                if ( cur==len )
+                    newentry=SEI_realloc(newentry, &len);
+                newentry[cur]=wptr;
+                cur++;
+            }
+            wptr++;
+        }
+    }
+
+
+    if ( cur==0 ) { /* no new words */
+        PG_FREE_IF_COPY(txt,1);
+        PG_RETURN_POINTER(stat);
+    }
+
+    newstat = formstat(stat, txt, newentry, cur);
+    pfree(newentry);
+    PG_FREE_IF_COPY(txt,1);
+    /* pfree(stat); */
+
+    PG_RETURN_POINTER(newstat);
+}
+
+/*
+ * Per-call state of the set-returning functions:
+ *   cur  - index of the next entry to return
+ *   stat - private copy of the statistic being returned; declared as
+ *          tsvector* although ts_setup_firstcall() stores a copy of a
+ *          tsstat in it
+ */
+typedef struct {
+ uint32 cur;
+ tsvector *stat;
+} StatStorage;
+
+/*
+ * ts_setup_firstcall: prepare a set-returning call that will emit the
+ * entries of stat one row at a time.
+ *
+ * Everything is allocated in the multi-call memory context: a private copy
+ * of the statistic, the StatStorage cursor, and the slot / input metadata
+ * built from the tuple descriptor of relation "statinfo".
+ */
+static void
+ts_setup_firstcall(FuncCallContext *funcctx, tsstat *stat) {
+    MemoryContext callerctx;
+    StatStorage *storage;
+    TupleDesc rowdesc;
+
+    callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+    storage=palloc( sizeof(StatStorage) );
+    storage->cur=0;
+    storage->stat=palloc( stat->len );
+    memcpy(storage->stat, stat, stat->len);
+    funcctx->user_fctx = (void*)storage;
+
+    rowdesc = RelationNameGetTupleDesc("statinfo");
+    funcctx->slot = TupleDescGetSlot(rowdesc);
+    funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);
+
+    MemoryContextSwitchTo(callerctx);
+}
+
+
+/*
+ * ts_process_call: produce the next result row of a statistic.
+ *
+ * Returns a tuple datum built from three C strings (the word, ndoc and
+ * nentry) and advances the cursor.  Once every entry has been returned the
+ * stored statistic and the cursor are freed and (Datum)0 is returned.
+ */
+static Datum
+ts_process_call(FuncCallContext *funcctx) {
+    StatStorage *st=(StatStorage*)funcctx->user_fctx;
+    StatEntry *entry;
+    HeapTuple tuple;
+    Datum result;
+    char* values[3];
+    char ndoc[16];
+    char nentry[16];
+
+    if ( !(st->cur < st->stat->size) ) {
+        /* nothing left: release the per-call state */
+        pfree(st->stat);
+        pfree(st);
+        return (Datum)0;
+    }
+
+    entry=STATPTR(st->stat) + st->cur;
+
+    /* column 1: the word, copied so it can be NUL-terminated */
+    values[0]=palloc( entry->len+1 );
+    memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
+    (values[0])[entry->len]='\0';
+
+    /* columns 2 and 3: the counters as text */
+    sprintf(ndoc,"%d",entry->ndoc);
+    values[1]=ndoc;
+    sprintf(nentry,"%d",entry->nentry);
+    values[2]=nentry;
+
+    tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+    result = TupleGetDatum(funcctx->slot, tuple);
+
+    pfree(values[0]);
+    st->cur++;
+    return result;
+}
+
+/*
+ * ts_accum_finish(tsstat): set-returning function that emits one row per
+ * entry of the statistic given as argument 0.
+ */
+PG_FUNCTION_INFO_V1(ts_accum_finish);
+Datum ts_accum_finish(PG_FUNCTION_ARGS);
+Datum
+ts_accum_finish(PG_FUNCTION_ARGS) {
+ FuncCallContext *funcctx;
+ Datum result;
+
+ /* first call: copy the statistic into the multi-call context */
+ if (SRF_IS_FIRSTCALL()) {
+ funcctx = SRF_FIRSTCALL_INIT();
+ ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ /* (Datum)0 from ts_process_call() means there are no more rows */
+ if ( (result=ts_process_call(funcctx)) != (Datum)0 )
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+/* oid of the tsvector type, looked up once by get_ti_Oid() */
+static Oid tiOid=InvalidOid;
+
+/*
+ * get_ti_Oid: look up the oid of type "tsvector" in pg_type and store it
+ * in tiOid.  Must be called with an SPI connection open.
+ * Raises elog(ERROR) when the query fails, returns no row, or yields
+ * InvalidOid.
+ */
+static void
+get_ti_Oid(void) {
+    int ret;
+    bool isnull;
+
+    if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )
+        elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
+
+    /*
+     * A row count is never negative, so test for "no row at all";
+     * otherwise vals[0] of an empty result would be read below.
+     */
+    if ( SPI_processed<1 )
+        elog(ERROR, "There is no tsvector type");
+    tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+    if ( tiOid==InvalidOid )
+        elog(ERROR, "tsvector type has InvalidOid");
+}
+
+/*
+ * ts_stat_sql: run the query given in txt and accumulate statistics over
+ * the tsvector values it returns.
+ *
+ * The query must produce exactly one column of type tsvector.  Rows are
+ * read through a cursor in batches of 100 and every non-null value is
+ * folded in with ts_accum().  Must be called with an SPI connection open.
+ * Returns the resulting statistic; raises elog(ERROR) when the query cannot
+ * be prepared or opened, or when its result has the wrong shape.
+ */
+static tsstat*
+ts_stat_sql(text *txt) {
+    char *query=text2char(txt);
+    int i;
+    tsstat *newstat,*stat;
+    bool isnull;
+    Portal portal;
+    void *plan;
+
+    if ( tiOid==InvalidOid )
+        get_ti_Oid();
+
+    if ( (plan = SPI_prepare(query,0,NULL))==NULL )
+        elog(ERROR, "SPI_prepare('%s') returns NULL",query);
+
+    if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
+        elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
+
+    SPI_cursor_fetch(portal, true, 100);
+
+    if ( SPI_tuptable->tupdesc->natts != 1 )
+        elog(ERROR, "Number of fields doesn't equal to 1");
+
+    if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
+        elog(ERROR, "Column isn't of tsvector type");
+
+    /* start from an empty statistic */
+    stat=palloc(STATHDRSIZE);
+    stat->len=STATHDRSIZE;
+    stat->size=0;
+
+    while(SPI_processed>0) {
+        for(i=0;i<SPI_processed;i++) {
+            Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+            if ( !isnull ) {
+                newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
+                    ts_accum,
+                    PointerGetDatum(stat),
+                    data
+                ));
+                /* ts_accum() hands back a new object when it added words */
+                if ( stat!=newstat && stat )
+                    pfree(stat);
+                stat=newstat;
+            }
+        }
+
+        SPI_freetuptable(SPI_tuptable);
+        SPI_cursor_fetch(portal, true, 100);
+    }
+
+    SPI_freetuptable(SPI_tuptable);
+    SPI_cursor_close(portal);
+    SPI_freeplan(plan);
+    pfree(query);
+
+    return stat;
+}
+
+/*
+ * ts_stat(text): set-returning function that runs the query given as
+ * argument 0 and emits one row per distinct word of the statistic that
+ * ts_stat_sql() computes from it.
+ */
+PG_FUNCTION_INFO_V1(ts_stat);
+Datum ts_stat(PG_FUNCTION_ARGS);
+Datum
+ts_stat(PG_FUNCTION_ARGS) {
+ FuncCallContext *funcctx;
+ Datum result;
+
+ /* first call: compute the whole statistic and keep a copy of it */
+ if (SRF_IS_FIRSTCALL()) {
+ tsstat *stat;
+ text *txt=PG_GETARG_TEXT_P(0);
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ SPI_connect();
+ stat = ts_stat_sql(txt);
+ PG_FREE_IF_COPY(txt,0);
+ /* the copy is made in the multi-call context before SPI is closed */
+ ts_setup_firstcall(funcctx, stat );
+ SPI_finish();
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ /* (Datum)0 from ts_process_call() means there are no more rows */
+ if ( (result=ts_process_call(funcctx)) != (Datum)0 )
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+
--- /dev/null
+#ifndef __TXTIDX_STAT_H__
+#define __TXTIDX_STAT_H__
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * One word of a statistic:
+ *   len    - length of the word in bytes
+ *   pos    - offset of the word inside the string area of the statistic
+ *   ndoc   - number of tsvectors that contained the word
+ *   nentry - accumulated number of positions of the word (a tsvector
+ *            without position data counts as 1)
+ */
+typedef struct {
+ uint32 len;
+ uint32 pos;
+ uint32 ndoc;
+ uint32 nentry;
+} StatEntry;
+
+/*
+ * A statistic: total size in bytes (len), number of entries (size), then
+ * in data the StatEntry array followed by the string area.
+ */
+typedef struct {
+ int4 len;
+ int4 size;
+ char data[1];
+} tsstat;
+
+/*
+ * Layout helpers for tsstat.  Every macro argument is parenthesized so
+ * that expressions can be passed safely, and the header fields are read
+ * through tsstat, the type declared in this file.
+ *
+ * STATHDRSIZE        - size of the two header fields
+ * CALCSTATSIZE(x, l) - total size for x entries and l bytes of strings
+ * STATPTR(x)         - first StatEntry
+ * STATSTRPTR(x)      - start of the string area (just past the entries)
+ * STATSTRSIZE(x)     - size of the string area in bytes
+ */
+#define STATHDRSIZE (sizeof(int4)*2)
+#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
+#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
+#define STATSTRPTR(x) ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
+
+#endif
--- /dev/null
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+BEGIN;
+
+-- Dictionary catalogue: one row per dictionary.
+-- dict_init and dict_lexize hold pg_proc oids of the dictionary's
+-- initialisation and lexize functions; dict_initoption is the text
+-- argument stored for the init function.
+CREATE TABLE pg_ts_dict (
+    dict_name       text NOT NULL PRIMARY KEY,
+    dict_init       oid,
+    dict_initoption text,
+    dict_lexize     oid NOT NULL,
+    dict_comment    text
+) WITH OIDS;
+
+-- SQL-level dictionary interface.
+-- lexize() is available by dictionary oid, by dictionary name, and for the
+-- current dictionary; set_curdict() selects the current dictionary by id
+-- or by name.
+CREATE FUNCTION lexize(oid, text)
+    RETURNS _text
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+    RETURNS _text
+    AS 'MODULE_PATHNAME', 'lexize_byname'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION lexize(text)
+    RETURNS _text
+    AS 'MODULE_PATHNAME', 'lexize_bycurrent'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+    RETURNS void
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+    RETURNS void
+    AS 'MODULE_PATHNAME', 'set_curdict_byname'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+--built-in dictionaries
+
+-- 'simple' dictionary: its two C functions and its catalogue row.
+CREATE FUNCTION dex_init(text)
+ returns internal
+ as 'MODULE_PATHNAME'
+ language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+ returns internal
+ as 'MODULE_PATHNAME'
+ language 'C'
+ with (isstrict);
+
+-- The target columns are named so the row does not depend on the column
+-- order of pg_ts_dict.
+insert into pg_ts_dict (dict_name, dict_init, dict_initoption, dict_lexize, dict_comment)
+select
+ 'simple',
+ (select oid from pg_proc where proname='dex_init'),
+ null,
+ (select oid from pg_proc where proname='dex_lexize'),
+ 'Simple example of dictionary.'
+;
+
+-- 'en_stem' dictionary: Snowball English stemmer, initialised with the
+-- English stop-word file.
+CREATE FUNCTION snb_en_init(text)
+ returns internal
+ as 'MODULE_PATHNAME'
+ language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+ returns internal
+ as 'MODULE_PATHNAME'
+ language 'C'
+ with (isstrict);
+
+-- The target columns are named so the row does not depend on the column
+-- order of pg_ts_dict.
+insert into pg_ts_dict (dict_name, dict_init, dict_initoption, dict_lexize, dict_comment)
+select
+ 'en_stem',
+ (select oid from pg_proc where proname='snb_en_init'),
+ 'DATA_PATH/english.stop',
+ (select oid from pg_proc where proname='snb_lexize'),
+ 'English Stemmer. Snowball.'
+;
+
+-- 'ru_stem' dictionary: Snowball Russian stemmer, initialised with the
+-- Russian stop-word file.  It registers snb_lexize as its lexize function,
+-- the same one used by 'en_stem'.
+CREATE FUNCTION snb_ru_init(text)
+ returns internal
+ as 'MODULE_PATHNAME'
+ language 'C';
+
+-- The target columns are named so the row does not depend on the column
+-- order of pg_ts_dict.
+insert into pg_ts_dict (dict_name, dict_init, dict_initoption, dict_lexize, dict_comment)
+select
+ 'ru_stem',
+ (select oid from pg_proc where proname='snb_ru_init'),
+ 'DATA_PATH/russian.stop',
+ (select oid from pg_proc where proname='snb_lexize'),
+ 'Russian Stemmer. Snowball.'
+;
+
+-- 'ispell_template' dictionary: ISpell interface.  It is registered with
+-- no init option; per its comment it needs .dict and .aff files.
+CREATE FUNCTION spell_init(text)
+ returns internal
+ as 'MODULE_PATHNAME'
+ language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+ returns internal
+ as 'MODULE_PATHNAME'
+ language 'C'
+ with (isstrict);
+
+-- The target columns are named so the row does not depend on the column
+-- order of pg_ts_dict.
+insert into pg_ts_dict (dict_name, dict_init, dict_initoption, dict_lexize, dict_comment)
+select
+ 'ispell_template',
+ (select oid from pg_proc where proname='spell_init'),
+ null,
+ (select oid from pg_proc where proname='spell_lexize'),
+ 'ISpell interface. Must have .dict and .aff files'
+;
+
+-- 'synonym' dictionary: example synonym dictionary, registered with no
+-- init option.
+CREATE FUNCTION syn_init(text)
+ returns internal
+ as 'MODULE_PATHNAME'
+ language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+ returns internal
+ as 'MODULE_PATHNAME'
+ language 'C'
+ with (isstrict);
+
+-- The target columns are named so the row does not depend on the column
+-- order of pg_ts_dict.
+insert into pg_ts_dict (dict_name, dict_init, dict_initoption, dict_lexize, dict_comment)
+select
+ 'synonym',
+ (select oid from pg_proc where proname='syn_init'),
+ null,
+ (select oid from pg_proc where proname='syn_lexize'),
+ 'Example of synonym dictionary'
+;
+
+--parser conf
+-- One row per parser; the prs_* oid columns hold pg_proc oids of the
+-- parser's start, next-token, end, headline and token-type functions.
+CREATE TABLE pg_ts_parser (
+ prs_name text not null primary key,
+ prs_start oid not null,
+ prs_nexttoken oid not null,
+ prs_end oid not null,
+ prs_headline oid not null,
+ prs_lextype oid not null,
+ prs_comment text
+) with oids;
+
+-- SQL-level parser interface.
+-- token_type() lists the token types of a parser and parse() splits a text
+-- into tokens; each is available by parser oid, by parser name, and for the
+-- current parser.  set_curprs() selects the current parser.
+
+-- row type returned by token_type()
+CREATE TYPE tokentype
+    AS (tokid int4, alias text, descr text);
+
+CREATE FUNCTION token_type(int4)
+    RETURNS SETOF tokentype
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION token_type(text)
+    RETURNS SETOF tokentype
+    AS 'MODULE_PATHNAME', 'token_type_byname'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION token_type()
+    RETURNS SETOF tokentype
+    AS 'MODULE_PATHNAME', 'token_type_current'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+    RETURNS void
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+    RETURNS void
+    AS 'MODULE_PATHNAME', 'set_curprs_byname'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+-- row type returned by parse()
+CREATE TYPE tokenout
+    AS (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+    RETURNS SETOF tokenout
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION parse(text,text)
+    RETURNS SETOF tokenout
+    AS 'MODULE_PATHNAME', 'parse_byname'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION parse(text)
+    RETURNS SETOF tokenout
+    AS 'MODULE_PATHNAME', 'parse_current'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+--default parser
+-- C support functions of the built-in parser.  None of them is declared
+-- strict.  They are referenced from the 'default' row of pg_ts_parser.
+
+CREATE FUNCTION prsd_start(internal, int4)
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal, internal, internal)
+    RETURNS int4
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION prsd_end(internal)
+    RETURNS void
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION prsd_headline(internal, internal, internal)
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+-- Register the built-in parser.  Columns are named explicitly so the row
+-- does not depend on the physical column order of pg_ts_parser; the
+-- function OIDs are resolved by name from pg_proc at install time.
+INSERT INTO pg_ts_parser
+    (prs_name, prs_start, prs_nexttoken, prs_end,
+     prs_headline, prs_lextype, prs_comment)
+SELECT
+    'default',
+    (SELECT oid FROM pg_proc WHERE proname = 'prsd_start'),
+    (SELECT oid FROM pg_proc WHERE proname = 'prsd_getlexeme'),
+    (SELECT oid FROM pg_proc WHERE proname = 'prsd_end'),
+    (SELECT oid FROM pg_proc WHERE proname = 'prsd_headline'),
+    (SELECT oid FROM pg_proc WHERE proname = 'prsd_lextype'),
+    'Parser from OpenFTS v0.34';
+
+--tsearch config
+
+-- One row per configuration: its name, the parser it uses and an
+-- optional locale string.
+CREATE TABLE pg_ts_cfg (
+    ts_name  text NOT NULL PRIMARY KEY,
+    prs_name text NOT NULL,
+    locale   text
+) WITH OIDS;
+
+-- Per configuration and token alias: the array of dictionary names.
+CREATE TABLE pg_ts_cfgmap (
+    ts_name   text NOT NULL,
+    tok_alias text NOT NULL,
+    dict_name text[],
+    PRIMARY KEY (ts_name, tok_alias)
+) WITH OIDS;
+
+-- set_curcfg: overloads taking an int or a text argument.
+CREATE FUNCTION set_curcfg(int)
+    RETURNS void
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+    RETURNS void
+    AS 'MODULE_PATHNAME', 'set_curcfg_byname'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION show_curcfg()
+    RETURNS oid
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+-- Stock configurations.  Columns are named explicitly; 'simple' omits
+-- locale, which therefore stays NULL.
+INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+    VALUES ('default', 'default', 'C');
+INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+    VALUES ('default_russian', 'default', 'ru_RU.KOI8-R');
+INSERT INTO pg_ts_cfg (ts_name, prs_name)
+    VALUES ('simple', 'default');
+
+copy pg_ts_cfgmap from stdin;
+default lword {en_stem}
+default nlword {simple}
+default word {simple}
+default email {simple}
+default url {simple}
+default host {simple}
+default sfloat {simple}
+default version {simple}
+default part_hword {simple}
+default nlpart_hword {simple}
+default lpart_hword {en_stem}
+default hword {simple}
+default lhword {en_stem}
+default nlhword {simple}
+default uri {simple}
+default file {simple}
+default float {simple}
+default int {simple}
+default uint {simple}
+default_russian lword {en_stem}
+default_russian nlword {ru_stem}
+default_russian word {ru_stem}
+default_russian email {simple}
+default_russian url {simple}
+default_russian host {simple}
+default_russian sfloat {simple}
+default_russian version {simple}
+default_russian part_hword {simple}
+default_russian nlpart_hword {ru_stem}
+default_russian lpart_hword {en_stem}
+default_russian hword {ru_stem}
+default_russian lhword {en_stem}
+default_russian nlhword {ru_stem}
+default_russian uri {simple}
+default_russian file {simple}
+default_russian float {simple}
+default_russian int {simple}
+default_russian uint {simple}
+simple lword {simple}
+simple nlword {simple}
+simple word {simple}
+simple email {simple}
+simple url {simple}
+simple host {simple}
+simple sfloat {simple}
+simple version {simple}
+simple part_hword {simple}
+simple nlpart_hword {simple}
+simple lpart_hword {simple}
+simple hword {simple}
+simple lhword {simple}
+simple nlhword {simple}
+simple uri {simple}
+simple file {simple}
+simple float {simple}
+simple int {simple}
+simple uint {simple}
+\.
+
+--tsvector type
+
+-- Text input/output routines of the type; both strict.
+CREATE FUNCTION tsvector_in(cstring)
+    RETURNS tsvector
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+    RETURNS cstring
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+-- Variable-length type with extended storage.
+CREATE TYPE tsvector (
+    INTERNALLENGTH = -1,
+    INPUT = tsvector_in,
+    OUTPUT = tsvector_out,
+    STORAGE = extended
+);
+
+-- Bound to the C symbol tsvector_length.
+CREATE FUNCTION length(tsvector)
+    RETURNS int4
+    AS 'MODULE_PATHNAME', 'tsvector_length'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+-- to_tsvector: overloads taking (oid, text), (text, text) or (text); the
+-- last two are bound to to_tsvector_name / to_tsvector_current.
+CREATE FUNCTION to_tsvector(oid, text)
+    RETURNS tsvector
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+    RETURNS tsvector
+    AS 'MODULE_PATHNAME', 'to_tsvector_name'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+    RETURNS tsvector
+    AS 'MODULE_PATHNAME', 'to_tsvector_current'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION strip(tsvector)
+    RETURNS tsvector
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION setweight(tsvector, "char")
+    RETURNS tsvector
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION concat(tsvector, tsvector)
+    RETURNS tsvector
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+-- tsvector || tsvector is implemented by concat().
+CREATE OPERATOR || (
+    LEFTARG = tsvector,
+    RIGHTARG = tsvector,
+    PROCEDURE = concat
+);
+
+--query type
+
+-- Text input/output routines of the type; both strict.
+CREATE FUNCTION tsquery_in(cstring)
+    RETURNS tsquery
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+    RETURNS cstring
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+-- Variable-length type; no STORAGE option is given.
+CREATE TYPE tsquery (
+    INTERNALLENGTH = -1,
+    INPUT = tsquery_in,
+    OUTPUT = tsquery_out
+);
+
+-- Bound to the C symbol tsquerytree.
+CREATE FUNCTION querytree(tsquery)
+    RETURNS text
+    AS 'MODULE_PATHNAME', 'tsquerytree'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+-- to_tsquery: overloads taking (oid, text), (text, text) or (text); the
+-- last two are bound to to_tsquery_name / to_tsquery_current.
+CREATE FUNCTION to_tsquery(oid, text)
+    RETURNS tsquery
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'c'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+    RETURNS tsquery
+    AS 'MODULE_PATHNAME','to_tsquery_name'
+    LANGUAGE 'c'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+    RETURNS tsquery
+    AS 'MODULE_PATHNAME','to_tsquery_current'
+    LANGUAGE 'c'
+    WITH (isstrict, iscachable);
+
+--operations
+
+-- Match functions behind the two argument orders of the @@ operator.
+CREATE FUNCTION exectsq(tsvector, tsquery)
+    RETURNS bool
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+COMMENT ON FUNCTION exectsq(tsvector, tsquery)
+    IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+    RETURNS bool
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector)
+    IS 'boolean operation with text index';
+
+-- tsvector @@ tsquery and tsquery @@ tsvector; each names '@@' as its
+-- commutator.
+CREATE OPERATOR @@ (
+    LEFTARG = tsvector,
+    RIGHTARG = tsquery,
+    PROCEDURE = exectsq,
+    COMMUTATOR = '@@',
+    RESTRICT = contsel,
+    JOIN = contjoinsel
+);
+
+CREATE OPERATOR @@ (
+    LEFTARG = tsquery,
+    RIGHTARG = tsvector,
+    PROCEDURE = rexectsq,
+    COMMUTATOR = '@@',
+    RESTRICT = contsel,
+    JOIN = contjoinsel
+);
+
+--Trigger
+-- Trigger function; its arguments are supplied by CREATE TRIGGER.
+CREATE FUNCTION tsearch2()
+    RETURNS trigger
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+--Relevance
+
+-- rank: with or without a leading float4[] argument and with or without a
+-- trailing int4.  The float4[] forms use the default C symbol; the others
+-- are bound to rank_def.
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+    RETURNS float4
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+    RETURNS float4
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+    RETURNS float4
+    AS 'MODULE_PATHNAME', 'rank_def'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+    RETURNS float4
+    AS 'MODULE_PATHNAME', 'rank_def'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+-- rank_cd: with or without a leading int4 argument and with or without a
+-- trailing int4.  The forms without the leading int4 are bound to
+-- rank_cd_def.
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+    RETURNS float4
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+    RETURNS float4
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+    RETURNS float4
+    AS 'MODULE_PATHNAME', 'rank_cd_def'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+    RETURNS float4
+    AS 'MODULE_PATHNAME', 'rank_cd_def'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+-- headline: the leading argument is an oid, a text, or absent; each form
+-- exists with and without a trailing text argument.  The C symbols are
+-- headline, headline_byname and headline_current respectively.
+CREATE FUNCTION headline(oid, text, tsquery, text)
+    RETURNS text
+    AS 'MODULE_PATHNAME', 'headline'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+    RETURNS text
+    AS 'MODULE_PATHNAME', 'headline'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+    RETURNS text
+    AS 'MODULE_PATHNAME', 'headline_byname'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+    RETURNS text
+    AS 'MODULE_PATHNAME', 'headline_byname'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+    RETURNS text
+    AS 'MODULE_PATHNAME', 'headline_current'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+    RETURNS text
+    AS 'MODULE_PATHNAME', 'headline_current'
+    LANGUAGE 'C'
+    WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type
+
+-- Text input/output routines of the key type; both strict.
+CREATE FUNCTION gtsvector_in(cstring)
+    RETURNS gtsvector
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+    RETURNS cstring
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE TYPE gtsvector (
+    INTERNALLENGTH = -1,
+    INPUT = gtsvector_in,
+    OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+-- Only gtsvector_penalty is declared strict.
+CREATE FUNCTION gtsvector_consistent(gtsvector, internal, int4)
+    RETURNS bool
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_compress(internal)
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal, internal, internal)
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+    RETURNS _int4
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+-- Default GiST class for tsvector; keys are stored as gtsvector and the
+-- @@ operator is marked RECHECK.
+CREATE OPERATOR CLASS gist_tsvector_ops
+    DEFAULT FOR TYPE tsvector USING gist
+AS
+    OPERATOR 1 @@ (tsvector, tsquery) RECHECK ,
+    FUNCTION 1 gtsvector_consistent (gtsvector, internal, int4),
+    FUNCTION 2 gtsvector_union (bytea, internal),
+    FUNCTION 3 gtsvector_compress (internal),
+    FUNCTION 4 gtsvector_decompress (internal),
+    FUNCTION 5 gtsvector_penalty (internal, internal, internal),
+    FUNCTION 6 gtsvector_picksplit (internal, internal),
+    FUNCTION 7 gtsvector_same (gtsvector, gtsvector, internal),
+    STORAGE gtsvector;
+
+
+--stat info
+-- Row type returned by stat(text).
+CREATE TYPE statinfo AS (
+    word   text,
+    ndoc   int4,
+    nentry int4
+);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+-- INTERNALLENGTH = -1,
+-- INPUT = tsstat_in,
+-- OUTPUT = tsstat_out,
+-- STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS 'MODULE_PATHNAME'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+-- returns setof statinfo
+-- as 'MODULE_PATHNAME'
+-- language 'C'
+-- with (isstrict);
+--
+--CREATE AGGREGATE stat (
+-- BASETYPE=tsvector,
+-- SFUNC=ts_accum,
+-- STYPE=tsstat,
+-- FINALFUNC = ts_accum_finish,
+-- initcond = ''
+--);
+
+-- Bound to the C symbol ts_stat; returns a set of statinfo rows.
+CREATE FUNCTION stat(text)
+    RETURNS SETOF statinfo
+    AS 'MODULE_PATHNAME', 'ts_stat'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+--reset - just for debugging
+CREATE FUNCTION reset_tsearch()
+    RETURNS void
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector, tsquery)
+    RETURNS text
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
+
+
+--example of ISpell dictionary
+--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_id=4;
+--example of synonym dict
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
+END;
--- /dev/null
+/*
+ * In/Out definitions for tsvector type
+ * Internal structure:
+ * string of values, array of the position of each lexeme in the string and its length
+ * Teodor Sigaev
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h> /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(tsvector_in);
+Datum tsvector_in(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_out);
+Datum tsvector_out(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(to_tsvector);
+Datum to_tsvector(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_current);
+Datum to_tsvector_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(to_tsvector_name);
+Datum to_tsvector_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsearch2);
+Datum tsearch2(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(tsvector_length);
+Datum tsvector_length(PG_FUNCTION_ARGS);
+
+/*
+ * in/out text index type
+ */
+/*
+ * qsort() comparator ordering WordEntryPos items by ascending pos.
+ *
+ * Equal positions compare as 0.  A comparator that reports "greater" for
+ * equal keys in both argument orders is inconsistent, which qsort() does
+ * not permit.  Weights are deliberately ignored here; uniquePos() merges
+ * the weights of entries that share a position.
+ */
+static int
+comparePos(const void *a, const void *b)
+{
+    const WordEntryPos *pa = (const WordEntryPos *) a;
+    const WordEntryPos *pb = (const WordEntryPos *) b;
+
+    if (pa->pos == pb->pos)
+        return 0;
+    return (pa->pos > pb->pos) ? 1 : -1;
+}
+
+/*
+ * Sort the l positions in a and collapse entries with equal pos in place.
+ * When two entries share a position the larger weight is kept.
+ * Compaction stops early once MAXNUMPOS entries have been kept or a kept
+ * entry has pos == MAXENTRYPOS-1.
+ * Returns the number of entries left at the front of a.
+ */
+static int
+uniquePos(WordEntryPos *a, int4 l) {
+ WordEntryPos *ptr, *res;
+
+ res=a;
+ /* a single entry is already sorted and unique */
+ if (l==1)
+ return l;
+
+ qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
+
+ /* res: last kept entry; ptr: next entry to examine */
+ ptr = a + 1;
+ while (ptr - a < l) {
+ if ( ptr->pos != res->pos ) {
+ res++;
+ res->pos = ptr->pos;
+ res->weight = ptr->weight;
+ /* stop at the entry-count limit or at the largest storable position */
+ if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
+ break;
+ } else if ( ptr->weight > res->weight )
+ res->weight = ptr->weight;
+ ptr++;
+ }
+ return res + 1 - a;
+}
+
+/* Character buffer the WordEntryIN offsets refer to while compareentry() runs. */
+static char *BufferStr;
+
+/*
+ * qsort() comparator for WordEntryIN items: shorter lexemes sort first,
+ * lexemes of equal length are ordered by strncmp() over their bytes in
+ * BufferStr.
+ */
+static int
+compareentry(const void *a, const void *b)
+{
+    const WordEntryIN *ea = (const WordEntryIN *) a;
+    const WordEntryIN *eb = (const WordEntryIN *) b;
+
+    if (ea->entry.len != eb->entry.len)
+        return (ea->entry.len > eb->entry.len) ? 1 : -1;
+
+    return strncmp(&BufferStr[ea->entry.pos],
+                   &BufferStr[eb->entry.pos],
+                   ea->entry.len);
+}
+
+/*
+ * Make the position list of a finished entry unique and return the number
+ * of bytes the entry needs in the tsvector data area: the short-aligned
+ * lexeme plus, when it has positions, the count slot and the positions.
+ */
+static int4
+finishentry(WordEntryIN * e)
+{
+    int4        size = SHORTALIGN(e->entry.len);
+
+    if (e->entry.haspos)
+    {
+        /* slot 0 of the array holds the number of positions as uint16 */
+        uint16     *npos = (uint16 *) (e->pos);
+
+        *npos = uniquePos(&(e->pos[1]), *npos);
+        size += (*npos + 1) * sizeof(WordEntryPos);
+    }
+    return size;
+}
+
+/*
+ * Sort the l parsed entries of a (l >= 1) by lexeme and merge duplicates
+ * in place, concatenating their position lists.
+ *
+ * buf        - character buffer the entry offsets refer to
+ * outbuflen  - output only: set to the exact number of data bytes needed
+ *              to store all remaining entries (the incoming value is
+ *              ignored; it used to be added to the result and the count
+ *              slot of each position list was not accounted for)
+ *
+ * Returns the number of unique entries left at the front of a.
+ */
+static int
+uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+{
+    WordEntryIN *ptr,
+               *res;
+
+    *outbuflen = 0;
+    res = a;
+
+    if (l > 1)
+    {
+        BufferStr = buf;
+        qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
+    }
+
+    /* res: entry being accumulated; ptr: next sorted entry to examine */
+    ptr = a + 1;
+    while (ptr - a < l)
+    {
+        if (!(ptr->entry.len == res->entry.len &&
+              strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
+        {
+            /* new lexeme: close the previous entry and start the next one */
+            *outbuflen += finishentry(res);
+            res++;
+            if (res != ptr)
+                memcpy(res, ptr, sizeof(WordEntryIN));
+        }
+        else if (ptr->entry.haspos)
+        {
+            if (res->entry.haspos)
+            {
+                /* append the duplicate's positions after the existing ones */
+                int4        len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
+
+                res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
+                memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
+                       &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
+                *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
+                pfree(ptr->pos);
+            }
+            else
+            {
+                /* take over the duplicate's position array */
+                res->entry.haspos = 1;
+                res->pos = ptr->pos;
+            }
+        }
+        ptr++;
+    }
+
+    /* close the last accumulated entry */
+    *outbuflen += finishentry(res);
+
+    return res + 1 - a;
+}
+
+/* States of the scanner in gettoken_tsvector() */
+#define WAITWORD 1
+#define WAITENDWORD 2
+#define WAITNEXTCHAR 3
+#define WAITENDCMPLX 4
+#define WAITPOSINFO 5
+#define INPOSINFO 6
+#define WAITPOSDELIM 7
+
+/*
+ * Double the word buffer of the local variable "state" when fewer than two
+ * bytes are free after curpos; curpos is re-based on the new buffer.
+ */
+#define RESIZEPRSBUF \
+do { \
+ if ( state->curpos - state->word + 1 >= state->len ) \
+ { \
+ int4 clen = state->curpos - state->word; \
+ state->len *= 2; \
+ state->word = (char*)repalloc( (void*)state->word, state->len ); \
+ state->curpos = state->word + clen; \
+ } \
+} while (0)
+
+/*
+ * Scan the next lexeme from state->prsbuf into state->word.
+ *
+ * A word is either a bare run of characters or a '...'-quoted string; a
+ * backslash makes the following character literal.  When
+ * state->oprisdelim is false a word may be followed by ":pos[weight],...";
+ * the positions are collected in state->pos (slot 0 holds their count as
+ * uint16) and state->alen is the allocated size of that array, 0 when the
+ * word carries no position info.  When oprisdelim is true, characters
+ * matching ISOPERATOR end a word and ':' ends it without parsing
+ * positions.
+ *
+ * Returns 1 when a word was read (state->word is NUL-terminated and
+ * state->curpos points at the terminator), 0 when the input is exhausted.
+ * Malformed input is reported with elog(ERROR).
+ *
+ * The <ctype.h> classifiers receive the character cast to unsigned char;
+ * passing a negative plain char to them is undefined behaviour.
+ */
+int4
+gettoken_tsvector(TI_IN_STATE * state)
+{
+    int4        oldstate = 0;
+
+    state->curpos = state->word;
+    state->state = WAITWORD;
+    state->alen = 0;
+
+    while (1)
+    {
+        if (state->state == WAITWORD)
+        {
+            if (*(state->prsbuf) == '\0')
+                return 0;
+            else if (*(state->prsbuf) == '\'')
+                state->state = WAITENDCMPLX;
+            else if (*(state->prsbuf) == '\\')
+            {
+                state->state = WAITNEXTCHAR;
+                oldstate = WAITENDWORD;
+            }
+            else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
+                elog(ERROR, "Syntax error");
+            else if (*(state->prsbuf) != ' ')
+            {
+                *(state->curpos) = *(state->prsbuf);
+                state->curpos++;
+                state->state = WAITENDWORD;
+            }
+        }
+        else if (state->state == WAITNEXTCHAR)
+        {
+            /* character after a backslash is copied verbatim */
+            if (*(state->prsbuf) == '\0')
+                elog(ERROR, "There is no escaped character");
+            else
+            {
+                RESIZEPRSBUF;
+                *(state->curpos) = *(state->prsbuf);
+                state->curpos++;
+                state->state = oldstate;
+            }
+        }
+        else if (state->state == WAITENDWORD)
+        {
+            if (*(state->prsbuf) == '\\')
+            {
+                state->state = WAITNEXTCHAR;
+                oldstate = WAITENDWORD;
+            }
+            else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
+                     (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
+            {
+                /* delimiter is left unconsumed for the next call */
+                RESIZEPRSBUF;
+                if (state->curpos == state->word)
+                    elog(ERROR, "Syntax error");
+                *(state->curpos) = '\0';
+                return 1;
+            }
+            else if (*(state->prsbuf) == ':')
+            {
+                if (state->curpos == state->word)
+                    elog(ERROR, "Syntax error");
+                *(state->curpos) = '\0';
+                if (state->oprisdelim)
+                    return 1;
+                else
+                    state->state = INPOSINFO;
+            }
+            else
+            {
+                RESIZEPRSBUF;
+                *(state->curpos) = *(state->prsbuf);
+                state->curpos++;
+            }
+        }
+        else if (state->state == WAITENDCMPLX)
+        {
+            if (*(state->prsbuf) == '\'')
+            {
+                RESIZEPRSBUF;
+                *(state->curpos) = '\0';
+                if (state->curpos == state->word)
+                    elog(ERROR, "Syntax error");
+                if (state->oprisdelim)
+                {
+                    /* step over the closing quote before returning */
+                    state->prsbuf++;
+                    return 1;
+                }
+                else
+                    state->state = WAITPOSINFO;
+            }
+            else if (*(state->prsbuf) == '\\')
+            {
+                state->state = WAITNEXTCHAR;
+                oldstate = WAITENDCMPLX;
+            }
+            else if (*(state->prsbuf) == '\0')
+                elog(ERROR, "Syntax error");
+            else
+            {
+                RESIZEPRSBUF;
+                *(state->curpos) = *(state->prsbuf);
+                state->curpos++;
+            }
+        }
+        else if (state->state == WAITPOSINFO)
+        {
+            if (*(state->prsbuf) == ':')
+                state->state = INPOSINFO;
+            else
+                return 1;
+        }
+        else if (state->state == INPOSINFO)
+        {
+            if (isdigit((unsigned char) *(state->prsbuf)))
+            {
+                if (state->alen == 0)
+                {
+                    state->alen = 4;
+                    state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
+                    *(uint16 *) (state->pos) = 0;
+                }
+                else if (*(uint16 *) (state->pos) + 1 >= state->alen)
+                {
+                    state->alen *= 2;
+                    state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
+                }
+                (*(uint16 *) (state->pos))++;
+                /* atoi() reads the whole number; WAITPOSDELIM skips its remaining digits */
+                state->pos[*(uint16 *) (state->pos)].pos = LIMITPOS(atoi(state->prsbuf));
+                if (state->pos[*(uint16 *) (state->pos)].pos == 0)
+                    elog(ERROR, "Wrong position info");
+                state->pos[*(uint16 *) (state->pos)].weight = 0;
+                state->state = WAITPOSDELIM;
+            }
+            else
+                elog(ERROR, "Syntax error");
+        }
+        else if (state->state == WAITPOSDELIM)
+        {
+            if (*(state->prsbuf) == ',')
+            {
+                state->state = INPOSINFO;
+            }
+            else if (tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf) == '*')
+            {
+                if (state->pos[*(uint16 *) (state->pos)].weight)
+                    elog(ERROR, "Syntax error");
+                state->pos[*(uint16 *) (state->pos)].weight = 3;
+            }
+            else if (tolower((unsigned char) *(state->prsbuf)) == 'b')
+            {
+                if (state->pos[*(uint16 *) (state->pos)].weight)
+                    elog(ERROR, "Syntax error");
+                state->pos[*(uint16 *) (state->pos)].weight = 2;
+            }
+            else if (tolower((unsigned char) *(state->prsbuf)) == 'c')
+            {
+                if (state->pos[*(uint16 *) (state->pos)].weight)
+                    elog(ERROR, "Syntax error");
+                state->pos[*(uint16 *) (state->pos)].weight = 1;
+            }
+            else if (tolower((unsigned char) *(state->prsbuf)) == 'd')
+            {
+                if (state->pos[*(uint16 *) (state->pos)].weight)
+                    elog(ERROR, "Syntax error");
+                state->pos[*(uint16 *) (state->pos)].weight = 0;
+            }
+            else if (isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0')
+            {
+                return 1;
+            }
+            else if (!isdigit((unsigned char) *(state->prsbuf)))
+                elog(ERROR, "Syntax error");
+        }
+        else
+            elog(ERROR, "Inner bug :(");
+        state->prsbuf++;
+    }
+
+    return 0;
+}
+
+/*
+ * Input function of the tsvector type.
+ *
+ * The C string argument is split with gettoken_tsvector(); the lexemes
+ * are collected in a scratch buffer, sorted and de-duplicated by
+ * uniqueentry(), and then packed into a freshly allocated tsvector:
+ * WordEntry array first, followed by each lexeme (short-aligned) and its
+ * optional position list.
+ *
+ * Errors: "Word is too long" for a lexeme of MAXSTRLEN bytes or more,
+ * "Too long value" when an offset does not fit the 20-bit pos field of
+ * WordEntry.  Offsets must stay below MAXSTRPOS (1<<20); the scratch
+ * offset used to be allowed to equal MAXSTRPOS and the final offset was
+ * not checked at all, so oversized values wrapped around silently.
+ */
+Datum
+tsvector_in(PG_FUNCTION_ARGS)
+{
+    char       *buf = PG_GETARG_CSTRING(0);
+    TI_IN_STATE state;
+    WordEntryIN *arr;
+    WordEntry  *inarr;
+    int4        len = 0,
+                totallen = 64;
+    tsvector   *in;
+    char       *tmpbuf,
+               *cur;
+    int4        i,
+                buflen = 256;
+
+    state.prsbuf = buf;
+    state.len = 32;
+    state.word = (char *) palloc(state.len);
+    state.oprisdelim = false;
+
+    arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+    cur = tmpbuf = (char *) palloc(buflen);
+    while (gettoken_tsvector(&state))
+    {
+        /* grow the entry array and the scratch buffer as needed */
+        if (len >= totallen)
+        {
+            totallen *= 2;
+            arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
+        }
+        while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
+        {
+            int4        dist = cur - tmpbuf;
+
+            buflen *= 2;
+            tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+            cur = tmpbuf + dist;
+        }
+        if (state.curpos - state.word >= MAXSTRLEN)
+            elog(ERROR, "Word is too long");
+        arr[len].entry.len = state.curpos - state.word;
+        if (cur - tmpbuf >= MAXSTRPOS)
+            elog(ERROR, "Too long value");
+        arr[len].entry.pos = cur - tmpbuf;
+        memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
+        cur += arr[len].entry.len;
+        if (state.alen)
+        {
+            /* the entry takes ownership of the position array */
+            arr[len].entry.haspos = 1;
+            arr[len].pos = state.pos;
+        }
+        else
+            arr[len].entry.haspos = 0;
+        len++;
+    }
+    pfree(state.word);
+
+    /* buflen now becomes the size of the data area of the result */
+    if (len > 0)
+        len = uniqueentry(arr, len, tmpbuf, &buflen);
+    totallen = CALCDATASIZE(len, buflen);
+    in = (tsvector *) palloc(totallen);
+    memset(in, 0, totallen);
+    in->len = totallen;
+    in->size = len;
+    cur = STRPTR(in);
+    inarr = ARRPTR(in);
+    for (i = 0; i < len; i++)
+    {
+        /* the final offset must fit the pos bit-field as well */
+        if (cur - STRPTR(in) >= MAXSTRPOS)
+            elog(ERROR, "Too long value");
+        memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+        arr[i].entry.pos = cur - STRPTR(in);
+        cur += SHORTALIGN(arr[i].entry.len);
+        if (arr[i].entry.haspos)
+        {
+            /* count slot plus positions are copied as one run */
+            memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
+            cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
+            pfree(arr[i].pos);
+        }
+        memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
+    }
+    pfree(tmpbuf);
+    pfree(arr);
+    PG_RETURN_POINTER(in);
+}
+
+/*
+ * SQL function length(tsvector): returns the size field of the detoasted
+ * argument, i.e. the number of entries stored in the vector.
+ */
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+    tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+    int4        nentries;
+
+    nentries = vec->size;
+    PG_FREE_IF_COPY(vec, 0);
+    PG_RETURN_INT32(nentries);
+}
+
+/*
+ * Output function of the tsvector type.
+ *
+ * Every lexeme is printed in single quotes, separated by a space; a
+ * position list follows as ":pos[A|B|C],..." (weight 0 prints no letter).
+ * Quote and backslash characters inside a lexeme are preceded by a
+ * backslash.  The input scanner treats backslash as an escape character,
+ * so an unescaped backslash (as printed before) was lost when the text
+ * was read back in.
+ *
+ * Buffer size: per entry two quotes and a separator, twice the lexeme
+ * length to leave room for escapes, and 7 bytes per position.
+ */
+Datum
+tsvector_out(PG_FUNCTION_ARGS)
+{
+    tsvector   *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+    char       *outbuf;
+    int4        i,
+                j,
+                lenbuf = 0,
+                pp;
+    WordEntry  *ptr = ARRPTR(out);
+    char       *curin,
+               *curout;
+
+    lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */;
+    for (i = 0; i < out->size; i++)
+    {
+        lenbuf += ptr[i].len * 2 /* for escape */;
+        if (ptr[i].haspos)
+            lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
+    }
+
+    curout = outbuf = (char *) palloc(lenbuf);
+    for (i = 0; i < out->size; i++)
+    {
+        curin = STRPTR(out) + ptr->pos;
+        if (i != 0)
+            *curout++ = ' ';
+        *curout++ = '\'';
+        j = ptr->len;
+        while (j--)
+        {
+            if (*curin == '\'' || *curin == '\\')
+            {
+                int4        pos = curout - outbuf;
+
+                outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
+                curout = outbuf + pos;
+                *curout++ = '\\';
+            }
+            *curout++ = *curin++;
+        }
+        *curout++ = '\'';
+        if ((pp = POSDATALEN(out, ptr)) != 0)
+        {
+            WordEntryPos *wptr;
+
+            *curout++ = ':';
+            wptr = POSDATAPTR(out, ptr);
+            while (pp)
+            {
+                sprintf(curout, "%d", wptr->pos);
+                curout = strchr(curout, '\0');
+                switch (wptr->weight)
+                {
+                    case 3:
+                        *curout++ = 'A';
+                        break;
+                    case 2:
+                        *curout++ = 'B';
+                        break;
+                    case 1:
+                        *curout++ = 'C';
+                        break;
+                    case 0:
+                    default:
+                        break;
+                }
+                if (pp > 1)
+                    *curout++ = ',';
+                pp--;
+                wptr++;
+            }
+        }
+        ptr++;
+    }
+    *curout = '\0';
+    outbuf[lenbuf - 1] = '\0';
+    PG_FREE_IF_COPY(out, 0);
+    PG_RETURN_POINTER(outbuf);
+}
+
+/*
+ * qsort() comparator for WORD items: by length, then by the bytes of the
+ * word, then by position.
+ *
+ * Items that agree on all three keys compare as 0; returning -1 for them
+ * in both argument orders made the ordering inconsistent, which qsort()
+ * does not permit.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+    const WORD *wa = (const WORD *) a;
+    const WORD *wb = (const WORD *) b;
+    int         res;
+
+    if (wa->len != wb->len)
+        return (wa->len > wb->len) ? 1 : -1;
+
+    res = strncmp(wa->word, wb->word, wb->len);
+    if (res != 0)
+        return res;
+
+    if (wa->pos.pos == wb->pos.pos)
+        return 0;
+    return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * Sort the l words in a (l >= 1) and merge equal words in place.
+ *
+ * Each kept word gets a palloc'd uint16 array in pos.apos: slot 0 holds the
+ * number of positions, the positions (clamped by LIMITPOS) follow.  alen is
+ * the allocated number of slots.  The word string of a merged duplicate is
+ * freed.  Returns the number of words left at the front of a.
+ *
+ * NOTE(review): pos.pos is read into tmppos before pos.apos is assigned,
+ * which suggests the two members share storage -- confirm against the WORD
+ * declaration.
+ */
+static int
+uniqueWORD(WORD * a, int4 l)
+{
+ WORD *ptr,
+ *res;
+ int tmppos;
+
+ /* single word: only convert its position into array form */
+ if (l == 1) {
+ tmppos=LIMITPOS(a->pos.pos);
+ a->alen=2;
+ a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+ a->pos.apos[0]=1;
+ a->pos.apos[1]=tmppos;
+ return l;
+ }
+
+ res = a;
+ ptr = a + 1;
+
+ qsort((void *) a, l, sizeof(WORD), compareWORD);
+ /* convert the position of the first word into array form */
+ tmppos=LIMITPOS(a->pos.pos);
+ a->alen=2;
+ a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
+ a->pos.apos[0]=1;
+ a->pos.apos[1]=tmppos;
+
+ /* res: word being accumulated; ptr: next sorted word to examine */
+ while (ptr - a < l)
+ {
+ if (!(ptr->len == res->len &&
+ strncmp(ptr->word, res->word, res->len) == 0))
+ {
+ /* different word: start a new accumulated entry */
+ res++;
+ res->len = ptr->len;
+ res->word = ptr->word;
+ tmppos=LIMITPOS(ptr->pos.pos);
+ res->alen=2;
+ res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
+ res->pos.apos[0]=1;
+ res->pos.apos[1]=tmppos;
+ } else {
+ /* same word: drop the duplicate string, keep its position */
+ pfree(ptr->word);
+ /* positions beyond the count limit or after MAXENTRYPOS-1 are dropped */
+ if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
+ if ( res->pos.apos[0]+1 >= res->alen ) {
+ res->alen*=2;
+ res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
+ }
+ res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
+ res->pos.apos[0]++;
+ }
+ }
+ ptr++;
+ }
+
+ return res + 1 - a;
+}
+
+/*
+ * Build a tsvector from the words collected in prs.
+ *
+ * The words are sorted and merged by uniqueWORD(), then packed: WordEntry
+ * array first, followed by each lexeme (short-aligned) and, for words
+ * with positions, a uint16 count and the positions with weight 0.
+ * prs->words, the word strings and the position arrays are freed.
+ *
+ * Raises "Value is too big" when a lexeme offset does not fit the 20-bit
+ * pos field of WordEntry; offsets must stay below MAXSTRPOS (1<<20).  An
+ * offset equal to MAXSTRPOS used to pass the check and wrap to 0.
+ */
+static tsvector *
+makevalue(PRSTEXT * prs)
+{
+    int4        i,
+                j,
+                lenstr = 0,
+                totallen;
+    tsvector   *in;
+    WordEntry  *ptr;
+    char       *str,
+               *cur;
+
+    prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+    /* size of the data area */
+    for (i = 0; i < prs->curwords; i++)
+    {
+        lenstr += SHORTALIGN(prs->words[i].len);
+
+        if (prs->words[i].alen)
+            lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+    }
+
+    totallen = CALCDATASIZE(prs->curwords, lenstr);
+    in = (tsvector *) palloc(totallen);
+    memset(in, 0, totallen);
+    in->len = totallen;
+    in->size = prs->curwords;
+
+    ptr = ARRPTR(in);
+    cur = str = STRPTR(in);
+    for (i = 0; i < prs->curwords; i++)
+    {
+        ptr->len = prs->words[i].len;
+        if (cur - str >= MAXSTRPOS)
+            elog(ERROR, "Value is too big");
+        ptr->pos = cur - str;
+        memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+        pfree(prs->words[i].word);
+        cur += SHORTALIGN(prs->words[i].len);
+        if (prs->words[i].alen)
+        {
+            WordEntryPos *wptr;
+
+            ptr->haspos = 1;
+            /* count slot first, then one WordEntryPos per position */
+            *(uint16 *) cur = prs->words[i].pos.apos[0];
+            wptr = POSDATAPTR(in, ptr);
+            for (j = 0; j < *(uint16 *) cur; j++)
+            {
+                wptr[j].weight = 0;
+                wptr[j].pos = prs->words[i].pos.apos[j + 1];
+            }
+            cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+            pfree(prs->words[i].pos.apos);
+        }
+        else
+            ptr->haspos = 0;
+        ptr++;
+    }
+    pfree(prs->words);
+    return in;
+}
+
+
+/*
+ * to_tsvector(cfg id, text): parse the text with the configuration found
+ * by findcfg() and build a tsvector from the resulting words.  A text
+ * that yields no words gives a vector with only the header and size 0.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+    text       *doc = PG_GETARG_TEXT_P(1);
+    PRSTEXT     prs;
+    tsvector   *result = NULL;
+    TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
+
+    /* empty word list with room for 32 entries */
+    prs.lenwords = 32;
+    prs.curwords = 0;
+    prs.pos = 0;
+    prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+    parsetext_v2(cfg, &prs, VARDATA(doc), VARSIZE(doc) - VARHDRSZ);
+    PG_FREE_IF_COPY(doc, 1);
+
+    if (prs.curwords == 0)
+    {
+        /* nothing parsed: header-only vector */
+        pfree(prs.words);
+        result = palloc(CALCDATASIZE(0, 0));
+        result->len = CALCDATASIZE(0, 0);
+        result->size = 0;
+    }
+    else
+        result = makevalue(&prs);
+
+    PG_RETURN_POINTER(result);
+}
+
+/*
+ * to_tsvector(cfg name, text): translate the configuration name with
+ * name2id_cfg() and delegate to to_tsvector().
+ */
+Datum
+to_tsvector_name(PG_FUNCTION_ARGS)
+{
+    text       *cfgname = PG_GETARG_TEXT_P(0);
+    Datum       cfgid = Int32GetDatum(name2id_cfg(cfgname));
+    Datum       vector;
+
+    vector = DirectFunctionCall3(to_tsvector,
+                                 cfgid,
+                                 PG_GETARG_DATUM(1),
+                                 (Datum) 0);
+    PG_FREE_IF_COPY(cfgname, 0);
+    PG_RETURN_DATUM(vector);
+}
+
+/*
+ * to_tsvector(text): delegate to to_tsvector() with the configuration id
+ * returned by get_currcfg().
+ */
+Datum
+to_tsvector_current(PG_FUNCTION_ARGS)
+{
+    Datum       cfgid = Int32GetDatum(get_currcfg());
+
+    PG_RETURN_DATUM(DirectFunctionCall3(to_tsvector,
+                                        cfgid,
+                                        PG_GETARG_DATUM(0),
+                                        (Datum) 0));
+}
+
+/*
+ * Look up a one-argument function called fname whose argument type is
+ * text.  Returns the OID of the first such candidate, or InvalidOid when
+ * there is none.  The candidate list is freed before returning.
+ */
+static Oid
+findFunc(char *fname)
+{
+    FuncCandidateList cand,
+                next;
+    Oid         found = InvalidOid;
+    List       *qualname = makeList1(makeString(fname));
+
+    cand = FuncnameGetCandidates(qualname, 1);
+    freeList(qualname);
+
+    for (; cand != NULL; cand = next)
+    {
+        next = cand->next;
+        /* remember only the first text-argument candidate */
+        if (found == InvalidOid && cand->args[0] == TEXTOID)
+            found = cand->oid;
+        pfree(cand);
+    }
+
+    return found;
+}
+
+/*
+ * Trigger: tsearch2(tsvector_field, text_field1, ...)
+ *
+ * Row-level BEFORE INSERT/UPDATE trigger.  The first trigger argument names
+ * the tsvector column to fill; every further argument names either a
+ * character column to index or a one-argument text function.  Once a
+ * function name has been seen it is applied to the value of every column
+ * named after it.  All columns are parsed with the current configuration
+ * and the combined words are stored in the tsvector column of the returned
+ * tuple.
+ */
+Datum
+tsearch2(PG_FUNCTION_ARGS)
+{
+ TriggerData *trigdata;
+ Trigger *trigger;
+ Relation rel;
+ HeapTuple rettuple = NULL;
+ TSCfgInfo *cfg=findcfg(get_currcfg());
+ int numidxattr,
+ i;
+ PRSTEXT prs;
+ Datum datum = (Datum) 0;
+ Oid funcoid = InvalidOid;
+
+ /* reject every calling context except row-level BEFORE INSERT/UPDATE */
+ if (!CALLED_AS_TRIGGER(fcinfo))
+ elog(ERROR, "TSearch: Not fired by trigger manager");
+
+ trigdata = (TriggerData *) fcinfo->context;
+ if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
+ elog(ERROR, "TSearch: Can't process STATEMENT events");
+ if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
+ elog(ERROR, "TSearch: Must be fired BEFORE event");
+
+ /* the tuple that is about to be stored */
+ if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+ rettuple = trigdata->tg_trigtuple;
+ else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+ rettuple = trigdata->tg_newtuple;
+ else
+ elog(ERROR, "TSearch: Unknown event");
+
+ trigger = trigdata->tg_trigger;
+ rel = trigdata->tg_relation;
+
+ if (trigger->tgnargs < 2)
+ elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
+
+ /* attribute number of the tsvector column to fill */
+ numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+ if (numidxattr == SPI_ERROR_NOATTRIBUTE)
+ elog(ERROR, "TSearch: Can not find tsvector_field");
+
+ /* empty word list with room for 32 entries */
+ prs.lenwords = 32;
+ prs.curwords = 0;
+ prs.pos = 0;
+ prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
+
+ /* find all words in indexable column */
+ for (i = 1; i < trigger->tgnargs; i++)
+ {
+ int numattr;
+ Oid oidtype;
+ Datum txt_toasted;
+ bool isnull;
+ text *txt;
+
+ numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+ if (numattr == SPI_ERROR_NOATTRIBUTE)
+ {
+ /* not a column: treat the argument as a function name */
+ funcoid=findFunc(trigger->tgargs[i]);
+ if ( funcoid==InvalidOid )
+ elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
+ continue;
+ }
+ oidtype = SPI_gettypeid(rel->rd_att, numattr);
+ /* We assume char() and varchar() are binary-equivalent to text */
+ if (!(oidtype == TEXTOID ||
+ oidtype == VARCHAROID ||
+ oidtype == BPCHAROID))
+ {
+ elog(WARNING, "TSearch: '%s' is not of character type",
+ trigger->tgargs[i]);
+ continue;
+ }
+ txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+ if (isnull)
+ continue;
+
+ if ( funcoid!=InvalidOid ) {
+ /* run the column value through the function before parsing */
+ text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
+ funcoid,
+ PointerGetDatum(txt_toasted)
+ ));
+ txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
+ /* no detoast copy was made: make the pfree test below skip txt */
+ if ( txt == txttmp )
+ txt_toasted = PointerGetDatum(txt);
+ } else
+ txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
+
+ parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
+ /* free txt only when it is a detoasted copy */
+ if (txt != (text*)DatumGetPointer(txt_toasted) )
+ pfree(txt);
+ }
+
+ /* make tsvector value */
+ if (prs.curwords)
+ {
+ datum = PointerGetDatum(makevalue(&prs));
+ rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+ &datum, NULL);
+ pfree(DatumGetPointer(datum));
+ }
+ else
+ {
+ /* no words at all: store a header-only vector */
+ tsvector *out = palloc(CALCDATASIZE(0,0));
+ out->len = CALCDATASIZE(0,0);
+ out->size = 0;
+ datum = PointerGetDatum(out);
+ pfree(prs.words);
+ rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
+ &datum, NULL);
+ }
+
+ if (rettuple == NULL)
+ elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
+
+ return PointerGetDatum(rettuple);
+}
--- /dev/null
+#ifndef __TXTIDX_H__
+#define __TXTIDX_H__
+
+/*
+#define TXTIDX_DEBUG
+*/
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+
+/*
+ * Descriptor of one lexeme stored in a tsvector, packed into 32 bits.
+ */
+typedef struct {
+    uint32
+        haspos:1,   /* 1 if a position list follows the lexeme text */
+        len:11,     /* length of the lexeme text; MAX 2Kb */
+        pos:20;     /* offset of the lexeme text from STRPTR(); MAX 1Mb */
+} WordEntry;
+#define MAXSTRLEN ( 1<<11 )
+#define MAXSTRPOS ( 1<<20 )
+
+/*
+ * One occurrence of a lexeme: a 2-bit weight and a 14-bit position,
+ * packed into 16 bits.
+ */
+typedef struct {
+    uint16
+        weight:2,
+        pos:14;
+} WordEntryPos;
+#define MAXENTRYPOS (1<<14)
+#define MAXNUMPOS 256
+/* clamp a position to the largest value that fits in WordEntryPos.pos */
+#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
+
+/*
+ * tsvector value.  As laid out by the macros below:
+ *   header (len, size), then `size` WordEntry slots, then the string area.
+ * For an entry with haspos set, a uint16 count and that many WordEntryPos
+ * items follow the SHORTALIGNed lexeme text inside the string area.
+ */
+typedef struct
+{
+    int4 len;       /* total size in bytes, see CALCDATASIZE() */
+    int4 size;      /* number of WordEntry slots */
+    char data[1];
+} tsvector;
+
+/*
+ * Every macro argument is parenthesised so that expressions such as
+ * CALCDATASIZE(n, cur - data) or ARRPTR(p + 1) expand as intended.
+ */
+#define DATAHDRSIZE (sizeof(int4)*2)
+#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
+#define ARRPTR(x) ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
+#define STRPTR(x) ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+#define STRSIZE(x) ( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
+/* address of the uint16 position counter belonging to entry e of vector x */
+#define _POSDATAPTR(x,e) (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+/* number of stored positions of entry e (0 when the entry has none) */
+#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
+/* first WordEntryPos of entry e (located right after the counter) */
+#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+
+
+/* a WordEntry paired with a separately held position array */
+typedef struct {
+    WordEntry entry;
+    WordEntryPos *pos;
+} WordEntryIN;
+
+/* state passed to gettoken_tsvector(); field meanings are defined by that
+ * function, which is not part of this header */
+typedef struct
+{
+    char *prsbuf;
+    char *word;
+    char *curpos;
+    int4 len;
+    int4 state;
+    int4 alen;
+    WordEntryPos *pos;
+    bool oprisdelim;
+} TI_IN_STATE;
+
+int4 gettoken_tsvector(TI_IN_STATE * state);
+
+#endif
--- /dev/null
+/*
+ * Operations for tsvector type
+ * Teodor Sigaev
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/itup.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "storage/bufpage.h"
+#include "executor/spi.h"
+#include "commands/trigger.h"
+#include "nodes/pg_list.h"
+#include "catalog/namespace.h"
+
+#include "utils/pg_locale.h"
+
+#include <ctype.h> /* tolower */
+#include "tsvector.h"
+#include "query.h"
+#include "ts_cfg.h"
+#include "common.h"
+
+PG_FUNCTION_INFO_V1(strip); /* strip(): drop position data from a tsvector */
+Datum strip(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(setweight); /* setweight(): relabel all positions with one weight */
+Datum setweight(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(concat); /* concat(): merge two tsvectors */
+Datum concat(PG_FUNCTION_ARGS);
+
+/*
+ * strip(tsvector) - return a copy of the vector without position data.
+ *
+ * Every lexeme is copied into a freshly allocated vector at a SHORTALIGNed
+ * offset and its haspos flag is cleared; lexeme order is preserved.
+ */
+Datum
+strip(PG_FUNCTION_ARGS)
+{
+    tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+    tsvector *out;
+    int i,len=0;
+    WordEntry *arrin=ARRPTR(in), *arrout;
+    char *cur;
+
+    /* space needed for the lexeme strings alone */
+    for(i=0;i<in->size;i++)
+        len += SHORTALIGN( arrin[i].len );
+
+    len = CALCDATASIZE(in->size, len);
+    out=(tsvector*)palloc(len);
+    memset(out,0,len);
+    out->len=len;
+    out->size=in->size;
+    arrout=ARRPTR(out);
+    cur=STRPTR(out);
+    for(i=0;i<in->size;i++) {
+        memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
+        arrout[i].haspos = 0;
+        arrout[i].len = arrin[i].len;
+        arrout[i].pos = cur - STRPTR(out);
+        cur += SHORTALIGN( arrout[i].len );
+    }
+
+    PG_FREE_IF_COPY(in, 0);
+    PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsvector, "char") - return a copy of the vector in which every
+ * stored position carries the given weight.
+ *
+ * The label is case-insensitive: 'a' -> 3, 'b' -> 2, 'c' -> 1, 'd' -> 0.
+ * Any other character raises an error.
+ */
+Datum
+setweight(PG_FUNCTION_ARGS)
+{
+    tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+    char cw = PG_GETARG_CHAR(1);
+    tsvector *out;
+    int i,j;
+    WordEntry *entry;
+    WordEntryPos *p;
+    int w=0;
+
+    /* tolower() is only defined for unsigned char values and EOF */
+    switch(tolower((unsigned char) cw)) {
+        case 'a': w=3; break;
+        case 'b': w=2; break;
+        case 'c': w=1; break;
+        case 'd': w=0; break;
+        default: elog(ERROR,"Unknown weight");
+    }
+
+    out=(tsvector*)palloc(in->len);
+    memcpy(out,in,in->len);
+    entry=ARRPTR(out);
+    i=out->size;
+    while(i--) {
+        /* POSDATALEN is 0 for entries without position data */
+        if ( (j=POSDATALEN(out,entry)) != 0 ) {
+            p=POSDATAPTR(out,entry);
+            while(j--) {
+                p->weight=w;
+                p++;
+            }
+        }
+        entry++;
+    }
+
+    PG_FREE_IF_COPY(in, 0);
+    PG_RETURN_POINTER(out);
+}
+
+/*
+ * Order two lexemes: shorter ones sort first, equal-length ones are
+ * compared with strncmp() over their text.  ptra/ptrb are the string
+ * areas the entries' pos offsets refer to.
+ */
+static int
+compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
+{
+    if ( a->len != b->len )
+        return ( a->len > b->len ) ? 1 : -1;
+
+    return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
+}
+
+/*
+ * Append the positions of srcptr (an entry of src) to the position list of
+ * destptr (an entry of dest), adding maxpos to each position and clamping
+ * the result with LIMITPOS().  The weights are copied unchanged.
+ *
+ * If destptr has no position list yet, its counter is initialised to zero
+ * first; haspos is set once at least one position was stored.
+ *
+ * Returns the number of positions appended.
+ */
+static int4
+add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
+    uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
+    int i;
+    uint16 slen = POSDATALEN(src, srcptr), startlen;
+    WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
+
+    if ( ! destptr->haspos )
+        *clen=0;
+
+    startlen = *clen;
+    /*
+     * NOTE(review): the loop header was truncated in this copy of the patch
+     * and has been restored: stop after slen source positions, when the
+     * list holds MAXNUMPOS items, or once the last stored position has
+     * already saturated at MAXENTRYPOS-1 -- confirm against upstream.
+     */
+    for(i=0; i<slen && *clen<MAXNUMPOS &&
+             ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ); i++) {
+        dpos[ *clen ].weight = spos[i].weight;
+        dpos[ *clen ].pos = LIMITPOS(spos[i].pos + maxpos);
+        (*clen)++;
+    }
+
+    if ( *clen != startlen )
+        destptr->haspos=1;
+    return *clen - startlen;
+}
+
+/*
+ * concat(tsvector, tsvector) - merge two vectors into one.
+ *
+ * Entries of both inputs are merged in compareEntry() order.  Positions
+ * taken from the first vector are copied verbatim; positions taken from
+ * the second vector are appended through add_pos(), which shifts them by
+ * the largest position found in the first vector.  A lexeme present in
+ * both inputs yields a single entry holding both position lists.
+ */
+Datum
+concat(PG_FUNCTION_ARGS) {
+ tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+ tsvector *out;
+ WordEntry *ptr;
+ WordEntry *ptr1,*ptr2;
+ WordEntryPos *p;
+ int maxpos=0,i,j,i1,i2;
+ char *cur;
+ char *data,*data1,*data2;
+
+ ptr=ARRPTR(in1); /* pass 1: find the largest position used in in1 */
+ i=in1->size;
+ while(i--) {
+ if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
+ p=POSDATAPTR(in1,ptr);
+ while(j--) {
+ if ( p->pos > maxpos )
+ maxpos = p->pos;
+ p++;
+ }
+ }
+ ptr++;
+ }
+
+ ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
+ data1=STRPTR(in1); data2=STRPTR(in2);
+ i1=in1->size; i2=in2->size; /* entries still to be consumed from each input */
+ out=(tsvector*)palloc( in1->len + in2->len ); /* sized as the sum of both inputs */
+ memset(out,0,in1->len + in2->len);
+ out->len = in1->len + in2->len;
+ out->size = in1->size + in2->size; /* provisional: STRPTR(out) depends on it; corrected after the merge */
+ data=cur=STRPTR(out); /* data: start of output string area, cur: write cursor */
+ ptr=ARRPTR(out);
+ while( i1 && i2 ) { /* merge while both inputs have entries left */
+ int cmp=compareEntry(data1,ptr1,data2,ptr2);
+ if ( cmp < 0 ) { /* in1 first */
+ ptr->haspos = ptr1->haspos;
+ ptr->len = ptr1->len;
+ memcpy( cur, data1 + ptr1->pos, ptr1->len );
+ ptr->pos = cur - data;
+ cur+=SHORTALIGN(ptr1->len);
+ if ( ptr->haspos ) { /* copy counter + positions unchanged */
+ memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+ cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ ptr++; ptr1++; i1--;
+ } else if ( cmp>0 ) { /* in2 first */
+ ptr->haspos = ptr2->haspos;
+ ptr->len = ptr2->len;
+ memcpy( cur, data2 + ptr2->pos, ptr2->len );
+ ptr->pos = cur - data;
+ cur+=SHORTALIGN(ptr2->len);
+ if ( ptr->haspos ) {
+ int addlen = add_pos(in2, ptr2, out, ptr, maxpos ); /* positions shifted by maxpos */
+ if ( addlen == 0 )
+ ptr->haspos=0; /* nothing was stored, so no position list */
+ else
+ cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ ptr++; ptr2++; i2--;
+ } else { /* same lexeme in both inputs: emit one entry */
+ ptr->haspos = ptr1->haspos | ptr2->haspos;
+ ptr->len = ptr1->len;
+ memcpy( cur, data1 + ptr1->pos, ptr1->len );
+ ptr->pos = cur - data;
+ cur+=SHORTALIGN(ptr1->len);
+ if ( ptr->haspos ) {
+ if ( ptr1->haspos ) {
+ memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+ cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+ if ( ptr2->haspos ) /* counter already copied from in1: advance by the added items only */
+ cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
+ } else if ( ptr2->haspos ) {
+ int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+ if ( addlen == 0 )
+ ptr->haspos=0;
+ else
+ cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ }
+ ptr++; ptr1++; ptr2++; i1--; i2--;
+ }
+ }
+
+ while(i1) { /* leftover entries of in1 */
+ ptr->haspos = ptr1->haspos;
+ ptr->len = ptr1->len;
+ memcpy( cur, data1 + ptr1->pos, ptr1->len );
+ ptr->pos = cur - data;
+ cur+=SHORTALIGN(ptr1->len);
+ if ( ptr->haspos ) {
+ memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
+ cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ ptr++; ptr1++; i1--;
+ }
+
+ while(i2) { /* leftover entries of in2 */
+ ptr->haspos = ptr2->haspos;
+ ptr->len = ptr2->len;
+ memcpy( cur, data2 + ptr2->pos, ptr2->len );
+ ptr->pos = cur - data;
+ cur+=SHORTALIGN(ptr2->len);
+ if ( ptr->haspos ) {
+ int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
+ if ( addlen == 0 )
+ ptr->haspos=0;
+ else
+ cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ ptr++; ptr2++; i2--;
+ }
+
+ out->size=ptr-ARRPTR(out); /* actual number of entries written */
+ out->len = CALCDATASIZE( out->size, cur-data );
+ if ( data != STRPTR(out) ) /* size changed, so the string area must move to its new start */
+ memmove( STRPTR(out), data, cur-data );
+
+ PG_FREE_IF_COPY(in1, 0);
+ PG_FREE_IF_COPY(in2, 1);
+ PG_RETURN_POINTER(out);
+}
+
--- /dev/null
+BEGIN;
+
+-- WARNING: destructive uninstall script.
+-- The CASCADE drops below also remove every index, trigger and table
+-- column that depends on the types defined in tsearch2.sql.
+
+-- GiST operator class
+DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
+
+-- operators
+DROP OPERATOR || (tsvector, tsvector);
+DROP OPERATOR @@ (tsvector, tsquery);
+DROP OPERATOR @@ (tsquery, tsvector);
+
+DROP AGGREGATE stat(tsvector);
+-- configuration tables
+DROP TABLE pg_ts_dict;
+DROP TABLE pg_ts_parser;
+DROP TABLE pg_ts_cfg;
+DROP TABLE pg_ts_cfgmap;
+-- types (CASCADE also drops the objects that use them)
+DROP TYPE tokentype CASCADE;
+DROP TYPE tokenout CASCADE;
+DROP TYPE tsvector CASCADE;
+DROP TYPE tsquery CASCADE;
+DROP TYPE gtsvector CASCADE;
+DROP TYPE tsstat CASCADE;
+DROP TYPE statinfo CASCADE;
+-- functions dropped explicitly, by exact signature
+DROP FUNCTION lexize(oid, text) ;
+DROP FUNCTION lexize(text, text);
+DROP FUNCTION lexize(text);
+DROP FUNCTION set_curdict(int);
+DROP FUNCTION set_curdict(text);
+DROP FUNCTION dex_init(text);
+DROP FUNCTION dex_lexize(internal,internal,int4);
+DROP FUNCTION snb_en_init(text);
+DROP FUNCTION snb_lexize(internal,internal,int4);
+DROP FUNCTION snb_ru_init(text);
+DROP FUNCTION spell_init(text);
+DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION syn_init(text);
+DROP FUNCTION syn_lexize(internal,internal,int4);
+DROP FUNCTION set_curprs(int);
+DROP FUNCTION set_curprs(text);
+DROP FUNCTION prsd_start(internal,int4);
+DROP FUNCTION prsd_getlexeme(internal,internal,internal);
+DROP FUNCTION prsd_end(internal);
+DROP FUNCTION prsd_lextype(internal);
+DROP FUNCTION prsd_headline(internal,internal,internal);
+DROP FUNCTION set_curcfg(int);
+DROP FUNCTION set_curcfg(text);
+DROP FUNCTION show_curcfg();
+DROP FUNCTION gtsvector_compress(internal);
+DROP FUNCTION gtsvector_decompress(internal);
+DROP FUNCTION gtsvector_penalty(internal,internal,internal);
+DROP FUNCTION gtsvector_picksplit(internal, internal);
+DROP FUNCTION gtsvector_union(bytea, internal);
+DROP FUNCTION reset_tsearch();
+DROP FUNCTION tsearch2() CASCADE;
+
+END; -- END is PostgreSQL's synonym for COMMIT
--- /dev/null
+#include "deflex.h"
+
+const char *lex_descr[]={ /* human-readable description per token type, indexed by the ids in deflex.h */
+ "", /* 0: unused, token ids start at 1 */
+ "Latin word", /* LATWORD */
+ "Non-latin word", /* CYRWORD */
+ "Word", /* UWORD */
+ "Email", /* EMAIL */
+ "URL", /* FURL */
+ "Host", /* HOST */
+ "Scientific notation", /* SCIENTIFIC */
+ "VERSION", /* VERSIONNUMBER */
+ "Part of hyphenated word", /* PARTHYPHENWORD */
+ "Non-latin part of hyphenated word", /* CYRPARTHYPHENWORD */
+ "Latin part of hyphenated word", /* LATPARTHYPHENWORD */
+ "Space symbols", /* SPACE */
+ "HTML Tag", /* TAG */
+ "HTTP head", /* HTTP */
+ "Hyphenated word", /* HYPHENWORD */
+ "Latin hyphenated word", /* LATHYPHENWORD */
+ "Non-latin hyphenated word", /* CYRHYPHENWORD */
+ "URI", /* URI */
+ "File or path name", /* FILEPATH */
+ "Decimal notation", /* DECIMAL */
+ "Signed integer", /* SIGNEDINT */
+ "Unsigned integer", /* UNSIGNEDINT */
+ "HTML Entity" /* HTMLENTITY == LASTNUM */
+};
+
+const char *tok_alias[]={ /* short alias per token type, indexed by the ids in deflex.h */
+ "", /* 0: unused, token ids start at 1 */
+ "lword", /* LATWORD */
+ "nlword", /* CYRWORD */
+ "word", /* UWORD */
+ "email", /* EMAIL */
+ "url", /* FURL */
+ "host", /* HOST */
+ "sfloat", /* SCIENTIFIC */
+ "version", /* VERSIONNUMBER */
+ "part_hword", /* PARTHYPHENWORD */
+ "nlpart_hword", /* CYRPARTHYPHENWORD */
+ "lpart_hword", /* LATPARTHYPHENWORD */
+ "blank", /* SPACE */
+ "tag", /* TAG */
+ "http", /* HTTP */
+ "hword", /* HYPHENWORD */
+ "lhword", /* LATHYPHENWORD */
+ "nlhword", /* CYRHYPHENWORD */
+ "uri", /* URI */
+ "file", /* FILEPATH */
+ "float", /* DECIMAL */
+ "int", /* SIGNEDINT */
+ "uint", /* UNSIGNEDINT */
+ "entity" /* HTMLENTITY == LASTNUM */
+};
+
--- /dev/null
+#ifndef __DEFLEX_H__
+#define __DEFLEX_H__
+
+/*
+ * Token type ids returned by the default parser.
+ * Remember: LASTNUM must equal the highest id below, and lex_descr[] /
+ * tok_alias[] must have one entry per id (plus the unused slot 0).
+ */
+#define LASTNUM 23
+
+#define LATWORD 1 /* Latin word */
+#define CYRWORD 2 /* Non-latin word */
+#define UWORD 3 /* Word */
+#define EMAIL 4
+#define FURL 5 /* URL */
+#define HOST 6
+#define SCIENTIFIC 7 /* Scientific notation */
+#define VERSIONNUMBER 8
+#define PARTHYPHENWORD 9 /* Part of hyphenated word */
+#define CYRPARTHYPHENWORD 10 /* Non-latin part of hyphenated word */
+#define LATPARTHYPHENWORD 11 /* Latin part of hyphenated word */
+#define SPACE 12 /* Space symbols */
+#define TAG 13 /* HTML Tag */
+#define HTTP 14 /* HTTP head */
+#define HYPHENWORD 15 /* Hyphenated word */
+#define LATHYPHENWORD 16 /* Latin hyphenated word */
+#define CYRHYPHENWORD 17 /* Non-latin hyphenated word */
+#define URI 18
+#define FILEPATH 19 /* File or path name */
+#define DECIMAL 20 /* Decimal notation */
+#define SIGNEDINT 21
+#define UNSIGNEDINT 22
+#define HTMLENTITY 23
+
+extern const char *lex_descr[]; /* descriptions, indexed by token id */
+extern const char *tok_alias[]; /* aliases, indexed by token id */
+
+#endif
--- /dev/null
+#ifndef __PARSER_H__
+#define __PARSER_H__
+/* Interface of the flex-generated default word scanner. */
+char *token; /* text of the last token. NOTE(review): defined here without extern, so each includer gets its own tentative definition -- confirm this links on the target toolchain */
+int tokenlen; /* length of the last token; same NOTE(review) as above */
+int tsearch2_yylex(void); /* scan the next token and return its type id */
+void start_parse_str(char *, int); /* scan an in-memory buffer of the given length */
+void start_parse_fh(FILE *, int); /* scan a stdio stream; the int is a byte limit (0 = unlimited). NOTE(review): FILE must already be declared by the includer */
+void end_parse(void); /* release scanner state */
+
+#endif
--- /dev/null
+%{
+#include "postgres.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "common.h"
+
+/* Avoid exit() on fatal scanner errors: the scanner's fprintf() calls are
+ * rerouted to ts_error(ERROR, ...) */
+#define fprintf(file, fmt, msg) ts_error(ERROR, fmt, msg)
+
+/* use the postgres allocation functions instead of malloc/free */
+#define free pfree
+#define malloc palloc
+#define realloc repalloc
+
+#ifdef strdup
+#undef strdup
+#endif
+#define strdup pstrdup
+
+char *token = NULL; /* pointer to token */
+char *s = NULL; /* to return WHOLE hyphenated-word */
+
+YY_BUFFER_STATE buf = NULL; /* buffer being parsed; needed when parsing from a string */
+
+int lrlimit = -1; /* for limiting read from filehandle ( -1 - unlimited read ) */
+int bytestoread = 0; /* for limiting read from filehandle */
+
+/* redefine the input macro so that at most lrlimit bytes are read from a file */
+#define YY_INPUT(buf,result,max_size) \
+ if ( yy_current_buffer->yy_is_interactive ) { \
+ int c = '*', n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( tsearch2_yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } else { \
+ if ( lrlimit == 0 ) \
+ result=YY_NULL; \
+ else { \
+ if ( lrlimit>0 ) { \
+ bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
+ lrlimit -= bytestoread; \
+ } else \
+ bytestoread = max_size; \
+ if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
+ && ferror( tsearch2_yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ } \
+ }
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nounput
+%option noyywrap
+
+/* scanner state used while splitting a hyphenated word into its parts */
+%x DELIM
+/* scanner states used while parsing a URL */
+%x URL
+%x SERVER
+
+/* scanner states used while skipping HTML tags, comments and scripts */
+%x INTAG
+%x QINTAG
+%x INCOMMENT
+%x INSCRIPT
+
+/* bytes \200-\377 are treated as non-Latin letters (e.g. Cyrillic in KOI8) */
+CYRALNUM [0-9\200-\377]
+CYRALPHA [\200-\377]
+ALPHA [a-zA-Z\200-\377]
+ALNUM [0-9a-zA-Z\200-\377]
+
+
+HOSTNAME ([-_[:alnum:]]+\.)+[[:alpha:]]+
+URI [-_[:alnum:]/%,\.;=&?#]+
+
+%%
+
+<INITIAL>"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; /* skip script bodies */ }
+
+<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
+    /* end of script: overwrite the match with a blank and report SPACE */
+    BEGIN INITIAL;
+    *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0';
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return SPACE;
+}
+
+<INITIAL>"<!--" { BEGIN INCOMMENT; /* skip HTML comments */ }
+
+<INCOMMENT>"-->" {
+    /* end of comment: overwrite the match with a blank and report SPACE */
+    BEGIN INITIAL;
+    *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0';
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return SPACE;
+}
+
+
+<INITIAL>"<"[\![:alpha:]] { BEGIN INTAG; }
+
+<INITIAL>"</"[[:alpha:]] { BEGIN INTAG; }
+
+<INTAG>"\"" { BEGIN QINTAG; /* quoted attribute value: '>' does not end the tag here */ }
+
+<QINTAG>"\\\"" ;
+
+<QINTAG>"\"" { BEGIN INTAG; }
+
+<INTAG>">" {
+    /* end of tag: the whole tag is reported as a one-character TAG token */
+    BEGIN INITIAL;
+    token = tsearch2_yytext;
+    *tsearch2_yytext=' ';
+    token = tsearch2_yytext;
+    tokenlen = 1;
+    return TAG;
+}
+
+<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n ;
+
+<INITIAL>\&(quot|amp|nbsp|lt|gt)\; {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return HTMLENTITY;
+}
+
+<INITIAL>\&\#[0-9][0-9]?[0-9]?\; {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return HTMLENTITY;
+}
+
+<INITIAL>[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return EMAIL;
+}
+
+<INITIAL>[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float */ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return SCIENTIFIC;
+}
+
+<INITIAL>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return VERSIONNUMBER;
+}
+
+<INITIAL>[+-]?[0-9]+\.[0-9]+ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return DECIMAL;
+}
+
+<INITIAL>[+-][0-9]+ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return SIGNEDINT;
+}
+
+<DELIM,INITIAL>[0-9]+ {
+    /* NOTE(review): the start-condition list of this rule was restored
+     * from memory of upstream -- confirm */
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return UNSIGNEDINT;
+}
+
+<INITIAL>http"://" {
+    BEGIN URL;
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return HTTP;
+}
+
+<INITIAL>ftp"://" {
+    BEGIN URL;
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return HTTP;
+}
+
+<URL,INITIAL>{HOSTNAME}[/:]{URI} {
+    /* report the whole URL, then push it back so that the SERVER state
+     * rescans it as host and URI parts */
+    BEGIN SERVER;
+    if (s) { free(s); s=NULL; }
+    s = strdup( tsearch2_yytext );
+    tokenlen = tsearch2_yyleng;
+    yyless( 0 );
+    token = s;
+    return FURL;
+}
+
+<SERVER,URL,INITIAL>{HOSTNAME} {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return HOST;
+}
+
+<SERVER>[/:]{URI} {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return URI;
+}
+
+<INITIAL>[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return FILEPATH;
+}
+
+<INITIAL>({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */ {
+    /* report the whole word, then push it back so that the DELIM state
+     * rescans it part by part */
+    BEGIN DELIM;
+    if (s) { free(s); s=NULL; }
+    s = strdup( tsearch2_yytext );
+    tokenlen = tsearch2_yyleng;
+    yyless( 0 );
+    token = s;
+    return CYRHYPHENWORD;
+}
+
+<INITIAL>([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */ {
+    BEGIN DELIM;
+    if (s) { free(s); s=NULL; }
+    s = strdup( tsearch2_yytext );
+    tokenlen = tsearch2_yyleng;
+    yyless( 0 );
+    token = s;
+    return LATHYPHENWORD;
+}
+
+<INITIAL>({ALNUM}+-)+{ALNUM}+ /* composite-word */ {
+    BEGIN DELIM;
+    if (s) { free(s); s=NULL; }
+    s = strdup( tsearch2_yytext );
+    tokenlen = tsearch2_yyleng;
+    yyless( 0 );
+    token = s;
+    return HYPHENWORD;
+}
+
+<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return VERSIONNUMBER;
+}
+
+<DELIM>\+?[0-9]+\.[0-9]+ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return DECIMAL;
+}
+
+<DELIM>{CYRALPHA}+ /* one word in composite-word */ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return CYRPARTHYPHENWORD;
+}
+
+<DELIM>[[:alpha:]]+ /* one word in composite-word */ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return LATPARTHYPHENWORD;
+}
+
+<DELIM>{ALNUM}+ /* one word in composite-word */ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return PARTHYPHENWORD;
+}
+
+<DELIM>- {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return SPACE;
+}
+
+<DELIM,SERVER,URL>.|\n /* return in basic state */ {
+    BEGIN INITIAL;
+    yyless( 0 );
+}
+
+<INITIAL>{CYRALPHA}+ /* normal word */ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return CYRWORD;
+}
+
+<INITIAL>[[:alpha:]]+ /* normal word */ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return LATWORD;
+}
+
+<INITIAL>{ALNUM}+ /* normal word */ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return UWORD;
+}
+
+<INITIAL>[ \r\n\t]+ {
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return SPACE;
+}
+
+<INITIAL>. {
+    /* any other single character is reported as SPACE */
+    token = tsearch2_yytext;
+    tokenlen = tsearch2_yyleng;
+    return SPACE;
+}
+
+%%
+
+/* release the saved composite token and the scan buffer once parsing is over */
+void end_parse() {
+    if ( s != NULL ) {
+        free( s );
+        s = NULL;
+    }
+
+    tsearch2_yy_delete_buffer( buf );
+    buf = NULL;
+}
+
+/* begin scanning the first `limit` bytes of `str`; any scan still open is closed first */
+void start_parse_str(char* str, int limit) {
+    if ( buf != NULL )
+        end_parse();
+
+    buf = tsearch2_yy_scan_bytes( str, limit );
+    tsearch2_yy_switch_to_buffer( buf );
+    BEGIN INITIAL;
+}
+
+/* begin scanning from a stdio stream; limit == 0 means "no read limit" */
+void start_parse_fh( FILE* fh, int limit ) {
+    if ( buf != NULL )
+        end_parse();
+
+    if ( limit )
+        lrlimit = limit;
+    else
+        lrlimit = -1;
+
+    buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE );
+    tsearch2_yy_switch_to_buffer( buf );
+    BEGIN INITIAL;
+}
+
+
--- /dev/null
+/*
+ * interface functions to parser
+ * Teodor Sigaev
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+
+#include "wparser.h"
+#include "ts_cfg.h"
+#include "snmap.h"
+#include "common.h"
+
+/*********top interface**********/
+
+static void *plan_getparser=NULL;           /* saved SPI plan used by init_prs() */
+static Oid current_parser_id=InvalidOid;    /* parser used by the *_current functions */
+
+/*
+ * Fill *prs with the description of parser `id`, read from pg_ts_parser.
+ *
+ * The start, nexttoken, end and headline functions are resolved into
+ * FmgrInfo structs allocated in TopMemoryContext; the lextype function is
+ * stored by OID only.  Raises an error if no such parser exists.
+ */
+void
+init_prs(Oid id, WParserInfo *prs) {
+    Oid arg[1]={ OIDOID };
+    bool isnull;
+    Datum pars[1]={ ObjectIdGetDatum(id) };
+    int stat;
+
+    memset(prs,0,sizeof(WParserInfo));
+    SPI_connect();
+    if ( !plan_getparser ) {
+        /* prepared once, then reused for every later lookup */
+        plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
+        if ( !plan_getparser )
+            ts_error(ERROR, "SPI_prepare() failed");
+    }
+
+    stat = SPI_execp(plan_getparser, pars, " ", 1);
+    if ( stat < 0 )
+        ts_error (ERROR, "SPI_execp return %d", stat);
+    if ( SPI_processed > 0 ) {
+        Oid oid=InvalidOid;
+        oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+        fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
+        oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
+        fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
+        oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
+        fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
+        prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
+        oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
+        fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
+        prs->prs_id=id;
+    } else
+        ts_error(ERROR, "No parser with id %u", id);   /* Oid is unsigned */
+    SPI_finish();
+}
+
+typedef struct { /* cache of parser descriptions already loaded by findprs() */
+ WParserInfo *last_prs; /* entry returned by the most recent lookup, or NULL */
+ int len; /* number of entries in use */
+ int reallen; /* number of entries allocated */
+ WParserInfo *list; /* entries, kept sorted with compareprs() */
+ SNMap name2id_map; /* parser name -> id cache used by name2id_prs() */
+} PrsList;
+
+static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/* forget every cached parser description and every cached name -> id mapping */
+void
+reset_prs(void) {
+    freeSNMap( &(PList.name2id_map) );
+
+    if ( PList.list != NULL )
+        free( PList.list );
+
+    memset( &PList, 0, sizeof(PrsList) );
+}
+
+/*
+ * qsort()/bsearch() comparator ordering WParserInfo entries by prs_id.
+ * Oids are unsigned, so they are compared explicitly: subtracting them
+ * gives the wrong sign once two ids differ by more than INT_MAX.
+ */
+static int
+compareprs(const void *a, const void *b) {
+    Oid ida = ((const WParserInfo*)a)->prs_id;
+    Oid idb = ((const WParserInfo*)b)->prs_id;
+
+    if ( ida == idb )
+        return 0;
+    return ( ida > idb ) ? 1 : -1;
+}
+
+/*
+ * Return the cached description of parser `id`, loading it with init_prs()
+ * on first use.  Raises an error (through init_prs / ts_error) when the
+ * parser does not exist or memory runs out.
+ */
+WParserInfo *
+findprs(Oid id) {
+    WParserInfo *slot;
+
+    /* last used prs */
+    if ( PList.last_prs && PList.last_prs->prs_id==id )
+        return PList.last_prs;
+
+    /* already used prs */
+    if ( PList.len != 0 ) {
+        WParserInfo key;
+        key.prs_id=id;
+        PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
+        if ( PList.last_prs != NULL )
+            return PList.last_prs;
+    }
+
+    /*
+     * Not cached yet.  last_prs must not reference the new slot before
+     * init_prs() has succeeded: init_prs() zeroes the slot and may then
+     * raise an error, which would leave last_prs pointing at an unusable
+     * entry whose prs_id is 0.
+     */
+    PList.last_prs = NULL;
+
+    /* last chance */
+    if ( PList.len==PList.reallen ) {
+        WParserInfo *tmp;
+        int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
+        tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
+        if ( !tmp )
+            ts_error(ERROR,"No memory");
+        PList.reallen=reallen;
+        PList.list=tmp;
+    }
+    slot=&(PList.list[PList.len]);
+    init_prs(id, slot);
+    PList.len++;            /* publish the entry only after a successful load */
+    qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
+    return findprs(id); /* qsort changed order!! */
+}
+
+static void *plan_name2id=NULL;     /* saved SPI plan used by name2id_prs() */
+
+/*
+ * Translate a parser name into its pg_ts_parser OID.  Results are cached
+ * in PList.name2id_map; an unknown name raises an error.
+ */
+Oid
+name2id_prs(text *name) {
+    Oid argtypes[1]={ TEXTOID };
+    Datum values[1]={ PointerGetDatum(name) };
+    bool isnull;
+    int rc;
+    Oid id;
+
+    id = findSNMap_t( &(PList.name2id_map), name );
+    if ( id )
+        return id;      /* cache hit */
+
+    SPI_connect();
+    if ( plan_name2id == NULL ) {
+        plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
+        if ( plan_name2id == NULL )
+            ts_error(ERROR, "SPI_prepare() failed");
+    }
+
+    rc = SPI_execp(plan_name2id, values, " ", 1);
+    if ( rc < 0 )
+        ts_error (ERROR, "SPI_execp return %d", rc);
+
+    if ( SPI_processed == 0 )
+        ts_error(ERROR, "No parser '%s'", text2char(name));
+    else
+        id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+    SPI_finish();
+    addSNMap_t( &(PList.name2id_map), name, id );
+    return id;
+}
+
+
+/******sql-level interface******/
+/* per-query state of the token_type*() set-returning functions */
+typedef struct {
+    int cur;            /* index of the next row to return */
+    LexDescr *list;     /* rows, terminated by an entry with lexid == 0 */
+} TypeStorage;
+
+/*
+ * First-call setup shared by the token_type*() functions: ask the parser's
+ * lextype function for its token descriptions and prepare the "tokentype"
+ * tuple machinery, all in the multi-call memory context.
+ */
+static void
+setup_firstcall(FuncCallContext *funcctx, Oid prsid) {
+    WParserInfo *parser = findprs(prsid);
+    MemoryContext saved;
+    TypeStorage *state;
+    Datum descrs;
+    TupleDesc tupdesc;
+
+    saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+    state = (TypeStorage*) palloc( sizeof(TypeStorage) );
+    state->cur = 0;
+    descrs = OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) );
+    state->list = (LexDescr*) DatumGetPointer( descrs );
+    funcctx->user_fctx = (void*) state;
+
+    tupdesc = RelationNameGetTupleDesc("tokentype");
+    funcctx->slot = TupleDescGetSlot(tupdesc);
+    funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+    MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Produce the next "tokentype" row, or (Datum) 0 once the list is
+ * exhausted; in that case the per-query state is freed as well.
+ */
+static Datum
+process_call(FuncCallContext *funcctx) {
+    TypeStorage *state = (TypeStorage*) funcctx->user_fctx;
+    LexDescr *item;
+    char *values[3];
+    char txtid[16];
+    HeapTuple tuple;
+    Datum result;
+
+    if ( state->list == NULL || state->list[state->cur].lexid == 0 ) {
+        /* end of list: release everything */
+        if ( state->list )
+            pfree(state->list);
+        pfree(state);
+        return (Datum)0;
+    }
+
+    item = &(state->list[state->cur]);
+    sprintf(txtid, "%d", item->lexid);
+    values[0] = txtid;
+    values[1] = item->alias;
+    values[2] = item->descr;
+
+    tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+    result = TupleGetDatum(funcctx->slot, tuple);
+
+    /* the strings of this row are not needed any more */
+    pfree(values[1]);
+    pfree(values[2]);
+    state->cur++;
+    return result;
+}
+
+PG_FUNCTION_INFO_V1(token_type);
+Datum token_type(PG_FUNCTION_ARGS);
+
+/* token_type(oid): list the token types of the parser with the given id */
+Datum
+token_type(PG_FUNCTION_ARGS) {
+    FuncCallContext *fctx;
+    Datum row;
+
+    if (SRF_IS_FIRSTCALL()) {
+        fctx = SRF_FIRSTCALL_INIT();
+        setup_firstcall(fctx, PG_GETARG_OID(0) );
+    }
+
+    fctx = SRF_PERCALL_SETUP();
+
+    row = process_call(fctx);
+    if ( row == (Datum)0 )
+        SRF_RETURN_DONE(fctx);
+    SRF_RETURN_NEXT(fctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_byname);
+Datum token_type_byname(PG_FUNCTION_ARGS);
+
+/* token_type(text): list the token types of the parser with the given name */
+Datum
+token_type_byname(PG_FUNCTION_ARGS) {
+    FuncCallContext *fctx;
+    Datum row;
+
+    if (SRF_IS_FIRSTCALL()) {
+        text *prsname = PG_GETARG_TEXT_P(0);
+
+        fctx = SRF_FIRSTCALL_INIT();
+        setup_firstcall(fctx, name2id_prs( prsname ) );
+        PG_FREE_IF_COPY(prsname,0);
+    }
+
+    fctx = SRF_PERCALL_SETUP();
+
+    row = process_call(fctx);
+    if ( row == (Datum)0 )
+        SRF_RETURN_DONE(fctx);
+    SRF_RETURN_NEXT(fctx, row);
+}
+
+PG_FUNCTION_INFO_V1(token_type_current);
+Datum token_type_current(PG_FUNCTION_ARGS);
+
+/*
+ * token_type(): list the token types of the current parser; the parser
+ * named "default" becomes current if none has been selected yet.
+ */
+Datum
+token_type_current(PG_FUNCTION_ARGS) {
+    FuncCallContext *fctx;
+    Datum row;
+
+    if (SRF_IS_FIRSTCALL()) {
+        fctx = SRF_FIRSTCALL_INIT();
+        if ( current_parser_id==InvalidOid )
+            current_parser_id = name2id_prs( char2text("default") );
+        setup_firstcall(fctx, current_parser_id );
+    }
+
+    fctx = SRF_PERCALL_SETUP();
+
+    row = process_call(fctx);
+    if ( row == (Datum)0 )
+        SRF_RETURN_DONE(fctx);
+    SRF_RETURN_NEXT(fctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(set_curprs);
+Datum set_curprs(PG_FUNCTION_ARGS);
+
+/* set_curprs(oid): make the given parser current, after checking that it can be loaded */
+Datum
+set_curprs(PG_FUNCTION_ARGS) {
+    findprs( PG_GETARG_OID(0) );    /* raises an error for an unknown parser */
+    current_parser_id = PG_GETARG_OID(0);
+
+    PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curprs_byname);
+Datum set_curprs_byname(PG_FUNCTION_ARGS);
+
+/* set_curprs(text): like set_curprs(oid), but the parser is given by name */
+Datum
+set_curprs_byname(PG_FUNCTION_ARGS) {
+    text *prsname = PG_GETARG_TEXT_P(0);
+    Oid prsid = name2id_prs(prsname);
+
+    DirectFunctionCall1( set_curprs, ObjectIdGetDatum(prsid) );
+
+    PG_FREE_IF_COPY(prsname, 0);
+    PG_RETURN_VOID();
+}
+
+typedef struct { /* one token produced by a parser */
+ int type; /* token type id returned by the parser */
+ char *lexem; /* NUL-terminated copy of the token text */
+} LexemEntry;
+
+typedef struct { /* per-query state of the parse*() set-returning functions */
+ int cur; /* index of the next token to return */
+ int len; /* allocated size while collecting, number of tokens afterwards */
+ LexemEntry *list; /* collected tokens */
+} PrsStorage;
+
+
+/*
+ * First-call setup shared by the parse*() functions: run parser `prsid`
+ * over the whole of txt, collect every token it returns and prepare the
+ * "tokenout" tuple machinery, all in the multi-call memory context.
+ */
+static void
+prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt) {
+    WParserInfo *prs = findprs(prsid);
+    MemoryContext saved;
+    PrsStorage *st;
+    TupleDesc tupdesc;
+    char *lex=NULL;
+    int llen=0, type=0;
+
+    saved = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+    st = (PrsStorage*) palloc( sizeof(PrsStorage) );
+    st->cur = 0;
+    st->len = 16;
+    st->list = (LexemEntry*) palloc( sizeof(LexemEntry)*st->len );
+
+    /* start the parser on the payload of the text value */
+    prs->prs = (void*) DatumGetPointer(
+        FunctionCall2(
+            &(prs->start_info),
+            PointerGetDatum(VARDATA(txt)),
+            Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
+        )
+    );
+
+    /* a token type of 0 marks the end of input */
+    for (;;) {
+        LexemEntry *entry;
+
+        type = DatumGetInt32( FunctionCall3(
+            &(prs->getlexeme_info),
+            PointerGetDatum(prs->prs),
+            PointerGetDatum(&lex),
+            PointerGetDatum(&llen)) );
+        if ( type == 0 )
+            break;
+
+        if ( st->cur >= st->len ) {
+            /* array is full: double it */
+            st->len = 2*st->len;
+            st->list = (LexemEntry*) repalloc(st->list, sizeof(LexemEntry)*st->len);
+        }
+
+        entry = &(st->list[st->cur]);
+        entry->lexem = palloc(llen+1);
+        memcpy( entry->lexem, lex, llen );
+        entry->lexem[llen] = '\0';
+        entry->type = type;
+        st->cur++;
+    }
+
+    FunctionCall1(
+        &(prs->end_info),
+        PointerGetDatum(prs->prs)
+    );
+
+    /* rewind: len now holds the number of collected tokens */
+    st->len = st->cur;
+    st->cur = 0;
+
+    funcctx->user_fctx = (void*) st;
+    tupdesc = RelationNameGetTupleDesc("tokenout");
+    funcctx->slot = TupleDescGetSlot(tupdesc);
+    funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+    MemoryContextSwitchTo(saved);
+}
+
+/*
+ * Produce the next "tokenout" row, or (Datum) 0 once every collected token
+ * has been returned; in that case the per-query state is freed as well.
+ */
+static Datum
+prs_process_call(FuncCallContext *funcctx) {
+    PrsStorage *st = (PrsStorage*) funcctx->user_fctx;
+    char *values[2];
+    char tid[16];
+    HeapTuple tuple;
+    Datum result;
+
+    if ( st->cur >= st->len ) {
+        /* all tokens returned: release everything */
+        if ( st->list )
+            pfree(st->list);
+        pfree(st);
+        return (Datum)0;
+    }
+
+    sprintf(tid, "%d", st->list[st->cur].type);
+    values[0] = tid;
+    values[1] = st->list[st->cur].lexem;
+
+    tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+    result = TupleGetDatum(funcctx->slot, tuple);
+
+    pfree(values[1]);   /* the token text is not needed any more */
+    st->cur++;
+    return result;
+}
+
+
+
+PG_FUNCTION_INFO_V1(parse);
+Datum parse(PG_FUNCTION_ARGS);
+
+/* parse(oid, text): return the tokens of the text as seen by the given parser */
+Datum
+parse(PG_FUNCTION_ARGS) {
+    FuncCallContext *fctx;
+    Datum row;
+
+    if (SRF_IS_FIRSTCALL()) {
+        text *doc = PG_GETARG_TEXT_P(1);
+
+        fctx = SRF_FIRSTCALL_INIT();
+        prs_setup_firstcall(fctx, PG_GETARG_OID(0), doc );
+        PG_FREE_IF_COPY(doc,1);
+    }
+
+    fctx = SRF_PERCALL_SETUP();
+
+    row = prs_process_call(fctx);
+    if ( row == (Datum)0 )
+        SRF_RETURN_DONE(fctx);
+    SRF_RETURN_NEXT(fctx, row);
+}
+
+PG_FUNCTION_INFO_V1(parse_byname);
+Datum parse_byname(PG_FUNCTION_ARGS);
+
+/* parse(text, text): like parse(oid, text), but the parser is given by name */
+Datum
+parse_byname(PG_FUNCTION_ARGS) {
+    FuncCallContext *fctx;
+    Datum row;
+
+    if (SRF_IS_FIRSTCALL()) {
+        text *prsname = PG_GETARG_TEXT_P(0);
+        text *doc = PG_GETARG_TEXT_P(1);
+
+        fctx = SRF_FIRSTCALL_INIT();
+        prs_setup_firstcall(fctx, name2id_prs( prsname ), doc );
+        PG_FREE_IF_COPY(prsname,0);
+        PG_FREE_IF_COPY(doc,1);
+    }
+
+    fctx = SRF_PERCALL_SETUP();
+
+    row = prs_process_call(fctx);
+    if ( row == (Datum)0 )
+        SRF_RETURN_DONE(fctx);
+    SRF_RETURN_NEXT(fctx, row);
+}
+
+
+PG_FUNCTION_INFO_V1(parse_current);
+Datum parse_current(PG_FUNCTION_ARGS);
+
+/*
+ * parse(text): tokenize the text with the current parser; the parser named
+ * "default" becomes current if none has been selected yet.
+ */
+Datum
+parse_current(PG_FUNCTION_ARGS) {
+    FuncCallContext *fctx;
+    Datum row;
+
+    if (SRF_IS_FIRSTCALL()) {
+        text *doc = PG_GETARG_TEXT_P(0);
+
+        fctx = SRF_FIRSTCALL_INIT();
+        if ( current_parser_id==InvalidOid )
+            current_parser_id = name2id_prs( char2text("default") );
+        prs_setup_firstcall(fctx, current_parser_id, doc );
+        PG_FREE_IF_COPY(doc,0);
+    }
+
+    fctx = SRF_PERCALL_SETUP();
+
+    row = prs_process_call(fctx);
+    if ( row == (Datum)0 )
+        SRF_RETURN_DONE(fctx);
+    SRF_RETURN_NEXT(fctx, row);
+}
+
+PG_FUNCTION_INFO_V1(headline);
+Datum headline(PG_FUNCTION_ARGS);
+
+/*
+ * headline(cfg oid, text, tsquery [, options text])
+ *
+ * Parses the document with the given configuration, lets the headline
+ * function of the configuration's parser process the parsed words, and
+ * returns the text generated by genhl().
+ */
+Datum
+headline(PG_FUNCTION_ARGS) {
+    TSCfgInfo *cfg;
+    text *in;
+    QUERYTYPE *query;
+    text *opt;
+    HLPRSTEXT prs;
+    text *out;
+    WParserInfo *prsobj;
+
+    cfg = findcfg(PG_GETARG_OID(0));
+    in = PG_GETARG_TEXT_P(1);
+    query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+
+    /* the options argument is optional and may be passed as a NULL pointer */
+    if ( PG_NARGS()>3 && PG_GETARG_POINTER(3) )
+        opt = PG_GETARG_TEXT_P(3);
+    else
+        opt = NULL;
+
+    prsobj = findprs(cfg->prs_id);
+
+    memset(&prs,0,sizeof(HLPRSTEXT));
+    prs.lenwords = 32;
+    prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
+    hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+
+    FunctionCall3(
+        &(prsobj->headline_info),
+        PointerGetDatum(&prs),
+        PointerGetDatum(opt),
+        PointerGetDatum(query)
+    );
+
+    out = genhl(&prs);
+
+    PG_FREE_IF_COPY(in,1);
+    PG_FREE_IF_COPY(query,2);
+    if ( opt )
+        PG_FREE_IF_COPY(opt,3);
+    pfree(prs.words);
+    pfree(prs.startsel);
+    pfree(prs.stopsel);
+
+    PG_RETURN_POINTER(out);
+}
+
+
+PG_FUNCTION_INFO_V1(headline_byname);
+Datum headline_byname(PG_FUNCTION_ARGS);
+
+/* headline(cfg name, text, tsquery [, options]): resolve the name, then call headline() */
+Datum
+headline_byname(PG_FUNCTION_ARGS) {
+    text *cfgname = PG_GETARG_TEXT_P(0);
+    Datum result;
+
+    result = DirectFunctionCall4(
+        headline,
+        ObjectIdGetDatum(name2id_cfg( cfgname ) ),
+        PG_GETARG_DATUM(1),
+        PG_GETARG_DATUM(2),
+        ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
+    );
+
+    PG_FREE_IF_COPY(cfgname,0);
+    PG_RETURN_DATUM(result);
+}
+
+PG_FUNCTION_INFO_V1(headline_current);
+Datum headline_current(PG_FUNCTION_ARGS);
+
+/* headline(text, tsquery [, options]): call headline() with the current configuration */
+Datum
+headline_current(PG_FUNCTION_ARGS) {
+    Datum result;
+
+    result = DirectFunctionCall4(
+        headline,
+        ObjectIdGetDatum(get_currcfg()),
+        PG_GETARG_DATUM(0),
+        PG_GETARG_DATUM(1),
+        ( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
+    );
+
+    PG_RETURN_DATUM(result);
+}
+
+
+
--- /dev/null
+#ifndef __WPARSER_H__
+#define __WPARSER_H__
+#include "postgres.h"
+#include "fmgr.h"
+
+typedef struct { /* cached description of one row of pg_ts_parser */
+ Oid prs_id; /* oid of the pg_ts_parser row */
+ FmgrInfo start_info; /* prs_start function */
+ FmgrInfo getlexeme_info; /* prs_nexttoken function */
+ FmgrInfo end_info; /* prs_end function */
+ FmgrInfo headline_info; /* prs_headline function */
+ Oid lextype; /* prs_lextype function, kept by oid only */
+ void *prs; /* value returned by the start function; passed back to the other parser functions */
+} WParserInfo;
+
+void init_prs(Oid id, WParserInfo *prs); /* load the description of parser id into *prs */
+WParserInfo* findprs(Oid id); /* cached lookup; loads the parser on first use */
+Oid name2id_prs(text *name); /* parser name -> oid; errors if the name is unknown */
+void reset_prs(void); /* drop all cached parser data */
+
+
+typedef struct { /* description of one token type, as returned by a lextype function */
+ int lexid; /* token type id; 0 terminates an array of LexDescr */
+ char *alias; /* short name */
+ char *descr; /* human-readable description */
+} LexDescr;
+
+#endif
--- /dev/null
+/*
+ * default word parser
+ * Teodor Sigaev
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "postgres.h"
+#include "utils/builtins.h"
+
+#include "dict.h"
+#include "wparser.h"
+#include "common.h"
+#include "ts_cfg.h"
+#include "wordparser/parser.h"
+#include "wordparser/deflex.h"
+
+PG_FUNCTION_INFO_V1(prsd_lextype);
+Datum prsd_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_lextype
+ *	Build the table of token types known to the default parser.
+ *
+ * Returns a palloc'd array of LASTNUM+1 LexDescr entries.  Entries
+ * 0..LASTNUM-1 describe token types 1..LASTNUM, with alias and descr
+ * copied (pstrdup) from tok_alias[] and lex_descr[].  The final entry has
+ * lexid 0 and acts as the terminator; its alias and descr are not set.
+ */
+Datum
+prsd_lextype(PG_FUNCTION_ARGS) {
+	LexDescr	*table = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
+	LexDescr	*slot = table;
+	int			lexid;
+
+	for (lexid = 1; lexid <= LASTNUM; lexid++, slot++) {
+		slot->lexid = lexid;
+		slot->alias = pstrdup(tok_alias[lexid]);
+		slot->descr = pstrdup(lex_descr[lexid]);
+	}
+
+	/* slot now addresses entry LASTNUM: the terminator */
+	slot->lexid = 0;
+
+	PG_RETURN_POINTER(table);
+}
+
+PG_FUNCTION_INFO_V1(prsd_start);
+Datum prsd_start(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_start
+ *	Begin parsing a buffer with the default parser.
+ *
+ * Argument 0 is a pointer to the text to parse and argument 1 its length
+ * as an int32; both are handed to start_parse_str().  Always returns a
+ * NULL pointer.
+ */
+Datum
+prsd_start(PG_FUNCTION_ARGS) {
+	char	*buf = (char *) PG_GETARG_POINTER(0);
+	int32	buflen = PG_GETARG_INT32(1);
+
+	start_parse_str(buf, buflen);
+
+	PG_RETURN_POINTER(NULL);
+}
+
+PG_FUNCTION_INFO_V1(prsd_getlexeme);
+Datum prsd_getlexeme(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_getlexeme
+ *	Fetch the next token from the default parser.
+ *
+ * Argument 0 is not read.  Argument 1 (char **) receives the value of
+ * "token" and argument 2 (int *) the value of "tokenlen" after the call
+ * to tsearch2_yylex(); neither name is declared in this block
+ * (presumably lexer state from wordparser/parser.h -- confirm).
+ *
+ * Returns the value of tsearch2_yylex() as an int32.
+ */
+Datum
+prsd_getlexeme(PG_FUNCTION_ARGS) {
+	char	**tokptr = (char **) PG_GETARG_POINTER(1);
+	int		*toklen = (int *) PG_GETARG_POINTER(2);
+	int		toktype;
+
+	/* run the lexer first: token/tokenlen are read only after it returns */
+	toktype = tsearch2_yylex();
+
+	*tokptr = token;
+	*toklen = tokenlen;
+
+	PG_RETURN_INT32(toktype);
+}
+
+PG_FUNCTION_INFO_V1(prsd_end);
+Datum prsd_end(PG_FUNCTION_ARGS);
+/*
+ * prsd_end
+ *	Finish a parse by calling end_parse().  No argument is read (the
+ *	fetch of argument 0 is commented out) and nothing is returned.
+ */
+Datum
+prsd_end(PG_FUNCTION_ARGS) {
+ /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+ end_parse();
+ PG_RETURN_VOID();
+}
+
+/*
+ * Token-type classes used when picking headline boundaries.  The numbers
+ * are parser token type codes.  NOTE(review): presumably the ids defined
+ * in wordparser/deflex.h -- confirm the meaning of each code there.
+ *
+ * In the code visible here only HLIDIGNORE, NONWORDTOKEN and NOENDTOKEN
+ * are referenced directly (IDIGNORE only through NOENDTOKEN).
+ */
+#define LEAVETOKEN(x) ( (x)==12 )
+#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
+#define ENDPUNCTOKEN(x) ( (x)==12 )
+
+
+#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
+/* words of these types get their "replace" flag set inside the headline */
+#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
+/* types that are not counted as words when measuring headline length */
+#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
+/* types on which a headline should preferably not end */
+#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
+
+/*
+ * Window of parsed words handed to TS_execute() as its opaque check
+ * value; checkcondition_HL() searches it for a query item.
+ */
+typedef struct {
+ HLWORD *words; /* first word of the window */
+ int len; /* number of words in the window */
+} hlCheck;
+
+/*
+ * checkcondition_HL
+ *	TS_execute() callback: report whether query item "val" is attached to
+ *	any word of the hlCheck window passed as "checkval".
+ */
+static bool
+checkcondition_HL(void *checkval, ITEM *val) {
+	hlCheck	*window = (hlCheck *) checkval;
+	int		idx = 0;
+
+	while ( idx < window->len ) {
+		if ( window->words[idx].item == val )
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+
+/*
+ * hlCover
+ *	Find the next span of parsed words, starting the search at word *p,
+ *	that satisfies the query.
+ *
+ * prs   - parsed document (words[0 .. curwords-1])
+ * query - query whose VAL items are looked up among the words
+ * p     - in: first word index to consider; out: first word of the cover
+ * q     - out: last word of the cover
+ *
+ * Returns true with *p/*q set to the cover's bounds, false when no
+ * further cover exists (then *p and *q hold no useful value).
+ *
+ * A candidate span that does not satisfy the query is retried one word
+ * further right.  This is done in a loop rather than by recursion so the
+ * stack depth does not grow with the length of the document.
+ */
+static bool
+hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
+	int		i,j;
+	ITEM	*item;
+	int		pos=*p;
+
+	for(;;) {
+		*q=0;
+		*p=0x7fffffff;
+
+		/*
+		 * Right edge: for every VAL item take its first occurrence at or
+		 * after pos; *q becomes the largest of those indexes.
+		 */
+		item=GETQUERY(query);
+		for(j=0;j<query->size;j++) {
+			if ( item->type != VAL ) {
+				item++;
+				continue;
+			}
+			for(i=pos;i<prs->curwords;i++) {
+				if ( prs->words[i].item == item ) {
+					if ( i>*q )
+						*q = i;
+					break;
+				}
+			}
+			item++;
+		}
+
+		/* *q still 0: treated as "nothing found" */
+		if ( *q==0 )
+			return false;
+
+		/*
+		 * Left edge: for every VAL item take its last occurrence within
+		 * [pos, *q]; *p becomes the smallest of those indexes.
+		 */
+		item=GETQUERY(query);
+		for(j=0;j<query->size;j++) {
+			if ( item->type != VAL ) {
+				item++;
+				continue;
+			}
+			for(i=*q;i>=pos;i--) {
+				if ( prs->words[i].item == item ) {
+					if ( i<*p )
+						*p=i;
+					break;
+				}
+			}
+			item++;
+		}
+
+		if ( *p>*q )
+			return false;
+
+		{
+			/* does the span [*p, *q] really satisfy the whole query? */
+			hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
+			if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) )
+				return true;
+		}
+
+		/* not a cover: search again starting one word past its left edge */
+		pos=*p+1;
+	}
+}
+
+PG_FUNCTION_INFO_V1(prsd_headline);
+Datum prsd_headline(PG_FUNCTION_ARGS);
+
+/*
+ * prsd_headline
+ *	Pick the run of words to show as a headline and flag them in prs.
+ *
+ * Arguments (all passed as pointers):
+ *	0 - HLPRSTEXT *prs:    parsed document, updated in place
+ *	1 - text *opt:         option string, or NULL to use the defaults
+ *	2 - QUERYTYPE *query:  query whose items mark the interesting words
+ *
+ * Options, matched case-insensitively:
+ *	MaxWords (default 35), MinWords (default 15), ShortWord (default 3),
+ *	StartSel and StopSel (default: empty string).
+ *
+ * When opt is given, raises ERROR unless 0 < MinWords < MaxWords and
+ * ShortWord >= 0.
+ *
+ * Every cover reported by hlCover() is stretched or shrunk towards the
+ * MinWords..MaxWords range and the best one is kept; if there is no cover
+ * the headline starts at word 0.  Words of the chosen range get in=1,
+ * plus selected/skip/replace as applicable.  prs->startsel, prs->stopsel
+ * and their lengths are always set on return.
+ *
+ * Returns prs.
+ */
+Datum
+prsd_headline(PG_FUNCTION_ARGS) {
+	HLPRSTEXT *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
+	text *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
+	QUERYTYPE *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
+	/* defaults; opt may override these as well as the start and end tags */
+	int min_words=15;
+	int max_words=35;
+	int shortword=3;
+
+	int p=0,q=0;
+	int bestb=-1,beste=-1;
+	int bestlen=-1;
+	int pose=0, poslen, curlen;
+
+	int i;
+
+	/*config*/
+	prs->startsel=NULL;
+	prs->stopsel=NULL;
+	if ( opt ) {
+		Map *map,*mptr;
+
+		parse_cfgdict(opt,&map);
+		mptr=map;
+
+		/* the map is terminated by an entry with a NULL key */
+		while(mptr && mptr->key) {
+			if ( strcasecmp(mptr->key,"MaxWords")==0 )
+				max_words=pg_atoi(mptr->value,4,1);
+			else if ( strcasecmp(mptr->key,"MinWords")==0 )
+				min_words=pg_atoi(mptr->value,4,1);
+			else if ( strcasecmp(mptr->key,"ShortWord")==0 )
+				shortword=pg_atoi(mptr->value,4,1);
+			else if ( strcasecmp(mptr->key,"StartSel")==0 )
+				prs->startsel=pstrdup(mptr->value);
+			else if ( strcasecmp(mptr->key,"StopSel")==0 )
+				prs->stopsel=pstrdup(mptr->value);
+
+			pfree(mptr->key);
+			pfree(mptr->value);
+
+			mptr++;
+		}
+		pfree(map);
+
+		if ( min_words >= max_words )
+			elog(ERROR,"Must be MinWords < MaxWords");
+		if ( min_words<=0 )
+			elog(ERROR,"Must be MinWords > 0");
+		if ( shortword<0 )
+			elog(ERROR,"Must be ShortWord >= 0");
+	}
+
+	while( hlCover(prs,query,&p,&q) ) {
+		/* find cover len in words */
+		curlen=0;
+		poslen=0;
+		for(i=p;i<=q && curlen < max_words ; i++) {
+			if ( !NONWORDTOKEN(prs->words[i].type) )
+				curlen++;
+			if ( prs->words[i].item && !prs->words[i].repeated )
+				poslen++;
+			pose=i;
+		}
+
+		if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) {
+			/* best already found, so try one more cover */
+			p++;
+			continue;
+		}
+
+		if ( curlen < max_words ) { /* find good end */
+			for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
+				/* word q was already counted by the loop above */
+				if ( i!=q ) {
+					if ( !NONWORDTOKEN(prs->words[i].type) )
+						curlen++;
+					if ( prs->words[i].item && !prs->words[i].repeated )
+						poslen++;
+				}
+				pose=i;
+				if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
+					continue;
+				if ( curlen>=min_words )
+					break;
+			}
+		} else { /* shorter cover :((( */
+			/*
+			 * NOTE(review): i is one past the last word counted above, so
+			 * the first iteration inspects a word that was not counted --
+			 * confirm this is intended.
+			 */
+			for(;curlen>min_words;i--) {
+				if ( !NONWORDTOKEN(prs->words[i].type) )
+					curlen--;
+				if ( prs->words[i].item && !prs->words[i].repeated )
+					poslen--;
+				pose=i;
+				if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
+					continue;
+				break;
+			}
+		}
+
+		/*
+		 * Keep this cover if it is the first one, if it holds more query
+		 * words and ends on an acceptable word, or if it ends on an
+		 * acceptable word while the current best does not.
+		 */
+		if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
+			( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
+			(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
+			bestb=p; beste=pose;
+			bestlen=poslen;
+		}
+
+		p++;
+	}
+
+	if ( bestlen<0 ) {
+		/* no cover at all: take words from the start of the document */
+		curlen=0;
+		poslen=0;
+		for(i=0;i<prs->curwords && curlen<min_words ; i++) {
+			if ( !NONWORDTOKEN(prs->words[i].type) )
+				curlen++;
+			pose=i;
+		}
+		bestb=0; beste=pose;
+	}
+
+	/* flag the words of the chosen range */
+	for(i=bestb;i<=beste;i++) {
+		if ( prs->words[i].item )
+			prs->words[i].selected=1;
+		if ( prs->words[i].repeated )
+			prs->words[i].skip=1;
+		if ( HLIDIGNORE(prs->words[i].type) )
+			prs->words[i].replace=1;
+
+		prs->words[i].in=1;
+	}
+
+	/* the caller pfree()s both strings, so always hand back allocated ones */
+	if (!prs->startsel)
+		prs->startsel=pstrdup("");
+	if (!prs->stopsel)
+		prs->stopsel=pstrdup("");
+	prs->startsellen=strlen(prs->startsel);
+	prs->stopsellen=strlen(prs->stopsel);
+
+	PG_RETURN_POINTER(prs);
+}
+
+