Some more tsearch docs work --- sync names with CVS-tip reality, some

author Tom Lane

Sat, 25 Aug 2007 06:26:57 +0000 (06:26 +0000)

committer Tom Lane

Sat, 25 Aug 2007 06:26:57 +0000 (06:26 +0000)
author Tom Lane
Sat, 25 Aug 2007 06:26:57 +0000 (06:26 +0000)
committer Tom Lane
Sat, 25 Aug 2007 06:26:57 +0000 (06:26 +0000)
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml

index 0d1ab5000236845a23088c1c5393d931abb4a506..5124bd80ae3960dcdb3f4abc3b852e4079d1f814 100644 (file)
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -210,9 +210,9 @@ SELECT 'a:1 fat:2 cat:3 sat:4 on:5 a:6 mat:7 and:8 ate:9 a:10 fat:11 rat:12'::ts
   'a':1,6,10 'on':5 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'rat':12 'sat':4
  
  
-Each lexeme position also can be labeled as 'A',
-'B', 'C', 'D',
-where 'D' is the default. These labels can be used to group
+Each lexeme position also can be labeled as A,
+B, C, D,
+where D is the default. These labels can be used to group
  lexemes into different importance or
  rankings, for example to reflect document structure.
  Actual values can be assigned at search time and used during the calculation
@@ -668,9 +668,9 @@ setweight(vector TSVECTOR, 
  
  
  This function returns a copy of the input vector in which every location
-has been labeled with either the letter 'A',
-'B', or 'C', or the default label
-'D' (which is the default for new vectors
+has been labeled with either the letter A,
+B, or C, or the default label
+D (which is the default for new vectors
  and as such is usually not displayed). These labels are retained
  when vectors are concatenated, allowing words from different parts of a
  document to be weighted differently by ranking functions.
@@ -807,13 +807,12 @@ to be made.
  
  
  
-stat
+ts_stat
  
  
  
  
-stat(sqlquery text , weight text ) returns SETOF statinfo
-
+ts_stat(sqlquery text , weights text ) returns SETOF statinfo
  
  
  
@@ -821,27 +820,27 @@ stat(sqlquery text 
  
  Here statinfo is a type, defined as:
  
-CREATE TYPE statinfo AS (word text, ndoc int4, nentry int4);
+CREATE TYPE statinfo AS (word text, ndoc integer, nentry integer);
  
-and sqlquery is a query which returns a
-tsvector column's contents.  stat returns
-statistics about a tsvector column, i.e., the number of
-documents, ndoc, and the total number of words in the
-collection, nentry.  It is useful for checking your
-configuration and to find stop word candidates.  For example, to find
-the ten most frequent words:
+and sqlquery is a text value containing a SQL query
+which returns a single tsvector column.  ts_stat
+executes the query and returns statistics about each distinct word in the
+resulting tsvector data: the number of documents it occurs in, ndoc,
+and its total number of occurrences in the collection, nentry.  It is
+useful for checking your configuration and to find stop word candidates.  For
+example, to find the ten most frequent words:
  
  
-SELECT * FROM stat('SELECT vector from apod')
+SELECT * FROM ts_stat('SELECT vector from apod')
  ORDER BY ndoc DESC, nentry DESC, word
  LIMIT 10;
  
  
-Optionally, one can specify weight to obtain
+Optionally, one can specify weights to obtain
  statistics about words with a specific weight:
  
  
-SELECT * FROM stat('SELECT vector FROM apod','a')
+SELECT * FROM ts_stat('SELECT vector FROM apod','a')
  ORDER BY ndoc DESC, nentry DESC, word
  LIMIT 10;
  
@@ -1146,9 +1145,9 @@ topic.
  
  
  
-The rewrite() function changes the original query by
+The ts_rewrite() function changes the original query by
  replacing part of the query with some other string of type tsquery,
-as defined by the rewrite rule. Arguments to rewrite()
+as defined by the rewrite rule. Arguments to ts_rewrite()
  can be names of columns of type tsquery.
  
  
@@ -1161,20 +1160,20 @@ INSERT INTO aliases VALUES('a', 'c');
  
  
  
-rewrite - 1
+ts_rewrite
  
  
  
  
-rewrite (query TSQUERY, target TSQUERY, sample TSQUERY) returns TSQUERY
+ts_rewrite (query TSQUERY, target TSQUERY, sample TSQUERY) returns TSQUERY
  
  
  
  
  
  
-SELECT rewrite('a & b'::tsquery, 'a'::tsquery, 'c'::tsquery);
-  rewrite
+SELECT ts_rewrite('a & b'::tsquery, 'a'::tsquery, 'c'::tsquery);
+  ts_rewrite
    -----------
     'b' & 'c'
  
@@ -1184,21 +1183,17 @@ SELECT rewrite('a & b'::tsquery, 'a'::tsquery, 'c'::tsquery);
  
  
  
-
-rewrite - 2
-
-
  
  
-rewrite(ARRAY[query TSQUERY, target TSQUERY, sample TSQUERY]) returns TSQUERY
+ts_rewrite(ARRAY[query TSQUERY, target TSQUERY, sample TSQUERY]) returns TSQUERY
  
  
  
  
  
  
-SELECT rewrite(ARRAY['a & b'::tsquery, t,s]) FROM aliases;
-  rewrite
+SELECT ts_rewrite(ARRAY['a & b'::tsquery, t,s]) FROM aliases;
+  ts_rewrite
    -----------
     'b' & 'c'
  
@@ -1208,21 +1203,17 @@ SELECT rewrite(ARRAY['a & b'::tsquery, t,s]) FROM aliases;
  
  
  
-
-rewrite - 3
-
-
  
  
-rewrite (query TSQUERY,'SELECT target ,sample FROM test'::text) returns TSQUERY
+ts_rewrite (query TSQUERY,'SELECT target ,sample FROM test'::text) returns TSQUERY
  
  
  
  
  
  
-SELECT rewrite('a & b'::tsquery, 'SELECT t,s FROM aliases');
-  rewrite
+SELECT ts_rewrite('a & b'::tsquery, 'SELECT t,s FROM aliases');
+  ts_rewrite
    -----------
     'b' & 'c'
  
@@ -1246,12 +1237,12 @@ SELECT * FROM aliases;
  
  This ambiguity can be resolved by specifying a sort order:
  
-SELECT rewrite('a & b', 'SELECT t, s FROM aliases ORDER BY t DESC');
- rewrite
+SELECT ts_rewrite('a & b', 'SELECT t, s FROM aliases ORDER BY t DESC');
+ ts_rewrite
  ---------
   'cc'
-SELECT rewrite('a & b', 'SELECT t, s FROM aliases ORDER BY t ASC');
-  rewrite
+SELECT ts_rewrite('a & b', 'SELECT t, s FROM aliases ORDER BY t ASC');
+  ts_rewrite
  -----------
   'b' & 'c'
  
@@ -1263,7 +1254,7 @@ Let's consider a real-life astronomical example. We'll expand query
  
  CREATE TABLE aliases (t tsquery primary key, s tsquery);
  INSERT INTO aliases VALUES(to_tsquery('supernovae'), to_tsquery('supernovae|sn'));
-SELECT rewrite(to_tsquery('supernovae'),  'SELECT * FROM aliases') && to_tsquery('crab');
+SELECT ts_rewrite(to_tsquery('supernovae'),  'SELECT * FROM aliases') && to_tsquery('crab');
              ?column?
  ---------------------------------
   ( 'supernova' | 'sn' ) & 'crab'
@@ -1271,7 +1262,7 @@ SELECT rewrite(to_tsquery('supernovae'),  'SELECT * FROM aliases') && to
  Note that we can change the rewriting rule online:
  
  UPDATE aliases SET s=to_tsquery('supernovae|sn & !nebulae') WHERE t=to_tsquery('supernovae');
-SELECT rewrite(to_tsquery('supernovae'),  'SELECT * FROM aliases') && to_tsquery('crab');
+SELECT ts_rewrite(to_tsquery('supernovae'),  'SELECT * FROM aliases') && to_tsquery('crab');
                    ?column?
  ---------------------------------------------
   ( 'supernova' | 'sn' & !'nebula' ) & 'crab'
@@ -1288,10 +1279,10 @@ for a possible hit. To filter out obvious non-candidate rules there are containm
  operators for the tsquery type. In the example below, we select only those
  rules which might contain the original query:
  
-SELECT rewrite(ARRAY['a & b'::tsquery, t,s])
+SELECT ts_rewrite(ARRAY['a & b'::tsquery, t,s])
  FROM aliases
  WHERE 'a & b' @> t;
-  rewrite
+  ts_rewrite
  -----------
   'b' & 'c'
  
@@ -1525,7 +1516,7 @@ SELECT * FROM ts_parse('default','123 - a number');
  
  
  
-token_type
+ts_token_type
  
  
  
@@ -1894,11 +1885,13 @@ configuration config_name is realized by
  superimposed coding (Knuth, 1973) of signatures, i.e., a parent is the
  result of 'OR'-ing the bit-strings of all children.  This is a second
  factor of lossiness.  It is clear that parents tend to be full of
-'1's (degenerates) and become quite useless because of the
+1s (degenerates) and become quite useless because of the
  limited selectivity.  Searching is performed as a bit comparison of a
  signature representing the query and an RD-tree entry.
-If all '1's of both signatures are in the same position we
+If all 1s of both signatures are in the same position we
  say that this branch probably matches the query, but if there is even one
  discrepancy we can definitely reject this branch.
  
@@ -2870,13 +2863,15 @@ The current limitations of Full Text Searching are:
  
  
  For comparison, the PostgreSQL 8.1 documentation
-consists of 10,441 unique words, a total of 335,420 words, and the most frequent word
-'postgresql' is mentioned 6,127 times in 655 documents.
+contained 10,441 unique words, a total of 335,420 words, and the most frequent
+word postgresql was mentioned 6,127 times in 655 documents.
  
  
+
  
-Another example - the PostgreSQL mailing list archives
-consists of 910,989  unique words with 57,491,343 lexemes in 461,020 messages.
+Another example — the PostgreSQL mailing list
+archives contained 910,989 unique words with 57,491,343 lexemes in 461,020
+messages.
  
  
  
@@ -2942,28 +2937,27 @@ names and object names.  The following examples illustrate this:
  => \dF+ russian
  Configuration "pg_catalog.russian"
  Parser name: "pg_catalog.default"
-Locale: 'ru_RU.UTF-8' (default)
      Token     |      Dictionaries
  --------------+-------------------------
   email        | pg_catalog.simple
   file         | pg_catalog.simple
   float        | pg_catalog.simple
   host         | pg_catalog.simple
- hword        | pg_catalog.ru_stem_utf8
+ hword        | pg_catalog.russian_stem
   int          | pg_catalog.simple
   lhword       | public.tz_simple
   lpart_hword  | public.tz_simple
   lword        | public.tz_simple
- nlhword      | pg_catalog.ru_stem_utf8
- nlpart_hword | pg_catalog.ru_stem_utf8
- nlword       | pg_catalog.ru_stem_utf8
+ nlhword      | pg_catalog.russian_stem
+ nlpart_hword | pg_catalog.russian_stem
+ nlword       | pg_catalog.russian_stem
   part_hword   | pg_catalog.simple
   sfloat       | pg_catalog.simple
   uint         | pg_catalog.simple
   uri          | pg_catalog.simple
   url          | pg_catalog.simple
   version      | pg_catalog.simple
- word         | pg_catalog.ru_stem_utf8
+ word         | pg_catalog.russian_stem
  
  
      
@@ -3112,43 +3106,43 @@ play with the standard english configuration.
  
  CREATE TEXT SEARCH CONFIGURATION public.english ( COPY = pg_catalog.english );
  
-CREATE TEXT SEARCH DICTIONARY en_ispell (
+CREATE TEXT SEARCH DICTIONARY english_ispell (
      TEMPLATE = ispell,
-    DictFile = english-utf8,
-    AffFile = english-utf8,
+    DictFile = english,
+    AffFile = english,
      StopWords = english
  );
  
  ALTER TEXT SEARCH CONFIGURATION public.english
-    ALTER MAPPING FOR lword WITH en_ispell, en_stem;
+    ALTER MAPPING FOR lword WITH english_ispell, english_stem;
  
  
  
  SELECT * FROM ts_debug('public.english','The Brightest supernovaes');
   Alias |  Description  |    Token    |              Dicts list               |          Lexized token
  -------+---------------+-------------+---------------------------------------+---------------------------------
- lword | Latin word    | The         | {public.en_ispell,pg_catalog.en_stem} | public.en_ispell: {}
+ lword | Latin word    | The         | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {}
   blank | Space symbols |             |                                       |
- lword | Latin word    | Brightest   | {public.en_ispell,pg_catalog.en_stem} | public.en_ispell: {bright}
+ lword | Latin word    | Brightest   | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {bright}
   blank | Space symbols |             |                                       |
- lword | Latin word    | supernovaes | {public.en_ispell,pg_catalog.en_stem} | pg_catalog.en_stem: {supernova}
+ lword | Latin word    | supernovaes | {public.english_ispell,pg_catalog.english_stem} | pg_catalog.english_stem: {supernova}
  (5 rows)
  
  
-In this example, the word 'Brightest' was recognized by a
+In this example, the word Brightest was recognized by the
  parser as a Latin word (alias lword)
-and came through the dictionaries public.en_ispell and
-pg_catalog.en_stem. It was recognized by
-public.en_ispell, which reduced it to the noun
+and came through the dictionaries public.english_ispell and
+pg_catalog.english_stem. It was recognized by
+public.english_ispell, which reduced it to the noun
  bright. The word supernovaes is unknown
-by the public.en_ispell dictionary so it was passed to
+by the public.english_ispell dictionary so it was passed to
  the next dictionary, and, fortunately, was recognized (in fact,
-public.en_stem is a stemming dictionary and recognizes
+pg_catalog.english_stem is a stemming dictionary and recognizes
  everything; that is why it was placed at the end of the dictionary stack).
  
  
  
-The word The was recognized by public.en_ispell
+The word The was recognized by the public.english_ispell
  dictionary as a stop word () and will not be indexed.
  
  
@@ -3159,11 +3153,11 @@ SELECT "Alias", "Token", "Lexized token"
  FROM ts_debug('public.english','The Brightest supernovaes');
   Alias |    Token    |          Lexized token
  -------+-------------+---------------------------------
- lword | The         | public.en_ispell: {}
+ lword | The         | public.english_ispell: {}
   blank |             |
- lword | Brightest   | public.en_ispell: {bright}
+ lword | Brightest   | public.english_ispell: {bright}
   blank |             |
- lword | supernovaes | pg_catalog.en_stem: {supernova}
+ lword | supernovaes | pg_catalog.english_stem: {supernova}
  (5 rows)
author	Tom Lane
	Sat, 25 Aug 2007 06:26:57 +0000 (06:26 +0000)
committer	Tom Lane
	Sat, 25 Aug 2007 06:26:57 +0000 (06:26 +0000)