|
-
substring(string from pattern for escape>)
+
substring(string from pattern>)
text
- extract regular expression
+ extract substring matching POSIX regular expression
- substring('Thomas' from 'mas$' for escape '\\')
+ substring('Thomas' from '...$')
mas
+ |
+
substring(string from pattern for escape)
+ text
+
+ extract substring matching SQL99 regular expression
+
+
+
+ substring('Thomas' from '%#"o_a#"_' for '#')
+ oma
+
+
|
trim(leading | trailing | both
+ Additional string manipulation functions are available and are
+ listed below. Some of them are used internally to implement the
+
SQL-standard string functions listed above.
+
+
+
+
Other String Functions
+
+
+ |
+ Function
+ Return Type
+ Description
+ Example
+ Result
+
+
+
+
+ |
+ ascii(text)
+ integer
+
Returns the ASCII code of the first character of the argument.
+ ascii('x')
+ 120
+
+
+ |
+
btrim(string text, trim text)
+ text
+
+ Remove (trim) the longest string consisting only of characters
+ in
trim from the start and end of
+
+ btrim('xyxtrimyyx','xy')
+ trim
+
+
+ |
+ chr(integer)
+ text
+
Returns the character with the given ASCII code.
+ chr(65)
+ A
+
+
+ |
+
+ text,
+
+ text
+
+ Converts string using
dest_encoding.
+ The original encoding is specified by
+
src_encoding is omitted, database
+ encoding is assumed.
+
+ convert('text_in_unicode', 'UNICODE', 'LATIN1')
+ text_in_unicode represented in ISO 8859-1
+
+
+ |
+
+
+ bytea
+
+ Decodes binary data from
string previously
+ encoded with encode(). Parameter type is same as in encode().
+
+ decode('MTIzAAE=', 'base64')
+ 123\000\001
+
+
+ |
+
+
+ text
+
+ Encodes binary data to
ASCII-only representation. Supported
+ types are: 'base64', 'hex', 'escape'.
+
+ encode('123\\000\\001', 'base64')
+ MTIzAAE=
+
+
+ |
+ initcap(text)
+ text
+ Converts first letter of each word (whitespace separated) to upper case.
+ initcap('hi thomas')
+ Hi Thomas
+
+
+ |
+ integer
+
+ length of string
+
+ length
+
+
+ character strings
+ character strings, length
+
+
+ length('jose')
+ 4
+
+
+ |
+
+
+ text
+
+ Fills up the
string to length
+
length by prepending the characters
+
fill (a space by default). If the
+
string is already longer than
+
length then it is truncated (on the
+ right).
+
+ lpad('hi', 5, 'xy')
+ xyxhi
+
+
+ |
+
ltrim(string text, text text)
+ text
+
+ Removes the longest string containing only characters from
+
trim from the start of the string.
+
+ ltrim('zzzytrim','xyz')
+ trim
+
+
+ |
+ pg_client_encoding()
+ name
+
+ Returns current client encoding name.
+
+ pg_client_encoding()
+ SQL_ASCII
+
+
+ |
+
quote_ident(string text)
+ text
+
+ Returns the given string suitably quoted to be used as an identifier
+ in an SQL query string.
+ Quotes are added only if necessary (i.e., if the string contains
+ non-identifier characters or would be case-folded).
+ Embedded quotes are properly doubled.
+
+ quote_ident('Foo')
+ "Foo"
+
+
+ |
+
quote_literal(string text)
+ text
+
+ Returns the given string suitably quoted to be used as a literal
+ in an SQL query string.
+ Embedded quotes and backslashes are properly doubled.
+
+ quote_literal('O\'Reilly')
+ 'O''Reilly'
+
+
+ |
+ repeat(text, integer)
+ text
+ Repeat text a number of times.
+ repeat('Pg', 4)
+ PgPgPgPg
+
+
+ |
+ text
+
Replace all occurrences in string of substring
+
+ replace('abcdefabcdef', 'cd', 'XX')
+ abXXefabXXef
+
+
+ |
+
+
+ text
+
+ Fills up the
string to length
+
length by appending the characters
+
fill (a space by default). If the
+
string is already longer than
+
length then it is truncated.
+
+ rpad('hi', 5, 'xy')
+ hixyx
+
+
+ |
+ text
+
+ Removes the longest string containing only characters from
+
trim from the end of the string.
+
+ rtrim('trimxxxx','x')
+ trim
+
+
+ |
+
split_part(string text,
+ text
+
Split string on delimiter
+ returning the resulting (one based)
column number.
+
+ split_part('abc~@~def~@~ghi','~@~',2)
+ def
+
+
+ |
+
strpos(string, substring)
+ text
+
+ Locates specified substring. (same as
+
string), but note the reversed
+ argument order)
+
+ strpos('high','ig')
+ 2
+
+
+ |
+
substr(string, from , count)
+ text
+
+ Extracts specified substring. (same as
+
substring(string from from for count))
+
+ substr('alphabet', 3, 2)
+ ph
+
+
+ |
+ to_ascii(text
+ text
+
Converts text from multibyte encoding to ASCII.
+ to_ascii('Karel')
+ Karel
+
+
+ |
+ or bigint)
+ text
+
Convert number to its equivalent hexadecimal
+ representation.
+
+ to_hex(9223372036854775807::bigint)
+ 7fffffffffffffff
+
+
+ |
+
+ text,
+
+ text
+
+ Any character in
string that matches a
+ character in the
from set is replaced by
+ the corresponding character in the
to
+ set.
+
+ translate('12345', '14', 'ax')
+ a23x5
+
+
+
+
+
+
+ The to_ascii function supports conversion from
+ LATIN1, LATIN2, WIN1250 (CP1250) only.
+
+
+
Available conversion names
|
- win1250_to_utf_8
- WIN1250
- UNICODE
-
-
- |
- win1251_to_iso_8859_5
- WIN
- ISO_8859_5
-
-
- |
- win1251_to_koi8r
- WIN
- KOI8
-
-
- |
- win1251_to_mic
- WIN
- MULE_INTERNAL
-
-
- |
- win1251_to_utf_8
- WIN
- UNICODE
-
-
- |
- win1251_to_win866
- WIN
- ALT
-
-
- |
- win1256_to_utf_8
- WIN1256
- UNICODE
-
-
- |
- win866_to_iso_8859_5
- ALT
- ISO_8859_5
-
-
- |
- win866_to_koi8r
- ALT
- KOI8
-
-
- |
- win866_to_mic
- ALT
- MULE_INTERNAL
-
-
- |
- win866_to_utf_8
- ALT
- UNICODE
-
-
- |
- win866_to_win1251
- ALT
- WIN
-
-
- |
- win874_to_utf_8
- WIN874
- UNICODE
-
-
-
-
-
-
- Additional string manipulation functions are available and are
- listed below. Some of them are used internally to implement the
-
SQL-standard string functions listed above.
-
-
-
-
Other String Functions
-
-
- |
- Function
- Return Type
- Description
- Example
- Result
-
-
-
-
- |
- ascii(text)
- integer
-
Returns the ASCII code of the first character of the argument.
- ascii('x')
- 120
-
-
- |
-
btrim(string text, trim text)
- text
-
- Remove (trim) the longest string consisting only of characters
- in
trim from the start and end of
-
- btrim('xyxtrimyyx','xy')
- trim
-
-
- |
- chr(integer)
- text
-
Returns the character with the given ASCII code.
- chr(65)
- A
-
-
- |
-
- text,
-
- text
-
- Converts string using
dest_encoding.
- The original encoding is specified by
-
src_encoding is omitted, database
- encoding is assumed.
-
- convert('text_in_unicode', 'UNICODE', 'LATIN1')
- text_in_unicode represented in ISO 8859-1
-
-
- |
-
-
- bytea
-
- Decodes binary data from
string previously
- encoded with encode(). Parameter type is same as in encode().
-
- decode('MTIzAAE=', 'base64')
- 123\000\001
-
-
- |
-
-
- text
-
- Encodes binary data to
ASCII-only representation. Supported
- types are: 'base64', 'hex', 'escape'.
-
- encode('123\\000\\001', 'base64')
- MTIzAAE=
-
-
- |
- initcap(text)
- text
- Converts first letter of each word (whitespace separated) to upper case.
- initcap('hi thomas')
- Hi Thomas
-
-
- |
- integer
-
- length of string
-
- length
-
-
- character strings
- character strings, length
-
-
- length('jose')
- 4
-
-
- |
-
-
- text
-
- Fills up the
string to length
-
length by prepending the characters
-
fill (a space by default). If the
-
string is already longer than
-
length then it is truncated (on the
- right).
-
- lpad('hi', 5, 'xy')
- xyxhi
-
-
- |
-
ltrim(string text, text text)
- text
-
- Removes the longest string containing only characters from
-
trim from the start of the string.
-
- ltrim('zzzytrim','xyz')
- trim
+ win1250_to_utf_8
+ WIN1250
+ UNICODE
|
- pg_client_encoding()
- name
-
- Returns current client encoding name.
-
- pg_client_encoding()
- SQL_ASCII
+ win1251_to_iso_8859_5
+ WIN
+ ISO_8859_5
|
-
quote_ident(string text)
- text
-
- Returns the given string suitably quoted to be used as an identifier
- in an SQL query string.
- Quotes are added only if necessary (i.e., if the string contains
- non-identifier characters or would be case-folded).
- Embedded quotes are properly doubled.
-
- quote_ident('Foo')
- "Foo"
+ win1251_to_koi8r
+ WIN
+ KOI8
|
-
quote_literal(string text)
- text
-
- Returns the given string suitably quoted to be used as a literal
- in an SQL query string.
- Embedded quotes and backslashes are properly doubled.
-
- quote_literal('O\'Reilly')
- 'O''Reilly'
+ win1251_to_mic
+ WIN
+ MULE_INTERNAL
|
- repeat(text, integer)
- text
- Repeat text a number of times.
- repeat('Pg', 4)
- PgPgPgPg
+ win1251_to_utf_8
+ WIN
+ UNICODE
|
- text
-
Replace all occurrences in string of substring
-
- replace('abcdefabcdef', 'cd', 'XX')
- abXXefabXXef
+ win1251_to_win866
+ WIN
+ ALT
|
-
-
- text
-
- Fills up the
string to length
-
length by appending the characters
-
fill (a space by default). If the
-
string is already longer than
-
length then it is truncated.
-
- rpad('hi', 5, 'xy')
- hixyx
+ win1256_to_utf_8
+ WIN1256
+ UNICODE
|
- text
-
- Removes the longest string containing only characters from
-
trim from the end of the string.
-
- rtrim('trimxxxx','x')
- trim
+ win866_to_iso_8859_5
+ ALT
+ ISO_8859_5
|
-
split_part(string text,
- text
-
Split string on delimiter
- returning the resulting (one based)
column number.
-
- split_part('abc~@~def~@~ghi','~@~',2)
- def
+ win866_to_koi8r
+ ALT
+ KOI8
|
-
strpos(string, substring)
- text
-
- Locates specified substring. (same as
-
string), but note the reversed
- argument order)
-
- strpos('high','ig')
- 2
+ win866_to_mic
+ ALT
+ MULE_INTERNAL
|
-
substr(string, from , count)
- text
-
- Extracts specified substring. (same as
-
substring(string from from for count))
-
- substr('alphabet', 3, 2)
- ph
+ win866_to_utf_8
+ ALT
+ UNICODE
|
- to_ascii(text
- text
-
Converts text from multibyte encoding to ASCII.
- to_ascii('Karel')
- Karel
+ win866_to_win1251
+ ALT
+ WIN
|
- or bigint)
- text
-
Convert number to its equivalent hexadecimal
- representation.
-
- to_hex(9223372036854775807::bigint)
- 7fffffffffffffff
+ win874_to_utf_8
+ WIN874
+ UNICODE
- |
-
- text,
-
- text
-
- Any character in
string that matches a
- character in the
from set is replaced by
- the corresponding character in the
to
- set.
-
- translate('12345', '14', 'ax')
- a23x5
-
-
- The to_ascii function supports conversion from
- LATIN1, LATIN2, WIN1250 (CP1250) only.
-
Pattern Matching
- There are two separate approaches to pattern matching provided by
+ There are three separate approaches to pattern matching provided by
PostgreSQL: the traditional
- LIKE operator and the more recent
+ LIKE operator, the more recent
- SIMILAR TO operator implementing
+ SIMILAR TO operator, and
POSIX-style regular expressions.
Additionally, a pattern matching function,
- SUBSTRING, is available, as defined in
+ SUBSTRING, is available, using either
+
SQL99-style or POSIX-style regular expressions.
- Both LIKE and SIMILAR TO
- are SQL-standard operators which are also available in alternate
- forms as
PostgreSQL operators; look at
- ~ and ~~ for examples.
-
-
LIKE
-
+ sql99-regexp">
Regular Expressions
-
+
+ pattern matching
+
+
+
+
+
+
+
+
+
+string SIMILAR TO pattern ESCAPE escape-character
+string NOT SIMILAR TO pattern ESCAPE escape-character
+
+
+ The SIMILAR TO operator returns true or false
+ depending on whether its pattern matches the given string. It is
+ much like LIKE, except that it interprets the
+ pattern using
SQL99's definition of a regular
+ expression.
+
SQL99's regular expressions are a curious cross
+ between LIKE notation and common regular expression
+ notation.
+
+
+ Like LIKE, the SIMILAR TO
+ operator succeeds only if its pattern matches the entire string;
+ this is unlike common regular expression practice, wherein the pattern
+ may match any part of the string.
+ Also like
+ LIKE, SIMILAR TO uses
+ %> and _> as wildcard characters denoting
+ any string and any single character, respectively (these are
+ comparable to .*> and .> in POSIX regular
+ expressions).
+
+
+ In addition to these facilities borrowed from LIKE,
+ SIMILAR TO supports these pattern-matching
+ metacharacters borrowed from POSIX regular expressions:
+
+
+
+ | denotes alternation (either of two alternatives).
+
+
+
+ * denotes repetition of the previous item zero
+ or more times.
+
+
+
+ + denotes repetition of the previous item one
+ or more times.
+
+
+
+ Parentheses () may be used to group items into
+ a single logical item.
+
+
+
+ A bracket expression [...] specifies a character
+ class, just as in POSIX regular expressions.
+
+
+
+
+ Notice that bounded repetition (?> and {...}>)
+ are not provided, though they exist in POSIX. Also, dot (.>)
+ is not a metacharacter.
+
+
+ As with LIKE>, a backslash disables the special meaning
+ of any of these metacharacters; or a different escape character can
+ be specified with ESCAPE>.
+
+
+
+ Some examples:
+'abc' SIMILAR TO 'abc' true
+'abc' SIMILAR TO 'a' false
+'abc' SIMILAR TO '%(b|d)%' true
+'abc' SIMILAR TO '(b|c)%' false
+
+
+
+
+ The SUBSTRING function with three parameters,
+ pattern FOR
+ escape), provides extraction of a substring
+ that matches a SQL99 regular expression pattern. As with SIMILAR TO,
+ the specified pattern must match to the entire data string, else the
+ function fails and returns NULL. To indicate the part of the pattern
+ that should be returned on success, SQL99 specifies that the pattern
+ must contain two occurrences of the escape character followed by
+ double quote (">). The text matching the portion of the
+ pattern between these markers is returned.
+
+
+
+ Some examples:
+SUBSTRING('foobar' FROM '%#"o_b#"%' FOR '#') oob
+SUBSTRING('foobar' FROM '#"o_b#"%' FOR '#') NULL
+
+
+
+
+
+
+
+
POSIX Regular Expressions
+
+
pattern matching
Does not match regular expression, case insensitive
'thomas' !~* '.*vadim.*'
-
- |
- SIMILAR TO
- Matches regular expression, case sensitive
- 'thomas' SIMILAR TO '.*thomas.*'
-
POSIX regular expressions provide a more
powerful means for
- pattern matching than the LIKE function.
+ pattern matching than the LIKE and
+ SIMILAR TO> operators.
Many Unix tools such as egrep,
sed, or awk use a pattern
matching language that is similar to the one described here.
Some examples:
-'abc' SIMILAR TO 'abc' true
-'abc' SIMILAR TO '^a' true
-'abc' SIMILAR TO '(b|d)' true
-'abc' SIMILAR TO '^(b|c)' false
+'abc' ~ 'abc' true
+'abc' ~ '^a' true
+'abc' ~ '(b|d)' true
+'abc' ~ '^(b|c)' false
+
+
+
+
+ The SUBSTRING function with two parameters,
+ pattern), provides extraction of a substring
+ that matches a POSIX regular expression pattern. It returns NULL if
+ there is no match, otherwise the portion of the text that matched the
+ pattern. But if the pattern contains any parentheses, the portion
+ of the text that matched the first parenthesized subexpression (the
+ one whose left parenthesis comes first) is
+ returned. You can always put parentheses around the whole expression
+ if you want to use parentheses within it without triggering this
+ exception.
+
+
+
+ Some examples:
+SUBSTRING('foobar' FROM 'o.b') oob
+SUBSTRING('foobar' FROM 'o(.)b') o
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.367 2002/09/18 21:35:21 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.368 2002/09/22 17:27:23 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
}
| a_expr SIMILAR TO a_expr %prec SIMILAR
- { $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, $4); }
+ {
+ A_Const *c = makeNode(A_Const);
+ FuncCall *n = makeNode(FuncCall);
+ c->val.type = T_Null;
+ n->funcname = SystemFuncName("similar_escape");
+ n->args = makeList2($4, (Node *) c);
+ n->agg_star = FALSE;
+ n->agg_distinct = FALSE;
+ $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n);
+ }
| a_expr SIMILAR TO a_expr ESCAPE a_expr
{
FuncCall *n = makeNode(FuncCall);
- n->funcname = SystemFuncName("like_escape");
+ n->funcname = SystemFuncName("similar_escape");
n->args = makeList2($4, $6);
n->agg_star = FALSE;
n->agg_distinct = FALSE;
$$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n);
}
| a_expr NOT SIMILAR TO a_expr %prec SIMILAR
- { $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, $5); }
+ {
+ A_Const *c = makeNode(A_Const);
+ FuncCall *n = makeNode(FuncCall);
+ c->val.type = T_Null;
+ n->funcname = SystemFuncName("similar_escape");
+ n->args = makeList2($5, (Node *) c);
+ n->agg_star = FALSE;
+ n->agg_distinct = FALSE;
+ $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, (Node *) n);
+ }
| a_expr NOT SIMILAR TO a_expr ESCAPE a_expr
{
FuncCall *n = makeNode(FuncCall);
- n->funcname = SystemFuncName("like_escape");
+ n->funcname = SystemFuncName("similar_escape");
n->args = makeList2($5, $7);
n->agg_star = FALSE;
n->agg_distinct = FALSE;
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.42 2002/09/04 20:31:28 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.43 2002/09/22 17:27:23 tgl Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
char *sterm;
int len;
bool match;
- int nmatch = 1;
- regmatch_t pmatch;
+ regmatch_t pmatch[2];
/* be sure sterm is null-terminated */
len = VARSIZE(s) - VARHDRSZ;
sterm[len] = '\0';
/*
- * We need the match info back from the pattern match to be able to
- * actually extract the substring. It seems to be adequate to pass in
- * a structure to return only one result.
+ * We pass two regmatch_t structs to get info about the overall match
+ * and the match for the first parenthesized subexpression (if any).
+ * If there is a parenthesized subexpression, we return what it matched;
+ * else return what the whole regexp matched.
*/
- match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch);
+ match = RE_compile_and_execute(p, sterm, REG_EXTENDED, 2, pmatch);
+
pfree(sterm);
/* match? then return the substring matching the pattern */
if (match)
{
+ int so,
+ eo;
+
+ so = pmatch[1].rm_so;
+ eo = pmatch[1].rm_eo;
+ if (so < 0 || eo < 0)
+ {
+ /* no parenthesized subexpression */
+ so = pmatch[0].rm_so;
+ eo = pmatch[0].rm_eo;
+ }
+
return (DirectFunctionCall3(text_substr,
PointerGetDatum(s),
- Int32GetDatum(pmatch.rm_so + 1),
- Int32GetDatum(pmatch.rm_eo - pmatch.rm_so)));
+ Int32GetDatum(so + 1),
+ Int32GetDatum(eo - so)));
}
PG_RETURN_NULL();
}
+
+/* similar_escape()
+ * Convert a SQL99 regexp pattern to POSIX style, so it can be used by
+ * our regexp engine.
+ */
+Datum
+similar_escape(PG_FUNCTION_ARGS)
+{
+ text *pat_text;
+ text *esc_text;
+ text *result;
+ unsigned char *p,
+ *e,
+ *r;
+ int plen,
+ elen;
+ bool afterescape = false;
+ int nquotes = 0;
+
+ /* This function is not strict, so must test explicitly */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+ pat_text = PG_GETARG_TEXT_P(0);
+ p = VARDATA(pat_text);
+ plen = (VARSIZE(pat_text) - VARHDRSZ);
+ if (PG_ARGISNULL(1))
+ {
+ /* No ESCAPE clause provided; default to backslash as escape */
+ e = "\\";
+ elen = 1;
+ }
+ else
+ {
+ esc_text = PG_GETARG_TEXT_P(1);
+ e = VARDATA(esc_text);
+ elen = (VARSIZE(esc_text) - VARHDRSZ);
+ if (elen == 0)
+ e = NULL; /* no escape character */
+ else if (elen != 1)
+ elog(ERROR, "ESCAPE string must be empty or one character");
+ }
+
+ /* We need room for ^, $, and up to 2 output bytes per input byte */
+ result = (text *) palloc(VARHDRSZ + 2 + 2 * plen);
+ r = VARDATA(result);
+
+ *r++ = '^';
+
+ while (plen > 0)
+ {
+ unsigned char pchar = *p;
+
+ if (afterescape)
+ {
+ if (pchar == '"') /* for SUBSTRING patterns */
+ *r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
+ else
+ {
+ *r++ = '\\';
+ *r++ = pchar;
+ }
+ afterescape = false;
+ }
+ else if (e && pchar == *e)
+ {
+ /* SQL99 escape character; do not send to output */
+ afterescape = true;
+ }
+ else if (pchar == '%')
+ {
+ *r++ = '.';
+ *r++ = '*';
+ }
+ else if (pchar == '_')
+ {
+ *r++ = '.';
+ }
+ else if (pchar == '\\' || pchar == '.' || pchar == '?' ||
+ pchar == '{')
+ {
+ *r++ = '\\';
+ *r++ = pchar;
+ }
+ else
+ {
+ *r++ = pchar;
+ }
+ p++, plen--;
+ }
+
+ *r++ = '$';
+
+ VARATT_SIZEP(result) = r - ((unsigned char *) result);
+
+ PG_RETURN_TEXT_P(result);
+}
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pg_proc.h,v 1.272 2002/09/18 21:35:23 tgl Exp $
+ * $Id: pg_proc.h,v 1.273 2002/09/22 17:27:23 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
DATA(insert OID = 1622 ( repeat PGNSP PGUID 12 f f t f i 2 25 "25 23" repeat - _null_ ));
DESCR("replicate string int4 times");
+DATA(insert OID = 1623 ( similar_escape PGNSP PGUID 12 f f f f i 2 25 "25 25" similar_escape - _null_ ));
+DESCR("convert SQL99 regexp pattern to POSIX style");
+
DATA(insert OID = 1624 ( mul_d_interval PGNSP PGUID 12 f f t f i 2 1186 "701 1186" mul_d_interval - _null_ ));
DATA(insert OID = 1633 ( texticlike PGNSP PGUID 12 f f t f i 2 16 "25 25" texticlike - _null_ ));
DATA(insert OID = 1636 ( nameicnlike PGNSP PGUID 12 f f t f i 2 16 "19 25" nameicnlike - _null_ ));
DESCR("does not match LIKE expression, case-insensitive");
DATA(insert OID = 1637 ( like_escape PGNSP PGUID 12 f f t f i 2 25 "25 25" like_escape - _null_ ));
-DESCR("convert match pattern to use backslash escapes");
+DESCR("convert LIKE pattern to use backslash escapes");
DATA(insert OID = 1689 ( update_pg_pwd_and_pg_group PGNSP PGUID 12 f f t f v 0 2279 "" update_pg_pwd_and_pg_group - _null_ ));
DESCR("update pg_pwd and pg_group files");
DATA(insert OID = 2008 ( notlike PGNSP PGUID 12 f f t f i 2 16 "17 17" byteanlike - _null_ ));
DESCR("does not match LIKE expression");
DATA(insert OID = 2009 ( like_escape PGNSP PGUID 12 f f t f i 2 17 "17 17" like_escape_bytea - _null_ ));
-DESCR("convert match pattern to use backslash escapes");
+DESCR("convert LIKE pattern to use backslash escapes");
DATA(insert OID = 2010 ( length PGNSP PGUID 12 f f t f i 1 23 "17" byteaoctetlen - _null_ ));
DESCR("octet length");
DATA(insert OID = 2011 ( byteacat PGNSP PGUID 12 f f t f i 2 17 "17 17" byteacat - _null_ ));
DESCR("subtract");
DATA(insert OID = 2073 ( substring PGNSP PGUID 12 f f t f i 2 25 "25 25" textregexsubstr - _null_ ));
-DESCR("substitutes regular expression");
-DATA(insert OID = 2074 ( substring PGNSP PGUID 14 f f t f i 3 25 "25 25 25" "select substring($1, like_escape($2, $3))" - _null_ ));
-DESCR("substitutes regular expression with escape argument");
+DESCR("extracts text matching regular expression");
+DATA(insert OID = 2074 ( substring PGNSP PGUID 14 f f t f i 3 25 "25 25 25" "select substring($1, similar_escape($2, $3))" - _null_ ));
+DESCR("extracts text matching SQL99 regular expression");
DATA(insert OID = 2075 ( bit PGNSP PGUID 12 f f t f i 1 1560 "20" bitfromint8 - _null_ ));
DESCR("int8 to bitstring");
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: builtins.h,v 1.201 2002/09/19 22:48:34 tgl Exp $
+ * $Id: builtins.h,v 1.202 2002/09/22 17:27:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern Datum texticregexeq(PG_FUNCTION_ARGS);
extern Datum texticregexne(PG_FUNCTION_ARGS);
extern Datum textregexsubstr(PG_FUNCTION_ARGS);
+extern Datum similar_escape(PG_FUNCTION_ARGS);
/* regproc.c */
extern Datum regprocin(PG_FUNCTION_ARGS);
t
(1 row)
--- T581 regular expression substring
-SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd";
+-- T581 regular expression substring (with SQL99's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
bcd
-----
bcd
(1 row)
-- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
True
------
t
t
(1 row)
--- PostgreSQL extention to allow omitting the escape character
-SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde";
+-- PostgreSQL extension to allow omitting the escape character;
+-- here the regexp is taken as Posix syntax
+SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
+ cde
+-----
+ cde
+(1 row)
+
+-- With a parenthesized subexpression, return only what matches the subexpr
+SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
cde
-----
cde
SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
--- T581 regular expression substring
-SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd";
+-- T581 regular expression substring (with SQL99's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
-- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
-- Null inputs should return NULL
SELECT SUBSTRING('abcdefg' FROM '(b|c)' FOR NULL) IS NULL AS "True";
SELECT SUBSTRING(NULL FROM '(b|c)' FOR '#') IS NULL AS "True";
SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
--- PostgreSQL extention to allow omitting the escape character
-SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde";
+-- PostgreSQL extension to allow omitting the escape character;
+-- here the regexp is taken as Posix syntax
+SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
+
+-- With a parenthesized subexpression, return only what matches the subexpr
+SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
+
-- E021-11 position expression
SELECT POSITION('4' IN '1234567890') = '4' AS "4";