+ALTER COLLATION name REFRESH VERSION
+
ALTER COLLATION name RENAME TO new_name
ALTER COLLATION name OWNER TO { new_owner | CURRENT_USER | SESSION_USER }
ALTER COLLATION name SET SCHEMA new_schema
+
+
+ REFRESH VERSION
+
+ Update the collation's version.
+ See Notes below.
+
+
+
+
+
Notes
+
+ When using collations provided by the ICU library, the ICU-specific version
+ of the collator is recorded in the system catalog when the collation object
+ is created. When the collation is then used, the current version is
+ checked against the recorded version, and a warning is issued when there is
+ a mismatch, for example:
+
+WARNING: ICU collator version mismatch
+DETAIL: The database was created using version 1.2.3.4, the library provides version 2.3.4.5.
+HINT: Rebuild all objects affected by this collation and run ALTER COLLATION pg_catalog."xx-x-icu" REFRESH VERSION, or build PostgreSQL with the right version of ICU.
+
+ A change in collation definitions can lead to corrupt indexes and other
+ problems where the database system relies on stored objects having a
+ certain sort order. Generally, this should be avoided, but it can happen
+ in legitimate circumstances, such as when
+ using pg_upgrade to upgrade to server binaries linked
+ with a newer version of ICU. When this happens, all objects depending on
+ the collation should be rebuilt, for example,
+ using REINDEX. When that is done, the collation version
+ can be refreshed using the command ALTER COLLATION ... REFRESH
+ VERSION. This will update the system catalog to record the
+ current collator version and will make the warning go away. Note that this
+ does not actually check whether all affected objects have been rebuilt
+ correctly.
+
+
+ The following query can be used to identify all collations in the current
+ database that need to be refreshed and the objects that depend on them:
+
+SELECT pg_describe_object(refclassid, refobjid, refobjsubid) AS "Collation",
+ pg_describe_object(classid, objid, objsubid) AS "Object"
+ FROM pg_depend d JOIN pg_collation c
+ ON refclassid = 'pg_collation'::regclass AND refobjid = c.oid
+ WHERE c.collversion <> pg_collation_actual_version(c.oid)
+ ORDER BY 1, 2;
+]]>
+
+
+
Examples
CREATE COLLATION [ IF NOT EXISTS ] name (
[ LOCALE = locale, ]
[ LC_COLLATE = lc_collate, ]
- [ LC_CTYPE = lc_ctype ]
+ [ LC_CTYPE = lc_ctype, ]
+ [ PROVIDER = provider, ]
+ [ VERSION = version ]
)
CREATE COLLATION [ IF NOT EXISTS ] name FROM existing_collation
+
+ provider
+
+
+ Specifies the provider to use for locale services associated with this
+ collation. Possible values are: icu, libc.
+ The available choices depend on the operating system and build options.
+
+
+
+
+
+ version
+
+
+ Specifies the version string to store with the collation. Normally,
+ this should be omitted, which will cause the version to be computed
+ from the actual version of the collation as provided by the operating
+ system. This option is intended to be used
+ by pg_upgrade for copying the version from an
+ existing installation.
+
+
+ See also ALTER COLLATION for how to handle
+ collation version mismatches.
+
+
+
+
existing_collation
#
# Records the choice of the various --enable-xxx and --with-xxx options.
+with_icu = @with_icu@
with_perl = @with_perl@
with_python = @with_python@
with_tcl = @with_tcl@
krb_srvtab = @krb_srvtab@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_LIBS = @ICU_LIBS@
+
TCLSH = @TCLSH@
TCL_LIBS = @TCL_LIBS@
TCL_LIB_SPEC = @TCL_LIB_SPEC@
ifneq ($(PORTNAME), aix)
postgres: $(OBJS)
- $(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) $(ICU_LIBS) -o $@
endif
endif
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
+#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
Oid
CollationCreate(const char *collname, Oid collnamespace,
Oid collowner,
+ char collprovider,
int32 collencoding,
const char *collcollate, const char *collctype,
+ const char *collversion,
bool if_not_exists)
{
Relation rel;
{
ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_OBJECT),
- errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
- collname, pg_encoding_to_char(collencoding))));
+ collencoding == -1
+ ? errmsg("collation \"%s\" already exists, skipping",
+ collname)
+ : errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
+ collname, pg_encoding_to_char(collencoding))));
return InvalidOid;
}
else
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
- errmsg("collation \"%s\" for encoding \"%s\" already exists",
- collname, pg_encoding_to_char(collencoding))));
+ collencoding == -1
+ ? errmsg("collation \"%s\" already exists",
+ collname)
+ : errmsg("collation \"%s\" for encoding \"%s\" already exists",
+ collname, pg_encoding_to_char(collencoding))));
}
+ /* open pg_collation; see below about the lock level */
+ rel = heap_open(CollationRelationId, ShareRowExclusiveLock);
+
/*
- * Also forbid matching an any-encoding entry. This test of course is not
- * backed up by the unique index, but it's not a problem since we don't
- * support adding any-encoding entries after initdb.
+ * Also forbid a specific-encoding collation shadowing an any-encoding
+ * collation, or an any-encoding collation being shadowed (see
+ * get_collation_name()). This test is not backed up by the unique index,
+ * so we take a ShareRowExclusiveLock earlier, to protect against
+ * concurrent changes fooling this check.
*/
- if (SearchSysCacheExists3(COLLNAMEENCNSP,
- PointerGetDatum(collname),
- Int32GetDatum(-1),
- ObjectIdGetDatum(collnamespace)))
+ if ((collencoding == -1 &&
+ SearchSysCacheExists3(COLLNAMEENCNSP,
+ PointerGetDatum(collname),
+ Int32GetDatum(GetDatabaseEncoding()),
+ ObjectIdGetDatum(collnamespace))) ||
+ (collencoding != -1 &&
+ SearchSysCacheExists3(COLLNAMEENCNSP,
+ PointerGetDatum(collname),
+ Int32GetDatum(-1),
+ ObjectIdGetDatum(collnamespace))))
{
if (if_not_exists)
{
+ heap_close(rel, NoLock);
ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" already exists, skipping",
collname)));
}
- /* open pg_collation */
- rel = heap_open(CollationRelationId, RowExclusiveLock);
tupDesc = RelationGetDescr(rel);
/* form a tuple */
values[Anum_pg_collation_collname - 1] = NameGetDatum(&name_name);
values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace);
values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner);
+ values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider);
values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding);
namestrcpy(&name_collate, collcollate);
values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate);
namestrcpy(&name_ctype, collctype);
values[Anum_pg_collation_collctype - 1] = NameGetDatum(&name_ctype);
+ if (collversion)
+ values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(collversion);
+ else
+ nulls[Anum_pg_collation_collversion - 1] = true;
tup = heap_form_tuple(tupDesc, values, nulls);
InvokeObjectPostCreateHook(CollationRelationId, oid, 0);
heap_freetuple(tup);
- heap_close(rel, RowExclusiveLock);
+ heap_close(rel, NoLock);
return oid;
}
*/
#include "postgres.h"
+#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_collation_fn.h"
#include "commands/alter.h"
#include "commands/collationcmds.h"
+#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "commands/defrem.h"
#include "mb/pg_wchar.h"
#include "utils/rel.h"
#include "utils/syscache.h"
+
/*
* CREATE COLLATION
*/
DefElem *localeEl = NULL;
DefElem *lccollateEl = NULL;
DefElem *lcctypeEl = NULL;
+ DefElem *providerEl = NULL;
+ DefElem *versionEl = NULL;
char *collcollate = NULL;
char *collctype = NULL;
+ char *collproviderstr = NULL;
+ int collencoding;
+ char collprovider = 0;
+ char *collversion = NULL;
Oid newoid;
ObjectAddress address;
defelp = &lccollateEl;
else if (pg_strcasecmp(defel->defname, "lc_ctype") == 0)
defelp = &lcctypeEl;
+ else if (pg_strcasecmp(defel->defname, "provider") == 0)
+ defelp = &providerEl;
+ else if (pg_strcasecmp(defel->defname, "version") == 0)
+ defelp = &versionEl;
else
{
ereport(ERROR,
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
+ collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
ReleaseSysCache(tp);
}
if (lcctypeEl)
collctype = defGetString(lcctypeEl);
+ if (providerEl)
+ collproviderstr = defGetString(providerEl);
+
+ if (versionEl)
+ collversion = defGetString(versionEl);
+
+ if (collproviderstr)
+ {
+ if (pg_strcasecmp(collproviderstr, "icu") == 0)
+ collprovider = COLLPROVIDER_ICU;
+ else if (pg_strcasecmp(collproviderstr, "libc") == 0)
+ collprovider = COLLPROVIDER_LIBC;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("unrecognized collation provider: %s",
+ collproviderstr)));
+ }
+ else if (!fromEl)
+ collprovider = COLLPROVIDER_LIBC;
+
if (!collcollate)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("parameter \"lc_ctype\" must be specified")));
- check_encoding_locale_matches(GetDatabaseEncoding(), collcollate, collctype);
+ if (collprovider == COLLPROVIDER_ICU)
+ collencoding = -1;
+ else
+ {
+ collencoding = GetDatabaseEncoding();
+ check_encoding_locale_matches(collencoding, collcollate, collctype);
+ }
+
+ if (!collversion)
+ collversion = get_collation_actual_version(collprovider, collcollate);
newoid = CollationCreate(collName,
collNamespace,
GetUserId(),
- GetDatabaseEncoding(),
+ collprovider,
+ collencoding,
collcollate,
collctype,
+ collversion,
if_not_exists);
if (!OidIsValid(newoid))
collname, get_namespace_name(nspOid))));
}
+/*
+ * ALTER COLLATION
+ *
+ * Implements ALTER COLLATION ... REFRESH VERSION: compares the version
+ * string recorded in pg_collation.collversion with the one reported by
+ * get_collation_actual_version() and, if they differ, stores the new one.
+ *
+ * The current user must own the collation. Returns the ObjectAddress of
+ * the collation named in the statement.
+ */
+ObjectAddress
+AlterCollation(AlterCollationStmt *stmt)
+{
+ Relation rel;
+ Oid collOid;
+ HeapTuple tup;
+ Form_pg_collation collForm;
+ Datum collversion;
+ bool isnull;
+ char *oldversion;
+ char *newversion;
+ ObjectAddress address;
+
+ /* presumably missing_ok = false, i.e. an unknown name is an error -- confirm */
+ rel = heap_open(CollationRelationId, RowExclusiveLock);
+ collOid = get_collation_oid(stmt->collname, false);
+
+ /* only the owner may refresh the version */
+ if (!pg_collation_ownercheck(collOid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_COLLATION,
+ NameListToString(stmt->collname));
+
+ /* work on a private copy of the catalog tuple; it is freed below */
+ tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for collation %u", collOid);
+
+ /* recorded version; a NULL column is represented as oldversion == NULL */
+ collForm = (Form_pg_collation) GETSTRUCT(tup);
+ collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion,
+ &isnull);
+ oldversion = isnull ? NULL : TextDatumGetCString(collversion);
+
+ /* version currently reported for this provider and collcollate value */
+ newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate));
+
+ /* cannot change from NULL to non-NULL or vice versa */
+ if ((!oldversion && newversion) || (oldversion && !newversion))
+ elog(ERROR, "invalid collation version change");
+ else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
+ {
+ bool nulls[Natts_pg_collation];
+ bool replaces[Natts_pg_collation];
+ Datum values[Natts_pg_collation];
+
+ ereport(NOTICE,
+ (errmsg("changing version from %s to %s",
+ oldversion, newversion)));
+
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+ memset(replaces, false, sizeof(replaces));
+
+ /* replace only the collversion column */
+ values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
+ replaces[Anum_pg_collation_collversion - 1] = true;
+
+ tup = heap_modify_tuple(tup, RelationGetDescr(rel),
+ values, nulls, replaces);
+ }
+ else
+ ereport(NOTICE,
+ (errmsg("version has not changed")));
+
+ /*
+ * Note that this update is reached on both the "changed" and the
+ * "not changed" paths above; in the latter case the tuple is written
+ * back unmodified.
+ */
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+ InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
+
+ ObjectAddressSet(address, CollationRelationId, collOid);
+
+ /* close the relation; NoLock means the lock is not released here */
+ heap_freetuple(tup);
+ heap_close(rel, NoLock);
+
+ return address;
+}
+
+
+/*
+ * pg_collation_actual_version(oid) returns text
+ *
+ * SQL-callable wrapper around get_collation_actual_version(): looks up the
+ * collation with the given OID and returns the version string reported for
+ * its provider and collcollate value, or SQL NULL when no version is
+ * reported. Raises an error if no collation with that OID exists.
+ */
+Datum
+pg_collation_actual_version(PG_FUNCTION_ARGS)
+{
+    Oid         collid = PG_GETARG_OID(0);
+    HeapTuple   colltup;
+    Form_pg_collation collform;
+    char        provider;
+    char       *localename;
+    char       *actual;
+
+    colltup = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
+    if (!HeapTupleIsValid(colltup))
+        ereport(ERROR,
+                (errcode(ERRCODE_UNDEFINED_OBJECT),
+                 errmsg("collation with OID %u does not exist", collid)));
+
+    /* copy out what we need before the cache entry is released */
+    collform = (Form_pg_collation) GETSTRUCT(colltup);
+    provider = collform->collprovider;
+    localename = pstrdup(NameStr(collform->collcollate));
+
+    ReleaseSysCache(colltup);
+
+    actual = get_collation_actual_version(provider, localename);
+    if (actual == NULL)
+        PG_RETURN_NULL();
+
+    PG_RETURN_TEXT_P(cstring_to_text(actual));
+}
+
/*
- * "Normalize" a locale name, stripping off encoding tags such as
+ * "Normalize" a libc locale name, stripping off encoding tags such as
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
* -> "br_FR@euro"). Return true if a new, different name was
* generated.
*/
pg_attribute_unused()
static bool
-normalize_locale_name(char *new, const char *old)
+normalize_libc_locale_name(char *new, const char *old)
{
char *n = new;
const char *o = old;
}
+#ifdef USE_ICU
+/*
+ * Convert an ICU locale name into a language tag using
+ * uloc_toLanguageTag() in strict mode.
+ *
+ * Returns a palloc'd copy of the tag; raises an error if ICU reports a
+ * failure.
+ */
+static char *
+get_icu_language_tag(const char *localename)
+{
+    UErrorCode  rc = U_ZERO_ERROR;
+    char        tag[ULOC_FULLNAME_CAPACITY];
+
+    uloc_toLanguageTag(localename, tag, sizeof(tag), TRUE, &rc);
+    if (U_FAILURE(rc))
+        ereport(ERROR,
+                (errmsg("could not convert locale name \"%s\" to language tag: %s",
+                        localename, u_errorName(rc))));
+
+    return pstrdup(tag);
+}
+
+
+/*
+ * Build the comment text for an ICU collation: the English display name of
+ * the given locale, converted from ICU's UChar representation with
+ * icu_from_uchar().
+ *
+ * Raises an error if ICU cannot produce a display name.
+ */
+static char *
+get_icu_locale_comment(const char *localename)
+{
+    UErrorCode  status;
+    UChar       displayname[128];
+    int32       len_uchar;
+    char       *result;
+
+    status = U_ZERO_ERROR;
+
+    /*
+     * The capacity argument is counted in UChars, not bytes, so pass the
+     * element count of the buffer rather than sizeof().
+     */
+    len_uchar = uloc_getDisplayName(localename, "en", &displayname[0],
+                                    lengthof(displayname), &status);
+    if (U_FAILURE(status))
+        ereport(ERROR,
+                (errmsg("could not get display name for locale \"%s\": %s",
+                        localename, u_errorName(status))));
+
+    icu_from_uchar(&result, displayname, len_uchar);
+
+    return result;
+}
+#endif /* USE_ICU */
+
+
Datum
pg_import_system_collations(PG_FUNCTION_ARGS)
{
count++;
- CollationCreate(localebuf, nspid, GetUserId(), enc,
- localebuf, localebuf, if_not_exists);
+ CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
+ localebuf, localebuf,
+ get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
+ if_not_exists);
CommandCounterIncrement();
* "locale -a" output. So save up the aliases and try to add them
* after we've read all the output.
*/
- if (normalize_locale_name(alias, localebuf))
+ if (normalize_libc_locale_name(alias, localebuf))
{
aliaslist = lappend(aliaslist, pstrdup(alias));
localelist = lappend(localelist, pstrdup(localebuf));
char *locale = (char *) lfirst(lcl);
int enc = lfirst_int(lce);
- CollationCreate(alias, nspid, GetUserId(), enc,
- locale, locale, true);
+ CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
+ locale, locale,
+ get_collation_actual_version(COLLPROVIDER_LIBC, locale),
+ true);
CommandCounterIncrement();
}
(errmsg("no usable system locales were found")));
#endif /* not HAVE_LOCALE_T && not WIN32 */
+#ifdef USE_ICU
+    /*
+     * Import the collations known to ICU, provided the database encoding is
+     * one ICU can handle; otherwise just say so and move on.
+     */
+    if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
+    {
+        ereport(NOTICE,
+                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                 errmsg("encoding \"%s\" not supported by ICU",
+                        pg_encoding_to_char(GetDatabaseEncoding()))));
+    }
+    else
+    {
+        int         i;
+
+        /*
+         * Start the loop at -1 to sneak in the root locale without too much
+         * code duplication.
+         */
+        for (i = -1; i < ucol_countAvailable(); i++)
+        {
+            const char *name;
+            char       *langtag;
+            const char *collcollate;
+            UEnumeration *en;
+            UErrorCode  status;
+            const char *val;
+            Oid         collid;
+
+            if (i == -1)
+                name = "";      /* ICU root locale */
+            else
+                name = ucol_getAvailable(i);
+
+            langtag = get_icu_language_tag(name);
+            collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
+            collid = CollationCreate(psprintf("%s-x-icu", langtag),
+                                     nspid, GetUserId(), COLLPROVIDER_ICU, -1,
+                                     collcollate, collcollate,
+                                     get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
+                                     if_not_exists);
+
+            /*
+             * CollationCreate() returns InvalidOid when the collation already
+             * exists and was skipped; there is nothing to comment on then.
+             * Otherwise make the new row visible to later lookups, as the
+             * libc code paths in this function do, before attaching the
+             * comment.
+             */
+            if (OidIsValid(collid))
+            {
+                CommandCounterIncrement();
+
+                CreateComments(collid, CollationRelationId, 0,
+                               get_icu_locale_comment(name));
+            }
+
+            /*
+             * Add keyword variants
+             */
+            status = U_ZERO_ERROR;
+            en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
+            if (U_FAILURE(status))
+                ereport(ERROR,
+                        (errmsg("could not get keyword values for locale \"%s\": %s",
+                                name, u_errorName(status))));
+
+            status = U_ZERO_ERROR;
+            uenum_reset(en, &status);
+            while ((val = uenum_next(en, NULL, &status)))
+            {
+                char       *localeid = psprintf("%s@collation=%s", name, val);
+
+                langtag = get_icu_language_tag(localeid);
+                collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
+                collid = CollationCreate(psprintf("%s-x-icu", langtag),
+                                         nspid, GetUserId(), COLLPROVIDER_ICU, -1,
+                                         collcollate, collcollate,
+                                         get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
+                                         if_not_exists);
+
+                /* same guard as above: skip the comment for skipped entries */
+                if (OidIsValid(collid))
+                {
+                    CommandCounterIncrement();
+
+                    CreateComments(collid, CollationRelationId, 0,
+                                   get_icu_locale_comment(localeid));
+                }
+            }
+            /* uenum_next() signals enumeration errors through status */
+            if (U_FAILURE(status))
+                ereport(ERROR,
+                        (errmsg("could not get keyword values for locale \"%s\": %s",
+                                name, u_errorName(status))));
+            uenum_close(en);
+        }
+    }
+#endif
+
PG_RETURN_VOID();
}
# this directory and SUBDIRS to subdirectories containing more things
# to build.
+override CPPFLAGS := $(CPPFLAGS) $(ICU_CFLAGS)
+
ifdef PARTIAL_LINKING
# old style: linking using SUBSYS.o
subsysfilename = SUBSYS.o
return newnode;
}
+/*
+ * Copy an AlterCollationStmt node. The only field copied here is the
+ * collation name, collname.
+ */
+static AlterCollationStmt *
+_copyAlterCollationStmt(const AlterCollationStmt *from)
+{
+ AlterCollationStmt *newnode = makeNode(AlterCollationStmt);
+
+ COPY_NODE_FIELD(collname);
+
+ return newnode;
+}
+
static AlterDomainStmt *
_copyAlterDomainStmt(const AlterDomainStmt *from)
{
case T_AlterTableCmd:
retval = _copyAlterTableCmd(from);
break;
+ case T_AlterCollationStmt:
+ retval = _copyAlterCollationStmt(from);
+ break;
case T_AlterDomainStmt:
retval = _copyAlterDomainStmt(from);
break;
return true;
}
+/*
+ * Compare two AlterCollationStmt nodes for equality. The only field
+ * compared here is the collation name, collname.
+ */
+static bool
+_equalAlterCollationStmt(const AlterCollationStmt *a, const AlterCollationStmt *b)
+{
+ COMPARE_NODE_FIELD(collname);
+
+ return true;
+}
+
static bool
_equalAlterDomainStmt(const AlterDomainStmt *a, const AlterDomainStmt *b)
{
case T_AlterTableCmd:
retval = _equalAlterTableCmd(a, b);
break;
+ case T_AlterCollationStmt:
+ retval = _equalAlterCollationStmt(a, b);
+ break;
case T_AlterDomainStmt:
retval = _equalAlterDomainStmt(a, b);
break;
}
%type stmt schema_stmt
- AlterEventTrigStmt
+ AlterEventTrigStmt AlterCollationStmt
AlterDatabaseStmt AlterDatabaseSetStmt AlterDomainStmt AlterEnumStmt
AlterFdwStmt AlterForeignServerStmt AlterGroupStmt
AlterObjectDependsStmt AlterObjectSchemaStmt AlterOwnerStmt
stmt :
AlterEventTrigStmt
+ | AlterCollationStmt
| AlterDatabaseStmt
| AlterDatabaseSetStmt
| AlterDefaultPrivilegesStmt
;
+/*****************************************************************************
+ *
+ * ALTER COLLATION
+ *
+ * ALTER COLLATION any_name REFRESH VERSION
+ *
+ * Builds an AlterCollationStmt node carrying only the collation name.
+ *
+ *****************************************************************************/
+
+AlterCollationStmt: ALTER COLLATION any_name REFRESH VERSION_P
+ {
+ AlterCollationStmt *n = makeNode(AlterCollationStmt);
+ /* $3 is the any_name of the collation to refresh */
+ n->collname = $3;
+ $$ = (Node *)n;
+ }
+ ;
+
+
/*****************************************************************************
*
* ALTER SYSTEM
PG_REGEX_LOCALE_WIDE, /* Use functions */
PG_REGEX_LOCALE_1BYTE, /* Use functions */
PG_REGEX_LOCALE_WIDE_L, /* Use locale_t functions */
- PG_REGEX_LOCALE_1BYTE_L /* Use locale_t functions */
+ PG_REGEX_LOCALE_1BYTE_L, /* Use locale_t functions */
+ PG_REGEX_LOCALE_ICU /* Use ICU uchar.h functions */
} PG_Locale_Strategy;
static PG_Locale_Strategy pg_regex_strategy;
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
+#ifdef USE_ICU
+ if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
+ pg_regex_strategy = PG_REGEX_LOCALE_ICU;
+ else
+#endif
#ifdef USE_WIDE_UPPER_LOWER
if (GetDatabaseEncoding() == PG_UTF8)
{
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswdigit_l((wint_t) c, pg_regex_locale);
+ return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
- isdigit_l((unsigned char) c, pg_regex_locale));
+ isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+ break;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_isdigit(c);
#endif
break;
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalpha_l((wint_t) c, pg_regex_locale);
+ return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
- isalpha_l((unsigned char) c, pg_regex_locale));
+ isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+ break;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_isalpha(c);
#endif
break;
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalnum_l((wint_t) c, pg_regex_locale);
+ return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
- isalnum_l((unsigned char) c, pg_regex_locale));
+ isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+ break;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_isalnum(c);
#endif
break;
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswupper_l((wint_t) c, pg_regex_locale);
+ return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
- isupper_l((unsigned char) c, pg_regex_locale));
+ isupper_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+ break;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_isupper(c);
#endif
break;
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswlower_l((wint_t) c, pg_regex_locale);
+ return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
- islower_l((unsigned char) c, pg_regex_locale));
+ islower_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+ break;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_islower(c);
#endif
break;
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswgraph_l((wint_t) c, pg_regex_locale);
+ return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
- isgraph_l((unsigned char) c, pg_regex_locale));
+ isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+ break;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_isgraph(c);
#endif
break;
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswprint_l((wint_t) c, pg_regex_locale);
+ return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
- isprint_l((unsigned char) c, pg_regex_locale));
+ isprint_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+ break;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_isprint(c);
#endif
break;
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswpunct_l((wint_t) c, pg_regex_locale);
+ return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
- ispunct_l((unsigned char) c, pg_regex_locale));
+ ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+ break;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_ispunct(c);
#endif
break;
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswspace_l((wint_t) c, pg_regex_locale);
+ return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
- isspace_l((unsigned char) c, pg_regex_locale));
+ isspace_l((unsigned char) c, pg_regex_locale->info.lt));
+#endif
+ break;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_isspace(c);
#endif
break;
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towupper_l((wint_t) c, pg_regex_locale);
+ return towupper_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
if (c <= (pg_wchar) UCHAR_MAX)
- return toupper_l((unsigned char) c, pg_regex_locale);
+ return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
#endif
return c;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_toupper(c);
+#endif
+ break;
}
return 0; /* can't get here, but keep compiler quiet */
}
case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towlower_l((wint_t) c, pg_regex_locale);
+ return towlower_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
if (c <= (pg_wchar) UCHAR_MAX)
- return tolower_l((unsigned char) c, pg_regex_locale);
+ return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
#endif
return c;
+ case PG_REGEX_LOCALE_ICU:
+#ifdef USE_ICU
+ return u_tolower(c);
+#endif
+ break;
}
return 0; /* can't get here, but keep compiler quiet */
}
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
#endif
break;
+ case PG_REGEX_LOCALE_ICU:
+ max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+ break;
default:
max_chr = 0; /* can't get here, but keep compiler quiet */
break;
commandCollected = true;
break;
+ case T_AlterCollationStmt:
+ address = AlterCollation((AlterCollationStmt *) parsetree);
+ break;
+
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(parsetree));
tag = "DROP SUBSCRIPTION";
break;
+ case T_AlterCollationStmt:
+ tag = "ALTER COLLATION";
+ break;
+
case T_PrepareStmt:
tag = "PREPARE";
break;
#include
#endif
+#ifdef USE_ICU
+#include
+#endif
+
#include "catalog/pg_collation.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
* upper/lower/initcap functions
*****************************************************************************/
+#ifdef USE_ICU
+/*
+ * Run an ICU case-mapping function over a UChar string.
+ *
+ * func         ICU-style case mapper (same signature as u_strToLower /
+ *              u_strToUpper)
+ * mylocale     locale whose ICU locale name is passed to func
+ * buff_dest    out: set to a palloc'd buffer holding the converted string
+ * buff_source  input string
+ * len_source   length of the input, in UChars
+ *
+ * Returns the length of the converted string as reported by func. Raises
+ * an error if the conversion fails.
+ */
+static int32_t
+icu_convert_case(int32_t (*func)(UChar *, int32_t, const UChar *, int32_t, const char *, UErrorCode *),
+                 pg_locale_t mylocale, UChar **buff_dest, UChar *buff_source, int32_t len_source)
+{
+    UErrorCode  status;
+    int32_t     len_dest;
+
+    len_dest = len_source;      /* try first with same length */
+    *buff_dest = palloc(len_dest * sizeof(**buff_dest));
+    status = U_ZERO_ERROR;
+    len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status);
+    if (status == U_BUFFER_OVERFLOW_ERROR)
+    {
+        /*
+         * try again with adjusted length; func returned the length it needs.
+         * Free and replace the buffer the caller's pointer refers to, not
+         * the caller's pointer variable itself.
+         */
+        pfree(*buff_dest);
+        *buff_dest = palloc(len_dest * sizeof(**buff_dest));
+        status = U_ZERO_ERROR;
+        len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status);
+    }
+    if (U_FAILURE(status))
+        ereport(ERROR,
+                (errmsg("case conversion failed: %s", u_errorName(status))));
+    return len_dest;
+}
+
+/*
+ * Adapter around u_strToTitle() that always passes NULL for its extra
+ * (fifth) argument, giving it the same parameter list as the func
+ * callback taken by icu_convert_case().
+ * NOTE(review): the name suggests NULL selects ICU's default break
+ * iterator -- confirm against the ICU documentation.
+ */
+static int32_t
+u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode)
+{
+ return u_strToTitle(dest, destCapacity, src, srcLength, NULL, locale, pErrorCode);
+}
+#endif
+
/*
* If the system provides the needed functions for wide-character manipulation
* (which are all standardized by C99), then we implement upper/lower/initcap
result = asc_tolower(buff, nbytes);
}
#ifdef USE_WIDE_UPPER_LOWER
- else if (pg_database_encoding_max_length() > 1)
+ else
{
pg_locale_t mylocale = 0;
- wchar_t *workspace;
- size_t curr_char;
- size_t result_size;
if (collid != DEFAULT_COLLATION_OID)
{
mylocale = pg_newlocale_from_collation(collid);
}
- /* Overflow paranoia */
- if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
+#ifdef USE_ICU
+ if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t len_uchar;
+ int32_t len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+
+ len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+ len_conv = icu_convert_case(u_strToLower, mylocale, &buff_conv, buff_uchar, len_uchar);
+ icu_from_uchar(&result, buff_conv, len_conv);
+ }
+ else
+#endif
+ {
+ if (pg_database_encoding_max_length() > 1)
+ {
+ wchar_t *workspace;
+ size_t curr_char;
+ size_t result_size;
- /* Output workspace cannot have more codes than input bytes */
- workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+ /* Overflow paranoia */
+ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
- char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+ /* Output workspace cannot have more codes than input bytes */
+ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
- for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- {
+ char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+
+ for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
- if (mylocale)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale);
- else
+ if (mylocale)
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+ else
#endif
- workspace[curr_char] = towlower(workspace[curr_char]);
- }
+ workspace[curr_char] = towlower(workspace[curr_char]);
+ }
- /* Make result large enough; case change might change number of bytes */
- result_size = curr_char * pg_database_encoding_max_length() + 1;
- result = palloc(result_size);
+ /* Make result large enough; case change might change number of bytes */
+ result_size = curr_char * pg_database_encoding_max_length() + 1;
+ result = palloc(result_size);
- wchar2char(result, workspace, result_size, mylocale);
- pfree(workspace);
- }
+ wchar2char(result, workspace, result_size, mylocale);
+ pfree(workspace);
+ }
#endif /* USE_WIDE_UPPER_LOWER */
- else
- {
-#ifdef HAVE_LOCALE_T
- pg_locale_t mylocale = 0;
-#endif
- char *p;
-
- if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
+ else
{
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for lower() function"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
-#ifdef HAVE_LOCALE_T
- mylocale = pg_newlocale_from_collation(collid);
-#endif
- }
+ char *p;
- result = pnstrdup(buff, nbytes);
+ result = pnstrdup(buff, nbytes);
- /*
- * Note: we assume that tolower_l() will not be so broken as to need
- * an isupper_l() guard test. When using the default collation, we
- * apply the traditional Postgres behavior that forces ASCII-style
- * treatment of I/i, but in non-default collations you get exactly
- * what the collation says.
- */
- for (p = result; *p; p++)
- {
+ /*
+ * Note: we assume that tolower_l() will not be so broken as to need
+ * an isupper_l() guard test. When using the default collation, we
+ * apply the traditional Postgres behavior that forces ASCII-style
+ * treatment of I/i, but in non-default collations you get exactly
+ * what the collation says.
+ */
+ for (p = result; *p; p++)
+ {
#ifdef HAVE_LOCALE_T
- if (mylocale)
- *p = tolower_l((unsigned char) *p, mylocale);
- else
+ if (mylocale)
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
+ else
#endif
- *p = pg_tolower((unsigned char) *p);
+ *p = pg_tolower((unsigned char) *p);
+ }
+ }
}
}
result = asc_toupper(buff, nbytes);
}
#ifdef USE_WIDE_UPPER_LOWER
- else if (pg_database_encoding_max_length() > 1)
+ else
{
pg_locale_t mylocale = 0;
- wchar_t *workspace;
- size_t curr_char;
- size_t result_size;
if (collid != DEFAULT_COLLATION_OID)
{
mylocale = pg_newlocale_from_collation(collid);
}
- /* Overflow paranoia */
- if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
+#ifdef USE_ICU
+ if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t len_uchar, len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
- /* Output workspace cannot have more codes than input bytes */
- workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+ len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+ len_conv = icu_convert_case(u_strToUpper, mylocale, &buff_conv, buff_uchar, len_uchar);
+ icu_from_uchar(&result, buff_conv, len_conv);
+ }
+ else
+#endif
+ {
+ if (pg_database_encoding_max_length() > 1)
+ {
+ wchar_t *workspace;
+ size_t curr_char;
+ size_t result_size;
- char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+ /* Overflow paranoia */
+ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
- for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- {
-#ifdef HAVE_LOCALE_T
- if (mylocale)
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale);
- else
-#endif
- workspace[curr_char] = towupper(workspace[curr_char]);
- }
+ /* Output workspace cannot have more codes than input bytes */
+ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
- /* Make result large enough; case change might change number of bytes */
- result_size = curr_char * pg_database_encoding_max_length() + 1;
- result = palloc(result_size);
+ char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
- wchar2char(result, workspace, result_size, mylocale);
- pfree(workspace);
- }
-#endif /* USE_WIDE_UPPER_LOWER */
- else
- {
+ for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
- pg_locale_t mylocale = 0;
+ if (mylocale)
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+ else
#endif
- char *p;
+ workspace[curr_char] = towupper(workspace[curr_char]);
+ }
- if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for upper() function"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
+ /* Make result large enough; case change might change number of bytes */
+ result_size = curr_char * pg_database_encoding_max_length() + 1;
+ result = palloc(result_size);
+
+ wchar2char(result, workspace, result_size, mylocale);
+ pfree(workspace);
}
-#ifdef HAVE_LOCALE_T
- mylocale = pg_newlocale_from_collation(collid);
-#endif
- }
+#endif /* USE_WIDE_UPPER_LOWER */
+ else
+ {
+ char *p;
- result = pnstrdup(buff, nbytes);
+ result = pnstrdup(buff, nbytes);
- /*
- * Note: we assume that toupper_l() will not be so broken as to need
- * an islower_l() guard test. When using the default collation, we
- * apply the traditional Postgres behavior that forces ASCII-style
- * treatment of I/i, but in non-default collations you get exactly
- * what the collation says.
- */
- for (p = result; *p; p++)
- {
+ /*
+ * Note: we assume that toupper_l() will not be so broken as to need
+ * an islower_l() guard test. When using the default collation, we
+ * apply the traditional Postgres behavior that forces ASCII-style
+ * treatment of I/i, but in non-default collations you get exactly
+ * what the collation says.
+ */
+ for (p = result; *p; p++)
+ {
#ifdef HAVE_LOCALE_T
- if (mylocale)
- *p = toupper_l((unsigned char) *p, mylocale);
- else
+ if (mylocale)
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+ else
#endif
- *p = pg_toupper((unsigned char) *p);
+ *p = pg_toupper((unsigned char) *p);
+ }
+ }
}
}
result = asc_initcap(buff, nbytes);
}
#ifdef USE_WIDE_UPPER_LOWER
- else if (pg_database_encoding_max_length() > 1)
+ else
{
pg_locale_t mylocale = 0;
- wchar_t *workspace;
- size_t curr_char;
- size_t result_size;
if (collid != DEFAULT_COLLATION_OID)
{
mylocale = pg_newlocale_from_collation(collid);
}
- /* Overflow paranoia */
- if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
-
- /* Output workspace cannot have more codes than input bytes */
- workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
- char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
-
- for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+#ifdef USE_ICU
+ if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
{
-#ifdef HAVE_LOCALE_T
- if (mylocale)
- {
- if (wasalnum)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale);
- else
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale);
- wasalnum = iswalnum_l(workspace[curr_char], mylocale);
- }
- else
+ int32_t len_uchar, len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+
+ len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+ len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, &buff_conv, buff_uchar, len_uchar);
+ icu_from_uchar(&result, buff_conv, len_conv);
+ }
+ else
#endif
+ {
+ if (pg_database_encoding_max_length() > 1)
{
- if (wasalnum)
- workspace[curr_char] = towlower(workspace[curr_char]);
- else
- workspace[curr_char] = towupper(workspace[curr_char]);
- wasalnum = iswalnum(workspace[curr_char]);
- }
- }
+ wchar_t *workspace;
+ size_t curr_char;
+ size_t result_size;
- /* Make result large enough; case change might change number of bytes */
- result_size = curr_char * pg_database_encoding_max_length() + 1;
- result = palloc(result_size);
+ /* Overflow paranoia */
+ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
- wchar2char(result, workspace, result_size, mylocale);
- pfree(workspace);
- }
-#endif /* USE_WIDE_UPPER_LOWER */
- else
- {
-#ifdef HAVE_LOCALE_T
- pg_locale_t mylocale = 0;
-#endif
- char *p;
+ /* Output workspace cannot have more codes than input bytes */
+ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
- if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for initcap() function"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
+ char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+
+ for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
- mylocale = pg_newlocale_from_collation(collid);
+ if (mylocale)
+ {
+ if (wasalnum)
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+ else
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+ wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
+ }
+ else
#endif
- }
+ {
+ if (wasalnum)
+ workspace[curr_char] = towlower(workspace[curr_char]);
+ else
+ workspace[curr_char] = towupper(workspace[curr_char]);
+ wasalnum = iswalnum(workspace[curr_char]);
+ }
+ }
- result = pnstrdup(buff, nbytes);
+ /* Make result large enough; case change might change number of bytes */
+ result_size = curr_char * pg_database_encoding_max_length() + 1;
+ result = palloc(result_size);
- /*
- * Note: we assume that toupper_l()/tolower_l() will not be so broken
- * as to need guard tests. When using the default collation, we apply
- * the traditional Postgres behavior that forces ASCII-style treatment
- * of I/i, but in non-default collations you get exactly what the
- * collation says.
- */
- for (p = result; *p; p++)
- {
-#ifdef HAVE_LOCALE_T
- if (mylocale)
- {
- if (wasalnum)
- *p = tolower_l((unsigned char) *p, mylocale);
- else
- *p = toupper_l((unsigned char) *p, mylocale);
- wasalnum = isalnum_l((unsigned char) *p, mylocale);
+ wchar2char(result, workspace, result_size, mylocale);
+ pfree(workspace);
}
+#endif /* USE_WIDE_UPPER_LOWER */
else
-#endif
{
- if (wasalnum)
- *p = pg_tolower((unsigned char) *p);
- else
- *p = pg_toupper((unsigned char) *p);
- wasalnum = isalnum((unsigned char) *p);
+ char *p;
+
+ result = pnstrdup(buff, nbytes);
+
+ /*
+ * Note: we assume that toupper_l()/tolower_l() will not be so broken
+ * as to need guard tests. When using the default collation, we apply
+ * the traditional Postgres behavior that forces ASCII-style treatment
+ * of I/i, but in non-default collations you get exactly what the
+ * collation says.
+ */
+ for (p = result; *p; p++)
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ {
+ if (wasalnum)
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
+ else
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+ wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
+ }
+ else
+#endif
+ {
+ if (wasalnum)
+ *p = pg_tolower((unsigned char) *p);
+ else
+ *p = pg_toupper((unsigned char) *p);
+ wasalnum = isalnum((unsigned char) *p);
+ }
+ }
}
}
}
return pg_ascii_tolower(c);
#ifdef HAVE_LOCALE_T
else if (locale)
- return tolower_l(c, locale);
+ return tolower_l(c, locale->info.lt);
#endif
else
return pg_tolower(c);
*p;
int slen,
plen;
+ pg_locale_t locale = 0;
+ bool locale_is_c = false;
+
+ if (lc_ctype_is_c(collation))
+ locale_is_c = true;
+ else if (collation != DEFAULT_COLLATION_OID)
+ {
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for ILIKE"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+ locale = pg_newlocale_from_collation(collation);
+ }
/*
* For efficiency reasons, in the single byte case we don't call lower()
* on the pattern and text, but instead call SB_lower_char on each
- * character. In the multi-byte case we don't have much choice :-(
+ * character. In the multi-byte case we don't have much choice :-(.
+ * Also, ICU does not support single-character case folding, so we go the
+ * long way.
*/
- if (pg_database_encoding_max_length() > 1)
+	/*
+	 * locale stays 0 for the default and C collations, so it must be tested
+	 * before its provider field is examined.
+	 */
+	if (pg_database_encoding_max_length() > 1 ||
+		(locale && locale->provider == COLLPROVIDER_ICU))
{
/* lower's result is never packed, so OK to use old macros here */
pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
}
else
{
- /*
- * Here we need to prepare locale information for SB_lower_char. This
- * should match the methods used in str_tolower().
- */
- pg_locale_t locale = 0;
- bool locale_is_c = false;
-
- if (lc_ctype_is_c(collation))
- locale_is_c = true;
- else if (collation != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collation))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for ILIKE"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
- locale = pg_newlocale_from_collation(collation);
- }
-
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
s = VARDATA_ANY(str);
#include "catalog/pg_collation.h"
#include "catalog/pg_control.h"
#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
#include "utils/hsearch.h"
+#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/syscache.h"
+#ifdef USE_ICU
+/* ICU converter API (UConverter, ucnv_open, ucnv_toUChars, ucnv_fromUChars) */
+#include <unicode/ucnv.h>
+#endif
+
#ifdef WIN32
/*
* This Windows file defines StrNCpy. We don't need it here, so we undefine
if (cache_entry->locale == 0)
{
/* We haven't computed this yet in this session, so do it */
-#ifdef HAVE_LOCALE_T
HeapTuple tp;
Form_pg_collation collform;
const char *collcollate;
- const char *collctype;
- locale_t result;
+ const char *collctype pg_attribute_unused();
+ pg_locale_t result;
+ Datum collversion;
+ bool isnull;
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
if (!HeapTupleIsValid(tp))
collcollate = NameStr(collform->collcollate);
collctype = NameStr(collform->collctype);
- if (strcmp(collcollate, collctype) == 0)
+		/*
+		 * The struct is stored in the collation cache entry once it has been
+		 * filled in.  malloc() returns NULL on failure, so check before
+		 * memset() and the field assignments dereference the pointer.
+		 */
+		result = malloc(sizeof(*result));
+		if (!result)
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of memory")));
+		memset(result, 0, sizeof(*result));
+		result->provider = collform->collprovider;
+
+ if (collform->collprovider == COLLPROVIDER_LIBC)
{
- /* Normal case where they're the same */
+#ifdef HAVE_LOCALE_T
+ locale_t loc;
+
+ if (strcmp(collcollate, collctype) == 0)
+ {
+ /* Normal case where they're the same */
#ifndef WIN32
- result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
- NULL);
+ loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
+ NULL);
#else
- result = _create_locale(LC_ALL, collcollate);
+ loc = _create_locale(LC_ALL, collcollate);
#endif
- if (!result)
- report_newlocale_failure(collcollate);
- }
- else
- {
+ if (!loc)
+ report_newlocale_failure(collcollate);
+ }
+ else
+ {
#ifndef WIN32
- /* We need two newlocale() steps */
- locale_t loc1;
-
- loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
- if (!loc1)
- report_newlocale_failure(collcollate);
- result = newlocale(LC_CTYPE_MASK, collctype, loc1);
- if (!result)
- report_newlocale_failure(collctype);
+ /* We need two newlocale() steps */
+ locale_t loc1;
+
+ loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
+ if (!loc1)
+ report_newlocale_failure(collcollate);
+ loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
+ if (!loc)
+ report_newlocale_failure(collctype);
#else
- /*
- * XXX The _create_locale() API doesn't appear to support this.
- * Could perhaps be worked around by changing pg_locale_t to
- * contain two separate fields.
- */
+ /*
+ * XXX The _create_locale() API doesn't appear to support this.
+ * Could perhaps be worked around by changing pg_locale_t to
+ * contain two separate fields.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("collations with different collate and ctype values are not supported on this platform")));
+#endif
+ }
+
+ result->info.lt = loc;
+#else /* not HAVE_LOCALE_T */
+ /* platform that doesn't support locale_t */
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("collations with different collate and ctype values are not supported on this platform")));
-#endif
+ errmsg("collation provider LIBC is not supported on this platform")));
+#endif /* not HAVE_LOCALE_T */
+ }
+ else if (collform->collprovider == COLLPROVIDER_ICU)
+ {
+#ifdef USE_ICU
+ UCollator *collator;
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+ collator = ucol_open(collcollate, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("could not open collator for locale \"%s\": %s",
+ collcollate, u_errorName(status))));
+
+ result->info.icu.locale = strdup(collcollate);
+ result->info.icu.ucol = collator;
+#else /* not USE_ICU */
+ /* could get here if a collation was created by a build with ICU */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ICU is not supported in this build"), \
+ errhint("You need to rebuild PostgreSQL using --with-icu.")));
+#endif /* not USE_ICU */
}
- cache_entry->locale = result;
+ collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
+ &isnull);
+ if (!isnull)
+ {
+ char *actual_versionstr;
+ char *collversionstr;
+
+ actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
+ if (!actual_versionstr)
+ /* This could happen when specifying a version in CREATE
+ * COLLATION for a libc locale, or manually creating a mess
+ * in the catalogs. */
+ ereport(ERROR,
+ (errmsg("collation \"%s\" has no actual version, but a version was specified",
+ NameStr(collform->collname))));
+ collversionstr = TextDatumGetCString(collversion);
+
+ if (strcmp(actual_versionstr, collversionstr) != 0)
+ ereport(WARNING,
+ (errmsg("collation \"%s\" has version mismatch",
+ NameStr(collform->collname)),
+ errdetail("The collation in the database was created using version %s, "
+ "but the operating system provides version %s.",
+ collversionstr, actual_versionstr),
+ errhint("Rebuild all objects affected by this collation and run "
+ "ALTER COLLATION %s REFRESH VERSION, "
+ "or build PostgreSQL with the right library version.",
+ quote_qualified_identifier(get_namespace_name(collform->collnamespace),
+ NameStr(collform->collname)))));
+ }
ReleaseSysCache(tp);
-#else /* not HAVE_LOCALE_T */
- /*
- * For platforms that don't support locale_t, we can't do anything
- * with non-default collations.
- */
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("nondefault collations are not supported on this platform")));
-#endif /* not HAVE_LOCALE_T */
+ cache_entry->locale = result;
}
return cache_entry->locale;
}
+/*
+ * Ask the collation provider (operating system or library) which version of
+ * the named collation it currently implements.
+ *
+ * For the ICU provider the result is a palloc'd version string; for any
+ * other provider the result is NULL, meaning "no version available".
+ *
+ * Contract: a given provider must be consistent -- it either always returns
+ * a non-NULL string or always returns NULL.  It must not return NULL for
+ * some collcollate values and a string for others.
+ */
+char *
+get_collation_actual_version(char collprovider, const char *collcollate)
+{
+	char	   *version_str = NULL;
+
+#ifdef USE_ICU
+	if (collprovider == COLLPROVIDER_ICU)
+	{
+		UErrorCode	err = U_ZERO_ERROR;
+		UCollator  *ucol;
+		UVersionInfo vinfo;
+		char		verbuf[U_MAX_VERSION_STRING_LENGTH];
+
+		/* Open the collator only long enough to read its version. */
+		ucol = ucol_open(collcollate, &err);
+		if (U_FAILURE(err))
+			ereport(ERROR,
+					(errmsg("could not open collator for locale \"%s\": %s",
+							collcollate, u_errorName(err))));
+		ucol_getVersion(ucol, vinfo);
+		ucol_close(ucol);
+
+		u_versionToString(vinfo, verbuf);
+		version_str = pstrdup(verbuf);
+	}
+#endif
+
+	return version_str;
+}
+
+
+#ifdef USE_ICU
+/*
+ * Converter object for converting between ICU's UChar strings and C strings
+ * in database encoding. Since the database encoding doesn't change, we only
+ * need one of these per session.
+ */
+static UConverter *icu_converter = NULL;
+
+/*
+ * Open the ICU converter for the database encoding and remember it in
+ * icu_converter.  Calls made after the converter has been opened do nothing.
+ * Raises an error if the encoding has no ICU name or ICU cannot open it.
+ */
+static void
+init_icu_converter(void)
+{
+	if (icu_converter == NULL)
+	{
+		const char *enc_name = get_encoding_name_for_icu(GetDatabaseEncoding());
+		UErrorCode	err = U_ZERO_ERROR;
+		UConverter *opened = ucnv_open(enc_name, &err);
+
+		if (U_FAILURE(err))
+			ereport(ERROR,
+					(errmsg("could not open ICU converter for encoding \"%s\": %s",
+							enc_name, u_errorName(err))));
+
+		/* Publish only after a successful open. */
+		icu_converter = opened;
+	}
+}
+
+/*
+ * Convert nbytes bytes of buff (database encoding) into a newly palloc'd
+ * UChar string, using the shared ICU converter.
+ *
+ * *buff_uchar receives the palloc'd output buffer; the caller is responsible
+ * for freeing it.  The return value is the length reported by
+ * ucnv_toUChars().  Raises an error if the conversion fails.
+ */
+int32_t
+icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
+{
+	UErrorCode	status;
+	int32_t		len_uchar;
+
+	init_icu_converter();
+
+	/*
+	 * Max length per docs is 2 * nbytes; one extra UChar leaves room for the
+	 * terminating NUL so the output is always terminated.
+	 */
+	len_uchar = 2 * nbytes + 1;
+	*buff_uchar = palloc(len_uchar * sizeof(**buff_uchar));
+	status = U_ZERO_ERROR;
+	len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
+	return len_uchar;
+}
+
+/*
+ * Convert len_uchar UChars of buff_uchar into a newly palloc'd string in the
+ * database encoding, using the shared ICU converter.
+ *
+ * *result receives the palloc'd, NUL-terminated output buffer.  The return
+ * value is the length reported by ucnv_fromUChars(), i.e. the converted
+ * length rather than the worst-case allocation size.  Raises an error if the
+ * conversion fails.
+ */
+int32_t
+icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
+{
+	UErrorCode	status;
+	int32_t		len_result;
+
+	init_icu_converter();
+
+	/* Worst-case output size for len_uchar UChars in this encoding. */
+	len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter));
+	*result = palloc(len_result + 1);
+	status = U_ZERO_ERROR;
+
+	/*
+	 * Pass the full allocated capacity (including the extra byte) so the
+	 * converter always has room to write the terminating NUL.
+	 */
+	len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
+								 buff_uchar, len_uchar, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
+	return len_result;
+}
+#endif
/*
* These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
{
size_t result;
+ Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+
if (tolen == 0)
return 0;
#ifdef HAVE_LOCALE_T
#ifdef HAVE_WCSTOMBS_L
/* Use wcstombs_l for nondefault locales */
- result = wcstombs_l(to, from, tolen, locale);
+ result = wcstombs_l(to, from, tolen, locale->info.lt);
#else /* !HAVE_WCSTOMBS_L */
/* We have to temporarily set the locale as current ... ugh */
- locale_t save_locale = uselocale(locale);
+ locale_t save_locale = uselocale(locale->info.lt);
result = wcstombs(to, from, tolen);
{
size_t result;
+ Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+
if (tolen == 0)
return 0;
#ifdef HAVE_LOCALE_T
#ifdef HAVE_MBSTOWCS_L
/* Use mbstowcs_l for nondefault locales */
- result = mbstowcs_l(to, str, tolen, locale);
+ result = mbstowcs_l(to, str, tolen, locale->info.lt);
#else /* !HAVE_MBSTOWCS_L */
/* We have to temporarily set the locale as current ... ugh */
- locale_t save_locale = uselocale(locale);
+ locale_t save_locale = uselocale(locale->info.lt);
result = mbstowcs(to, str, tolen);
/*
* Check whether char is a letter (and, hence, subject to case-folding)
*
- * In multibyte character sets, we can't use isalpha, and it does not seem
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does not seem
* worth trying to convert to wchar_t to use iswalpha. Instead, just assume
* any multibyte char is potentially case-varying.
*/
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
else if (is_multibyte && IS_HIGHBIT_SET(c))
return true;
+ else if (locale && locale->provider == COLLPROVIDER_ICU)
+ return IS_HIGHBIT_SET(c) ? true : false;
#ifdef HAVE_LOCALE_T
- else if (locale)
- return isalpha_l((unsigned char) c, locale);
+ else if (locale && locale->provider == COLLPROVIDER_LIBC)
+ return isalpha_l((unsigned char) c, locale->info.lt);
#endif
else
return isalpha((unsigned char) c);
hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
hyperLogLogState full_card; /* Full key cardinality state */
double prop_card; /* Required cardinality proportion */
-#ifdef HAVE_LOCALE_T
pg_locale_t locale;
-#endif
} VarStringSortSupport;
/*
char a2buf[TEXTBUFLEN];
char *a1p,
*a2p;
-
-#ifdef HAVE_LOCALE_T
pg_locale_t mylocale = 0;
-#endif
if (collid != DEFAULT_COLLATION_OID)
{
errmsg("could not determine which collation to use for string comparison"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
-#ifdef HAVE_LOCALE_T
mylocale = pg_newlocale_from_collation(collid);
-#endif
}
/*
memcpy(a2p, arg2, len2);
a2p[len2] = '\0';
-#ifdef HAVE_LOCALE_T
if (mylocale)
- result = strcoll_l(a1p, a2p, mylocale);
- else
+ {
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+#ifdef USE_ICU
+#ifdef HAVE_UCOL_STRCOLLUTF8
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+ result = ucol_strcollUTF8(mylocale->info.icu.ucol,
+ arg1, len1,
+ arg2, len2,
+ &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("collation failed: %s", u_errorName(status))));
+ }
+ else
+#endif
+				{
+					/*
+					 * Convert both inputs to UChar strings and compare them
+					 * with the ICU collator.
+					 */
+					int32_t		ulen1,
+								ulen2;
+					UChar	   *uchar1,
+							   *uchar2;
+
+					ulen1 = icu_to_uchar(&uchar1, arg1, len1);
+					ulen2 = icu_to_uchar(&uchar2, arg2, len2);
+
+					result = ucol_strcoll(mylocale->info.icu.ucol,
+										  uchar1, ulen1,
+										  uchar2, ulen2);
+
+					/*
+					 * icu_to_uchar() palloc's its output; free it here so
+					 * repeated comparisons do not accumulate memory.
+					 */
+					pfree(uchar1);
+					pfree(uchar2);
+				}
+#else /* not USE_ICU */
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+#endif /* not USE_ICU */
+ }
+ else
+ {
+#ifdef HAVE_LOCALE_T
+ result = strcoll_l(a1p, a2p, mylocale->info.lt);
+#else
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
#endif
+ }
+ }
+ else
result = strcoll(a1p, a2p);
/*
bool abbreviate = ssup->abbreviate;
bool collate_c = false;
VarStringSortSupport *sss;
-
-#ifdef HAVE_LOCALE_T
pg_locale_t locale = 0;
-#endif
/*
* If possible, set ssup->comparator to a function which can be used to
errmsg("could not determine which collation to use for string comparison"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
-#ifdef HAVE_LOCALE_T
locale = pg_newlocale_from_collation(collid);
-#endif
}
}
* platforms.
*/
#ifndef TRUST_STRXFRM
- if (!collate_c)
+ if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
abbreviate = false;
#endif
sss->last_len2 = -1;
/* Initialize */
sss->last_returned = 0;
-#ifdef HAVE_LOCALE_T
sss->locale = locale;
-#endif
/*
* To avoid somehow confusing a strxfrm() blob and an original string,
goto done;
}
-#ifdef HAVE_LOCALE_T
if (sss->locale)
- result = strcoll_l(sss->buf1, sss->buf2, sss->locale);
- else
+ {
+ if (sss->locale->provider == COLLPROVIDER_ICU)
+ {
+#ifdef USE_ICU
+#ifdef HAVE_UCOL_STRCOLLUTF8
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+ result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
+ a1p, len1,
+ a2p, len2,
+ &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("collation failed: %s", u_errorName(status))));
+ }
+ else
#endif
+			{
+				/*
+				 * Convert both inputs to UChar strings and compare them with
+				 * the ICU collator.
+				 */
+				int32_t		ulen1,
+							ulen2;
+				UChar	   *uchar1,
+						   *uchar2;
+
+				ulen1 = icu_to_uchar(&uchar1, a1p, len1);
+				ulen2 = icu_to_uchar(&uchar2, a2p, len2);
+
+				result = ucol_strcoll(sss->locale->info.icu.ucol,
+									  uchar1, ulen1,
+									  uchar2, ulen2);
+
+				/*
+				 * icu_to_uchar() palloc's its output; free it here so that
+				 * each comparison does not leave two buffers behind.
+				 */
+				pfree(uchar1);
+				pfree(uchar2);
+			}
+#else /* not USE_ICU */
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
+#endif /* not USE_ICU */
+ }
+ else
+ {
+#ifdef HAVE_LOCALE_T
+ result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
+#else
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
+#endif
+ }
+ }
+ else
result = strcoll(sss->buf1, sss->buf2);
/*
else
{
Size bsize;
+#ifdef USE_ICU
+ int32_t ulen = -1;
+ UChar *uchar;
+#endif
/*
- * We're not using the C collation, so fall back on strxfrm.
+ * We're not using the C collation, so fall back on strxfrm or ICU
+ * analogs.
*/
/* By convention, we use buffer 1 to store and NUL-terminate */
goto done;
}
- /* Just like strcoll(), strxfrm() expects a NUL-terminated string */
memcpy(sss->buf1, authoritative_data, len);
+ /* Just like strcoll(), strxfrm() expects a NUL-terminated string.
+ * Not necessary for ICU, but doesn't hurt. */
sss->buf1[len] = '\0';
sss->last_len1 = len;
+#ifdef USE_ICU
+ /* When using ICU and not UTF8, convert string to UChar. */
+ if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
+ GetDatabaseEncoding() != PG_UTF8)
+ ulen = icu_to_uchar(&uchar, sss->buf1, len);
+#endif
+
+ /*
+ * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
+ * and try again. Both of these functions have the result buffer
+ * content undefined if the result did not fit, so we need to retry
+ * until everything fits, even though we only need the first few bytes
+ * in the end. When using ucol_nextSortKeyPart(), however, we only
+ * ask for as many bytes as we actually need.
+ */
for (;;)
{
+#ifdef USE_ICU
+ if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
+ {
+ /*
+ * When using UTF8, use the iteration interface so we only
+ * need to produce as many bytes as we actually need.
+ */
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ UCharIterator iter;
+ uint32_t state[2];
+ UErrorCode status;
+
+ uiter_setUTF8(&iter, sss->buf1, len);
+ state[0] = state[1] = 0; /* won't need that again */
+ status = U_ZERO_ERROR;
+ bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
+ &iter,
+ state,
+ (uint8_t *) sss->buf2,
+ Min(sizeof(Datum), sss->buflen2),
+ &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("sort key generation failed: %s", u_errorName(status))));
+ }
+ else
+ bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
+ uchar, ulen,
+ (uint8_t *) sss->buf2, sss->buflen2);
+ }
+ else
+#endif
#ifdef HAVE_LOCALE_T
- if (sss->locale)
+ if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
bsize = strxfrm_l(sss->buf2, sss->buf1,
- sss->buflen2, sss->locale);
+ sss->buflen2, sss->locale->info.lt);
else
#endif
bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
break;
/*
- * The C standard states that the contents of the buffer is now
- * unspecified. Grow buffer, and retry.
+ * Grow buffer and retry.
*/
pfree(sss->buf2);
sss->buflen2 = Max(bsize + 1,
};
+#ifndef FRONTEND
+
+/*
+ * Table of encoding names for ICU
+ *
+ * The table is indexed by encoding ID (see is_encoding_supported_by_icu and
+ * get_encoding_name_for_icu).  Each entry's trailing comment names the
+ * encoding ID it belongs to, so entries must stay in encoding-ID order.
+ *
+ * Reference: (link missing in this copy -- NOTE(review): restore the URL of
+ * the ICU converter list this table was derived from)
+ *
+ * NULL entries are not supported by ICU, or their mapping is unclear.
+ */
+static const char * const pg_enc2icu_tbl[] =
+{
+ NULL, /* PG_SQL_ASCII */
+ "EUC-JP", /* PG_EUC_JP */
+ "EUC-CN", /* PG_EUC_CN */
+ "EUC-KR", /* PG_EUC_KR */
+ "EUC-TW", /* PG_EUC_TW */
+ NULL, /* PG_EUC_JIS_2004 */
+ "UTF-8", /* PG_UTF8 */
+ NULL, /* PG_MULE_INTERNAL */
+ "ISO-8859-1", /* PG_LATIN1 */
+ "ISO-8859-2", /* PG_LATIN2 */
+ "ISO-8859-3", /* PG_LATIN3 */
+ "ISO-8859-4", /* PG_LATIN4 */
+ "ISO-8859-9", /* PG_LATIN5 */
+ "ISO-8859-10", /* PG_LATIN6 */
+ "ISO-8859-13", /* PG_LATIN7 */
+ "ISO-8859-14", /* PG_LATIN8 */
+ "ISO-8859-15", /* PG_LATIN9 */
+ NULL, /* PG_LATIN10 */
+ "CP1256", /* PG_WIN1256 */
+ "CP1258", /* PG_WIN1258 */
+ "CP866", /* PG_WIN866 */
+ NULL, /* PG_WIN874 */
+ "KOI8-R", /* PG_KOI8R */
+ "CP1251", /* PG_WIN1251 */
+ "CP1252", /* PG_WIN1252 */
+ "ISO-8859-5", /* PG_ISO_8859_5 */
+ "ISO-8859-6", /* PG_ISO_8859_6 */
+ "ISO-8859-7", /* PG_ISO_8859_7 */
+ "ISO-8859-8", /* PG_ISO_8859_8 */
+ "CP1250", /* PG_WIN1250 */
+ "CP1253", /* PG_WIN1253 */
+ "CP1254", /* PG_WIN1254 */
+ "CP1255", /* PG_WIN1255 */
+ "CP1257", /* PG_WIN1257 */
+ "KOI8-U", /* PG_KOI8U */
+};
+
+/*
+ * Report whether pg_enc2icu_tbl has an ICU converter name for the given
+ * encoding ID.
+ *
+ * IDs that fall outside the table are reported as unsupported instead of
+ * being used as an array index.
+ */
+bool
+is_encoding_supported_by_icu(int encoding)
+{
+	if (encoding < 0 || encoding >= (int) lengthof(pg_enc2icu_tbl))
+		return false;
+
+	return (pg_enc2icu_tbl[encoding] != NULL);
+}
+
+/*
+ * Return the ICU converter name for the given encoding ID.
+ *
+ * Raises ERRCODE_FEATURE_NOT_SUPPORTED if the table has no name for the
+ * encoding.  An ID outside the table is treated the same way rather than
+ * being used as an array index.
+ */
+const char *
+get_encoding_name_for_icu(int encoding)
+{
+	const char *icu_encoding_name = NULL;
+
+	StaticAssertStmt(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
+					 "pg_enc2icu_tbl incomplete");
+
+	/* Only look up IDs that actually have a slot in the table. */
+	if (encoding >= 0 && encoding < (int) lengthof(pg_enc2icu_tbl))
+		icu_encoding_name = pg_enc2icu_tbl[encoding];
+
+	if (!icu_encoding_name)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("encoding \"%s\" not supported by ICU",
+						pg_encoding_to_char(encoding))));
+
+	return icu_encoding_name;
+}
+
+#endif /* not FRONTEND */
+
+
/* ----------
* Encoding checks, for error returns -1 else encoding id
* ----------
#include "catalog/catalog.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_class.h"
+#include "catalog/pg_collation.h"
#include "common/file_utils.h"
#include "common/restricted_token.h"
#include "common/username.h"
PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n");
/* Add an SQL-standard name */
- PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, %u, %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, PG_UTF8);
+ PG_CMD_PRINTF3("INSERT INTO pg_collation (collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8);
}
/*
PQExpBuffer delq;
PQExpBuffer labelq;
PGresult *res;
+ int i_collprovider;
int i_collcollate;
int i_collctype;
+ const char *collprovider;
const char *collcollate;
const char *collctype;
selectSourceSchema(fout, collinfo->dobj.namespace->dobj.name);
/* Get collation-specific details */
- appendPQExpBuffer(query, "SELECT "
- "collcollate, "
- "collctype "
- "FROM pg_catalog.pg_collation c "
- "WHERE c.oid = '%u'::pg_catalog.oid",
- collinfo->dobj.catId.oid);
+	/*
+	 * Servers of version 10 and later are asked for collprovider and
+	 * collversion directly.  For older servers we substitute constants: the
+	 * provider is reported as 'c' (libc, matching COLLPROVIDER_LIBC), which
+	 * is one of the values the provider check below accepts, and the version
+	 * is NULL.
+	 */
+	if (fout->remoteVersion >= 100000)
+		appendPQExpBuffer(query, "SELECT "
+						  "collprovider, "
+						  "collcollate, "
+						  "collctype, "
+						  "collversion "
+						  "FROM pg_catalog.pg_collation c "
+						  "WHERE c.oid = '%u'::pg_catalog.oid",
+						  collinfo->dobj.catId.oid);
+	else
+		appendPQExpBuffer(query, "SELECT "
+						  "'c'::char AS collprovider, "
+						  "collcollate, "
+						  "collctype, "
+						  "NULL AS collversion "
+						  "FROM pg_catalog.pg_collation c "
+						  "WHERE c.oid = '%u'::pg_catalog.oid",
+						  collinfo->dobj.catId.oid);
res = ExecuteSqlQueryForSingleRow(fout, query->data);
+ i_collprovider = PQfnumber(res, "collprovider");
i_collcollate = PQfnumber(res, "collcollate");
i_collctype = PQfnumber(res, "collctype");
+ collprovider = PQgetvalue(res, 0, i_collprovider);
collcollate = PQgetvalue(res, 0, i_collcollate);
collctype = PQgetvalue(res, 0, i_collctype);
appendPQExpBuffer(delq, ".%s;\n",
fmtId(collinfo->dobj.name));
- appendPQExpBuffer(q, "CREATE COLLATION %s (lc_collate = ",
+ appendPQExpBuffer(q, "CREATE COLLATION %s (",
fmtId(collinfo->dobj.name));
- appendStringLiteralAH(q, collcollate, fout);
- appendPQExpBufferStr(q, ", lc_ctype = ");
- appendStringLiteralAH(q, collctype, fout);
+
+ appendPQExpBufferStr(q, "provider = ");
+ if (collprovider[0] == 'c')
+ appendPQExpBufferStr(q, "libc");
+ else if (collprovider[0] == 'i')
+ appendPQExpBufferStr(q, "icu");
+ else
+ exit_horribly(NULL,
+ "unrecognized collation provider: %s\n",
+ collprovider);
+
+ if (strcmp(collcollate, collctype) == 0)
+ {
+ appendPQExpBufferStr(q, ", locale = ");
+ appendStringLiteralAH(q, collcollate, fout);
+ }
+ else
+ {
+ appendPQExpBufferStr(q, ", lc_collate = ");
+ appendStringLiteralAH(q, collcollate, fout);
+ appendPQExpBufferStr(q, ", lc_ctype = ");
+ appendStringLiteralAH(q, collctype, fout);
+ }
+
+ /*
+ * For binary upgrade, carry over the collation version. For normal
+ * dump/restore, omit the version, so that it is computed upon restore.
+ */
+ if (dopt->binary_upgrade)
+ {
+ int i_collversion;
+
+ i_collversion = PQfnumber(res, "collversion");
+ if (!PQgetisnull(res, 0, i_collversion))
+ {
+ appendPQExpBufferStr(q, ", version = ");
+ appendStringLiteralAH(q,
+ PQgetvalue(res, 0, i_collversion),
+ fout);
+ }
+ }
+
appendPQExpBufferStr(q, ");\n");
appendPQExpBuffer(labelq, "COLLATION %s", fmtId(collinfo->dobj.name));
'CREATE COLLATION test0 FROM "C";',
regexp =>
qr/^
- \QCREATE COLLATION test0 (lc_collate = 'C', lc_ctype = 'C');\E/xm,
+ \QCREATE COLLATION test0 (provider = libc, locale = 'C');\E/xm,
collation => 1,
like => {
binary_upgrade => 1,
PQExpBufferData buf;
PGresult *res;
printQueryOpt myopt = pset.popt;
- static const bool translate_columns[] = {false, false, false, false, false};
+ static const bool translate_columns[] = {false, false, false, false, false, false};
if (pset.sversion < 90100)
{
gettext_noop("Collate"),
gettext_noop("Ctype"));
+ if (pset.sversion >= 100000)
+ appendPQExpBuffer(&buf,
+ ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"",
+ gettext_noop("Provider"));
+
if (verbose)
appendPQExpBuffer(&buf,
",\n pg_catalog.obj_description(c.oid, 'pg_collation') AS \"%s\"",
NameData collname; /* collation name */
Oid collnamespace; /* OID of namespace containing collation */
Oid collowner; /* owner of collation */
+ char collprovider; /* see constants below */
int32 collencoding; /* encoding for this collation; -1 = "all" */
NameData collcollate; /* LC_COLLATE setting */
NameData collctype; /* LC_CTYPE setting */
+#ifdef CATALOG_VARLEN /* variable-length fields start here */
+ text collversion; /* provider-dependent version of collation data */
+#endif
} FormData_pg_collation;
/* ----------------
* compiler constants for pg_collation
* ----------------
*/
-#define Natts_pg_collation 6
+#define Natts_pg_collation 8
#define Anum_pg_collation_collname 1
#define Anum_pg_collation_collnamespace 2
#define Anum_pg_collation_collowner 3
-#define Anum_pg_collation_collencoding 4
-#define Anum_pg_collation_collcollate 5
-#define Anum_pg_collation_collctype 6
+#define Anum_pg_collation_collprovider 4
+#define Anum_pg_collation_collencoding 5
+#define Anum_pg_collation_collcollate 6
+#define Anum_pg_collation_collctype 7
+#define Anum_pg_collation_collversion 8
/* ----------------
* initial contents of pg_collation
* ----------------
*/
-DATA(insert OID = 100 ( default PGNSP PGUID -1 "" "" ));
+DATA(insert OID = 100 ( default PGNSP PGUID d -1 "" "" _null_ ));
DESCR("database's default collation");
#define DEFAULT_COLLATION_OID 100
-DATA(insert OID = 950 ( C PGNSP PGUID -1 "C" "C" ));
+DATA(insert OID = 950 ( C PGNSP PGUID c -1 "C" "C" _null_ ));
DESCR("standard C collation");
#define C_COLLATION_OID 950
-DATA(insert OID = 951 ( POSIX PGNSP PGUID -1 "POSIX" "POSIX" ));
+DATA(insert OID = 951 ( POSIX PGNSP PGUID c -1 "POSIX" "POSIX" _null_ ));
DESCR("standard POSIX collation");
#define POSIX_COLLATION_OID 951
+
+#define COLLPROVIDER_DEFAULT 'd'
+#define COLLPROVIDER_ICU 'i'
+#define COLLPROVIDER_LIBC 'c'
+
#endif /* PG_COLLATION_H */
extern Oid CollationCreate(const char *collname, Oid collnamespace,
Oid collowner,
+ char collprovider,
int32 collencoding,
const char *collcollate, const char *collctype,
+ const char *collversion,
bool if_not_exists);
extern void RemoveCollationById(Oid collationOid);
DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
DESCR("import collations from operating system");
+DATA(insert OID = 3448 ( pg_collation_actual_version PGNSP PGUID 12 100 0 0 0 f f f f t f v s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_collation_actual_version _null_ _null_ _null_ ));
+DESCR("get actual version of collation from operating system");
+
/* system management/monitoring related functions */
DATA(insert OID = 3353 ( pg_ls_logdir PGNSP PGUID 12 10 20 0 0 f f f f t t v s 0 0 2249 "" "{25,20,1184}" "{o,o,o}" "{name,size,modification}" _null_ _null_ pg_ls_logdir _null_ _null_ _null_ ));
DESCR("list files in the log directory");
extern ObjectAddress DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists);
extern void IsThereCollationInNamespace(const char *collname, Oid nspOid);
+extern ObjectAddress AlterCollation(AlterCollationStmt *stmt);
#endif /* COLLATIONCMDS_H */
extern const pg_enc2gettext pg_enc2gettext_tbl[];
+/*
+ * Encoding names for ICU
+ */
+extern bool is_encoding_supported_by_icu(int encoding);
+extern const char *get_encoding_name_for_icu(int encoding);
+
/*
* pg_wchar stuff
*/
T_CreateSubscriptionStmt,
T_AlterSubscriptionStmt,
T_DropSubscriptionStmt,
+ T_AlterCollationStmt,
/*
* TAGS FOR PARSE TREE NODES (parsenodes.h)
} AlterTableCmd;
+/* ----------------------
+ * Alter Collation
+ *
+ * Parse node for ALTER COLLATION; handled by AlterCollation().
+ * ----------------------
+ */
+typedef struct AlterCollationStmt
+{
+ NodeTag type; /* node tag (T_AlterCollationStmt) */
+ List *collname; /* name of the collation to alter; presumably a
+ * list of name parts -- confirm against the
+ * grammar */
+} AlterCollationStmt;
+
+
/* ----------------------
* Alter Domain
*
/* Define to 1 if you have the external array `tzname'. */
#undef HAVE_TZNAME
+/* Define to 1 if you have the `ucol_strcollUTF8' function. */
+#undef HAVE_UCOL_STRCOLLUTF8
+
/* Define to 1 if you have the header file. */
#undef HAVE_UCRED_H
(--enable-float8-byval) */
#undef USE_FLOAT8_BYVAL
+/* Define to build with ICU support. (--with-icu) */
+#undef USE_ICU
+
/* Define to 1 to build with LDAP support. (--with-ldap) */
#undef USE_LDAP
#if defined(LOCALE_T_IN_XLOCALE) || defined(WCSTOMBS_L_IN_XLOCALE)
#include
#endif
+#ifdef USE_ICU
+#include
+#endif
#include "utils/guc.h"
* We define our own wrapper around locale_t so we can keep the same
* function signatures for all builds, while not having to create a
* fake version of the standard type locale_t in the global namespace.
- * The fake version of pg_locale_t can be checked for truth; that's
- * about all it will be needed for.
+ * pg_locale_t is occasionally checked for truth, so make it a pointer.
*/
+/*
+ * Provider-tagged locale descriptor.  "provider" holds the collation
+ * provider code; "info" holds the provider-specific handle, whose members
+ * depend on which of HAVE_LOCALE_T and USE_ICU the build defines.
+ */
+struct pg_locale_t
+{
+ char provider;
+ union
+ {
#ifdef HAVE_LOCALE_T
-typedef locale_t pg_locale_t;
-#else
-typedef int pg_locale_t;
+ locale_t lt;
+#endif
+#ifdef USE_ICU
+ struct {
+ const char *locale;
+ UCollator *ucol;
+ } icu;
#endif
+ int dummy; /* keeps the union non-empty when neither LOCALE_T nor ICU is available */
+ } info;
+};
+
+typedef struct pg_locale_t *pg_locale_t;
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
+extern char *get_collation_actual_version(char collprovider, const char *collcollate);
+
+#ifdef USE_ICU
+extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
+extern int32_t icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar);
+#endif
+
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
#ifdef USE_WIDE_UPPER_LOWER
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
##
REGRESS_OPTS = --dlpath=. $(EXTRA_REGRESS_OPTS)
+ifeq ($(with_icu),yes)
+override EXTRA_TESTS := collate.icu $(EXTRA_TESTS)
+endif
check: all tablespace-setup
$(pg_regress_check) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule $(MAXCONNOPT) $(EXTRA_TESTS)
--- /dev/null
+/*
+ * This test is for ICU collations.
+ */
+SET client_encoding TO UTF8;
+CREATE SCHEMA collate_tests;
+SET search_path = collate_tests;
+CREATE TABLE collate_test1 (
+ a int,
+ b text COLLATE "en-x-icu" NOT NULL
+);
+\d collate_test1
+ Table "collate_tests.collate_test1"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | text | en-x-icu | not null |
+
+CREATE TABLE collate_test_fail (
+ a int,
+ b text COLLATE "ja_JP.eucjp-x-icu"
+);
+ERROR: collation "ja_JP.eucjp-x-icu" for encoding "UTF8" does not exist
+LINE 3: b text COLLATE "ja_JP.eucjp-x-icu"
+ ^
+CREATE TABLE collate_test_fail (
+ a int,
+ b text COLLATE "foo-x-icu"
+);
+ERROR: collation "foo-x-icu" for encoding "UTF8" does not exist
+LINE 3: b text COLLATE "foo-x-icu"
+ ^
+CREATE TABLE collate_test_fail (
+ a int COLLATE "en-x-icu",
+ b text
+);
+ERROR: collations are not supported by type integer
+LINE 2: a int COLLATE "en-x-icu",
+ ^
+CREATE TABLE collate_test_like (
+ LIKE collate_test1
+);
+\d collate_test_like
+ Table "collate_tests.collate_test_like"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | text | en-x-icu | not null |
+
+CREATE TABLE collate_test2 (
+ a int,
+ b text COLLATE "sv-x-icu"
+);
+CREATE TABLE collate_test3 (
+ a int,
+ b text COLLATE "C"
+);
+INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC');
+INSERT INTO collate_test2 SELECT * FROM collate_test1;
+INSERT INTO collate_test3 SELECT * FROM collate_test1;
+SELECT * FROM collate_test1 WHERE b >= 'bbc';
+ a | b
+---+-----
+ 3 | bbc
+(1 row)
+
+SELECT * FROM collate_test2 WHERE b >= 'bbc';
+ a | b
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test3 WHERE b >= 'bbc';
+ a | b
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test3 WHERE b >= 'BBC';
+ a | b
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+(3 rows)
+
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
+ a | b
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C";
+ a | b
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
+ a | b
+---+-----
+ 2 | äbc
+ 3 | bbc
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en-x-icu";
+ERROR: collation mismatch between explicit collations "C" and "en-x-icu"
+LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e...
+ ^
+CREATE DOMAIN testdomain_sv AS text COLLATE "sv-x-icu";
+CREATE DOMAIN testdomain_i AS int COLLATE "sv-x-icu"; -- fails
+ERROR: collations are not supported by type integer
+CREATE TABLE collate_test4 (
+ a int,
+ b testdomain_sv
+);
+INSERT INTO collate_test4 SELECT * FROM collate_test1;
+SELECT a, b FROM collate_test4 ORDER BY b;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+CREATE TABLE collate_test5 (
+ a int,
+ b testdomain_sv COLLATE "en-x-icu"
+);
+INSERT INTO collate_test5 SELECT * FROM collate_test1;
+SELECT a, b FROM collate_test5 ORDER BY b;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, b FROM collate_test1 ORDER BY b;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, b FROM collate_test2 ORDER BY b;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b FROM collate_test3 ORDER BY b;
+ a | b
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
+ a | b
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+-- star expansion
+SELECT * FROM collate_test1 ORDER BY b;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT * FROM collate_test2 ORDER BY b;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT * FROM collate_test3 ORDER BY b;
+ a | b
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+-- constant expression folding
+SELECT 'bbc' COLLATE "en-x-icu" > 'äbc' COLLATE "en-x-icu" AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'bbc' COLLATE "sv-x-icu" > 'äbc' COLLATE "sv-x-icu" AS "false";
+ false
+-------
+ f
+(1 row)
+
+-- upper/lower
+CREATE TABLE collate_test10 (
+ a int,
+ x text COLLATE "en-x-icu",
+ y text COLLATE "tr-x-icu"
+);
+INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
+SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
+ a | lower | lower | upper | upper | initcap | initcap
+---+-------+-------+-------+-------+---------+---------
+ 1 | hij | hij | HIJ | HİJ | Hij | Hij
+ 2 | hij | hıj | HIJ | HIJ | Hij | Hıj
+(2 rows)
+
+SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10;
+ a | lower | lower
+---+-------+-------
+ 1 | hij | hij
+ 2 | hij | hij
+(2 rows)
+
+SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a;
+ a | x | y
+---+-----+-----
+ 2 | HIJ | HIJ
+ 1 | hij | hij
+(2 rows)
+
+-- LIKE/ILIKE
+SELECT * FROM collate_test1 WHERE b LIKE 'abc';
+ a | b
+---+-----
+ 1 | abc
+(1 row)
+
+SELECT * FROM collate_test1 WHERE b LIKE 'abc%';
+ a | b
+---+-----
+ 1 | abc
+(1 row)
+
+SELECT * FROM collate_test1 WHERE b LIKE '%bc%';
+ a | b
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+(3 rows)
+
+SELECT * FROM collate_test1 WHERE b ILIKE 'abc';
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
+ a | b
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+ 4 | ABC
+(4 rows)
+
+SELECT 'Türkiye' COLLATE "en-x-icu" ILIKE '%KI%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'Türkiye' COLLATE "tr-x-icu" ILIKE '%KI%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'bıt' ILIKE 'BIT' COLLATE "en-x-icu" AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'bıt' ILIKE 'BIT' COLLATE "tr-x-icu" AS "true";
+ true
+------
+ t
+(1 row)
+
+-- The following actually exercises the selectivity estimation for ILIKE.
+SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
+ relname
+---------
+(0 rows)
+
+-- regular expressions
+SELECT * FROM collate_test1 WHERE b ~ '^abc$';
+ a | b
+---+-----
+ 1 | abc
+(1 row)
+
+SELECT * FROM collate_test1 WHERE b ~ '^abc';
+ a | b
+---+-----
+ 1 | abc
+(1 row)
+
+SELECT * FROM collate_test1 WHERE b ~ 'bc';
+ a | b
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+(3 rows)
+
+SELECT * FROM collate_test1 WHERE b ~* '^abc$';
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b ~* '^abc';
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+(2 rows)
+
+SELECT * FROM collate_test1 WHERE b ~* 'bc';
+ a | b
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+ 4 | ABC
+(4 rows)
+
+CREATE TABLE collate_test6 (
+ a int,
+ b text COLLATE "en-x-icu"
+);
+INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'),
+ (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '),
+ (9, 'äbç'), (10, 'ÄBÇ');
+SELECT b,
+ b ~ '^[[:alpha:]]+$' AS is_alpha,
+ b ~ '^[[:upper:]]+$' AS is_upper,
+ b ~ '^[[:lower:]]+$' AS is_lower,
+ b ~ '^[[:digit:]]+$' AS is_digit,
+ b ~ '^[[:alnum:]]+$' AS is_alnum,
+ b ~ '^[[:graph:]]+$' AS is_graph,
+ b ~ '^[[:print:]]+$' AS is_print,
+ b ~ '^[[:punct:]]+$' AS is_punct,
+ b ~ '^[[:space:]]+$' AS is_space
+FROM collate_test6;
+ b | is_alpha | is_upper | is_lower | is_digit | is_alnum | is_graph | is_print | is_punct | is_space
+-----+----------+----------+----------+----------+----------+----------+----------+----------+----------
+ abc | t | f | t | f | t | t | t | f | f
+ ABC | t | t | f | f | t | t | t | f | f
+ 123 | f | f | f | t | t | t | t | f | f
+ ab1 | f | f | f | f | t | t | t | f | f
+ a1! | f | f | f | f | f | t | t | f | f
+ a c | f | f | f | f | f | f | t | f | f
+ !.; | f | f | f | f | f | t | t | t | f
+ | f | f | f | f | f | f | t | f | t
+ äbç | t | f | t | f | t | t | t | f | f
+ ÄBÇ | t | t | f | f | t | t | t | f | f
+(10 rows)
+
+SELECT 'Türkiye' COLLATE "en-x-icu" ~* 'KI' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'Türkiye' COLLATE "tr-x-icu" ~* 'KI' AS "true"; -- true with ICU
+ true
+------
+ t
+(1 row)
+
+SELECT 'bıt' ~* 'BIT' COLLATE "en-x-icu" AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'bıt' ~* 'BIT' COLLATE "tr-x-icu" AS "false"; -- false with ICU
+ false
+-------
+ f
+(1 row)
+
+-- The following actually exercises the selectivity estimation for ~*.
+SELECT relname FROM pg_class WHERE relname ~* '^abc';
+ relname
+---------
+(0 rows)
+
+/* not run by default because it requires tr_TR system locale
+-- to_char
+
+SET lc_time TO 'tr_TR';
+SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
+SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr-x-icu");
+*/
+-- backwards parsing
+CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
+CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
+CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10;
+SELECT table_name, view_definition FROM information_schema.views
+ WHERE table_name LIKE 'collview%' ORDER BY 1;
+ table_name | view_definition
+------------+--------------------------------------------------------------------------
+ collview1 | SELECT collate_test1.a, +
+ | collate_test1.b +
+ | FROM collate_test1 +
+ | WHERE ((collate_test1.b COLLATE "C") >= 'bbc'::text);
+ collview2 | SELECT collate_test1.a, +
+ | collate_test1.b +
+ | FROM collate_test1 +
+ | ORDER BY (collate_test1.b COLLATE "C");
+ collview3 | SELECT collate_test10.a, +
+ | lower(((collate_test10.x || collate_test10.x) COLLATE "C")) AS lower+
+ | FROM collate_test10;
+(3 rows)
+
+-- collation propagation in various expression types
+SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2;
+ a | coalesce
+---+----------
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2;
+ a | coalesce
+---+----------
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2;
+ a | coalesce
+---+----------
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10;
+ a | lower | lower
+---+-------+-------
+ 1 | hij | hij
+ 2 | hij | hıj
+(2 rows)
+
+SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;
+ a | b | greatest
+---+-----+----------
+ 1 | abc | CCC
+ 2 | äbc | CCC
+ 3 | bbc | CCC
+ 4 | ABC | CCC
+(4 rows)
+
+SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3;
+ a | b | greatest
+---+-----+----------
+ 1 | abc | CCC
+ 3 | bbc | CCC
+ 4 | ABC | CCC
+ 2 | äbc | äbc
+(4 rows)
+
+SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3;
+ a | b | greatest
+---+-----+----------
+ 4 | ABC | CCC
+ 1 | abc | abc
+ 3 | bbc | bbc
+ 2 | äbc | äbc
+(4 rows)
+
+SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10;
+ a | x | y | lower | lower
+---+-----+-----+-------+-------
+ 1 | hij | hij | hij | hij
+ 2 | HIJ | HIJ | hij | hıj
+(2 rows)
+
+SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2;
+ a | nullif
+---+--------
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+ 1 |
+(4 rows)
+
+SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2;
+ a | nullif
+---+--------
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+ 1 |
+(4 rows)
+
+SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2;
+ a | nullif
+---+--------
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+ 1 |
+(4 rows)
+
+SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10;
+ a | lower | lower
+---+-------+-------
+ 1 | hij | hij
+ 2 | hij | hıj
+(2 rows)
+
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2;
+ a | b
+---+------
+ 4 | ABC
+ 2 | äbc
+ 1 | abcd
+ 3 | bbc
+(4 rows)
+
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2;
+ a | b
+---+------
+ 4 | ABC
+ 1 | abcd
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2;
+ a | b
+---+------
+ 4 | ABC
+ 1 | abcd
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+CREATE DOMAIN testdomain AS text;
+SELECT a, b::testdomain FROM collate_test1 ORDER BY 2;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, b::testdomain FROM collate_test2 ORDER BY 2;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b::testdomain FROM collate_test3 ORDER BY 2;
+ a | b
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10;
+ a | lower | lower
+---+-------+-------
+ 1 | hij | hij
+ 2 | hij | hıj
+(2 rows)
+
+SELECT min(b), max(b) FROM collate_test1;
+ min | max
+-----+-----
+ abc | bbc
+(1 row)
+
+SELECT min(b), max(b) FROM collate_test2;
+ min | max
+-----+-----
+ abc | äbc
+(1 row)
+
+SELECT min(b), max(b) FROM collate_test3;
+ min | max
+-----+-----
+ ABC | äbc
+(1 row)
+
+SELECT array_agg(b ORDER BY b) FROM collate_test1;
+ array_agg
+-------------------
+ {abc,ABC,äbc,bbc}
+(1 row)
+
+SELECT array_agg(b ORDER BY b) FROM collate_test2;
+ array_agg
+-------------------
+ {abc,ABC,bbc,äbc}
+(1 row)
+
+SELECT array_agg(b ORDER BY b) FROM collate_test3;
+ array_agg
+-------------------
+ {ABC,abc,bbc,äbc}
+(1 row)
+
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
+ a | b
+---+-----
+ 1 | abc
+ 1 | abc
+ 4 | ABC
+ 4 | ABC
+ 2 | äbc
+ 2 | äbc
+ 3 | bbc
+ 3 | bbc
+(8 rows)
+
+SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2;
+ a | b
+---+-----
+ 3 | bbc
+ 2 | äbc
+(2 rows)
+
+SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2;
+ a | b
+---+-----
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(3 rows)
+
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+ERROR: could not determine which collation to use for string comparison
+HINT: Use the COLLATE clause to set the collation explicitly.
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok
+ a | b
+---+-----
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+ 4 | ABC
+ 1 | abc
+ 2 | äbc
+ 3 | bbc
+ 4 | ABC
+(8 rows)
+
+SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+ERROR: collation mismatch between implicit collations "en-x-icu" and "C"
+LINE 1: SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collat...
+ ^
+HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
+SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok
+ a | b
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+ERROR: collation mismatch between implicit collations "en-x-icu" and "C"
+LINE 1: ...ELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM col...
+ ^
+HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
+SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+ERROR: collation mismatch between implicit collations "en-x-icu" and "C"
+LINE 1: SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM colla...
+ ^
+HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
+CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail
+ERROR: no collation was derived for column "b" with collatable type text
+HINT: Use the COLLATE clause to set the collation explicitly.
+-- ideally this would be a parse-time error, but for now it must be run-time:
+select x < y from collate_test10; -- fail
+ERROR: could not determine which collation to use for string comparison
+HINT: Use the COLLATE clause to set the collation explicitly.
+select x || y from collate_test10; -- ok, because || is not collation aware
+ ?column?
+----------
+ hijhij
+ HIJHIJ
+(2 rows)
+
+select x, y from collate_test10 order by x || y; -- not so ok
+ERROR: collation mismatch between implicit collations "en-x-icu" and "tr-x-icu"
+LINE 1: select x, y from collate_test10 order by x || y;
+ ^
+HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
+-- collation mismatch between recursive and non-recursive term
+WITH RECURSIVE foo(x) AS
+ (SELECT x FROM (VALUES('a' COLLATE "en-x-icu"),('b')) t(x)
+ UNION ALL
+ SELECT (x || 'c') COLLATE "de-x-icu" FROM foo WHERE length(x) < 10)
+SELECT * FROM foo;
+ERROR: recursive query "foo" column 1 has collation "en-x-icu" in non-recursive term but collation "de-x-icu" overall
+LINE 2: (SELECT x FROM (VALUES('a' COLLATE "en-x-icu"),('b')) t(x...
+ ^
+HINT: Use the COLLATE clause to set the collation of the non-recursive term.
+-- casting
+SELECT CAST('42' AS text COLLATE "C");
+ERROR: syntax error at or near "COLLATE"
+LINE 1: SELECT CAST('42' AS text COLLATE "C");
+ ^
+SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2;
+ a | b
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+-- propagation of collation in SQL functions (inlined and non-inlined cases)
+-- and plpgsql functions too
+CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql
+ AS $$ select $1 < $2 $$;
+CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql
+ AS $$ select $1 < $2 limit 1 $$;
+CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql
+ AS $$ begin return $1 < $2; end $$;
+SELECT a.b AS a, b.b AS b, a.b < b.b AS lt,
+ mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b)
+FROM collate_test1 a, collate_test1 b
+ORDER BY a.b, b.b;
+ a | b | lt | mylt | mylt_noninline | mylt_plpgsql
+-----+-----+----+------+----------------+--------------
+ abc | abc | f | f | f | f
+ abc | ABC | t | t | t | t
+ abc | äbc | t | t | t | t
+ abc | bbc | t | t | t | t
+ ABC | abc | f | f | f | f
+ ABC | ABC | f | f | f | f
+ ABC | äbc | t | t | t | t
+ ABC | bbc | t | t | t | t
+ äbc | abc | f | f | f | f
+ äbc | ABC | f | f | f | f
+ äbc | äbc | f | f | f | f
+ äbc | bbc | t | t | t | t
+ bbc | abc | f | f | f | f
+ bbc | ABC | f | f | f | f
+ bbc | äbc | f | f | f | f
+ bbc | bbc | f | f | f | f
+(16 rows)
+
+SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt,
+ mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"),
+ mylt_plpgsql(a.b, b.b COLLATE "C")
+FROM collate_test1 a, collate_test1 b
+ORDER BY a.b, b.b;
+ a | b | lt | mylt | mylt_noninline | mylt_plpgsql
+-----+-----+----+------+----------------+--------------
+ abc | abc | f | f | f | f
+ abc | ABC | f | f | f | f
+ abc | äbc | t | t | t | t
+ abc | bbc | t | t | t | t
+ ABC | abc | t | t | t | t
+ ABC | ABC | f | f | f | f
+ ABC | äbc | t | t | t | t
+ ABC | bbc | t | t | t | t
+ äbc | abc | f | f | f | f
+ äbc | ABC | f | f | f | f
+ äbc | äbc | f | f | f | f
+ äbc | bbc | f | f | f | f
+ bbc | abc | f | f | f | f
+ bbc | ABC | f | f | f | f
+ bbc | äbc | t | t | t | t
+ bbc | bbc | f | f | f | f
+(16 rows)
+
+-- collation override in plpgsql
+CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
+declare
+ xx text := x;
+ yy text := y;
+begin
+ return xx < yy;
+end
+$$;
+SELECT mylt2('a', 'B' collate "en-x-icu") as t, mylt2('a', 'B' collate "C") as f;
+ t | f
+---+---
+ t | f
+(1 row)
+
+CREATE OR REPLACE FUNCTION
+ mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
+declare
+ xx text COLLATE "POSIX" := x;
+ yy text := y;
+begin
+ return xx < yy;
+end
+$$;
+SELECT mylt2('a', 'B') as f;
+ f
+---
+ f
+(1 row)
+
+SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations
+ERROR: could not determine which collation to use for string comparison
+HINT: Use the COLLATE clause to set the collation explicitly.
+CONTEXT: PL/pgSQL function mylt2(text,text) line 6 at RETURN
+SELECT mylt2('a', 'B' collate "POSIX") as f;
+ f
+---
+ f
+(1 row)
+
+-- polymorphism
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1;
+ unnest
+--------
+ abc
+ ABC
+ äbc
+ bbc
+(4 rows)
+
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1;
+ unnest
+--------
+ abc
+ ABC
+ bbc
+ äbc
+(4 rows)
+
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1;
+ unnest
+--------
+ ABC
+ abc
+ bbc
+ äbc
+(4 rows)
+
+CREATE FUNCTION dup (anyelement) RETURNS anyelement
+ AS 'select $1' LANGUAGE sql;
+SELECT a, dup(b) FROM collate_test1 ORDER BY 2;
+ a | dup
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
+SELECT a, dup(b) FROM collate_test2 ORDER BY 2;
+ a | dup
+---+-----
+ 1 | abc
+ 4 | ABC
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+SELECT a, dup(b) FROM collate_test3 ORDER BY 2;
+ a | dup
+---+-----
+ 4 | ABC
+ 1 | abc
+ 3 | bbc
+ 2 | äbc
+(4 rows)
+
+-- indexes
+CREATE INDEX collate_test1_idx1 ON collate_test1 (b);
+CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C");
+CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically
+CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX"));
+CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail
+ERROR: collations are not supported by type integer
+CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail
+ERROR: collations are not supported by type integer
+LINE 1: ...ATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C...
+ ^
+SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;
+ relname | pg_get_indexdef
+--------------------+-----------------------------------------------------------------------------------------------------
+ collate_test1_idx1 | CREATE INDEX collate_test1_idx1 ON collate_test1 USING btree (b)
+ collate_test1_idx2 | CREATE INDEX collate_test1_idx2 ON collate_test1 USING btree (b COLLATE "C")
+ collate_test1_idx3 | CREATE INDEX collate_test1_idx3 ON collate_test1 USING btree (b COLLATE "C")
+ collate_test1_idx4 | CREATE INDEX collate_test1_idx4 ON collate_test1 USING btree (((b || 'foo'::text)) COLLATE "POSIX")
+(4 rows)
+
+-- schema manipulation commands
+CREATE ROLE regress_test_role;
+CREATE SCHEMA test_schema;
+-- We need to do this this way to cope with varying names for encodings:
+do $$
+BEGIN
+ EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
+ quote_literal(current_setting('lc_collate')) || ');';
+END
+$$;
+CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
+ERROR: collation "test0" already exists
+do $$
+BEGIN
+ EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' ||
+ quote_literal(current_setting('lc_collate')) ||
+ ', lc_ctype = ' ||
+ quote_literal(current_setting('lc_ctype')) || ');';
+END
+$$;
+CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
+ERROR: parameter "lc_ctype" must be specified
+CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx;
+CREATE COLLATION test4 FROM nonsense;
+ERROR: collation "nonsense" for encoding "UTF8" does not exist
+CREATE COLLATION test5 FROM test0;
+SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
+ collname
+----------
+ test0
+ test1
+ test5
+(3 rows)
+
+ALTER COLLATION test1 RENAME TO test11;
+ALTER COLLATION test0 RENAME TO test11; -- fail
+ERROR: collation "test11" already exists in schema "collate_tests"
+ALTER COLLATION test1 RENAME TO test22; -- fail
+ERROR: collation "test1" for encoding "UTF8" does not exist
+ALTER COLLATION test11 OWNER TO regress_test_role;
+ALTER COLLATION test11 OWNER TO nonsense;
+ERROR: role "nonsense" does not exist
+ALTER COLLATION test11 SET SCHEMA test_schema;
+COMMENT ON COLLATION test0 IS 'US English';
+SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation')
+ FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid)
+ WHERE collname LIKE 'test%'
+ ORDER BY 1;
+ collname | nspname | obj_description
+----------+---------------+-----------------
+ test0 | collate_tests | US English
+ test11 | test_schema |
+ test5 | collate_tests |
+(3 rows)
+
+DROP COLLATION test0, test_schema.test11, test5;
+DROP COLLATION test0; -- fail
+ERROR: collation "test0" for encoding "UTF8" does not exist
+DROP COLLATION IF EXISTS test0;
+NOTICE: collation "test0" does not exist, skipping
+SELECT collname FROM pg_collation WHERE collname LIKE 'test%';
+ collname
+----------
+(0 rows)
+
+DROP SCHEMA test_schema;
+DROP ROLE regress_test_role;
+-- ALTER
+ALTER COLLATION "en-x-icu" REFRESH VERSION;
+NOTICE: version has not changed
+-- dependencies
+CREATE COLLATION test0 FROM "C";
+CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
+CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
+CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0);
+CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo;
+CREATE TABLE collate_dep_test4t (a int, b text);
+CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0);
+DROP COLLATION test0 RESTRICT; -- fail
+ERROR: cannot drop collation test0 because other objects depend on it
+DETAIL: table collate_dep_test1 column b depends on collation test0
+type collate_dep_dom1 depends on collation test0
+composite type collate_dep_test2 column y depends on collation test0
+view collate_dep_test3 depends on collation test0
+index collate_dep_test4i depends on collation test0
+HINT: Use DROP ... CASCADE to drop the dependent objects too.
+DROP COLLATION test0 CASCADE;
+NOTICE: drop cascades to 5 other objects
+DETAIL: drop cascades to table collate_dep_test1 column b
+drop cascades to type collate_dep_dom1
+drop cascades to composite type collate_dep_test2 column y
+drop cascades to view collate_dep_test3
+drop cascades to index collate_dep_test4i
+\d collate_dep_test1
+ Table "collate_tests.collate_dep_test1"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+
+\d collate_dep_test2
+ Composite type "collate_tests.collate_dep_test2"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ x | integer | | |
+
+DROP TABLE collate_dep_test1, collate_dep_test4t;
+DROP TYPE collate_dep_test2;
+-- test range types and collations
+create type textrange_c as range(subtype=text, collation="C");
+create type textrange_en_us as range(subtype=text, collation="en-x-icu");
+select textrange_c('A','Z') @> 'b'::text;
+ ?column?
+----------
+ f
+(1 row)
+
+select textrange_en_us('A','Z') @> 'b'::text;
+ ?column?
+----------
+ t
+(1 row)
+
+drop type textrange_c;
+drop type textrange_en_us;
+-- cleanup
+DROP SCHEMA collate_tests CASCADE;
+NOTICE: drop cascades to 18 other objects
+DETAIL: drop cascades to table collate_test1
+drop cascades to table collate_test_like
+drop cascades to table collate_test2
+drop cascades to table collate_test3
+drop cascades to type testdomain_sv
+drop cascades to table collate_test4
+drop cascades to table collate_test5
+drop cascades to table collate_test10
+drop cascades to table collate_test6
+drop cascades to view collview1
+drop cascades to view collview2
+drop cascades to view collview3
+drop cascades to type testdomain
+drop cascades to function mylt(text,text)
+drop cascades to function mylt_noninline(text,text)
+drop cascades to function mylt_plpgsql(text,text)
+drop cascades to function mylt2(text,text)
+drop cascades to function dup(anyelement)
+RESET search_path;
+-- leave a collation for pg_upgrade test
+CREATE COLLATION coll_icu_upgrade FROM "und-x-icu";
* because other encodings don't support all the characters used.
*/
SET client_encoding TO UTF8;
+CREATE SCHEMA collate_tests;
+SET search_path = collate_tests;
CREATE TABLE collate_test1 (
a int,
b text COLLATE "en_US" NOT NULL
);
\d collate_test1
- Table "public.collate_test1"
+ Table "collate_tests.collate_test1"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
a | integer | | |
LIKE collate_test1
);
\d collate_test_like
- Table "public.collate_test_like"
+ Table "collate_tests.collate_test_like"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
a | integer | | |
4 | ABC
(4 rows)
+CREATE TABLE collate_test6 (
+ a int,
+ b text COLLATE "en_US"
+);
+INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'),
+ (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '),
+ (9, 'äbç'), (10, 'ÄBÇ');
+SELECT b,
+ b ~ '^[[:alpha:]]+$' AS is_alpha,
+ b ~ '^[[:upper:]]+$' AS is_upper,
+ b ~ '^[[:lower:]]+$' AS is_lower,
+ b ~ '^[[:digit:]]+$' AS is_digit,
+ b ~ '^[[:alnum:]]+$' AS is_alnum,
+ b ~ '^[[:graph:]]+$' AS is_graph,
+ b ~ '^[[:print:]]+$' AS is_print,
+ b ~ '^[[:punct:]]+$' AS is_punct,
+ b ~ '^[[:space:]]+$' AS is_space
+FROM collate_test6;
+ b | is_alpha | is_upper | is_lower | is_digit | is_alnum | is_graph | is_print | is_punct | is_space
+-----+----------+----------+----------+----------+----------+----------+----------+----------+----------
+ abc | t | f | t | f | t | t | t | f | f
+ ABC | t | t | f | f | t | t | t | f | f
+ 123 | f | f | f | t | t | t | t | f | f
+ ab1 | f | f | f | f | t | t | t | f | f
+ a1! | f | f | f | f | f | t | t | f | f
+ a c | f | f | f | f | f | f | t | f | f
+ !.; | f | f | f | f | f | t | t | t | f
+ | f | f | f | f | f | f | t | f | t
+ äbç | t | f | t | f | t | t | t | f | f
+ ÄBÇ | t | t | f | f | t | t | t | f | f
+(10 rows)
+
SELECT 'Türkiye' COLLATE "en_US" ~* 'KI' AS "true";
true
------
CREATE COLLATION testx (locale = 'nonsense'); -- fail
ERROR: could not create locale "nonsense": No such file or directory
DETAIL: The operating system could not find any locale data for the locale name "nonsense".
+CREATE COLLATION testy (locale = 'en_US.utf8', version = 'foo'); -- fail, no versions for libc
+ERROR: collation "testy" has no actual version, but a version was specified
CREATE COLLATION test4 FROM nonsense;
ERROR: collation "nonsense" for encoding "UTF8" does not exist
CREATE COLLATION test5 FROM test0;
ALTER COLLATION test1 RENAME TO test11;
ALTER COLLATION test0 RENAME TO test11; -- fail
-ERROR: collation "test11" for encoding "UTF8" already exists in schema "public"
+ERROR: collation "test11" for encoding "UTF8" already exists in schema "collate_tests"
ALTER COLLATION test1 RENAME TO test22; -- fail
ERROR: collation "test1" for encoding "UTF8" does not exist
ALTER COLLATION test11 OWNER TO regress_test_role;
FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid)
WHERE collname LIKE 'test%'
ORDER BY 1;
- collname | nspname | obj_description
-----------+-------------+-----------------
- test0 | public | US English
- test11 | test_schema |
- test5 | public |
+ collname | nspname | obj_description
+----------+---------------+-----------------
+ test0 | collate_tests | US English
+ test11 | test_schema |
+ test5 | collate_tests |
(3 rows)
DROP COLLATION test0, test_schema.test11, test5;
DROP SCHEMA test_schema;
DROP ROLE regress_test_role;
+-- ALTER
+ALTER COLLATION "en_US" REFRESH VERSION;
+NOTICE: version has not changed
-- dependencies
CREATE COLLATION test0 FROM "C";
CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
drop cascades to view collate_dep_test3
drop cascades to index collate_dep_test4i
\d collate_dep_test1
- Table "public.collate_dep_test1"
+ Table "collate_tests.collate_dep_test1"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
a | integer | | |
\d collate_dep_test2
- Composite type "public.collate_dep_test2"
+ Composite type "collate_tests.collate_dep_test2"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
x | integer | | |
drop type textrange_c;
drop type textrange_en_us;
+-- cleanup
+DROP SCHEMA collate_tests CASCADE;
+NOTICE: drop cascades to 18 other objects
+DETAIL: drop cascades to table collate_test1
+drop cascades to table collate_test_like
+drop cascades to table collate_test2
+drop cascades to table collate_test3
+drop cascades to type testdomain_sv
+drop cascades to table collate_test4
+drop cascades to table collate_test5
+drop cascades to table collate_test10
+drop cascades to table collate_test6
+drop cascades to view collview1
+drop cascades to view collview2
+drop cascades to view collview3
+drop cascades to type testdomain
+drop cascades to function mylt(text,text)
+drop cascades to function mylt_noninline(text,text)
+drop cascades to function mylt_plpgsql(text,text)
+drop cascades to function mylt2(text,text)
+drop cascades to function dup(anyelement)
--- /dev/null
+/*
+ * This test is for ICU collations.
+ */
+
+SET client_encoding TO UTF8;
+
+CREATE SCHEMA collate_tests;
+SET search_path = collate_tests;
+
+
+CREATE TABLE collate_test1 (
+ a int,
+ b text COLLATE "en-x-icu" NOT NULL
+);
+
+\d collate_test1
+
+CREATE TABLE collate_test_fail (
+ a int,
+ b text COLLATE "ja_JP.eucjp-x-icu"
+);
+
+CREATE TABLE collate_test_fail (
+ a int,
+ b text COLLATE "foo-x-icu"
+);
+
+CREATE TABLE collate_test_fail (
+ a int COLLATE "en-x-icu",
+ b text
+);
+
+CREATE TABLE collate_test_like (
+ LIKE collate_test1
+);
+
+\d collate_test_like
+
+CREATE TABLE collate_test2 (
+ a int,
+ b text COLLATE "sv-x-icu"
+);
+
+CREATE TABLE collate_test3 (
+ a int,
+ b text COLLATE "C"
+);
+
+INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC');
+INSERT INTO collate_test2 SELECT * FROM collate_test1;
+INSERT INTO collate_test3 SELECT * FROM collate_test1;
+
+SELECT * FROM collate_test1 WHERE b >= 'bbc';
+SELECT * FROM collate_test2 WHERE b >= 'bbc';
+SELECT * FROM collate_test3 WHERE b >= 'bbc';
+SELECT * FROM collate_test3 WHERE b >= 'BBC';
+
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
+SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C";
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C";
+SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en-x-icu";
+
+
+CREATE DOMAIN testdomain_sv AS text COLLATE "sv-x-icu";
+CREATE DOMAIN testdomain_i AS int COLLATE "sv-x-icu"; -- fails
+CREATE TABLE collate_test4 (
+ a int,
+ b testdomain_sv
+);
+INSERT INTO collate_test4 SELECT * FROM collate_test1;
+SELECT a, b FROM collate_test4 ORDER BY b;
+
+CREATE TABLE collate_test5 (
+ a int,
+ b testdomain_sv COLLATE "en-x-icu"
+);
+INSERT INTO collate_test5 SELECT * FROM collate_test1;
+SELECT a, b FROM collate_test5 ORDER BY b;
+
+
+SELECT a, b FROM collate_test1 ORDER BY b;
+SELECT a, b FROM collate_test2 ORDER BY b;
+SELECT a, b FROM collate_test3 ORDER BY b;
+
+SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
+
+-- star expansion
+SELECT * FROM collate_test1 ORDER BY b;
+SELECT * FROM collate_test2 ORDER BY b;
+SELECT * FROM collate_test3 ORDER BY b;
+
+-- constant expression folding
+SELECT 'bbc' COLLATE "en-x-icu" > 'äbc' COLLATE "en-x-icu" AS "true";
+SELECT 'bbc' COLLATE "sv-x-icu" > 'äbc' COLLATE "sv-x-icu" AS "false";
+
+-- upper/lower
+
+CREATE TABLE collate_test10 (
+ a int,
+ x text COLLATE "en-x-icu",
+ y text COLLATE "tr-x-icu"
+);
+
+INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ');
+
+SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10;
+SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10;
+
+SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a;
+
+-- LIKE/ILIKE
+
+SELECT * FROM collate_test1 WHERE b LIKE 'abc';
+SELECT * FROM collate_test1 WHERE b LIKE 'abc%';
+SELECT * FROM collate_test1 WHERE b LIKE '%bc%';
+SELECT * FROM collate_test1 WHERE b ILIKE 'abc';
+SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';
+SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';
+
+SELECT 'Türkiye' COLLATE "en-x-icu" ILIKE '%KI%' AS "true";
+SELECT 'Türkiye' COLLATE "tr-x-icu" ILIKE '%KI%' AS "false";
+
+SELECT 'bıt' ILIKE 'BIT' COLLATE "en-x-icu" AS "false";
+SELECT 'bıt' ILIKE 'BIT' COLLATE "tr-x-icu" AS "true";
+
+-- The following actually exercises the selectivity estimation for ILIKE.
+SELECT relname FROM pg_class WHERE relname ILIKE 'abc%';
+
+-- regular expressions
+
+SELECT * FROM collate_test1 WHERE b ~ '^abc$';
+SELECT * FROM collate_test1 WHERE b ~ '^abc';
+SELECT * FROM collate_test1 WHERE b ~ 'bc';
+SELECT * FROM collate_test1 WHERE b ~* '^abc$';
+SELECT * FROM collate_test1 WHERE b ~* '^abc';
+SELECT * FROM collate_test1 WHERE b ~* 'bc';
+
+CREATE TABLE collate_test6 (
+ a int,
+ b text COLLATE "en-x-icu"
+);
+INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'),
+ (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '),
+ (9, 'äbç'), (10, 'ÄBÇ');
+SELECT b,
+ b ~ '^[[:alpha:]]+$' AS is_alpha,
+ b ~ '^[[:upper:]]+$' AS is_upper,
+ b ~ '^[[:lower:]]+$' AS is_lower,
+ b ~ '^[[:digit:]]+$' AS is_digit,
+ b ~ '^[[:alnum:]]+$' AS is_alnum,
+ b ~ '^[[:graph:]]+$' AS is_graph,
+ b ~ '^[[:print:]]+$' AS is_print,
+ b ~ '^[[:punct:]]+$' AS is_punct,
+ b ~ '^[[:space:]]+$' AS is_space
+FROM collate_test6;
+
+SELECT 'Türkiye' COLLATE "en-x-icu" ~* 'KI' AS "true";
+SELECT 'Türkiye' COLLATE "tr-x-icu" ~* 'KI' AS "true"; -- true with ICU
+
+SELECT 'bıt' ~* 'BIT' COLLATE "en-x-icu" AS "false";
+SELECT 'bıt' ~* 'BIT' COLLATE "tr-x-icu" AS "false"; -- false with ICU
+
+-- The following actually exercises the selectivity estimation for ~*.
+SELECT relname FROM pg_class WHERE relname ~* '^abc';
+
+
+/* not run by default because it requires tr_TR system locale
+-- to_char
+
+SET lc_time TO 'tr_TR';
+SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
+SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr-x-icu");
+*/
+
+
+-- backwards parsing
+
+CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
+CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
+CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10;
+
+SELECT table_name, view_definition FROM information_schema.views
+ WHERE table_name LIKE 'collview%' ORDER BY 1;
+
+
+-- collation propagation in various expression types
+
+SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2;
+SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2;
+SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2;
+SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10;
+
+SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;
+SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3;
+SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3;
+SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10;
+
+SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2;
+SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2;
+SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2;
+SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10;
+
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2;
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2;
+SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2;
+
+CREATE DOMAIN testdomain AS text;
+SELECT a, b::testdomain FROM collate_test1 ORDER BY 2;
+SELECT a, b::testdomain FROM collate_test2 ORDER BY 2;
+SELECT a, b::testdomain FROM collate_test3 ORDER BY 2;
+SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2;
+SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10;
+
+SELECT min(b), max(b) FROM collate_test1;
+SELECT min(b), max(b) FROM collate_test2;
+SELECT min(b), max(b) FROM collate_test3;
+
+SELECT array_agg(b ORDER BY b) FROM collate_test1;
+SELECT array_agg(b ORDER BY b) FROM collate_test2;
+SELECT array_agg(b ORDER BY b) FROM collate_test3;
+
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
+SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
+SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2;
+SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2;
+
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok
+SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok
+SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail
+
+CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail
+
+-- ideally this would be a parse-time error, but for now it must be run-time:
+select x < y from collate_test10; -- fail
+select x || y from collate_test10; -- ok, because || is not collation aware
+select x, y from collate_test10 order by x || y; -- not so ok
+
+-- collation mismatch between recursive and non-recursive term
+WITH RECURSIVE foo(x) AS
+ (SELECT x FROM (VALUES('a' COLLATE "en-x-icu"),('b')) t(x)
+ UNION ALL
+ SELECT (x || 'c') COLLATE "de-x-icu" FROM foo WHERE length(x) < 10)
+SELECT * FROM foo;
+
+
+-- casting
+
+SELECT CAST('42' AS text COLLATE "C");
+
+SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2;
+SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2;
+SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2;
+
+
+-- propagation of collation in SQL functions (inlined and non-inlined cases)
+-- and plpgsql functions too
+
+CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql -- plain SQL wrapper around <; presumably the inlinable case
+ AS $$ select $1 < $2 $$;
+
+CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql -- same comparison; the LIMIT 1 presumably keeps it from being inlined (per the name)
+ AS $$ select $1 < $2 limit 1 $$;
+
+CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql -- same comparison written as a PL/pgSQL function
+ AS $$ begin return $1 < $2; end $$;
+
+SELECT a.b AS a, b.b AS b, a.b < b.b AS lt,
+ mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b)
+FROM collate_test1 a, collate_test1 b
+ORDER BY a.b, b.b;
+
+SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt,
+ mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"),
+ mylt_plpgsql(a.b, b.b COLLATE "C")
+FROM collate_test1 a, collate_test1 b
+ORDER BY a.b, b.b;
+
+
+-- collation override in plpgsql: mylt2 copies its arguments into locals xx and yy (declared without a COLLATE clause) and returns xx < yy
+
+CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
+declare
+ xx text := x;
+ yy text := y;
+begin
+ return xx < yy;
+end
+$$;
+
+SELECT mylt2('a', 'B' collate "en-x-icu") as t, mylt2('a', 'B' collate "C") as f;
+
+CREATE OR REPLACE FUNCTION -- redefines mylt2: local xx is now declared COLLATE "POSIX"; yy is unchanged
+ mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$
+declare
+ xx text COLLATE "POSIX" := x;
+ yy text := y;
+begin
+ return xx < yy;
+end
+$$;
+
+SELECT mylt2('a', 'B') as f;
+SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations
+SELECT mylt2('a', 'B' collate "POSIX") as f;
+
+
+-- polymorphism
+
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1;
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1;
+SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1;
+
+CREATE FUNCTION dup (anyelement) RETURNS anyelement -- polymorphic identity: returns its argument unchanged
+ AS 'select $1' LANGUAGE sql;
+
+SELECT a, dup(b) FROM collate_test1 ORDER BY 2;
+SELECT a, dup(b) FROM collate_test2 ORDER BY 2;
+SELECT a, dup(b) FROM collate_test3 ORDER BY 2;
+
+
+-- indexes
+
+CREATE INDEX collate_test1_idx1 ON collate_test1 (b);
+CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C");
+CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically
+CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX"));
+
+CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail
+CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail
+
+SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;
+
+
+-- schema manipulation commands
+
+CREATE ROLE regress_test_role;
+CREATE SCHEMA test_schema;
+
+-- We need to build the statement dynamically (EXECUTE inside a DO block, reading the current lc_collate setting) to cope with varying names for encodings:
+do $$
+BEGIN
+ EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
+ quote_literal(current_setting('lc_collate')) || ');';
+END
+$$;
+CREATE COLLATION test0 FROM "C"; -- fail, duplicate name
+do $$
+BEGIN
+ EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' ||
+ quote_literal(current_setting('lc_collate')) ||
+ ', lc_ctype = ' ||
+ quote_literal(current_setting('lc_ctype')) || ');';
+END
+$$; -- same dynamic approach: creates ICU collation test1 from the current lc_collate and lc_ctype settings
+CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
+CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx;
+
+CREATE COLLATION test4 FROM nonsense;
+CREATE COLLATION test5 FROM test0;
+
+SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1;
+
+ALTER COLLATION test1 RENAME TO test11;
+ALTER COLLATION test0 RENAME TO test11; -- fail
+ALTER COLLATION test1 RENAME TO test22; -- fail
+
+ALTER COLLATION test11 OWNER TO regress_test_role;
+ALTER COLLATION test11 OWNER TO nonsense;
+ALTER COLLATION test11 SET SCHEMA test_schema;
+
+COMMENT ON COLLATION test0 IS 'US English';
+
+SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation')
+ FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid)
+ WHERE collname LIKE 'test%'
+ ORDER BY 1;
+
+DROP COLLATION test0, test_schema.test11, test5;
+DROP COLLATION test0; -- fail
+DROP COLLATION IF EXISTS test0;
+
+SELECT collname FROM pg_collation WHERE collname LIKE 'test%';
+
+DROP SCHEMA test_schema;
+DROP ROLE regress_test_role;
+
+
+-- ALTER
+
+ALTER COLLATION "en-x-icu" REFRESH VERSION;
+
+
+-- dependencies
+
+CREATE COLLATION test0 FROM "C";
+
+CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0);
+CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0;
+CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0);
+CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo;
+CREATE TABLE collate_dep_test4t (a int, b text);
+CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0);
+
+DROP COLLATION test0 RESTRICT; -- fail
+DROP COLLATION test0 CASCADE;
+
+\d collate_dep_test1
+\d collate_dep_test2
+
+DROP TABLE collate_dep_test1, collate_dep_test4t;
+DROP TYPE collate_dep_test2;
+
+-- test range types and collations
+
+create type textrange_c as range(subtype=text, collation="C");
+create type textrange_en_us as range(subtype=text, collation="en-x-icu");
+
+select textrange_c('A','Z') @> 'b'::text;
+select textrange_en_us('A','Z') @> 'b'::text;
+
+drop type textrange_c;
+drop type textrange_en_us;
+
+
+-- cleanup
+DROP SCHEMA collate_tests CASCADE;
+RESET search_path;
+
+-- leave a collation for pg_upgrade test
+CREATE COLLATION coll_icu_upgrade FROM "und-x-icu";
SET client_encoding TO UTF8;
+CREATE SCHEMA collate_tests;
+SET search_path = collate_tests;
+
CREATE TABLE collate_test1 (
a int,
SELECT * FROM collate_test1 WHERE b ~* '^abc';
SELECT * FROM collate_test1 WHERE b ~* 'bc';
+CREATE TABLE collate_test6 (
+ a int,
+ b text COLLATE "en_US"
+);
+INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'),
+ (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '),
+ (9, 'äbç'), (10, 'ÄBÇ');
+SELECT b,
+ b ~ '^[[:alpha:]]+$' AS is_alpha,
+ b ~ '^[[:upper:]]+$' AS is_upper,
+ b ~ '^[[:lower:]]+$' AS is_lower,
+ b ~ '^[[:digit:]]+$' AS is_digit,
+ b ~ '^[[:alnum:]]+$' AS is_alnum,
+ b ~ '^[[:graph:]]+$' AS is_graph,
+ b ~ '^[[:print:]]+$' AS is_print,
+ b ~ '^[[:punct:]]+$' AS is_punct,
+ b ~ '^[[:space:]]+$' AS is_space
+FROM collate_test6;
+
SELECT 'Türkiye' COLLATE "en_US" ~* 'KI' AS "true";
SELECT 'Türkiye' COLLATE "tr_TR" ~* 'KI' AS "false";
$$;
CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype
CREATE COLLATION testx (locale = 'nonsense'); -- fail
+CREATE COLLATION testy (locale = 'en_US.utf8', version = 'foo'); -- fail, no versions for libc
CREATE COLLATION test4 FROM nonsense;
CREATE COLLATION test5 FROM test0;
DROP ROLE regress_test_role;
+-- ALTER
+
+ALTER COLLATION "en_US" REFRESH VERSION;
+
+
-- dependencies
CREATE COLLATION test0 FROM "C";
drop type textrange_c;
drop type textrange_en_us;
+
+
+-- cleanup
+DROP SCHEMA collate_tests CASCADE;