Further hacking on ICU collation creation and usage.

author Tom Lane

Sat, 24 Jun 2017 17:54:15 +0000 (13:54 -0400)

committer Tom Lane

Sat, 24 Jun 2017 17:54:23 +0000 (13:54 -0400)
author Tom Lane
Sat, 24 Jun 2017 17:54:15 +0000 (13:54 -0400)
committer Tom Lane
Sat, 24 Jun 2017 17:54:23 +0000 (13:54 -0400)
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml

index 5e0a0bf7a7853157a79f2ae7e1ee199143d67386..48ecfc5f48e083c8b562a444b27e459d60619e1f 100644 (file)
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -508,8 +508,8 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
      operating system C library.  These are the locales that most tools
      provided by the operating system use.  Another provider
      is icu, which uses the external
-    ICU library.  Support for ICU has to be
-    configured when PostgreSQL is built.
+    ICU library.  ICU locales can only be
+    used if support for ICU was configured when PostgreSQL was built.
     
  
     
@@ -529,12 +529,12 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
     
  
     
-    A collation provided by icu maps to a named collator
-    provided by the ICU library.  ICU does not support
-    separate collate and ctype settings, so they
-    are always the same.  Also, ICU collations are independent of the
-    encoding, so there is always only one ICU collation for a given name in a
-    database.
+    A collation object provided by icu maps to a named
+    collator provided by the ICU library.  ICU does not support
+    separate collate and ctype settings, so
+    they are always the same.  Also, ICU collations are independent of the
+    encoding, so there is always only one ICU collation of a given name in
+    a database.
     
  
     
@@ -566,10 +566,10 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
     
      If the operating system provides support for using multiple locales
      within a single program (newlocale and related functions),
-    or support for ICU is configured,
+    or if support for ICU is configured,
      then when a database cluster is initialized, initdb
      populates the system catalog pg_collation with
-    collations based on all the locales it finds on the operating
+    collations based on all the locales it finds in the operating
      system at the time.
     
  
@@ -602,10 +602,12 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
      directly to the locales installed in the operating system, which can be
      listed using the command locale -a.  In case
      a libc collation is needed that has different values
-    for LC_COLLATE and LC_CTYPE, or new
+    for LC_COLLATE and LC_CTYPE, or if new
      locales are installed in the operating system after the database system
      was initialized, then a new collation may be created using
     the CREATE COLLATION command.
+    New operating system locales can also be imported en masse using
+    the pg_import_system_collations() function.
     
  
     
@@ -617,8 +619,8 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
      Use of the stripped collation names is recommended, since it will
      make one less thing you need to change if you decide to change to
      another database encoding.  Note however that the default,
-    C, and POSIX collations, as well as all collations
-    provided by ICU can be used regardless of the database encoding.
+    C, and POSIX collations can be used regardless of
+    the database encoding.
     
  
     
@@ -641,7 +643,7 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
      Collations provided by ICU are created with names in BCP 47 language tag
      format, with a private use
      extension -x-icu appended, to distinguish them from
-    libc locales.  So de-x-icu would be an example.
+    libc locales.  So de-x-icu would be an example name.
     
  
     
@@ -652,7 +654,7 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
     See http://userguide.icu-project.org/locale for
      information on ICU locale naming.  initdb uses the ICU
      APIs to extract a set of locales with distinct collation rules to populate
-    the initial set of collations.  Here are some examples collations that
+    the initial set of collations.  Here are some example collations that
      might be created:
  
      
@@ -675,7 +677,7 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
        
         German collation for Austria, default variant
         
-        (Note that as of this writing, there is no,
+        (As of this writing, there is no,
          say, de-DE-x-icu or de-CH-x-icu,
          because those are equivalent to de-x-icu.)
         
@@ -701,9 +703,11 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
     
  
     
-    Some (less frequently used) encodings are not supported by ICU.  If the
-    database cluster was initialized with such an encoding, no ICU collations
-    will be predefined.
+    Some (less frequently used) encodings are not supported by ICU.  When the
+    database encoding is one of these, ICU collation entries
+    in pg_collation are ignored.  Attempting to use one
+    will draw an error along the lines of collation "de-x-icu" for
+    encoding "WIN874" does not exist.
     
     
     
@@ -761,8 +765,11 @@ CREATE COLLATION "de-DE-x-icu" FROM "de-x-icu";
     classification) and LC_COLLATE (string sort order) locale
     settings. For C or
     POSIX locale, any character set is allowed, but for other
-   locales there is only one character set that will work correctly.
+   libc-provided locales there is only one character set that will work
+   correctly.
     (On Windows, however, UTF-8 encoding can be used with any locale.)
+   If you have ICU support configured, ICU-provided locales can be used
+   with most but not all server-side encodings.
    
  
     
@@ -775,13 +782,14 @@ CREATE COLLATION "de-DE-x-icu" FROM "de-x-icu";
  
       
        <productname>PostgreSQL</productname> Character Sets
-      <tgroup cols="6">
+      <tgroup cols="7">
         
          
           Name
           Description
           Language
           Server?
+         ICU?
author	Tom Lane
	Sat, 24 Jun 2017 17:54:15 +0000 (13:54 -0400)
committer	Tom Lane
	Sat, 24 Jun 2017 17:54:23 +0000 (13:54 -0400)
doc/src/sgml/charset.sgml		patch \| blob \| blame \| history
src/backend/catalog/namespace.c		patch \| blob \| blame \| history
src/backend/commands/collationcmds.c		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history