Add Unicode property tables.
author Jeff Davis
Wed, 6 Mar 2024 20:50:01 +0000 (12:50 -0800)
committer Jeff Davis
Wed, 6 Mar 2024 20:50:01 +0000 (12:50 -0800)
Provide functions to test for Unicode properties, such as Alphabetic
or Cased. These functions use tables derived from Unicode data files,
similar to the tables for Unicode normalization or general category,
and those tables can be updated with the 'update-unicode' build
target.

Use Unicode properties to provide functions to test for regex
character classes, like 'punct' or 'alnum'.

Infrastructure in preparation for a builtin collation provider, and
may also be useful for other callers.

Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel%40j-davis.com
Reviewed-by: Daniel Verite, Peter Eisentraut, Jeremy Schneider
src/common/unicode/Makefile
src/common/unicode/README
src/common/unicode/category_test.c
src/common/unicode/generate-unicode_category_table.pl
src/common/unicode/meson.build
src/common/unicode_category.c
src/include/common/unicode_category.h
src/include/common/unicode_category_table.h

index 04d81dd5cb581bb3e4e9eb426aa9b468b0b6b554..27f0408d8b8ea0ba6617368dda8ec262548f5915 100644 (file)
@@ -29,13 +29,13 @@ update-unicode: unicode_category_table.h unicode_east_asian_fw_table.h unicode_n
 # These files are part of the Unicode Character Database. Download
 # them on demand.  The dependency on Makefile.global is for
 # UNICODE_VERSION.
-CompositionExclusions.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt UnicodeData.txt: $(top_builddir)/src/Makefile.global
+CompositionExclusions.txt DerivedCoreProperties.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt PropList.txt UnicodeData.txt: $(top_builddir)/src/Makefile.global
    $(DOWNLOAD) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$(@F)
 
 unicode_version.h: generate-unicode_version.pl
    $(PERL) $< --version $(UNICODE_VERSION)
 
-unicode_category_table.h: generate-unicode_category_table.pl UnicodeData.txt
+unicode_category_table.h: generate-unicode_category_table.pl DerivedCoreProperties.txt PropList.txt UnicodeData.txt
    $(PERL) $<
 
 # Generation of conversion tables used for string normalization with
@@ -82,4 +82,4 @@ clean:
    rm -f $(OBJS) category_test category_test.o norm_test norm_test.o
 
 distclean: clean
-   rm -f CompositionExclusions.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt UnicodeData.txt norm_test_table.h unicode_category_table.h unicode_norm_table.h
+   rm -f CompositionExclusions.txt DerivedCoreProperties.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt PropList.txt UnicodeData.txt norm_test_table.h unicode_category_table.h unicode_norm_table.h
index 56956f6a65fc18d803891c7c87f14f335bccd5ab..110ce5765d59cac66649191508f71dba8c53eec3 100644 (file)
@@ -1,22 +1,35 @@
-This directory contains tools to generate the tables in
-src/include/common/unicode_norm.h, used for Unicode normalization. The
-generated .h file is included in the source tree, so these are normally not
-needed to build PostgreSQL, only if you need to re-generate the .h file
-from the Unicode data files for some reason, e.g. to update to a new version
-of Unicode.
+This directory contains tools to download new Unicode data files and
+generate static tables. These tables are used to normalize or
+determine various properties of Unicode data.
 
-Generating unicode_norm_table.h
--------------------------------
+The generated header files are copied to src/include/common/, and
+included in the source tree, so these tools are not normally required
+to build PostgreSQL.
 
-Run
+Update Unicode Version
+----------------------
+
+Edit src/Makefile.global.in and src/common/unicode/meson.build
+to update the UNICODE_VERSION.
+
+Then, generate the new header files with:
 
     make update-unicode
 
-from the top level of the source tree and commit the result.
+or if using meson:
+
+    ninja update-unicode
+
+from the top level of the source tree. Examine the result to make sure
+the changes look reasonable (that is, that the diff size and scope are
+comparable to the Unicode changes since the last update), and then
+commit it.
 
 Tests
 -----
 
+Normalization tests:
+
 The Unicode consortium publishes a comprehensive test suite for the
 normalization algorithm, in a file called NormalizationTest.txt. This
 directory also contains a perl script and some C code, to run our
@@ -26,3 +39,15 @@ To download NormalizationTest.txt and run the tests:
     make normalization-check
 
 This is also run as part of the update-unicode target.
+
+Category & Property tests:
+
+The file category_test.c exhaustively compares the category and
+properties of each code point as determined by the generated tables
+with the category and properties as reported by ICU. For this test to
+be effective, the version of the Unicode data files must be similar to
+the version of Unicode on which ICU is based, so attempt to match the
+versions as closely as possible. With mismatched versions, the test skips
+codepoints that are assigned in one version and not the other, and may
+falsely report failures. This test is run as a part of the
+update-unicode target.
index f1aaac0f6137f502d2138436a1dc0899e19b5e2e..e823044d63a2f5664e6bb7fcb21ba511ef65a7ab 100644 (file)
@@ -1,6 +1,6 @@
 /*-------------------------------------------------------------------------
  * category_test.c
- *     Program to test Unicode general category functions.
+ *     Program to test Unicode general category and character properties.
  *
  * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
  *
 #include 
 #include 
 #include 
+#include 
 
 #ifdef USE_ICU
 #include 
 #endif
+
 #include "common/unicode_category.h"
 #include "common/unicode_version.h"
 
+static int pg_unicode_version = 0;
+#ifdef USE_ICU
+static int icu_unicode_version = 0;
+#endif
+
 /*
  * Parse version into integer for easy comparison.
  */
-#ifdef USE_ICU
 static int
 parse_unicode_version(const char *version)
 {
@@ -39,57 +45,175 @@ parse_unicode_version(const char *version)
 
    return major * 100 + minor;
 }
-#endif
 
+#ifdef USE_ICU
 /*
- * Exhaustively test that the Unicode category for each codepoint matches that
- * returned by ICU.
+ * Test Postgres Unicode tables by comparing with ICU. Test the General
+ * Category, the properties Alphabetic, Lowercase, Uppercase, Cased,
+ * Case_Ignorable, White_Space, Hex_Digit, Join_Control, and regex classes.
  */
-int
-main(int argc, char **argv)
+static void
+icu_test()
 {
-#ifdef USE_ICU
-   int         pg_unicode_version = parse_unicode_version(PG_UNICODE_VERSION);
-   int         icu_unicode_version = parse_unicode_version(U_UNICODE_VERSION);
+   int         successful = 0;
    int         pg_skipped_codepoints = 0;
    int         icu_skipped_codepoints = 0;
 
-   printf("category_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION);
-   printf("category_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
-
-   for (UChar32 code = 0; code <= 0x10ffff; code++)
+   for (pg_wchar code = 0; code <= 0x10ffff; code++)
    {
        uint8_t     pg_category = unicode_category(code);
        uint8_t     icu_category = u_charType(code);
 
+       /* Property tests */
+       bool        prop_alphabetic = pg_u_prop_alphabetic(code);
+       bool        prop_lowercase = pg_u_prop_lowercase(code);
+       bool        prop_uppercase = pg_u_prop_uppercase(code);
+       bool        prop_cased = pg_u_prop_cased(code);
+       bool        prop_case_ignorable = pg_u_prop_case_ignorable(code);
+       bool        prop_white_space = pg_u_prop_white_space(code);
+       bool        prop_hex_digit = pg_u_prop_hex_digit(code);
+       bool        prop_join_control = pg_u_prop_join_control(code);
+
+       bool        icu_prop_alphabetic = u_hasBinaryProperty(
+                                                             code, UCHAR_ALPHABETIC);
+       bool        icu_prop_lowercase = u_hasBinaryProperty(
+                                                            code, UCHAR_LOWERCASE);
+       bool        icu_prop_uppercase = u_hasBinaryProperty(
+                                                            code, UCHAR_UPPERCASE);
+       bool        icu_prop_cased = u_hasBinaryProperty(
+                                                        code, UCHAR_CASED);
+       bool        icu_prop_case_ignorable = u_hasBinaryProperty(
+                                                                 code, UCHAR_CASE_IGNORABLE);
+       bool        icu_prop_white_space = u_hasBinaryProperty(
+                                                              code, UCHAR_WHITE_SPACE);
+       bool        icu_prop_hex_digit = u_hasBinaryProperty(
+                                                            code, UCHAR_HEX_DIGIT);
+       bool        icu_prop_join_control = u_hasBinaryProperty(
+                                                               code, UCHAR_JOIN_CONTROL);
+
+       /*
+        * Compare with ICU for character classes using:
+        *
+        * https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uchar_8h.html#details
+        *
+        * which describes how to use ICU to test for membership in regex
+        * character classes.
+        *
+        * NB: the document suggests testing for some properties such as
+        * UCHAR_POSIX_ALNUM, but that doesn't mean that we're testing for the
+        * "POSIX Compatible" character classes.
+        */
+       bool        isalpha = pg_u_isalpha(code);
+       bool        islower = pg_u_islower(code);
+       bool        isupper = pg_u_isupper(code);
+       bool        ispunct = pg_u_ispunct(code, false);
+       bool        isdigit = pg_u_isdigit(code, false);
+       bool        isxdigit = pg_u_isxdigit(code, false);
+       bool        isalnum = pg_u_isalnum(code, false);
+       bool        isspace = pg_u_isspace(code);
+       bool        isblank = pg_u_isblank(code);
+       bool        iscntrl = pg_u_iscntrl(code);
+       bool        isgraph = pg_u_isgraph(code);
+       bool        isprint = pg_u_isprint(code);
+
+       bool        icu_isalpha = u_isUAlphabetic(code);
+       bool        icu_islower = u_isULowercase(code);
+       bool        icu_isupper = u_isUUppercase(code);
+       bool        icu_ispunct = u_ispunct(code);
+       bool        icu_isdigit = u_isdigit(code);
+       bool        icu_isxdigit = u_hasBinaryProperty(code,
+                                                      UCHAR_POSIX_XDIGIT);
+       bool        icu_isalnum = u_hasBinaryProperty(code,
+                                                     UCHAR_POSIX_ALNUM);
+       bool        icu_isspace = u_isUWhiteSpace(code);
+       bool        icu_isblank = u_isblank(code);
+       bool        icu_iscntrl = icu_category == PG_U_CONTROL;
+       bool        icu_isgraph = u_hasBinaryProperty(code,
+                                                     UCHAR_POSIX_GRAPH);
+       bool        icu_isprint = u_hasBinaryProperty(code,
+                                                     UCHAR_POSIX_PRINT);
+
+       /*
+        * A version mismatch means that some assigned codepoints in the newer
+        * version may be unassigned in the older version. That's OK, though
+        * the test will not cover those codepoints marked unassigned in the
+        * older version (that is, it will no longer be an exhaustive test).
+        */
+       if (pg_category == PG_U_UNASSIGNED &&
+           icu_category != PG_U_UNASSIGNED &&
+           pg_unicode_version < icu_unicode_version)
+       {
+           pg_skipped_codepoints++;
+           continue;
+       }
+
+       if (icu_category == PG_U_UNASSIGNED &&
+           pg_category != PG_U_UNASSIGNED &&
+           icu_unicode_version < pg_unicode_version)
+       {
+           icu_skipped_codepoints++;
+           continue;
+       }
+
        if (pg_category != icu_category)
        {
-           /*
-            * A version mismatch means that some assigned codepoints in the
-            * newer version may be unassigned in the older version. That's
-            * OK, though the test will not cover those codepoints marked
-            * unassigned in the older version (that is, it will no longer be
-            * an exhaustive test).
-            */
-           if (pg_category == PG_U_UNASSIGNED &&
-               pg_unicode_version < icu_unicode_version)
-               pg_skipped_codepoints++;
-           else if (icu_category == PG_U_UNASSIGNED &&
-                    icu_unicode_version < pg_unicode_version)
-               icu_skipped_codepoints++;
-           else
-           {
-               printf("category_test: FAILURE for codepoint 0x%06x\n", code);
-               printf("category_test: Postgres category:   %02d %s %s\n", pg_category,
-                      unicode_category_abbrev(pg_category),
-                      unicode_category_string(pg_category));
-               printf("category_test: ICU category:        %02d %s %s\n", icu_category,
-                      unicode_category_abbrev(icu_category),
-                      unicode_category_string(icu_category));
-               printf("\n");
-               exit(1);
-           }
+           printf("category_test: FAILURE for codepoint 0x%06x\n", code);
+           printf("category_test: Postgres category:   %02d %s %s\n", pg_category,
+                  unicode_category_abbrev(pg_category),
+                  unicode_category_string(pg_category));
+           printf("category_test: ICU category:        %02d %s %s\n", icu_category,
+                  unicode_category_abbrev(icu_category),
+                  unicode_category_string(icu_category));
+           printf("\n");
+           exit(1);
+       }
+
+       if (prop_alphabetic != icu_prop_alphabetic ||
+           prop_lowercase != icu_prop_lowercase ||
+           prop_uppercase != icu_prop_uppercase ||
+           prop_cased != icu_prop_cased ||
+           prop_case_ignorable != icu_prop_case_ignorable ||
+           prop_white_space != icu_prop_white_space ||
+           prop_hex_digit != icu_prop_hex_digit ||
+           prop_join_control != icu_prop_join_control)
+       {
+           printf("category_test: FAILURE for codepoint 0x%06x\n", code);
+           printf("category_test: Postgres property    alphabetic/lowercase/uppercase/cased/case_ignorable/white_space/hex_digit/join_control: %d/%d/%d/%d/%d/%d/%d/%d\n",
+                  prop_alphabetic, prop_lowercase, prop_uppercase,
+                  prop_cased, prop_case_ignorable,
+                  prop_white_space, prop_hex_digit, prop_join_control);
+           printf("category_test: ICU  property    alphabetic/lowercase/uppercase/cased/case_ignorable/white_space/hex_digit/join_control: %d/%d/%d/%d/%d/%d/%d/%d\n",
+                  icu_prop_alphabetic, icu_prop_lowercase, icu_prop_uppercase,
+                  icu_prop_cased, icu_prop_case_ignorable,
+                  icu_prop_white_space, icu_prop_hex_digit, icu_prop_join_control);
+           printf("\n");
+           exit(1);
        }
+
+       if (isalpha != icu_isalpha ||
+           islower != icu_islower ||
+           isupper != icu_isupper ||
+           ispunct != icu_ispunct ||
+           isdigit != icu_isdigit ||
+           isxdigit != icu_isxdigit ||
+           isalnum != icu_isalnum ||
+           isspace != icu_isspace ||
+           isblank != icu_isblank ||
+           iscntrl != icu_iscntrl ||
+           isgraph != icu_isgraph ||
+           isprint != icu_isprint)
+       {
+           printf("category_test: FAILURE for codepoint 0x%06x\n", code);
+           printf("category_test: Postgres class   alpha/lower/upper/punct/digit/xdigit/alnum/space/blank/cntrl/graph/print: %d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d\n",
+                  isalpha, islower, isupper, ispunct, isdigit, isxdigit, isalnum, isspace, isblank, iscntrl, isgraph, isprint);
+           printf("category_test: ICU class    alpha/lower/upper/punct/digit/xdigit/alnum/space/blank/cntrl/graph/print: %d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d\n",
+                  icu_isalpha, icu_islower, icu_isupper, icu_ispunct, icu_isdigit, icu_isxdigit, icu_isalnum, icu_isspace, icu_isblank, icu_iscntrl, icu_isgraph, icu_isprint);
+           printf("\n");
+           exit(1);
+       }
+
+       if (pg_category != PG_U_UNASSIGNED)
+           successful++;
    }
 
    if (pg_skipped_codepoints > 0)
@@ -99,10 +223,22 @@ main(int argc, char **argv)
        printf("category_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
               icu_skipped_codepoints);
 
-   printf("category_test: success\n");
-   exit(0);
+   printf("category_test: ICU test: %d codepoints successful\n", successful);
+}
+#endif
+
+int
+main(int argc, char **argv)
+{
+   pg_unicode_version = parse_unicode_version(PG_UNICODE_VERSION);
+   printf("category_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION);
+
+#ifdef USE_ICU
+   icu_unicode_version = parse_unicode_version(U_UNICODE_VERSION);
+   printf("category_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
+
+   icu_test();
 #else
-   printf("category_test: ICU support required for test; skipping\n");
-   exit(0);
+   printf("category_test: ICU not available; skipping\n");
 #endif
 }
index a50c87b7e96752ac116078d3e95fc8938a9d4da6..12914c0243399561d98925f1a966be863ab17d25 100644 (file)
@@ -25,6 +25,10 @@ my $output_table_file = "$output_path/unicode_category_table.h";
 
 my $FH;
 
+# create a table of all codepoints < 0x80 and their associated
+# categories and properties for fast lookups
+my %opt_ascii = ();
+
 # Read entries from UnicodeData.txt into a list of codepoint ranges
 # and their general category.
 my @category_ranges = ();
@@ -48,21 +52,42 @@ while (my $line = <$FH>)
    my $category = $elts[2];
 
    die "codepoint out of range" if $code > 0x10FFFF;
-   die "unassigned codepoint in UnicodeData.txt" if $category eq $CATEGORY_UNASSIGNED;
+   die "unassigned codepoint in UnicodeData.txt"
+     if $category eq $CATEGORY_UNASSIGNED;
+
+   if ($code < 0x80)
+   {
+       my @properties = ();
+       # No ASCII characters have category Titlecase_Letter,
+       # but include here for completeness.
+       push @properties, "PG_U_PROP_CASED" if ($category eq 'Lt');
+       $opt_ascii{$code} = {
+           Category => $category,
+           Properties => \@properties
+       };
+   }
 
-   if (!defined($range_start)) {
+   if (!defined($range_start))
+   {
        my $code_str = sprintf "0x%06x", $code;
-       die if defined($range_end) || defined($range_category) || defined($gap_category);
+       die
+         if defined($range_end)
+         || defined($range_category)
+         || defined($gap_category);
        die "unexpected first entry <..., Last>" if ($name =~ /Last>/);
-       die "expected 0x000000 for first entry, got $code_str" if $code != 0x000000;
+       die "expected 0x000000 for first entry, got $code_str"
+         if $code != 0x000000;
 
        # initialize
        $range_start = $code;
        $range_end = $code;
        $range_category = $category;
-       if ($name =~ /<.*, First>$/) {
+       if ($name =~ /<.*, First>$/)
+       {
            $gap_category = $category;
-       } else {
+       }
+       else
+       {
            $gap_category = $CATEGORY_UNASSIGNED;
        }
        next;
@@ -71,10 +96,17 @@ while (my $line = <$FH>)
    # Gap in codepoints detected. If it's a different category than
    # the current range, emit the current range and initialize a new
    # range representing the gap.
-   if ($range_end + 1 != $code && $range_category ne $gap_category) {
-       if ($range_category ne $CATEGORY_UNASSIGNED) {
-           push(@category_ranges, {start => $range_start, end => $range_end,
-                                   category => $range_category});
+   if ($range_end + 1 != $code && $range_category ne $gap_category)
+   {
+       if ($range_category ne $CATEGORY_UNASSIGNED)
+       {
+           push(
+               @category_ranges,
+               {
+                   start => $range_start,
+                   end => $range_end,
+                   category => $range_category
+               });
        }
        $range_start = $range_end + 1;
        $range_end = $code - 1;
@@ -82,27 +114,39 @@ while (my $line = <$FH>)
    }
 
    # different category; new range
-   if ($range_category ne $category) {
-       if ($range_category ne $CATEGORY_UNASSIGNED) {
-           push(@category_ranges, {start => $range_start, end => $range_end,
-                                   category => $range_category});
+   if ($range_category ne $category)
+   {
+       if ($range_category ne $CATEGORY_UNASSIGNED)
+       {
+           push(
+               @category_ranges,
+               {
+                   start => $range_start,
+                   end => $range_end,
+                   category => $range_category
+               });
        }
        $range_start = $code;
        $range_end = $code;
        $range_category = $category;
    }
 
-   if ($name =~ /<.*, First>$/) {
-       die "<..., First> entry unexpectedly follows another <..., First> entry"
+   if ($name =~ /<.*, First>$/)
+   {
+       die
+         "<..., First> entry unexpectedly follows another <..., First> entry"
          if $gap_category ne $CATEGORY_UNASSIGNED;
        $gap_category = $category;
    }
-   elsif ($name =~ /<.*, Last>$/) {
-       die "<..., First> and <..., Last> entries have mismatching general category"
+   elsif ($name =~ /<.*, Last>$/)
+   {
+       die
+         "<..., First> and <..., Last> entries have mismatching general category"
          if $gap_category ne $category;
        $gap_category = $CATEGORY_UNASSIGNED;
    }
-   else {
+   else
+   {
        die "unexpected entry found between <..., First> and <..., Last>"
          if $gap_category ne $CATEGORY_UNASSIGNED;
    }
@@ -115,13 +159,17 @@ die "<..., First> entry with no corresponding <..., Last> entry"
   if $gap_category ne $CATEGORY_UNASSIGNED;
 
 # emit final range
-if ($range_category ne $CATEGORY_UNASSIGNED) {
-   push(@category_ranges, {start => $range_start, end => $range_end,
-                           category => $range_category});
+if ($range_category ne $CATEGORY_UNASSIGNED)
+{
+   push(
+       @category_ranges,
+       {
+           start => $range_start,
+           end => $range_end,
+           category => $range_category
+       });
 }
 
-my $num_ranges = scalar @category_ranges;
-
 # See: https://www.unicode.org/reports/tr44/#General_Category_Values
 my $categories = {
    Cn => 'PG_U_UNASSIGNED',
@@ -156,11 +204,146 @@ my $categories = {
    Pf => 'PG_U_FINAL_PUNCTUATION'
 };
 
-# Start writing out the output files
+# Find White_Space, Hex_Digit, and Join_Control characters
+my @white_space = ();
+my @hex_digits = ();
+my @join_control = ();
+open($FH, '<', "$output_path/PropList.txt")
+  or die "Could not open $output_path/PropList.txt: $!.";
+while (my $line = <$FH>)
+{
+   my $pattern = qr/([0-9A-F\.]+)\s*;\s*(\w+)\s*#.*/s;
+   next unless $line =~ $pattern;
+
+   my $code = $line =~ s/$pattern/$1/rg;
+   my $property = $line =~ s/$pattern/$2/rg;
+   my $start;
+   my $end;
+
+   if ($code =~ /\.\./)
+   {
+       # code range
+       my @sp = split /\.\./, $code;
+       $start = hex($sp[0]);
+       $end = hex($sp[1]);
+   }
+   else
+   {
+       # single code point
+       $start = hex($code);
+       $end = hex($code);
+   }
+
+   if ($property eq "White_Space")
+   {
+       push @white_space, { start => $start, end => $end };
+       for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+       {
+           push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_WHITE_SPACE";
+       }
+   }
+   elsif ($property eq "Hex_Digit")
+   {
+       push @hex_digits, { start => $start, end => $end };
+       for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+       {
+           push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_HEX_DIGIT";
+       }
+   }
+   elsif ($property eq "Join_Control")
+   {
+       push @join_control, { start => $start, end => $end };
+       for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+       {
+           push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_JOIN_CONTROL";
+       }
+   }
+}
+
+# Find Alphabetic, Lowercase, Uppercase, and Case_Ignorable characters
+my @alphabetic = ();
+my @lowercase = ();
+my @uppercase = ();
+my @case_ignorable = ();
+open($FH, '<', "$output_path/DerivedCoreProperties.txt")
+  or die "Could not open $output_path/DerivedCoreProperties.txt: $!.";
+while (my $line = <$FH>)
+{
+   my $pattern = qr/^([0-9A-F\.]+)\s*;\s*(\w+)\s*#.*$/s;
+   next unless $line =~ $pattern;
+
+   my $code = $line =~ s/$pattern/$1/rg;
+   my $property = $line =~ s/$pattern/$2/rg;
+   my $start;
+   my $end;
+
+   if ($code =~ /\.\./)
+   {
+       # code range
+       my @sp = split /\.\./, $code;
+       die "line: {$line} code: {$code} sp[0] {$sp[0]} sp[1] {$sp[1]}"
+         unless $sp[0] =~ /^[0-9A-F]+$/ && $sp[1] =~ /^[0-9A-F]+$/;
+       $start = hex($sp[0]);
+       $end = hex($sp[1]);
+   }
+   else
+   {
+       die "line: {$line} code: {$code}" unless $code =~ /^[0-9A-F]+$/;
+       # single code point
+       $start = hex($code);
+       $end = hex($code);
+   }
+
+   if ($property eq "Alphabetic")
+   {
+       push @alphabetic, { start => $start, end => $end };
+       for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+       {
+           push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_ALPHABETIC";
+       }
+   }
+   elsif ($property eq "Lowercase")
+   {
+       push @lowercase, { start => $start, end => $end };
+       for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+       {
+           push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_LOWERCASE";
+           push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_CASED";
+       }
+   }
+   elsif ($property eq "Uppercase")
+   {
+       push @uppercase, { start => $start, end => $end };
+       for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+       {
+           push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_UPPERCASE";
+           push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_CASED";
+       }
+   }
+   elsif ($property eq "Case_Ignorable")
+   {
+       push @case_ignorable, { start => $start, end => $end };
+       for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+       {
+           push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_CASE_IGNORABLE";
+       }
+   }
+}
+
+my $num_category_ranges = scalar @category_ranges;
+my $num_alphabetic_ranges = scalar @alphabetic;
+my $num_lowercase_ranges = scalar @lowercase;
+my $num_uppercase_ranges = scalar @uppercase;
+my $num_case_ignorable_ranges = scalar @case_ignorable;
+my $num_white_space_ranges = scalar @white_space;
+my $num_hex_digit_ranges = scalar @hex_digits;
+my $num_join_control_ranges = scalar @join_control;
+
+# Start writing out the output file
 open my $OT, '>', $output_table_file
   or die "Could not open output file $output_table_file: $!\n";
 
-print $OT <<HEADER;
+print $OT <<"EOS";
 /*-------------------------------------------------------------------------
  *
  * unicode_category_table.h
@@ -188,18 +371,153 @@ typedef struct
    uint8       category;       /* General Category */
 }          pg_category_range;
 
-/* table of Unicode codepoint ranges and their categories */
-static const pg_category_range unicode_categories[$num_ranges] =
+typedef struct
+{
+   uint32      first;          /* Unicode codepoint */
+   uint32      last;           /* Unicode codepoint */
+}          pg_unicode_range;
+
+typedef struct
+{
+   uint8       category;
+   uint8       properties;
+}          pg_unicode_properties;
+
+/*
+ * The properties currently used, in no particular order. Fits in a uint8, but
+ * if more properties are added, a wider integer will be needed.
+ */
+#define PG_U_PROP_ALPHABETIC       (1 << 0)
+#define PG_U_PROP_LOWERCASE            (1 << 1)
+#define PG_U_PROP_UPPERCASE            (1 << 2)
+#define PG_U_PROP_CASED                (1 << 3)
+#define PG_U_PROP_CASE_IGNORABLE   (1 << 4)
+#define PG_U_PROP_WHITE_SPACE      (1 << 5)
+#define PG_U_PROP_JOIN_CONTROL     (1 << 6)
+#define PG_U_PROP_HEX_DIGIT            (1 << 7)
+
+EOS
+
+print $OT <<"EOS";
+/* table for fast lookup of ASCII codepoints */
+static const pg_unicode_properties unicode_opt_ascii[128] =
+{
+EOS
+
+for (my $i = 0; $i < 128; $i++)
 {
-HEADER
+   my $category_str = $categories->{ $opt_ascii{$i}->{Category} };
+   my $props_str = (join ' | ', @{ $opt_ascii{$i}{Properties} }) || "0";
+   printf $OT
+     "\t{\n\t\t/* 0x%06x */\n\t\t.category = %s,\n\t\t.properties = %s\n\t},\n",
+     $i, $category_str, $props_str;
+}
 
-my $firsttime = 1;
-foreach my $range (@category_ranges) {
-   printf $OT ",\n" unless $firsttime;
-   $firsttime = 0;
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges and their categories */
+static const pg_category_range unicode_categories[$num_category_ranges] =
+{
+EOS
 
-   my $category = $categories->{$range->{category}};
+foreach my $range (@category_ranges)
+{
+   my $category = $categories->{ $range->{category} };
    die "category missing: $range->{category}" unless $category;
-   printf $OT "\t{0x%06x, 0x%06x, %s}", $range->{start}, $range->{end}, $category;
+   printf $OT "\t{0x%06x, 0x%06x, %s},\n", $range->{start}, $range->{end},
+     $category;
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Alphabetic characters */
+static const pg_unicode_range unicode_alphabetic[$num_alphabetic_ranges] =
+{
+EOS
+
+foreach my $range (@alphabetic)
+{
+   printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Lowercase characters */
+static const pg_unicode_range unicode_lowercase[$num_lowercase_ranges] =
+{
+EOS
+
+foreach my $range (@lowercase)
+{
+   printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Uppercase characters */
+static const pg_unicode_range unicode_uppercase[$num_uppercase_ranges] =
+{
+EOS
+
+foreach my $range (@uppercase)
+{
+   printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
 }
-print $OT "\n};\n";
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Case_Ignorable characters */
+static const pg_unicode_range unicode_case_ignorable[$num_case_ignorable_ranges] =
+{
+EOS
+
+foreach my $range (@case_ignorable)
+{
+   printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of White_Space characters */
+static const pg_unicode_range unicode_white_space[$num_white_space_ranges] =
+{
+EOS
+
+foreach my $range (@white_space)
+{
+   printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Hex_Digit characters */
+static const pg_unicode_range unicode_hex_digit[$num_hex_digit_ranges] =
+{
+EOS
+
+foreach my $range (@hex_digits)
+{
+   printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Join_Control characters */
+static const pg_unicode_range unicode_join_control[$num_join_control_ranges] =
+{
+EOS
+
+foreach my $range (@join_control)
+{
+   printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n";
index df4f3a4ed1d510c5d8d4078df2ff6fe802372b4d..d7190bb8ca92f67d9d2718c4a8023c7e7600b7c3 100644 (file)
@@ -11,7 +11,7 @@ endif
 
 # These files are part of the Unicode Character Database. Download them on
 # demand.
-foreach f : ['CompositionExclusions.txt', 'DerivedNormalizationProps.txt', 'EastAsianWidth.txt', 'NormalizationTest.txt', 'UnicodeData.txt']
+foreach f : ['CompositionExclusions.txt', 'DerivedCoreProperties.txt', 'DerivedNormalizationProps.txt', 'EastAsianWidth.txt', 'NormalizationTest.txt', 'PropList.txt', 'UnicodeData.txt']
   url = unicode_baseurl.format(UNICODE_VERSION, f)
   target = custom_target(f,
     output: f,
@@ -26,7 +26,7 @@ update_unicode_targets = []
 
 update_unicode_targets += \
   custom_target('unicode_category_table.h',
-    input: [unicode_data['UnicodeData.txt']],
+    input: [unicode_data['UnicodeData.txt'], unicode_data['DerivedCoreProperties.txt'], unicode_data['PropList.txt']],
     output: ['unicode_category_table.h'],
     command: [
       perl, files('generate-unicode_category_table.pl'),
index 668051b461c5fc8e76ceb811544ee19cb3e4adbc..bece7334f5b559b1d93e749fc84896d04d23c432 100644 (file)
@@ -1,6 +1,8 @@
 /*-------------------------------------------------------------------------
  * unicode_category.c
- *     Determine general category of Unicode characters.
+ *     Determine general category and character properties of Unicode
+ *     characters. Encoding must be UTF8, where we assume that the pg_wchar
+ *     representation is a code point.
  *
  * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
  *
 #include "common/unicode_category.h"
 #include "common/unicode_category_table.h"
 
+/*
+ * Create bitmasks from pg_unicode_category values for efficient comparison of
+ * multiple categories. For instance, PG_U_MN_MASK is a bitmask representing
+ * the general category Mn; and PG_U_M_MASK represents general categories Mn,
+ * Me, and Mc.
+ *
+ * The number of Unicode General Categories should never grow, so a 32-bit
+ * mask is fine.
+ */
+#define PG_U_CATEGORY_MASK(X) ((uint32)(1 << (X)))
+
+#define PG_U_LU_MASK PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER)
+#define PG_U_LL_MASK PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER)
+#define PG_U_LT_MASK PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER)
+#define PG_U_LC_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK)
+#define PG_U_LM_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER)
+#define PG_U_LO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER)
+#define PG_U_L_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK|PG_U_LM_MASK|\
+                    PG_U_LO_MASK)
+#define PG_U_MN_MASK PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK)
+#define PG_U_ME_MASK PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK)
+#define PG_U_MC_MASK PG_U_CATEGORY_MASK(PG_U_SPACING_MARK)
+#define PG_U_M_MASK (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK)
+#define PG_U_ND_MASK PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER)
+#define PG_U_NL_MASK PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER)
+#define PG_U_NO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER)
+#define PG_U_N_MASK (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK)
+#define PG_U_PC_MASK PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION)
+#define PG_U_PD_MASK PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION)
+#define PG_U_PS_MASK PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION)
+#define PG_U_PE_MASK PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION)
+#define PG_U_PI_MASK PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION)
+#define PG_U_PF_MASK PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION)
+#define PG_U_PO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION)
+#define PG_U_P_MASK (PG_U_PC_MASK|PG_U_PD_MASK|PG_U_PS_MASK|PG_U_PE_MASK|\
+                    PG_U_PI_MASK|PG_U_PF_MASK|PG_U_PO_MASK)
+#define PG_U_SM_MASK PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL)
+#define PG_U_SC_MASK PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL)
+#define PG_U_SK_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL)
+#define PG_U_SO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL)
+#define PG_U_S_MASK (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK)
+#define PG_U_ZS_MASK PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR)
+#define PG_U_ZL_MASK PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR)
+#define PG_U_ZP_MASK PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR)
+#define PG_U_Z_MASK (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK)
+#define PG_U_CC_MASK PG_U_CATEGORY_MASK(PG_U_CONTROL)
+#define PG_U_CF_MASK PG_U_CATEGORY_MASK(PG_U_FORMAT)
+#define PG_U_CS_MASK PG_U_CATEGORY_MASK(PG_U_SURROGATE)
+#define PG_U_CO_MASK PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE)
+#define PG_U_CN_MASK PG_U_CATEGORY_MASK(PG_U_UNASSIGNED)
+#define PG_U_C_MASK (PG_U_CC_MASK|PG_U_CF_MASK|PG_U_CS_MASK|PG_U_CO_MASK|\
+                    PG_U_CN_MASK)
+
+#define PG_U_CHARACTER_TAB 0x09
+
+static bool range_search(const pg_unicode_range * tbl, size_t size,
+                        pg_wchar code);
+
 /*
  * Unicode general category for the given codepoint.
  */
 pg_unicode_category
-unicode_category(pg_wchar ucs)
+unicode_category(pg_wchar code)
 {
    int         min = 0;
    int         mid;
    int         max = lengthof(unicode_categories) - 1;
 
-   Assert(ucs <= 0x10ffff);
+   Assert(code <= 0x10ffff);
+
+   if (code < 0x80)
+       return unicode_opt_ascii[code].category;
 
    while (max >= min)
    {
        mid = (min + max) / 2;
-       if (ucs > unicode_categories[mid].last)
+       if (code > unicode_categories[mid].last)
            min = mid + 1;
-       else if (ucs < unicode_categories[mid].first)
+       else if (code < unicode_categories[mid].first)
            max = mid - 1;
        else
            return unicode_categories[mid].category;
@@ -44,6 +107,224 @@ unicode_category(pg_wchar ucs)
    return PG_U_UNASSIGNED;
 }
 
+bool
+pg_u_prop_alphabetic(pg_wchar code)
+{
+   if (code < 0x80)
+       return unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC;
+
+   return range_search(unicode_alphabetic,
+                       lengthof(unicode_alphabetic),
+                       code);
+}
+
+bool
+pg_u_prop_lowercase(pg_wchar code)
+{
+   if (code < 0x80)
+       return unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE;
+
+   return range_search(unicode_lowercase,
+                       lengthof(unicode_lowercase),
+                       code);
+}
+
+bool
+pg_u_prop_uppercase(pg_wchar code)
+{
+   if (code < 0x80)
+       return unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE;
+
+   return range_search(unicode_uppercase,
+                       lengthof(unicode_uppercase),
+                       code);
+}
+
+bool
+pg_u_prop_cased(pg_wchar code)
+{
+   uint32      category_mask;
+
+   if (code < 0x80)
+       return unicode_opt_ascii[code].properties & PG_U_PROP_CASED;
+
+   category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
+
+   return category_mask & PG_U_LT_MASK ||
+       pg_u_prop_lowercase(code) ||
+       pg_u_prop_uppercase(code);
+}
+
+bool
+pg_u_prop_case_ignorable(pg_wchar code)
+{
+   if (code < 0x80)
+       return unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE;
+
+   return range_search(unicode_case_ignorable,
+                       lengthof(unicode_case_ignorable),
+                       code);
+}
+
+bool
+pg_u_prop_white_space(pg_wchar code)
+{
+   if (code < 0x80)
+       return unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE;
+
+   return range_search(unicode_white_space,
+                       lengthof(unicode_white_space),
+                       code);
+}
+
+bool
+pg_u_prop_hex_digit(pg_wchar code)
+{
+   if (code < 0x80)
+       return unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT;
+
+   return range_search(unicode_hex_digit,
+                       lengthof(unicode_hex_digit),
+                       code);
+}
+
+bool
+pg_u_prop_join_control(pg_wchar code)
+{
+   if (code < 0x80)
+       return unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL;
+
+   return range_search(unicode_join_control,
+                       lengthof(unicode_join_control),
+                       code);
+}
+
+/*
+ * The following functions implement the Compatibility Properties described
+ * at: http://www.unicode.org/reports/tr18/#Compatibility_Properties
+ *
+ * If 'posix' is true, implements the "POSIX Compatible" variant, otherwise
+ * the "Standard" variant.
+ */
+
+bool
+pg_u_isdigit(pg_wchar code, bool posix)
+{
+   if (posix)
+       return ('0' <= code && code <= '9');
+   else
+       return unicode_category(code) == PG_U_DECIMAL_NUMBER;
+}
+
+bool
+pg_u_isalpha(pg_wchar code)
+{
+   return pg_u_prop_alphabetic(code);
+}
+
+bool
+pg_u_isalnum(pg_wchar code, bool posix)
+{
+   return pg_u_isalpha(code) || pg_u_isdigit(code, posix);
+}
+
+bool
+pg_u_isword(pg_wchar code)
+{
+   uint32      category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
+
+   return
+       category_mask & (PG_U_M_MASK | PG_U_ND_MASK | PG_U_PC_MASK) ||
+       pg_u_isalpha(code) ||
+       pg_u_prop_join_control(code);
+}
+
+bool
+pg_u_isupper(pg_wchar code)
+{
+   return pg_u_prop_uppercase(code);
+}
+
+bool
+pg_u_islower(pg_wchar code)
+{
+   return pg_u_prop_lowercase(code);
+}
+
+bool
+pg_u_isblank(pg_wchar code)
+{
+   return code == PG_U_CHARACTER_TAB ||
+       unicode_category(code) == PG_U_SPACE_SEPARATOR;
+}
+
+bool
+pg_u_iscntrl(pg_wchar code)
+{
+   return unicode_category(code) == PG_U_CONTROL;
+}
+
+bool
+pg_u_isgraph(pg_wchar code)
+{
+   uint32      category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
+
+   if (category_mask & (PG_U_CC_MASK | PG_U_CS_MASK | PG_U_CN_MASK) ||
+       pg_u_isspace(code))
+       return false;
+   return true;
+}
+
+bool
+pg_u_isprint(pg_wchar code)
+{
+   pg_unicode_category category = unicode_category(code);
+
+   if (category == PG_U_CONTROL)
+       return false;
+
+   return pg_u_isgraph(code) || pg_u_isblank(code);
+}
+
+bool
+pg_u_ispunct(pg_wchar code, bool posix)
+{
+   uint32      category_mask;
+
+   if (posix)
+   {
+       if (pg_u_isalpha(code))
+           return false;
+
+       category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
+       return category_mask & (PG_U_P_MASK | PG_U_S_MASK);
+   }
+   else
+   {
+       category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
+
+       return category_mask & PG_U_P_MASK;
+   }
+}
+
+bool
+pg_u_isspace(pg_wchar code)
+{
+   return pg_u_prop_white_space(code);
+}
+
+bool
+pg_u_isxdigit(pg_wchar code, bool posix)
+{
+   if (posix)
+       return (('0' <= code && code <= '9') ||
+               ('A' <= code && code <= 'F') ||
+               ('a' <= code && code <= 'f'));
+   else
+       return unicode_category(code) == PG_U_DECIMAL_NUMBER ||
+           pg_u_prop_hex_digit(code);
+}
+
 /*
  * Description of Unicode general category.
  */
@@ -191,3 +472,30 @@ unicode_category_abbrev(pg_unicode_category category)
    Assert(false);
    return "??";                /* keep compiler quiet */
 }
+
+/*
+ * Binary search to test if given codepoint exists in one of the ranges in the
+ * given table.
+ */
+static bool
+range_search(const pg_unicode_range * tbl, size_t size, pg_wchar code)
+{
+   int         min = 0;
+   int         mid;
+   int         max = size - 1;
+
+   Assert(code <= 0x10ffff);
+
+   while (max >= min)
+   {
+       mid = (min + max) / 2;
+       if (code > tbl[mid].last)
+           min = mid + 1;
+       else if (code < tbl[mid].first)
+           max = mid - 1;
+       else
+           return true;
+   }
+
+   return false;
+}
index 5bad2806150a2012d59395d89003353b95ac9984..f185b5890099668b657d3f400d38cefe8cec6bcf 100644 (file)
@@ -62,7 +62,30 @@ typedef enum pg_unicode_category
 } pg_unicode_category;
 
 extern pg_unicode_category unicode_category(pg_wchar ucs);
-const char *unicode_category_string(pg_unicode_category category);
-const char *unicode_category_abbrev(pg_unicode_category category);
+extern const char *unicode_category_string(pg_unicode_category category);
+extern const char *unicode_category_abbrev(pg_unicode_category category);
+
+extern bool pg_u_prop_alphabetic(pg_wchar c);
+extern bool pg_u_prop_lowercase(pg_wchar c);
+extern bool pg_u_prop_uppercase(pg_wchar c);
+extern bool pg_u_prop_cased(pg_wchar c);
+extern bool pg_u_prop_case_ignorable(pg_wchar c);
+extern bool pg_u_prop_white_space(pg_wchar c);
+extern bool pg_u_prop_hex_digit(pg_wchar c);
+extern bool pg_u_prop_join_control(pg_wchar c);
+
+extern bool pg_u_isdigit(pg_wchar c, bool posix);
+extern bool pg_u_isalpha(pg_wchar c);
+extern bool pg_u_isalnum(pg_wchar c, bool posix);
+extern bool pg_u_isword(pg_wchar c);
+extern bool pg_u_isupper(pg_wchar c);
+extern bool pg_u_islower(pg_wchar c);
+extern bool pg_u_isblank(pg_wchar c);
+extern bool pg_u_iscntrl(pg_wchar c);
+extern bool pg_u_isgraph(pg_wchar c);
+extern bool pg_u_isprint(pg_wchar c);
+extern bool pg_u_ispunct(pg_wchar c, bool posix);
+extern bool pg_u_isspace(pg_wchar c);
+extern bool pg_u_isxdigit(pg_wchar c, bool posix);
 
 #endif                         /* UNICODE_CATEGORY_H */
index d7ef996189a54743f1a06d9471239bf9cb29d4e3..ff35ff45e83aba725f5782d5a5f5067696b18b0a 100644 (file)
@@ -25,6 +25,676 @@ typedef struct
    uint8       category;       /* General Category */
 }          pg_category_range;
 
+typedef struct
+{
+   uint32      first;          /* Unicode codepoint */
+   uint32      last;           /* Unicode codepoint */
+}          pg_unicode_range;
+
+typedef struct
+{
+   uint8       category;
+   uint8       properties;
+}          pg_unicode_properties;
+
+/*
+ * The properties currently used, in no particular order. Fits in a uint8, but
+ * if more properties are added, a wider integer will be needed.
+ */
+#define PG_U_PROP_ALPHABETIC       (1 << 0)
+#define PG_U_PROP_LOWERCASE            (1 << 1)
+#define PG_U_PROP_UPPERCASE            (1 << 2)
+#define PG_U_PROP_CASED                (1 << 3)
+#define PG_U_PROP_CASE_IGNORABLE   (1 << 4)
+#define PG_U_PROP_WHITE_SPACE      (1 << 5)
+#define PG_U_PROP_JOIN_CONTROL     (1 << 6)
+#define PG_U_PROP_HEX_DIGIT            (1 << 7)
+
+/* table for fast lookup of ASCII codepoints */
+static const pg_unicode_properties unicode_opt_ascii[128] =
+{
+   {
+       /* 0x000000 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000001 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000002 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000003 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000004 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000005 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000006 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000007 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000008 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000009 */
+       .category = PG_U_CONTROL,
+       .properties = PG_U_PROP_WHITE_SPACE
+   },
+   {
+       /* 0x00000a */
+       .category = PG_U_CONTROL,
+       .properties = PG_U_PROP_WHITE_SPACE
+   },
+   {
+       /* 0x00000b */
+       .category = PG_U_CONTROL,
+       .properties = PG_U_PROP_WHITE_SPACE
+   },
+   {
+       /* 0x00000c */
+       .category = PG_U_CONTROL,
+       .properties = PG_U_PROP_WHITE_SPACE
+   },
+   {
+       /* 0x00000d */
+       .category = PG_U_CONTROL,
+       .properties = PG_U_PROP_WHITE_SPACE
+   },
+   {
+       /* 0x00000e */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x00000f */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000010 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000011 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000012 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000013 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000014 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000015 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000016 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000017 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000018 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000019 */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x00001a */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x00001b */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x00001c */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x00001d */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x00001e */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x00001f */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+   {
+       /* 0x000020 */
+       .category = PG_U_SPACE_SEPARATOR,
+       .properties = PG_U_PROP_WHITE_SPACE
+   },
+   {
+       /* 0x000021 */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000022 */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000023 */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000024 */
+       .category = PG_U_CURRENCY_SYMBOL,
+       .properties = 0
+   },
+   {
+       /* 0x000025 */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000026 */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000027 */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = PG_U_PROP_CASE_IGNORABLE
+   },
+   {
+       /* 0x000028 */
+       .category = PG_U_OPEN_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000029 */
+       .category = PG_U_CLOSE_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00002a */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00002b */
+       .category = PG_U_MATH_SYMBOL,
+       .properties = 0
+   },
+   {
+       /* 0x00002c */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00002d */
+       .category = PG_U_DASH_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00002e */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = PG_U_PROP_CASE_IGNORABLE
+   },
+   {
+       /* 0x00002f */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000030 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x000031 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x000032 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x000033 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x000034 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x000035 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x000036 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x000037 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x000038 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x000039 */
+       .category = PG_U_DECIMAL_NUMBER,
+       .properties = PG_U_PROP_HEX_DIGIT
+   },
+   {
+       /* 0x00003a */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = PG_U_PROP_CASE_IGNORABLE
+   },
+   {
+       /* 0x00003b */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00003c */
+       .category = PG_U_MATH_SYMBOL,
+       .properties = 0
+   },
+   {
+       /* 0x00003d */
+       .category = PG_U_MATH_SYMBOL,
+       .properties = 0
+   },
+   {
+       /* 0x00003e */
+       .category = PG_U_MATH_SYMBOL,
+       .properties = 0
+   },
+   {
+       /* 0x00003f */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000040 */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000041 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000042 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000043 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000044 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000045 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000046 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000047 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000048 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000049 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00004a */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00004b */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00004c */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00004d */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00004e */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00004f */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000050 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000051 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000052 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000053 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000054 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000055 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000056 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000057 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000058 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000059 */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00005a */
+       .category = PG_U_UPPERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00005b */
+       .category = PG_U_OPEN_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00005c */
+       .category = PG_U_OTHER_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00005d */
+       .category = PG_U_CLOSE_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00005e */
+       .category = PG_U_MODIFIER_SYMBOL,
+       .properties = PG_U_PROP_CASE_IGNORABLE
+   },
+   {
+       /* 0x00005f */
+       .category = PG_U_CONNECTOR_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x000060 */
+       .category = PG_U_MODIFIER_SYMBOL,
+       .properties = PG_U_PROP_CASE_IGNORABLE
+   },
+   {
+       /* 0x000061 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000062 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000063 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000064 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000065 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000066 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000067 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000068 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000069 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00006a */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00006b */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00006c */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00006d */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00006e */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00006f */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000070 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000071 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000072 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000073 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000074 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000075 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000076 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000077 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000078 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x000079 */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00007a */
+       .category = PG_U_LOWERCASE_LETTER,
+       .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+   },
+   {
+       /* 0x00007b */
+       .category = PG_U_OPEN_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00007c */
+       .category = PG_U_MATH_SYMBOL,
+       .properties = 0
+   },
+   {
+       /* 0x00007d */
+       .category = PG_U_CLOSE_PUNCTUATION,
+       .properties = 0
+   },
+   {
+       /* 0x00007e */
+       .category = PG_U_MATH_SYMBOL,
+       .properties = 0
+   },
+   {
+       /* 0x00007f */
+       .category = PG_U_CONTROL,
+       .properties = 0
+   },
+};
+
 /* table of Unicode codepoint ranges and their categories */
 static const pg_category_range unicode_categories[3302] =
 {
@@ -3329,5 +3999,3027 @@ static const pg_category_range unicode_categories[3302] =
    {0x0e0020, 0x0e007f, PG_U_FORMAT},
    {0x0e0100, 0x0e01ef, PG_U_NONSPACING_MARK},
    {0x0f0000, 0x0ffffd, PG_U_PRIVATE_USE},
-   {0x100000, 0x10fffd, PG_U_PRIVATE_USE}
+   {0x100000, 0x10fffd, PG_U_PRIVATE_USE},
+};
+
+/* table of Unicode codepoint ranges of Alphabetic characters */
+static const pg_unicode_range unicode_alphabetic[1141] =
+{
+   {0x000041, 0x00005a},
+   {0x000061, 0x00007a},
+   {0x0000aa, 0x0000aa},
+   {0x0000b5, 0x0000b5},
+   {0x0000ba, 0x0000ba},
+   {0x0000c0, 0x0000d6},
+   {0x0000d8, 0x0000f6},
+   {0x0000f8, 0x0001ba},
+   {0x0001bb, 0x0001bb},
+   {0x0001bc, 0x0001bf},
+   {0x0001c0, 0x0001c3},
+   {0x0001c4, 0x000293},
+   {0x000294, 0x000294},
+   {0x000295, 0x0002af},
+   {0x0002b0, 0x0002c1},
+   {0x0002c6, 0x0002d1},
+   {0x0002e0, 0x0002e4},
+   {0x0002ec, 0x0002ec},
+   {0x0002ee, 0x0002ee},
+   {0x000345, 0x000345},
+   {0x000370, 0x000373},
+   {0x000374, 0x000374},
+   {0x000376, 0x000377},
+   {0x00037a, 0x00037a},
+   {0x00037b, 0x00037d},
+   {0x00037f, 0x00037f},
+   {0x000386, 0x000386},
+   {0x000388, 0x00038a},
+   {0x00038c, 0x00038c},
+   {0x00038e, 0x0003a1},
+   {0x0003a3, 0x0003f5},
+   {0x0003f7, 0x000481},
+   {0x00048a, 0x00052f},
+   {0x000531, 0x000556},
+   {0x000559, 0x000559},
+   {0x000560, 0x000588},
+   {0x0005b0, 0x0005bd},
+   {0x0005bf, 0x0005bf},
+   {0x0005c1, 0x0005c2},
+   {0x0005c4, 0x0005c5},
+   {0x0005c7, 0x0005c7},
+   {0x0005d0, 0x0005ea},
+   {0x0005ef, 0x0005f2},
+   {0x000610, 0x00061a},
+   {0x000620, 0x00063f},
+   {0x000640, 0x000640},
+   {0x000641, 0x00064a},
+   {0x00064b, 0x000657},
+   {0x000659, 0x00065f},
+   {0x00066e, 0x00066f},
+   {0x000670, 0x000670},
+   {0x000671, 0x0006d3},
+   {0x0006d5, 0x0006d5},
+   {0x0006d6, 0x0006dc},
+   {0x0006e1, 0x0006e4},
+   {0x0006e5, 0x0006e6},
+   {0x0006e7, 0x0006e8},
+   {0x0006ed, 0x0006ed},
+   {0x0006ee, 0x0006ef},
+   {0x0006fa, 0x0006fc},
+   {0x0006ff, 0x0006ff},
+   {0x000710, 0x000710},
+   {0x000711, 0x000711},
+   {0x000712, 0x00072f},
+   {0x000730, 0x00073f},
+   {0x00074d, 0x0007a5},
+   {0x0007a6, 0x0007b0},
+   {0x0007b1, 0x0007b1},
+   {0x0007ca, 0x0007ea},
+   {0x0007f4, 0x0007f5},
+   {0x0007fa, 0x0007fa},
+   {0x000800, 0x000815},
+   {0x000816, 0x000817},
+   {0x00081a, 0x00081a},
+   {0x00081b, 0x000823},
+   {0x000824, 0x000824},
+   {0x000825, 0x000827},
+   {0x000828, 0x000828},
+   {0x000829, 0x00082c},
+   {0x000840, 0x000858},
+   {0x000860, 0x00086a},
+   {0x000870, 0x000887},
+   {0x000889, 0x00088e},
+   {0x0008a0, 0x0008c8},
+   {0x0008c9, 0x0008c9},
+   {0x0008d4, 0x0008df},
+   {0x0008e3, 0x0008e9},
+   {0x0008f0, 0x000902},
+   {0x000903, 0x000903},
+   {0x000904, 0x000939},
+   {0x00093a, 0x00093a},
+   {0x00093b, 0x00093b},
+   {0x00093d, 0x00093d},
+   {0x00093e, 0x000940},
+   {0x000941, 0x000948},
+   {0x000949, 0x00094c},
+   {0x00094e, 0x00094f},
+   {0x000950, 0x000950},
+   {0x000955, 0x000957},
+   {0x000958, 0x000961},
+   {0x000962, 0x000963},
+   {0x000971, 0x000971},
+   {0x000972, 0x000980},
+   {0x000981, 0x000981},
+   {0x000982, 0x000983},
+   {0x000985, 0x00098c},
+   {0x00098f, 0x000990},
+   {0x000993, 0x0009a8},
+   {0x0009aa, 0x0009b0},
+   {0x0009b2, 0x0009b2},
+   {0x0009b6, 0x0009b9},
+   {0x0009bd, 0x0009bd},
+   {0x0009be, 0x0009c0},
+   {0x0009c1, 0x0009c4},
+   {0x0009c7, 0x0009c8},
+   {0x0009cb, 0x0009cc},
+   {0x0009ce, 0x0009ce},
+   {0x0009d7, 0x0009d7},
+   {0x0009dc, 0x0009dd},
+   {0x0009df, 0x0009e1},
+   {0x0009e2, 0x0009e3},
+   {0x0009f0, 0x0009f1},
+   {0x0009fc, 0x0009fc},
+   {0x000a01, 0x000a02},
+   {0x000a03, 0x000a03},
+   {0x000a05, 0x000a0a},
+   {0x000a0f, 0x000a10},
+   {0x000a13, 0x000a28},
+   {0x000a2a, 0x000a30},
+   {0x000a32, 0x000a33},
+   {0x000a35, 0x000a36},
+   {0x000a38, 0x000a39},
+   {0x000a3e, 0x000a40},
+   {0x000a41, 0x000a42},
+   {0x000a47, 0x000a48},
+   {0x000a4b, 0x000a4c},
+   {0x000a51, 0x000a51},
+   {0x000a59, 0x000a5c},
+   {0x000a5e, 0x000a5e},
+   {0x000a70, 0x000a71},
+   {0x000a72, 0x000a74},
+   {0x000a75, 0x000a75},
+   {0x000a81, 0x000a82},
+   {0x000a83, 0x000a83},
+   {0x000a85, 0x000a8d},
+   {0x000a8f, 0x000a91},
+   {0x000a93, 0x000aa8},
+   {0x000aaa, 0x000ab0},
+   {0x000ab2, 0x000ab3},
+   {0x000ab5, 0x000ab9},
+   {0x000abd, 0x000abd},
+   {0x000abe, 0x000ac0},
+   {0x000ac1, 0x000ac5},
+   {0x000ac7, 0x000ac8},
+   {0x000ac9, 0x000ac9},
+   {0x000acb, 0x000acc},
+   {0x000ad0, 0x000ad0},
+   {0x000ae0, 0x000ae1},
+   {0x000ae2, 0x000ae3},
+   {0x000af9, 0x000af9},
+   {0x000afa, 0x000afc},
+   {0x000b01, 0x000b01},
+   {0x000b02, 0x000b03},
+   {0x000b05, 0x000b0c},
+   {0x000b0f, 0x000b10},
+   {0x000b13, 0x000b28},
+   {0x000b2a, 0x000b30},
+   {0x000b32, 0x000b33},
+   {0x000b35, 0x000b39},
+   {0x000b3d, 0x000b3d},
+   {0x000b3e, 0x000b3e},
+   {0x000b3f, 0x000b3f},
+   {0x000b40, 0x000b40},
+   {0x000b41, 0x000b44},
+   {0x000b47, 0x000b48},
+   {0x000b4b, 0x000b4c},
+   {0x000b56, 0x000b56},
+   {0x000b57, 0x000b57},
+   {0x000b5c, 0x000b5d},
+   {0x000b5f, 0x000b61},
+   {0x000b62, 0x000b63},
+   {0x000b71, 0x000b71},
+   {0x000b82, 0x000b82},
+   {0x000b83, 0x000b83},
+   {0x000b85, 0x000b8a},
+   {0x000b8e, 0x000b90},
+   {0x000b92, 0x000b95},
+   {0x000b99, 0x000b9a},
+   {0x000b9c, 0x000b9c},
+   {0x000b9e, 0x000b9f},
+   {0x000ba3, 0x000ba4},
+   {0x000ba8, 0x000baa},
+   {0x000bae, 0x000bb9},
+   {0x000bbe, 0x000bbf},
+   {0x000bc0, 0x000bc0},
+   {0x000bc1, 0x000bc2},
+   {0x000bc6, 0x000bc8},
+   {0x000bca, 0x000bcc},
+   {0x000bd0, 0x000bd0},
+   {0x000bd7, 0x000bd7},
+   {0x000c00, 0x000c00},
+   {0x000c01, 0x000c03},
+   {0x000c04, 0x000c04},
+   {0x000c05, 0x000c0c},
+   {0x000c0e, 0x000c10},
+   {0x000c12, 0x000c28},
+   {0x000c2a, 0x000c39},
+   {0x000c3d, 0x000c3d},
+   {0x000c3e, 0x000c40},
+   {0x000c41, 0x000c44},
+   {0x000c46, 0x000c48},
+   {0x000c4a, 0x000c4c},
+   {0x000c55, 0x000c56},
+   {0x000c58, 0x000c5a},
+   {0x000c5d, 0x000c5d},
+   {0x000c60, 0x000c61},
+   {0x000c62, 0x000c63},
+   {0x000c80, 0x000c80},
+   {0x000c81, 0x000c81},
+   {0x000c82, 0x000c83},
+   {0x000c85, 0x000c8c},
+   {0x000c8e, 0x000c90},
+   {0x000c92, 0x000ca8},
+   {0x000caa, 0x000cb3},
+   {0x000cb5, 0x000cb9},
+   {0x000cbd, 0x000cbd},
+   {0x000cbe, 0x000cbe},
+   {0x000cbf, 0x000cbf},
+   {0x000cc0, 0x000cc4},
+   {0x000cc6, 0x000cc6},
+   {0x000cc7, 0x000cc8},
+   {0x000cca, 0x000ccb},
+   {0x000ccc, 0x000ccc},
+   {0x000cd5, 0x000cd6},
+   {0x000cdd, 0x000cde},
+   {0x000ce0, 0x000ce1},
+   {0x000ce2, 0x000ce3},
+   {0x000cf1, 0x000cf2},
+   {0x000cf3, 0x000cf3},
+   {0x000d00, 0x000d01},
+   {0x000d02, 0x000d03},
+   {0x000d04, 0x000d0c},
+   {0x000d0e, 0x000d10},
+   {0x000d12, 0x000d3a},
+   {0x000d3d, 0x000d3d},
+   {0x000d3e, 0x000d40},
+   {0x000d41, 0x000d44},
+   {0x000d46, 0x000d48},
+   {0x000d4a, 0x000d4c},
+   {0x000d4e, 0x000d4e},
+   {0x000d54, 0x000d56},
+   {0x000d57, 0x000d57},
+   {0x000d5f, 0x000d61},
+   {0x000d62, 0x000d63},
+   {0x000d7a, 0x000d7f},
+   {0x000d81, 0x000d81},
+   {0x000d82, 0x000d83},
+   {0x000d85, 0x000d96},
+   {0x000d9a, 0x000db1},
+   {0x000db3, 0x000dbb},
+   {0x000dbd, 0x000dbd},
+   {0x000dc0, 0x000dc6},
+   {0x000dcf, 0x000dd1},
+   {0x000dd2, 0x000dd4},
+   {0x000dd6, 0x000dd6},
+   {0x000dd8, 0x000ddf},
+   {0x000df2, 0x000df3},
+   {0x000e01, 0x000e30},
+   {0x000e31, 0x000e31},
+   {0x000e32, 0x000e33},
+   {0x000e34, 0x000e3a},
+   {0x000e40, 0x000e45},
+   {0x000e46, 0x000e46},
+   {0x000e4d, 0x000e4d},
+   {0x000e81, 0x000e82},
+   {0x000e84, 0x000e84},
+   {0x000e86, 0x000e8a},
+   {0x000e8c, 0x000ea3},
+   {0x000ea5, 0x000ea5},
+   {0x000ea7, 0x000eb0},
+   {0x000eb1, 0x000eb1},
+   {0x000eb2, 0x000eb3},
+   {0x000eb4, 0x000eb9},
+   {0x000ebb, 0x000ebc},
+   {0x000ebd, 0x000ebd},
+   {0x000ec0, 0x000ec4},
+   {0x000ec6, 0x000ec6},
+   {0x000ecd, 0x000ecd},
+   {0x000edc, 0x000edf},
+   {0x000f00, 0x000f00},
+   {0x000f40, 0x000f47},
+   {0x000f49, 0x000f6c},
+   {0x000f71, 0x000f7e},
+   {0x000f7f, 0x000f7f},
+   {0x000f80, 0x000f83},
+   {0x000f88, 0x000f8c},
+   {0x000f8d, 0x000f97},
+   {0x000f99, 0x000fbc},
+   {0x001000, 0x00102a},
+   {0x00102b, 0x00102c},
+   {0x00102d, 0x001030},
+   {0x001031, 0x001031},
+   {0x001032, 0x001036},
+   {0x001038, 0x001038},
+   {0x00103b, 0x00103c},
+   {0x00103d, 0x00103e},
+   {0x00103f, 0x00103f},
+   {0x001050, 0x001055},
+   {0x001056, 0x001057},
+   {0x001058, 0x001059},
+   {0x00105a, 0x00105d},
+   {0x00105e, 0x001060},
+   {0x001061, 0x001061},
+   {0x001062, 0x001064},
+   {0x001065, 0x001066},
+   {0x001067, 0x00106d},
+   {0x00106e, 0x001070},
+   {0x001071, 0x001074},
+   {0x001075, 0x001081},
+   {0x001082, 0x001082},
+   {0x001083, 0x001084},
+   {0x001085, 0x001086},
+   {0x001087, 0x00108c},
+   {0x00108d, 0x00108d},
+   {0x00108e, 0x00108e},
+   {0x00108f, 0x00108f},
+   {0x00109a, 0x00109c},
+   {0x00109d, 0x00109d},
+   {0x0010a0, 0x0010c5},
+   {0x0010c7, 0x0010c7},
+   {0x0010cd, 0x0010cd},
+   {0x0010d0, 0x0010fa},
+   {0x0010fc, 0x0010fc},
+   {0x0010fd, 0x0010ff},
+   {0x001100, 0x001248},
+   {0x00124a, 0x00124d},
+   {0x001250, 0x001256},
+   {0x001258, 0x001258},
+   {0x00125a, 0x00125d},
+   {0x001260, 0x001288},
+   {0x00128a, 0x00128d},
+   {0x001290, 0x0012b0},
+   {0x0012b2, 0x0012b5},
+   {0x0012b8, 0x0012be},
+   {0x0012c0, 0x0012c0},
+   {0x0012c2, 0x0012c5},
+   {0x0012c8, 0x0012d6},
+   {0x0012d8, 0x001310},
+   {0x001312, 0x001315},
+   {0x001318, 0x00135a},
+   {0x001380, 0x00138f},
+   {0x0013a0, 0x0013f5},
+   {0x0013f8, 0x0013fd},
+   {0x001401, 0x00166c},
+   {0x00166f, 0x00167f},
+   {0x001681, 0x00169a},
+   {0x0016a0, 0x0016ea},
+   {0x0016ee, 0x0016f0},
+   {0x0016f1, 0x0016f8},
+   {0x001700, 0x001711},
+   {0x001712, 0x001713},
+   {0x00171f, 0x001731},
+   {0x001732, 0x001733},
+   {0x001740, 0x001751},
+   {0x001752, 0x001753},
+   {0x001760, 0x00176c},
+   {0x00176e, 0x001770},
+   {0x001772, 0x001773},
+   {0x001780, 0x0017b3},
+   {0x0017b6, 0x0017b6},
+   {0x0017b7, 0x0017bd},
+   {0x0017be, 0x0017c5},
+   {0x0017c6, 0x0017c6},
+   {0x0017c7, 0x0017c8},
+   {0x0017d7, 0x0017d7},
+   {0x0017dc, 0x0017dc},
+   {0x001820, 0x001842},
+   {0x001843, 0x001843},
+   {0x001844, 0x001878},
+   {0x001880, 0x001884},
+   {0x001885, 0x001886},
+   {0x001887, 0x0018a8},
+   {0x0018a9, 0x0018a9},
+   {0x0018aa, 0x0018aa},
+   {0x0018b0, 0x0018f5},
+   {0x001900, 0x00191e},
+   {0x001920, 0x001922},
+   {0x001923, 0x001926},
+   {0x001927, 0x001928},
+   {0x001929, 0x00192b},
+   {0x001930, 0x001931},
+   {0x001932, 0x001932},
+   {0x001933, 0x001938},
+   {0x001950, 0x00196d},
+   {0x001970, 0x001974},
+   {0x001980, 0x0019ab},
+   {0x0019b0, 0x0019c9},
+   {0x001a00, 0x001a16},
+   {0x001a17, 0x001a18},
+   {0x001a19, 0x001a1a},
+   {0x001a1b, 0x001a1b},
+   {0x001a20, 0x001a54},
+   {0x001a55, 0x001a55},
+   {0x001a56, 0x001a56},
+   {0x001a57, 0x001a57},
+   {0x001a58, 0x001a5e},
+   {0x001a61, 0x001a61},
+   {0x001a62, 0x001a62},
+   {0x001a63, 0x001a64},
+   {0x001a65, 0x001a6c},
+   {0x001a6d, 0x001a72},
+   {0x001a73, 0x001a74},
+   {0x001aa7, 0x001aa7},
+   {0x001abf, 0x001ac0},
+   {0x001acc, 0x001ace},
+   {0x001b00, 0x001b03},
+   {0x001b04, 0x001b04},
+   {0x001b05, 0x001b33},
+   {0x001b35, 0x001b35},
+   {0x001b36, 0x001b3a},
+   {0x001b3b, 0x001b3b},
+   {0x001b3c, 0x001b3c},
+   {0x001b3d, 0x001b41},
+   {0x001b42, 0x001b42},
+   {0x001b43, 0x001b43},
+   {0x001b45, 0x001b4c},
+   {0x001b80, 0x001b81},
+   {0x001b82, 0x001b82},
+   {0x001b83, 0x001ba0},
+   {0x001ba1, 0x001ba1},
+   {0x001ba2, 0x001ba5},
+   {0x001ba6, 0x001ba7},
+   {0x001ba8, 0x001ba9},
+   {0x001bac, 0x001bad},
+   {0x001bae, 0x001baf},
+   {0x001bba, 0x001be5},
+   {0x001be7, 0x001be7},
+   {0x001be8, 0x001be9},
+   {0x001bea, 0x001bec},
+   {0x001bed, 0x001bed},
+   {0x001bee, 0x001bee},
+   {0x001bef, 0x001bf1},
+   {0x001c00, 0x001c23},
+   {0x001c24, 0x001c2b},
+   {0x001c2c, 0x001c33},
+   {0x001c34, 0x001c35},
+   {0x001c36, 0x001c36},
+   {0x001c4d, 0x001c4f},
+   {0x001c5a, 0x001c77},
+   {0x001c78, 0x001c7d},
+   {0x001c80, 0x001c88},
+   {0x001c90, 0x001cba},
+   {0x001cbd, 0x001cbf},
+   {0x001ce9, 0x001cec},
+   {0x001cee, 0x001cf3},
+   {0x001cf5, 0x001cf6},
+   {0x001cfa, 0x001cfa},
+   {0x001d00, 0x001d2b},
+   {0x001d2c, 0x001d6a},
+   {0x001d6b, 0x001d77},
+   {0x001d78, 0x001d78},
+   {0x001d79, 0x001d9a},
+   {0x001d9b, 0x001dbf},
+   {0x001de7, 0x001df4},
+   {0x001e00, 0x001f15},
+   {0x001f18, 0x001f1d},
+   {0x001f20, 0x001f45},
+   {0x001f48, 0x001f4d},
+   {0x001f50, 0x001f57},
+   {0x001f59, 0x001f59},
+   {0x001f5b, 0x001f5b},
+   {0x001f5d, 0x001f5d},
+   {0x001f5f, 0x001f7d},
+   {0x001f80, 0x001fb4},
+   {0x001fb6, 0x001fbc},
+   {0x001fbe, 0x001fbe},
+   {0x001fc2, 0x001fc4},
+   {0x001fc6, 0x001fcc},
+   {0x001fd0, 0x001fd3},
+   {0x001fd6, 0x001fdb},
+   {0x001fe0, 0x001fec},
+   {0x001ff2, 0x001ff4},
+   {0x001ff6, 0x001ffc},
+   {0x002071, 0x002071},
+   {0x00207f, 0x00207f},
+   {0x002090, 0x00209c},
+   {0x002102, 0x002102},
+   {0x002107, 0x002107},
+   {0x00210a, 0x002113},
+   {0x002115, 0x002115},
+   {0x002119, 0x00211d},
+   {0x002124, 0x002124},
+   {0x002126, 0x002126},
+   {0x002128, 0x002128},
+   {0x00212a, 0x00212d},
+   {0x00212f, 0x002134},
+   {0x002135, 0x002138},
+   {0x002139, 0x002139},
+   {0x00213c, 0x00213f},
+   {0x002145, 0x002149},
+   {0x00214e, 0x00214e},
+   {0x002160, 0x002182},
+   {0x002183, 0x002184},
+   {0x002185, 0x002188},
+   {0x0024b6, 0x0024e9},
+   {0x002c00, 0x002c7b},
+   {0x002c7c, 0x002c7d},
+   {0x002c7e, 0x002ce4},
+   {0x002ceb, 0x002cee},
+   {0x002cf2, 0x002cf3},
+   {0x002d00, 0x002d25},
+   {0x002d27, 0x002d27},
+   {0x002d2d, 0x002d2d},
+   {0x002d30, 0x002d67},
+   {0x002d6f, 0x002d6f},
+   {0x002d80, 0x002d96},
+   {0x002da0, 0x002da6},
+   {0x002da8, 0x002dae},
+   {0x002db0, 0x002db6},
+   {0x002db8, 0x002dbe},
+   {0x002dc0, 0x002dc6},
+   {0x002dc8, 0x002dce},
+   {0x002dd0, 0x002dd6},
+   {0x002dd8, 0x002dde},
+   {0x002de0, 0x002dff},
+   {0x002e2f, 0x002e2f},
+   {0x003005, 0x003005},
+   {0x003006, 0x003006},
+   {0x003007, 0x003007},
+   {0x003021, 0x003029},
+   {0x003031, 0x003035},
+   {0x003038, 0x00303a},
+   {0x00303b, 0x00303b},
+   {0x00303c, 0x00303c},
+   {0x003041, 0x003096},
+   {0x00309d, 0x00309e},
+   {0x00309f, 0x00309f},
+   {0x0030a1, 0x0030fa},
+   {0x0030fc, 0x0030fe},
+   {0x0030ff, 0x0030ff},
+   {0x003105, 0x00312f},
+   {0x003131, 0x00318e},
+   {0x0031a0, 0x0031bf},
+   {0x0031f0, 0x0031ff},
+   {0x003400, 0x004dbf},
+   {0x004e00, 0x00a014},
+   {0x00a015, 0x00a015},
+   {0x00a016, 0x00a48c},
+   {0x00a4d0, 0x00a4f7},
+   {0x00a4f8, 0x00a4fd},
+   {0x00a500, 0x00a60b},
+   {0x00a60c, 0x00a60c},
+   {0x00a610, 0x00a61f},
+   {0x00a62a, 0x00a62b},
+   {0x00a640, 0x00a66d},
+   {0x00a66e, 0x00a66e},
+   {0x00a674, 0x00a67b},
+   {0x00a67f, 0x00a67f},
+   {0x00a680, 0x00a69b},
+   {0x00a69c, 0x00a69d},
+   {0x00a69e, 0x00a69f},
+   {0x00a6a0, 0x00a6e5},
+   {0x00a6e6, 0x00a6ef},
+   {0x00a717, 0x00a71f},
+   {0x00a722, 0x00a76f},
+   {0x00a770, 0x00a770},
+   {0x00a771, 0x00a787},
+   {0x00a788, 0x00a788},
+   {0x00a78b, 0x00a78e},
+   {0x00a78f, 0x00a78f},
+   {0x00a790, 0x00a7ca},
+   {0x00a7d0, 0x00a7d1},
+   {0x00a7d3, 0x00a7d3},
+   {0x00a7d5, 0x00a7d9},
+   {0x00a7f2, 0x00a7f4},
+   {0x00a7f5, 0x00a7f6},
+   {0x00a7f7, 0x00a7f7},
+   {0x00a7f8, 0x00a7f9},
+   {0x00a7fa, 0x00a7fa},
+   {0x00a7fb, 0x00a801},
+   {0x00a802, 0x00a802},
+   {0x00a803, 0x00a805},
+   {0x00a807, 0x00a80a},
+   {0x00a80b, 0x00a80b},
+   {0x00a80c, 0x00a822},
+   {0x00a823, 0x00a824},
+   {0x00a825, 0x00a826},
+   {0x00a827, 0x00a827},
+   {0x00a840, 0x00a873},
+   {0x00a880, 0x00a881},
+   {0x00a882, 0x00a8b3},
+   {0x00a8b4, 0x00a8c3},
+   {0x00a8c5, 0x00a8c5},
+   {0x00a8f2, 0x00a8f7},
+   {0x00a8fb, 0x00a8fb},
+   {0x00a8fd, 0x00a8fe},
+   {0x00a8ff, 0x00a8ff},
+   {0x00a90a, 0x00a925},
+   {0x00a926, 0x00a92a},
+   {0x00a930, 0x00a946},
+   {0x00a947, 0x00a951},
+   {0x00a952, 0x00a952},
+   {0x00a960, 0x00a97c},
+   {0x00a980, 0x00a982},
+   {0x00a983, 0x00a983},
+   {0x00a984, 0x00a9b2},
+   {0x00a9b4, 0x00a9b5},
+   {0x00a9b6, 0x00a9b9},
+   {0x00a9ba, 0x00a9bb},
+   {0x00a9bc, 0x00a9bd},
+   {0x00a9be, 0x00a9bf},
+   {0x00a9cf, 0x00a9cf},
+   {0x00a9e0, 0x00a9e4},
+   {0x00a9e5, 0x00a9e5},
+   {0x00a9e6, 0x00a9e6},
+   {0x00a9e7, 0x00a9ef},
+   {0x00a9fa, 0x00a9fe},
+   {0x00aa00, 0x00aa28},
+   {0x00aa29, 0x00aa2e},
+   {0x00aa2f, 0x00aa30},
+   {0x00aa31, 0x00aa32},
+   {0x00aa33, 0x00aa34},
+   {0x00aa35, 0x00aa36},
+   {0x00aa40, 0x00aa42},
+   {0x00aa43, 0x00aa43},
+   {0x00aa44, 0x00aa4b},
+   {0x00aa4c, 0x00aa4c},
+   {0x00aa4d, 0x00aa4d},
+   {0x00aa60, 0x00aa6f},
+   {0x00aa70, 0x00aa70},
+   {0x00aa71, 0x00aa76},
+   {0x00aa7a, 0x00aa7a},
+   {0x00aa7b, 0x00aa7b},
+   {0x00aa7c, 0x00aa7c},
+   {0x00aa7d, 0x00aa7d},
+   {0x00aa7e, 0x00aaaf},
+   {0x00aab0, 0x00aab0},
+   {0x00aab1, 0x00aab1},
+   {0x00aab2, 0x00aab4},
+   {0x00aab5, 0x00aab6},
+   {0x00aab7, 0x00aab8},
+   {0x00aab9, 0x00aabd},
+   {0x00aabe, 0x00aabe},
+   {0x00aac0, 0x00aac0},
+   {0x00aac2, 0x00aac2},
+   {0x00aadb, 0x00aadc},
+   {0x00aadd, 0x00aadd},
+   {0x00aae0, 0x00aaea},
+   {0x00aaeb, 0x00aaeb},
+   {0x00aaec, 0x00aaed},
+   {0x00aaee, 0x00aaef},
+   {0x00aaf2, 0x00aaf2},
+   {0x00aaf3, 0x00aaf4},
+   {0x00aaf5, 0x00aaf5},
+   {0x00ab01, 0x00ab06},
+   {0x00ab09, 0x00ab0e},
+   {0x00ab11, 0x00ab16},
+   {0x00ab20, 0x00ab26},
+   {0x00ab28, 0x00ab2e},
+   {0x00ab30, 0x00ab5a},
+   {0x00ab5c, 0x00ab5f},
+   {0x00ab60, 0x00ab68},
+   {0x00ab69, 0x00ab69},
+   {0x00ab70, 0x00abbf},
+   {0x00abc0, 0x00abe2},
+   {0x00abe3, 0x00abe4},
+   {0x00abe5, 0x00abe5},
+   {0x00abe6, 0x00abe7},
+   {0x00abe8, 0x00abe8},
+   {0x00abe9, 0x00abea},
+   {0x00ac00, 0x00d7a3},
+   {0x00d7b0, 0x00d7c6},
+   {0x00d7cb, 0x00d7fb},
+   {0x00f900, 0x00fa6d},
+   {0x00fa70, 0x00fad9},
+   {0x00fb00, 0x00fb06},
+   {0x00fb13, 0x00fb17},
+   {0x00fb1d, 0x00fb1d},
+   {0x00fb1e, 0x00fb1e},
+   {0x00fb1f, 0x00fb28},
+   {0x00fb2a, 0x00fb36},
+   {0x00fb38, 0x00fb3c},
+   {0x00fb3e, 0x00fb3e},
+   {0x00fb40, 0x00fb41},
+   {0x00fb43, 0x00fb44},
+   {0x00fb46, 0x00fbb1},
+   {0x00fbd3, 0x00fd3d},
+   {0x00fd50, 0x00fd8f},
+   {0x00fd92, 0x00fdc7},
+   {0x00fdf0, 0x00fdfb},
+   {0x00fe70, 0x00fe74},
+   {0x00fe76, 0x00fefc},
+   {0x00ff21, 0x00ff3a},
+   {0x00ff41, 0x00ff5a},
+   {0x00ff66, 0x00ff6f},
+   {0x00ff70, 0x00ff70},
+   {0x00ff71, 0x00ff9d},
+   {0x00ff9e, 0x00ff9f},
+   {0x00ffa0, 0x00ffbe},
+   {0x00ffc2, 0x00ffc7},
+   {0x00ffca, 0x00ffcf},
+   {0x00ffd2, 0x00ffd7},
+   {0x00ffda, 0x00ffdc},
+   {0x010000, 0x01000b},
+   {0x01000d, 0x010026},
+   {0x010028, 0x01003a},
+   {0x01003c, 0x01003d},
+   {0x01003f, 0x01004d},
+   {0x010050, 0x01005d},
+   {0x010080, 0x0100fa},
+   {0x010140, 0x010174},
+   {0x010280, 0x01029c},
+   {0x0102a0, 0x0102d0},
+   {0x010300, 0x01031f},
+   {0x01032d, 0x010340},
+   {0x010341, 0x010341},
+   {0x010342, 0x010349},
+   {0x01034a, 0x01034a},
+   {0x010350, 0x010375},
+   {0x010376, 0x01037a},
+   {0x010380, 0x01039d},
+   {0x0103a0, 0x0103c3},
+   {0x0103c8, 0x0103cf},
+   {0x0103d1, 0x0103d5},
+   {0x010400, 0x01044f},
+   {0x010450, 0x01049d},
+   {0x0104b0, 0x0104d3},
+   {0x0104d8, 0x0104fb},
+   {0x010500, 0x010527},
+   {0x010530, 0x010563},
+   {0x010570, 0x01057a},
+   {0x01057c, 0x01058a},
+   {0x01058c, 0x010592},
+   {0x010594, 0x010595},
+   {0x010597, 0x0105a1},
+   {0x0105a3, 0x0105b1},
+   {0x0105b3, 0x0105b9},
+   {0x0105bb, 0x0105bc},
+   {0x010600, 0x010736},
+   {0x010740, 0x010755},
+   {0x010760, 0x010767},
+   {0x010780, 0x010785},
+   {0x010787, 0x0107b0},
+   {0x0107b2, 0x0107ba},
+   {0x010800, 0x010805},
+   {0x010808, 0x010808},
+   {0x01080a, 0x010835},
+   {0x010837, 0x010838},
+   {0x01083c, 0x01083c},
+   {0x01083f, 0x010855},
+   {0x010860, 0x010876},
+   {0x010880, 0x01089e},
+   {0x0108e0, 0x0108f2},
+   {0x0108f4, 0x0108f5},
+   {0x010900, 0x010915},
+   {0x010920, 0x010939},
+   {0x010980, 0x0109b7},
+   {0x0109be, 0x0109bf},
+   {0x010a00, 0x010a00},
+   {0x010a01, 0x010a03},
+   {0x010a05, 0x010a06},
+   {0x010a0c, 0x010a0f},
+   {0x010a10, 0x010a13},
+   {0x010a15, 0x010a17},
+   {0x010a19, 0x010a35},
+   {0x010a60, 0x010a7c},
+   {0x010a80, 0x010a9c},
+   {0x010ac0, 0x010ac7},
+   {0x010ac9, 0x010ae4},
+   {0x010b00, 0x010b35},
+   {0x010b40, 0x010b55},
+   {0x010b60, 0x010b72},
+   {0x010b80, 0x010b91},
+   {0x010c00, 0x010c48},
+   {0x010c80, 0x010cb2},
+   {0x010cc0, 0x010cf2},
+   {0x010d00, 0x010d23},
+   {0x010d24, 0x010d27},
+   {0x010e80, 0x010ea9},
+   {0x010eab, 0x010eac},
+   {0x010eb0, 0x010eb1},
+   {0x010f00, 0x010f1c},
+   {0x010f27, 0x010f27},
+   {0x010f30, 0x010f45},
+   {0x010f70, 0x010f81},
+   {0x010fb0, 0x010fc4},
+   {0x010fe0, 0x010ff6},
+   {0x011000, 0x011000},
+   {0x011001, 0x011001},
+   {0x011002, 0x011002},
+   {0x011003, 0x011037},
+   {0x011038, 0x011045},
+   {0x011071, 0x011072},
+   {0x011073, 0x011074},
+   {0x011075, 0x011075},
+   {0x011080, 0x011081},
+   {0x011082, 0x011082},
+   {0x011083, 0x0110af},
+   {0x0110b0, 0x0110b2},
+   {0x0110b3, 0x0110b6},
+   {0x0110b7, 0x0110b8},
+   {0x0110c2, 0x0110c2},
+   {0x0110d0, 0x0110e8},
+   {0x011100, 0x011102},
+   {0x011103, 0x011126},
+   {0x011127, 0x01112b},
+   {0x01112c, 0x01112c},
+   {0x01112d, 0x011132},
+   {0x011144, 0x011144},
+   {0x011145, 0x011146},
+   {0x011147, 0x011147},
+   {0x011150, 0x011172},
+   {0x011176, 0x011176},
+   {0x011180, 0x011181},
+   {0x011182, 0x011182},
+   {0x011183, 0x0111b2},
+   {0x0111b3, 0x0111b5},
+   {0x0111b6, 0x0111be},
+   {0x0111bf, 0x0111bf},
+   {0x0111c1, 0x0111c4},
+   {0x0111ce, 0x0111ce},
+   {0x0111cf, 0x0111cf},
+   {0x0111da, 0x0111da},
+   {0x0111dc, 0x0111dc},
+   {0x011200, 0x011211},
+   {0x011213, 0x01122b},
+   {0x01122c, 0x01122e},
+   {0x01122f, 0x011231},
+   {0x011232, 0x011233},
+   {0x011234, 0x011234},
+   {0x011237, 0x011237},
+   {0x01123e, 0x01123e},
+   {0x01123f, 0x011240},
+   {0x011241, 0x011241},
+   {0x011280, 0x011286},
+   {0x011288, 0x011288},
+   {0x01128a, 0x01128d},
+   {0x01128f, 0x01129d},
+   {0x01129f, 0x0112a8},
+   {0x0112b0, 0x0112de},
+   {0x0112df, 0x0112df},
+   {0x0112e0, 0x0112e2},
+   {0x0112e3, 0x0112e8},
+   {0x011300, 0x011301},
+   {0x011302, 0x011303},
+   {0x011305, 0x01130c},
+   {0x01130f, 0x011310},
+   {0x011313, 0x011328},
+   {0x01132a, 0x011330},
+   {0x011332, 0x011333},
+   {0x011335, 0x011339},
+   {0x01133d, 0x01133d},
+   {0x01133e, 0x01133f},
+   {0x011340, 0x011340},
+   {0x011341, 0x011344},
+   {0x011347, 0x011348},
+   {0x01134b, 0x01134c},
+   {0x011350, 0x011350},
+   {0x011357, 0x011357},
+   {0x01135d, 0x011361},
+   {0x011362, 0x011363},
+   {0x011400, 0x011434},
+   {0x011435, 0x011437},
+   {0x011438, 0x01143f},
+   {0x011440, 0x011441},
+   {0x011443, 0x011444},
+   {0x011445, 0x011445},
+   {0x011447, 0x01144a},
+   {0x01145f, 0x011461},
+   {0x011480, 0x0114af},
+   {0x0114b0, 0x0114b2},
+   {0x0114b3, 0x0114b8},
+   {0x0114b9, 0x0114b9},
+   {0x0114ba, 0x0114ba},
+   {0x0114bb, 0x0114be},
+   {0x0114bf, 0x0114c0},
+   {0x0114c1, 0x0114c1},
+   {0x0114c4, 0x0114c5},
+   {0x0114c7, 0x0114c7},
+   {0x011580, 0x0115ae},
+   {0x0115af, 0x0115b1},
+   {0x0115b2, 0x0115b5},
+   {0x0115b8, 0x0115bb},
+   {0x0115bc, 0x0115bd},
+   {0x0115be, 0x0115be},
+   {0x0115d8, 0x0115db},
+   {0x0115dc, 0x0115dd},
+   {0x011600, 0x01162f},
+   {0x011630, 0x011632},
+   {0x011633, 0x01163a},
+   {0x01163b, 0x01163c},
+   {0x01163d, 0x01163d},
+   {0x01163e, 0x01163e},
+   {0x011640, 0x011640},
+   {0x011644, 0x011644},
+   {0x011680, 0x0116aa},
+   {0x0116ab, 0x0116ab},
+   {0x0116ac, 0x0116ac},
+   {0x0116ad, 0x0116ad},
+   {0x0116ae, 0x0116af},
+   {0x0116b0, 0x0116b5},
+   {0x0116b8, 0x0116b8},
+   {0x011700, 0x01171a},
+   {0x01171d, 0x01171f},
+   {0x011720, 0x011721},
+   {0x011722, 0x011725},
+   {0x011726, 0x011726},
+   {0x011727, 0x01172a},
+   {0x011740, 0x011746},
+   {0x011800, 0x01182b},
+   {0x01182c, 0x01182e},
+   {0x01182f, 0x011837},
+   {0x011838, 0x011838},
+   {0x0118a0, 0x0118df},
+   {0x0118ff, 0x011906},
+   {0x011909, 0x011909},
+   {0x01190c, 0x011913},
+   {0x011915, 0x011916},
+   {0x011918, 0x01192f},
+   {0x011930, 0x011935},
+   {0x011937, 0x011938},
+   {0x01193b, 0x01193c},
+   {0x01193f, 0x01193f},
+   {0x011940, 0x011940},
+   {0x011941, 0x011941},
+   {0x011942, 0x011942},
+   {0x0119a0, 0x0119a7},
+   {0x0119aa, 0x0119d0},
+   {0x0119d1, 0x0119d3},
+   {0x0119d4, 0x0119d7},
+   {0x0119da, 0x0119db},
+   {0x0119dc, 0x0119df},
+   {0x0119e1, 0x0119e1},
+   {0x0119e3, 0x0119e3},
+   {0x0119e4, 0x0119e4},
+   {0x011a00, 0x011a00},
+   {0x011a01, 0x011a0a},
+   {0x011a0b, 0x011a32},
+   {0x011a35, 0x011a38},
+   {0x011a39, 0x011a39},
+   {0x011a3a, 0x011a3a},
+   {0x011a3b, 0x011a3e},
+   {0x011a50, 0x011a50},
+   {0x011a51, 0x011a56},
+   {0x011a57, 0x011a58},
+   {0x011a59, 0x011a5b},
+   {0x011a5c, 0x011a89},
+   {0x011a8a, 0x011a96},
+   {0x011a97, 0x011a97},
+   {0x011a9d, 0x011a9d},
+   {0x011ab0, 0x011af8},
+   {0x011c00, 0x011c08},
+   {0x011c0a, 0x011c2e},
+   {0x011c2f, 0x011c2f},
+   {0x011c30, 0x011c36},
+   {0x011c38, 0x011c3d},
+   {0x011c3e, 0x011c3e},
+   {0x011c40, 0x011c40},
+   {0x011c72, 0x011c8f},
+   {0x011c92, 0x011ca7},
+   {0x011ca9, 0x011ca9},
+   {0x011caa, 0x011cb0},
+   {0x011cb1, 0x011cb1},
+   {0x011cb2, 0x011cb3},
+   {0x011cb4, 0x011cb4},
+   {0x011cb5, 0x011cb6},
+   {0x011d00, 0x011d06},
+   {0x011d08, 0x011d09},
+   {0x011d0b, 0x011d30},
+   {0x011d31, 0x011d36},
+   {0x011d3a, 0x011d3a},
+   {0x011d3c, 0x011d3d},
+   {0x011d3f, 0x011d41},
+   {0x011d43, 0x011d43},
+   {0x011d46, 0x011d46},
+   {0x011d47, 0x011d47},
+   {0x011d60, 0x011d65},
+   {0x011d67, 0x011d68},
+   {0x011d6a, 0x011d89},
+   {0x011d8a, 0x011d8e},
+   {0x011d90, 0x011d91},
+   {0x011d93, 0x011d94},
+   {0x011d95, 0x011d95},
+   {0x011d96, 0x011d96},
+   {0x011d98, 0x011d98},
+   {0x011ee0, 0x011ef2},
+   {0x011ef3, 0x011ef4},
+   {0x011ef5, 0x011ef6},
+   {0x011f00, 0x011f01},
+   {0x011f02, 0x011f02},
+   {0x011f03, 0x011f03},
+   {0x011f04, 0x011f10},
+   {0x011f12, 0x011f33},
+   {0x011f34, 0x011f35},
+   {0x011f36, 0x011f3a},
+   {0x011f3e, 0x011f3f},
+   {0x011f40, 0x011f40},
+   {0x011fb0, 0x011fb0},
+   {0x012000, 0x012399},
+   {0x012400, 0x01246e},
+   {0x012480, 0x012543},
+   {0x012f90, 0x012ff0},
+   {0x013000, 0x01342f},
+   {0x013441, 0x013446},
+   {0x014400, 0x014646},
+   {0x016800, 0x016a38},
+   {0x016a40, 0x016a5e},
+   {0x016a70, 0x016abe},
+   {0x016ad0, 0x016aed},
+   {0x016b00, 0x016b2f},
+   {0x016b40, 0x016b43},
+   {0x016b63, 0x016b77},
+   {0x016b7d, 0x016b8f},
+   {0x016e40, 0x016e7f},
+   {0x016f00, 0x016f4a},
+   {0x016f4f, 0x016f4f},
+   {0x016f50, 0x016f50},
+   {0x016f51, 0x016f87},
+   {0x016f8f, 0x016f92},
+   {0x016f93, 0x016f9f},
+   {0x016fe0, 0x016fe1},
+   {0x016fe3, 0x016fe3},
+   {0x016ff0, 0x016ff1},
+   {0x017000, 0x0187f7},
+   {0x018800, 0x018cd5},
+   {0x018d00, 0x018d08},
+   {0x01aff0, 0x01aff3},
+   {0x01aff5, 0x01affb},
+   {0x01affd, 0x01affe},
+   {0x01b000, 0x01b122},
+   {0x01b132, 0x01b132},
+   {0x01b150, 0x01b152},
+   {0x01b155, 0x01b155},
+   {0x01b164, 0x01b167},
+   {0x01b170, 0x01b2fb},
+   {0x01bc00, 0x01bc6a},
+   {0x01bc70, 0x01bc7c},
+   {0x01bc80, 0x01bc88},
+   {0x01bc90, 0x01bc99},
+   {0x01bc9e, 0x01bc9e},
+   {0x01d400, 0x01d454},
+   {0x01d456, 0x01d49c},
+   {0x01d49e, 0x01d49f},
+   {0x01d4a2, 0x01d4a2},
+   {0x01d4a5, 0x01d4a6},
+   {0x01d4a9, 0x01d4ac},
+   {0x01d4ae, 0x01d4b9},
+   {0x01d4bb, 0x01d4bb},
+   {0x01d4bd, 0x01d4c3},
+   {0x01d4c5, 0x01d505},
+   {0x01d507, 0x01d50a},
+   {0x01d50d, 0x01d514},
+   {0x01d516, 0x01d51c},
+   {0x01d51e, 0x01d539},
+   {0x01d53b, 0x01d53e},
+   {0x01d540, 0x01d544},
+   {0x01d546, 0x01d546},
+   {0x01d54a, 0x01d550},
+   {0x01d552, 0x01d6a5},
+   {0x01d6a8, 0x01d6c0},
+   {0x01d6c2, 0x01d6da},
+   {0x01d6dc, 0x01d6fa},
+   {0x01d6fc, 0x01d714},
+   {0x01d716, 0x01d734},
+   {0x01d736, 0x01d74e},
+   {0x01d750, 0x01d76e},
+   {0x01d770, 0x01d788},
+   {0x01d78a, 0x01d7a8},
+   {0x01d7aa, 0x01d7c2},
+   {0x01d7c4, 0x01d7cb},
+   {0x01df00, 0x01df09},
+   {0x01df0a, 0x01df0a},
+   {0x01df0b, 0x01df1e},
+   {0x01df25, 0x01df2a},
+   {0x01e000, 0x01e006},
+   {0x01e008, 0x01e018},
+   {0x01e01b, 0x01e021},
+   {0x01e023, 0x01e024},
+   {0x01e026, 0x01e02a},
+   {0x01e030, 0x01e06d},
+   {0x01e08f, 0x01e08f},
+   {0x01e100, 0x01e12c},
+   {0x01e137, 0x01e13d},
+   {0x01e14e, 0x01e14e},
+   {0x01e290, 0x01e2ad},
+   {0x01e2c0, 0x01e2eb},
+   {0x01e4d0, 0x01e4ea},
+   {0x01e4eb, 0x01e4eb},
+   {0x01e7e0, 0x01e7e6},
+   {0x01e7e8, 0x01e7eb},
+   {0x01e7ed, 0x01e7ee},
+   {0x01e7f0, 0x01e7fe},
+   {0x01e800, 0x01e8c4},
+   {0x01e900, 0x01e943},
+   {0x01e947, 0x01e947},
+   {0x01e94b, 0x01e94b},
+   {0x01ee00, 0x01ee03},
+   {0x01ee05, 0x01ee1f},
+   {0x01ee21, 0x01ee22},
+   {0x01ee24, 0x01ee24},
+   {0x01ee27, 0x01ee27},
+   {0x01ee29, 0x01ee32},
+   {0x01ee34, 0x01ee37},
+   {0x01ee39, 0x01ee39},
+   {0x01ee3b, 0x01ee3b},
+   {0x01ee42, 0x01ee42},
+   {0x01ee47, 0x01ee47},
+   {0x01ee49, 0x01ee49},
+   {0x01ee4b, 0x01ee4b},
+   {0x01ee4d, 0x01ee4f},
+   {0x01ee51, 0x01ee52},
+   {0x01ee54, 0x01ee54},
+   {0x01ee57, 0x01ee57},
+   {0x01ee59, 0x01ee59},
+   {0x01ee5b, 0x01ee5b},
+   {0x01ee5d, 0x01ee5d},
+   {0x01ee5f, 0x01ee5f},
+   {0x01ee61, 0x01ee62},
+   {0x01ee64, 0x01ee64},
+   {0x01ee67, 0x01ee6a},
+   {0x01ee6c, 0x01ee72},
+   {0x01ee74, 0x01ee77},
+   {0x01ee79, 0x01ee7c},
+   {0x01ee7e, 0x01ee7e},
+   {0x01ee80, 0x01ee89},
+   {0x01ee8b, 0x01ee9b},
+   {0x01eea1, 0x01eea3},
+   {0x01eea5, 0x01eea9},
+   {0x01eeab, 0x01eebb},
+   {0x01f130, 0x01f149},
+   {0x01f150, 0x01f169},
+   {0x01f170, 0x01f189},
+   {0x020000, 0x02a6df},
+   {0x02a700, 0x02b739},
+   {0x02b740, 0x02b81d},
+   {0x02b820, 0x02cea1},
+   {0x02ceb0, 0x02ebe0},
+   {0x02ebf0, 0x02ee5d},
+   {0x02f800, 0x02fa1d},
+   {0x030000, 0x03134a},
+   {0x031350, 0x0323af},
+};
+
+/* table of Unicode codepoint ranges of Lowercase characters */
+static const pg_unicode_range unicode_lowercase[686] =
+{
+   {0x000061, 0x00007a},
+   {0x0000aa, 0x0000aa},
+   {0x0000b5, 0x0000b5},
+   {0x0000ba, 0x0000ba},
+   {0x0000df, 0x0000f6},
+   {0x0000f8, 0x0000ff},
+   {0x000101, 0x000101},
+   {0x000103, 0x000103},
+   {0x000105, 0x000105},
+   {0x000107, 0x000107},
+   {0x000109, 0x000109},
+   {0x00010b, 0x00010b},
+   {0x00010d, 0x00010d},
+   {0x00010f, 0x00010f},
+   {0x000111, 0x000111},
+   {0x000113, 0x000113},
+   {0x000115, 0x000115},
+   {0x000117, 0x000117},
+   {0x000119, 0x000119},
+   {0x00011b, 0x00011b},
+   {0x00011d, 0x00011d},
+   {0x00011f, 0x00011f},
+   {0x000121, 0x000121},
+   {0x000123, 0x000123},
+   {0x000125, 0x000125},
+   {0x000127, 0x000127},
+   {0x000129, 0x000129},
+   {0x00012b, 0x00012b},
+   {0x00012d, 0x00012d},
+   {0x00012f, 0x00012f},
+   {0x000131, 0x000131},
+   {0x000133, 0x000133},
+   {0x000135, 0x000135},
+   {0x000137, 0x000138},
+   {0x00013a, 0x00013a},
+   {0x00013c, 0x00013c},
+   {0x00013e, 0x00013e},
+   {0x000140, 0x000140},
+   {0x000142, 0x000142},
+   {0x000144, 0x000144},
+   {0x000146, 0x000146},
+   {0x000148, 0x000149},
+   {0x00014b, 0x00014b},
+   {0x00014d, 0x00014d},
+   {0x00014f, 0x00014f},
+   {0x000151, 0x000151},
+   {0x000153, 0x000153},
+   {0x000155, 0x000155},
+   {0x000157, 0x000157},
+   {0x000159, 0x000159},
+   {0x00015b, 0x00015b},
+   {0x00015d, 0x00015d},
+   {0x00015f, 0x00015f},
+   {0x000161, 0x000161},
+   {0x000163, 0x000163},
+   {0x000165, 0x000165},
+   {0x000167, 0x000167},
+   {0x000169, 0x000169},
+   {0x00016b, 0x00016b},
+   {0x00016d, 0x00016d},
+   {0x00016f, 0x00016f},
+   {0x000171, 0x000171},
+   {0x000173, 0x000173},
+   {0x000175, 0x000175},
+   {0x000177, 0x000177},
+   {0x00017a, 0x00017a},
+   {0x00017c, 0x00017c},
+   {0x00017e, 0x000180},
+   {0x000183, 0x000183},
+   {0x000185, 0x000185},
+   {0x000188, 0x000188},
+   {0x00018c, 0x00018d},
+   {0x000192, 0x000192},
+   {0x000195, 0x000195},
+   {0x000199, 0x00019b},
+   {0x00019e, 0x00019e},
+   {0x0001a1, 0x0001a1},
+   {0x0001a3, 0x0001a3},
+   {0x0001a5, 0x0001a5},
+   {0x0001a8, 0x0001a8},
+   {0x0001aa, 0x0001ab},
+   {0x0001ad, 0x0001ad},
+   {0x0001b0, 0x0001b0},
+   {0x0001b4, 0x0001b4},
+   {0x0001b6, 0x0001b6},
+   {0x0001b9, 0x0001ba},
+   {0x0001bd, 0x0001bf},
+   {0x0001c6, 0x0001c6},
+   {0x0001c9, 0x0001c9},
+   {0x0001cc, 0x0001cc},
+   {0x0001ce, 0x0001ce},
+   {0x0001d0, 0x0001d0},
+   {0x0001d2, 0x0001d2},
+   {0x0001d4, 0x0001d4},
+   {0x0001d6, 0x0001d6},
+   {0x0001d8, 0x0001d8},
+   {0x0001da, 0x0001da},
+   {0x0001dc, 0x0001dd},
+   {0x0001df, 0x0001df},
+   {0x0001e1, 0x0001e1},
+   {0x0001e3, 0x0001e3},
+   {0x0001e5, 0x0001e5},
+   {0x0001e7, 0x0001e7},
+   {0x0001e9, 0x0001e9},
+   {0x0001eb, 0x0001eb},
+   {0x0001ed, 0x0001ed},
+   {0x0001ef, 0x0001f0},
+   {0x0001f3, 0x0001f3},
+   {0x0001f5, 0x0001f5},
+   {0x0001f9, 0x0001f9},
+   {0x0001fb, 0x0001fb},
+   {0x0001fd, 0x0001fd},
+   {0x0001ff, 0x0001ff},
+   {0x000201, 0x000201},
+   {0x000203, 0x000203},
+   {0x000205, 0x000205},
+   {0x000207, 0x000207},
+   {0x000209, 0x000209},
+   {0x00020b, 0x00020b},
+   {0x00020d, 0x00020d},
+   {0x00020f, 0x00020f},
+   {0x000211, 0x000211},
+   {0x000213, 0x000213},
+   {0x000215, 0x000215},
+   {0x000217, 0x000217},
+   {0x000219, 0x000219},
+   {0x00021b, 0x00021b},
+   {0x00021d, 0x00021d},
+   {0x00021f, 0x00021f},
+   {0x000221, 0x000221},
+   {0x000223, 0x000223},
+   {0x000225, 0x000225},
+   {0x000227, 0x000227},
+   {0x000229, 0x000229},
+   {0x00022b, 0x00022b},
+   {0x00022d, 0x00022d},
+   {0x00022f, 0x00022f},
+   {0x000231, 0x000231},
+   {0x000233, 0x000239},
+   {0x00023c, 0x00023c},
+   {0x00023f, 0x000240},
+   {0x000242, 0x000242},
+   {0x000247, 0x000247},
+   {0x000249, 0x000249},
+   {0x00024b, 0x00024b},
+   {0x00024d, 0x00024d},
+   {0x00024f, 0x000293},
+   {0x000295, 0x0002af},
+   {0x0002b0, 0x0002b8},
+   {0x0002c0, 0x0002c1},
+   {0x0002e0, 0x0002e4},
+   {0x000345, 0x000345},
+   {0x000371, 0x000371},
+   {0x000373, 0x000373},
+   {0x000377, 0x000377},
+   {0x00037a, 0x00037a},
+   {0x00037b, 0x00037d},
+   {0x000390, 0x000390},
+   {0x0003ac, 0x0003ce},
+   {0x0003d0, 0x0003d1},
+   {0x0003d5, 0x0003d7},
+   {0x0003d9, 0x0003d9},
+   {0x0003db, 0x0003db},
+   {0x0003dd, 0x0003dd},
+   {0x0003df, 0x0003df},
+   {0x0003e1, 0x0003e1},
+   {0x0003e3, 0x0003e3},
+   {0x0003e5, 0x0003e5},
+   {0x0003e7, 0x0003e7},
+   {0x0003e9, 0x0003e9},
+   {0x0003eb, 0x0003eb},
+   {0x0003ed, 0x0003ed},
+   {0x0003ef, 0x0003f3},
+   {0x0003f5, 0x0003f5},
+   {0x0003f8, 0x0003f8},
+   {0x0003fb, 0x0003fc},
+   {0x000430, 0x00045f},
+   {0x000461, 0x000461},
+   {0x000463, 0x000463},
+   {0x000465, 0x000465},
+   {0x000467, 0x000467},
+   {0x000469, 0x000469},
+   {0x00046b, 0x00046b},
+   {0x00046d, 0x00046d},
+   {0x00046f, 0x00046f},
+   {0x000471, 0x000471},
+   {0x000473, 0x000473},
+   {0x000475, 0x000475},
+   {0x000477, 0x000477},
+   {0x000479, 0x000479},
+   {0x00047b, 0x00047b},
+   {0x00047d, 0x00047d},
+   {0x00047f, 0x00047f},
+   {0x000481, 0x000481},
+   {0x00048b, 0x00048b},
+   {0x00048d, 0x00048d},
+   {0x00048f, 0x00048f},
+   {0x000491, 0x000491},
+   {0x000493, 0x000493},
+   {0x000495, 0x000495},
+   {0x000497, 0x000497},
+   {0x000499, 0x000499},
+   {0x00049b, 0x00049b},
+   {0x00049d, 0x00049d},
+   {0x00049f, 0x00049f},
+   {0x0004a1, 0x0004a1},
+   {0x0004a3, 0x0004a3},
+   {0x0004a5, 0x0004a5},
+   {0x0004a7, 0x0004a7},
+   {0x0004a9, 0x0004a9},
+   {0x0004ab, 0x0004ab},
+   {0x0004ad, 0x0004ad},
+   {0x0004af, 0x0004af},
+   {0x0004b1, 0x0004b1},
+   {0x0004b3, 0x0004b3},
+   {0x0004b5, 0x0004b5},
+   {0x0004b7, 0x0004b7},
+   {0x0004b9, 0x0004b9},
+   {0x0004bb, 0x0004bb},
+   {0x0004bd, 0x0004bd},
+   {0x0004bf, 0x0004bf},
+   {0x0004c2, 0x0004c2},
+   {0x0004c4, 0x0004c4},
+   {0x0004c6, 0x0004c6},
+   {0x0004c8, 0x0004c8},
+   {0x0004ca, 0x0004ca},
+   {0x0004cc, 0x0004cc},
+   {0x0004ce, 0x0004cf},
+   {0x0004d1, 0x0004d1},
+   {0x0004d3, 0x0004d3},
+   {0x0004d5, 0x0004d5},
+   {0x0004d7, 0x0004d7},
+   {0x0004d9, 0x0004d9},
+   {0x0004db, 0x0004db},
+   {0x0004dd, 0x0004dd},
+   {0x0004df, 0x0004df},
+   {0x0004e1, 0x0004e1},
+   {0x0004e3, 0x0004e3},
+   {0x0004e5, 0x0004e5},
+   {0x0004e7, 0x0004e7},
+   {0x0004e9, 0x0004e9},
+   {0x0004eb, 0x0004eb},
+   {0x0004ed, 0x0004ed},
+   {0x0004ef, 0x0004ef},
+   {0x0004f1, 0x0004f1},
+   {0x0004f3, 0x0004f3},
+   {0x0004f5, 0x0004f5},
+   {0x0004f7, 0x0004f7},
+   {0x0004f9, 0x0004f9},
+   {0x0004fb, 0x0004fb},
+   {0x0004fd, 0x0004fd},
+   {0x0004ff, 0x0004ff},
+   {0x000501, 0x000501},
+   {0x000503, 0x000503},
+   {0x000505, 0x000505},
+   {0x000507, 0x000507},
+   {0x000509, 0x000509},
+   {0x00050b, 0x00050b},
+   {0x00050d, 0x00050d},
+   {0x00050f, 0x00050f},
+   {0x000511, 0x000511},
+   {0x000513, 0x000513},
+   {0x000515, 0x000515},
+   {0x000517, 0x000517},
+   {0x000519, 0x000519},
+   {0x00051b, 0x00051b},
+   {0x00051d, 0x00051d},
+   {0x00051f, 0x00051f},
+   {0x000521, 0x000521},
+   {0x000523, 0x000523},
+   {0x000525, 0x000525},
+   {0x000527, 0x000527},
+   {0x000529, 0x000529},
+   {0x00052b, 0x00052b},
+   {0x00052d, 0x00052d},
+   {0x00052f, 0x00052f},
+   {0x000560, 0x000588},
+   {0x0010d0, 0x0010fa},
+   {0x0010fc, 0x0010fc},
+   {0x0010fd, 0x0010ff},
+   {0x0013f8, 0x0013fd},
+   {0x001c80, 0x001c88},
+   {0x001d00, 0x001d2b},
+   {0x001d2c, 0x001d6a},
+   {0x001d6b, 0x001d77},
+   {0x001d78, 0x001d78},
+   {0x001d79, 0x001d9a},
+   {0x001d9b, 0x001dbf},
+   {0x001e01, 0x001e01},
+   {0x001e03, 0x001e03},
+   {0x001e05, 0x001e05},
+   {0x001e07, 0x001e07},
+   {0x001e09, 0x001e09},
+   {0x001e0b, 0x001e0b},
+   {0x001e0d, 0x001e0d},
+   {0x001e0f, 0x001e0f},
+   {0x001e11, 0x001e11},
+   {0x001e13, 0x001e13},
+   {0x001e15, 0x001e15},
+   {0x001e17, 0x001e17},
+   {0x001e19, 0x001e19},
+   {0x001e1b, 0x001e1b},
+   {0x001e1d, 0x001e1d},
+   {0x001e1f, 0x001e1f},
+   {0x001e21, 0x001e21},
+   {0x001e23, 0x001e23},
+   {0x001e25, 0x001e25},
+   {0x001e27, 0x001e27},
+   {0x001e29, 0x001e29},
+   {0x001e2b, 0x001e2b},
+   {0x001e2d, 0x001e2d},
+   {0x001e2f, 0x001e2f},
+   {0x001e31, 0x001e31},
+   {0x001e33, 0x001e33},
+   {0x001e35, 0x001e35},
+   {0x001e37, 0x001e37},
+   {0x001e39, 0x001e39},
+   {0x001e3b, 0x001e3b},
+   {0x001e3d, 0x001e3d},
+   {0x001e3f, 0x001e3f},
+   {0x001e41, 0x001e41},
+   {0x001e43, 0x001e43},
+   {0x001e45, 0x001e45},
+   {0x001e47, 0x001e47},
+   {0x001e49, 0x001e49},
+   {0x001e4b, 0x001e4b},
+   {0x001e4d, 0x001e4d},
+   {0x001e4f, 0x001e4f},
+   {0x001e51, 0x001e51},
+   {0x001e53, 0x001e53},
+   {0x001e55, 0x001e55},
+   {0x001e57, 0x001e57},
+   {0x001e59, 0x001e59},
+   {0x001e5b, 0x001e5b},
+   {0x001e5d, 0x001e5d},
+   {0x001e5f, 0x001e5f},
+   {0x001e61, 0x001e61},
+   {0x001e63, 0x001e63},
+   {0x001e65, 0x001e65},
+   {0x001e67, 0x001e67},
+   {0x001e69, 0x001e69},
+   {0x001e6b, 0x001e6b},
+   {0x001e6d, 0x001e6d},
+   {0x001e6f, 0x001e6f},
+   {0x001e71, 0x001e71},
+   {0x001e73, 0x001e73},
+   {0x001e75, 0x001e75},
+   {0x001e77, 0x001e77},
+   {0x001e79, 0x001e79},
+   {0x001e7b, 0x001e7b},
+   {0x001e7d, 0x001e7d},
+   {0x001e7f, 0x001e7f},
+   {0x001e81, 0x001e81},
+   {0x001e83, 0x001e83},
+   {0x001e85, 0x001e85},
+   {0x001e87, 0x001e87},
+   {0x001e89, 0x001e89},
+   {0x001e8b, 0x001e8b},
+   {0x001e8d, 0x001e8d},
+   {0x001e8f, 0x001e8f},
+   {0x001e91, 0x001e91},
+   {0x001e93, 0x001e93},
+   {0x001e95, 0x001e9d},
+   {0x001e9f, 0x001e9f},
+   {0x001ea1, 0x001ea1},
+   {0x001ea3, 0x001ea3},
+   {0x001ea5, 0x001ea5},
+   {0x001ea7, 0x001ea7},
+   {0x001ea9, 0x001ea9},
+   {0x001eab, 0x001eab},
+   {0x001ead, 0x001ead},
+   {0x001eaf, 0x001eaf},
+   {0x001eb1, 0x001eb1},
+   {0x001eb3, 0x001eb3},
+   {0x001eb5, 0x001eb5},
+   {0x001eb7, 0x001eb7},
+   {0x001eb9, 0x001eb9},
+   {0x001ebb, 0x001ebb},
+   {0x001ebd, 0x001ebd},
+   {0x001ebf, 0x001ebf},
+   {0x001ec1, 0x001ec1},
+   {0x001ec3, 0x001ec3},
+   {0x001ec5, 0x001ec5},
+   {0x001ec7, 0x001ec7},
+   {0x001ec9, 0x001ec9},
+   {0x001ecb, 0x001ecb},
+   {0x001ecd, 0x001ecd},
+   {0x001ecf, 0x001ecf},
+   {0x001ed1, 0x001ed1},
+   {0x001ed3, 0x001ed3},
+   {0x001ed5, 0x001ed5},
+   {0x001ed7, 0x001ed7},
+   {0x001ed9, 0x001ed9},
+   {0x001edb, 0x001edb},
+   {0x001edd, 0x001edd},
+   {0x001edf, 0x001edf},
+   {0x001ee1, 0x001ee1},
+   {0x001ee3, 0x001ee3},
+   {0x001ee5, 0x001ee5},
+   {0x001ee7, 0x001ee7},
+   {0x001ee9, 0x001ee9},
+   {0x001eeb, 0x001eeb},
+   {0x001eed, 0x001eed},
+   {0x001eef, 0x001eef},
+   {0x001ef1, 0x001ef1},
+   {0x001ef3, 0x001ef3},
+   {0x001ef5, 0x001ef5},
+   {0x001ef7, 0x001ef7},
+   {0x001ef9, 0x001ef9},
+   {0x001efb, 0x001efb},
+   {0x001efd, 0x001efd},
+   {0x001eff, 0x001f07},
+   {0x001f10, 0x001f15},
+   {0x001f20, 0x001f27},
+   {0x001f30, 0x001f37},
+   {0x001f40, 0x001f45},
+   {0x001f50, 0x001f57},
+   {0x001f60, 0x001f67},
+   {0x001f70, 0x001f7d},
+   {0x001f80, 0x001f87},
+   {0x001f90, 0x001f97},
+   {0x001fa0, 0x001fa7},
+   {0x001fb0, 0x001fb4},
+   {0x001fb6, 0x001fb7},
+   {0x001fbe, 0x001fbe},
+   {0x001fc2, 0x001fc4},
+   {0x001fc6, 0x001fc7},
+   {0x001fd0, 0x001fd3},
+   {0x001fd6, 0x001fd7},
+   {0x001fe0, 0x001fe7},
+   {0x001ff2, 0x001ff4},
+   {0x001ff6, 0x001ff7},
+   {0x002071, 0x002071},
+   {0x00207f, 0x00207f},
+   {0x002090, 0x00209c},
+   {0x00210a, 0x00210a},
+   {0x00210e, 0x00210f},
+   {0x002113, 0x002113},
+   {0x00212f, 0x00212f},
+   {0x002134, 0x002134},
+   {0x002139, 0x002139},
+   {0x00213c, 0x00213d},
+   {0x002146, 0x002149},
+   {0x00214e, 0x00214e},
+   {0x002170, 0x00217f},
+   {0x002184, 0x002184},
+   {0x0024d0, 0x0024e9},
+   {0x002c30, 0x002c5f},
+   {0x002c61, 0x002c61},
+   {0x002c65, 0x002c66},
+   {0x002c68, 0x002c68},
+   {0x002c6a, 0x002c6a},
+   {0x002c6c, 0x002c6c},
+   {0x002c71, 0x002c71},
+   {0x002c73, 0x002c74},
+   {0x002c76, 0x002c7b},
+   {0x002c7c, 0x002c7d},
+   {0x002c81, 0x002c81},
+   {0x002c83, 0x002c83},
+   {0x002c85, 0x002c85},
+   {0x002c87, 0x002c87},
+   {0x002c89, 0x002c89},
+   {0x002c8b, 0x002c8b},
+   {0x002c8d, 0x002c8d},
+   {0x002c8f, 0x002c8f},
+   {0x002c91, 0x002c91},
+   {0x002c93, 0x002c93},
+   {0x002c95, 0x002c95},
+   {0x002c97, 0x002c97},
+   {0x002c99, 0x002c99},
+   {0x002c9b, 0x002c9b},
+   {0x002c9d, 0x002c9d},
+   {0x002c9f, 0x002c9f},
+   {0x002ca1, 0x002ca1},
+   {0x002ca3, 0x002ca3},
+   {0x002ca5, 0x002ca5},
+   {0x002ca7, 0x002ca7},
+   {0x002ca9, 0x002ca9},
+   {0x002cab, 0x002cab},
+   {0x002cad, 0x002cad},
+   {0x002caf, 0x002caf},
+   {0x002cb1, 0x002cb1},
+   {0x002cb3, 0x002cb3},
+   {0x002cb5, 0x002cb5},
+   {0x002cb7, 0x002cb7},
+   {0x002cb9, 0x002cb9},
+   {0x002cbb, 0x002cbb},
+   {0x002cbd, 0x002cbd},
+   {0x002cbf, 0x002cbf},
+   {0x002cc1, 0x002cc1},
+   {0x002cc3, 0x002cc3},
+   {0x002cc5, 0x002cc5},
+   {0x002cc7, 0x002cc7},
+   {0x002cc9, 0x002cc9},
+   {0x002ccb, 0x002ccb},
+   {0x002ccd, 0x002ccd},
+   {0x002ccf, 0x002ccf},
+   {0x002cd1, 0x002cd1},
+   {0x002cd3, 0x002cd3},
+   {0x002cd5, 0x002cd5},
+   {0x002cd7, 0x002cd7},
+   {0x002cd9, 0x002cd9},
+   {0x002cdb, 0x002cdb},
+   {0x002cdd, 0x002cdd},
+   {0x002cdf, 0x002cdf},
+   {0x002ce1, 0x002ce1},
+   {0x002ce3, 0x002ce4},
+   {0x002cec, 0x002cec},
+   {0x002cee, 0x002cee},
+   {0x002cf3, 0x002cf3},
+   {0x002d00, 0x002d25},
+   {0x002d27, 0x002d27},
+   {0x002d2d, 0x002d2d},
+   {0x00a641, 0x00a641},
+   {0x00a643, 0x00a643},
+   {0x00a645, 0x00a645},
+   {0x00a647, 0x00a647},
+   {0x00a649, 0x00a649},
+   {0x00a64b, 0x00a64b},
+   {0x00a64d, 0x00a64d},
+   {0x00a64f, 0x00a64f},
+   {0x00a651, 0x00a651},
+   {0x00a653, 0x00a653},
+   {0x00a655, 0x00a655},
+   {0x00a657, 0x00a657},
+   {0x00a659, 0x00a659},
+   {0x00a65b, 0x00a65b},
+   {0x00a65d, 0x00a65d},
+   {0x00a65f, 0x00a65f},
+   {0x00a661, 0x00a661},
+   {0x00a663, 0x00a663},
+   {0x00a665, 0x00a665},
+   {0x00a667, 0x00a667},
+   {0x00a669, 0x00a669},
+   {0x00a66b, 0x00a66b},
+   {0x00a66d, 0x00a66d},
+   {0x00a681, 0x00a681},
+   {0x00a683, 0x00a683},
+   {0x00a685, 0x00a685},
+   {0x00a687, 0x00a687},
+   {0x00a689, 0x00a689},
+   {0x00a68b, 0x00a68b},
+   {0x00a68d, 0x00a68d},
+   {0x00a68f, 0x00a68f},
+   {0x00a691, 0x00a691},
+   {0x00a693, 0x00a693},
+   {0x00a695, 0x00a695},
+   {0x00a697, 0x00a697},
+   {0x00a699, 0x00a699},
+   {0x00a69b, 0x00a69b},
+   {0x00a69c, 0x00a69d},
+   {0x00a723, 0x00a723},
+   {0x00a725, 0x00a725},
+   {0x00a727, 0x00a727},
+   {0x00a729, 0x00a729},
+   {0x00a72b, 0x00a72b},
+   {0x00a72d, 0x00a72d},
+   {0x00a72f, 0x00a731},
+   {0x00a733, 0x00a733},
+   {0x00a735, 0x00a735},
+   {0x00a737, 0x00a737},
+   {0x00a739, 0x00a739},
+   {0x00a73b, 0x00a73b},
+   {0x00a73d, 0x00a73d},
+   {0x00a73f, 0x00a73f},
+   {0x00a741, 0x00a741},
+   {0x00a743, 0x00a743},
+   {0x00a745, 0x00a745},
+   {0x00a747, 0x00a747},
+   {0x00a749, 0x00a749},
+   {0x00a74b, 0x00a74b},
+   {0x00a74d, 0x00a74d},
+   {0x00a74f, 0x00a74f},
+   {0x00a751, 0x00a751},
+   {0x00a753, 0x00a753},
+   {0x00a755, 0x00a755},
+   {0x00a757, 0x00a757},
+   {0x00a759, 0x00a759},
+   {0x00a75b, 0x00a75b},
+   {0x00a75d, 0x00a75d},
+   {0x00a75f, 0x00a75f},
+   {0x00a761, 0x00a761},
+   {0x00a763, 0x00a763},
+   {0x00a765, 0x00a765},
+   {0x00a767, 0x00a767},
+   {0x00a769, 0x00a769},
+   {0x00a76b, 0x00a76b},
+   {0x00a76d, 0x00a76d},
+   {0x00a76f, 0x00a76f},
+   {0x00a770, 0x00a770},
+   {0x00a771, 0x00a778},
+   {0x00a77a, 0x00a77a},
+   {0x00a77c, 0x00a77c},
+   {0x00a77f, 0x00a77f},
+   {0x00a781, 0x00a781},
+   {0x00a783, 0x00a783},
+   {0x00a785, 0x00a785},
+   {0x00a787, 0x00a787},
+   {0x00a78c, 0x00a78c},
+   {0x00a78e, 0x00a78e},
+   {0x00a791, 0x00a791},
+   {0x00a793, 0x00a795},
+   {0x00a797, 0x00a797},
+   {0x00a799, 0x00a799},
+   {0x00a79b, 0x00a79b},
+   {0x00a79d, 0x00a79d},
+   {0x00a79f, 0x00a79f},
+   {0x00a7a1, 0x00a7a1},
+   {0x00a7a3, 0x00a7a3},
+   {0x00a7a5, 0x00a7a5},
+   {0x00a7a7, 0x00a7a7},
+   {0x00a7a9, 0x00a7a9},
+   {0x00a7af, 0x00a7af},
+   {0x00a7b5, 0x00a7b5},
+   {0x00a7b7, 0x00a7b7},
+   {0x00a7b9, 0x00a7b9},
+   {0x00a7bb, 0x00a7bb},
+   {0x00a7bd, 0x00a7bd},
+   {0x00a7bf, 0x00a7bf},
+   {0x00a7c1, 0x00a7c1},
+   {0x00a7c3, 0x00a7c3},
+   {0x00a7c8, 0x00a7c8},
+   {0x00a7ca, 0x00a7ca},
+   {0x00a7d1, 0x00a7d1},
+   {0x00a7d3, 0x00a7d3},
+   {0x00a7d5, 0x00a7d5},
+   {0x00a7d7, 0x00a7d7},
+   {0x00a7d9, 0x00a7d9},
+   {0x00a7f2, 0x00a7f4},
+   {0x00a7f6, 0x00a7f6},
+   {0x00a7f8, 0x00a7f9},
+   {0x00a7fa, 0x00a7fa},
+   {0x00ab30, 0x00ab5a},
+   {0x00ab5c, 0x00ab5f},
+   {0x00ab60, 0x00ab68},
+   {0x00ab69, 0x00ab69},
+   {0x00ab70, 0x00abbf},
+   {0x00fb00, 0x00fb06},
+   {0x00fb13, 0x00fb17},
+   {0x00ff41, 0x00ff5a},
+   {0x010428, 0x01044f},
+   {0x0104d8, 0x0104fb},
+   {0x010597, 0x0105a1},
+   {0x0105a3, 0x0105b1},
+   {0x0105b3, 0x0105b9},
+   {0x0105bb, 0x0105bc},
+   {0x010780, 0x010780},
+   {0x010783, 0x010785},
+   {0x010787, 0x0107b0},
+   {0x0107b2, 0x0107ba},
+   {0x010cc0, 0x010cf2},
+   {0x0118c0, 0x0118df},
+   {0x016e60, 0x016e7f},
+   {0x01d41a, 0x01d433},
+   {0x01d44e, 0x01d454},
+   {0x01d456, 0x01d467},
+   {0x01d482, 0x01d49b},
+   {0x01d4b6, 0x01d4b9},
+   {0x01d4bb, 0x01d4bb},
+   {0x01d4bd, 0x01d4c3},
+   {0x01d4c5, 0x01d4cf},
+   {0x01d4ea, 0x01d503},
+   {0x01d51e, 0x01d537},
+   {0x01d552, 0x01d56b},
+   {0x01d586, 0x01d59f},
+   {0x01d5ba, 0x01d5d3},
+   {0x01d5ee, 0x01d607},
+   {0x01d622, 0x01d63b},
+   {0x01d656, 0x01d66f},
+   {0x01d68a, 0x01d6a5},
+   {0x01d6c2, 0x01d6da},
+   {0x01d6dc, 0x01d6e1},
+   {0x01d6fc, 0x01d714},
+   {0x01d716, 0x01d71b},
+   {0x01d736, 0x01d74e},
+   {0x01d750, 0x01d755},
+   {0x01d770, 0x01d788},
+   {0x01d78a, 0x01d78f},
+   {0x01d7aa, 0x01d7c2},
+   {0x01d7c4, 0x01d7c9},
+   {0x01d7cb, 0x01d7cb},
+   {0x01df00, 0x01df09},
+   {0x01df0b, 0x01df1e},
+   {0x01df25, 0x01df2a},
+   {0x01e030, 0x01e06d},
+   {0x01e922, 0x01e943},
+};
+
+/* table of Unicode codepoint ranges of Uppercase characters */
+static const pg_unicode_range unicode_uppercase[651] =
+{
+   {0x000041, 0x00005a},
+   {0x0000c0, 0x0000d6},
+   {0x0000d8, 0x0000de},
+   {0x000100, 0x000100},
+   {0x000102, 0x000102},
+   {0x000104, 0x000104},
+   {0x000106, 0x000106},
+   {0x000108, 0x000108},
+   {0x00010a, 0x00010a},
+   {0x00010c, 0x00010c},
+   {0x00010e, 0x00010e},
+   {0x000110, 0x000110},
+   {0x000112, 0x000112},
+   {0x000114, 0x000114},
+   {0x000116, 0x000116},
+   {0x000118, 0x000118},
+   {0x00011a, 0x00011a},
+   {0x00011c, 0x00011c},
+   {0x00011e, 0x00011e},
+   {0x000120, 0x000120},
+   {0x000122, 0x000122},
+   {0x000124, 0x000124},
+   {0x000126, 0x000126},
+   {0x000128, 0x000128},
+   {0x00012a, 0x00012a},
+   {0x00012c, 0x00012c},
+   {0x00012e, 0x00012e},
+   {0x000130, 0x000130},
+   {0x000132, 0x000132},
+   {0x000134, 0x000134},
+   {0x000136, 0x000136},
+   {0x000139, 0x000139},
+   {0x00013b, 0x00013b},
+   {0x00013d, 0x00013d},
+   {0x00013f, 0x00013f},
+   {0x000141, 0x000141},
+   {0x000143, 0x000143},
+   {0x000145, 0x000145},
+   {0x000147, 0x000147},
+   {0x00014a, 0x00014a},
+   {0x00014c, 0x00014c},
+   {0x00014e, 0x00014e},
+   {0x000150, 0x000150},
+   {0x000152, 0x000152},
+   {0x000154, 0x000154},
+   {0x000156, 0x000156},
+   {0x000158, 0x000158},
+   {0x00015a, 0x00015a},
+   {0x00015c, 0x00015c},
+   {0x00015e, 0x00015e},
+   {0x000160, 0x000160},
+   {0x000162, 0x000162},
+   {0x000164, 0x000164},
+   {0x000166, 0x000166},
+   {0x000168, 0x000168},
+   {0x00016a, 0x00016a},
+   {0x00016c, 0x00016c},
+   {0x00016e, 0x00016e},
+   {0x000170, 0x000170},
+   {0x000172, 0x000172},
+   {0x000174, 0x000174},
+   {0x000176, 0x000176},
+   {0x000178, 0x000179},
+   {0x00017b, 0x00017b},
+   {0x00017d, 0x00017d},
+   {0x000181, 0x000182},
+   {0x000184, 0x000184},
+   {0x000186, 0x000187},
+   {0x000189, 0x00018b},
+   {0x00018e, 0x000191},
+   {0x000193, 0x000194},
+   {0x000196, 0x000198},
+   {0x00019c, 0x00019d},
+   {0x00019f, 0x0001a0},
+   {0x0001a2, 0x0001a2},
+   {0x0001a4, 0x0001a4},
+   {0x0001a6, 0x0001a7},
+   {0x0001a9, 0x0001a9},
+   {0x0001ac, 0x0001ac},
+   {0x0001ae, 0x0001af},
+   {0x0001b1, 0x0001b3},
+   {0x0001b5, 0x0001b5},
+   {0x0001b7, 0x0001b8},
+   {0x0001bc, 0x0001bc},
+   {0x0001c4, 0x0001c4},
+   {0x0001c7, 0x0001c7},
+   {0x0001ca, 0x0001ca},
+   {0x0001cd, 0x0001cd},
+   {0x0001cf, 0x0001cf},
+   {0x0001d1, 0x0001d1},
+   {0x0001d3, 0x0001d3},
+   {0x0001d5, 0x0001d5},
+   {0x0001d7, 0x0001d7},
+   {0x0001d9, 0x0001d9},
+   {0x0001db, 0x0001db},
+   {0x0001de, 0x0001de},
+   {0x0001e0, 0x0001e0},
+   {0x0001e2, 0x0001e2},
+   {0x0001e4, 0x0001e4},
+   {0x0001e6, 0x0001e6},
+   {0x0001e8, 0x0001e8},
+   {0x0001ea, 0x0001ea},
+   {0x0001ec, 0x0001ec},
+   {0x0001ee, 0x0001ee},
+   {0x0001f1, 0x0001f1},
+   {0x0001f4, 0x0001f4},
+   {0x0001f6, 0x0001f8},
+   {0x0001fa, 0x0001fa},
+   {0x0001fc, 0x0001fc},
+   {0x0001fe, 0x0001fe},
+   {0x000200, 0x000200},
+   {0x000202, 0x000202},
+   {0x000204, 0x000204},
+   {0x000206, 0x000206},
+   {0x000208, 0x000208},
+   {0x00020a, 0x00020a},
+   {0x00020c, 0x00020c},
+   {0x00020e, 0x00020e},
+   {0x000210, 0x000210},
+   {0x000212, 0x000212},
+   {0x000214, 0x000214},
+   {0x000216, 0x000216},
+   {0x000218, 0x000218},
+   {0x00021a, 0x00021a},
+   {0x00021c, 0x00021c},
+   {0x00021e, 0x00021e},
+   {0x000220, 0x000220},
+   {0x000222, 0x000222},
+   {0x000224, 0x000224},
+   {0x000226, 0x000226},
+   {0x000228, 0x000228},
+   {0x00022a, 0x00022a},
+   {0x00022c, 0x00022c},
+   {0x00022e, 0x00022e},
+   {0x000230, 0x000230},
+   {0x000232, 0x000232},
+   {0x00023a, 0x00023b},
+   {0x00023d, 0x00023e},
+   {0x000241, 0x000241},
+   {0x000243, 0x000246},
+   {0x000248, 0x000248},
+   {0x00024a, 0x00024a},
+   {0x00024c, 0x00024c},
+   {0x00024e, 0x00024e},
+   {0x000370, 0x000370},
+   {0x000372, 0x000372},
+   {0x000376, 0x000376},
+   {0x00037f, 0x00037f},
+   {0x000386, 0x000386},
+   {0x000388, 0x00038a},
+   {0x00038c, 0x00038c},
+   {0x00038e, 0x00038f},
+   {0x000391, 0x0003a1},
+   {0x0003a3, 0x0003ab},
+   {0x0003cf, 0x0003cf},
+   {0x0003d2, 0x0003d4},
+   {0x0003d8, 0x0003d8},
+   {0x0003da, 0x0003da},
+   {0x0003dc, 0x0003dc},
+   {0x0003de, 0x0003de},
+   {0x0003e0, 0x0003e0},
+   {0x0003e2, 0x0003e2},
+   {0x0003e4, 0x0003e4},
+   {0x0003e6, 0x0003e6},
+   {0x0003e8, 0x0003e8},
+   {0x0003ea, 0x0003ea},
+   {0x0003ec, 0x0003ec},
+   {0x0003ee, 0x0003ee},
+   {0x0003f4, 0x0003f4},
+   {0x0003f7, 0x0003f7},
+   {0x0003f9, 0x0003fa},
+   {0x0003fd, 0x00042f},
+   {0x000460, 0x000460},
+   {0x000462, 0x000462},
+   {0x000464, 0x000464},
+   {0x000466, 0x000466},
+   {0x000468, 0x000468},
+   {0x00046a, 0x00046a},
+   {0x00046c, 0x00046c},
+   {0x00046e, 0x00046e},
+   {0x000470, 0x000470},
+   {0x000472, 0x000472},
+   {0x000474, 0x000474},
+   {0x000476, 0x000476},
+   {0x000478, 0x000478},
+   {0x00047a, 0x00047a},
+   {0x00047c, 0x00047c},
+   {0x00047e, 0x00047e},
+   {0x000480, 0x000480},
+   {0x00048a, 0x00048a},
+   {0x00048c, 0x00048c},
+   {0x00048e, 0x00048e},
+   {0x000490, 0x000490},
+   {0x000492, 0x000492},
+   {0x000494, 0x000494},
+   {0x000496, 0x000496},
+   {0x000498, 0x000498},
+   {0x00049a, 0x00049a},
+   {0x00049c, 0x00049c},
+   {0x00049e, 0x00049e},
+   {0x0004a0, 0x0004a0},
+   {0x0004a2, 0x0004a2},
+   {0x0004a4, 0x0004a4},
+   {0x0004a6, 0x0004a6},
+   {0x0004a8, 0x0004a8},
+   {0x0004aa, 0x0004aa},
+   {0x0004ac, 0x0004ac},
+   {0x0004ae, 0x0004ae},
+   {0x0004b0, 0x0004b0},
+   {0x0004b2, 0x0004b2},
+   {0x0004b4, 0x0004b4},
+   {0x0004b6, 0x0004b6},
+   {0x0004b8, 0x0004b8},
+   {0x0004ba, 0x0004ba},
+   {0x0004bc, 0x0004bc},
+   {0x0004be, 0x0004be},
+   {0x0004c0, 0x0004c1},
+   {0x0004c3, 0x0004c3},
+   {0x0004c5, 0x0004c5},
+   {0x0004c7, 0x0004c7},
+   {0x0004c9, 0x0004c9},
+   {0x0004cb, 0x0004cb},
+   {0x0004cd, 0x0004cd},
+   {0x0004d0, 0x0004d0},
+   {0x0004d2, 0x0004d2},
+   {0x0004d4, 0x0004d4},
+   {0x0004d6, 0x0004d6},
+   {0x0004d8, 0x0004d8},
+   {0x0004da, 0x0004da},
+   {0x0004dc, 0x0004dc},
+   {0x0004de, 0x0004de},
+   {0x0004e0, 0x0004e0},
+   {0x0004e2, 0x0004e2},
+   {0x0004e4, 0x0004e4},
+   {0x0004e6, 0x0004e6},
+   {0x0004e8, 0x0004e8},
+   {0x0004ea, 0x0004ea},
+   {0x0004ec, 0x0004ec},
+   {0x0004ee, 0x0004ee},
+   {0x0004f0, 0x0004f0},
+   {0x0004f2, 0x0004f2},
+   {0x0004f4, 0x0004f4},
+   {0x0004f6, 0x0004f6},
+   {0x0004f8, 0x0004f8},
+   {0x0004fa, 0x0004fa},
+   {0x0004fc, 0x0004fc},
+   {0x0004fe, 0x0004fe},
+   {0x000500, 0x000500},
+   {0x000502, 0x000502},
+   {0x000504, 0x000504},
+   {0x000506, 0x000506},
+   {0x000508, 0x000508},
+   {0x00050a, 0x00050a},
+   {0x00050c, 0x00050c},
+   {0x00050e, 0x00050e},
+   {0x000510, 0x000510},
+   {0x000512, 0x000512},
+   {0x000514, 0x000514},
+   {0x000516, 0x000516},
+   {0x000518, 0x000518},
+   {0x00051a, 0x00051a},
+   {0x00051c, 0x00051c},
+   {0x00051e, 0x00051e},
+   {0x000520, 0x000520},
+   {0x000522, 0x000522},
+   {0x000524, 0x000524},
+   {0x000526, 0x000526},
+   {0x000528, 0x000528},
+   {0x00052a, 0x00052a},
+   {0x00052c, 0x00052c},
+   {0x00052e, 0x00052e},
+   {0x000531, 0x000556},
+   {0x0010a0, 0x0010c5},
+   {0x0010c7, 0x0010c7},
+   {0x0010cd, 0x0010cd},
+   {0x0013a0, 0x0013f5},
+   {0x001c90, 0x001cba},
+   {0x001cbd, 0x001cbf},
+   {0x001e00, 0x001e00},
+   {0x001e02, 0x001e02},
+   {0x001e04, 0x001e04},
+   {0x001e06, 0x001e06},
+   {0x001e08, 0x001e08},
+   {0x001e0a, 0x001e0a},
+   {0x001e0c, 0x001e0c},
+   {0x001e0e, 0x001e0e},
+   {0x001e10, 0x001e10},
+   {0x001e12, 0x001e12},
+   {0x001e14, 0x001e14},
+   {0x001e16, 0x001e16},
+   {0x001e18, 0x001e18},
+   {0x001e1a, 0x001e1a},
+   {0x001e1c, 0x001e1c},
+   {0x001e1e, 0x001e1e},
+   {0x001e20, 0x001e20},
+   {0x001e22, 0x001e22},
+   {0x001e24, 0x001e24},
+   {0x001e26, 0x001e26},
+   {0x001e28, 0x001e28},
+   {0x001e2a, 0x001e2a},
+   {0x001e2c, 0x001e2c},
+   {0x001e2e, 0x001e2e},
+   {0x001e30, 0x001e30},
+   {0x001e32, 0x001e32},
+   {0x001e34, 0x001e34},
+   {0x001e36, 0x001e36},
+   {0x001e38, 0x001e38},
+   {0x001e3a, 0x001e3a},
+   {0x001e3c, 0x001e3c},
+   {0x001e3e, 0x001e3e},
+   {0x001e40, 0x001e40},
+   {0x001e42, 0x001e42},
+   {0x001e44, 0x001e44},
+   {0x001e46, 0x001e46},
+   {0x001e48, 0x001e48},
+   {0x001e4a, 0x001e4a},
+   {0x001e4c, 0x001e4c},
+   {0x001e4e, 0x001e4e},
+   {0x001e50, 0x001e50},
+   {0x001e52, 0x001e52},
+   {0x001e54, 0x001e54},
+   {0x001e56, 0x001e56},
+   {0x001e58, 0x001e58},
+   {0x001e5a, 0x001e5a},
+   {0x001e5c, 0x001e5c},
+   {0x001e5e, 0x001e5e},
+   {0x001e60, 0x001e60},
+   {0x001e62, 0x001e62},
+   {0x001e64, 0x001e64},
+   {0x001e66, 0x001e66},
+   {0x001e68, 0x001e68},
+   {0x001e6a, 0x001e6a},
+   {0x001e6c, 0x001e6c},
+   {0x001e6e, 0x001e6e},
+   {0x001e70, 0x001e70},
+   {0x001e72, 0x001e72},
+   {0x001e74, 0x001e74},
+   {0x001e76, 0x001e76},
+   {0x001e78, 0x001e78},
+   {0x001e7a, 0x001e7a},
+   {0x001e7c, 0x001e7c},
+   {0x001e7e, 0x001e7e},
+   {0x001e80, 0x001e80},
+   {0x001e82, 0x001e82},
+   {0x001e84, 0x001e84},
+   {0x001e86, 0x001e86},
+   {0x001e88, 0x001e88},
+   {0x001e8a, 0x001e8a},
+   {0x001e8c, 0x001e8c},
+   {0x001e8e, 0x001e8e},
+   {0x001e90, 0x001e90},
+   {0x001e92, 0x001e92},
+   {0x001e94, 0x001e94},
+   {0x001e9e, 0x001e9e},
+   {0x001ea0, 0x001ea0},
+   {0x001ea2, 0x001ea2},
+   {0x001ea4, 0x001ea4},
+   {0x001ea6, 0x001ea6},
+   {0x001ea8, 0x001ea8},
+   {0x001eaa, 0x001eaa},
+   {0x001eac, 0x001eac},
+   {0x001eae, 0x001eae},
+   {0x001eb0, 0x001eb0},
+   {0x001eb2, 0x001eb2},
+   {0x001eb4, 0x001eb4},
+   {0x001eb6, 0x001eb6},
+   {0x001eb8, 0x001eb8},
+   {0x001eba, 0x001eba},
+   {0x001ebc, 0x001ebc},
+   {0x001ebe, 0x001ebe},
+   {0x001ec0, 0x001ec0},
+   {0x001ec2, 0x001ec2},
+   {0x001ec4, 0x001ec4},
+   {0x001ec6, 0x001ec6},
+   {0x001ec8, 0x001ec8},
+   {0x001eca, 0x001eca},
+   {0x001ecc, 0x001ecc},
+   {0x001ece, 0x001ece},
+   {0x001ed0, 0x001ed0},
+   {0x001ed2, 0x001ed2},
+   {0x001ed4, 0x001ed4},
+   {0x001ed6, 0x001ed6},
+   {0x001ed8, 0x001ed8},
+   {0x001eda, 0x001eda},
+   {0x001edc, 0x001edc},
+   {0x001ede, 0x001ede},
+   {0x001ee0, 0x001ee0},
+   {0x001ee2, 0x001ee2},
+   {0x001ee4, 0x001ee4},
+   {0x001ee6, 0x001ee6},
+   {0x001ee8, 0x001ee8},
+   {0x001eea, 0x001eea},
+   {0x001eec, 0x001eec},
+   {0x001eee, 0x001eee},
+   {0x001ef0, 0x001ef0},
+   {0x001ef2, 0x001ef2},
+   {0x001ef4, 0x001ef4},
+   {0x001ef6, 0x001ef6},
+   {0x001ef8, 0x001ef8},
+   {0x001efa, 0x001efa},
+   {0x001efc, 0x001efc},
+   {0x001efe, 0x001efe},
+   {0x001f08, 0x001f0f},
+   {0x001f18, 0x001f1d},
+   {0x001f28, 0x001f2f},
+   {0x001f38, 0x001f3f},
+   {0x001f48, 0x001f4d},
+   {0x001f59, 0x001f59},
+   {0x001f5b, 0x001f5b},
+   {0x001f5d, 0x001f5d},
+   {0x001f5f, 0x001f5f},
+   {0x001f68, 0x001f6f},
+   {0x001fb8, 0x001fbb},
+   {0x001fc8, 0x001fcb},
+   {0x001fd8, 0x001fdb},
+   {0x001fe8, 0x001fec},
+   {0x001ff8, 0x001ffb},
+   {0x002102, 0x002102},
+   {0x002107, 0x002107},
+   {0x00210b, 0x00210d},
+   {0x002110, 0x002112},
+   {0x002115, 0x002115},
+   {0x002119, 0x00211d},
+   {0x002124, 0x002124},
+   {0x002126, 0x002126},
+   {0x002128, 0x002128},
+   {0x00212a, 0x00212d},
+   {0x002130, 0x002133},
+   {0x00213e, 0x00213f},
+   {0x002145, 0x002145},
+   {0x002160, 0x00216f},
+   {0x002183, 0x002183},
+   {0x0024b6, 0x0024cf},
+   {0x002c00, 0x002c2f},
+   {0x002c60, 0x002c60},
+   {0x002c62, 0x002c64},
+   {0x002c67, 0x002c67},
+   {0x002c69, 0x002c69},
+   {0x002c6b, 0x002c6b},
+   {0x002c6d, 0x002c70},
+   {0x002c72, 0x002c72},
+   {0x002c75, 0x002c75},
+   {0x002c7e, 0x002c80},
+   {0x002c82, 0x002c82},
+   {0x002c84, 0x002c84},
+   {0x002c86, 0x002c86},
+   {0x002c88, 0x002c88},
+   {0x002c8a, 0x002c8a},
+   {0x002c8c, 0x002c8c},
+   {0x002c8e, 0x002c8e},
+   {0x002c90, 0x002c90},
+   {0x002c92, 0x002c92},
+   {0x002c94, 0x002c94},
+   {0x002c96, 0x002c96},
+   {0x002c98, 0x002c98},
+   {0x002c9a, 0x002c9a},
+   {0x002c9c, 0x002c9c},
+   {0x002c9e, 0x002c9e},
+   {0x002ca0, 0x002ca0},
+   {0x002ca2, 0x002ca2},
+   {0x002ca4, 0x002ca4},
+   {0x002ca6, 0x002ca6},
+   {0x002ca8, 0x002ca8},
+   {0x002caa, 0x002caa},
+   {0x002cac, 0x002cac},
+   {0x002cae, 0x002cae},
+   {0x002cb0, 0x002cb0},
+   {0x002cb2, 0x002cb2},
+   {0x002cb4, 0x002cb4},
+   {0x002cb6, 0x002cb6},
+   {0x002cb8, 0x002cb8},
+   {0x002cba, 0x002cba},
+   {0x002cbc, 0x002cbc},
+   {0x002cbe, 0x002cbe},
+   {0x002cc0, 0x002cc0},
+   {0x002cc2, 0x002cc2},
+   {0x002cc4, 0x002cc4},
+   {0x002cc6, 0x002cc6},
+   {0x002cc8, 0x002cc8},
+   {0x002cca, 0x002cca},
+   {0x002ccc, 0x002ccc},
+   {0x002cce, 0x002cce},
+   {0x002cd0, 0x002cd0},
+   {0x002cd2, 0x002cd2},
+   {0x002cd4, 0x002cd4},
+   {0x002cd6, 0x002cd6},
+   {0x002cd8, 0x002cd8},
+   {0x002cda, 0x002cda},
+   {0x002cdc, 0x002cdc},
+   {0x002cde, 0x002cde},
+   {0x002ce0, 0x002ce0},
+   {0x002ce2, 0x002ce2},
+   {0x002ceb, 0x002ceb},
+   {0x002ced, 0x002ced},
+   {0x002cf2, 0x002cf2},
+   {0x00a640, 0x00a640},
+   {0x00a642, 0x00a642},
+   {0x00a644, 0x00a644},
+   {0x00a646, 0x00a646},
+   {0x00a648, 0x00a648},
+   {0x00a64a, 0x00a64a},
+   {0x00a64c, 0x00a64c},
+   {0x00a64e, 0x00a64e},
+   {0x00a650, 0x00a650},
+   {0x00a652, 0x00a652},
+   {0x00a654, 0x00a654},
+   {0x00a656, 0x00a656},
+   {0x00a658, 0x00a658},
+   {0x00a65a, 0x00a65a},
+   {0x00a65c, 0x00a65c},
+   {0x00a65e, 0x00a65e},
+   {0x00a660, 0x00a660},
+   {0x00a662, 0x00a662},
+   {0x00a664, 0x00a664},
+   {0x00a666, 0x00a666},
+   {0x00a668, 0x00a668},
+   {0x00a66a, 0x00a66a},
+   {0x00a66c, 0x00a66c},
+   {0x00a680, 0x00a680},
+   {0x00a682, 0x00a682},
+   {0x00a684, 0x00a684},
+   {0x00a686, 0x00a686},
+   {0x00a688, 0x00a688},
+   {0x00a68a, 0x00a68a},
+   {0x00a68c, 0x00a68c},
+   {0x00a68e, 0x00a68e},
+   {0x00a690, 0x00a690},
+   {0x00a692, 0x00a692},
+   {0x00a694, 0x00a694},
+   {0x00a696, 0x00a696},
+   {0x00a698, 0x00a698},
+   {0x00a69a, 0x00a69a},
+   {0x00a722, 0x00a722},
+   {0x00a724, 0x00a724},
+   {0x00a726, 0x00a726},
+   {0x00a728, 0x00a728},
+   {0x00a72a, 0x00a72a},
+   {0x00a72c, 0x00a72c},
+   {0x00a72e, 0x00a72e},
+   {0x00a732, 0x00a732},
+   {0x00a734, 0x00a734},
+   {0x00a736, 0x00a736},
+   {0x00a738, 0x00a738},
+   {0x00a73a, 0x00a73a},
+   {0x00a73c, 0x00a73c},
+   {0x00a73e, 0x00a73e},
+   {0x00a740, 0x00a740},
+   {0x00a742, 0x00a742},
+   {0x00a744, 0x00a744},
+   {0x00a746, 0x00a746},
+   {0x00a748, 0x00a748},
+   {0x00a74a, 0x00a74a},
+   {0x00a74c, 0x00a74c},
+   {0x00a74e, 0x00a74e},
+   {0x00a750, 0x00a750},
+   {0x00a752, 0x00a752},
+   {0x00a754, 0x00a754},
+   {0x00a756, 0x00a756},
+   {0x00a758, 0x00a758},
+   {0x00a75a, 0x00a75a},
+   {0x00a75c, 0x00a75c},
+   {0x00a75e, 0x00a75e},
+   {0x00a760, 0x00a760},
+   {0x00a762, 0x00a762},
+   {0x00a764, 0x00a764},
+   {0x00a766, 0x00a766},
+   {0x00a768, 0x00a768},
+   {0x00a76a, 0x00a76a},
+   {0x00a76c, 0x00a76c},
+   {0x00a76e, 0x00a76e},
+   {0x00a779, 0x00a779},
+   {0x00a77b, 0x00a77b},
+   {0x00a77d, 0x00a77e},
+   {0x00a780, 0x00a780},
+   {0x00a782, 0x00a782},
+   {0x00a784, 0x00a784},
+   {0x00a786, 0x00a786},
+   {0x00a78b, 0x00a78b},
+   {0x00a78d, 0x00a78d},
+   {0x00a790, 0x00a790},
+   {0x00a792, 0x00a792},
+   {0x00a796, 0x00a796},
+   {0x00a798, 0x00a798},
+   {0x00a79a, 0x00a79a},
+   {0x00a79c, 0x00a79c},
+   {0x00a79e, 0x00a79e},
+   {0x00a7a0, 0x00a7a0},
+   {0x00a7a2, 0x00a7a2},
+   {0x00a7a4, 0x00a7a4},
+   {0x00a7a6, 0x00a7a6},
+   {0x00a7a8, 0x00a7a8},
+   {0x00a7aa, 0x00a7ae},
+   {0x00a7b0, 0x00a7b4},
+   {0x00a7b6, 0x00a7b6},
+   {0x00a7b8, 0x00a7b8},
+   {0x00a7ba, 0x00a7ba},
+   {0x00a7bc, 0x00a7bc},
+   {0x00a7be, 0x00a7be},
+   {0x00a7c0, 0x00a7c0},
+   {0x00a7c2, 0x00a7c2},
+   {0x00a7c4, 0x00a7c7},
+   {0x00a7c9, 0x00a7c9},
+   {0x00a7d0, 0x00a7d0},
+   {0x00a7d6, 0x00a7d6},
+   {0x00a7d8, 0x00a7d8},
+   {0x00a7f5, 0x00a7f5},
+   {0x00ff21, 0x00ff3a},
+   {0x010400, 0x010427},
+   {0x0104b0, 0x0104d3},
+   {0x010570, 0x01057a},
+   {0x01057c, 0x01058a},
+   {0x01058c, 0x010592},
+   {0x010594, 0x010595},
+   {0x010c80, 0x010cb2},
+   {0x0118a0, 0x0118bf},
+   {0x016e40, 0x016e5f},
+   {0x01d400, 0x01d419},
+   {0x01d434, 0x01d44d},
+   {0x01d468, 0x01d481},
+   {0x01d49c, 0x01d49c},
+   {0x01d49e, 0x01d49f},
+   {0x01d4a2, 0x01d4a2},
+   {0x01d4a5, 0x01d4a6},
+   {0x01d4a9, 0x01d4ac},
+   {0x01d4ae, 0x01d4b5},
+   {0x01d4d0, 0x01d4e9},
+   {0x01d504, 0x01d505},
+   {0x01d507, 0x01d50a},
+   {0x01d50d, 0x01d514},
+   {0x01d516, 0x01d51c},
+   {0x01d538, 0x01d539},
+   {0x01d53b, 0x01d53e},
+   {0x01d540, 0x01d544},
+   {0x01d546, 0x01d546},
+   {0x01d54a, 0x01d550},
+   {0x01d56c, 0x01d585},
+   {0x01d5a0, 0x01d5b9},
+   {0x01d5d4, 0x01d5ed},
+   {0x01d608, 0x01d621},
+   {0x01d63c, 0x01d655},
+   {0x01d670, 0x01d689},
+   {0x01d6a8, 0x01d6c0},
+   {0x01d6e2, 0x01d6fa},
+   {0x01d71c, 0x01d734},
+   {0x01d756, 0x01d76e},
+   {0x01d790, 0x01d7a8},
+   {0x01d7ca, 0x01d7ca},
+   {0x01e900, 0x01e921},
+   {0x01f130, 0x01f149},
+   {0x01f150, 0x01f169},
+   {0x01f170, 0x01f189},
+};
+
+/* table of Unicode codepoint ranges of Case_Ignorable characters */
+static const pg_unicode_range unicode_case_ignorable[491] =
+{
+   {0x000027, 0x000027},
+   {0x00002e, 0x00002e},
+   {0x00003a, 0x00003a},
+   {0x00005e, 0x00005e},
+   {0x000060, 0x000060},
+   {0x0000a8, 0x0000a8},
+   {0x0000ad, 0x0000ad},
+   {0x0000af, 0x0000af},
+   {0x0000b4, 0x0000b4},
+   {0x0000b7, 0x0000b7},
+   {0x0000b8, 0x0000b8},
+   {0x0002b0, 0x0002c1},
+   {0x0002c2, 0x0002c5},
+   {0x0002c6, 0x0002d1},
+   {0x0002d2, 0x0002df},
+   {0x0002e0, 0x0002e4},
+   {0x0002e5, 0x0002eb},
+   {0x0002ec, 0x0002ec},
+   {0x0002ed, 0x0002ed},
+   {0x0002ee, 0x0002ee},
+   {0x0002ef, 0x0002ff},
+   {0x000300, 0x00036f},
+   {0x000374, 0x000374},
+   {0x000375, 0x000375},
+   {0x00037a, 0x00037a},
+   {0x000384, 0x000385},
+   {0x000387, 0x000387},
+   {0x000483, 0x000487},
+   {0x000488, 0x000489},
+   {0x000559, 0x000559},
+   {0x00055f, 0x00055f},
+   {0x000591, 0x0005bd},
+   {0x0005bf, 0x0005bf},
+   {0x0005c1, 0x0005c2},
+   {0x0005c4, 0x0005c5},
+   {0x0005c7, 0x0005c7},
+   {0x0005f4, 0x0005f4},
+   {0x000600, 0x000605},
+   {0x000610, 0x00061a},
+   {0x00061c, 0x00061c},
+   {0x000640, 0x000640},
+   {0x00064b, 0x00065f},
+   {0x000670, 0x000670},
+   {0x0006d6, 0x0006dc},
+   {0x0006dd, 0x0006dd},
+   {0x0006df, 0x0006e4},
+   {0x0006e5, 0x0006e6},
+   {0x0006e7, 0x0006e8},
+   {0x0006ea, 0x0006ed},
+   {0x00070f, 0x00070f},
+   {0x000711, 0x000711},
+   {0x000730, 0x00074a},
+   {0x0007a6, 0x0007b0},
+   {0x0007eb, 0x0007f3},
+   {0x0007f4, 0x0007f5},
+   {0x0007fa, 0x0007fa},
+   {0x0007fd, 0x0007fd},
+   {0x000816, 0x000819},
+   {0x00081a, 0x00081a},
+   {0x00081b, 0x000823},
+   {0x000824, 0x000824},
+   {0x000825, 0x000827},
+   {0x000828, 0x000828},
+   {0x000829, 0x00082d},
+   {0x000859, 0x00085b},
+   {0x000888, 0x000888},
+   {0x000890, 0x000891},
+   {0x000898, 0x00089f},
+   {0x0008c9, 0x0008c9},
+   {0x0008ca, 0x0008e1},
+   {0x0008e2, 0x0008e2},
+   {0x0008e3, 0x000902},
+   {0x00093a, 0x00093a},
+   {0x00093c, 0x00093c},
+   {0x000941, 0x000948},
+   {0x00094d, 0x00094d},
+   {0x000951, 0x000957},
+   {0x000962, 0x000963},
+   {0x000971, 0x000971},
+   {0x000981, 0x000981},
+   {0x0009bc, 0x0009bc},
+   {0x0009c1, 0x0009c4},
+   {0x0009cd, 0x0009cd},
+   {0x0009e2, 0x0009e3},
+   {0x0009fe, 0x0009fe},
+   {0x000a01, 0x000a02},
+   {0x000a3c, 0x000a3c},
+   {0x000a41, 0x000a42},
+   {0x000a47, 0x000a48},
+   {0x000a4b, 0x000a4d},
+   {0x000a51, 0x000a51},
+   {0x000a70, 0x000a71},
+   {0x000a75, 0x000a75},
+   {0x000a81, 0x000a82},
+   {0x000abc, 0x000abc},
+   {0x000ac1, 0x000ac5},
+   {0x000ac7, 0x000ac8},
+   {0x000acd, 0x000acd},
+   {0x000ae2, 0x000ae3},
+   {0x000afa, 0x000aff},
+   {0x000b01, 0x000b01},
+   {0x000b3c, 0x000b3c},
+   {0x000b3f, 0x000b3f},
+   {0x000b41, 0x000b44},
+   {0x000b4d, 0x000b4d},
+   {0x000b55, 0x000b56},
+   {0x000b62, 0x000b63},
+   {0x000b82, 0x000b82},
+   {0x000bc0, 0x000bc0},
+   {0x000bcd, 0x000bcd},
+   {0x000c00, 0x000c00},
+   {0x000c04, 0x000c04},
+   {0x000c3c, 0x000c3c},
+   {0x000c3e, 0x000c40},
+   {0x000c46, 0x000c48},
+   {0x000c4a, 0x000c4d},
+   {0x000c55, 0x000c56},
+   {0x000c62, 0x000c63},
+   {0x000c81, 0x000c81},
+   {0x000cbc, 0x000cbc},
+   {0x000cbf, 0x000cbf},
+   {0x000cc6, 0x000cc6},
+   {0x000ccc, 0x000ccd},
+   {0x000ce2, 0x000ce3},
+   {0x000d00, 0x000d01},
+   {0x000d3b, 0x000d3c},
+   {0x000d41, 0x000d44},
+   {0x000d4d, 0x000d4d},
+   {0x000d62, 0x000d63},
+   {0x000d81, 0x000d81},
+   {0x000dca, 0x000dca},
+   {0x000dd2, 0x000dd4},
+   {0x000dd6, 0x000dd6},
+   {0x000e31, 0x000e31},
+   {0x000e34, 0x000e3a},
+   {0x000e46, 0x000e46},
+   {0x000e47, 0x000e4e},
+   {0x000eb1, 0x000eb1},
+   {0x000eb4, 0x000ebc},
+   {0x000ec6, 0x000ec6},
+   {0x000ec8, 0x000ece},
+   {0x000f18, 0x000f19},
+   {0x000f35, 0x000f35},
+   {0x000f37, 0x000f37},
+   {0x000f39, 0x000f39},
+   {0x000f71, 0x000f7e},
+   {0x000f80, 0x000f84},
+   {0x000f86, 0x000f87},
+   {0x000f8d, 0x000f97},
+   {0x000f99, 0x000fbc},
+   {0x000fc6, 0x000fc6},
+   {0x00102d, 0x001030},
+   {0x001032, 0x001037},
+   {0x001039, 0x00103a},
+   {0x00103d, 0x00103e},
+   {0x001058, 0x001059},
+   {0x00105e, 0x001060},
+   {0x001071, 0x001074},
+   {0x001082, 0x001082},
+   {0x001085, 0x001086},
+   {0x00108d, 0x00108d},
+   {0x00109d, 0x00109d},
+   {0x0010fc, 0x0010fc},
+   {0x00135d, 0x00135f},
+   {0x001712, 0x001714},
+   {0x001732, 0x001733},
+   {0x001752, 0x001753},
+   {0x001772, 0x001773},
+   {0x0017b4, 0x0017b5},
+   {0x0017b7, 0x0017bd},
+   {0x0017c6, 0x0017c6},
+   {0x0017c9, 0x0017d3},
+   {0x0017d7, 0x0017d7},
+   {0x0017dd, 0x0017dd},
+   {0x00180b, 0x00180d},
+   {0x00180e, 0x00180e},
+   {0x00180f, 0x00180f},
+   {0x001843, 0x001843},
+   {0x001885, 0x001886},
+   {0x0018a9, 0x0018a9},
+   {0x001920, 0x001922},
+   {0x001927, 0x001928},
+   {0x001932, 0x001932},
+   {0x001939, 0x00193b},
+   {0x001a17, 0x001a18},
+   {0x001a1b, 0x001a1b},
+   {0x001a56, 0x001a56},
+   {0x001a58, 0x001a5e},
+   {0x001a60, 0x001a60},
+   {0x001a62, 0x001a62},
+   {0x001a65, 0x001a6c},
+   {0x001a73, 0x001a7c},
+   {0x001a7f, 0x001a7f},
+   {0x001aa7, 0x001aa7},
+   {0x001ab0, 0x001abd},
+   {0x001abe, 0x001abe},
+   {0x001abf, 0x001ace},
+   {0x001b00, 0x001b03},
+   {0x001b34, 0x001b34},
+   {0x001b36, 0x001b3a},
+   {0x001b3c, 0x001b3c},
+   {0x001b42, 0x001b42},
+   {0x001b6b, 0x001b73},
+   {0x001b80, 0x001b81},
+   {0x001ba2, 0x001ba5},
+   {0x001ba8, 0x001ba9},
+   {0x001bab, 0x001bad},
+   {0x001be6, 0x001be6},
+   {0x001be8, 0x001be9},
+   {0x001bed, 0x001bed},
+   {0x001bef, 0x001bf1},
+   {0x001c2c, 0x001c33},
+   {0x001c36, 0x001c37},
+   {0x001c78, 0x001c7d},
+   {0x001cd0, 0x001cd2},
+   {0x001cd4, 0x001ce0},
+   {0x001ce2, 0x001ce8},
+   {0x001ced, 0x001ced},
+   {0x001cf4, 0x001cf4},
+   {0x001cf8, 0x001cf9},
+   {0x001d2c, 0x001d6a},
+   {0x001d78, 0x001d78},
+   {0x001d9b, 0x001dbf},
+   {0x001dc0, 0x001dff},
+   {0x001fbd, 0x001fbd},
+   {0x001fbf, 0x001fc1},
+   {0x001fcd, 0x001fcf},
+   {0x001fdd, 0x001fdf},
+   {0x001fed, 0x001fef},
+   {0x001ffd, 0x001ffe},
+   {0x00200b, 0x00200f},
+   {0x002018, 0x002018},
+   {0x002019, 0x002019},
+   {0x002024, 0x002024},
+   {0x002027, 0x002027},
+   {0x00202a, 0x00202e},
+   {0x002060, 0x002064},
+   {0x002066, 0x00206f},
+   {0x002071, 0x002071},
+   {0x00207f, 0x00207f},
+   {0x002090, 0x00209c},
+   {0x0020d0, 0x0020dc},
+   {0x0020dd, 0x0020e0},
+   {0x0020e1, 0x0020e1},
+   {0x0020e2, 0x0020e4},
+   {0x0020e5, 0x0020f0},
+   {0x002c7c, 0x002c7d},
+   {0x002cef, 0x002cf1},
+   {0x002d6f, 0x002d6f},
+   {0x002d7f, 0x002d7f},
+   {0x002de0, 0x002dff},
+   {0x002e2f, 0x002e2f},
+   {0x003005, 0x003005},
+   {0x00302a, 0x00302d},
+   {0x003031, 0x003035},
+   {0x00303b, 0x00303b},
+   {0x003099, 0x00309a},
+   {0x00309b, 0x00309c},
+   {0x00309d, 0x00309e},
+   {0x0030fc, 0x0030fe},
+   {0x00a015, 0x00a015},
+   {0x00a4f8, 0x00a4fd},
+   {0x00a60c, 0x00a60c},
+   {0x00a66f, 0x00a66f},
+   {0x00a670, 0x00a672},
+   {0x00a674, 0x00a67d},
+   {0x00a67f, 0x00a67f},
+   {0x00a69c, 0x00a69d},
+   {0x00a69e, 0x00a69f},
+   {0x00a6f0, 0x00a6f1},
+   {0x00a700, 0x00a716},
+   {0x00a717, 0x00a71f},
+   {0x00a720, 0x00a721},
+   {0x00a770, 0x00a770},
+   {0x00a788, 0x00a788},
+   {0x00a789, 0x00a78a},
+   {0x00a7f2, 0x00a7f4},
+   {0x00a7f8, 0x00a7f9},
+   {0x00a802, 0x00a802},
+   {0x00a806, 0x00a806},
+   {0x00a80b, 0x00a80b},
+   {0x00a825, 0x00a826},
+   {0x00a82c, 0x00a82c},
+   {0x00a8c4, 0x00a8c5},
+   {0x00a8e0, 0x00a8f1},
+   {0x00a8ff, 0x00a8ff},
+   {0x00a926, 0x00a92d},
+   {0x00a947, 0x00a951},
+   {0x00a980, 0x00a982},
+   {0x00a9b3, 0x00a9b3},
+   {0x00a9b6, 0x00a9b9},
+   {0x00a9bc, 0x00a9bd},
+   {0x00a9cf, 0x00a9cf},
+   {0x00a9e5, 0x00a9e5},
+   {0x00a9e6, 0x00a9e6},
+   {0x00aa29, 0x00aa2e},
+   {0x00aa31, 0x00aa32},
+   {0x00aa35, 0x00aa36},
+   {0x00aa43, 0x00aa43},
+   {0x00aa4c, 0x00aa4c},
+   {0x00aa70, 0x00aa70},
+   {0x00aa7c, 0x00aa7c},
+   {0x00aab0, 0x00aab0},
+   {0x00aab2, 0x00aab4},
+   {0x00aab7, 0x00aab8},
+   {0x00aabe, 0x00aabf},
+   {0x00aac1, 0x00aac1},
+   {0x00aadd, 0x00aadd},
+   {0x00aaec, 0x00aaed},
+   {0x00aaf3, 0x00aaf4},
+   {0x00aaf6, 0x00aaf6},
+   {0x00ab5b, 0x00ab5b},
+   {0x00ab5c, 0x00ab5f},
+   {0x00ab69, 0x00ab69},
+   {0x00ab6a, 0x00ab6b},
+   {0x00abe5, 0x00abe5},
+   {0x00abe8, 0x00abe8},
+   {0x00abed, 0x00abed},
+   {0x00fb1e, 0x00fb1e},
+   {0x00fbb2, 0x00fbc2},
+   {0x00fe00, 0x00fe0f},
+   {0x00fe13, 0x00fe13},
+   {0x00fe20, 0x00fe2f},
+   {0x00fe52, 0x00fe52},
+   {0x00fe55, 0x00fe55},
+   {0x00feff, 0x00feff},
+   {0x00ff07, 0x00ff07},
+   {0x00ff0e, 0x00ff0e},
+   {0x00ff1a, 0x00ff1a},
+   {0x00ff3e, 0x00ff3e},
+   {0x00ff40, 0x00ff40},
+   {0x00ff70, 0x00ff70},
+   {0x00ff9e, 0x00ff9f},
+   {0x00ffe3, 0x00ffe3},
+   {0x00fff9, 0x00fffb},
+   {0x0101fd, 0x0101fd},
+   {0x0102e0, 0x0102e0},
+   {0x010376, 0x01037a},
+   {0x010780, 0x010785},
+   {0x010787, 0x0107b0},
+   {0x0107b2, 0x0107ba},
+   {0x010a01, 0x010a03},
+   {0x010a05, 0x010a06},
+   {0x010a0c, 0x010a0f},
+   {0x010a38, 0x010a3a},
+   {0x010a3f, 0x010a3f},
+   {0x010ae5, 0x010ae6},
+   {0x010d24, 0x010d27},
+   {0x010eab, 0x010eac},
+   {0x010efd, 0x010eff},
+   {0x010f46, 0x010f50},
+   {0x010f82, 0x010f85},
+   {0x011001, 0x011001},
+   {0x011038, 0x011046},
+   {0x011070, 0x011070},
+   {0x011073, 0x011074},
+   {0x01107f, 0x011081},
+   {0x0110b3, 0x0110b6},
+   {0x0110b9, 0x0110ba},
+   {0x0110bd, 0x0110bd},
+   {0x0110c2, 0x0110c2},
+   {0x0110cd, 0x0110cd},
+   {0x011100, 0x011102},
+   {0x011127, 0x01112b},
+   {0x01112d, 0x011134},
+   {0x011173, 0x011173},
+   {0x011180, 0x011181},
+   {0x0111b6, 0x0111be},
+   {0x0111c9, 0x0111cc},
+   {0x0111cf, 0x0111cf},
+   {0x01122f, 0x011231},
+   {0x011234, 0x011234},
+   {0x011236, 0x011237},
+   {0x01123e, 0x01123e},
+   {0x011241, 0x011241},
+   {0x0112df, 0x0112df},
+   {0x0112e3, 0x0112ea},
+   {0x011300, 0x011301},
+   {0x01133b, 0x01133c},
+   {0x011340, 0x011340},
+   {0x011366, 0x01136c},
+   {0x011370, 0x011374},
+   {0x011438, 0x01143f},
+   {0x011442, 0x011444},
+   {0x011446, 0x011446},
+   {0x01145e, 0x01145e},
+   {0x0114b3, 0x0114b8},
+   {0x0114ba, 0x0114ba},
+   {0x0114bf, 0x0114c0},
+   {0x0114c2, 0x0114c3},
+   {0x0115b2, 0x0115b5},
+   {0x0115bc, 0x0115bd},
+   {0x0115bf, 0x0115c0},
+   {0x0115dc, 0x0115dd},
+   {0x011633, 0x01163a},
+   {0x01163d, 0x01163d},
+   {0x01163f, 0x011640},
+   {0x0116ab, 0x0116ab},
+   {0x0116ad, 0x0116ad},
+   {0x0116b0, 0x0116b5},
+   {0x0116b7, 0x0116b7},
+   {0x01171d, 0x01171f},
+   {0x011722, 0x011725},
+   {0x011727, 0x01172b},
+   {0x01182f, 0x011837},
+   {0x011839, 0x01183a},
+   {0x01193b, 0x01193c},
+   {0x01193e, 0x01193e},
+   {0x011943, 0x011943},
+   {0x0119d4, 0x0119d7},
+   {0x0119da, 0x0119db},
+   {0x0119e0, 0x0119e0},
+   {0x011a01, 0x011a0a},
+   {0x011a33, 0x011a38},
+   {0x011a3b, 0x011a3e},
+   {0x011a47, 0x011a47},
+   {0x011a51, 0x011a56},
+   {0x011a59, 0x011a5b},
+   {0x011a8a, 0x011a96},
+   {0x011a98, 0x011a99},
+   {0x011c30, 0x011c36},
+   {0x011c38, 0x011c3d},
+   {0x011c3f, 0x011c3f},
+   {0x011c92, 0x011ca7},
+   {0x011caa, 0x011cb0},
+   {0x011cb2, 0x011cb3},
+   {0x011cb5, 0x011cb6},
+   {0x011d31, 0x011d36},
+   {0x011d3a, 0x011d3a},
+   {0x011d3c, 0x011d3d},
+   {0x011d3f, 0x011d45},
+   {0x011d47, 0x011d47},
+   {0x011d90, 0x011d91},
+   {0x011d95, 0x011d95},
+   {0x011d97, 0x011d97},
+   {0x011ef3, 0x011ef4},
+   {0x011f00, 0x011f01},
+   {0x011f36, 0x011f3a},
+   {0x011f40, 0x011f40},
+   {0x011f42, 0x011f42},
+   {0x013430, 0x01343f},
+   {0x013440, 0x013440},
+   {0x013447, 0x013455},
+   {0x016af0, 0x016af4},
+   {0x016b30, 0x016b36},
+   {0x016b40, 0x016b43},
+   {0x016f4f, 0x016f4f},
+   {0x016f8f, 0x016f92},
+   {0x016f93, 0x016f9f},
+   {0x016fe0, 0x016fe1},
+   {0x016fe3, 0x016fe3},
+   {0x016fe4, 0x016fe4},
+   {0x01aff0, 0x01aff3},
+   {0x01aff5, 0x01affb},
+   {0x01affd, 0x01affe},
+   {0x01bc9d, 0x01bc9e},
+   {0x01bca0, 0x01bca3},
+   {0x01cf00, 0x01cf2d},
+   {0x01cf30, 0x01cf46},
+   {0x01d167, 0x01d169},
+   {0x01d173, 0x01d17a},
+   {0x01d17b, 0x01d182},
+   {0x01d185, 0x01d18b},
+   {0x01d1aa, 0x01d1ad},
+   {0x01d242, 0x01d244},
+   {0x01da00, 0x01da36},
+   {0x01da3b, 0x01da6c},
+   {0x01da75, 0x01da75},
+   {0x01da84, 0x01da84},
+   {0x01da9b, 0x01da9f},
+   {0x01daa1, 0x01daaf},
+   {0x01e000, 0x01e006},
+   {0x01e008, 0x01e018},
+   {0x01e01b, 0x01e021},
+   {0x01e023, 0x01e024},
+   {0x01e026, 0x01e02a},
+   {0x01e030, 0x01e06d},
+   {0x01e08f, 0x01e08f},
+   {0x01e130, 0x01e136},
+   {0x01e137, 0x01e13d},
+   {0x01e2ae, 0x01e2ae},
+   {0x01e2ec, 0x01e2ef},
+   {0x01e4eb, 0x01e4eb},
+   {0x01e4ec, 0x01e4ef},
+   {0x01e8d0, 0x01e8d6},
+   {0x01e944, 0x01e94a},
+   {0x01e94b, 0x01e94b},
+   {0x01f3fb, 0x01f3ff},
+   {0x0e0001, 0x0e0001},
+   {0x0e0020, 0x0e007f},
+   {0x0e0100, 0x0e01ef},
+};
+
+/* table of Unicode codepoint ranges of White_Space characters */
+static const pg_unicode_range unicode_white_space[11] =
+{
+   {0x000009, 0x00000d},
+   {0x000020, 0x000020},
+   {0x000085, 0x000085},
+   {0x0000a0, 0x0000a0},
+   {0x001680, 0x001680},
+   {0x002000, 0x00200a},
+   {0x002028, 0x002028},
+   {0x002029, 0x002029},
+   {0x00202f, 0x00202f},
+   {0x00205f, 0x00205f},
+   {0x003000, 0x003000},
+};
+
+/* table of Unicode codepoint ranges of Hex_Digit characters */
+static const pg_unicode_range unicode_hex_digit[6] =
+{
+   {0x000030, 0x000039},
+   {0x000041, 0x000046},
+   {0x000061, 0x000066},
+   {0x00ff10, 0x00ff19},
+   {0x00ff21, 0x00ff26},
+   {0x00ff41, 0x00ff46},
+};
+
+/* table of Unicode codepoint ranges of Join_Control characters */
+static const pg_unicode_range unicode_join_control[1] =
+{
+   {0x00200c, 0x00200d},
 };