Repair bug in regexp split performance improvements.

author Andrew Gierth
Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)
committer Andrew Gierth
Wed, 12 Sep 2018 18:44:28 +0000 (19:44 +0100)
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c

index 3b7adfb0479df6a4575ac090850b9a84313783d7..92525c2c4de4c2fb463539e554efb91758fcaabf 100644 (file)
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -982,6 +982,7 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
     int         array_len;
     int         array_idx;
     int         prev_match_end;
+   int         prev_valid_match_end;
     int         start_search;
     int         maxlen = 0;     /* largest fetch length in characters */
  
@@ -1024,6 +1025,7 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
  
     /* search for the pattern, perhaps repeatedly */
     prev_match_end = 0;
+   prev_valid_match_end = 0;
     start_search = 0;
     while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
                             pmatch_len, pmatch))
@@ -1076,13 +1078,15 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
             matchctx->nmatches++;
  
             /*
-            * check length of unmatched portion between end of previous match
-            * and start of current one
+            * check length of unmatched portion between end of previous valid
+            * (nondegenerate, or degenerate but not ignored) match and start
+            * of current one
              */
             if (fetching_unmatched &&
                 pmatch[0].rm_so >= 0 &&
-               (pmatch[0].rm_so - prev_match_end) > maxlen)
-               maxlen = (pmatch[0].rm_so - prev_match_end);
+               (pmatch[0].rm_so - prev_valid_match_end) > maxlen)
+               maxlen = (pmatch[0].rm_so - prev_valid_match_end);
+           prev_valid_match_end = pmatch[0].rm_eo;
         }
         prev_match_end = pmatch[0].rm_eo;
  
@@ -1108,8 +1112,8 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
      * input string
      */
     if (fetching_unmatched &&
-       (wide_len - prev_match_end) > maxlen)
-       maxlen = (wide_len - prev_match_end);
+       (wide_len - prev_valid_match_end) > maxlen)
+       maxlen = (wide_len - prev_valid_match_end);
  
     /*
      * Keep a note of the end position of the string for the benefit of
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out

index 35cadb24aa1f61d91185d0bc9a0eda2ac59d8d4b..3b1a7d80b8269ae0c6f13e39fcbe6dc3cda2db78 100644 (file)
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -674,6 +674,24 @@ SELECT regexp_split_to_array('123456','.');
   {"","","","","","",""}
  (1 row)
  
+SELECT regexp_split_to_array('123456','');
+ regexp_split_to_array 
+-----------------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT regexp_split_to_array('123456','(?:)');
+ regexp_split_to_array 
+-----------------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT regexp_split_to_array('1','');
+ regexp_split_to_array 
+-----------------------
+ {1}
+(1 row)
+
  -- errors
  SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
  ERROR:  invalid regexp option: "z"
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql

index f9cfaeb44ac2f55d8f5c859a0951aaba9e1dd381..5e39458bd2234d5e4b13016115bca137f01831db 100644 (file)
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -188,6 +188,9 @@ SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 'nom
  SELECT regexp_split_to_array('123456','1');
  SELECT regexp_split_to_array('123456','6');
  SELECT regexp_split_to_array('123456','.');
+SELECT regexp_split_to_array('123456','');
+SELECT regexp_split_to_array('123456','(?:)');
+SELECT regexp_split_to_array('1','');
  -- errors
  SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
  SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'iz');
author	Andrew Gierth
	Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)
committer	Andrew Gierth
	Wed, 12 Sep 2018 18:44:28 +0000 (19:44 +0100)
src/backend/utils/adt/regexp.c		patch | blob | blame | history
src/test/regress/expected/strings.out		patch | blob | blame | history
src/test/regress/sql/strings.sql		patch | blob | blame | history