Fixing and improve compound word support. This changes cannot be applied to
authorTeodor Sigaev
Mon, 20 Feb 2006 17:51:05 +0000 (17:51 +0000)
committerTeodor Sigaev
Mon, 20 Feb 2006 17:51:05 +0000 (17:51 +0000)
previous version iwthout recreating tsvector fields...

Thanks to Alexander Presber  to discover a problem.

contrib/tsearch2/ispell/spell.c

index 4ee75e680aedac75cca60cd79f9aea3d32a26af7..1960f9510bde52efe9ba479f62ba93cac254bcd5 100644 (file)
@@ -737,9 +737,9 @@ NISortAffixes(IspellDict * Conf)
        {
            if (firstsuffix < 0)
                firstsuffix = i;
-           if (Affix->flagflags & FF_COMPOUNDONLYAFX)
+           if ((Affix->flagflags & FF_COMPOUNDONLYAFX) && Affix->replen>0 )
            {
-               if (!ptr->affix ||
+               if (ptr == Conf->CompoundAffix ||
                    strbncmp((const unsigned char *) (ptr - 1)->affix,
                             (const unsigned char *) Affix->repl,
                             (ptr - 1)->len))
@@ -1024,17 +1024,31 @@ typedef struct SplitVar
 }  SplitVar;
 
 static int
-CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len)
+CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len, bool CheckInPlace)
 {
-   while ((*ptr)->affix)
-   {
-       if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
+   if ( CheckInPlace ) {
+       while ((*ptr)->affix)
+       {
+           if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
+           {
+               len = (*ptr)->len;
+               (*ptr)++;
+               return len;
+           }
+           (*ptr)++;
+       }
+   } else {
+       char *affbegin;
+       while ((*ptr)->affix)
        {
-           len = (*ptr)->len;
+           if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
+           {
+               len = (*ptr)->len + (affbegin-word);
+               (*ptr)++;
+               return len;
+           }
            (*ptr)++;
-           return len;
        }
-       (*ptr)++;
    }
    return 0;
 }
@@ -1078,26 +1092,11 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
    memset(notprobed, 1, wordlen);
    var = CopyVar(orig, 1);
 
-   while (node && level < wordlen)
+   while (level < wordlen)
    {
-       StopLow = node->data;
-       StopHigh = node->data + node->length;
-       while (StopLow < StopHigh)
-       {
-           StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
-           if (StopMiddle->val == ((uint8 *) (word))[level])
-               break;
-           else if (StopMiddle->val < ((uint8 *) (word))[level])
-               StopLow = StopMiddle + 1;
-           else
-               StopHigh = StopMiddle;
-       }
-       if (StopLow >= StopHigh)
-           break;
-
-       /* find word with epenthetic */
+       /* find word with epenthetic or/and compound suffix */
        caff = Conf->CompoundAffix;
-       while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level)) > 0)
+       while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) > 0)
        {
            /*
             * there is one of compound suffixes, so check word for existings
@@ -1143,41 +1142,61 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
            }
        }
 
-       /* find infinitive */
-       if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level])
+       if ( !node )
+           break; 
+
+       StopLow = node->data;
+       StopHigh = node->data + node->length;
+       while (StopLow < StopHigh)
        {
-           /* ok, we found full compoundallowed word */
-           if (level > minpos)
+           StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+           if (StopMiddle->val == ((uint8 *) (word))[level])
+               break;
+           else if (StopMiddle->val < ((uint8 *) (word))[level])
+               StopLow = StopMiddle + 1;
+           else
+               StopHigh = StopMiddle;
+       }
+
+       if (StopLow < StopHigh) {
+
+           /* find infinitive */
+           if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level])
            {
-               /* and its length more than minimal */
-               if (wordlen == level + 1)
-               {
-                   /* well, it was last word */
-                   var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);
-                   var->nstem++;
-                   pfree(notprobed);
-                   return var;
-               }
-               else
+               /* ok, we found full compoundallowed word */
+               if (level > minpos)
                {
-                   /* then we will search more big word at the same point */
-                   SplitVar   *ptr = var;
-
-                   while (ptr->next)
-                       ptr = ptr->next;
-                   ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
-                   /* we can find next word */
-                   level++;
-                   var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos);
-                   var->nstem++;
-                   node = Conf->Dictionary;
-                   startpos = level;
-                   continue;
+                   /* and its length more than minimal */
+                   if (wordlen == level + 1)
+                   {
+                       /* well, it was last word */
+                       var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);
+                       var->nstem++;
+                       pfree(notprobed);
+                       return var;
+                   }
+                   else
+                   {
+                       /* then we will search more big word at the same point */
+                       SplitVar   *ptr = var;
+   
+                       while (ptr->next)
+                           ptr = ptr->next;
+                       ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
+                       /* we can find next word */
+                       level++;
+                       var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos);
+                       var->nstem++;
+                       node = Conf->Dictionary;
+                       startpos = level;
+                       continue;
+                   }
                }
            }
-       }
+           node = StopMiddle->node;
+       } else
+           node = NULL;  
        level++;
-       node = StopMiddle->node;
    }
 
    var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);