--- /dev/null
+This directory contains a module that implements the "Metaphone" code as
+a PostgreSQL user-defined function. The Metaphone system is a method of
+matching similar sounding names (or any words) to the same code.
+
+Metaphone was invented by Lawrence Philips as an improvement to the popular
+name-hashing routine, Soundex.
+
+This metaphone code is from Michael Kuhn, and is detailed at
+ http://aspell.sourceforge.net/metaphone/metaphone-kuhn.txt
+
+Code for this (including this help file!) was liberally borrowed from
+the soundex() module for PostgreSQL.
+
+There are two functions:
+ metaphone(text) : returns hash of a name
+ metaphone(text,int) : returns hash (maximum length of int) of name
+
+---
+
+To install it, first configure the main source tree, then run "make" and
+"make install" in this directory. Finally, load the function definitions
+with psql:
+
+ psql -f PREFIX/share/contrib/metaphone.sql
+
+The following are some usage examples:
+
+SELECT text_metaphone('hello world!');
+SELECT text_metaphone('hello world!', 4);
+
+CREATE TABLE s (nm text)\g
+
+insert into s values ('john')\g
+insert into s values ('joan')\g
+insert into s values ('wobbly')\g
+
+select * from s
+where text_metaphone(nm) = text_metaphone('john')\g
+
+select a.nm from s a, s b
+where text_metaphone(a.nm) = text_metaphone(b.nm)
+and a.oid <> b.oid\g
+
+CREATE FUNCTION text_mp_eq(text, text) RETURNS bool AS
+'select text_metaphone($1) = text_metaphone($2)'
+LANGUAGE 'sql'\g
+
+CREATE FUNCTION text_mp_lt(text,text) RETURNS bool AS
+'select text_metaphone($1) < text_metaphone($2)'
+LANGUAGE 'sql'\g
+
+CREATE FUNCTION text_mp_gt(text,text) RETURNS bool AS
+'select text_metaphone($1) > text_metaphone($2)'
+LANGUAGE 'sql';
+
+CREATE FUNCTION text_mp_le(text,text) RETURNS bool AS
+'select text_metaphone($1) <= text_metaphone($2)'
+LANGUAGE 'sql';
+
+CREATE FUNCTION text_mp_ge(text,text) RETURNS bool AS
+'select text_metaphone($1) >= text_metaphone($2)'
+LANGUAGE 'sql';
+
+CREATE FUNCTION text_mp_ne(text,text) RETURNS bool AS
+'select text_metaphone($1) <> text_metaphone($2)'
+LANGUAGE 'sql';
+
+DROP OPERATOR #= (text,text)\g
+
+CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_mp_eq,
+commutator = #=)\g
+
+SELECT *
+FROM s
+WHERE text_mp_eq(nm,'pillsbury')\g
+
+SELECT *
+from s
+where s.nm #= 'pillsbury';
--- /dev/null
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/builtins.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+Datum text_metaphone(PG_FUNCTION_ARGS); /* SQL-callable: metaphone code of a text argument */
+Datum text_metaphone_length(PG_FUNCTION_ARGS); /* SQL-callable: same, with an int4 length limit */
+
+void phonetic(char *name, char *metaph, int metalen); /* appends the code for name to metaph */
+
+#define METAPHONE_LEN 50 /* presumably the default maximum code length -- matches the 50 passed by text_metaphone() */
+#undef METAPHONE_TEST /* define instead to build the standalone command-line test driver */
+
+#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str)) /* C string -> text Datum */
+#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str))) /* text -> C string pointer */
+
+#define NULLCHAR (char *) 0 /* typed null character pointer */
+
+char *VOWELS="AEIOU", /* letters treated as vowels */
+ *FRONTV="EIY", /* special cases for letters in FRONT of these */
+ *VARSON="CSPTG", /* variable sound--those modified by adding an "h" */
+ *DOUBLE="."; /* let these double letters through */
+
+char *excpPAIR="AGKPW", /* exceptions "ae-", "gn-", "kn-", "pn-", "wr-": first letter of each pair */
+ *nextLTR ="ENNNR"; /* second letter of each exception pair, index-aligned with excpPAIR */
+char *chrptr, *chrptr1; /* file-scope scratch pointers */
+
+/*
+ * append_code -- append the code string "code" to the NUL-terminated buffer
+ * "metaph" without ever letting it grow beyond "metalen" characters.
+ *
+ * *outlen holds the current length of metaph and is updated in place.
+ * Clipping here matters for the two-character code "KS", which could
+ * otherwise push the result one character past the limit the caller sized
+ * its buffer for.
+ */
+static void
+append_code(char *metaph, int *outlen, int metalen, const char *code)
+{
+    while (*code != '\0' && *outlen < metalen)
+        metaph[(*outlen)++] = *code++;
+    metaph[*outlen] = '\0';
+}
+
+/*
+ * phonetic -- compute the Metaphone code of "name".
+ *
+ * name     NUL-terminated input; non-alphabetic characters are ignored and
+ *          letters are upper-cased.  Only the first 59 letters are examined
+ *          (the size of the internal work buffer).
+ * metaph   NUL-terminated output buffer; the code is APPENDED to whatever
+ *          string it already holds, so callers normally pass "".  It must
+ *          have room for metalen characters plus the terminating NUL.
+ * metalen  maximum length of the string in metaph; a value <= the current
+ *          length (including any negative value) produces no output.
+ *
+ * If name contains no letters, metaph is left untouched.
+ */
+void
+phonetic(char *name, char *metaph, int metalen)
+{
+    char        wname[60];      /* upper-cased letters of name */
+    char       *ename = wname;
+    char        self[2];        /* current letter as a one-character string */
+    const char *hit;
+    char        curLtr, prevLtr, nextLtr, nextLtr2, nextLtr3;
+    int         ii, jj, Lng, lastChr, outlen;
+    int         silent, hard;
+    int         vowelAfter, vowelBefore, frontvAfter;
+
+    /*
+     * Keep only the letters, upper-cased.  The copy is bounded so that a
+     * long input cannot overrun wname; <ctype.h> functions need an
+     * unsigned char value.
+     */
+    jj = 0;
+    for (ii = 0; name[ii] != '\0' && jj < (int) sizeof(wname) - 1; ii++)
+    {
+        unsigned char ch = (unsigned char) name[ii];
+
+        if (isalpha(ch))
+            ename[jj++] = (char) toupper(ch);
+    }
+    ename[jj] = '\0';
+
+    if (jj == 0)
+        return;
+
+    /* if ae, gn, kn, pn, wr then drop the first letter */
+    hit = strchr(excpPAIR, ename[0]);
+    if (hit != NULL && nextLTR[hit - excpPAIR] == ename[1])
+        memmove(ename, ename + 1, strlen(ename));   /* moves the NUL too */
+
+    /* change x to s */
+    if (ename[0] == 'X')
+        ename[0] = 'S';
+
+    /* get rid of the "h" in "wh" (regions overlap, so memmove) */
+    if (ename[0] == 'W' && ename[1] == 'H')
+        memmove(ename + 1, ename + 2, strlen(ename + 2) + 1);
+
+    Lng = (int) strlen(ename);
+    lastChr = Lng - 1;          /* index of the last character */
+
+    /*
+     * Remove an S from the end of the string, so that plurals hash like
+     * their singular form while S's inside the word are still encoded.
+     */
+    if (ename[lastChr] == 'S')
+    {
+        ename[lastChr] = '\0';
+        Lng--;
+        lastChr--;
+    }
+
+    outlen = (int) strlen(metaph);
+    self[1] = '\0';
+
+    for (ii = 0; outlen < metalen && ii < Lng; ii++)
+    {
+        curLtr = ename[ii];
+        self[0] = curLtr;
+
+        vowelBefore = 0;
+        prevLtr = ' ';
+        if (ii > 0)
+        {
+            prevLtr = ename[ii - 1];
+            if (strchr(VOWELS, prevLtr) != NULL)
+                vowelBefore = 1;
+        }
+
+        /* if first letter is a vowel KEEP it */
+        if (ii == 0 && strchr(VOWELS, curLtr) != NULL)
+        {
+            append_code(metaph, &outlen, metalen, self);
+            continue;
+        }
+
+        vowelAfter = 0;
+        frontvAfter = 0;
+        nextLtr = ' ';
+        if (ii < lastChr)
+        {
+            nextLtr = ename[ii + 1];
+            if (strchr(VOWELS, nextLtr) != NULL)
+                vowelAfter = 1;
+            if (strchr(FRONTV, nextLtr) != NULL)
+                frontvAfter = 1;
+        }
+
+        /* skip double letters except ones in list */
+        if (curLtr == nextLtr && strchr(DOUBLE, nextLtr) == NULL)
+            continue;
+
+        nextLtr2 = ' ';
+        if (ii < lastChr - 1)
+            nextLtr2 = ename[ii + 2];
+
+        nextLtr3 = ' ';
+        if (ii < lastChr - 2)
+            nextLtr3 = ename[ii + 3];
+
+        switch (curLtr)
+        {
+            case 'B':
+                /* silent in a final -mb */
+                silent = (ii == lastChr && prevLtr == 'M');
+                if (!silent)
+                    append_code(metaph, &outlen, metalen, self);
+                break;
+
+            case 'C':
+                /* silent -sci-, -sce-, -scy-; sci- etc. OK */
+                if (ii > 1 && prevLtr == 'S' && frontvAfter)
+                    break;
+
+                if (ii > 0 && nextLtr == 'I' && nextLtr2 == 'A')
+                    append_code(metaph, &outlen, metalen, "X");
+                else if (frontvAfter)
+                    append_code(metaph, &outlen, metalen, "S");
+                else if (ii > 1 && prevLtr == 'S' && nextLtr == 'H')
+                    append_code(metaph, &outlen, metalen, "K");
+                else if (nextLtr == 'H')
+                {
+                    if (ii == 0 && strchr(VOWELS, nextLtr2) == NULL)
+                        append_code(metaph, &outlen, metalen, "K");
+                    else
+                        append_code(metaph, &outlen, metalen, "X");
+                }
+                else if (prevLtr == 'C')
+                    append_code(metaph, &outlen, metalen, "C");
+                else
+                    append_code(metaph, &outlen, metalen, "K");
+                break;
+
+            case 'D':
+                if (nextLtr == 'G' && strchr(FRONTV, nextLtr2) != NULL)
+                    append_code(metaph, &outlen, metalen, "J");
+                else
+                    append_code(metaph, &outlen, metalen, "T");
+                break;
+
+            case 'G':
+                silent = 0;
+
+                /* SILENT -gh- except for -gh and no vowel after h */
+                if (ii < lastChr - 1 && nextLtr == 'H'
+                    && strchr(VOWELS, nextLtr2) == NULL)
+                    silent = 1;
+
+                /* silent in a final -gned or -gn */
+                if (ii == lastChr - 3
+                    && nextLtr == 'N' && nextLtr2 == 'E' && nextLtr3 == 'D')
+                    silent = 1;
+                else if (ii == lastChr - 1 && nextLtr == 'N')
+                    silent = 1;
+
+                /* -dge-, -dgi-, -dgy-: the D case already produced J */
+                if (prevLtr == 'D' && frontvAfter)
+                    silent = 1;
+
+                hard = (prevLtr == 'G');
+
+                if (!silent)
+                {
+                    if (frontvAfter && !hard)
+                        append_code(metaph, &outlen, metalen, "J");
+                    else
+                        append_code(metaph, &outlen, metalen, "K");
+                }
+                break;
+
+            case 'H':
+                silent = 0;
+                /* the h in ch, sh, ph, th, gh is handled with that letter */
+                if (strchr(VARSON, prevLtr) != NULL)
+                    silent = 1;
+                if (vowelBefore && !vowelAfter)
+                    silent = 1;
+                if (!silent)
+                    append_code(metaph, &outlen, metalen, self);
+                break;
+
+            case 'F':
+            case 'J':
+            case 'L':
+            case 'M':
+            case 'N':
+            case 'R':
+                append_code(metaph, &outlen, metalen, self);
+                break;
+
+            case 'K':
+                if (prevLtr != 'C')
+                    append_code(metaph, &outlen, metalen, self);
+                break;
+
+            case 'P':
+                if (nextLtr == 'H')
+                    append_code(metaph, &outlen, metalen, "F");
+                else
+                    append_code(metaph, &outlen, metalen, "P");
+                break;
+
+            case 'Q':
+                append_code(metaph, &outlen, metalen, "K");
+                break;
+
+            case 'S':
+                if (ii > 1 && nextLtr == 'I'
+                    && (nextLtr2 == 'O' || nextLtr2 == 'A'))
+                    append_code(metaph, &outlen, metalen, "X");
+                else if (nextLtr == 'H')
+                    append_code(metaph, &outlen, metalen, "X");
+                else
+                    append_code(metaph, &outlen, metalen, "S");
+                break;
+
+            case 'T':
+                if (ii > 1 && nextLtr == 'I'
+                    && (nextLtr2 == 'O' || nextLtr2 == 'A'))
+                    append_code(metaph, &outlen, metalen, "X");
+                else if (nextLtr == 'H')
+                {
+                    /* The=0, Tho=T, Withrow=0 */
+                    if (ii > 0 || strchr(VOWELS, nextLtr2) != NULL)
+                        append_code(metaph, &outlen, metalen, "0");
+                    else
+                        append_code(metaph, &outlen, metalen, "T");
+                }
+                else if (!(ii < lastChr - 2 && nextLtr == 'C' && nextLtr2 == 'H'))
+                    append_code(metaph, &outlen, metalen, "T");
+                break;
+
+            case 'V':
+                append_code(metaph, &outlen, metalen, "F");
+                break;
+
+            case 'W':
+            case 'Y':
+                if (ii < lastChr && vowelAfter)
+                    append_code(metaph, &outlen, metalen, self);
+                break;
+
+            case 'X':
+                append_code(metaph, &outlen, metalen, "KS");
+                break;
+
+            case 'Z':
+                append_code(metaph, &outlen, metalen, "S");
+                break;
+
+            default:
+                /* remaining vowels and any non-ASCII letters emit nothing */
+                break;
+        }
+    }
+
+    /*
+     * A trailing S is deliberately NOT stripped from the finished code:
+     * removing the S from the end of the input word above already handles
+     * plurals, and embedded S's stay part of the Metaphone.
+     */
+}
+
+
+#ifdef METAPHONE_TEST
+/*
+ * Standalone test driver, built only when METAPHONE_TEST is defined:
+ * prints the metaphone code of the first command-line argument.
+ * Returns 1 (after a usage message on stderr) when no argument is given,
+ * 0 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+    /* one spare byte beyond the NUL: the "KS" code can overshoot by one */
+    char        output[METAPHONE_LEN + 2] = "";
+
+    if (argc < 2)
+    {
+        fprintf(stderr, "usage: %s string\n", argv[0]);
+        return 1;
+    }
+
+    phonetic(argv[1], output, METAPHONE_LEN);
+    printf("metaphone(%s) = %s\n", argv[1], output);
+    return 0;
+}
+
+#endif /* METAPHONE_TEST */
+
+#ifndef METAPHONE_TEST
+/*
+ * SQL function: text_metaphone(text) returns text
+ *
+ * Converts the argument to a C string, computes its metaphone code limited
+ * to METAPHONE_LEN characters, and returns the code as text.  An argument
+ * without any letters yields the empty string.
+ */
+PG_FUNCTION_INFO_V1(text_metaphone);
+
+Datum
+text_metaphone(PG_FUNCTION_ARGS)
+{
+    /*
+     * METAPHONE_LEN characters plus NUL, plus one spare byte because the
+     * two-character code "KS" may be appended when only one slot is left.
+     */
+    char        outstr[METAPHONE_LEN + 2] = "";
+    char       *arg;
+
+    arg = _textout(PG_GETARG_TEXT_P(0));
+
+    phonetic(arg, outstr, METAPHONE_LEN);
+
+    PG_RETURN_TEXT_P(_textin(outstr));
+}
+
+/*
+ * SQL function: text_metaphone_length(text, int4) returns text
+ *
+ * Like text_metaphone(), but the second argument gives the maximum length
+ * of the returned code.  The requested length comes straight from the SQL
+ * caller, so it is clamped to the range 0..METAPHONE_LEN before use:
+ * the result is built in a fixed-size stack buffer, and an unchecked value
+ * (too large or negative) would let phonetic() write past its end.
+ */
+PG_FUNCTION_INFO_V1(text_metaphone_length);
+
+Datum
+text_metaphone_length(PG_FUNCTION_ARGS)
+{
+    /*
+     * METAPHONE_LEN characters plus NUL, plus one spare byte because the
+     * two-character code "KS" may be appended when only one slot is left.
+     */
+    char        outstr[METAPHONE_LEN + 2] = "";
+    char       *arg;
+    int32       metalen;
+
+    arg = _textout(PG_GETARG_TEXT_P(0));
+    metalen = PG_GETARG_INT32(1);
+
+    if (metalen < 0)
+        metalen = 0;
+    else if (metalen > METAPHONE_LEN)
+        metalen = METAPHONE_LEN;
+
+    phonetic(arg, outstr, metalen);
+
+    PG_RETURN_TEXT_P(_textin(outstr));
+}
+
+
+#endif /* not METAPHONE_TEST */