CREATE OR REPLACE PACKAGE PCK_PHONETIK AS
/*
|| $Header: $
||
|| Name : PKG_PHONETIK
|| Aufgabe: An implementation of "Koelner Phonetik"
|| Autor : 13.02.2017, Jan Schreiber
||
*/
FUNCTION GET_CODE (
I_STRING IN VARCHAR2)
RETURN VARCHAR2;
END PCK_PHONETIK;
/
CREATE OR REPLACE PACKAGE BODY PCK_PHONETIK AS
/*------------------------------------------------------------------------------
||
|| 2017.02.13 Jan Schreiber: An implementation of "Koelner Phonetik".
|| 2017.02.14 Jan Schreiber: Renamed to PCK_PHONETIK
|| 2917.02.16 Jan Schreiber: Aligned with Java class ColognePhonetic
||
*/------------------------------------------------------------------------------
/*
This work is based on Andy Theilers github package: https://github.com/deezaster/germanphonetic
and Carsten Czarskis APEX implementation: https://apex.oracle.com/pls/apex/germancommunities/apexcommunity/tipp/1502/index.html
and the Java Commons 1.7 implementation ColognePhonetics from: http://archive.apache.org/dist/commons/codec/source/commons-codec-1.7-src.tar.gz
Source code: https://wiki.loopback.org/confluence/x/HoDR
*/
function soundex_ger (strword IN VARCHAR2,
intlen IN NUMBER DEFAULT 255)
return VARCHAR2 is
word varchar2(255);
wordlen number;
checklen number;
code varchar2(255);
phoneticcode varchar2(255);
intX number;
begin
if intlen is null
then
checklen := 255;
else
checklen := intlen;
end if;
Word := lower(substr(strWord,0,checkLen));
if length(Word) < 1 then return 0; end if;
Word := REGEXP_REPLACE(
REPLACE(
REPLACE(
Translate(Word, 'vwjyäöüéèêàáç', 'ffiiaoueeeaac'),
'ph', 'f'),
'ß', 'ss'),
'[^a-zA-Z]', Null);
wordlen := length(word);
if wordlen = 1
then
word := word || ' ' ;
end if;
if substr(word,1,1) = 'c'
then
case
when substr(word,2,1) in ('a','h','k','l','o','q','r','u','x') then
Code := '4';
else
Code := '8';
end case;
intX := 2;
else
intX := 1;
end if;
while intX <= wordlen loop
case
when substr(word,intX,1) in ('a','e','i','o','u') then
code := code || '0';
when substr(word,intX,1) = 'b' or substr(Word,intX,1) = 'p' then
code := code || '1';
when substr(word,intX,1) = 'd' or substr(Word,intX,1) = 't' then
if intX < wordlen then
case
when substr(word,intX+1,1) in ('c','s','z') then
code := code || '8';
else
code := code || '2';
end case;
else
code := code || '2';
end if;
when substr(word,intX,1) = 'f' then
code := code || '3';
when substr(word,intX,1) in ('g','k','q') then
code := code || '4';
when substr(word,intX,1) = 'c' then
if intX < wordlen then
case
when substr(Word,intX+1,1) in ('a','h','k','o','q','u','x') then
case
when substr(Word,intX-1,1) = 's' or substr(Word,intX-1,1) = 'z' then
code := code || '8';
else
code := code || '4';
end case;
else
code := code || '8';
end case;
else
code := code || '8';
end if;
when substr(word,intX,1) = 'x' then
if intX > 1 then
case
when substr(word,intX-1,1) in ('c','k','x') then
code := code || '8';
else
code := code || '48';
end case;
else
code := code || '48';
end if;
when substr(word,intX,1) = 'l' then
code := code || '5';
when substr(word,intX,1) = 'm' or substr(word,intX,1) = 'n' then
code := code || '6';
when substr(word,intX,1) = 'r' then
code := code || '7';
when substr(word,intX,1) = 's' or substr(word,intX,1) = 'z' then
code := code || '8';
else
code := code;
end case;
intX := intX + 1;
end loop;
phoneticcode := translate(regexp_replace(code, '(.)1+', '1'), '1234567890', '123456789');
if substr(code,1,1) = '0'
then
phoneticcode := '0' || phoneticcode;
end if;
return phoneticcode;
end soundex_ger;
function get_code
(i_string IN VARCHAR2)
return VARCHAR2
is
len number;
i number;
k number;
in_string varchar2(4000);
out_string varchar2(4000);
key_length constant number := 4;
begin
i := 1;
out_string := null ;
in_string := regexp_replace(substr(translate(i_string,'.,-;',' '),1,4000), '([[:cntrl:]])|(^t)', ' ');
len := length(in_string) ;
while i <= len loop
k := InStr(in_string, ' ', i);
case
when (k = i) then
i := i +1;
when (k > i) then
out_string := trim(out_string || ' ' || substr(soundex_ger(substr(in_string, i, k-i)),1,key_length)) ;
i := k+1 ;
when (k = 0) then
out_string := trim(out_string || ' ' || substr(soundex_ger(substr(in_string, i)),1,key_length)) ;
i := len +1;
end case;
end loop;
return out_string;
end get_code;
END PCK_PHONETIK;
/
The implementation table used is build on the article in German Wikipedia mentioned above:
| Letter | Context | Code |
|---|---|---|
| A, E, I, J, O, U, Y | 0 | |
| H | – | |
| B | 1 | |
| P | not before H | 1 |
| D, T | not before C, S, Z | 2 |
| F, V, W | 3 | |
| P | before H | 3 |
| G, K, Q | 4 | |
| C | in Anlaut before A, H, K, L, O, Q, R, U, X | 4 |
| C | before A, H, K, O, Q, U, X , but not behind S, Z | 4 |
| X | not behind C, K, Q | 48 |
| L | 5 | |
| M, N | 6 | |
| R | 7 | |
| S, Z | 8 | |
| C | behind S, Z | 8 |
| C | in Anlaut, but not before A, H, K, L, O, Q, R, U, X | 8 |
| C | not before A, H, K, O, Q, U, X | 8 |
| D, T | before C, S, Z | 8 |
| X | after C, K, Q | 8 |
