001/* ***** BEGIN LICENSE BLOCK *****
002 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
003 *
004 * The contents of this file are subject to the Mozilla Public License Version
005 * 1.1 (the "License"); you may not use this file except in compliance with
006 * the License. You may obtain a copy of the License at
007 * http://www.mozilla.org/MPL/
008 *
009 * Software distributed under the License is distributed on an "AS IS" basis,
010 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
011 * for the specific language governing rights and limitations under the
012 * License.
013 *
014 * The Original Code is part of dcm4che, an implementation of DICOM(TM) in
015 * Java(TM), hosted at https://github.com/gunterze/dcm4che.
016 *
017 * The Initial Developer of the Original Code is
018 * Agfa Healthcare.
019 * Portions created by the Initial Developer are Copyright (C) 2011
020 * the Initial Developer. All Rights Reserved.
021 *
022 * Contributor(s):
023 * See listed authors below.
024 *
025 * Alternatively, the contents of this file may be used under the terms of
026 * either the GNU General Public License Version 2 or later (the "GPL"), or
027 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
028 * in which case the provisions of the GPL or the LGPL are applicable instead
029 * of those above. If you wish to allow use of your version of this file only
030 * under the terms of either the GPL or the LGPL, and not to allow others to
031 * use your version of this file under the terms of the MPL, indicate your
032 * decision by deleting the provisions above and replace them with the notice
033 * and other provisions required by the GPL or the LGPL. If you do not delete
034 * the provisions above, a recipient may use your version of this file under
035 * the terms of any one of the MPL, the GPL or the LGPL.
036 *
037 * ***** END LICENSE BLOCK ***** */
038
039package org.dcm4che3.soundex;
040
041/**
042 * @author Gunter Zeilinger <gunterze@gmail.com>
043 */
044public class KPhonetik implements FuzzyStr {
045
046    @Override
047    public String toFuzzy(String s) {
048        if (s == null || s.length() == 0)
049            return "";
050
051        char[] in = s.toUpperCase().toCharArray();
052        int countX = 0;
053        for (char c : in)
054            if (c == 'X')
055                countX++;
056        char[] out = countX > 0 ? new char[in.length + countX] : in;
057        int i = 0;
058        int j = 0;
059        char prevout = 0;
060        char curout = 0;
061        char prev = 0;
062        char cur = 0;
063        char next = in[0];
064        
065        for (; i < in.length; i++) {
066            prev = cur;
067            cur = next;
068            next = i+1 < in.length ? in[i+1] : 0;
069            switch (cur) {
070            case 'A':
071            case 'E':
072            case 'I':
073            case 'J':
074            case 'O':
075            case 'U':
076            case 'Y':
077            case 'Ä':
078            case 'Ö':
079            case 'Ü':
080                if (j > 0) {
081                    prevout = '0';
082                    continue;
083                }
084                curout = '0';
085                break;
086            case 'B':
087                curout = '1';
088                break;
089            case 'P':
090                curout = next == 'H' ? '3' : '1';
091                break;
092            case 'D':
093            case 'T':
094                curout = (next == 'C' || next == 'S' || next == 'Z') 
095                        ? '8' : '2';
096                break;
097            case 'F':
098            case 'V':
099            case 'W':
100                curout = '3';
101                break;
102            case 'G':
103            case 'K':
104            case 'Q':
105                curout = '4';
106                break;
107            case 'C':
108                switch (next) {
109                case 'A':
110                case 'H':
111                case 'K':
112                case 'O':
113                case 'Q':
114                case 'U':
115                case 'X':
116                    curout = i == 0 || (prev != 'S' && prev != 'Z')
117                            ? '4' : '8';
118                    break;
119                case 'L':
120                case 'R':
121                    curout = i == 0 ? '4' : '8';
122                    break;
123                }
124                break;
125            case 'X':
126                if (prev != 'C' && prev != 'K' && prev != 'Q'
127                        && prevout != '4')
128                    out[j++] = prevout = '4';
129                curout = '8';
130                break;
131            case 'L':
132                curout = '5';
133                break;
134            case 'M':
135            case 'N':
136                curout = '6';
137                break;
138            case 'R':
139                curout = '7';
140                break;
141            case 'S':
142            case 'Z':
143            case 'ß':
144                curout = '8';
145                break;
146            default:
147                prevout = 0;
148                continue;
149            }
150            if (prevout != curout)
151                out[j++] = prevout = curout;
152        }
153        return new String(out, 0, j);
154    }
155
156    public static void main(String[] args) {
157        KPhonetik inst = new KPhonetik();
158        for (String arg : args)
159            System.out.println(inst.toFuzzy(arg));
160    }
161
162}