This commit is contained in:
Ken Stevens 2020-07-10 15:08:50 -04:00 committed by GitHub
parent 5c14a6c217
commit e565b1c948
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 66 additions and 0 deletions

View File

@ -22,9 +22,12 @@ package ca.uhn.fhir.context.phonetic;
import org.apache.commons.codec.EncoderException; import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder; import org.apache.commons.codec.StringEncoder;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.StringJoiner;
public class ApacheEncoder implements IPhoneticEncoder { public class ApacheEncoder implements IPhoneticEncoder {
private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class); private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class);
@ -44,10 +47,44 @@ public class ApacheEncoder implements IPhoneticEncoder {
@Override @Override
public String encode(String theString) { public String encode(String theString) {
try { try {
// If the string contains a space, encode alpha parts separately so, for example, numbers are preserved in address lines.
if (theString.contains(" ")) {
return encodeStringWithSpaces(theString);
}
return myStringEncoder.encode(theString); return myStringEncoder.encode(theString);
} catch (EncoderException e) { } catch (EncoderException e) {
ourLog.error("Failed to encode string " + theString, e); ourLog.error("Failed to encode string " + theString, e);
return theString; return theString;
} }
} }
/**
 * Encodes a string made of several tokens, phonetically encoding only the alphabetic ones.
 * <p>
 * The input is split on runs of whitespace and non-word characters. Consecutive
 * alphabetic tokens are buffered and encoded together as one space-separated string;
 * every other token (for example a house number) is copied to the output unchanged.
 * Output parts are joined with single spaces.
 *
 * @param theString the text to encode; must not be {@code null}
 * @return the space-joined sequence of encoded alphabetic runs and untouched non-alphabetic tokens
 * @throws EncoderException if the wrapped encoder fails on an alphabetic run
 */
private String encodeStringWithSpaces(String theString) throws EncoderException {
	StringJoiner joiner = new StringJoiner(" ");
	// Buffers the current run of consecutive alphabetic tokens
	StringJoiner alphaJoiner = new StringJoiner(" ");
	for (String part : theString.split("[\\s\\W]+")) {
		if (part.isEmpty()) {
			// split() yields an empty first token when the input starts with a delimiter.
			// Emitting it would prefix the result with a stray space, so skip it.
			continue;
		}
		if (StringUtils.isAlpha(part)) {
			alphaJoiner.add(part);
		} else {
			// Once we hit a non-alpha part, encode all the buffered alpha parts together as a single string.
			// This is to allow encoders like METAPHONE to match Hans Peter to Hanspeter
			alphaJoiner = encodeAlphaParts(joiner, alphaJoiner);
			joiner.add(part);
		}
	}
	// Flush whatever alphabetic run is still pending at the end of the input
	encodeAlphaParts(joiner, alphaJoiner);
	return joiner.toString();
}
/**
 * Flushes the buffered alphabetic parts into the output joiner.
 * <p>
 * When the buffer holds text, that text is encoded as a single string, appended to
 * {@code theJoiner}, and a fresh empty buffer is returned. When the buffer is empty,
 * nothing is appended and the same buffer instance is returned.
 *
 * @param theJoiner      the output joiner that receives the encoded text
 * @param theAlphaJoiner the buffer of alphabetic parts gathered so far
 * @return the buffer the caller should keep using for subsequent alphabetic parts
 * @throws EncoderException if the wrapped encoder fails
 */
private StringJoiner encodeAlphaParts(StringJoiner theJoiner, StringJoiner theAlphaJoiner) throws EncoderException {
	// Nothing buffered: leave the output alone and hand the same buffer back
	if (theAlphaJoiner.length() == 0) {
		return theAlphaJoiner;
	}

	String bufferedAlphaText = theAlphaJoiner.toString();
	String encodedAlphaText = myStringEncoder.encode(bufferedAlphaText);
	theJoiner.add(encodedAlphaText);

	// The flushed buffer is replaced by a new, empty one
	return new StringJoiner(" ");
}
} }

View File

@ -0,0 +1,29 @@
package ca.uhn.fhir.context.phonetic;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.hamcrest.Matchers.startsWith;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.endsWith;
/**
 * Runs every {@link PhoneticEncoderEnum} encoder over a sample address line and checks
 * that the leading number and the trailing suite number come through unencoded.
 */
class PhoneticEncoderTest {
	private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderTest.class);

	// Building blocks of the sample address line: "<number> <street text> <suite number>"
	private static final String NUMBER = "123";
	private static final String STREET = "Nohili St, Suite";
	private static final String SUITE = "456";
	private static final String ADDRESS_LINE = String.join(" ", NUMBER, STREET, SUITE);

	/**
	 * Encodes the sample address with the given encoder and verifies the numeric
	 * tokens at both ends of the line are still present verbatim.
	 */
	@ParameterizedTest
	@EnumSource(PhoneticEncoderEnum.class)
	public void testEncodeAddress(PhoneticEncoderEnum thePhoneticEncoderEnum) {
		final String expectedPrefix = NUMBER + " ";
		final String expectedSuffix = " " + SUITE;

		String encoded = thePhoneticEncoderEnum.getPhoneticEncoder().encode(ADDRESS_LINE);
		String encoderName = thePhoneticEncoderEnum.name();
		ourLog.info("{}: {}", encoderName, encoded);

		assertThat(encoded, startsWith(expectedPrefix));
		assertThat(encoded, endsWith(expectedSuffix));
	}
}