done (#1976)
This commit is contained in:
parent
5c14a6c217
commit
e565b1c948
|
@ -22,9 +22,12 @@ package ca.uhn.fhir.context.phonetic;
|
||||||
|
|
||||||
import org.apache.commons.codec.EncoderException;
|
import org.apache.commons.codec.EncoderException;
|
||||||
import org.apache.commons.codec.StringEncoder;
|
import org.apache.commons.codec.StringEncoder;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.util.StringJoiner;
|
||||||
|
|
||||||
public class ApacheEncoder implements IPhoneticEncoder {
|
public class ApacheEncoder implements IPhoneticEncoder {
|
||||||
private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class);
|
private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class);
|
||||||
|
|
||||||
|
@ -44,10 +47,44 @@ public class ApacheEncoder implements IPhoneticEncoder {
|
||||||
@Override
|
@Override
|
||||||
public String encode(String theString) {
|
public String encode(String theString) {
|
||||||
try {
|
try {
|
||||||
|
// If the string contains a space, encode alpha parts separately so, for example, numbers are preserved in address lines.
|
||||||
|
if (theString.contains(" ")) {
|
||||||
|
return encodeStringWithSpaces(theString);
|
||||||
|
}
|
||||||
return myStringEncoder.encode(theString);
|
return myStringEncoder.encode(theString);
|
||||||
} catch (EncoderException e) {
|
} catch (EncoderException e) {
|
||||||
ourLog.error("Failed to encode string " + theString, e);
|
ourLog.error("Failed to encode string " + theString, e);
|
||||||
return theString;
|
return theString;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String encodeStringWithSpaces(String theString) throws EncoderException {
|
||||||
|
StringJoiner joiner = new StringJoiner(" ");
|
||||||
|
|
||||||
|
// This sub-stack holds the alpha parts
|
||||||
|
StringJoiner alphaJoiner = new StringJoiner(" ");
|
||||||
|
|
||||||
|
for (String part : theString.split("[\\s\\W]+")) {
|
||||||
|
if (StringUtils.isAlpha(part)) {
|
||||||
|
alphaJoiner.add(part);
|
||||||
|
} else {
|
||||||
|
// Once we hit a non-alpha part, encode all the alpha parts together as a single string
|
||||||
|
// This is to allow encoders like METAPHONE to match Hans Peter to Hanspeter
|
||||||
|
alphaJoiner = encodeAlphaParts(joiner, alphaJoiner);
|
||||||
|
joiner.add(part);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
encodeAlphaParts(joiner, alphaJoiner);
|
||||||
|
|
||||||
|
return joiner.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private StringJoiner encodeAlphaParts(StringJoiner theJoiner, StringJoiner theAlphaJoiner) throws EncoderException {
|
||||||
|
// Encode the alpha parts as a single string and then flush the alpha encoder
|
||||||
|
if (theAlphaJoiner.length() > 0) {
|
||||||
|
theJoiner.add(myStringEncoder.encode(theAlphaJoiner.toString()));
|
||||||
|
theAlphaJoiner = new StringJoiner(" ");
|
||||||
|
}
|
||||||
|
return theAlphaJoiner;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
package ca.uhn.fhir.context.phonetic;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.params.ParameterizedTest;
|
||||||
|
import org.junit.jupiter.params.provider.EnumSource;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.startsWith;
|
||||||
|
import static org.hamcrest.MatcherAssert.assertThat;
|
||||||
|
import static org.hamcrest.Matchers.endsWith;
|
||||||
|
|
||||||
|
class PhoneticEncoderTest {
|
||||||
|
private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderTest.class);
|
||||||
|
|
||||||
|
private static final String NUMBER = "123";
|
||||||
|
private static final String STREET = "Nohili St, Suite";
|
||||||
|
private static final String SUITE = "456";
|
||||||
|
private static final String ADDRESS_LINE = NUMBER + " " + STREET + " " + SUITE;
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@EnumSource(PhoneticEncoderEnum.class)
|
||||||
|
public void testEncodeAddress(PhoneticEncoderEnum thePhoneticEncoderEnum) {
|
||||||
|
String encoded = thePhoneticEncoderEnum.getPhoneticEncoder().encode(ADDRESS_LINE);
|
||||||
|
ourLog.info("{}: {}", thePhoneticEncoderEnum.name(), encoded);
|
||||||
|
assertThat(encoded, startsWith(NUMBER + " "));
|
||||||
|
assertThat(encoded, endsWith(" " + SUITE));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue