done (#1976)
This commit is contained in:
parent
5c14a6c217
commit
e565b1c948
|
@ -22,9 +22,12 @@ package ca.uhn.fhir.context.phonetic;
|
|||
|
||||
import org.apache.commons.codec.EncoderException;
|
||||
import org.apache.commons.codec.StringEncoder;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.StringJoiner;
|
||||
|
||||
public class ApacheEncoder implements IPhoneticEncoder {
|
||||
private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class);
|
||||
|
||||
|
@ -44,10 +47,44 @@ public class ApacheEncoder implements IPhoneticEncoder {
|
|||
@Override
|
||||
public String encode(String theString) {
|
||||
try {
|
||||
// If the string contains a space, encode alpha parts separately so, for example, numbers are preserved in address lines.
|
||||
if (theString.contains(" ")) {
|
||||
return encodeStringWithSpaces(theString);
|
||||
}
|
||||
return myStringEncoder.encode(theString);
|
||||
} catch (EncoderException e) {
|
||||
ourLog.error("Failed to encode string " + theString, e);
|
||||
return theString;
|
||||
}
|
||||
}
|
||||
|
||||
private String encodeStringWithSpaces(String theString) throws EncoderException {
|
||||
StringJoiner joiner = new StringJoiner(" ");
|
||||
|
||||
// This sub-stack holds the alpha parts
|
||||
StringJoiner alphaJoiner = new StringJoiner(" ");
|
||||
|
||||
for (String part : theString.split("[\\s\\W]+")) {
|
||||
if (StringUtils.isAlpha(part)) {
|
||||
alphaJoiner.add(part);
|
||||
} else {
|
||||
// Once we hit a non-alpha part, encode all the alpha parts together as a single string
|
||||
// This is to allow encoders like METAPHONE to match Hans Peter to Hanspeter
|
||||
alphaJoiner = encodeAlphaParts(joiner, alphaJoiner);
|
||||
joiner.add(part);
|
||||
}
|
||||
}
|
||||
encodeAlphaParts(joiner, alphaJoiner);
|
||||
|
||||
return joiner.toString();
|
||||
}
|
||||
|
||||
private StringJoiner encodeAlphaParts(StringJoiner theJoiner, StringJoiner theAlphaJoiner) throws EncoderException {
|
||||
// Encode the alpha parts as a single string and then flush the alpha encoder
|
||||
if (theAlphaJoiner.length() > 0) {
|
||||
theJoiner.add(myStringEncoder.encode(theAlphaJoiner.toString()));
|
||||
theAlphaJoiner = new StringJoiner(" ");
|
||||
}
|
||||
return theAlphaJoiner;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
package ca.uhn.fhir.context.phonetic;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.EnumSource;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.hamcrest.Matchers.startsWith;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.endsWith;
|
||||
|
||||
class PhoneticEncoderTest {
|
||||
private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderTest.class);
|
||||
|
||||
private static final String NUMBER = "123";
|
||||
private static final String STREET = "Nohili St, Suite";
|
||||
private static final String SUITE = "456";
|
||||
private static final String ADDRESS_LINE = NUMBER + " " + STREET + " " + SUITE;
|
||||
|
||||
@ParameterizedTest
|
||||
@EnumSource(PhoneticEncoderEnum.class)
|
||||
public void testEncodeAddress(PhoneticEncoderEnum thePhoneticEncoderEnum) {
|
||||
String encoded = thePhoneticEncoderEnum.getPhoneticEncoder().encode(ADDRESS_LINE);
|
||||
ourLog.info("{}: {}", thePhoneticEncoderEnum.name(), encoded);
|
||||
assertThat(encoded, startsWith(NUMBER + " "));
|
||||
assertThat(encoded, endsWith(" " + SUITE));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue