added numeric matcher (#2547)
* added numeric matcher * changelog * performance optimization * fix test
This commit is contained in:
parent
4cd0409bae
commit
550602b2f1
|
@ -0,0 +1,18 @@
|
|||
package ca.uhn.fhir.context.phonetic;
|
||||
|
||||
import com.google.common.base.CharMatcher;
|
||||
|
||||
// Useful for numerical identifiers like phone numbers, address parts etc.
|
||||
// This should not be used where decimals are important. A new "quantity encoder" should be added to handle cases like that.
|
||||
public class NumericEncoder implements IPhoneticEncoder {
|
||||
@Override
|
||||
public String name() {
|
||||
return "NUMERIC";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String encode(String theString) {
|
||||
// Remove everything but the numbers
|
||||
return CharMatcher.inRange('0', '9').retainFrom(theString);
|
||||
}
|
||||
}
|
|
@ -39,7 +39,8 @@ public enum PhoneticEncoderEnum {
|
|||
METAPHONE(new ApacheEncoder("METAPHONE", new Metaphone())),
|
||||
NYSIIS(new ApacheEncoder("NYSIIS", new Nysiis())),
|
||||
REFINED_SOUNDEX(new ApacheEncoder("REFINED_SOUNDEX", new RefinedSoundex())),
|
||||
SOUNDEX(new ApacheEncoder("SOUNDEX", new Soundex()));
|
||||
SOUNDEX(new ApacheEncoder("SOUNDEX", new Soundex())),
|
||||
NUMERIC(new NumericEncoder());
|
||||
|
||||
private final IPhoneticEncoder myPhoneticEncoder;
|
||||
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
package ca.uhn.fhir.context.phonetic;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.EnumSource;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.hamcrest.Matchers.startsWith;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.endsWith;
|
||||
import static org.hamcrest.Matchers.startsWith;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class PhoneticEncoderTest {
|
||||
private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderTest.class);
|
||||
|
@ -23,7 +23,11 @@ class PhoneticEncoderTest {
|
|||
public void testEncodeAddress(PhoneticEncoderEnum thePhoneticEncoderEnum) {
|
||||
String encoded = thePhoneticEncoderEnum.getPhoneticEncoder().encode(ADDRESS_LINE);
|
||||
ourLog.info("{}: {}", thePhoneticEncoderEnum.name(), encoded);
|
||||
assertThat(encoded, startsWith(NUMBER + " "));
|
||||
assertThat(encoded, endsWith(" " + SUITE));
|
||||
if (thePhoneticEncoderEnum == PhoneticEncoderEnum.NUMERIC) {
|
||||
assertEquals(NUMBER + SUITE, encoded);
|
||||
} else {
|
||||
assertThat(encoded, startsWith(NUMBER + " "));
|
||||
assertThat(encoded, endsWith(" " + SUITE));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
type: add
|
||||
issue: 2547
|
||||
title: "Added a new NUMERIC MDM matcher for matching phone numbers. Also added a NUMERIC phonetic encoder to support
|
||||
adding NUMERIC-encoded search parameters (e.g. if searching for matching phone numbers is required by MDM candidate searching)."
|
|
@ -292,10 +292,10 @@ The following algorithms are currently supported:
|
|||
<td>Gail = Gael, Gail != Gale, Thomas != Tom</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>CAVERPHONE1</td>
|
||||
<td>CAVERPHONE2</td>
|
||||
<td>matcher</td>
|
||||
<td>
|
||||
<a href="https://commons.apache.org/proper/commons-codec/apidocs/org/apache/commons/codec/language/Caverphone1.html">Apache Caverphone1</a>
|
||||
<a href="https://commons.apache.org/proper/commons-codec/apidocs/org/apache/commons/codec/language/Caverphone2.html">Apache Caverphone2</a>
|
||||
</td>
|
||||
<td>Gail = Gael, Gail = Gale, Thomas != Tom</td>
|
||||
</tr>
|
||||
|
@ -379,6 +379,14 @@ The following algorithms are currently supported:
|
|||
</td>
|
||||
<td>2019-12,Month = 2019-12-19,Day</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>NUMERIC</td>
|
||||
<td>matcher</td>
|
||||
<td>
|
||||
Remove all non-numeric characters from the string before comparing.
|
||||
</td>
|
||||
<td>4169671111 = (416) 967-1111</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>NAME_ANY_ORDER</td>
|
||||
<td>matcher</td>
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
package ca.uhn.fhir.jpa.dao.dstu3;
|
||||
|
||||
import ca.uhn.fhir.context.phonetic.ApacheEncoder;
|
||||
import ca.uhn.fhir.context.phonetic.NumericEncoder;
|
||||
import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum;
|
||||
import ca.uhn.fhir.jpa.api.config.DaoConfig;
|
||||
import ca.uhn.fhir.jpa.model.entity.ResourceIndexedSearchParamString;
|
||||
import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
|
||||
import ca.uhn.fhir.rest.server.util.ISearchParamRegistry;
|
||||
import ca.uhn.fhir.rest.param.StringParam;
|
||||
import ca.uhn.fhir.rest.server.util.ISearchParamRegistry;
|
||||
import ca.uhn.fhir.util.HapiExtensions;
|
||||
import org.apache.commons.codec.language.Soundex;
|
||||
import org.hl7.fhir.dstu3.model.Enumerations;
|
||||
|
@ -35,10 +36,14 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test
|
|||
public static final String GAIL = "Gail";
|
||||
public static final String NAME_SOUNDEX_SP = "nameSoundex";
|
||||
public static final String ADDRESS_LINE_SOUNDEX_SP = "addressLineSoundex";
|
||||
public static final String PHONE_NUMBER_SP = "phoneNumber";
|
||||
private static final String BOB = "BOB";
|
||||
private static final String ADDRESS = "123 Nohili St";
|
||||
private static final String ADDRESS_CLOSE = "123 Nohily St";
|
||||
private static final String ADDRESS_FAR = "123 College St";
|
||||
private static final String PHONE = "4169671111";
|
||||
private static final String PHONE_CLOSE = "(416) 967-1111";
|
||||
private static final String PHONE_FAR = "416 421 0421";
|
||||
|
||||
@Autowired
|
||||
ISearchParamRegistry mySearchParamRegistry;
|
||||
|
@ -49,8 +54,9 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test
|
|||
myDaoConfig.setReuseCachedSearchResultsForMillis(null);
|
||||
myDaoConfig.setFetchSizeDefaultMaximum(new DaoConfig().getFetchSizeDefaultMaximum());
|
||||
|
||||
createSoundexSearchParameter(NAME_SOUNDEX_SP, PhoneticEncoderEnum.SOUNDEX, "Patient.name");
|
||||
createSoundexSearchParameter(ADDRESS_LINE_SOUNDEX_SP, PhoneticEncoderEnum.SOUNDEX, "Patient.address.line");
|
||||
createPhoneticSearchParameter(NAME_SOUNDEX_SP, PhoneticEncoderEnum.SOUNDEX, "Patient.name");
|
||||
createPhoneticSearchParameter(ADDRESS_LINE_SOUNDEX_SP, PhoneticEncoderEnum.SOUNDEX, "Patient.address.line");
|
||||
createPhoneticSearchParameter(PHONE_NUMBER_SP, PhoneticEncoderEnum.NUMERIC, "Patient.telecom");
|
||||
mySearchParamRegistry.forceRefresh();
|
||||
mySearchParamRegistry.setPhoneticEncoder(new ApacheEncoder(PhoneticEncoderEnum.SOUNDEX.name(), new Soundex()));
|
||||
}
|
||||
|
@ -70,6 +76,15 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test
|
|||
ourLog.info("Encoded address: {}", soundex.encode(ADDRESS));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNumeric() {
|
||||
NumericEncoder numeric = new NumericEncoder();
|
||||
assertEquals(PHONE, numeric.encode(PHONE_CLOSE));
|
||||
assertEquals(PHONE, numeric.encode(PHONE));
|
||||
assertEquals(numeric.encode(PHONE), numeric.encode(PHONE_CLOSE));
|
||||
assertNotEquals(numeric.encode(PHONE), numeric.encode(PHONE_FAR));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void phoneticMatch() {
|
||||
Patient patient;
|
||||
|
@ -77,15 +92,16 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test
|
|||
patient = new Patient();
|
||||
patient.addName().addGiven(GALE);
|
||||
patient.addAddress().addLine(ADDRESS);
|
||||
patient.addTelecom().setValue(PHONE);
|
||||
ourLog.info(myFhirCtx.newJsonParser().setPrettyPrint(true).encodeResourceToString(patient));
|
||||
|
||||
IIdType pId = myPatientDao.create(patient, mySrd).getId().toUnqualifiedVersionless();
|
||||
|
||||
List<ResourceIndexedSearchParamString> stringParams = myResourceIndexedSearchParamStringDao.findAll();
|
||||
|
||||
assertThat(stringParams, hasSize(6));
|
||||
assertThat(stringParams, hasSize(7));
|
||||
List<String> stringParamNames = stringParams.stream().map(ResourceIndexedSearchParamString::getParamName).collect(Collectors.toList());
|
||||
assertThat(stringParamNames, containsInAnyOrder(Patient.SP_NAME, Patient.SP_GIVEN, Patient.SP_PHONETIC, NAME_SOUNDEX_SP, Patient.SP_ADDRESS, ADDRESS_LINE_SOUNDEX_SP));
|
||||
assertThat(stringParamNames, containsInAnyOrder(Patient.SP_NAME, Patient.SP_GIVEN, Patient.SP_PHONETIC, NAME_SOUNDEX_SP, Patient.SP_ADDRESS, ADDRESS_LINE_SOUNDEX_SP, PHONE_NUMBER_SP));
|
||||
|
||||
assertSearchMatch(pId, Patient.SP_PHONETIC, GALE);
|
||||
assertSearchMatch(pId, Patient.SP_PHONETIC, GAIL);
|
||||
|
@ -98,6 +114,10 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test
|
|||
assertSearchMatch(pId, ADDRESS_LINE_SOUNDEX_SP, ADDRESS);
|
||||
assertSearchMatch(pId, ADDRESS_LINE_SOUNDEX_SP, ADDRESS_CLOSE);
|
||||
assertNoMatch(ADDRESS_LINE_SOUNDEX_SP, ADDRESS_FAR);
|
||||
|
||||
assertSearchMatch(pId, PHONE_NUMBER_SP, PHONE);
|
||||
assertSearchMatch(pId, PHONE_NUMBER_SP, PHONE_CLOSE);
|
||||
assertNoMatch(PHONE_NUMBER_SP, PHONE_FAR);
|
||||
}
|
||||
|
||||
private void assertSearchMatch(IIdType thePId1, String theSp, String theValue) {
|
||||
|
@ -114,7 +134,7 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test
|
|||
assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), hasSize(0));
|
||||
}
|
||||
|
||||
private void createSoundexSearchParameter(String theCode, PhoneticEncoderEnum theEncoder, String theFhirPath) {
|
||||
private void createPhoneticSearchParameter(String theCode, PhoneticEncoderEnum theEncoder, String theFhirPath) {
|
||||
SearchParameter searchParameter = new SearchParameter();
|
||||
searchParameter.addBase("Patient");
|
||||
searchParameter.setCode(theCode);
|
||||
|
|
|
@ -51,7 +51,8 @@ public enum MdmMatcherEnum {
|
|||
IDENTIFIER(new IdentifierMatcher()),
|
||||
|
||||
EMPTY_FIELD(new EmptyFieldMatcher()),
|
||||
EXTENSION_ANY_ORDER(new ExtensionMatcher());
|
||||
EXTENSION_ANY_ORDER(new ExtensionMatcher()),
|
||||
NUMERIC(new HapiStringMatcher(new NumericMatcher()));
|
||||
|
||||
private final IMdmFieldMatcher myMdmFieldMatcher;
|
||||
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
package ca.uhn.fhir.mdm.rules.matcher;
|
||||
|
||||
import ca.uhn.fhir.context.phonetic.NumericEncoder;
|
||||
|
||||
// Useful for numerical identifiers like phone numbers, address parts etc.
|
||||
// This should not be used where decimals are important. A new "quantity matcher" should be added to handle cases like that.
|
||||
public class NumericMatcher implements IMdmStringMatcher {
|
||||
private final NumericEncoder encoder = new NumericEncoder();
|
||||
|
||||
@Override
|
||||
public boolean matches(String theLeftString, String theRightString) {
|
||||
String left = encoder.encode(theLeftString);
|
||||
String right = encoder.encode(theRightString);
|
||||
return left.equals(right);
|
||||
}
|
||||
}
|
|
@ -14,24 +14,33 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
|||
|
||||
public class StringMatcherR4Test extends BaseMatcherR4Test {
|
||||
private static final Logger ourLog = LoggerFactory.getLogger(StringMatcherR4Test.class);
|
||||
public static final String LEFT = "namadega";
|
||||
public static final String RIGHT = "namaedga";
|
||||
public static final String LEFT_NAME = "namadega";
|
||||
public static final String RIGHT_NAME = "namaedga";
|
||||
|
||||
@Test
|
||||
public void testNamadega() {
|
||||
assertTrue(match(MdmMatcherEnum.COLOGNE, LEFT, RIGHT));
|
||||
assertTrue(match(MdmMatcherEnum.DOUBLE_METAPHONE, LEFT, RIGHT));
|
||||
assertTrue(match(MdmMatcherEnum.MATCH_RATING_APPROACH, LEFT, RIGHT));
|
||||
assertTrue(match(MdmMatcherEnum.METAPHONE, LEFT, RIGHT));
|
||||
assertTrue(match(MdmMatcherEnum.SOUNDEX, LEFT, RIGHT));
|
||||
assertTrue(match(MdmMatcherEnum.METAPHONE, LEFT, RIGHT));
|
||||
String left = LEFT_NAME;
|
||||
String right = RIGHT_NAME;
|
||||
assertTrue(match(MdmMatcherEnum.COLOGNE, left, right));
|
||||
assertTrue(match(MdmMatcherEnum.DOUBLE_METAPHONE, left, right));
|
||||
assertTrue(match(MdmMatcherEnum.MATCH_RATING_APPROACH, left, right));
|
||||
assertTrue(match(MdmMatcherEnum.METAPHONE, left, right));
|
||||
assertTrue(match(MdmMatcherEnum.SOUNDEX, left, right));
|
||||
assertTrue(match(MdmMatcherEnum.METAPHONE, left, right));
|
||||
|
||||
assertFalse(match(MdmMatcherEnum.CAVERPHONE1, LEFT, RIGHT));
|
||||
assertFalse(match(MdmMatcherEnum.CAVERPHONE2, LEFT, RIGHT));
|
||||
assertFalse(match(MdmMatcherEnum.NYSIIS, LEFT, RIGHT));
|
||||
assertFalse(match(MdmMatcherEnum.REFINED_SOUNDEX, LEFT, RIGHT));
|
||||
assertFalse(match(MdmMatcherEnum.STRING, LEFT, RIGHT));
|
||||
assertFalse(match(MdmMatcherEnum.SUBSTRING, LEFT, RIGHT));
|
||||
assertFalse(match(MdmMatcherEnum.CAVERPHONE1, left, right));
|
||||
assertFalse(match(MdmMatcherEnum.CAVERPHONE2, left, right));
|
||||
assertFalse(match(MdmMatcherEnum.NYSIIS, left, right));
|
||||
assertFalse(match(MdmMatcherEnum.REFINED_SOUNDEX, left, right));
|
||||
assertFalse(match(MdmMatcherEnum.STRING, left, right));
|
||||
assertFalse(match(MdmMatcherEnum.SUBSTRING, left, right));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNumeric() {
|
||||
assertTrue(match(MdmMatcherEnum.NUMERIC, "4169671111", "(416) 967-1111"));
|
||||
assertFalse(match(MdmMatcherEnum.NUMERIC, "5169671111", "(416) 967-1111"));
|
||||
assertFalse(match(MdmMatcherEnum.NUMERIC, "4169671111", "(416) 967-1111x123"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue