From 550602b2f173efe8a8e33cb42322aacef7889658 Mon Sep 17 00:00:00 2001 From: Ken Stevens Date: Wed, 14 Apr 2021 13:15:30 -0400 Subject: [PATCH] added numeric matcher (#2547) * added numeric matcher * changelog * performance optimization * fix test --- .../fhir/context/phonetic/NumericEncoder.java | 18 +++++++++ .../context/phonetic/PhoneticEncoderEnum.java | 3 +- .../context/phonetic/PhoneticEncoderTest.java | 12 ++++-- .../5_4_0/2547-mdm-add-numeric-matcher.yaml | 5 +++ .../fhir/docs/server_jpa_mdm/mdm_rules.md | 12 +++++- ...esourceDaoDstu3PhoneticSearchNoFtTest.java | 32 +++++++++++++--- .../mdm/rules/matcher/MdmMatcherEnum.java | 3 +- .../mdm/rules/matcher/NumericMatcher.java | 16 ++++++++ .../rules/matcher/StringMatcherR4Test.java | 37 ++++++++++++------- 9 files changed, 110 insertions(+), 28 deletions(-) create mode 100644 hapi-fhir-base/src/main/java/ca/uhn/fhir/context/phonetic/NumericEncoder.java create mode 100644 hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_4_0/2547-mdm-add-numeric-matcher.yaml create mode 100644 hapi-fhir-server-mdm/src/main/java/ca/uhn/fhir/mdm/rules/matcher/NumericMatcher.java diff --git a/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/phonetic/NumericEncoder.java b/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/phonetic/NumericEncoder.java new file mode 100644 index 00000000000..1619748d470 --- /dev/null +++ b/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/phonetic/NumericEncoder.java @@ -0,0 +1,18 @@ +package ca.uhn.fhir.context.phonetic; + +import com.google.common.base.CharMatcher; + +// Useful for numerical identifiers like phone numbers, address parts etc. +// This should not be used where decimals are important. A new "quantity encoder" should be added to handle cases like that. 
+public class NumericEncoder implements IPhoneticEncoder { + @Override + public String name() { + return "NUMERIC"; + } + + @Override + public String encode(String theString) { + // Remove everything but the numbers + return CharMatcher.inRange('0', '9').retainFrom(theString); + } +} diff --git a/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/phonetic/PhoneticEncoderEnum.java b/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/phonetic/PhoneticEncoderEnum.java index 28549a71629..605a8ae24ca 100644 --- a/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/phonetic/PhoneticEncoderEnum.java +++ b/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/phonetic/PhoneticEncoderEnum.java @@ -39,7 +39,8 @@ public enum PhoneticEncoderEnum { METAPHONE(new ApacheEncoder("METAPHONE", new Metaphone())), NYSIIS(new ApacheEncoder("NYSIIS", new Nysiis())), REFINED_SOUNDEX(new ApacheEncoder("REFINED_SOUNDEX", new RefinedSoundex())), - SOUNDEX(new ApacheEncoder("SOUNDEX", new Soundex())); + SOUNDEX(new ApacheEncoder("SOUNDEX", new Soundex())), + NUMERIC(new NumericEncoder()); private final IPhoneticEncoder myPhoneticEncoder; diff --git a/hapi-fhir-base/src/test/java/ca/uhn/fhir/context/phonetic/PhoneticEncoderTest.java b/hapi-fhir-base/src/test/java/ca/uhn/fhir/context/phonetic/PhoneticEncoderTest.java index bca150978cb..e43327eb818 100644 --- a/hapi-fhir-base/src/test/java/ca/uhn/fhir/context/phonetic/PhoneticEncoderTest.java +++ b/hapi-fhir-base/src/test/java/ca/uhn/fhir/context/phonetic/PhoneticEncoderTest.java @@ -1,14 +1,14 @@ package ca.uhn.fhir.context.phonetic; -import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.hamcrest.Matchers.startsWith; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.endsWith; +import static org.hamcrest.Matchers.startsWith; +import static 
org.junit.jupiter.api.Assertions.assertEquals; class PhoneticEncoderTest { private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderTest.class); @@ -23,7 +23,11 @@ class PhoneticEncoderTest { public void testEncodeAddress(PhoneticEncoderEnum thePhoneticEncoderEnum) { String encoded = thePhoneticEncoderEnum.getPhoneticEncoder().encode(ADDRESS_LINE); ourLog.info("{}: {}", thePhoneticEncoderEnum.name(), encoded); - assertThat(encoded, startsWith(NUMBER + " ")); - assertThat(encoded, endsWith(" " + SUITE)); + if (thePhoneticEncoderEnum == PhoneticEncoderEnum.NUMERIC) { + assertEquals(NUMBER + SUITE, encoded); + } else { + assertThat(encoded, startsWith(NUMBER + " ")); + assertThat(encoded, endsWith(" " + SUITE)); + } } } diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_4_0/2547-mdm-add-numeric-matcher.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_4_0/2547-mdm-add-numeric-matcher.yaml new file mode 100644 index 00000000000..24aace91fdf --- /dev/null +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/5_4_0/2547-mdm-add-numeric-matcher.yaml @@ -0,0 +1,5 @@ +--- +type: add +issue: 2547 +title: "Added a new NUMERIC MDM matcher for matching phone numbers. Also added a NUMERIC phonetic encoder to support +adding a NUMERIC-encoded search parameter (e.g. if searching for matching phone numbers is required by MDM candidate searching)." 
diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_mdm/mdm_rules.md b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_mdm/mdm_rules.md index a4fea7f82d0..3075b67543d 100644 --- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_mdm/mdm_rules.md +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/docs/server_jpa_mdm/mdm_rules.md @@ -292,10 +292,10 @@ The following algorithms are currently supported: Gail = Gael, Gail != Gale, Thomas != Tom - CAVERPHONE1 + CAVERPHONE2 matcher - Apache Caverphone1 + Apache Caverphone2 Gail = Gael, Gail = Gale, Thomas != Tom @@ -379,6 +379,14 @@ The following algorithms are currently supported: 2019-12,Month = 2019-12-19,Day + + NUMERIC + matcher + + Remove all non-numeric characters from the string before comparing. + + 4169671111 = (416) 967-1111 + NAME_ANY_ORDER matcher diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/dao/dstu3/FhirResourceDaoDstu3PhoneticSearchNoFtTest.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/dao/dstu3/FhirResourceDaoDstu3PhoneticSearchNoFtTest.java index bda399dbd76..75b74c87aa8 100644 --- a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/dao/dstu3/FhirResourceDaoDstu3PhoneticSearchNoFtTest.java +++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/dao/dstu3/FhirResourceDaoDstu3PhoneticSearchNoFtTest.java @@ -1,12 +1,13 @@ package ca.uhn.fhir.jpa.dao.dstu3; import ca.uhn.fhir.context.phonetic.ApacheEncoder; +import ca.uhn.fhir.context.phonetic.NumericEncoder; import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum; import ca.uhn.fhir.jpa.api.config.DaoConfig; import ca.uhn.fhir.jpa.model.entity.ResourceIndexedSearchParamString; import ca.uhn.fhir.jpa.searchparam.SearchParameterMap; -import ca.uhn.fhir.rest.server.util.ISearchParamRegistry; import ca.uhn.fhir.rest.param.StringParam; +import ca.uhn.fhir.rest.server.util.ISearchParamRegistry; import ca.uhn.fhir.util.HapiExtensions; import 
org.apache.commons.codec.language.Soundex; import org.hl7.fhir.dstu3.model.Enumerations; @@ -35,10 +36,14 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test public static final String GAIL = "Gail"; public static final String NAME_SOUNDEX_SP = "nameSoundex"; public static final String ADDRESS_LINE_SOUNDEX_SP = "addressLineSoundex"; + public static final String PHONE_NUMBER_SP = "phoneNumber"; private static final String BOB = "BOB"; private static final String ADDRESS = "123 Nohili St"; private static final String ADDRESS_CLOSE = "123 Nohily St"; private static final String ADDRESS_FAR = "123 College St"; + private static final String PHONE = "4169671111"; + private static final String PHONE_CLOSE = "(416) 967-1111"; + private static final String PHONE_FAR = "416 421 0421"; @Autowired ISearchParamRegistry mySearchParamRegistry; @@ -49,8 +54,9 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test myDaoConfig.setReuseCachedSearchResultsForMillis(null); myDaoConfig.setFetchSizeDefaultMaximum(new DaoConfig().getFetchSizeDefaultMaximum()); - createSoundexSearchParameter(NAME_SOUNDEX_SP, PhoneticEncoderEnum.SOUNDEX, "Patient.name"); - createSoundexSearchParameter(ADDRESS_LINE_SOUNDEX_SP, PhoneticEncoderEnum.SOUNDEX, "Patient.address.line"); + createPhoneticSearchParameter(NAME_SOUNDEX_SP, PhoneticEncoderEnum.SOUNDEX, "Patient.name"); + createPhoneticSearchParameter(ADDRESS_LINE_SOUNDEX_SP, PhoneticEncoderEnum.SOUNDEX, "Patient.address.line"); + createPhoneticSearchParameter(PHONE_NUMBER_SP, PhoneticEncoderEnum.NUMERIC, "Patient.telecom"); mySearchParamRegistry.forceRefresh(); mySearchParamRegistry.setPhoneticEncoder(new ApacheEncoder(PhoneticEncoderEnum.SOUNDEX.name(), new Soundex())); } @@ -70,6 +76,15 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test ourLog.info("Encoded address: {}", soundex.encode(ADDRESS)); } + @Test + public void testNumeric() { + NumericEncoder numeric 
= new NumericEncoder(); + assertEquals(PHONE, numeric.encode(PHONE_CLOSE)); + assertEquals(PHONE, numeric.encode(PHONE)); + assertEquals(numeric.encode(PHONE), numeric.encode(PHONE_CLOSE)); + assertNotEquals(numeric.encode(PHONE), numeric.encode(PHONE_FAR)); + } + @Test public void phoneticMatch() { Patient patient; @@ -77,15 +92,16 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test patient = new Patient(); patient.addName().addGiven(GALE); patient.addAddress().addLine(ADDRESS); + patient.addTelecom().setValue(PHONE); ourLog.info(myFhirCtx.newJsonParser().setPrettyPrint(true).encodeResourceToString(patient)); IIdType pId = myPatientDao.create(patient, mySrd).getId().toUnqualifiedVersionless(); List stringParams = myResourceIndexedSearchParamStringDao.findAll(); - assertThat(stringParams, hasSize(6)); + assertThat(stringParams, hasSize(7)); List stringParamNames = stringParams.stream().map(ResourceIndexedSearchParamString::getParamName).collect(Collectors.toList()); - assertThat(stringParamNames, containsInAnyOrder(Patient.SP_NAME, Patient.SP_GIVEN, Patient.SP_PHONETIC, NAME_SOUNDEX_SP, Patient.SP_ADDRESS, ADDRESS_LINE_SOUNDEX_SP)); + assertThat(stringParamNames, containsInAnyOrder(Patient.SP_NAME, Patient.SP_GIVEN, Patient.SP_PHONETIC, NAME_SOUNDEX_SP, Patient.SP_ADDRESS, ADDRESS_LINE_SOUNDEX_SP, PHONE_NUMBER_SP)); assertSearchMatch(pId, Patient.SP_PHONETIC, GALE); assertSearchMatch(pId, Patient.SP_PHONETIC, GAIL); @@ -98,6 +114,10 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test assertSearchMatch(pId, ADDRESS_LINE_SOUNDEX_SP, ADDRESS); assertSearchMatch(pId, ADDRESS_LINE_SOUNDEX_SP, ADDRESS_CLOSE); assertNoMatch(ADDRESS_LINE_SOUNDEX_SP, ADDRESS_FAR); + + assertSearchMatch(pId, PHONE_NUMBER_SP, PHONE); + assertSearchMatch(pId, PHONE_NUMBER_SP, PHONE_CLOSE); + assertNoMatch(PHONE_NUMBER_SP, PHONE_FAR); } private void assertSearchMatch(IIdType thePId1, String theSp, String theValue) { @@ -114,7 
+134,7 @@ public class FhirResourceDaoDstu3PhoneticSearchNoFtTest extends BaseJpaDstu3Test assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), hasSize(0)); } - private void createSoundexSearchParameter(String theCode, PhoneticEncoderEnum theEncoder, String theFhirPath) { + private void createPhoneticSearchParameter(String theCode, PhoneticEncoderEnum theEncoder, String theFhirPath) { SearchParameter searchParameter = new SearchParameter(); searchParameter.addBase("Patient"); searchParameter.setCode(theCode); diff --git a/hapi-fhir-server-mdm/src/main/java/ca/uhn/fhir/mdm/rules/matcher/MdmMatcherEnum.java b/hapi-fhir-server-mdm/src/main/java/ca/uhn/fhir/mdm/rules/matcher/MdmMatcherEnum.java index 458387d14d3..f29dad1827c 100644 --- a/hapi-fhir-server-mdm/src/main/java/ca/uhn/fhir/mdm/rules/matcher/MdmMatcherEnum.java +++ b/hapi-fhir-server-mdm/src/main/java/ca/uhn/fhir/mdm/rules/matcher/MdmMatcherEnum.java @@ -51,7 +51,8 @@ public enum MdmMatcherEnum { IDENTIFIER(new IdentifierMatcher()), EMPTY_FIELD(new EmptyFieldMatcher()), - EXTENSION_ANY_ORDER(new ExtensionMatcher()); + EXTENSION_ANY_ORDER(new ExtensionMatcher()), + NUMERIC(new HapiStringMatcher(new NumericMatcher())); private final IMdmFieldMatcher myMdmFieldMatcher; diff --git a/hapi-fhir-server-mdm/src/main/java/ca/uhn/fhir/mdm/rules/matcher/NumericMatcher.java b/hapi-fhir-server-mdm/src/main/java/ca/uhn/fhir/mdm/rules/matcher/NumericMatcher.java new file mode 100644 index 00000000000..82bce7d59c0 --- /dev/null +++ b/hapi-fhir-server-mdm/src/main/java/ca/uhn/fhir/mdm/rules/matcher/NumericMatcher.java @@ -0,0 +1,16 @@ +package ca.uhn.fhir.mdm.rules.matcher; + +import ca.uhn.fhir.context.phonetic.NumericEncoder; + +// Useful for numerical identifiers like phone numbers, address parts etc. +// This should not be used where decimals are important. A new "quantity matcher" should be added to handle cases like that. 
+public class NumericMatcher implements IMdmStringMatcher { + private final NumericEncoder encoder = new NumericEncoder(); + + @Override + public boolean matches(String theLeftString, String theRightString) { + String left = encoder.encode(theLeftString); + String right = encoder.encode(theRightString); + return left.equals(right); + } +} diff --git a/hapi-fhir-server-mdm/src/test/java/ca/uhn/fhir/mdm/rules/matcher/StringMatcherR4Test.java b/hapi-fhir-server-mdm/src/test/java/ca/uhn/fhir/mdm/rules/matcher/StringMatcherR4Test.java index 42508046adc..73aafb7aebe 100644 --- a/hapi-fhir-server-mdm/src/test/java/ca/uhn/fhir/mdm/rules/matcher/StringMatcherR4Test.java +++ b/hapi-fhir-server-mdm/src/test/java/ca/uhn/fhir/mdm/rules/matcher/StringMatcherR4Test.java @@ -14,24 +14,33 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class StringMatcherR4Test extends BaseMatcherR4Test { private static final Logger ourLog = LoggerFactory.getLogger(StringMatcherR4Test.class); - public static final String LEFT = "namadega"; - public static final String RIGHT = "namaedga"; + public static final String LEFT_NAME = "namadega"; + public static final String RIGHT_NAME = "namaedga"; @Test public void testNamadega() { - assertTrue(match(MdmMatcherEnum.COLOGNE, LEFT, RIGHT)); - assertTrue(match(MdmMatcherEnum.DOUBLE_METAPHONE, LEFT, RIGHT)); - assertTrue(match(MdmMatcherEnum.MATCH_RATING_APPROACH, LEFT, RIGHT)); - assertTrue(match(MdmMatcherEnum.METAPHONE, LEFT, RIGHT)); - assertTrue(match(MdmMatcherEnum.SOUNDEX, LEFT, RIGHT)); - assertTrue(match(MdmMatcherEnum.METAPHONE, LEFT, RIGHT)); + String left = LEFT_NAME; + String right = RIGHT_NAME; + assertTrue(match(MdmMatcherEnum.COLOGNE, left, right)); + assertTrue(match(MdmMatcherEnum.DOUBLE_METAPHONE, left, right)); + assertTrue(match(MdmMatcherEnum.MATCH_RATING_APPROACH, left, right)); + assertTrue(match(MdmMatcherEnum.METAPHONE, left, right)); + assertTrue(match(MdmMatcherEnum.SOUNDEX, left, right)); + 
assertTrue(match(MdmMatcherEnum.METAPHONE, left, right)); - assertFalse(match(MdmMatcherEnum.CAVERPHONE1, LEFT, RIGHT)); - assertFalse(match(MdmMatcherEnum.CAVERPHONE2, LEFT, RIGHT)); - assertFalse(match(MdmMatcherEnum.NYSIIS, LEFT, RIGHT)); - assertFalse(match(MdmMatcherEnum.REFINED_SOUNDEX, LEFT, RIGHT)); - assertFalse(match(MdmMatcherEnum.STRING, LEFT, RIGHT)); - assertFalse(match(MdmMatcherEnum.SUBSTRING, LEFT, RIGHT)); + assertFalse(match(MdmMatcherEnum.CAVERPHONE1, left, right)); + assertFalse(match(MdmMatcherEnum.CAVERPHONE2, left, right)); + assertFalse(match(MdmMatcherEnum.NYSIIS, left, right)); + assertFalse(match(MdmMatcherEnum.REFINED_SOUNDEX, left, right)); + assertFalse(match(MdmMatcherEnum.STRING, left, right)); + assertFalse(match(MdmMatcherEnum.SUBSTRING, left, right)); + } + + @Test + public void testNumeric() { + assertTrue(match(MdmMatcherEnum.NUMERIC, "4169671111", "(416) 967-1111")); + assertFalse(match(MdmMatcherEnum.NUMERIC, "5169671111", "(416) 967-1111")); + assertFalse(match(MdmMatcherEnum.NUMERIC, "4169671111", "(416) 967-1111x123")); } @Test