Merge pull request #2410 from hapifhir/2409-support-fhirpath-in-field-matchers

Support FHIRPath in field matchers if present
This commit is contained in:
Tadgh 2021-02-23 09:48:14 -05:00 committed by GitHub
commit c92f302d41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 329 additions and 42 deletions

View File

@ -51,7 +51,7 @@ Here is an example of a full HAPI MDM rules json document:
{
"name": "firstname-meta",
"resourceType": "Patient",
"resourcePath": "name.given",
"fhirPath": "name.given.first()",
"matcher": {
"algorithm": "METAPHONE"
}
@ -173,8 +173,8 @@ Here is a matcher matchField that uses the SOUNDEX matcher to determine whether
```json
{
"name": "familyname-soundex",
"resourceType": "*",
"name": "familyname-soundex",
"resourceType": "*",
"resourcePath": "name.family",
"matcher": {
"algorithm": "SOUNDEX"
@ -196,6 +196,53 @@ Here is a matcher matchField that only matches when two family names are identic
}
```
While it is often suitable to use the `resourcePath` field to indicate the location of the data to be matched, occasionally you will need more direct control over precisely which fields are matched. When performing string matching, the matcher will indiscriminately try to match all elements of the left resource to all elements of the right resource. For example, consider the following two patients and matcher.
```json
{
"resourceType": "Patient",
"name": [{
"given": ["Frank", "John"]
}]
}
```
```json
{
"resourceType": "Patient",
"name": [{
"given": ["John", "Frank"]
}]
}
```
```json
{
"name": "firstname-meta",
"resourceType": "Patient",
"resourcePath": "name.given",
"matcher": {
"algorithm": "METAPHONE"
}
}
```
In this example, these two patients would match, as the matcher will compare all elements of `["John", "Frank"]` to all elements of `["Frank", "John"]` and find that there are matches. This is when you would want to use a FHIRPath matcher, as FHIRPath expressions give you more direct control. The following example shows a matcher that would cause these two patients not to match each other.
```json
{
"name": "firstname-meta-fhirpath",
"resourceType": "Patient",
"fhirPath": "name.given[0]",
"matcher": {
"algorithm": "METAPHONE"
}
}
```
Since FHIRPath expressions support indexing, you can directly indicate that only the first `given` element of each resource should be compared.
Special identifier matching is also available if you need to match on a particular identifier system:
```json
{

View File

@ -23,6 +23,7 @@ package ca.uhn.fhir.mdm.rules.config;
import ca.uhn.fhir.context.ConfigurationException;
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeResourceDefinition;
import ca.uhn.fhir.fhirpath.IFhirPath;
import ca.uhn.fhir.mdm.api.MdmConstants;
import ca.uhn.fhir.mdm.api.IMdmRuleValidator;
import ca.uhn.fhir.mdm.rules.json.MdmFieldMatchJson;
@ -33,7 +34,9 @@ import ca.uhn.fhir.mdm.rules.json.MdmSimilarityJson;
import ca.uhn.fhir.parser.DataFormatException;
import ca.uhn.fhir.rest.server.util.ISearchParamRetriever;
import ca.uhn.fhir.util.FhirTerser;
import org.hl7.fhir.instance.model.api.IBase;
import org.hl7.fhir.instance.model.api.IBaseResource;
import org.hl7.fhir.r4.model.Patient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@ -51,16 +54,14 @@ public class MdmRuleValidator implements IMdmRuleValidator {
private final FhirContext myFhirContext;
private final ISearchParamRetriever mySearchParamRetriever;
private final Class<? extends IBaseResource> myPatientClass;
private final Class<? extends IBaseResource> myPractitionerClass;
private final FhirTerser myTerser;
private final IFhirPath myFhirPath;
@Autowired
public MdmRuleValidator(FhirContext theFhirContext, ISearchParamRetriever theSearchParamRetriever) {
myFhirContext = theFhirContext;
myPatientClass = theFhirContext.getResourceDefinition("Patient").getImplementingClass();
myPractitionerClass = theFhirContext.getResourceDefinition("Practitioner").getImplementingClass();
myTerser = myFhirContext.newTerser();
myFhirPath = myFhirContext.newFhirPath();
mySearchParamRetriever = theSearchParamRetriever;
}
@ -158,20 +159,44 @@ public class MdmRuleValidator implements IMdmRuleValidator {
}
private void validateFieldPathForType(String theResourceType, MdmFieldMatchJson theFieldMatch) {
ourLog.debug(" validating resource {} for {} ", theResourceType, theFieldMatch.getResourcePath());
ourLog.debug("Validating resource {} for {} ", theResourceType, theFieldMatch.getResourcePath());
try {
RuntimeResourceDefinition resourceDefinition = myFhirContext.getResourceDefinition(theResourceType);
Class<? extends IBaseResource> implementingClass = resourceDefinition.getImplementingClass();
String path = theResourceType + "." + theFieldMatch.getResourcePath();
myTerser.getDefinition(implementingClass, path);
} catch (DataFormatException | ConfigurationException | ClassCastException e) {
throw new ConfigurationException("MatchField " +
if (theFieldMatch.getFhirPath() != null && theFieldMatch.getResourcePath() != null) {
throw new ConfigurationException("MatchField [" +
theFieldMatch.getName() +
" resourceType " +
"] resourceType [" +
theFieldMatch.getResourceType() +
" has invalid path '" + theFieldMatch.getResourcePath() + "'. " +
e.getMessage());
"] has defined both a resourcePath and a fhirPath. You must define one of the two.");
}
if (theFieldMatch.getResourcePath() == null && theFieldMatch.getFhirPath() == null) {
throw new ConfigurationException("MatchField [" +
theFieldMatch.getName() +
"] resourceType [" +
theFieldMatch.getResourceType() +
"] has defined neither a resourcePath or a fhirPath. You must define one of the two.");
}
if (theFieldMatch.getResourcePath() != null) {
try { //Try to validate the struture definition path
RuntimeResourceDefinition resourceDefinition = myFhirContext.getResourceDefinition(theResourceType);
Class<? extends IBaseResource> implementingClass = resourceDefinition.getImplementingClass();
String path = theResourceType + "." + theFieldMatch.getResourcePath();
myTerser.getDefinition(implementingClass, path);
} catch (DataFormatException | ConfigurationException | ClassCastException e) {
//No fallback here: a resourcePath that the terser cannot resolve is reported as a configuration error.
throw new ConfigurationException("MatchField " +
theFieldMatch.getName() +
" resourceType " +
theFieldMatch.getResourceType() +
" has invalid path '" + theFieldMatch.getResourcePath() + "'. " + e.getMessage());
}
} else { //Try to validate the FHIRPath
try {
myFhirPath.parse(theResourceType + "." + theFieldMatch.getFhirPath());
} catch (Exception e) {
throw new ConfigurationException("MatchField [" + theFieldMatch.getName() + "] resourceType [" + theFieldMatch.getResourceType() + "] has failed FHIRPath evaluation. " + e.getMessage());
}
}
}

View File

@ -44,9 +44,12 @@ public class MdmFieldMatchJson implements IModelJson {
@JsonProperty(value = "resourceType", required = true)
String myResourceType;
@JsonProperty(value = "resourcePath", required = true)
@JsonProperty(value = "resourcePath", required = false)
String myResourcePath;
@JsonProperty(value = "fhirPath", required = false)
String myFhirPath;
@JsonProperty(value = "matcher", required = false)
MdmMatcherJson myMatcher;
@ -112,4 +115,13 @@ public class MdmFieldMatchJson implements IModelJson {
}
throw new InternalErrorException("Field Match " + myName + " has neither a matcher nor a similarity.");
}
/**
 * Returns the FHIRPath expression stored on this field match.
 *
 * @return the stored FHIRPath expression, or {@code null} if none has been set
 */
public String getFhirPath() {
	return this.myFhirPath;
}
/**
 * Stores the FHIRPath expression for this field match.
 *
 * @param theFhirPath the FHIRPath expression to store
 * @return this instance, so that setter calls can be chained
 */
public MdmFieldMatchJson setFhirPath(String theFhirPath) {
	this.myFhirPath = theFhirPath;
	return this;
}
}

View File

@ -21,6 +21,7 @@ package ca.uhn.fhir.mdm.rules.svc;
*/
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.fhirpath.IFhirPath;
import ca.uhn.fhir.mdm.api.MdmMatchEvaluation;
import ca.uhn.fhir.mdm.rules.json.MdmFieldMatchJson;
import ca.uhn.fhir.mdm.rules.json.MdmRulesJson;
@ -43,16 +44,20 @@ public class MdmResourceFieldMatcher {
private final MdmFieldMatchJson myMdmFieldMatchJson;
private final String myResourceType;
private final String myResourcePath;
private final String myFhirPath;
private final MdmRulesJson myMdmRulesJson;
private final String myName;
private final boolean myIsFhirPathExpression;
/**
 * Creates a matcher for a single match-field rule.
 *
 * @param theFhirContext       the FHIR context kept for later use by this matcher
 * @param theMdmFieldMatchJson the match-field rule; its name, resource type, resource path
 *                             and FHIRPath expression are copied into this matcher
 * @param theMdmRulesJson      the rule set kept alongside the field rule
 */
public MdmResourceFieldMatcher(FhirContext theFhirContext, MdmFieldMatchJson theMdmFieldMatchJson, MdmRulesJson theMdmRulesJson) {
	// Collaborators handed in by the caller
	myFhirContext = theFhirContext;
	myMdmRulesJson = theMdmRulesJson;
	myMdmFieldMatchJson = theMdmFieldMatchJson;

	// Values copied out of the rule definition
	myName = theMdmFieldMatchJson.getName();
	myResourceType = theMdmFieldMatchJson.getResourceType();
	myResourcePath = theMdmFieldMatchJson.getResourcePath();
	myFhirPath = theMdmFieldMatchJson.getFhirPath();

	// The flag is set exactly when the rule supplied a fhirPath
	myIsFhirPathExpression = (myFhirPath != null);
}
/**
@ -71,9 +76,18 @@ public class MdmResourceFieldMatcher {
validate(theLeftResource);
validate(theRightResource);
FhirTerser terser = myFhirContext.newTerser();
List<IBase> leftValues = terser.getValues(theLeftResource, myResourcePath, IBase.class);
List<IBase> rightValues = terser.getValues(theRightResource, myResourcePath, IBase.class);
List<IBase> leftValues;
List<IBase> rightValues;
if (myIsFhirPathExpression) {
IFhirPath fhirPath = myFhirContext.newFhirPath();
leftValues = fhirPath.evaluate(theLeftResource, myFhirPath, IBase.class);
rightValues = fhirPath.evaluate(theRightResource, myFhirPath, IBase.class);
} else {
FhirTerser fhirTerser = myFhirContext.newTerser();
leftValues = fhirTerser.getValues(theLeftResource, myResourcePath, IBase.class);
rightValues = fhirTerser.getValues(theRightResource, myResourcePath, IBase.class);
}
return match(leftValues, rightValues);
}

View File

@ -144,7 +144,8 @@ public class MdmResourceMatcherSvc {
return retVal;
}
private boolean isValidResourceType(String theResourceType, String theFieldComparatorType) {
private boolean isValidResourceType(String theResourceType, String theFieldComparatorType) {
return (
theFieldComparatorType.equalsIgnoreCase(MdmConstants.ALL_RESOURCE_SEARCH_PARAM_TYPE)
|| theFieldComparatorType.equalsIgnoreCase(theResourceType)

View File

@ -71,6 +71,36 @@ public class MdmRuleValidatorTest extends BaseR4Test {
}
}
/**
 * Loading a rule file whose fhirPath uses an unknown function must be rejected
 * with a ConfigurationException carrying the FHIRPath failure message.
 */
@Test
public void testMatcherBadFhirPath() throws IOException {
	final String expectedPrefix = "MatchField [given-name] resourceType [Patient] has failed FHIRPath evaluation. Error in ?? at 1, 1: The name blurst is not a valid function name";
	try {
		setMdmRuleJson("bad-rules-bad-fhirpath.json");
	} catch (ConfigurationException rejected) {
		assertThat(rejected.getMessage(), startsWith(expectedPrefix));
		return;
	}
	// Reaching this point means the rules were accepted, which is the failure case
	fail();
}
/**
 * Loading a rule file whose match field has neither a resourcePath nor a fhirPath
 * must be rejected with a ConfigurationException.
 */
@Test
public void testBadRulesMissingBothPaths() throws IOException {
	final String expectedPrefix = "MatchField [given-name] resourceType [Patient] has defined neither a resourcePath or a fhirPath. You must define one of the two.";
	try {
		setMdmRuleJson("bad-rules-no-path.json");
	} catch (ConfigurationException rejected) {
		assertThat(rejected.getMessage(), startsWith(expectedPrefix));
		return;
	}
	// Reaching this point means the rules were accepted, which is the failure case
	fail();
}
/**
 * Loading a rule file whose match field defines both a resourcePath and a fhirPath
 * must be rejected with a ConfigurationException.
 */
@Test
public void testBadRulesBothPathsFilled() throws IOException {
	final String expectedPrefix = "MatchField [given-name] resourceType [Patient] has defined both a resourcePath and a fhirPath. You must define one of the two.";
	try {
		setMdmRuleJson("bad-rules-both-paths.json");
	} catch (ConfigurationException rejected) {
		assertThat(rejected.getMessage(), startsWith(expectedPrefix));
		return;
	}
	// Reaching this point means the rules were accepted, which is the failure case
	fail();
}
@Test
public void testMatcherBadSearchParam() throws IOException {
try {

View File

@ -16,6 +16,7 @@ import java.util.Arrays;
public abstract class BaseMdmRulesR4Test extends BaseR4Test {
public static final String PATIENT_GIVEN = "patient-given";
public static final String PATIENT_GIVEN_FIRST = "patient-given-first";
public static final String PATIENT_FAMILY = "patient-last";
public static final double NAME_THRESHOLD = 0.8;
@ -36,6 +37,7 @@ public abstract class BaseMdmRulesR4Test extends BaseR4Test {
.setResourceType("Patient")
.setResourcePath("name.given")
.setSimilarity(new MdmSimilarityJson().setAlgorithm(MdmSimilarityEnum.COSINE).setMatchThreshold(NAME_THRESHOLD));
myBothNameFields = String.join(",", PATIENT_GIVEN, PATIENT_FAMILY);
}

View File

@ -0,0 +1,116 @@
package ca.uhn.fhir.mdm.rules.svc;
import ca.uhn.fhir.context.RuntimeSearchParam;
import ca.uhn.fhir.mdm.api.MdmMatchOutcome;
import ca.uhn.fhir.mdm.api.MdmMatchResultEnum;
import ca.uhn.fhir.mdm.rules.json.MdmFieldMatchJson;
import ca.uhn.fhir.mdm.rules.json.MdmMatcherJson;
import ca.uhn.fhir.mdm.rules.json.MdmRulesJson;
import ca.uhn.fhir.mdm.rules.matcher.MdmMatcherEnum;
import org.hl7.fhir.r4.model.HumanName;
import org.hl7.fhir.r4.model.Patient;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
public class FhirPathResourceMatcherR4Test extends BaseMdmRulesR4Test {
private static final String MATCH_FIELDS = PATIENT_GIVEN_FIRST + "," + PATIENT_GIVEN;
private Patient myLeft;
private Patient myRight;
@Override
@BeforeEach
public void before() {
super.before();
when(mySearchParamRetriever.getActiveSearchParam("Patient", "birthdate")).thenReturn(mock(RuntimeSearchParam.class));
when(mySearchParamRetriever.getActiveSearchParam("Patient", "identifier")).thenReturn(mock(RuntimeSearchParam.class));
when(mySearchParamRetriever.getActiveSearchParam("Patient", "active")).thenReturn(mock(RuntimeSearchParam.class));
{
myLeft = new Patient();
HumanName name = myLeft.addName();
name.addGiven("Gary");
name.addGiven("John");
myLeft.setId("Patient/1");
}
{
myRight = new Patient();
HumanName name = myRight.addName();
name.addGiven("John");
name.addGiven("Gary");
myRight.setId("Patient/2");
}
}
@Test
public void testFhirPathOrderedMatches() {
MdmResourceMatcherSvc matcherSvc = buildMatcher(buildOrderedGivenNameRules(MdmMatcherEnum.STRING));
myLeft = new Patient();
HumanName name = myLeft.addName();
name.addGiven("Gary");
name.addGiven("John");
myLeft.setId("Patient/1");
myRight = new Patient();
HumanName name2 = myRight.addName();
name2.addGiven("John");
name2.addGiven("Gary");
myRight.setId("Patient/2");
MdmMatchOutcome result = matcherSvc.match(myLeft, myRight);
assertMatchResult(MdmMatchResultEnum.NO_MATCH, 0L, 0.0, false, false, result);
myRight = new Patient();
name = myRight.addName();
name.addGiven("John");
name.addGiven("Gary");
myRight.setId("Patient/2");
myLeft = new Patient();
name2 = myLeft.addName();
name2.addGiven("Frank");
name2.addGiven("Gary");
myLeft.setId("Patient/1");
result = matcherSvc.match(myLeft, myRight);
assertMatchResult(MdmMatchResultEnum.POSSIBLE_MATCH, 1L, 1.0, false, false, result);
}
@Test
public void testStringMatchResult() {
MdmResourceMatcherSvc matcherSvc = buildMatcher(buildOrderedGivenNameRules(MdmMatcherEnum.STRING));
MdmMatchOutcome result = matcherSvc.match(myLeft, myRight);
assertMatchResult(MdmMatchResultEnum.NO_MATCH, 0L, 0.0, false, false, result);
}
protected MdmRulesJson buildOrderedGivenNameRules(MdmMatcherEnum theMatcherEnum) {
MdmFieldMatchJson firstGivenNameMatchField = new MdmFieldMatchJson()
.setName(PATIENT_GIVEN_FIRST)
.setResourceType("Patient")
.setFhirPath("name.given.first()")
.setMatcher(new MdmMatcherJson().setAlgorithm(theMatcherEnum));
MdmFieldMatchJson secondGivenNameMatchField = new MdmFieldMatchJson()
.setName(PATIENT_GIVEN)
.setResourceType("Patient")
.setFhirPath("name.given[1]")
.setMatcher(new MdmMatcherJson().setAlgorithm(theMatcherEnum));
MdmRulesJson retval = new MdmRulesJson();
retval.setVersion("test version");
retval.addMatchField(secondGivenNameMatchField);
retval.addMatchField(firstGivenNameMatchField);
retval.setMdmTypes(Arrays.asList("Patient"));
retval.putMatchResult(MATCH_FIELDS, MdmMatchResultEnum.MATCH);
retval.putMatchResult(PATIENT_GIVEN_FIRST, MdmMatchResultEnum.POSSIBLE_MATCH);
retval.putMatchResult(PATIENT_GIVEN, MdmMatchResultEnum.POSSIBLE_MATCH);
return retval;
}
}

View File

@ -30,7 +30,6 @@ public class MdmResourceFieldMatcherR4Test extends BaseMdmRulesR4Test {
@BeforeEach
public void before() {
super.before();
myComparator = new MdmResourceFieldMatcher(ourFhirContext, myGivenNameMatchField, myMdmRulesJson);
myJohn = buildJohn();
myJohny = buildJohny();
@ -91,22 +90,6 @@ public class MdmResourceFieldMatcherR4Test extends BaseMdmRulesR4Test {
}
}
@Test
public void testBadPath() {
try {
MdmFieldMatchJson matchField = new MdmFieldMatchJson()
.setName("patient-foo")
.setResourceType("Patient")
.setResourcePath("foo")
.setSimilarity(new MdmSimilarityJson().setAlgorithm(MdmSimilarityEnum.COSINE).setMatchThreshold(NAME_THRESHOLD));
MdmResourceFieldMatcher comparator = new MdmResourceFieldMatcher(ourFhirContext, matchField, myMdmRulesJson);
comparator.match(myJohn, myJohny);
fail();
} catch (DataFormatException e) {
assertThat(e.getMessage(), startsWith("Unknown child name 'foo' in element Patient"));
}
}
@Test
public void testMatch() {
assertTrue(myComparator.match(myJohn, myJohny).match);

View File

@ -7,12 +7,15 @@ import ca.uhn.fhir.mdm.rules.json.MdmFieldMatchJson;
import ca.uhn.fhir.mdm.rules.json.MdmMatcherJson;
import ca.uhn.fhir.mdm.rules.json.MdmRulesJson;
import ca.uhn.fhir.mdm.rules.matcher.MdmMatcherEnum;
import ca.uhn.fhir.util.StopWatch;
import org.hl7.fhir.r4.model.HumanName;
import org.hl7.fhir.r4.model.Patient;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@ -56,8 +59,8 @@ public class ResourceMatcherR4Test extends BaseMdmRulesR4Test {
@Test
public void testMetaphoneMatchResult() {
MdmResourceMatcherSvc matcherSvc = buildMatcher(buildNamePhoneRules(MdmMatcherEnum.METAPHONE));
MdmMatchOutcome result = matcherSvc.match(myLeft, myRight);
assertMatchResult(MdmMatchResultEnum.MATCH, 7L, 3.0, false, false, result);
MdmMatchOutcome result = matcherSvc.match(myLeft, myRight);
assertMatchResult(MdmMatchResultEnum.MATCH, 7L, 3.0, false, false, result);
}
@Test

View File

@ -0,0 +1,18 @@
{
"version": "1",
"mdmTypes": ["Patient", "Practitioner", "Medication"],
"candidateSearchParams" : [],
"candidateFilterSearchParams" : [],
"matchFields" : [ {
"name" : "given-name",
"resourceType" : "Patient",
"fhirPath" : "name.given.blurst()",
"matcher" : {
"algorithm": "STRING",
"exact" : true
}
}],
"matchResultMap" : {
"given-name" : "POSSIBLE_MATCH"
}
}

View File

@ -0,0 +1,19 @@
{
"version": "1",
"mdmTypes": ["Patient", "Practitioner", "Medication"],
"candidateSearchParams" : [],
"candidateFilterSearchParams" : [],
"matchFields" : [ {
"name" : "given-name",
"resourceType" : "Patient",
"resourcePath" : "name.first",
"fhirPath" : "name.given.first()",
"matcher" : {
"algorithm": "STRING",
"exact" : true
}
}],
"matchResultMap" : {
"given-name" : "POSSIBLE_MATCH"
}
}

View File

@ -0,0 +1,17 @@
{
"version": "1",
"mdmTypes": ["Patient", "Practitioner", "Medication"],
"candidateSearchParams" : [],
"candidateFilterSearchParams" : [],
"matchFields" : [ {
"name" : "given-name",
"resourceType" : "Patient",
"matcher" : {
"algorithm": "STRING",
"exact" : true
}
}],
"matchResultMap" : {
"given-name" : "POSSIBLE_MATCH"
}
}