fixing a bug with duplicte golden resources being returned (#5085)

* fixing a bug with duplicte golden resources being returned

* review points

* fixing tests

* blah

---------

Co-authored-by: leif stawnyczy <leifstawnyczy@leifs-MacBook-Pro.local>
This commit is contained in:
TipzCM 2023-07-13 15:58:27 -04:00 committed by GitHub
parent 906355dd65
commit 2401b2339e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 235 additions and 42 deletions

View File

@ -0,0 +1,6 @@
---
type: fix
issue: 5084
title: "Fixed FindCandidateByExampleSvc.findMatchGoldenResourceCandidates
to not return duplicate golden resource candidates.
"

View File

@ -37,8 +37,10 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
@Service
@ -75,27 +77,31 @@ public class FindCandidateByExampleSvc<P extends IResourcePersistentId> extends
List<P> goldenResourcePidsToExclude = getNoMatchGoldenResourcePids(theTarget);
List<MatchedTarget> matchedCandidates = myMdmMatchFinderSvc.getMatchedTargets(
myFhirContext.getResourceType(theTarget),
theTarget,
myMdmPartitionHelper.getRequestPartitionIdFromResourceForSearch(theTarget));
myFhirContext.getResourceType(theTarget),
theTarget,
myMdmPartitionHelper.getRequestPartitionIdFromResourceForSearch(theTarget));
// Convert all possible match targets to their equivalent Golden Resources by looking up in the MdmLink table,
// while ensuring that the matches aren't in our NO_MATCH list.
// The data flow is as follows ->
// MatchedTargetCandidate -> Golden Resource -> MdmLink -> MatchedGoldenResourceCandidate
matchedCandidates = matchedCandidates.stream()
.filter(mc -> mc.isMatch() || mc.isPossibleMatch())
.collect(Collectors.toList());
.filter(mc -> mc.isMatch() || mc.isPossibleMatch())
.collect(Collectors.toList());
List<String> skippedLogMessages = new ArrayList<>();
List<String> matchedLogMessages = new ArrayList<>();
// we'll track the added ids so we don't add the same resources twice
// note, all these resources are the same type, so we only need the Long value
Set<Long> currentIds = new HashSet<>();
for (MatchedTarget match : matchedCandidates) {
Optional<? extends IMdmLink> optionalMdmLink = myMdmLinkDaoSvc.getMatchedLinkForSourcePid(
myIdHelperService.getPidOrNull(RequestPartitionId.allPartitions(), match.getTarget()));
myIdHelperService.getPidOrNull(RequestPartitionId.allPartitions(), match.getTarget()));
if (!optionalMdmLink.isPresent()) {
if (ourLog.isDebugEnabled()) {
skippedLogMessages.add(String.format(
"%s does not link to a Golden Resource (it may be a Golden Resource itself). Removing candidate.",
match.getTarget().getIdElement().toUnqualifiedVersionless()));
"%s does not link to a Golden Resource (it may be a Golden Resource itself). Removing candidate.",
match.getTarget().getIdElement().toUnqualifiedVersionless()));
}
continue;
}
@ -103,22 +109,26 @@ public class FindCandidateByExampleSvc<P extends IResourcePersistentId> extends
IMdmLink matchMdmLink = optionalMdmLink.get();
if (goldenResourcePidsToExclude.contains(matchMdmLink.getGoldenResourcePersistenceId())) {
skippedLogMessages.add(String.format(
"Skipping MDM on candidate Golden Resource with PID %s due to manual NO_MATCH",
matchMdmLink.getGoldenResourcePersistenceId().toString()));
"Skipping MDM on candidate Golden Resource with PID %s due to manual NO_MATCH",
matchMdmLink.getGoldenResourcePersistenceId().toString()));
continue;
}
MatchedGoldenResourceCandidate candidate = new MatchedGoldenResourceCandidate(
matchMdmLink.getGoldenResourcePersistenceId(), match.getMatchResult());
matchMdmLink.getGoldenResourcePersistenceId(), match.getMatchResult());
if (ourLog.isDebugEnabled()) {
matchedLogMessages.add(String.format(
"Navigating from matched resource %s to its Golden Resource %s",
match.getTarget().getIdElement().toUnqualifiedVersionless(),
matchMdmLink.getGoldenResourcePersistenceId().toString()));
"Navigating from matched resource %s to its Golden Resource %s",
match.getTarget().getIdElement().toUnqualifiedVersionless(),
matchMdmLink.getGoldenResourcePersistenceId().toString())
);
}
retval.add(candidate);
//only add if it's not already in the list
if (currentIds.add((Long) candidate.getCandidateGoldenResourcePid().getId())) {
retval.add(candidate);
}
}
if (ourLog.isDebugEnabled()) {
@ -135,8 +145,8 @@ public class FindCandidateByExampleSvc<P extends IResourcePersistentId> extends
private List<P> getNoMatchGoldenResourcePids(IBaseResource theBaseResource) {
P targetPid = myIdHelperService.getPidOrNull(RequestPartitionId.allPartitions(), theBaseResource);
return myMdmLinkDaoSvc.getMdmLinksBySourcePidAndMatchResult(targetPid, MdmMatchResultEnum.NO_MATCH).stream()
.map(IMdmLink::getGoldenResourcePersistenceId)
.collect(Collectors.toList());
.map(IMdmLink::getGoldenResourcePersistenceId)
.collect(Collectors.toList());
}
@Override

View File

@ -21,7 +21,6 @@ import ca.uhn.fhir.jpa.mdm.svc.MdmMatchLinkSvc;
import ca.uhn.fhir.jpa.model.config.PartitionSettings;
import ca.uhn.fhir.jpa.model.dao.JpaPid;
import ca.uhn.fhir.jpa.partition.IPartitionLookupSvc;
import ca.uhn.fhir.rest.api.server.SystemRequestDetails;
import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
import ca.uhn.fhir.jpa.searchparam.registry.SearchParamRegistryImpl;
import ca.uhn.fhir.jpa.subscription.match.config.SubscriptionProcessorConfig;
@ -39,6 +38,7 @@ import ca.uhn.fhir.mdm.util.MdmResourceUtil;
import ca.uhn.fhir.model.api.TemporalPrecisionEnum;
import ca.uhn.fhir.rest.api.Constants;
import ca.uhn.fhir.rest.api.server.IBundleProvider;
import ca.uhn.fhir.rest.api.server.SystemRequestDetails;
import ca.uhn.fhir.rest.param.TokenParam;
import ca.uhn.fhir.rest.server.servlet.ServletRequestDetails;
import org.apache.commons.lang3.StringUtils;

View File

@ -23,10 +23,19 @@ public abstract class BaseTestMdmConfig {
@Value("${mdm.prevent_multiple_eids:true}")
boolean myPreventMultipleEids;
/**
* We might not want the same file for every test.
* See ca.uhn.fhir.jpa.mdm.svc.candidate.MdmGoldenResourceFindingSvcTest
* for an example.
*/
@Value("${module.mdm.config.script.file}")
Resource myRulesFile;
@Bean
IMdmSettings mdmSettings(MdmRuleValidator theMdmRuleValidator) throws IOException {
DefaultResourceLoader resourceLoader = new DefaultResourceLoader();
Resource resource = resourceLoader.getResource("mdm/mdm-rules.json");
Resource resource = (myRulesFile == null || !myRulesFile.exists()) ?
resourceLoader.getResource("mdm/mdm-rules.json") : myRulesFile;
String json = IOUtils.toString(resource.getInputStream(), Charsets.UTF_8);
return new MdmSettings(theMdmRuleValidator)
.setEnabled(false)

View File

@ -3,12 +3,14 @@ package ca.uhn.fhir.jpa.mdm.svc;
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.jpa.api.model.DaoMethodOutcome;
import ca.uhn.fhir.jpa.mdm.BaseMdmR4Test;
import ca.uhn.fhir.jpa.mdm.helper.MdmLinkHelper;
import ca.uhn.fhir.mdm.api.IMdmLink;
import ca.uhn.fhir.mdm.api.IMdmLinkUpdaterSvc;
import ca.uhn.fhir.mdm.api.MdmLinkSourceEnum;
import ca.uhn.fhir.mdm.api.MdmMatchOutcome;
import ca.uhn.fhir.mdm.api.MdmMatchResultEnum;
import ca.uhn.fhir.mdm.model.MdmTransactionContext;
import ca.uhn.fhir.mdm.rules.json.MdmRulesJson;
import ca.uhn.fhir.mdm.util.MessageHelper;
import ca.uhn.fhir.rest.api.server.SystemRequestDetails;
import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
@ -16,8 +18,10 @@ import org.hl7.fhir.r4.model.Patient;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.ResourceUtils;
import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
@ -45,8 +49,6 @@ class MdmLinkUpdaterSvcImplIT extends BaseMdmR4Test {
@Autowired
private MessageHelper myMessageHelper;
@Test
void testUpdateLinkToMatchWhenAnotherLinkToDifferentGoldenExistsMustFail() throws Exception {
// create Patient A -> MATCH GR A
@ -117,7 +119,6 @@ class MdmLinkUpdaterSvcImplIT extends BaseMdmR4Test {
return golden;
}
private Patient createPatientFromJsonInputFile(String thePath) throws Exception {
return createPatientFromJsonInputFile(thePath, true);
}
@ -126,7 +127,11 @@ class MdmLinkUpdaterSvcImplIT extends BaseMdmR4Test {
File jsonInputUrl = ResourceUtils.getFile(ResourceUtils.CLASSPATH_URL_PREFIX + thePath);
String jsonPatient = Files.readString(Paths.get(jsonInputUrl.toURI()), StandardCharsets.UTF_8);
Patient patient = (Patient) myFhirContext.newJsonParser().parseResource(jsonPatient);
return createPatientFromJsonString(jsonPatient, theCreateGolden);
}
private Patient createPatientFromJsonString(String theStr, boolean theCreateGolden) {
Patient patient = (Patient) myFhirContext.newJsonParser().parseResource(theStr);
DaoMethodOutcome daoOutcome = myPatientDao.create(patient, new SystemRequestDetails());
if (theCreateGolden) {

View File

@ -1,45 +1,163 @@
package ca.uhn.fhir.jpa.mdm.svc.candidate;
import ca.uhn.fhir.jpa.api.model.DaoMethodOutcome;
import ca.uhn.fhir.jpa.entity.MdmLink;
import ca.uhn.fhir.jpa.mdm.BaseMdmR4Test;
import ca.uhn.fhir.jpa.mdm.dao.MdmLinkDaoSvc;
import ca.uhn.fhir.mdm.api.IMdmSettings;
import ca.uhn.fhir.mdm.api.MdmLinkSourceEnum;
import ca.uhn.fhir.mdm.api.MdmMatchResultEnum;
import ca.uhn.fhir.mdm.rules.json.MdmRulesJson;
import ca.uhn.fhir.rest.api.server.SystemRequestDetails;
import ca.uhn.fhir.test.utilities.UnregisterScheduledProcessor;
import org.hl7.fhir.r4.model.Patient;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.TestPropertySource;
import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.hasSize;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ExtendWith(MockitoExtension.class)
@TestPropertySource(properties = {
"module.mdm.config.script.file=classpath:mdm/mdm-rules-john-doe.json"
})
class MdmGoldenResourceFindingSvcTest extends BaseMdmR4Test {
@Autowired
MdmGoldenResourceFindingSvc myMdmGoldenResourceFindingSvc = new MdmGoldenResourceFindingSvc();
@Autowired
MdmLinkDaoSvc myMdmLinkDaoSvc;
@Autowired
MdmGoldenResourceFindingSvc myMdmGoldenResourceFindingSvc = new MdmGoldenResourceFindingSvc();
@Autowired
MdmLinkDaoSvc myMdmLinkDaoSvc;
@Test
public void testNoMatchCandidatesSkipped() {
// setup
Patient jane = createPatientAndUpdateLinks(addExternalEID(buildJanePatient(), EID_1));
@Test
public void testNoMatchCandidatesSkipped() {
// setup
Patient jane = createPatientAndUpdateLinks(addExternalEID(buildJanePatient(), EID_1));
// hack the link into a NO_MATCH
List<MdmLink> links = (List<MdmLink>) myMdmLinkDaoSvc.findMdmLinksBySourceResource(jane);
assertThat(links, hasSize(1));
MdmLink link = links.get(0);
link.setMatchResult(MdmMatchResultEnum.NO_MATCH);
link.setLinkSource(MdmLinkSourceEnum.MANUAL);
myMdmLinkDaoSvc.save(link);
// hack the link into a NO_MATCH
List<MdmLink> links = (List<MdmLink>) myMdmLinkDaoSvc.findMdmLinksBySourceResource(jane);
assertThat(links, hasSize(1));
MdmLink link = links.get(0);
link.setMatchResult(MdmMatchResultEnum.NO_MATCH);
link.setLinkSource(MdmLinkSourceEnum.MANUAL);
myMdmLinkDaoSvc.save(link);
// the NO_MATCH golden resource should not be a candidate
CandidateList candidateList = myMdmGoldenResourceFindingSvc.findGoldenResourceCandidates(jane);
assertEquals(0, candidateList.size());
}
@Test
public void findMdmLinksBySourceResource_withMatchingResources_doesNotReturnDuplicates() throws IOException {
// setup
// create a bunch of patients that match
// (according to the rules in mdm-rules-john-doe.json)
// patient 1
{
String patientStr = """
{
"resourceType": "Patient",
"name": [ {
"family": "Jho",
"given": [ "Doe"]
} ],
"birthDate": "1974-12-25"
}
""";
createPatientFromJsonString(patientStr, true);
}
// patient 2
{
String patientStr = """
{
"resourceType": "Patient",
"name": [ {
"family": "Jhyee",
"given": [ "Deeon"]
} ],
"birthDate": "1974-12-25"
}
""";
createPatientFromJsonString(patientStr, true);
}
// patient 3
{
String patientStr = """
{
"resourceType": "Patient",
"name": [ {
"family": "Jhoye",
"given": [ "Deo"]
} ],
"birthDate": "1974-12-25"
}
""";
createPatientFromJsonString(patientStr, true);
}
// patient 4
{
String patientStr = """
{
"resourceType": "Patient",
"name": [ {
"family": "Jhee",
"given": [ "Deo"]
} ],
"birthDate": "1974-12-25"
}
""";
createPatientFromJsonString(patientStr, true);
}
// patient 5
Patient candidate;
{
String patientStr = """
{
"resourceType": "Patient",
"name": [ {
"family": "Jhee",
"given": [ "Doe"]
} ],
"birthDate": "1974-12-25"
}
""";
candidate = createPatientFromJsonString(patientStr, true);
}
// test
CandidateList candidateList = myMdmGoldenResourceFindingSvc.findGoldenResourceCandidates(candidate);
// verify
assertNotNull(candidateList);
Set<Long> ids = new HashSet<>();
for (MatchedGoldenResourceCandidate c : candidateList.getCandidates()) {
assertTrue(ids.add((Long)c.getCandidateGoldenResourcePid().getId()));
}
}
private Patient createPatientFromJsonString(String theStr, boolean theCreateGolden) {
Patient patient = (Patient) myFhirContext.newJsonParser().parseResource(theStr);
DaoMethodOutcome daoOutcome = myPatientDao.create(patient, new SystemRequestDetails());
if (theCreateGolden) {
myMdmMatchLinkSvc.updateMdmLinksForMdmSource(patient, createContextForCreate("Patient"));
}
return (Patient) daoOutcome.getResource();
}
// the NO_MATCH golden resource should not be a candidate
CandidateList candidateList = myMdmGoldenResourceFindingSvc.findGoldenResourceCandidates(jane);
assertEquals(0, candidateList.size());
}
}

View File

@ -0,0 +1,45 @@
{
"version": "1",
"mdmTypes": ["Patient"],
"candidateSearchParams": [
{
"resourceType": "Patient",
"searchParams": ["birthdate"]
}
],
"candidateFilterSearchParams": [],
"matchFields": [
{
"name": "birthday",
"resourceType": "Patient",
"resourcePath": "birthDate",
"matcher": {
"algorithm": "STRING"
}
},
{
"name": "firstname-jaro",
"resourceType": "Patient",
"resourcePath": "name.given",
"similarity": {
"algorithm": "JARO_WINKLER",
"matchThreshold": 0.8
}
},
{
"name": "lastname-jaro",
"resourceType": "Patient",
"resourcePath": "name.family",
"similarity": {
"algorithm": "JARO_WINKLER",
"matchThreshold": 0.8
}
}
],
"matchResultMap": {
"firstname-jaro,lastname-jaro,birthday": "MATCH",
"lastname-jaro,birthday": "POSSIBLE_MATCH",
"firstname-jaro,birthday": "POSSIBLE_MATCH"
},
"eidSystems": {}
}