Issue 4052 addition of properties to the loinc terminology uploading process (#4135)

* Allow for easier override of DataSource and dialect

* Add debug logging

* Load AskAtOrderEntry and AssociatedObservations properties from loinc.csv.
Add changelog.

* Add disabled sandbox test used to develop loinc upload modifications

* Add flags to easily select running mode

* Add validation counters and use gzipped csv input to save space in repo

* Implement revision suggestions.
As test is a sandbox remove large files and add readme and test notes to locate them before running the test.

* Add new CODING property handler to run after all TermConcepts are created, to have them accessible for extracting display value

* Add small test file to ease setup

Co-authored-by: juan.marchionatto <juan.marchionatto@smilecdr.com>
This commit is contained in:
jmarchionatto 2022-10-14 08:12:12 -04:00 committed by GitHub
parent 422ef87a5d
commit f00f65aae4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 541 additions and 109 deletions

View File

@ -14,6 +14,7 @@ import ca.uhn.fhir.jpa.term.icd10.Icd10Loader;
import ca.uhn.fhir.jpa.term.icd10cm.Icd10CmLoader;
import ca.uhn.fhir.jpa.term.loinc.LoincAnswerListHandler;
import ca.uhn.fhir.jpa.term.loinc.LoincAnswerListLinkHandler;
import ca.uhn.fhir.jpa.term.loinc.LoincCodingPropertiesHandler;
import ca.uhn.fhir.jpa.term.loinc.LoincConsumerNameHandler;
import ca.uhn.fhir.jpa.term.loinc.LoincDocumentOntologyHandler;
import ca.uhn.fhir.jpa.term.loinc.LoincGroupFileHandler;
@ -634,7 +635,7 @@ public class TermLoaderSvcImpl implements ITermLoaderSvc {
iterateOverZipFileCsv(theDescriptors, theUploadProperties.getProperty(LOINC_PART_FILE.getCode(), LOINC_PART_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
Map<PartTypeAndPartName, String> partTypeAndPartNameToPartNumber = ((LoincPartHandler) handler).getPartTypeAndPartNameToPartNumber();
// LOINC codes
// LOINC string properties
handler = new LoincHandler(codeSystemVersion, code2concept, propertyNamesToTypes, partTypeAndPartNameToPartNumber);
iterateOverZipFileCsv(theDescriptors, theUploadProperties.getProperty(LOINC_FILE.getCode(), LOINC_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
@ -707,6 +708,10 @@ public class TermLoaderSvcImpl implements ITermLoaderSvc {
handler = new LoincConsumerNameHandler(code2concept);
iterateOverZipFileCsvOptional(theDescriptors, theUploadProperties.getProperty(LOINC_CONSUMER_NAME_FILE.getCode(), LOINC_CONSUMER_NAME_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
// LOINC coding properties (must run after all TermConcepts were created)
handler = new LoincCodingPropertiesHandler(code2concept, propertyNamesToTypes);
iterateOverZipFileCsv(theDescriptors, theUploadProperties.getProperty(LOINC_FILE.getCode(), LOINC_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);
// Linguistic Variants
handler = new LoincLinguisticVariantsHandler(linguisticVariants);
iterateOverZipFileCsvOptional(theDescriptors, theUploadProperties.getProperty(LOINC_LINGUISTIC_VARIANTS_FILE.getCode(), LOINC_LINGUISTIC_VARIANTS_FILE_DEFAULT.getCode()), handler, ',', QuoteMode.NON_NUMERIC, false);

View File

@ -0,0 +1,126 @@
package ca.uhn.fhir.jpa.term.loinc;
/*-
* #%L
* HAPI FHIR JPA Server
* %%
* Copyright (C) 2014 - 2022 Smile CDR, Inc.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import ca.uhn.fhir.jpa.entity.TermConcept;
import ca.uhn.fhir.jpa.term.IZipContentsHandlerCsv;
import ca.uhn.fhir.jpa.term.api.ITermLoaderSvc;
import org.apache.commons.csv.CSVRecord;
import org.hl7.fhir.r4.model.CodeSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.commons.lang3.StringUtils.trim;
/**
* Handler to process coding type properties 'AskAtOrderEntry' and 'AssociatedObservations'.
*
* These properties are added in a specific handler which is involved after all TermConcepts
* are created, because they require a 'display' value associated to other TermConcept (pointed by the 'code'
* property value), which require that concept to have been created.
*/
public class LoincCodingPropertiesHandler implements IZipContentsHandlerCsv {
private static final Logger ourLog = LoggerFactory.getLogger(LoincCodingPropertiesHandler.class);
public static final String ASK_AT_ORDER_ENTRY_PROP_NAME = "AskAtOrderEntry";
public static final String ASSOCIATED_OBSERVATIONS_PROP_NAME = "AssociatedObservations";
public static final String LOINC_NUM = "LOINC_NUM";
private final Map<String, TermConcept> myCode2Concept;
private final Map<String, CodeSystem.PropertyType> myPropertyNameTypeMap;
public LoincCodingPropertiesHandler(Map<String, TermConcept> theCode2concept,
Map<String, CodeSystem.PropertyType> thePropertyNameTypeMap) {
myCode2Concept = theCode2concept;
myPropertyNameTypeMap = thePropertyNameTypeMap;
}
@Override
public void accept(CSVRecord theRecord) {
if ( ! anyValidProperty()) { return; }
String code = trim(theRecord.get(LOINC_NUM));
if (isBlank(code)) { return; }
String askAtOrderEntryValue = trim(theRecord.get(ASK_AT_ORDER_ENTRY_PROP_NAME));
String associatedObservationsValue = trim(theRecord.get(ASSOCIATED_OBSERVATIONS_PROP_NAME));
// any of the record properties have a valid value?
if (isBlank(askAtOrderEntryValue) && isBlank(associatedObservationsValue)) {
return;
}
TermConcept srcTermConcept = myCode2Concept.get(code);
if (isNotBlank(askAtOrderEntryValue)) {
addCodingProperties(srcTermConcept, ASK_AT_ORDER_ENTRY_PROP_NAME, askAtOrderEntryValue);
}
if (isNotBlank(associatedObservationsValue)) {
addCodingProperties(srcTermConcept, ASSOCIATED_OBSERVATIONS_PROP_NAME, associatedObservationsValue);
}
}
/**
* Validates that at least one ot target properties is defined in loinc.xml file and is of type "CODING"
*/
private boolean anyValidProperty() {
CodeSystem.PropertyType askAtOrderEntryPropType = myPropertyNameTypeMap.get(ASK_AT_ORDER_ENTRY_PROP_NAME);
CodeSystem.PropertyType associatedObservationsPropType = myPropertyNameTypeMap.get(ASSOCIATED_OBSERVATIONS_PROP_NAME);
return askAtOrderEntryPropType == CodeSystem.PropertyType.CODING
|| associatedObservationsPropType == CodeSystem.PropertyType.CODING;
}
private void addCodingProperties(TermConcept theSrcTermConcept, String thePropertyName, String thePropertyValue) {
List<String> propertyCodeValues = parsePropertyCodeValues(thePropertyValue);
for (String propertyCodeValue : propertyCodeValues) {
TermConcept targetTermConcept = myCode2Concept.get(propertyCodeValue);
if (targetTermConcept == null) {
ourLog.error("Couldn't find TermConcept for code: '{}'. Display property set to blank for property: '{}'",
propertyCodeValue, thePropertyName);
continue;
}
theSrcTermConcept.addPropertyCoding(thePropertyName, ITermLoaderSvc.LOINC_URI, propertyCodeValue, targetTermConcept.getDisplay());
ourLog.trace("Adding coding property: {} to concept.code {}", thePropertyName, theSrcTermConcept.getCode());
}
}
private List<String> parsePropertyCodeValues(String theValue) {
return Arrays.stream( theValue.split(";") )
.map(String::trim)
.collect(Collectors.toList());
}
}

View File

@ -25,7 +25,6 @@ import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
import ca.uhn.fhir.jpa.entity.TermConcept;
import ca.uhn.fhir.jpa.term.IZipContentsHandlerCsv;
import ca.uhn.fhir.jpa.term.TermLoaderSvcImpl;
import ca.uhn.fhir.jpa.term.api.ITermLoaderSvc;
import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.Validate;
@ -33,10 +32,7 @@ import org.hl7.fhir.r4.model.CodeSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.commons.lang3.StringUtils.trim;
@ -44,10 +40,6 @@ import static org.apache.commons.lang3.StringUtils.trim;
public class LoincHandler implements IZipContentsHandlerCsv {
private static final Logger ourLog = LoggerFactory.getLogger(LoincHandler.class);
// most coding properties are not loaded by this handler, except these
private static final List<String> myCodingPropertiesToLoad = List.of("AskAtOrderEntry", "AssociatedObservations");
private final Map<String, TermConcept> myCode2Concept;
private final TermCodeSystemVersion myCodeSystemVersion;
private final Map<String, CodeSystem.PropertyType> myPropertyNames;
@ -97,14 +89,7 @@ public class LoincHandler implements IZipContentsHandlerCsv {
break;
case CODING:
if (myCodingPropertiesToLoad.contains(nextPropertyName)) {
List<String> propertyCodeValues = parsePropertyCodeValues(nextPropertyValue);
for (String propertyCodeValue : propertyCodeValues) {
concept.addPropertyCoding(nextPropertyName, ITermLoaderSvc.LOINC_URI, propertyCodeValue, display);
ourLog.trace("Adding coding property: {} to concept.code {}", nextPropertyName, concept.getCode());
}
}
// rest of "Coding" property types are handled by partlink, hierarchy, RsnaPlaybook or DocumentOntology handlers
// "Coding" property types are handled by loincCodingProperties, partlink, hierarchy, RsnaPlaybook or DocumentOntology handlers
break;
case DECIMAL:
@ -123,10 +108,4 @@ public class LoincHandler implements IZipContentsHandlerCsv {
myCode2Concept.put(code, concept);
}
}
private List<String> parsePropertyCodeValues(String theValue) {
return Arrays.stream( theValue.split(";") )
.map(String::trim)
.collect(Collectors.toList());
}
}

View File

@ -0,0 +1,155 @@
package ca.uhn.fhir.jpa.term.loinc;
import ca.uhn.fhir.jpa.entity.TermConcept;
import ca.uhn.fhir.jpa.term.api.ITermLoaderSvc;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.spi.ILoggingEvent;
import ch.qos.logback.core.read.ListAppender;
import org.apache.commons.csv.CSVRecord;
import org.hl7.fhir.r4.model.CodeSystem;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static ca.uhn.fhir.jpa.term.loinc.LoincCodingPropertiesHandler.ASK_AT_ORDER_ENTRY_PROP_NAME;
import static ca.uhn.fhir.jpa.term.loinc.LoincCodingPropertiesHandler.ASSOCIATED_OBSERVATIONS_PROP_NAME;
import static ca.uhn.fhir.jpa.term.loinc.LoincCodingPropertiesHandler.LOINC_NUM;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
/**
 * Unit tests for {@link LoincCodingPropertiesHandler}, exercising the early-exit guards
 * (no valid property declared, missing LOINC_NUM, blank property values) and the happy /
 * missing-target paths for both handled coding properties.
 */
@ExtendWith(MockitoExtension.class)
class LoincCodingPropertiesHandlerTest {
	private static final String CODE_A = "code-A";

	private static LoincCodingPropertiesHandler testedHandler;

	@Mock private CSVRecord myCsvRecord;
	@Mock private TermConcept myTargetTermConcept;
	@Mock private TermConcept myRefTermConcept1;
	@Mock private TermConcept myRefTermConcept2;

	// code -> concept map handed to the tested handler; mutated per-test to set up fixtures
	private final Map<String, TermConcept> myCode2concept = new HashMap<>();
	// property name -> type map, simulating what was parsed from loinc.xml
	private final Map<String, CodeSystem.PropertyType> myPropertyNameTypeMap = new HashMap<>();

	@BeforeEach
	void setUp() {
		myCode2concept.put(CODE_A, myTargetTermConcept);
		testedHandler = new LoincCodingPropertiesHandler(myCode2concept, myPropertyNameTypeMap);
	}

	// neither handled property is declared with CODING type -> handler must do nothing
	@Test
	void not_any_property_valid_does_nothing() {
		myPropertyNameTypeMap.put("prop_1", CodeSystem.PropertyType.CODING); // uninteresting property name
		myPropertyNameTypeMap.put(ASK_AT_ORDER_ENTRY_PROP_NAME, CodeSystem.PropertyType.STRING); // wrong property type

		testedHandler.accept(myCsvRecord);

		verify(myTargetTermConcept, never()).addPropertyCoding(anyString(), anyString(), anyString(), anyString());
	}

	// record has no LOINC_NUM value -> handler must do nothing
	@Test
	void record_no_loinc_num_property_does_nothing() {
		myPropertyNameTypeMap.put(ASK_AT_ORDER_ENTRY_PROP_NAME, CodeSystem.PropertyType.CODING); // valid property, so handler doesn't exit early

		when(myCsvRecord.get(LOINC_NUM)).thenReturn(null);

		testedHandler.accept(myCsvRecord);

		verify(myTargetTermConcept, never()).addPropertyCoding(anyString(), anyString(), anyString(), anyString());
	}

	// record has a code but both handled property columns are blank -> handler must do nothing
	@Test
	void no_property_valid_value_does_nothing() {
		myPropertyNameTypeMap.put(ASK_AT_ORDER_ENTRY_PROP_NAME, CodeSystem.PropertyType.CODING); // valid property, so handler doesn't exit early

		when(myCsvRecord.get(LOINC_NUM)).thenReturn(CODE_A);

		testedHandler.accept(myCsvRecord);

		verify(myTargetTermConcept, never()).addPropertyCoding(anyString(), anyString(), anyString(), anyString());
	}

	// happy path: each ';'-separated code in the property value becomes one coding property
	// whose display is the display of the referenced TermConcept
	@ParameterizedTest
	@ValueSource(strings = {ASK_AT_ORDER_ENTRY_PROP_NAME, ASSOCIATED_OBSERVATIONS_PROP_NAME})
	void each_tested_record_prop_creates_term_concept_prop(String thePropName) {
		myPropertyNameTypeMap.put(thePropName, CodeSystem.PropertyType.CODING);

		when(myCsvRecord.get(LOINC_NUM)).thenReturn(CODE_A);
		myCode2concept.put(CODE_A, myTargetTermConcept);
		myCode2concept.put("ref-code-01", myRefTermConcept1);
		myCode2concept.put("ref-code-02", myRefTermConcept2);

		// lenient: only one of the two property columns is stubbed per parameterized run
		lenient().when(myCsvRecord.get(thePropName)).thenReturn("ref-code-01; ref-code-02");
		when(myRefTermConcept1.getDisplay()).thenReturn("display-value-01");
		when(myRefTermConcept2.getDisplay()).thenReturn("display-value-02");

		testedHandler.accept(myCsvRecord);

		verify(myTargetTermConcept, times(1)).addPropertyCoding(
			thePropName, ITermLoaderSvc.LOINC_URI, "ref-code-01", "display-value-01");
		verify(myTargetTermConcept, times(1)).addPropertyCoding(
			thePropName, ITermLoaderSvc.LOINC_URI, "ref-code-02", "display-value-02");
	}

	// a referenced code with no matching TermConcept must be reported via an ERROR log entry
	@ParameterizedTest
	@ValueSource(strings = {ASK_AT_ORDER_ENTRY_PROP_NAME, ASSOCIATED_OBSERVATIONS_PROP_NAME})
	void each_tested_record_prop_not_existing_target_is_logged(String thePropName) {
		myPropertyNameTypeMap.put(thePropName, CodeSystem.PropertyType.CODING);

		when(myCsvRecord.get(LOINC_NUM)).thenReturn(CODE_A);
		myCode2concept.put(CODE_A, myTargetTermConcept);
		myCode2concept.put("ref-code-01", myRefTermConcept1);
		// note: "ref-code-02" is intentionally NOT registered in myCode2concept

		lenient().when(myCsvRecord.get(thePropName)).thenReturn("ref-code-01; ref-code-02");
		when(myRefTermConcept1.getDisplay()).thenReturn("display-value-01");

		Logger testLogger = (Logger) LoggerFactory.getLogger(LoincCodingPropertiesHandler.class);
		ListAppender<ILoggingEvent> testListAppender = addTestLogAppenderForClass(testLogger);

		try {
			// call method under test
			testedHandler.accept(myCsvRecord);

			// JUnit assertions
			List<ILoggingEvent> logsList = testListAppender.list;
			assertEquals(1, logsList.size());
			assertEquals(Level.ERROR, logsList.get(0).getLevel());
			assertTrue(logsList.get(0).getFormattedMessage().startsWith("Couldn't find TermConcept for code: 'ref-code-02'"));
			assertTrue(logsList.get(0).getFormattedMessage().contains(thePropName));

		} finally {
			// always detach so later tests don't accumulate log events in this appender
			testLogger.detachAppender(testListAppender);
		}
	}

	// attaches a fresh in-memory appender to the given logback logger so tests can assert on log output
	private ListAppender<ILoggingEvent> addTestLogAppenderForClass(Logger theLogger) {
		// create and start a ListAppender
		ListAppender<ILoggingEvent> testListAppender = new ListAppender<>();
		testListAppender.start();

		// add the appender to the logger
		theLogger.addAppender(testListAppender);

		return testListAppender;
	}
}

View File

@ -20,6 +20,8 @@ import ca.uhn.fhir.jpa.test.BaseJpaTest;
import ca.uhn.fhir.jpa.test.config.TestHSearchAddInConfig;
import ca.uhn.fhir.jpa.test.config.TestR4Config;
import ca.uhn.fhir.util.StopWatch;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import net.ttddyy.dsproxy.support.ProxyDataSourceBuilder;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
@ -28,6 +30,7 @@ import org.apache.commons.csv.QuoteMode;
import org.apache.commons.dbcp2.BasicDataSource;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.hibernate.dialect.PostgreSQL10Dialect;
import org.hl7.fhir.r4.model.CodeableConcept;
import org.hl7.fhir.r4.model.Coding;
@ -35,6 +38,7 @@ import org.hl7.fhir.r4.model.ValueSet;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.function.Executable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@ -51,27 +55,32 @@ import javax.annotation.Nonnull;
import javax.persistence.EntityManager;
import javax.persistence.Query;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import static ca.uhn.fhir.jpa.term.loinc.LoincCodingPropertiesHandler.ASK_AT_ORDER_ENTRY_PROP_NAME;
import static ca.uhn.fhir.jpa.term.loinc.LoincCodingPropertiesHandler.ASSOCIATED_OBSERVATIONS_PROP_NAME;
import static java.util.stream.Collectors.mapping;
import static java.util.stream.Collectors.toSet;
import static org.junit.jupiter.api.Assertions.assertAll;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@ -81,59 +90,54 @@ import static org.junit.jupiter.api.Assertions.fail;
/**
* Sandbox test (not intended to run on CI build) so must be kept disabled
*
* Requires the loinc-full resource directory to contain the following three files:
* _ Loinc.csv.gz
* _ Loinc_1.11.zip and
* Requires the loinc-full resource directory to contain the following files:
* _ Loinc_1.11.zip
* _ v1.11_loincupload.properties
*
* but they are too large for the repo, so before running this test, copy them from:
* but last one is too large for the repo, so before running this test, copy it from:
* https://drive.google.com/drive/folders/18be2R5IurlWnugkl18nDG7wrwPsOtfR-?usp=sharing
* (SmileCDR has access)
*
* Can be executed with Lucene or Elastic configuration
*
* Requires 4Gb mem to run, so pom needs to be changed to run from IDE:
* <surefire_jvm_args>-Dfile.encoding=UTF-8 -Xmx5g</surefire_jvm_args>
* or to run from maven use:
* mvn test -pl :hapi-fhir-jpaserver-test-utilities -Dtest=LoincFullLoadR4SandboxIT#uploadLoincCodeSystem -Dsurefire_jvm_args="-Xmx5g"
*
*/
@Disabled("Sandbox test which requires 5Gb memory")
@Disabled("Sandbox test")
@ExtendWith(SpringExtension.class)
@ContextConfiguration(classes = {
LoincFullLoadR4SandboxIT.NoopMandatoryTransactionListener.class
// one of the following needs to be present
// TestR4Config.class // uses in-memory DB
,LoincFullLoadR4SandboxIT.OverriddenR4Config.class // your configured persistent DB
// pick up elastic or lucene engine:
// pick up elastic, lucene or no-full-text engine:
,TestHSearchAddInConfig.NoFT.class
})
public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
private static final Logger ourLog = LoggerFactory.getLogger(LoincFullLoadR4SandboxIT.class);
private static final DecimalFormat ourDecimalFormat = new DecimalFormat("#,###");
public static final boolean USE_REAL_DB = true;
public static final boolean LOAD_DB = false;
public static final String DB_NAME = "testDB_new";
public static final String DB_NAME = "cdr_loinc_display";
private static final DecimalFormat ourDecimalFormat = new DecimalFormat("#,###");
public static final String LOINC_URL = "http://loinc.org";
public static final String TEST_FILES_CLASSPATH = "loinc-full/";
public static final boolean CLEANUP_DATA = true;
static {
System.setProperty("unlimited_db_connection", "true");
}
private final Collection<Executable> mapToAsserts = new ArrayList<>();
// -----------------------------------------------------------------------------------------
// full LOINC file 1.11 (initially cloned from 2.73 for tests, with custom loinc.xml file with added 24 new properties)
// full LOINC file 1.11 Initially cloned from 2.73 for tests, with custom loinc.xml file with added 24 new properties
// Note that internal defined version is 2.78
public static final String CS_VERSION = "1.11";
// public static final String CS_VERSION = "1.11";
public static final String CS_VERSION = "2.78";
public static final int CS_CONCEPTS_COUNT = 234_390;
public static final int ASSOCIATED_OBSERVATIONS_COUNT = 8_058;
public static final int ASK_AT_ORDER_ENTRY_COUNT = 65;
@ -141,11 +145,12 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
public static final String LOINC_PROPERTIES_CLASSPATH =
ResourceUtils.CLASSPATH_URL_PREFIX + TEST_FILES_CLASSPATH + "v1.11_loincupload.properties";
public static final String LOINC_ZIP_CLASSPATH =
ResourceUtils.CLASSPATH_URL_PREFIX + TEST_FILES_CLASSPATH + "Loinc_1.11.zip";
public static final String BASE_LOINC_FILE_NAME = "Loinc_1.11";
public static final String LOINC_CSV_ZIP_CLASSPATH =
ResourceUtils.CLASSPATH_URL_PREFIX + TEST_FILES_CLASSPATH + "Loinc.csv.gz";
public static final String LOINC_ZIP_CLASSPATH =
ResourceUtils.CLASSPATH_URL_PREFIX + TEST_FILES_CLASSPATH + BASE_LOINC_FILE_NAME + ".zip";
public static final String LOINC_CSV_ZIP_ENTRY_PATH = BASE_LOINC_FILE_NAME + "/LoincTable/Loinc.csv";
public static final String LOINC_MAP_TO_ZIP_ENTRY_PATH = BASE_LOINC_FILE_NAME + "/LoincTable/MapTo.csv";
// -----------------------------------------------------------------------------------------
@Autowired private FhirContext myFhirCtx;
@ -158,14 +163,11 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
@Autowired private ITermCodeSystemDao myTermCodeSystemDao;
@Autowired private ITermCodeSystemVersionDao myTermCodeSystemVersionDao;
@Autowired
@Qualifier("myValueSetDaoR4")
protected IFhirResourceDaoValueSet<ValueSet, Coding, CodeableConcept> myValueSetDao;
private IFhirResourceDaoValueSet<ValueSet, Coding, CodeableConcept> myValueSetDao;
private long termCodeSystemVersionWithVersionId;
private TermCodeSystemVersion termCodeSystemVersion;
private int associatedObservationsCount = 0;
private int askAtOrderEntryCount = 0;
@ -202,7 +204,7 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
@Test()
public void uploadLoincCodeSystem() throws Exception {
if (USE_REAL_DB && LOAD_DB) {
if (LOAD_DB) {
List<ITermLoaderSvc.FileDescriptor> myFileDescriptors = buildFileDescriptors();
// upload terminology
@ -212,24 +214,29 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
// save all deferred concepts, properties, links, etc
sw.restart();
myTerminologyDeferredStorageSvc.saveAllDeferred();
saveAllDeferredNoTimeout();
ourLog.info("=================> Saving all terminology deferred entities took {}", sw);
validateSavedConceptsCount();
sw.restart();
myTermReadSvc.preExpandDeferredValueSetsToTerminologyTables();
ourLog.info("=================> Pre-expanding ValueSets took {}", sw);
// tested properties have no special relation with ValueSet(s), however we may want
// ValueSets to be expanded in some cases so don't remove the following commented code
// sw.restart();
// myTermReadSvc.preExpandDeferredValueSetsToTerminologyTables();
// ourLog.info("=================> Pre-expanding ValueSets took {}", sw);
return;
}
// validation:
// create from loinc.csv file map of code | set of not-blank-properties
// query each code and validate that all properties in map are set (can we check type also)
// create from mapto.csv file map of code | Pair<mapToCode, display>
// query each code and validate that all properties in both maps are set
List<Map<String, String>> conceptPropertyRecords = readCsvRecordsAsMap();
List<Map<String, String>> conceptPropertyCvsMap = readLoincCsvRecordsAsMap();
Multimap<String, Pair<String, String>> conceptMapToCvsMap = ArrayListMultimap.create();
validateCreatedConceptsHaveAllProperties( conceptPropertyRecords );
validateCreatedConceptsHaveAllProperties( conceptPropertyCvsMap, conceptMapToCvsMap );
ourLog.info("Processed properties : {}", processedPropertiesCounter);
ourLog.info("associatedObservationsCount : {}", associatedObservationsCount);
@ -239,7 +246,17 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
assertEquals(ASK_AT_ORDER_ENTRY_COUNT, askAtOrderEntryCount);
assertEquals(ASSOCIATED_OBSERVATIONS_COUNT, associatedObservationsCount);
}
// as asserts are used for some validation, but we want all problems to be displayed,
// we just collect assertions and execute them all at the end (here).
assertAll(mapToAsserts);
}
private void saveAllDeferredNoTimeout() {
while( ! myTerminologyDeferredStorageSvc.isStorageQueueEmpty() ) {
myTerminologyDeferredStorageSvc.saveDeferred();
}
}
/**
* Used occasionally for some manual validation - don't delete
@ -249,7 +266,7 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
Query q = myEntityManager.createQuery("from ForcedId where myForcedId like 'LG8749-6%'");
@SuppressWarnings("unchecked")
List<ForcedId> fIds = (List<ForcedId>) q.getResultList();
long res_id = fIds.stream().map(ForcedId::getId).sorted().findFirst().get();
long res_id = fIds.stream().map(ForcedId::getId).sorted().findFirst().orElse(fail("ForcedId not found"));
Query q1 = myEntityManager.createQuery("from ResourceTable where id = " + res_id);
@SuppressWarnings("unchecked")
@ -269,7 +286,9 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
}
private void validateCreatedConceptsHaveAllProperties(List<Map<String, String>> theConceptPropertyInputMap) {
private void validateCreatedConceptsHaveAllProperties(List<Map<String, String>> theConceptPropertyInputMap,
Multimap<String, Pair<String, String>> theConceptMapToCvsMap) {
TermCodeSystemVersion tcsVersion = getTermCodeSystemVersion();
ourLog.info("Properties to process: {}", ourDecimalFormat.format(theConceptPropertyInputMap.size()));
@ -281,7 +300,7 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
runInTransaction(() -> {
Optional<TermConcept> tcFomDbOpt = myTermConceptDao.findByCodeSystemAndCode(tcsVersion, recordCode);
tcFomDbOpt.ifPresentOrElse(
tc -> validateTermConceptEntry(tc, tcRecordMap),
tc -> validateTermConceptEntry(tc, tcRecordMap, theConceptMapToCvsMap),
() -> ourLog.error("Couldn't find TermConcept with code: {} in DB", recordCode));
});
@ -301,23 +320,33 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
}
private void validateTermConceptEntry(TermConcept theTermConcept, Map<String, String> theRecordMap) {
private void validateTermConceptEntry(TermConcept theTermConcept,
Map<String, String> theRecordMap, Multimap<String, Pair<String, String>> theConceptMapToCvsMap) {
String recordCode = getRecordCode(theRecordMap);
if ( ! theTermConcept.getCode().equals(recordCode) ) {
fail("Received non matching inputs code from file: " + recordCode + ", code from DB: " + theTermConcept.getCode());
}
ourLog.trace("Validating TC with code: {}", theTermConcept.getCode());
Map<String, Set<String>> tcConceptPropertyMap = theTermConcept.getProperties().stream()
.collect(Collectors.groupingBy(TermConceptProperty::getKey, HashMap::new, mapping(TermConceptProperty::getValue, toSet())));
ourLog.trace("Validating new properties for TC with code: {}", theTermConcept.getCode());
// map of TC property name | set of property values
HashMap<String, Set<String>> tcConceptPropertyMap = theTermConcept.getProperties().stream()
.collect(Collectors.groupingBy(TermConceptProperty::getKey,
HashMap::new,
mapping(TermConceptProperty::getValue, toSet())));
validateNewProperties(recordCode, theRecordMap, tcConceptPropertyMap);
validateNewProperties(theTermConcept, theRecordMap, tcConceptPropertyMap);
Collection<Pair<String, String>> toMapRecordForTermConcept = theConceptMapToCvsMap.get(recordCode);
// validateMapToProperties(recordCode, tcConceptPropertyMap, toMapRecordForTermConcept);
}
private void validateNewProperties(String theTcCode, Map<String, String> theRecordPropsMap, Map<String, Set<String>> theTcConceptPropertyMap) {
private void validateNewProperties(TermConcept theTermConcept, Map<String, String> theRecordPropsMap,
HashMap<String, Set<String>> theTcConceptPropertyMap) {
// make sure we are good so far and both entries to compare are for same TermConcept code
assertEquals(theTcCode, theRecordPropsMap.get("LOINC_NUM"), "theTcCode and record key (LOINC_NUM) must match");
assertEquals(theTermConcept.getCode(), theRecordPropsMap.get("LOINC_NUM"), "theTcCode and record key (LOINC_NUM) must match");
for (Map.Entry<String, String> recordEntry : theRecordPropsMap.entrySet()) {
@ -327,41 +356,76 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
// bypass old properties
if ( ! newRecordPropertyNames.contains(recordEntry.getKey()) ) { continue; }
Set<String> tcConceptPropValues = theTcConceptPropertyMap.get(recordEntry.getKey());
if (CollectionUtils.isEmpty(tcConceptPropValues)) {
ourLog.error("TCConcept with code: {} does not have property: {} which in csv file has value: {}",
theTcCode, recordEntry.getKey(), recordEntry.getValue());
continue;
}
Set<String> tcConceptValues = theTcConceptPropertyMap.get(recordEntry.getKey());
// special case because we need to parse ';' separated codes from file property value
if ( "AssociatedObservations".equals(recordEntry.getKey()) ) {
if ( ASSOCIATED_OBSERVATIONS_PROP_NAME.equals(recordEntry.getKey()) ) {
associatedObservationsCount++;
validateAssociatedObservations(theTcCode, recordEntry, tcConceptPropValues);
validateCodingProperties(theTermConcept, ASSOCIATED_OBSERVATIONS_PROP_NAME, recordEntry, tcConceptValues);
continue;
}
if ( "AskAtOrderEntry".equals(recordEntry.getKey()) ) { askAtOrderEntryCount++; }
if ( ASK_AT_ORDER_ENTRY_PROP_NAME.equals(recordEntry.getKey()) ) {
askAtOrderEntryCount++;
validateCodingProperties(theTermConcept, ASK_AT_ORDER_ENTRY_PROP_NAME, recordEntry, tcConceptValues);
continue;
}
if ( ! tcConceptPropValues.contains(recordEntry.getValue()) ) {
ourLog.error("For TC code: {}, prop: {}, values don't match. Record value: {} TC prop value: {}",
theTcCode, recordEntry.getKey(), recordEntry.getValue(), String.join(" - ", tcConceptPropValues));
if (CollectionUtils.isEmpty(tcConceptValues)) {
ourLog.error("TermConcept with code: {} does not have property: {} which in csv file has value: {}",
theTermConcept.getCode(), recordEntry.getKey(), recordEntry.getValue());
}
}
}
/**
* Validate that all file property codes become a "Coding" property on the TermConcept
* and display properties are the display of the target TermConcept
*/
private void validateAssociatedObservations(String theTcCode, Map.Entry<String, String> recordEntry, Set<String> tcConceptPropValues) {
private void validateCodingProperties(TermConcept theSourceTermConcept, String thePropName,
Map.Entry<String, String> recordEntry, Set<String> theTCPropValues) {
List<String> recordPropertyCodes = parsePropertyCodeValues(recordEntry.getValue());
// validate each property in the records was uploaded to the corresponding TermConcept
for (String recordPropertyCode : recordPropertyCodes) {
if ( ! tcConceptPropValues.contains(recordPropertyCode) ) {
ourLog.error("For TC code: {}, prop: {}, record code: {} not found among properties: {}",
theTcCode, recordEntry.getKey(), recordPropertyCode, String.join(" - ", tcConceptPropValues));
if ( ! theTCPropValues.contains(recordPropertyCode) ) {
ourLog.error("For TC code: {}, prop: {}, record code: {} not found among uploaded TC properties: {}",
theSourceTermConcept.getCode(), recordEntry.getKey(), recordPropertyCode, String.join(" - ", theTCPropValues));
}
// validate that the display value for each uploaded TC property of name thePropertyName is the display of the TC pointed by the TC code
validatePropertiesDisplay(theSourceTermConcept, thePropName, recordPropertyCode);
}
// also check that uploaded TC only has properties is has to have
for (String tcPropValue : theTCPropValues) {
if ( ! recordEntry.getValue().contains(tcPropValue)) {
ourLog.error("TC with code: {}, has a property with code: {}, which is not in defined property list: {}",
theSourceTermConcept.getCode(), tcPropValue, recordEntry.getValue());
}
}
}
/**
 * Validate that, for each Coding property named thePropName on the source TermConcept,
 * the property display equals the display of the TermConcept identified by the property code.
 *
 * @param theSourceTermConcept the uploaded TermConcept whose property displays are validated
 * @param thePropName          name of the Coding property being validated
 * @param recordPropertyCode   the csv record code (used only for error reporting)
 */
private void validatePropertiesDisplay(TermConcept theSourceTermConcept, String thePropName, String recordPropertyCode) {
	// from source TermConcept obtain the map of thePropName properties: property code - display
	// merge function keeps the first display so duplicate property codes don't abort validation
	// with an IllegalStateException
	Map<String, String> srcTcCodeDisplayMap = theSourceTermConcept.getProperties().stream()
		.filter(p -> p.getKey().equals(thePropName))
		.collect(Collectors.toMap(TermConceptProperty::getValue, TermConceptProperty::getDisplay, (first, second) -> first));

	for (Map.Entry<String, String> tcCodeDisplayEntry : srcTcCodeDisplayMap.entrySet()) {
		Optional<TermConcept> targetTermConceptOpt =
			myTermConceptDao.findByCodeSystemAndCode(termCodeSystemVersion, tcCodeDisplayEntry.getKey());
		if (targetTermConceptOpt.isEmpty()) {
			ourLog.error("For TC code: {}, target TC with code: {} is not present in DB", theSourceTermConcept.getCode(), recordPropertyCode);
			// fix: original fell through to Optional.get() here and threw NoSuchElementException
			continue;
		}
		TermConcept targetTermConcept = targetTermConceptOpt.get();
		if ( ! tcCodeDisplayEntry.getValue().equals(targetTermConcept.getDisplay()) ) {
			ourLog.error("For TC with code: {}, display is: {}, while target TC display is: {}",
				theSourceTermConcept.getCode(), tcCodeDisplayEntry.getValue(), targetTermConcept.getDisplay());
		}
	}
}
@ -373,8 +437,8 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
}
private List<Map<String, String>> readCsvRecordsAsMap() throws Exception {
CSVParser parser = getCsvRecords();
private List<Map<String, String>> readLoincCsvRecordsAsMap() throws Exception {
CSVParser parser = getParserForZipFile(LOINC_ZIP_CLASSPATH, LOINC_CSV_ZIP_ENTRY_PATH);
Iterator<CSVRecord> iter = parser.iterator();
Map<String, Integer> headerMap = parser.getHeaderMap();
@ -402,17 +466,9 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
}
/**
 * Reads a gzip-compressed csv file from the given classpath/file location and returns its
 * content as a UTF-8 string.
 *
 * @param theFilePath path of the gzip file (resolved via Spring ResourceUtils)
 * @return the decompressed file content
 * @throws Exception on any I/O failure
 */
public String getCvsStringFromZip(String theFilePath) throws Exception {
	// try-with-resources: the original leaked both streams (no close), permanently if
	// IOUtils.toString threw. The assertNotNull on the stream was dropped because a
	// constructor can never return null.
	try (InputStream fileStream = new FileInputStream(ResourceUtils.getFile(theFilePath));
			InputStream gzipStream = new GZIPInputStream(fileStream)) {
		return IOUtils.toString(gzipStream, StandardCharsets.UTF_8);
	}
}
@Nonnull
private CSVParser getCsvRecords() throws Exception {
Reader reader = new StringReader(getCvsStringFromZip(LOINC_CSV_ZIP_CLASSPATH));
private CSVParser getParserForZipFile(String theZipFileClassPath, String theFileEntryPath) throws Exception {
Reader reader = new StringReader(getCvsStringFromZip(theZipFileClassPath, theFileEntryPath));
CSVFormat format = CSVFormat
.newFormat(',')
@ -424,20 +480,41 @@ public class LoincFullLoadR4SandboxIT extends BaseJpaTest {
}
/**
 * Reads a single entry out of a (plain, non-gzip) zip file and returns its content as a
 * UTF-8 string. Fails the test (via JUnit fail/assertNotNull) if the zip file cannot be
 * read or the entry is missing.
 *
 * @param theFilePath         path of the zip file (resolved via Spring ResourceUtils)
 * @param theZipFileEntryPath path of the entry inside the zip file
 * @return the entry content as a UTF-8 string
 */
public String getCvsStringFromZip(String theFilePath, String theZipFileEntryPath) {
	// try-with-resources closes the ZipFile (and the entry InputStream with it)
	try (ZipFile zipFile = new ZipFile(ResourceUtils.getFile(theFilePath))) {
		ZipEntry zipEntry = zipFile.getEntry(theZipFileEntryPath);
		assertNotNull(zipEntry, "Couldn't find file: " + theZipFileEntryPath + " inside zip file: " + theFilePath);
		return IOUtils.toString(zipFile.getInputStream(zipEntry), StandardCharsets.UTF_8);
	} catch (IOException e) {
		fail(e.getMessage());
	}
	// unreachable in practice (fail above throws), but the compiler requires a return path
	fail("Couldn't find " + theFilePath + "/" + theZipFileEntryPath);
	return null;
}
private void validateSavedConceptsCount() {
termCodeSystemVersionWithVersionId = getTermCodeSystemVersion().getPid();
Long tcsvId = getTermCodeSystemVersion().getPid();
int dbVersionedTermConceptCount = runInTransaction(() ->
myTermConceptDao.countByCodeSystemVersion(termCodeSystemVersionWithVersionId) );
ourLog.info("=================> Number of stored concepts for version {}: {}", CS_VERSION, dbVersionedTermConceptCount);
myTermConceptDao.countByCodeSystemVersion(tcsvId) );
ourLog.info("=================> Number of stored concepts for version {}: {}",
CS_VERSION, ourDecimalFormat.format(dbVersionedTermConceptCount));
assertEquals(CS_CONCEPTS_COUNT, dbVersionedTermConceptCount);
}
private TermCodeSystemVersion getTermCodeSystemVersion() {
if (termCodeSystemVersion != null) {
return termCodeSystemVersion;
}
return runInTransaction(() -> {
TermCodeSystem myTermCodeSystem = myTermCodeSystemDao.findByCodeSystemUri(LOINC_URL);
TermCodeSystemVersion termCodeSystemVersion = myTermCodeSystemVersionDao
.findByCodeSystemPidAndVersion(myTermCodeSystem.getPid(), CS_VERSION);
assertNotNull(myTermCodeSystem);
termCodeSystemVersion = myTermCodeSystemVersionDao.findByCodeSystemPidAndVersion(myTermCodeSystem.getPid(), CS_VERSION);
assertNotNull(termCodeSystemVersion);
return termCodeSystemVersion;
});

View File

@ -0,0 +1,89 @@
#################
### MANDATORY ###
#################
# Answer lists (ValueSets of potential answers/values for LOINC "questions")
## File must be present
loinc.answerlist.file=AccessoryFiles/AnswerFile/AnswerList.csv
# Answer list links (connects LOINC observation codes to answer list codes)
## File must be present
loinc.answerlist.link.file=AccessoryFiles/AnswerFile/LoincAnswerListLink.csv
# Document ontology
## File must be present
loinc.document.ontology.file=AccessoryFiles/DocumentOntology/DocumentOntology.csv
# LOINC codes
## File must be present
loinc.file=LoincTable/Loinc.csv
# LOINC hierarchy
## File must be present
loinc.hierarchy.file=AccessoryFiles/ComponentHierarchyBySystem/ComponentHierarchyBySystem.csv
# IEEE medical device codes
## File must be present
loinc.ieee.medical.device.code.mapping.table.file=AccessoryFiles/LoincIeeeMedicalDeviceCodeMappingTable/LoincIeeeMedicalDeviceCodeMappingTable.csv
# Imaging document codes
## File must be present
loinc.imaging.document.codes.file=AccessoryFiles/ImagingDocuments/ImagingDocumentCodes.csv
# Part
## File must be present
loinc.part.file=AccessoryFiles/PartFile/Part.csv
# Part link
## File must be present
loinc.part.link.primary.file=AccessoryFiles/PartFile/LoincPartLink_Primary.csv
loinc.part.link.supplementary.file=AccessoryFiles/PartFile/LoincPartLink_Supplementary.csv
# Part related code mapping
## File must be present
loinc.part.related.code.mapping.file=AccessoryFiles/PartFile/PartRelatedCodeMapping.csv
# RSNA playbook
## File must be present
loinc.rsna.playbook.file=AccessoryFiles/LoincRsnaRadiologyPlaybook/LoincRsnaRadiologyPlaybook.csv
# Top 2000 codes - SI
## File must be present
loinc.top2000.common.lab.results.si.file=AccessoryFiles/Top2000Results/SI/Top2000CommonLabResultsSi.csv
# Top 2000 codes - US
## File must be present
loinc.top2000.common.lab.results.us.file=AccessoryFiles/Top2000Results/US/Top2000CommonLabResultsUs.csv
# Universal lab order ValueSet
## File must be present
loinc.universal.lab.order.valueset.file=AccessoryFiles/LoincUniversalLabOrdersValueSet/LoincUniversalLabOrdersValueSet.csv
################
### OPTIONAL ###
################
# This is the version identifier for the LOINC code system
## Key may be omitted if only a single version of LOINC is being kept.
loinc.codesystem.version=1.11
loinc.codesystem.make.current=false
# This is the version identifier for the answer list file
## Key may be omitted
loinc.answerlist.version=Beta.1
# This is the version identifier for uploaded ConceptMap resources
## Key may be omitted
loinc.conceptmap.version=Beta.1
# Group
## Default value if key not provided: AccessoryFiles/GroupFile/Group.csv
## File may be omitted
loinc.group.file=AccessoryFiles/GroupFile/Group.csv
# Group terms
## Default value if key not provided: AccessoryFiles/GroupFile/GroupLoincTerms.csv
## File may be omitted
loinc.group.terms.file=AccessoryFiles/GroupFile/GroupLoincTerms.csv
# Parent group
## Default value if key not provided: AccessoryFiles/GroupFile/ParentGroup.csv
## File may be omitted
loinc.parent.group.file=AccessoryFiles/GroupFile/ParentGroup.csv