diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu1.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu1.java index 5faab42ef75..54169c21d0e 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu1.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu1.java @@ -2,6 +2,8 @@ package ca.uhn.fhir.jpa.term; import java.util.List; +import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion; + public class HapiTerminologySvcDstu1 extends BaseHapiTerminologySvc { @Override @@ -9,4 +11,9 @@ public class HapiTerminologySvcDstu1 extends BaseHapiTerminologySvc { throw new UnsupportedOperationException(); } + @Override + public void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion) { + throw new UnsupportedOperationException(); + } + } diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu2.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu2.java index 41dd4d0e8e8..4b4e3df4b04 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu2.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu2.java @@ -5,6 +5,8 @@ import java.util.List; import org.hl7.fhir.instance.hapi.validation.IValidationSupport; import org.springframework.beans.factory.annotation.Autowired; +import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion; + public class HapiTerminologySvcDstu2 extends BaseHapiTerminologySvc { @Autowired @@ -16,4 +18,10 @@ public class HapiTerminologySvcDstu2 extends BaseHapiTerminologySvc { throw new UnsupportedOperationException(); } + + @Override + public void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion) { + + } + } diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu3.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu3.java index 2afadf49668..0f849233fdd 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu3.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/HapiTerminologySvcDstu3.java @@ -3,6 +3,7 @@ package ca.uhn.fhir.jpa.term; import java.util.ArrayList; import java.util.List; +import org.hl7.fhir.dstu3.model.CodeSystem; import org.hl7.fhir.dstu3.model.ValueSet; import org.hl7.fhir.dstu3.model.ValueSet.ValueSetExpansionContainsComponent; import org.hl7.fhir.dstu3.terminologies.ValueSetExpander; @@ -10,6 +11,7 @@ import org.hl7.fhir.dstu3.terminologies.ValueSetExpander.ValueSetExpansionOutcom import org.hl7.fhir.dstu3.utils.IWorkerContext; import org.springframework.beans.factory.annotation.Autowired; +import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion; import ca.uhn.fhir.rest.server.exceptions.InternalErrorException; public class HapiTerminologySvcDstu3 extends BaseHapiTerminologySvc { @@ -40,4 +42,10 @@ public class HapiTerminologySvcDstu3 extends BaseHapiTerminologySvc { } + @Override + public void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion) { + CodeSystem cs = new org.hl7.fhir.dstu3.model.CodeSystem(); + + } + } diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/IHapiTerminologySvc.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/IHapiTerminologySvc.java index 3c6cda542e8..44115a94e3c 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/IHapiTerminologySvc.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/IHapiTerminologySvc.java @@ -35,7 +35,7 @@ public interface IHapiTerminologySvc { List findCodesBelow(String theSystem, String theCode); - void storeNewCodeSystemVersion(Long theCodeSystemResourcePid, String theSystemUri, TermCodeSystemVersion theCodeSytem); + void storeNewCodeSystemVersion(Long theCodeSystemResourcePid, String theSystemUri, TermCodeSystemVersion theCodeSytemVersion); public boolean supportsSystem(String theCodeSystem); @@ -43,4 +43,6 @@ public interface IHapiTerminologySvc { List findCodesAbove(String theSystem, String theCode); + void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion); + } diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvc.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvc.java index 8eae5b803ed..37fd8003c06 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvc.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvc.java @@ -1,65 +1,248 @@ package ca.uhn.fhir.jpa.term; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.BufferedReader; import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileReader; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; +import java.io.Reader; +import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; import org.apache.commons.io.IOUtils; +import org.springframework.beans.factory.annotation.Autowired; +import com.google.common.annotations.VisibleForTesting; + +import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion; import ca.uhn.fhir.jpa.entity.TermConcept; +import ca.uhn.fhir.jpa.entity.TermConceptParentChildLink; import ca.uhn.fhir.rest.server.exceptions.InternalErrorException; public class TerminologyLoaderSvc { private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(TerminologyLoaderSvc.class); + @Autowired + private IHapiTerminologySvc myTermSvc; + + @VisibleForTesting + void setTermSvcForUnitTests(IHapiTerminologySvc theTermSvc) { + myTermSvc = theTermSvc; + } + public void loadSnomedCt(byte[] theZipBytes) { - - Map id2concept = new HashMap(); - - ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(theZipBytes)); + String filenameDescription = "Terminology/sct2_Description_Full"; + String filenameRelationship = "Terminology/sct2_Relationship_Full"; + List allFilenames = Arrays.asList(filenameDescription, filenameRelationship); + + Map filenameToFile = new HashMap(); + ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new ByteArrayInputStream(theZipBytes))); try { - for (ZipEntry nextEntry; (nextEntry = zis.getNextEntry()) != null; ) { - ZippedFileInputStream entryStream = new ZippedFileInputStream(zis); - byte[] bytes = IOUtils.toByteArray(entryStream); - ourLog.info("Read file {} - {} bytes", nextEntry.getName(), bytes.length); + for (ZipEntry nextEntry; (nextEntry = zis.getNextEntry()) != null;) { + ZippedFileInputStream inputStream = new ZippedFileInputStream(zis); - String string = new String(bytes, "UTF-8"); - CSVParser parsed = CSVParser.parse(string, CSVFormat.newFormat('\t').withFirstRecordAsHeader()); - ourLog.info("Header map: {}", parsed.getHeaderMap()); + boolean want = false; + for (String next : allFilenames) { + if (nextEntry.getName().contains(next)) { + want = true; + } + } + + if (!want) { + ourLog.info("Ignoring zip entry: {}", nextEntry.getName()); + IOUtils.copy(inputStream, new SinkOutputStream()); + continue; + } + + ourLog.debug("Streaming ZIP entry {} into temporary file", nextEntry.getName()); + + File nextOutFile = File.createTempFile("hapi_fhir", ".csv"); + nextOutFile.deleteOnExit(); + OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(nextOutFile, false)); + try { + IOUtils.copy(inputStream, outputStream); + } finally { + IOUtils.closeQuietly(outputStream); + } + + filenameToFile.put(nextEntry.getName(), nextOutFile); } } catch (IOException e) { throw new InternalErrorException(e); } finally { IOUtils.closeQuietly(zis); } + + ourLog.info("Beginning SNOMED CT processing"); + + final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion(); + final Map id2concept = new HashMap(); + final Map code2concept = new HashMap(); + final List links = new ArrayList(); + + IRecordHandler handler = new IRecordHandler() { + @Override + public void accept(CSVRecord theRecord) { + String id = theRecord.get("id"); + boolean active = "1".equals(theRecord.get("active")); + if (!active) { + return; + } + String conceptId = theRecord.get("conceptId"); + String term = theRecord.get("term"); + + TermConcept concept = getOrCreateConcept(codeSystemVersion, id2concept, id); + concept.setCode(conceptId); + concept.setDisplay(term); + code2concept.put(conceptId, concept); + } + }; + iterateOverZipFile(filenameToFile, filenameDescription, handler); + + final HashSet rootConcepts = new HashSet(); + rootConcepts.addAll(code2concept.values()); + + handler = new IRecordHandler() { + @Override + public void accept(CSVRecord theRecord) { + String sourceId = theRecord.get("sourceId"); + String destinationId = theRecord.get("destinationId"); + String typeId = theRecord.get("typeId"); + boolean active = "1".equals(theRecord.get("active")); + if (!active) { + return; + } + TermConcept typeConcept = findConcept(code2concept, typeId); + TermConcept sourceConcept = findConcept(code2concept, sourceId); + TermConcept targetConcept = findConcept(code2concept, destinationId); + if (typeConcept.getDisplay().equals("Is a")) { + TermConceptParentChildLink link = new TermConceptParentChildLink(); + link.setChild(sourceConcept); + link.setParent(targetConcept); + link.setCodeSystem(codeSystemVersion); + rootConcepts.remove(link.getChild()); + } else { + ourLog.warn("Unknown relationship type: {}/{}", typeId, typeConcept.getDisplay()); + } + } + + private TermConcept findConcept(final Map code2concept, String typeId) { + TermConcept typeConcept = code2concept.get(typeId); + if (typeConcept == null) { + throw new InternalErrorException("Unknown type ID: " + typeId); + } + return typeConcept; + } + }; + iterateOverZipFile(filenameToFile, filenameRelationship, handler); + + ourLog.info("Done loading SNOMED CT files - {} root codes, {} total codes", rootConcepts.size(), code2concept.size()); + + codeSystemVersion.getConcepts().addAll(rootConcepts); + myTermSvc.storeNewCodeSystemVersion("http://snomed.info/sct", codeSystemVersion); } - - - + + private void iterateOverZipFile(Map theFilenameToFile, String fileNamePart, IRecordHandler handler) { + for (Entry nextEntry : theFilenameToFile.entrySet()) { + + if (nextEntry.getKey().contains(fileNamePart)) { + ourLog.info("Processing file {}", nextEntry.getKey()); + + Reader reader = null; + CSVParser parsed = null; + try { + reader = new BufferedReader(new FileReader(nextEntry.getValue())); + parsed = new CSVParser(reader, CSVFormat.newFormat('\t').withFirstRecordAsHeader()); + Iterator iter = parsed.iterator(); + ourLog.debug("Header map: {}", parsed.getHeaderMap()); + + while (iter.hasNext()) { + CSVRecord nextRecord = iter.next(); + handler.accept(nextRecord); + } + } catch (IOException e) { + throw new InternalErrorException(e); + } finally { + IOUtils.closeQuietly(parsed); + IOUtils.closeQuietly(reader); + } + } + } + } + + private TermConcept getOrCreateConcept(TermCodeSystemVersion codeSystemVersion, Map id2concept, String id) { + TermConcept concept = id2concept.get(id); + if (concept == null) { + concept = new TermConcept(); + id2concept.put(id, concept); + concept.setCodeSystem(codeSystemVersion); + } + return concept; + } + private static class ZippedFileInputStream extends InputStream { - private ZipInputStream is; + private ZipInputStream is; - public ZippedFileInputStream(ZipInputStream is){ - this.is = is; - } + public ZippedFileInputStream(ZipInputStream is) { + this.is = is; + } - @Override - public int read() throws IOException { - return is.read(); - } + @Override + public int read() throws IOException { + return is.read(); + } - @Override - public void close() throws IOException { - is.closeEntry(); - } + @Override + public void close() throws IOException { + is.closeEntry(); + } } + private interface IRecordHandler { + void accept(CSVRecord theRecord); + } + public static void main(String[] args) throws Exception { + byte[] bytes = IOUtils.toByteArray(new FileInputStream("/Users/james/Downloads/SnomedCT_Release_INT_20160131_Full.zip")); + TerminologyLoaderSvc svc = new TerminologyLoaderSvc(); + svc.loadSnomedCt(bytes); + } + + private static class SinkOutputStream extends OutputStream { + + @Override + public void write(int theB) throws IOException { + // ignore + } + + @Override + public void write(byte[] theB) throws IOException { + // ignore + } + + @Override + public void write(byte[] theB, int theOff, int theLen) throws IOException { + // ignore + } + + } + } diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvcTest.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvcTest.java index 9dfbf3b7286..e7ea9dd8450 100644 --- a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvcTest.java +++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvcTest.java @@ -1,5 +1,7 @@ package ca.uhn.fhir.jpa.term; +import static org.mockito.Mockito.mock; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.zip.ZipEntry; @@ -17,10 +19,14 @@ import ca.uhn.fhir.util.TestUtil; public class TerminologyLoaderSvcTest { private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(TerminologyLoaderSvcTest.class); private TerminologyLoaderSvc mySvc; + private IHapiTerminologySvc myTermSvc; @Before public void before() { + myTermSvc = mock(IHapiTerminologySvc.class); + mySvc = new TerminologyLoaderSvc(); + mySvc.setTermSvcForUnitTests(myTermSvc); } @AfterClass diff --git a/hapi-fhir-jpaserver-base/src/test/resources/sct/sct2_Description_Full-en_INT_20160131.txt b/hapi-fhir-jpaserver-base/src/test/resources/sct/sct2_Description_Full-en_INT_20160131.txt index a75ffe00b4c..3a3aec8d383 100644 --- a/hapi-fhir-jpaserver-base/src/test/resources/sct/sct2_Description_Full-en_INT_20160131.txt +++ b/hapi-fhir-jpaserver-base/src/test/resources/sct/sct2_Description_Full-en_INT_20160131.txt @@ -1,10 +1,11 @@ id effectiveTime active moduleId conceptId languageCode typeId term caseSignificanceId -101013 20020131 1 900000000000207008 126813005 en 900000000000013009 Neoplasm of anterior aspect of epiglottis 900000000000020002 -102018 20020131 1 900000000000207008 126814004 en 900000000000013009 Neoplasm of junctional region of epiglottis 900000000000020002 -103011 20020131 1 900000000000207008 126815003 en 900000000000013009 Neoplasm of lateral wall of oropharynx 900000000000020002 -104017 20020131 1 900000000000207008 126816002 en 900000000000013009 Neoplasm of posterior wall of oropharynx 900000000000020002 -105016 20020131 1 900000000000207008 126817006 en 900000000000013009 Neoplasm of esophagus 900000000000020002 +101013 20020131 1 900000000000207008 126813005 en 900000000000013009 ROOT1 900000000000020002 +102018 20020131 1 900000000000207008 126814004 en 900000000000013009 ROOT2 900000000000020002 +103011 20020131 1 900000000000207008 126815003 en 900000000000013009 ROOT1_1 900000000000020002 +104017 20020131 1 900000000000207008 126816002 en 900000000000013009 ROOT1_2 900000000000020002 +105016 20020131 1 900000000000207008 126817006 en 900000000000013009 ROOT1_1_1 900000000000020002 106015 20020131 1 900000000000207008 126818001 en 900000000000013009 Neoplasm of cervical esophagus 900000000000020002 107012 20020131 1 900000000000207008 126819009 en 900000000000013009 Neoplasm of thoracic esophagus 900000000000020002 108019 20020131 1 900000000000207008 126820003 en 900000000000013009 Neoplasm of abdominal esophagus 900000000000020002 110017 20020131 1 900000000000207008 126822006 en 900000000000013009 Neoplasm of middle third of esophagus 900000000000020002 +181114011 20020131 1 900000000000207008 116680003 en 900000000000013009 Is a 900000000000020002 diff --git a/hapi-fhir-jpaserver-base/src/test/resources/sct/sct2_Relationship_Full_INT_20160131.txt b/hapi-fhir-jpaserver-base/src/test/resources/sct/sct2_Relationship_Full_INT_20160131.txt index 0f96fbd5a2a..02087b3a883 100644 --- a/hapi-fhir-jpaserver-base/src/test/resources/sct/sct2_Relationship_Full_INT_20160131.txt +++ b/hapi-fhir-jpaserver-base/src/test/resources/sct/sct2_Relationship_Full_INT_20160131.txt @@ -1,10 +1,4 @@ id effectiveTime active moduleId sourceId destinationId relationshipGroup typeId characteristicTypeId modifierId -100022 20020131 1 900000000000207008 100000000 102272007 0 116680003 900000000000011006 900000000000451002 -100022 20090731 0 900000000000207008 100000000 102272007 0 116680003 900000000000011006 900000000000451002 -101021 20020131 1 900000000000207008 10000006 29857009 0 116680003 900000000000011006 900000000000451002 -102025 20020131 1 900000000000207008 10000006 9972008 0 116680003 900000000000011006 900000000000451002 -103024 20020131 1 900000000000207008 1000004 19130008 0 116680003 900000000000011006 900000000000451002 -103024 20030131 0 900000000000207008 1000004 19130008 0 116680003 900000000000011006 900000000000451002 -104029 20020131 1 900000000000207008 100001001 102272007 0 116680003 900000000000011006 900000000000451002 -104029 20090731 0 900000000000207008 100001001 102272007 0 116680003 900000000000011006 900000000000451002 -105028 20020131 1 900000000000207008 100002008 102272007 0 116680003 900000000000011006 900000000000451002 +100022 20020131 1 900000000000207008 126815003 126813005 0 116680003 900000000000011006 900000000000451002 +100022 20090731 0 900000000000207008 126816002 126813005 0 116680003 900000000000011006 900000000000451002 +101021 20020131 1 900000000000207008 126817006 126815003 0 116680003 900000000000011006 900000000000451002