Work on term svc

This commit is contained in:
jamesagnew 2016-05-19 06:15:58 -04:00
parent 6c3446dee1
commit 81d68b07f8
8 changed files with 250 additions and 41 deletions

View File

@ -2,6 +2,8 @@ package ca.uhn.fhir.jpa.term;
import java.util.List;
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
public class HapiTerminologySvcDstu1 extends BaseHapiTerminologySvc {
@Override
@ -9,4 +11,9 @@ public class HapiTerminologySvcDstu1 extends BaseHapiTerminologySvc {
throw new UnsupportedOperationException();
}
/**
 * Not supported by the DSTU1 terminology service: every call fails.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion) {
throw new UnsupportedOperationException();
}
}

View File

@ -5,6 +5,8 @@ import java.util.List;
import org.hl7.fhir.instance.hapi.validation.IValidationSupport;
import org.springframework.beans.factory.annotation.Autowired;
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
public class HapiTerminologySvcDstu2 extends BaseHapiTerminologySvc {
@Autowired
@ -16,4 +18,10 @@ public class HapiTerminologySvcDstu2 extends BaseHapiTerminologySvc {
throw new UnsupportedOperationException();
}
/**
 * Currently does nothing: both arguments are accepted and ignored.
 */
@Override
public void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion) {
// NOTE(review): empty body, whereas the DSTU1 service throws UnsupportedOperationException
// for the same method - confirm that silently ignoring the call is intended here.
}
}

View File

@ -3,6 +3,7 @@ package ca.uhn.fhir.jpa.term;
import java.util.ArrayList;
import java.util.List;
import org.hl7.fhir.dstu3.model.CodeSystem;
import org.hl7.fhir.dstu3.model.ValueSet;
import org.hl7.fhir.dstu3.model.ValueSet.ValueSetExpansionContainsComponent;
import org.hl7.fhir.dstu3.terminologies.ValueSetExpander;
@ -10,6 +11,7 @@ import org.hl7.fhir.dstu3.terminologies.ValueSetExpander.ValueSetExpansionOutcom
import org.hl7.fhir.dstu3.utils.IWorkerContext;
import org.springframework.beans.factory.annotation.Autowired;
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
public class HapiTerminologySvcDstu3 extends BaseHapiTerminologySvc {
@ -40,4 +42,10 @@ public class HapiTerminologySvcDstu3 extends BaseHapiTerminologySvc {
}
/**
 * Placeholder implementation: it only instantiates an empty DSTU3 CodeSystem
 * resource. Neither argument is read and no store call is made from here.
 */
@Override
public void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion) {
// NOTE(review): cs is never used after construction - presumably unfinished work
CodeSystem cs = new org.hl7.fhir.dstu3.model.CodeSystem();
}
}

View File

@ -35,7 +35,7 @@ public interface IHapiTerminologySvc {
List<VersionIndependentConcept> findCodesBelow(String theSystem, String theCode);
void storeNewCodeSystemVersion(Long theCodeSystemResourcePid, String theSystemUri, TermCodeSystemVersion theCodeSytem);
void storeNewCodeSystemVersion(Long theCodeSystemResourcePid, String theSystemUri, TermCodeSystemVersion theCodeSytemVersion);
public boolean supportsSystem(String theCodeSystem);
@ -43,4 +43,6 @@ public interface IHapiTerminologySvc {
List<VersionIndependentConcept> findCodesAbove(String theSystem, String theCode);
void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion);
}

View File

@ -1,65 +1,248 @@
package ca.uhn.fhir.jpa.term;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.IOUtils;
import org.springframework.beans.factory.annotation.Autowired;
import com.google.common.annotations.VisibleForTesting;
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
import ca.uhn.fhir.jpa.entity.TermConcept;
import ca.uhn.fhir.jpa.entity.TermConceptParentChildLink;
import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
public class TerminologyLoaderSvc {
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(TerminologyLoaderSvc.class);
@Autowired
private IHapiTerminologySvc myTermSvc;
/**
 * Overwrites the terminology service field that is otherwise populated through
 * {@code @Autowired}, so that unit tests can supply their own instance.
 *
 * @param theTermSvc the service instance to use from now on
 */
@VisibleForTesting
void setTermSvcForUnitTests(IHapiTerminologySvc theTermSvc) {
myTermSvc = theTermSvc;
}
public void loadSnomedCt(byte[] theZipBytes) {
Map<String, TermConcept> id2concept = new HashMap<String, TermConcept>();
ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(theZipBytes));
String filenameDescription = "Terminology/sct2_Description_Full";
String filenameRelationship = "Terminology/sct2_Relationship_Full";
List<String> allFilenames = Arrays.asList(filenameDescription, filenameRelationship);
Map<String, File> filenameToFile = new HashMap<String, File>();
ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new ByteArrayInputStream(theZipBytes)));
try {
for (ZipEntry nextEntry; (nextEntry = zis.getNextEntry()) != null; ) {
ZippedFileInputStream entryStream = new ZippedFileInputStream(zis);
byte[] bytes = IOUtils.toByteArray(entryStream);
ourLog.info("Read file {} - {} bytes", nextEntry.getName(), bytes.length);
for (ZipEntry nextEntry; (nextEntry = zis.getNextEntry()) != null;) {
ZippedFileInputStream inputStream = new ZippedFileInputStream(zis);
String string = new String(bytes, "UTF-8");
CSVParser parsed = CSVParser.parse(string, CSVFormat.newFormat('\t').withFirstRecordAsHeader());
ourLog.info("Header map: {}", parsed.getHeaderMap());
boolean want = false;
for (String next : allFilenames) {
if (nextEntry.getName().contains(next)) {
want = true;
}
}
if (!want) {
ourLog.info("Ignoring zip entry: {}", nextEntry.getName());
IOUtils.copy(inputStream, new SinkOutputStream());
continue;
}
ourLog.debug("Streaming ZIP entry {} into temporary file", nextEntry.getName());
File nextOutFile = File.createTempFile("hapi_fhir", ".csv");
nextOutFile.deleteOnExit();
OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(nextOutFile, false));
try {
IOUtils.copy(inputStream, outputStream);
} finally {
IOUtils.closeQuietly(outputStream);
}
filenameToFile.put(nextEntry.getName(), nextOutFile);
}
} catch (IOException e) {
throw new InternalErrorException(e);
} finally {
IOUtils.closeQuietly(zis);
}
ourLog.info("Beginning SNOMED CT processing");
final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
final Map<String, TermConcept> id2concept = new HashMap<String, TermConcept>();
final Map<String, TermConcept> code2concept = new HashMap<String, TermConcept>();
final List<TermConceptParentChildLink> links = new ArrayList<TermConceptParentChildLink>();
IRecordHandler handler = new IRecordHandler() {
@Override
public void accept(CSVRecord theRecord) {
String id = theRecord.get("id");
boolean active = "1".equals(theRecord.get("active"));
if (!active) {
return;
}
String conceptId = theRecord.get("conceptId");
String term = theRecord.get("term");
TermConcept concept = getOrCreateConcept(codeSystemVersion, id2concept, id);
concept.setCode(conceptId);
concept.setDisplay(term);
code2concept.put(conceptId, concept);
}
};
iterateOverZipFile(filenameToFile, filenameDescription, handler);
final HashSet<TermConcept> rootConcepts = new HashSet<TermConcept>();
rootConcepts.addAll(code2concept.values());
handler = new IRecordHandler() {
@Override
public void accept(CSVRecord theRecord) {
String sourceId = theRecord.get("sourceId");
String destinationId = theRecord.get("destinationId");
String typeId = theRecord.get("typeId");
boolean active = "1".equals(theRecord.get("active"));
if (!active) {
return;
}
TermConcept typeConcept = findConcept(code2concept, typeId);
TermConcept sourceConcept = findConcept(code2concept, sourceId);
TermConcept targetConcept = findConcept(code2concept, destinationId);
if (typeConcept.getDisplay().equals("Is a")) {
TermConceptParentChildLink link = new TermConceptParentChildLink();
link.setChild(sourceConcept);
link.setParent(targetConcept);
link.setCodeSystem(codeSystemVersion);
rootConcepts.remove(link.getChild());
} else {
ourLog.warn("Unknown relationship type: {}/{}", typeId, typeConcept.getDisplay());
}
}
private TermConcept findConcept(final Map<String, TermConcept> code2concept, String typeId) {
TermConcept typeConcept = code2concept.get(typeId);
if (typeConcept == null) {
throw new InternalErrorException("Unknown type ID: " + typeId);
}
return typeConcept;
}
};
iterateOverZipFile(filenameToFile, filenameRelationship, handler);
ourLog.info("Done loading SNOMED CT files - {} root codes, {} total codes", rootConcepts.size(), code2concept.size());
codeSystemVersion.getConcepts().addAll(rootConcepts);
myTermSvc.storeNewCodeSystemVersion("http://snomed.info/sct", codeSystemVersion);
}
/**
 * Feeds the records of every matching extracted file to the given handler.
 * <p>
 * A file matches when its map key (the name of the ZIP entry it was extracted
 * from) contains <code>fileNamePart</code>. Each matching file is parsed as
 * tab-delimited text whose first record is the header, and every following
 * record is passed to {@link IRecordHandler#accept(CSVRecord)} in file order.
 * </p>
 *
 * @param theFilenameToFile ZIP entry name mapped to the temporary file holding its content
 * @param fileNamePart fragment that a ZIP entry name must contain for its file to be processed
 * @param handler callback invoked once per parsed record
 * @throws InternalErrorException if a file cannot be opened or read
 */
private void iterateOverZipFile(Map<String, File> theFilenameToFile, String fileNamePart, IRecordHandler handler) {
	for (Entry<String, File> nextEntry : theFilenameToFile.entrySet()) {
		if (!nextEntry.getKey().contains(fileNamePart)) {
			continue;
		}

		ourLog.info("Processing file {}", nextEntry.getKey());

		InputStream fileStream = null;
		Reader reader = null;
		CSVParser parsed = null;
		try {
			// Decode explicitly as UTF-8. FileReader would use the platform default
			// charset, which corrupts non-ASCII terms on hosts where that is not UTF-8.
			fileStream = new FileInputStream(nextEntry.getValue());
			reader = new BufferedReader(new java.io.InputStreamReader(fileStream, "UTF-8"));

			parsed = new CSVParser(reader, CSVFormat.newFormat('\t').withFirstRecordAsHeader());
			Iterator<CSVRecord> iter = parsed.iterator();
			ourLog.debug("Header map: {}", parsed.getHeaderMap());

			while (iter.hasNext()) {
				CSVRecord nextRecord = iter.next();
				handler.accept(nextRecord);
			}
		} catch (IOException e) {
			throw new InternalErrorException(e);
		} finally {
			IOUtils.closeQuietly(parsed);
			IOUtils.closeQuietly(reader);
			// Covers the case where the stream was opened but wrapping it failed
			IOUtils.closeQuietly(fileStream);
		}
	}
}
/**
 * Looks up the concept registered under the given ID, creating and registering
 * a new one when the map has no entry for it.
 * <p>
 * A newly created concept is put into the map and then linked to the supplied
 * code system version; an existing concept is returned untouched.
 * </p>
 *
 * @param codeSystemVersion the code system version assigned to a newly created concept
 * @param id2concept lookup of already created concepts, updated when a concept is created
 * @param id key identifying the concept in the map
 * @return the existing or newly created concept, never <code>null</code>
 */
private TermConcept getOrCreateConcept(TermCodeSystemVersion codeSystemVersion, Map<String, TermConcept> id2concept, String id) {
	TermConcept existing = id2concept.get(id);
	if (existing != null) {
		return existing;
	}

	TermConcept created = new TermConcept();
	id2concept.put(id, created);
	created.setCodeSystem(codeSystemVersion);
	return created;
}
private static class ZippedFileInputStream extends InputStream {
private ZipInputStream is;
private ZipInputStream is;
public ZippedFileInputStream(ZipInputStream is){
this.is = is;
}
public ZippedFileInputStream(ZipInputStream is) {
this.is = is;
}
@Override
public int read() throws IOException {
return is.read();
}
@Override
public int read() throws IOException {
return is.read();
}
@Override
public void close() throws IOException {
is.closeEntry();
}
@Override
public void close() throws IOException {
is.closeEntry();
}
}
/**
 * Callback that receives the parsed records of a delimited file one at a time.
 */
private interface IRecordHandler {
/**
 * Called once for each record read from the file being processed.
 *
 * @param theRecord the record that was just parsed
 */
void accept(CSVRecord theRecord);
}
/**
 * Manual entry point that loads a SNOMED CT release ZIP from disk and runs it
 * through {@link #loadSnomedCt(byte[])}.
 * <p>
 * NOTE(review): nothing on this path sets the terminology service field, so
 * confirm whether the store step at the end of loadSnomedCt can succeed when
 * the loader is started this way.
 * </p>
 *
 * @param args optional; when present, <code>args[0]</code> is the path of the ZIP
 *           file to load, otherwise the original hard-coded path is used
 * @throws Exception if the file cannot be read or loading fails
 */
public static void main(String[] args) throws Exception {
	String zipPath = "/Users/james/Downloads/SnomedCT_Release_INT_20160131_Full.zip";
	if (args != null && args.length > 0) {
		zipPath = args[0];
	}

	byte[] bytes;
	FileInputStream inputStream = new FileInputStream(zipPath);
	try {
		bytes = IOUtils.toByteArray(inputStream);
	} finally {
		// The stream was previously left open after being read
		IOUtils.closeQuietly(inputStream);
	}

	TerminologyLoaderSvc svc = new TerminologyLoaderSvc();
	svc.loadSnomedCt(bytes);
}
/**
 * Output stream that discards everything written to it. All three
 * <code>write</code> variants are overridden as no-ops, so bulk writes are
 * dropped directly instead of being routed byte by byte through the base class.
 */
private static class SinkOutputStream extends OutputStream {

	@Override
	public void write(byte[] theBytes, int theOffset, int theLength) throws IOException {
		// discard
	}

	@Override
	public void write(byte[] theBytes) throws IOException {
		// discard
	}

	@Override
	public void write(int theByte) throws IOException {
		// discard
	}

}
}

View File

@ -1,5 +1,7 @@
package ca.uhn.fhir.jpa.term;
import static org.mockito.Mockito.mock;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.ZipEntry;
@ -17,10 +19,14 @@ import ca.uhn.fhir.util.TestUtil;
public class TerminologyLoaderSvcTest {
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(TerminologyLoaderSvcTest.class);
private TerminologyLoaderSvc mySvc;
private IHapiTerminologySvc myTermSvc;
/**
 * JUnit {@code @Before} fixture: creates a Mockito mock of the terminology
 * service and a new loader instance that uses that mock.
 */
@Before
public void before() {
myTermSvc = mock(IHapiTerminologySvc.class);
mySvc = new TerminologyLoaderSvc();
mySvc.setTermSvcForUnitTests(myTermSvc);
}
@AfterClass

View File

@ -1,10 +1,11 @@
id effectiveTime active moduleId conceptId languageCode typeId term caseSignificanceId
101013 20020131 1 900000000000207008 126813005 en 900000000000013009 Neoplasm of anterior aspect of epiglottis 900000000000020002
102018 20020131 1 900000000000207008 126814004 en 900000000000013009 Neoplasm of junctional region of epiglottis 900000000000020002
103011 20020131 1 900000000000207008 126815003 en 900000000000013009 Neoplasm of lateral wall of oropharynx 900000000000020002
104017 20020131 1 900000000000207008 126816002 en 900000000000013009 Neoplasm of posterior wall of oropharynx 900000000000020002
105016 20020131 1 900000000000207008 126817006 en 900000000000013009 Neoplasm of esophagus 900000000000020002
101013 20020131 1 900000000000207008 126813005 en 900000000000013009 ROOT1 900000000000020002
102018 20020131 1 900000000000207008 126814004 en 900000000000013009 ROOT2 900000000000020002
103011 20020131 1 900000000000207008 126815003 en 900000000000013009 ROOT1_1 900000000000020002
104017 20020131 1 900000000000207008 126816002 en 900000000000013009 ROOT1_2 900000000000020002
105016 20020131 1 900000000000207008 126817006 en 900000000000013009 ROOT1_1_1 900000000000020002
106015 20020131 1 900000000000207008 126818001 en 900000000000013009 Neoplasm of cervical esophagus 900000000000020002
107012 20020131 1 900000000000207008 126819009 en 900000000000013009 Neoplasm of thoracic esophagus 900000000000020002
108019 20020131 1 900000000000207008 126820003 en 900000000000013009 Neoplasm of abdominal esophagus 900000000000020002
110017 20020131 1 900000000000207008 126822006 en 900000000000013009 Neoplasm of middle third of esophagus 900000000000020002
181114011 20020131 1 900000000000207008 116680003 en 900000000000013009 Is a 900000000000020002

View File

@ -1,10 +1,4 @@
id effectiveTime active moduleId sourceId destinationId relationshipGroup typeId characteristicTypeId modifierId
100022 20020131 1 900000000000207008 100000000 102272007 0 116680003 900000000000011006 900000000000451002
100022 20090731 0 900000000000207008 100000000 102272007 0 116680003 900000000000011006 900000000000451002
101021 20020131 1 900000000000207008 10000006 29857009 0 116680003 900000000000011006 900000000000451002
102025 20020131 1 900000000000207008 10000006 9972008 0 116680003 900000000000011006 900000000000451002
103024 20020131 1 900000000000207008 1000004 19130008 0 116680003 900000000000011006 900000000000451002
103024 20030131 0 900000000000207008 1000004 19130008 0 116680003 900000000000011006 900000000000451002
104029 20020131 1 900000000000207008 100001001 102272007 0 116680003 900000000000011006 900000000000451002
104029 20090731 0 900000000000207008 100001001 102272007 0 116680003 900000000000011006 900000000000451002
105028 20020131 1 900000000000207008 100002008 102272007 0 116680003 900000000000011006 900000000000451002
100022 20020131 1 900000000000207008 126815003 126813005 0 116680003 900000000000011006 900000000000451002
100022 20090731 0 900000000000207008 126816002 126813005 0 116680003 900000000000011006 900000000000451002
101021 20020131 1 900000000000207008 126817006 126815003 0 116680003 900000000000011006 900000000000451002