Work on term svc
This commit is contained in:
parent
6c3446dee1
commit
81d68b07f8
|
@ -2,6 +2,8 @@ package ca.uhn.fhir.jpa.term;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
|
||||
|
||||
public class HapiTerminologySvcDstu1 extends BaseHapiTerminologySvc {
|
||||
|
||||
@Override
|
||||
|
@ -9,4 +11,9 @@ public class HapiTerminologySvcDstu1 extends BaseHapiTerminologySvc {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -5,6 +5,8 @@ import java.util.List;
|
|||
import org.hl7.fhir.instance.hapi.validation.IValidationSupport;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
|
||||
|
||||
public class HapiTerminologySvcDstu2 extends BaseHapiTerminologySvc {
|
||||
|
||||
@Autowired
|
||||
|
@ -16,4 +18,10 @@ public class HapiTerminologySvcDstu2 extends BaseHapiTerminologySvc {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ package ca.uhn.fhir.jpa.term;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.hl7.fhir.dstu3.model.CodeSystem;
|
||||
import org.hl7.fhir.dstu3.model.ValueSet;
|
||||
import org.hl7.fhir.dstu3.model.ValueSet.ValueSetExpansionContainsComponent;
|
||||
import org.hl7.fhir.dstu3.terminologies.ValueSetExpander;
|
||||
|
@ -10,6 +11,7 @@ import org.hl7.fhir.dstu3.terminologies.ValueSetExpander.ValueSetExpansionOutcom
|
|||
import org.hl7.fhir.dstu3.utils.IWorkerContext;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
|
||||
import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
|
||||
|
||||
public class HapiTerminologySvcDstu3 extends BaseHapiTerminologySvc {
|
||||
|
@ -40,4 +42,10 @@ public class HapiTerminologySvcDstu3 extends BaseHapiTerminologySvc {
|
|||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion) {
|
||||
CodeSystem cs = new org.hl7.fhir.dstu3.model.CodeSystem();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ public interface IHapiTerminologySvc {
|
|||
|
||||
List<VersionIndependentConcept> findCodesBelow(String theSystem, String theCode);
|
||||
|
||||
void storeNewCodeSystemVersion(Long theCodeSystemResourcePid, String theSystemUri, TermCodeSystemVersion theCodeSytem);
|
||||
void storeNewCodeSystemVersion(Long theCodeSystemResourcePid, String theSystemUri, TermCodeSystemVersion theCodeSytemVersion);
|
||||
|
||||
public boolean supportsSystem(String theCodeSystem);
|
||||
|
||||
|
@ -43,4 +43,6 @@ public interface IHapiTerminologySvc {
|
|||
|
||||
List<VersionIndependentConcept> findCodesAbove(String theSystem, String theCode);
|
||||
|
||||
void storeNewCodeSystemVersion(String theSystem, TermCodeSystemVersion theCodeSystemVersion);
|
||||
|
||||
}
|
||||
|
|
|
@ -1,65 +1,248 @@
|
|||
package ca.uhn.fhir.jpa.term;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVParser;
|
||||
import org.apache.commons.csv.CSVRecord;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
|
||||
import ca.uhn.fhir.jpa.entity.TermConcept;
|
||||
import ca.uhn.fhir.jpa.entity.TermConceptParentChildLink;
|
||||
import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
|
||||
|
||||
public class TerminologyLoaderSvc {
|
||||
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(TerminologyLoaderSvc.class);
|
||||
|
||||
@Autowired
|
||||
private IHapiTerminologySvc myTermSvc;
|
||||
|
||||
@VisibleForTesting
|
||||
void setTermSvcForUnitTests(IHapiTerminologySvc theTermSvc) {
|
||||
myTermSvc = theTermSvc;
|
||||
}
|
||||
|
||||
public void loadSnomedCt(byte[] theZipBytes) {
|
||||
|
||||
Map<String, TermConcept> id2concept = new HashMap<String, TermConcept>();
|
||||
|
||||
ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(theZipBytes));
|
||||
String filenameDescription = "Terminology/sct2_Description_Full";
|
||||
String filenameRelationship = "Terminology/sct2_Relationship_Full";
|
||||
List<String> allFilenames = Arrays.asList(filenameDescription, filenameRelationship);
|
||||
|
||||
Map<String, File> filenameToFile = new HashMap<String, File>();
|
||||
ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new ByteArrayInputStream(theZipBytes)));
|
||||
try {
|
||||
for (ZipEntry nextEntry; (nextEntry = zis.getNextEntry()) != null; ) {
|
||||
ZippedFileInputStream entryStream = new ZippedFileInputStream(zis);
|
||||
byte[] bytes = IOUtils.toByteArray(entryStream);
|
||||
ourLog.info("Read file {} - {} bytes", nextEntry.getName(), bytes.length);
|
||||
for (ZipEntry nextEntry; (nextEntry = zis.getNextEntry()) != null;) {
|
||||
ZippedFileInputStream inputStream = new ZippedFileInputStream(zis);
|
||||
|
||||
String string = new String(bytes, "UTF-8");
|
||||
CSVParser parsed = CSVParser.parse(string, CSVFormat.newFormat('\t').withFirstRecordAsHeader());
|
||||
ourLog.info("Header map: {}", parsed.getHeaderMap());
|
||||
boolean want = false;
|
||||
for (String next : allFilenames) {
|
||||
if (nextEntry.getName().contains(next)) {
|
||||
want = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!want) {
|
||||
ourLog.info("Ignoring zip entry: {}", nextEntry.getName());
|
||||
IOUtils.copy(inputStream, new SinkOutputStream());
|
||||
continue;
|
||||
}
|
||||
|
||||
ourLog.debug("Streaming ZIP entry {} into temporary file", nextEntry.getName());
|
||||
|
||||
File nextOutFile = File.createTempFile("hapi_fhir", ".csv");
|
||||
nextOutFile.deleteOnExit();
|
||||
OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(nextOutFile, false));
|
||||
try {
|
||||
IOUtils.copy(inputStream, outputStream);
|
||||
} finally {
|
||||
IOUtils.closeQuietly(outputStream);
|
||||
}
|
||||
|
||||
filenameToFile.put(nextEntry.getName(), nextOutFile);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new InternalErrorException(e);
|
||||
} finally {
|
||||
IOUtils.closeQuietly(zis);
|
||||
}
|
||||
|
||||
ourLog.info("Beginning SNOMED CT processing");
|
||||
|
||||
final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
|
||||
final Map<String, TermConcept> id2concept = new HashMap<String, TermConcept>();
|
||||
final Map<String, TermConcept> code2concept = new HashMap<String, TermConcept>();
|
||||
final List<TermConceptParentChildLink> links = new ArrayList<TermConceptParentChildLink>();
|
||||
|
||||
IRecordHandler handler = new IRecordHandler() {
|
||||
@Override
|
||||
public void accept(CSVRecord theRecord) {
|
||||
String id = theRecord.get("id");
|
||||
boolean active = "1".equals(theRecord.get("active"));
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
String conceptId = theRecord.get("conceptId");
|
||||
String term = theRecord.get("term");
|
||||
|
||||
TermConcept concept = getOrCreateConcept(codeSystemVersion, id2concept, id);
|
||||
concept.setCode(conceptId);
|
||||
concept.setDisplay(term);
|
||||
code2concept.put(conceptId, concept);
|
||||
}
|
||||
};
|
||||
iterateOverZipFile(filenameToFile, filenameDescription, handler);
|
||||
|
||||
final HashSet<TermConcept> rootConcepts = new HashSet<TermConcept>();
|
||||
rootConcepts.addAll(code2concept.values());
|
||||
|
||||
handler = new IRecordHandler() {
|
||||
@Override
|
||||
public void accept(CSVRecord theRecord) {
|
||||
String sourceId = theRecord.get("sourceId");
|
||||
String destinationId = theRecord.get("destinationId");
|
||||
String typeId = theRecord.get("typeId");
|
||||
boolean active = "1".equals(theRecord.get("active"));
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
TermConcept typeConcept = findConcept(code2concept, typeId);
|
||||
TermConcept sourceConcept = findConcept(code2concept, sourceId);
|
||||
TermConcept targetConcept = findConcept(code2concept, destinationId);
|
||||
if (typeConcept.getDisplay().equals("Is a")) {
|
||||
TermConceptParentChildLink link = new TermConceptParentChildLink();
|
||||
link.setChild(sourceConcept);
|
||||
link.setParent(targetConcept);
|
||||
link.setCodeSystem(codeSystemVersion);
|
||||
rootConcepts.remove(link.getChild());
|
||||
} else {
|
||||
ourLog.warn("Unknown relationship type: {}/{}", typeId, typeConcept.getDisplay());
|
||||
}
|
||||
}
|
||||
|
||||
private TermConcept findConcept(final Map<String, TermConcept> code2concept, String typeId) {
|
||||
TermConcept typeConcept = code2concept.get(typeId);
|
||||
if (typeConcept == null) {
|
||||
throw new InternalErrorException("Unknown type ID: " + typeId);
|
||||
}
|
||||
return typeConcept;
|
||||
}
|
||||
};
|
||||
iterateOverZipFile(filenameToFile, filenameRelationship, handler);
|
||||
|
||||
ourLog.info("Done loading SNOMED CT files - {} root codes, {} total codes", rootConcepts.size(), code2concept.size());
|
||||
|
||||
codeSystemVersion.getConcepts().addAll(rootConcepts);
|
||||
myTermSvc.storeNewCodeSystemVersion("http://snomed.info/sct", codeSystemVersion);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
private void iterateOverZipFile(Map<String, File> theFilenameToFile, String fileNamePart, IRecordHandler handler) {
|
||||
for (Entry<String, File> nextEntry : theFilenameToFile.entrySet()) {
|
||||
|
||||
if (nextEntry.getKey().contains(fileNamePart)) {
|
||||
ourLog.info("Processing file {}", nextEntry.getKey());
|
||||
|
||||
Reader reader = null;
|
||||
CSVParser parsed = null;
|
||||
try {
|
||||
reader = new BufferedReader(new FileReader(nextEntry.getValue()));
|
||||
parsed = new CSVParser(reader, CSVFormat.newFormat('\t').withFirstRecordAsHeader());
|
||||
Iterator<CSVRecord> iter = parsed.iterator();
|
||||
ourLog.debug("Header map: {}", parsed.getHeaderMap());
|
||||
|
||||
while (iter.hasNext()) {
|
||||
CSVRecord nextRecord = iter.next();
|
||||
handler.accept(nextRecord);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new InternalErrorException(e);
|
||||
} finally {
|
||||
IOUtils.closeQuietly(parsed);
|
||||
IOUtils.closeQuietly(reader);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private TermConcept getOrCreateConcept(TermCodeSystemVersion codeSystemVersion, Map<String, TermConcept> id2concept, String id) {
|
||||
TermConcept concept = id2concept.get(id);
|
||||
if (concept == null) {
|
||||
concept = new TermConcept();
|
||||
id2concept.put(id, concept);
|
||||
concept.setCodeSystem(codeSystemVersion);
|
||||
}
|
||||
return concept;
|
||||
}
|
||||
|
||||
private static class ZippedFileInputStream extends InputStream {
|
||||
|
||||
private ZipInputStream is;
|
||||
private ZipInputStream is;
|
||||
|
||||
public ZippedFileInputStream(ZipInputStream is){
|
||||
this.is = is;
|
||||
}
|
||||
public ZippedFileInputStream(ZipInputStream is) {
|
||||
this.is = is;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
return is.read();
|
||||
}
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
return is.read();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
is.closeEntry();
|
||||
}
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
is.closeEntry();
|
||||
}
|
||||
}
|
||||
|
||||
private interface IRecordHandler {
|
||||
void accept(CSVRecord theRecord);
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
byte[] bytes = IOUtils.toByteArray(new FileInputStream("/Users/james/Downloads/SnomedCT_Release_INT_20160131_Full.zip"));
|
||||
TerminologyLoaderSvc svc = new TerminologyLoaderSvc();
|
||||
svc.loadSnomedCt(bytes);
|
||||
}
|
||||
|
||||
private static class SinkOutputStream extends OutputStream {
|
||||
|
||||
@Override
|
||||
public void write(int theB) throws IOException {
|
||||
// ignore
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(byte[] theB) throws IOException {
|
||||
// ignore
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(byte[] theB, int theOff, int theLen) throws IOException {
|
||||
// ignore
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package ca.uhn.fhir.jpa.term;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.zip.ZipEntry;
|
||||
|
@ -17,10 +19,14 @@ import ca.uhn.fhir.util.TestUtil;
|
|||
public class TerminologyLoaderSvcTest {
|
||||
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(TerminologyLoaderSvcTest.class);
|
||||
private TerminologyLoaderSvc mySvc;
|
||||
private IHapiTerminologySvc myTermSvc;
|
||||
|
||||
@Before
|
||||
public void before() {
|
||||
myTermSvc = mock(IHapiTerminologySvc.class);
|
||||
|
||||
mySvc = new TerminologyLoaderSvc();
|
||||
mySvc.setTermSvcForUnitTests(myTermSvc);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
id effectiveTime active moduleId conceptId languageCode typeId term caseSignificanceId
|
||||
101013 20020131 1 900000000000207008 126813005 en 900000000000013009 Neoplasm of anterior aspect of epiglottis 900000000000020002
|
||||
102018 20020131 1 900000000000207008 126814004 en 900000000000013009 Neoplasm of junctional region of epiglottis 900000000000020002
|
||||
103011 20020131 1 900000000000207008 126815003 en 900000000000013009 Neoplasm of lateral wall of oropharynx 900000000000020002
|
||||
104017 20020131 1 900000000000207008 126816002 en 900000000000013009 Neoplasm of posterior wall of oropharynx 900000000000020002
|
||||
105016 20020131 1 900000000000207008 126817006 en 900000000000013009 Neoplasm of esophagus 900000000000020002
|
||||
101013 20020131 1 900000000000207008 126813005 en 900000000000013009 ROOT1 900000000000020002
|
||||
102018 20020131 1 900000000000207008 126814004 en 900000000000013009 ROOT2 900000000000020002
|
||||
103011 20020131 1 900000000000207008 126815003 en 900000000000013009 ROOT1_1 900000000000020002
|
||||
104017 20020131 1 900000000000207008 126816002 en 900000000000013009 ROOT1_2 900000000000020002
|
||||
105016 20020131 1 900000000000207008 126817006 en 900000000000013009 ROOT1_1_1 900000000000020002
|
||||
106015 20020131 1 900000000000207008 126818001 en 900000000000013009 Neoplasm of cervical esophagus 900000000000020002
|
||||
107012 20020131 1 900000000000207008 126819009 en 900000000000013009 Neoplasm of thoracic esophagus 900000000000020002
|
||||
108019 20020131 1 900000000000207008 126820003 en 900000000000013009 Neoplasm of abdominal esophagus 900000000000020002
|
||||
110017 20020131 1 900000000000207008 126822006 en 900000000000013009 Neoplasm of middle third of esophagus 900000000000020002
|
||||
181114011 20020131 1 900000000000207008 116680003 en 900000000000013009 Is a 900000000000020002
|
||||
|
|
|
@ -1,10 +1,4 @@
|
|||
id effectiveTime active moduleId sourceId destinationId relationshipGroup typeId characteristicTypeId modifierId
|
||||
100022 20020131 1 900000000000207008 100000000 102272007 0 116680003 900000000000011006 900000000000451002
|
||||
100022 20090731 0 900000000000207008 100000000 102272007 0 116680003 900000000000011006 900000000000451002
|
||||
101021 20020131 1 900000000000207008 10000006 29857009 0 116680003 900000000000011006 900000000000451002
|
||||
102025 20020131 1 900000000000207008 10000006 9972008 0 116680003 900000000000011006 900000000000451002
|
||||
103024 20020131 1 900000000000207008 1000004 19130008 0 116680003 900000000000011006 900000000000451002
|
||||
103024 20030131 0 900000000000207008 1000004 19130008 0 116680003 900000000000011006 900000000000451002
|
||||
104029 20020131 1 900000000000207008 100001001 102272007 0 116680003 900000000000011006 900000000000451002
|
||||
104029 20090731 0 900000000000207008 100001001 102272007 0 116680003 900000000000011006 900000000000451002
|
||||
105028 20020131 1 900000000000207008 100002008 102272007 0 116680003 900000000000011006 900000000000451002
|
||||
100022 20020131 1 900000000000207008 126815003 126813005 0 116680003 900000000000011006 900000000000451002
|
||||
100022 20090731 0 900000000000207008 126816002 126813005 0 116680003 900000000000011006 900000000000451002
|
||||
101021 20020131 1 900000000000207008 126817006 126815003 0 116680003 900000000000011006 900000000000451002
|
||||
|
|
Loading…
Reference in New Issue