more Mimic work

This commit is contained in:
Grahame Grieve 2019-10-25 08:19:59 +11:00
parent cccce9e9f0
commit 958791d1cd
2 changed files with 268 additions and 4 deletions

View File

@ -1,10 +1,36 @@
package org.hl7.fhir.r4.importers;
/*-
* #%L
* org.hl7.fhir.r4.importers
* %%
* Copyright (C) 2014 - 2019 Health Level 7
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
/**
* Development Notes:
* - ignore ICUStays and Transfers for now - low yield, based on discussions with Tom/Alistair
*/
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
@ -19,14 +45,23 @@ import org.hl7.fhir.r4.formats.JsonParser;
import org.hl7.fhir.r4.model.Bundle;
import org.hl7.fhir.r4.model.Bundle.BundleType;
import org.hl7.fhir.r4.model.CodeableConcept;
import org.hl7.fhir.r4.model.Coding;
import org.hl7.fhir.r4.model.DateTimeType;
import org.hl7.fhir.r4.model.DateType;
import org.hl7.fhir.r4.model.DocumentReference;
import org.hl7.fhir.r4.model.Encounter;
import org.hl7.fhir.r4.model.Enumerations.AdministrativeGender;
import org.hl7.fhir.r4.model.Enumerations.DocumentReferenceStatus;
import org.hl7.fhir.r4.model.Observation;
import org.hl7.fhir.r4.model.Observation.ObservationComponentComponent;
import org.hl7.fhir.r4.model.Observation.ObservationStatus;
import org.hl7.fhir.r4.model.Patient;
import org.hl7.fhir.r4.model.Practitioner;
import org.hl7.fhir.r4.model.PractitionerRole;
import org.hl7.fhir.r4.model.Quantity;
import org.hl7.fhir.r4.model.Quantity.QuantityComparator;
import org.hl7.fhir.r4.model.Reference;
import org.hl7.fhir.r4.model.Type;
import org.hl7.fhir.utilities.CSVReader;
import org.hl7.fhir.utilities.Utilities;
@ -41,9 +76,24 @@ public class Mimic14Importer {
private String loinc;
}
/**
 * One row of the item dictionary file (d_items.csv), as populated by loadItems.
 * Plain data holder: the fields are private and are read and written directly by the
 * enclosing importer class.
 */
public static class Item {
  // numeric columns: row_id, itemid, conceptid
  private int rowId;
  private int itemId;
  private int conceptId;

  // descriptive columns: label, abbreviation, category
  private String label;
  private String abbreviation;
  private String category;

  /** dbsource column. */
  private String dbSource;
  /** linksto column: where the item is used. */
  private String linksTo;
  /** unitname column: unit of measure for the item, if it has one. */
  private String unitName;
  /** param_type column: type of item. */
  private String paramType;
}
// NOTE(review): this constant is null; its use is outside the visible part of the file, but
// presumably it is meant to hold an identifier system URI - confirm and assign a real value.
private static final String MRN_SYSTEM = null;
// Used as the timestamp of every Bundle this importer builds.
private Date date;
// Lab item dictionary, keyed by the "itemid" column.
private Map<String, LabItem> labItems = new HashMap<>();
// General item dictionary (see loadItems), keyed by the "itemid" column.
private Map<String, Item> items = new HashMap<>();
// private Map<String, PractitionerRole> careGivers = new HashMap<>();
// Patients, looked up by the "subject_id" column of the event files.
private Map<String, Patient> patients = new HashMap<>();
// Encounters, looked up by the "hadm_id" column of the event files.
private Map<String, Encounter> encounters = new HashMap<>();
@ -64,11 +114,15 @@ public class Mimic14Importer {
ucum = new UcumEssenceService(ucumSrc);
loadItems(Utilities.path(src, "d_items.csv"));
loadLabItems(Utilities.path(src, "d_labitems.csv"));
loadCareGivers(Utilities.path(src, "caregivers.csv"), Utilities.path(dest, "care-givers.json"));
Bundle patients = processPatients(Utilities.path(src, "patients.csv"));
Bundle encounters = processAdmissions(Utilities.path(src, "admissions.csv"));
processLabEvents(Utilities.path(src, "labevents.csv"), Utilities.path(dest, "lab-observations.json"));
processMicroEvents(Utilities.path(src, "microbiologyevents.csv"), Utilities.path(dest, "micro-observations.json"));
processNoteEvents(Utilities.path(src, "noteevents.csv"), Utilities.path(dest, "notes.json"));
System.out.println("saving");
@ -78,6 +132,32 @@ public class Mimic14Importer {
System.out.println("done");
}
/**
 * Loads the item dictionary CSV into the {@code items} map, keyed by the "itemid" column.
 *
 * @param src path of the item dictionary CSV file
 * @throws NumberFormatException if row_id, itemid or conceptid is not an integer
 * @throws IOException if the file cannot be opened or read
 */
private void loadItems(String src) throws NumberFormatException, FHIRException, IOException {
  System.out.print("Processing Items... ");
  int t = 0;
  // try-with-resources: the reader is released even when a malformed row makes parseInt throw
  try (CSVReader csv = new CSVReader(new FileInputStream(src))) {
    csv.readHeaders();
    while (csv.line()) {
      Item item = new Item();
      t++;
      item.rowId = Integer.parseInt(csv.cell("row_id"));
      item.itemId = Integer.parseInt(csv.cell("itemid"));
      item.label = csv.cell("label");
      item.abbreviation = csv.cell("abbreviation");
      item.dbSource = csv.cell("dbsource");
      item.linksTo = csv.cell("linksto");
      item.category = csv.cell("category");
      item.unitName = csv.cell("unitname");
      item.paramType = csv.cell("param_type");
      // conceptid is only parsed when csv.has reports it; otherwise conceptId stays 0
      if (csv.has("conceptid")) {
        item.conceptId = Integer.parseInt(csv.cell("conceptid"));
      }
      items.put(csv.cell("itemid"), item);
    }
    System.out.println(Integer.toString(t)+" found");
  }
}
private void loadLabItems(String src) throws NumberFormatException, FHIRException, IOException {
System.out.print("Processing Lab Items... ");
CSVReader csv = new CSVReader(new FileInputStream(src));
@ -95,6 +175,28 @@ public class Mimic14Importer {
labItems.put(csv.cell("itemid"), item);
}
System.out.println(Integer.toString(t)+" found");
csv.close();
}
/**
 * Reads the care givers CSV and writes one PractitionerRole per row into a collection
 * Bundle ("care-givers"), serialised as pretty-printed JSON.
 *
 * Each PractitionerRole takes its id from the "cgid" column and a single coding built from
 * the "label" (code) and "description" (display) columns.
 *
 * @param src  path of the care givers CSV file
 * @param dest path of the JSON file to write
 * @throws IOException if the source cannot be read or the destination cannot be written
 */
private void loadCareGivers(String src, String dest) throws NumberFormatException, FHIRException, IOException {
  System.out.print("Processing Care Givers... ");
  Bundle bnd = new Bundle();
  bnd.setId("care-givers");
  bnd.setType(BundleType.COLLECTION);
  bnd.setTimestamp(date);

  int t = 0;
  // try-with-resources: the reader is released even if a row fails part-way through
  try (CSVReader csv = new CSVReader(new FileInputStream(src))) {
    csv.readHeaders();
    while (csv.line()) {
      PractitionerRole pr = new PractitionerRole();
      t++;
      pr.setId(csv.cell("cgid"));
      pr.addCode().addCoding().setSystem("http://mimic.physionet.org/fhir/CareGiverType").setCode(csv.cell("label")).setDisplay(csv.cell("description"));
      bnd.addEntry().setResource(pr);
    }
    System.out.println(Integer.toString(t)+" found");
  }

  // close the output stream explicitly; previously it was handed to compose and never closed
  try (FileOutputStream out = new FileOutputStream(dest)) {
    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(out, bnd);
  }
}
private void processLabEvents(String src, String dest) throws FileNotFoundException, IOException {
@ -109,10 +211,12 @@ public class Mimic14Importer {
while (csv.line()) {
Observation obs = new Observation();
t++;
Patient pat = patients.get(csv.cell("subject_id"));
Encounter enc = encounters.get(csv.cell("hadm_id"));
LabItem item = labItems.get(csv.cell("itemid"));
obs.setId(csv.cell("hadm_id"));
obs.setId(csv.cell("row_id"));
obs.setStatus(ObservationStatus.FINAL);
if (pat != null) {
obs.setSubject(new Reference("Patient/"+pat.getId()));
}
@ -152,9 +256,161 @@ public class Mimic14Importer {
bnd.addEntry().setResource(obs);
}
System.out.println(Integer.toString(t)+" found");
csv.close();
new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(dest), bnd);
}
/**
 * Converts the microbiology events CSV into a collection Bundle of Observations and writes
 * it to {@code dest} as pretty-printed JSON.
 *
 * Rows sharing the same admission, specimen item, organism item, chart time/date and isolate
 * number are folded into one Observation; each such row that names a known antibiotic item
 * adds a component (antibiotic code, optional dilution quantity, optional interpretation).
 * A row whose organism item is not in the item dictionary is recorded as "negative".
 *
 * @param src  path of the microbiology events CSV file
 * @param dest path of the JSON file to write
 * @throws FileNotFoundException if {@code src} cannot be opened
 * @throws IOException if reading or writing fails
 */
private void processMicroEvents(String src, String dest) throws FileNotFoundException, IOException {
  System.out.print("Processing Micro Events... ");
  Bundle bnd = new Bundle();
  // was "lab-observations" (copy-paste); the other bundles are named after their output file
  bnd.setId("micro-observations");
  bnd.setType(BundleType.COLLECTION);
  bnd.setTimestamp(date);

  // observations created so far, keyed by the composite row key built below
  Map<String, Observation> cache = new HashMap<>();
  int t = 0;
  // try-with-resources: the reader is released even if a row fails to convert
  try (CSVReader csv = new CSVReader(new FileInputStream(src))) {
    csv.readHeaders();
    while (csv.line()) {
      String cacheId = csv.cell("hadm_id")+"|"+csv.cell("spec_itemid")+"|"+csv.cell("org_itemid")+"|"+(csv.has("charttime") ? csv.cell("charttime") : csv.cell("chartdate"))+"|"+csv.cell("isolate_num");

      Patient pat = patients.get(csv.cell("subject_id"));
      Encounter enc = encounters.get(csv.cell("hadm_id"));
      Item org = items.get(csv.cell("org_itemid"));
      Item ab = items.get(csv.cell("ab_itemid"));

      Observation obs = cache.get(cacheId);
      if (obs == null) {
        // first row of this group: create the observation; t counts observations, not rows
        obs = new Observation();
        t++;
        cache.put(cacheId, obs);
        obs.setId(csv.cell("row_id"));
        obs.setStatus(ObservationStatus.FINAL);
        obs.addCategory().setText("microbiology");
        bnd.addEntry().setResource(obs);
        // todo: these are all cultures, but the codes state mainly what the culture is on
        obs.getCode().setText(csv.cell("spec_itemid"));
        obs.getCode().addCoding().setSystem("http://mimic.physionet.org/fhir/TestType").setCode(csv.cell("spec_itemid"));
      }

      if (pat != null) {
        obs.setSubject(new Reference("Patient/"+pat.getId()));
      }
      if (enc != null) {
        obs.setEncounter(new Reference("Encounter/"+enc.getId()));
      }
      // prefer charttime and fall back to chartdate, mirroring the cache key above
      if (csv.has("charttime")) {
        obs.setEffective(readDateTime(csv.cell("charttime")));
      } else {
        obs.setEffective(readDateTime(csv.cell("chartdate")));
      }

      if (org == null) {
        obs.setValue(new CodeableConcept(new Coding().setSystem("http://mimic.physionet.org/fhir/TestValue").setCode("negative")));
      } else {
        obs.setValue(new CodeableConcept(new Coding().setSystem("http://mimic.physionet.org/fhir/Organism").setCode(csv.cell("org_itemid"))).setText(csv.cell("org_name")));
      }

      if (ab != null) {
        // one component per antibiotic row
        ObservationComponentComponent oc = obs.addComponent();
        oc.setCode(new CodeableConcept(new Coding().setSystem("http://mimic.physionet.org/fhir/Antibiotic").setCode(csv.cell("ab_itemid"))).setText(csv.cell("ab_name")));
        if (csv.has("dilution_text")) {
          oc.setValue(parseQuantity(csv.cell("dilution_text")));
        }
        if (csv.has("interpretation")) {
          oc.addInterpretation().addCoding().setSystem("http://mimic.physionet.org/fhir/Interpretation").setCode(csv.cell("interpretation"));
        }
      }
    }
    System.out.println(Integer.toString(t)+" found");
  }

  // close the output stream explicitly; previously it was handed to compose and never closed
  try (FileOutputStream out = new FileOutputStream(dest)) {
    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(out, bnd);
  }
}
/**
 * Converts the note events CSV into a collection Bundle ("notes") of DocumentReferences and
 * writes it to {@code dest} as pretty-printed JSON.
 *
 * Each row becomes one DocumentReference: id from "row_id", status from "iserror", author
 * from "cgid" when present, a LOINC type chosen from the category/description pair, and the
 * note text attached as UTF-8 plain text.
 *
 * @param src  path of the note events CSV file
 * @param dest path of the JSON file to write
 * @throws FileNotFoundException if {@code src} cannot be opened
 * @throws IOException if reading or writing fails
 * @throws FHIRException if a row has a category/description pair that is not mapped
 */
private void processNoteEvents(String src, String dest) throws FileNotFoundException, IOException {
  System.out.print("Processing Note Events... ");
  Bundle bnd = new Bundle();
  bnd.setId("notes");
  bnd.setType(BundleType.COLLECTION);
  bnd.setTimestamp(date);

  int t = 0;
  // try-with-resources: the reader is released even when an unmapped note type aborts the run
  try (CSVReader csv = new CSVReader(new FileInputStream(src))) {
    csv.readHeaders();
    while (csv.line()) {
      // count every row; the counter was previously never incremented, so "0 found" was printed
      t++;
      Patient pat = patients.get(csv.cell("subject_id"));
      Encounter enc = encounters.get(csv.cell("hadm_id"));

      DocumentReference dr = new DocumentReference();
      dr.setId(csv.cell("row_id"));
      if (pat != null) {
        dr.setSubject(new Reference("Patient/"+pat.getId()));
      }
      if (enc != null) {
        dr.getContext().addEncounter(new Reference("Encounter/"+enc.getId()));
      }
      if ("1".equals(csv.cell("iserror"))) {
        dr.setStatus(DocumentReferenceStatus.ENTEREDINERROR);
      } else {
        dr.setStatus(DocumentReferenceStatus.CURRENT);
      }
      if (csv.has("cgid")) {
        dr.addAuthor().setReference("PractitionerRole/"+csv.cell("cgid"));
      }

      String cat = csv.cell("category");
      String desc = csv.cell("description");
      if ("Discharge summary".equals(cat) && "Report".equals(desc)) {
        dr.getType().addCoding().setSystem("http://loinc.org").setCode("18842-5").setDisplay("Discharge summary");
      } else if ("Echo".equals(cat) && "Report".equals(desc)) {
        dr.getType().addCoding().setSystem("http://loinc.org").setCode("59282-4").setDisplay("Stress cardiac echo study report US");
      } else if ("Radiology".equals(cat) && "CHEST (PORTABLE AP)".equals(desc)) {
        // NOTE(review): same code as the Echo branch above but a different display -
        // looks like a copy-paste; verify the intended LOINC code before relying on it.
        dr.getType().addCoding().setSystem("http://loinc.org").setCode("59282-4").setDisplay("Chest X-ray AP portable single view");
      } else if ("Nursing/other".equals(cat) && "Report".equals(desc)) {
        dr.getType().addCoding().setSystem("http://loinc.org").setCode("34119-8").setDisplay("Nursing facility Initial assessment note");
      } else if ("Physician".equals(cat) && "Physician Surgical Admission Note".equals(desc)) {
        dr.getType().addCoding().setSystem("http://loinc.org").setCode("36589-0").setDisplay("Surgery Admission evaluation note");
      } else {
        throw new FHIRException("Unhandled Note type '"+cat+"'/'"+desc+"'");
      }
      // every recognised note type gets the same category, so it is added once here
      dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other");

      dr.addContent().getAttachment().setContentType("text/plain; charset=UTF-8").setData(csv.cell("text").getBytes(Charset.forName("UTF-8")));
      bnd.addEntry().setResource(dr);
    }
    System.out.println(Integer.toString(t)+" found");
  }

  // close the output stream explicitly; previously it was handed to compose and never closed
  try (FileOutputStream out = new FileOutputStream(dest)) {
    new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(out, bnd);
  }
}
/**
 * Parses a dilution-style cell into a Quantity.
 *
 * Accepted forms are a plain decimal ("4"), or a decimal prefixed by one of the comparators
 * "<=", "<", ">=", "=>" (treated as ">=") or ">". The two-character prefixes are tested
 * before the one-character ones so that "<=" is not mistaken for "<".
 *
 * @param cell the raw cell text; must not be null
 * @return a Quantity carrying the value and, when a prefix was present, the comparator
 * @throws NumberFormatException if the text after a comparator is not a number
 * @throws Error if there is no comparator and the cell is not a valid decimal
 */
private Type parseQuantity(String cell) {
  QuantityComparator comparator = null;
  String number = null;
  if (cell.startsWith("<=")) {
    comparator = QuantityComparator.LESS_OR_EQUAL;
    number = cell.substring(2);
  } else if (cell.startsWith("<")) {
    comparator = QuantityComparator.LESS_THAN;
    // single-character prefix: skip 1 char (was 2, which swallowed the first digit)
    number = cell.substring(1);
  } else if (cell.startsWith(">=") || cell.startsWith("=>")) {
    comparator = QuantityComparator.GREATER_OR_EQUAL;
    number = cell.substring(2);
  } else if (cell.startsWith(">")) {
    comparator = QuantityComparator.GREATER_THAN;
    // single-character prefix: skip 1 char (was 2, which swallowed the first digit)
    number = cell.substring(1);
  }

  if (comparator != null) {
    // tolerate whitespace between the comparator and the number, e.g. "<= 0.5"
    return new Quantity().setComparator(comparator).setValue(new BigDecimal(number.trim()));
  }
  if (!Utilities.isDecimal(cell, true)) {
    throw new Error("Not a valid decimal: "+cell);
  }
  return new Quantity().setValue(new BigDecimal(cell));
}
private Bundle processAdmissions(String src) throws FileNotFoundException, IOException {
System.out.print("Processing Admissions... ");
CSVReader csv = new CSVReader(new FileInputStream(src));
@ -202,6 +458,7 @@ public class Mimic14Importer {
bnd.addEntry().setResource(enc);
}
System.out.println(Integer.toString(t)+" found");
csv.close();
return bnd;
}
@ -231,6 +488,7 @@ public class Mimic14Importer {
bnd.addEntry().setResource(pat);
}
System.out.println(Integer.toString(t)+" found");
csv.close();
return bnd;
}

View File

@ -152,8 +152,14 @@ public class CSVReader extends InputStreamReader {
while (inQuote || (peek() != '\r' && peek() != '\n')) {
char c = peek();
next();
if (c == '"')
inQuote = !inQuote;
if (c == '"') {
if (ready() && peek() == '"') {
b.append(c);
next();
} else {
inQuote = !inQuote;
}
}
else if (!inQuote && c == ',') {
res.add(b.toString().trim());
b = new StringBuilder();