more Mimic work
This commit is contained in:
parent
cccce9e9f0
commit
958791d1cd
|
@ -1,10 +1,36 @@
|
|||
package org.hl7.fhir.r4.importers;
|
||||
|
||||
/*-
|
||||
* #%L
|
||||
* org.hl7.fhir.r4.importers
|
||||
* %%
|
||||
* Copyright (C) 2014 - 2019 Health Level 7
|
||||
* %%
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* #L%
|
||||
*/
|
||||
|
||||
/**
|
||||
* Development Notes:
|
||||
* - ignore ICUStays and Transfers for now - low yield, based on discussions with Tom/Alistair
|
||||
*/
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.math.BigDecimal;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -19,14 +45,23 @@ import org.hl7.fhir.r4.formats.JsonParser;
|
|||
import org.hl7.fhir.r4.model.Bundle;
|
||||
import org.hl7.fhir.r4.model.Bundle.BundleType;
|
||||
import org.hl7.fhir.r4.model.CodeableConcept;
|
||||
import org.hl7.fhir.r4.model.Coding;
|
||||
import org.hl7.fhir.r4.model.DateTimeType;
|
||||
import org.hl7.fhir.r4.model.DateType;
|
||||
import org.hl7.fhir.r4.model.DocumentReference;
|
||||
import org.hl7.fhir.r4.model.Encounter;
|
||||
import org.hl7.fhir.r4.model.Enumerations.AdministrativeGender;
|
||||
import org.hl7.fhir.r4.model.Enumerations.DocumentReferenceStatus;
|
||||
import org.hl7.fhir.r4.model.Observation;
|
||||
import org.hl7.fhir.r4.model.Observation.ObservationComponentComponent;
|
||||
import org.hl7.fhir.r4.model.Observation.ObservationStatus;
|
||||
import org.hl7.fhir.r4.model.Patient;
|
||||
import org.hl7.fhir.r4.model.Practitioner;
|
||||
import org.hl7.fhir.r4.model.PractitionerRole;
|
||||
import org.hl7.fhir.r4.model.Quantity;
|
||||
import org.hl7.fhir.r4.model.Quantity.QuantityComparator;
|
||||
import org.hl7.fhir.r4.model.Reference;
|
||||
import org.hl7.fhir.r4.model.Type;
|
||||
import org.hl7.fhir.utilities.CSVReader;
|
||||
import org.hl7.fhir.utilities.Utilities;
|
||||
|
||||
|
@ -41,9 +76,24 @@ public class Mimic14Importer {
|
|||
private String loinc;
|
||||
}
|
||||
|
||||
public static class Item {
|
||||
private int rowId;
|
||||
private int itemId;
|
||||
private String label;
|
||||
private String abbreviation;
|
||||
private String dbSource;
|
||||
private String linksTo; // where it's used...
|
||||
private String category;
|
||||
private String unitName; // uom for item, if it has one
|
||||
private String paramType; // type of item
|
||||
private int conceptId;
|
||||
}
|
||||
|
||||
private static final String MRN_SYSTEM = null;
|
||||
private Date date;
|
||||
private Map<String, LabItem> labItems = new HashMap<>();
|
||||
private Map<String, Item> items = new HashMap<>();
|
||||
// private Map<String, PractitionerRole> careGivers = new HashMap<>();
|
||||
private Map<String, Patient> patients = new HashMap<>();
|
||||
private Map<String, Encounter> encounters = new HashMap<>();
|
||||
|
||||
|
@ -64,11 +114,15 @@ public class Mimic14Importer {
|
|||
|
||||
ucum = new UcumEssenceService(ucumSrc);
|
||||
|
||||
loadItems(Utilities.path(src, "d_items.csv"));
|
||||
loadLabItems(Utilities.path(src, "d_labitems.csv"));
|
||||
loadCareGivers(Utilities.path(src, "caregivers.csv"), Utilities.path(dest, "care-givers.json"));
|
||||
|
||||
Bundle patients = processPatients(Utilities.path(src, "patients.csv"));
|
||||
Bundle encounters = processAdmissions(Utilities.path(src, "admissions.csv"));
|
||||
processLabEvents(Utilities.path(src, "labevents.csv"), Utilities.path(dest, "lab-observations.json"));
|
||||
|
||||
processMicroEvents(Utilities.path(src, "microbiologyevents.csv"), Utilities.path(dest, "micro-observations.json"));
|
||||
processNoteEvents(Utilities.path(src, "noteevents.csv"), Utilities.path(dest, "notes.json"));
|
||||
|
||||
System.out.println("saving");
|
||||
|
||||
|
@ -78,6 +132,32 @@ public class Mimic14Importer {
|
|||
System.out.println("done");
|
||||
}
|
||||
|
||||
private void loadItems(String src) throws NumberFormatException, FHIRException, IOException {
|
||||
System.out.print("Processing Items... ");
|
||||
CSVReader csv = new CSVReader(new FileInputStream(src));
|
||||
int t = 0;
|
||||
csv.readHeaders();
|
||||
while (csv.line()) {
|
||||
Item item = new Item();
|
||||
t++;
|
||||
item.rowId = Integer.parseInt(csv.cell("row_id"));
|
||||
item.itemId = Integer.parseInt(csv.cell("itemid"));
|
||||
item.label = csv.cell("label");
|
||||
item.abbreviation = csv.cell("abbreviation");
|
||||
item.dbSource = csv.cell("dbsource");
|
||||
item.linksTo = csv.cell("linksto");
|
||||
item.category = csv.cell("category");
|
||||
item.unitName = csv.cell("unitname");
|
||||
item.paramType = csv.cell("param_type");
|
||||
if (csv.has("conceptid")) {
|
||||
item.conceptId = Integer.parseInt(csv.cell("conceptid"));
|
||||
}
|
||||
items.put(csv.cell("itemid"), item);
|
||||
}
|
||||
System.out.println(Integer.toString(t)+" found");
|
||||
csv.close();
|
||||
}
|
||||
|
||||
private void loadLabItems(String src) throws NumberFormatException, FHIRException, IOException {
|
||||
System.out.print("Processing Lab Items... ");
|
||||
CSVReader csv = new CSVReader(new FileInputStream(src));
|
||||
|
@ -95,6 +175,28 @@ public class Mimic14Importer {
|
|||
labItems.put(csv.cell("itemid"), item);
|
||||
}
|
||||
System.out.println(Integer.toString(t)+" found");
|
||||
csv.close();
|
||||
}
|
||||
|
||||
private void loadCareGivers(String src, String dest) throws NumberFormatException, FHIRException, IOException {
|
||||
System.out.print("Processing Care Givers... ");
|
||||
CSVReader csv = new CSVReader(new FileInputStream(src));
|
||||
int t = 0;
|
||||
csv.readHeaders();
|
||||
Bundle bnd = new Bundle();
|
||||
bnd.setId("care-givers");
|
||||
bnd.setType(BundleType.COLLECTION);
|
||||
bnd.setTimestamp(date);
|
||||
while (csv.line()) {
|
||||
PractitionerRole pr = new PractitionerRole();
|
||||
t++;
|
||||
pr.setId(csv.cell("cgid"));
|
||||
pr.addCode().addCoding().setSystem("http://mimic.physionet.org/fhir/CareGiverType").setCode(csv.cell("label")).setDisplay(csv.cell("description"));
|
||||
bnd.addEntry().setResource(pr);
|
||||
}
|
||||
System.out.println(Integer.toString(t)+" found");
|
||||
csv.close();
|
||||
new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(dest), bnd);
|
||||
}
|
||||
|
||||
private void processLabEvents(String src, String dest) throws FileNotFoundException, IOException {
|
||||
|
@ -109,10 +211,12 @@ public class Mimic14Importer {
|
|||
while (csv.line()) {
|
||||
Observation obs = new Observation();
|
||||
t++;
|
||||
|
||||
Patient pat = patients.get(csv.cell("subject_id"));
|
||||
Encounter enc = encounters.get(csv.cell("hadm_id"));
|
||||
LabItem item = labItems.get(csv.cell("itemid"));
|
||||
obs.setId(csv.cell("hadm_id"));
|
||||
obs.setId(csv.cell("row_id"));
|
||||
obs.setStatus(ObservationStatus.FINAL);
|
||||
if (pat != null) {
|
||||
obs.setSubject(new Reference("Patient/"+pat.getId()));
|
||||
}
|
||||
|
@ -152,9 +256,161 @@ public class Mimic14Importer {
|
|||
bnd.addEntry().setResource(obs);
|
||||
}
|
||||
System.out.println(Integer.toString(t)+" found");
|
||||
csv.close();
|
||||
new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(dest), bnd);
|
||||
}
|
||||
|
||||
private void processMicroEvents(String src, String dest) throws FileNotFoundException, IOException {
|
||||
System.out.print("Processing Micro Events... ");
|
||||
CSVReader csv = new CSVReader(new FileInputStream(src));
|
||||
Bundle bnd = new Bundle();
|
||||
bnd.setId("lab-observations");
|
||||
bnd.setType(BundleType.COLLECTION);
|
||||
bnd.setTimestamp(date);
|
||||
csv.readHeaders();
|
||||
Map<String, Observation> cache = new HashMap<>();
|
||||
|
||||
int t = 0;
|
||||
while (csv.line()) {
|
||||
String cacheId = csv.cell("hadm_id")+"|"+csv.cell("spec_itemid")+"|"+csv.cell("org_itemid")+"|"+(csv.has("charttime") ? csv.cell("charttime") : csv.cell("chartdate"))+"|"+csv.cell("isolate_num");
|
||||
|
||||
Patient pat = patients.get(csv.cell("subject_id"));
|
||||
Encounter enc = encounters.get(csv.cell("hadm_id"));
|
||||
Item spec = items.get(csv.cell("spec_itemid"));
|
||||
Item org = items.get(csv.cell("org_itemid"));
|
||||
Item ab = items.get(csv.cell("ab_itemid"));
|
||||
|
||||
Observation obs;
|
||||
if (cache.containsKey(cacheId)) {
|
||||
obs = cache.get(cacheId);
|
||||
} else {
|
||||
obs = new Observation();
|
||||
t++;
|
||||
cache.put(cacheId, obs);
|
||||
obs.setId(csv.cell("row_id"));
|
||||
obs.setStatus(ObservationStatus.FINAL);
|
||||
obs.addCategory().setText("microbiology");
|
||||
bnd.addEntry().setResource(obs);
|
||||
|
||||
// todo: these are all cultures, but the codes state mainly what the culture is on
|
||||
obs.getCode().setText(csv.cell("spec_itemid"));
|
||||
obs.getCode().addCoding().setSystem("http://mimic.physionet.org/fhir/TestType").setCode(csv.cell("spec_itemid"));
|
||||
}
|
||||
|
||||
if (pat != null) {
|
||||
obs.setSubject(new Reference("Patient/"+pat.getId()));
|
||||
}
|
||||
if (enc != null) {
|
||||
obs.setEncounter(new Reference("Encounter/"+enc.getId()));
|
||||
}
|
||||
|
||||
if (csv.has("charttime")) {
|
||||
obs.setEffective(readDateTime(csv.cell("charttime")));
|
||||
} else {
|
||||
obs.setEffective(readDateTime(csv.cell("chartdate")));
|
||||
}
|
||||
if (org == null) {
|
||||
obs.setValue(new CodeableConcept(new Coding().setSystem("http://mimic.physionet.org/fhir/TestValue").setCode("negative")));
|
||||
} else {
|
||||
obs.setValue(new CodeableConcept(new Coding().setSystem("http://mimic.physionet.org/fhir/Organism").setCode(csv.cell("org_itemid"))).setText(csv.cell("org_name")));
|
||||
}
|
||||
if (ab != null) {
|
||||
ObservationComponentComponent oc = obs.addComponent();
|
||||
oc.setCode(new CodeableConcept(new Coding().setSystem("http://mimic.physionet.org/fhir/Antibiotic").setCode(csv.cell("ab_itemid"))).setText(csv.cell("ab_name")));
|
||||
if (csv.has("dilution_text")) {
|
||||
oc.setValue(parseQuantity(csv.cell("dilution_text")));
|
||||
}
|
||||
if (csv.has("interpretation")) {
|
||||
oc.addInterpretation().addCoding().setSystem("http://mimic.physionet.org/fhir/Interpretation").setCode(csv.cell("interpretation"));
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println(Integer.toString(t)+" found");
|
||||
csv.close();
|
||||
new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(dest), bnd);
|
||||
}
|
||||
|
||||
private void processNoteEvents(String src, String dest) throws FileNotFoundException, IOException {
|
||||
System.out.print("Processing Note Events... ");
|
||||
CSVReader csv = new CSVReader(new FileInputStream(src));
|
||||
Bundle bnd = new Bundle();
|
||||
bnd.setId("notes");
|
||||
bnd.setType(BundleType.COLLECTION);
|
||||
bnd.setTimestamp(date);
|
||||
csv.readHeaders();
|
||||
Map<String, Observation> cache = new HashMap<>();
|
||||
|
||||
int t = 0;
|
||||
while (csv.line()) {
|
||||
|
||||
Patient pat = patients.get(csv.cell("subject_id"));
|
||||
Encounter enc = encounters.get(csv.cell("hadm_id"));
|
||||
|
||||
DocumentReference dr = new DocumentReference();
|
||||
dr.setId(csv.cell("row_id"));
|
||||
if (pat != null) {
|
||||
dr.setSubject(new Reference("Patient/"+pat.getId()));
|
||||
}
|
||||
if (enc != null) {
|
||||
dr.getContext().addEncounter(new Reference("Encounter/"+enc.getId()));
|
||||
}
|
||||
if ("1".equals(csv.cell("iserror"))) {
|
||||
dr.setStatus(DocumentReferenceStatus.ENTEREDINERROR);
|
||||
} else {
|
||||
dr.setStatus(DocumentReferenceStatus.CURRENT);
|
||||
}
|
||||
if (csv.has("cgid")) {
|
||||
dr.addAuthor().setReference("PractitionerRole/"+csv.cell("cgid"));
|
||||
}
|
||||
String cat = csv.cell("category");
|
||||
String desc = csv.cell("description");
|
||||
if ("Discharge summary".equals(cat) && "Report".equals(desc)) {
|
||||
dr.getType().addCoding().setSystem("http://loinc.org").setCode("18842-5").setDisplay("Discharge summary");
|
||||
dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other");
|
||||
} else if ("Echo".equals(cat) && "Report".equals(desc)) {
|
||||
dr.getType().addCoding().setSystem("http://loinc.org").setCode("59282-4").setDisplay("Stress cardiac echo study report US");
|
||||
dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other");
|
||||
} else if ("Radiology".equals(cat) && "CHEST (PORTABLE AP)".equals(desc)) {
|
||||
dr.getType().addCoding().setSystem("http://loinc.org").setCode("59282-4").setDisplay("Chest X-ray AP portable single view");
|
||||
dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other");
|
||||
} else if ("Nursing/other".equals(cat) && "Report".equals(desc)) {
|
||||
dr.getType().addCoding().setSystem("http://loinc.org").setCode("34119-8").setDisplay("Nursing facility Initial assessment note");
|
||||
dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other");
|
||||
} else if ("Physician".equals(cat) && "Physician Surgical Admission Note".equals(desc)) {
|
||||
dr.getType().addCoding().setSystem("http://loinc.org").setCode("36589-0").setDisplay("Surgery Admission evaluation note");
|
||||
dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other");
|
||||
} else {
|
||||
throw new FHIRException("Unhandled Note type '"+cat+"'/'"+desc+"'");
|
||||
}
|
||||
dr.addContent().getAttachment().setContentType("text/plain; charset=UTF-8").setData(csv.cell("text").getBytes(Charset.forName("UTF-8")));
|
||||
bnd.addEntry().setResource(dr);
|
||||
}
|
||||
System.out.println(Integer.toString(t)+" found");
|
||||
csv.close();
|
||||
new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(dest), bnd);
|
||||
}
|
||||
|
||||
|
||||
private Type parseQuantity(String cell) {
|
||||
if (cell.startsWith("<=")) {
|
||||
return new Quantity().setComparator(QuantityComparator.LESS_OR_EQUAL).setValue(new BigDecimal(cell.substring(2)));
|
||||
}
|
||||
if (cell.startsWith("<")) {
|
||||
return new Quantity().setComparator(QuantityComparator.LESS_THAN).setValue(new BigDecimal(cell.substring(2)));
|
||||
}
|
||||
if (cell.startsWith(">=") || cell.startsWith("=>") ) {
|
||||
return new Quantity().setComparator(QuantityComparator.GREATER_OR_EQUAL).setValue(new BigDecimal(cell.substring(2)));
|
||||
}
|
||||
if (cell.startsWith(">")) {
|
||||
return new Quantity().setComparator(QuantityComparator.GREATER_THAN).setValue(new BigDecimal(cell.substring(2)));
|
||||
}
|
||||
if (!Utilities.isDecimal(cell, true)) {
|
||||
throw new Error("Not a valid decimal: "+cell);
|
||||
}
|
||||
|
||||
return new Quantity().setValue(new BigDecimal(cell));
|
||||
}
|
||||
|
||||
private Bundle processAdmissions(String src) throws FileNotFoundException, IOException {
|
||||
System.out.print("Processing Admissions... ");
|
||||
CSVReader csv = new CSVReader(new FileInputStream(src));
|
||||
|
@ -202,6 +458,7 @@ public class Mimic14Importer {
|
|||
bnd.addEntry().setResource(enc);
|
||||
}
|
||||
System.out.println(Integer.toString(t)+" found");
|
||||
csv.close();
|
||||
return bnd;
|
||||
}
|
||||
|
||||
|
@ -231,6 +488,7 @@ public class Mimic14Importer {
|
|||
bnd.addEntry().setResource(pat);
|
||||
}
|
||||
System.out.println(Integer.toString(t)+" found");
|
||||
csv.close();
|
||||
return bnd;
|
||||
}
|
||||
|
||||
|
|
|
@ -152,8 +152,14 @@ public class CSVReader extends InputStreamReader {
|
|||
while (inQuote || (peek() != '\r' && peek() != '\n')) {
|
||||
char c = peek();
|
||||
next();
|
||||
if (c == '"')
|
||||
if (c == '"') {
|
||||
if (ready() && peek() == '"') {
|
||||
b.append(c);
|
||||
next();
|
||||
} else {
|
||||
inQuote = !inQuote;
|
||||
}
|
||||
}
|
||||
else if (!inQuote && c == ',') {
|
||||
res.add(b.toString().trim());
|
||||
b = new StringBuilder();
|
||||
|
|
Loading…
Reference in New Issue