From 958791d1cd156c851576c943cc6e5794e8b06bff Mon Sep 17 00:00:00 2001 From: Grahame Grieve Date: Fri, 25 Oct 2019 08:19:59 +1100 Subject: [PATCH] more Mimic work --- .../fhir/r4/importers/Mimic14Importer.java | 262 +++++++++++++++++- .../org/hl7/fhir/utilities/CSVReader.java | 10 +- 2 files changed, 268 insertions(+), 4 deletions(-) diff --git a/org.hl7.fhir.r4/src/main/java/org/hl7/fhir/r4/importers/Mimic14Importer.java b/org.hl7.fhir.r4/src/main/java/org/hl7/fhir/r4/importers/Mimic14Importer.java index 29d07c49f..ac4d8f88c 100644 --- a/org.hl7.fhir.r4/src/main/java/org/hl7/fhir/r4/importers/Mimic14Importer.java +++ b/org.hl7.fhir.r4/src/main/java/org/hl7/fhir/r4/importers/Mimic14Importer.java @@ -1,10 +1,36 @@ package org.hl7.fhir.r4.importers; +/*- + * #%L + * org.hl7.fhir.r4.importers + * %% + * Copyright (C) 2014 - 2019 Health Level 7 + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +/** + * Development Notes: + * - ignore ICUStays and Transfers for now - low yield, based on discussions with Tom/Alistair + */ + import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.math.BigDecimal; +import java.nio.charset.Charset; import java.util.Date; import java.util.HashMap; import java.util.Map; @@ -19,14 +45,23 @@ import org.hl7.fhir.r4.formats.JsonParser; import org.hl7.fhir.r4.model.Bundle; import org.hl7.fhir.r4.model.Bundle.BundleType; import org.hl7.fhir.r4.model.CodeableConcept; +import org.hl7.fhir.r4.model.Coding; import org.hl7.fhir.r4.model.DateTimeType; import org.hl7.fhir.r4.model.DateType; +import org.hl7.fhir.r4.model.DocumentReference; import org.hl7.fhir.r4.model.Encounter; import org.hl7.fhir.r4.model.Enumerations.AdministrativeGender; +import org.hl7.fhir.r4.model.Enumerations.DocumentReferenceStatus; import org.hl7.fhir.r4.model.Observation; +import org.hl7.fhir.r4.model.Observation.ObservationComponentComponent; +import org.hl7.fhir.r4.model.Observation.ObservationStatus; import org.hl7.fhir.r4.model.Patient; +import org.hl7.fhir.r4.model.Practitioner; +import org.hl7.fhir.r4.model.PractitionerRole; import org.hl7.fhir.r4.model.Quantity; +import org.hl7.fhir.r4.model.Quantity.QuantityComparator; import org.hl7.fhir.r4.model.Reference; +import org.hl7.fhir.r4.model.Type; import org.hl7.fhir.utilities.CSVReader; import org.hl7.fhir.utilities.Utilities; @@ -41,9 +76,24 @@ public class Mimic14Importer { private String loinc; } + public static class Item { + private int rowId; + private int itemId; + private String label; + private String abbreviation; + private String dbSource; + private String linksTo; // where it's used... 
+ private String category; + private String unitName; // uom for item, if it has one + private String paramType; // type of item + private int conceptId; + } + private static final String MRN_SYSTEM = null; private Date date; private Map labItems = new HashMap<>(); + private Map items = new HashMap<>(); +// private Map careGivers = new HashMap<>(); private Map patients = new HashMap<>(); private Map encounters = new HashMap<>(); @@ -64,11 +114,15 @@ public class Mimic14Importer { ucum = new UcumEssenceService(ucumSrc); + loadItems(Utilities.path(src, "d_items.csv")); loadLabItems(Utilities.path(src, "d_labitems.csv")); + loadCareGivers(Utilities.path(src, "caregivers.csv"), Utilities.path(dest, "care-givers.json")); + Bundle patients = processPatients(Utilities.path(src, "patients.csv")); Bundle encounters = processAdmissions(Utilities.path(src, "admissions.csv")); processLabEvents(Utilities.path(src, "labevents.csv"), Utilities.path(dest, "lab-observations.json")); - + processMicroEvents(Utilities.path(src, "microbiologyevents.csv"), Utilities.path(dest, "micro-observations.json")); + processNoteEvents(Utilities.path(src, "noteevents.csv"), Utilities.path(dest, "notes.json")); System.out.println("saving"); @@ -78,6 +132,32 @@ public class Mimic14Importer { System.out.println("done"); } + private void loadItems(String src) throws NumberFormatException, FHIRException, IOException { + System.out.print("Processing Items... 
"); + CSVReader csv = new CSVReader(new FileInputStream(src)); + int t = 0; + csv.readHeaders(); + while (csv.line()) { + Item item = new Item(); + t++; + item.rowId = Integer.parseInt(csv.cell("row_id")); + item.itemId = Integer.parseInt(csv.cell("itemid")); + item.label = csv.cell("label"); + item.abbreviation = csv.cell("abbreviation"); + item.dbSource = csv.cell("dbsource"); + item.linksTo = csv.cell("linksto"); + item.category = csv.cell("category"); + item.unitName = csv.cell("unitname"); + item.paramType = csv.cell("param_type"); + if (csv.has("conceptid")) { + item.conceptId = Integer.parseInt(csv.cell("conceptid")); + } + items.put(csv.cell("itemid"), item); + } + System.out.println(Integer.toString(t)+" found"); + csv.close(); + } + private void loadLabItems(String src) throws NumberFormatException, FHIRException, IOException { System.out.print("Processing Lab Items... "); CSVReader csv = new CSVReader(new FileInputStream(src)); @@ -95,6 +175,28 @@ public class Mimic14Importer { labItems.put(csv.cell("itemid"), item); } System.out.println(Integer.toString(t)+" found"); + csv.close(); + } + + private void loadCareGivers(String src, String dest) throws NumberFormatException, FHIRException, IOException { + System.out.print("Processing Care Givers... 
"); + CSVReader csv = new CSVReader(new FileInputStream(src)); + int t = 0; + csv.readHeaders(); + Bundle bnd = new Bundle(); + bnd.setId("care-givers"); + bnd.setType(BundleType.COLLECTION); + bnd.setTimestamp(date); + while (csv.line()) { + PractitionerRole pr = new PractitionerRole(); + t++; + pr.setId(csv.cell("cgid")); + pr.addCode().addCoding().setSystem("http://mimic.physionet.org/fhir/CareGiverType").setCode(csv.cell("label")).setDisplay(csv.cell("description")); + bnd.addEntry().setResource(pr); + } + System.out.println(Integer.toString(t)+" found"); + csv.close(); + new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(dest), bnd); } private void processLabEvents(String src, String dest) throws FileNotFoundException, IOException { @@ -109,10 +211,12 @@ public class Mimic14Importer { while (csv.line()) { Observation obs = new Observation(); t++; + Patient pat = patients.get(csv.cell("subject_id")); Encounter enc = encounters.get(csv.cell("hadm_id")); LabItem item = labItems.get(csv.cell("itemid")); - obs.setId(csv.cell("hadm_id")); + obs.setId(csv.cell("row_id")); + obs.setStatus(ObservationStatus.FINAL); if (pat != null) { obs.setSubject(new Reference("Patient/"+pat.getId())); } @@ -152,9 +256,161 @@ public class Mimic14Importer { bnd.addEntry().setResource(obs); } System.out.println(Integer.toString(t)+" found"); + csv.close(); new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(dest), bnd); } + private void processMicroEvents(String src, String dest) throws FileNotFoundException, IOException { + System.out.print("Processing Micro Events... 
"); + CSVReader csv = new CSVReader(new FileInputStream(src)); + Bundle bnd = new Bundle(); + bnd.setId("micro-observations"); + bnd.setType(BundleType.COLLECTION); + bnd.setTimestamp(date); + csv.readHeaders(); + Map cache = new HashMap<>(); + + int t = 0; + while (csv.line()) { + String cacheId = csv.cell("hadm_id")+"|"+csv.cell("spec_itemid")+"|"+csv.cell("org_itemid")+"|"+(csv.has("charttime") ? csv.cell("charttime") : csv.cell("chartdate"))+"|"+csv.cell("isolate_num"); + + Patient pat = patients.get(csv.cell("subject_id")); + Encounter enc = encounters.get(csv.cell("hadm_id")); + Item spec = items.get(csv.cell("spec_itemid")); + Item org = items.get(csv.cell("org_itemid")); + Item ab = items.get(csv.cell("ab_itemid")); + + Observation obs; + if (cache.containsKey(cacheId)) { + obs = cache.get(cacheId); + } else { + obs = new Observation(); + t++; + cache.put(cacheId, obs); + obs.setId(csv.cell("row_id")); + obs.setStatus(ObservationStatus.FINAL); + obs.addCategory().setText("microbiology"); + bnd.addEntry().setResource(obs); + + // todo: these are all cultures, but the codes state mainly what the culture is on + obs.getCode().setText(csv.cell("spec_itemid")); + obs.getCode().addCoding().setSystem("http://mimic.physionet.org/fhir/TestType").setCode(csv.cell("spec_itemid")); + } + + if (pat != null) { + obs.setSubject(new Reference("Patient/"+pat.getId())); + } + if (enc != null) { + obs.setEncounter(new Reference("Encounter/"+enc.getId())); + } + + if (csv.has("charttime")) { + obs.setEffective(readDateTime(csv.cell("charttime"))); + } else { + obs.setEffective(readDateTime(csv.cell("chartdate"))); + } + if (org == null) { + obs.setValue(new CodeableConcept(new Coding().setSystem("http://mimic.physionet.org/fhir/TestValue").setCode("negative"))); + } else { + obs.setValue(new CodeableConcept(new Coding().setSystem("http://mimic.physionet.org/fhir/Organism").setCode(csv.cell("org_itemid"))).setText(csv.cell("org_name"))); + } + if (ab != null) { 
+ ObservationComponentComponent oc = obs.addComponent(); + oc.setCode(new CodeableConcept(new Coding().setSystem("http://mimic.physionet.org/fhir/Antibiotic").setCode(csv.cell("ab_itemid"))).setText(csv.cell("ab_name"))); + if (csv.has("dilution_text")) { + oc.setValue(parseQuantity(csv.cell("dilution_text"))); + } + if (csv.has("interpretation")) { + oc.addInterpretation().addCoding().setSystem("http://mimic.physionet.org/fhir/Interpretation").setCode(csv.cell("interpretation")); + } + } + } + System.out.println(Integer.toString(t)+" found"); + csv.close(); + new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(dest), bnd); + } + + private void processNoteEvents(String src, String dest) throws FileNotFoundException, IOException { + System.out.print("Processing Note Events... "); + CSVReader csv = new CSVReader(new FileInputStream(src)); + Bundle bnd = new Bundle(); + bnd.setId("notes"); + bnd.setType(BundleType.COLLECTION); + bnd.setTimestamp(date); + csv.readHeaders(); + Map cache = new HashMap<>(); + + int t = 0; + while (csv.line()) { + t++; // one per note row, reported in the "N found" summary + Patient pat = patients.get(csv.cell("subject_id")); + Encounter enc = encounters.get(csv.cell("hadm_id")); + + DocumentReference dr = new DocumentReference(); + dr.setId(csv.cell("row_id")); + if (pat != null) { + dr.setSubject(new Reference("Patient/"+pat.getId())); + } + if (enc != null) { + dr.getContext().addEncounter(new Reference("Encounter/"+enc.getId())); + } + if ("1".equals(csv.cell("iserror"))) { + dr.setStatus(DocumentReferenceStatus.ENTEREDINERROR); + } else { + dr.setStatus(DocumentReferenceStatus.CURRENT); + } + if (csv.has("cgid")) { + dr.addAuthor().setReference("PractitionerRole/"+csv.cell("cgid")); + } + String cat = csv.cell("category"); + String desc = csv.cell("description"); + if ("Discharge summary".equals(cat) && "Report".equals(desc)) { + dr.getType().addCoding().setSystem("http://loinc.org").setCode("18842-5").setDisplay("Discharge summary"); 
+ dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other"); + } else if ("Echo".equals(cat) && "Report".equals(desc)) { + dr.getType().addCoding().setSystem("http://loinc.org").setCode("59282-4").setDisplay("Stress cardiac echo study report US"); + dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other"); + } else if ("Radiology".equals(cat) && "CHEST (PORTABLE AP)".equals(desc)) { + dr.getType().addCoding().setSystem("http://loinc.org").setCode("59282-4").setDisplay("Chest X-ray AP portable single view"); // NOTE(review): same LOINC code as the Echo branch above - confirm the intended code + dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other"); + } else if ("Nursing/other".equals(cat) && "Report".equals(desc)) { + dr.getType().addCoding().setSystem("http://loinc.org").setCode("34119-8").setDisplay("Nursing facility Initial assessment note"); + dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other"); + } else if ("Physician".equals(cat) && "Physician Surgical Admission Note".equals(desc)) { + dr.getType().addCoding().setSystem("http://loinc.org").setCode("36589-0").setDisplay("Surgery Admission evaluation note"); + dr.addCategory().addCoding().setSystem("http://fhir.org/guides/argonaut/clinicalnotes/CodeSystem/documentreference-category").setCode("other"); + } else { + throw new FHIRException("Unhandled Note type '"+cat+"'/'"+desc+"'"); + } + dr.addContent().getAttachment().setContentType("text/plain; charset=UTF-8").setData(csv.cell("text").getBytes(Charset.forName("UTF-8"))); + bnd.addEntry().setResource(dr); + } + System.out.println(Integer.toString(t)+" found"); + csv.close(); + new JsonParser().setOutputStyle(OutputStyle.PRETTY).compose(new FileOutputStream(dest), bnd); + } + + + private Type 
parseQuantity(String cell) { + if (cell.startsWith("<=")) { + return new Quantity().setComparator(QuantityComparator.LESS_OR_EQUAL).setValue(new BigDecimal(cell.substring(2))); + } + if (cell.startsWith("<")) { // one-character comparator: skip 1 char so the first digit is kept + return new Quantity().setComparator(QuantityComparator.LESS_THAN).setValue(new BigDecimal(cell.substring(1))); + } + if (cell.startsWith(">=") || cell.startsWith("=>") ) { + return new Quantity().setComparator(QuantityComparator.GREATER_OR_EQUAL).setValue(new BigDecimal(cell.substring(2))); + } + if (cell.startsWith(">")) { // one-character comparator: skip 1 char so the first digit is kept + return new Quantity().setComparator(QuantityComparator.GREATER_THAN).setValue(new BigDecimal(cell.substring(1))); + } + if (!Utilities.isDecimal(cell, true)) { + throw new Error("Not a valid decimal: "+cell); + } + + return new Quantity().setValue(new BigDecimal(cell)); + } + private Bundle processAdmissions(String src) throws FileNotFoundException, IOException { System.out.print("Processing Admissions... "); CSVReader csv = new CSVReader(new FileInputStream(src)); @@ -202,6 +458,7 @@ public class Mimic14Importer { bnd.addEntry().setResource(enc); } System.out.println(Integer.toString(t)+" found"); + csv.close(); return bnd; } @@ -231,6 +488,7 @@ public class Mimic14Importer { bnd.addEntry().setResource(pat); } System.out.println(Integer.toString(t)+" found"); + csv.close(); return bnd; } diff --git a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/CSVReader.java b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/CSVReader.java index 3252a3206..8fdd90cd8 100644 --- a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/CSVReader.java +++ b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/CSVReader.java @@ -152,8 +152,14 @@ public class CSVReader extends InputStreamReader { while (inQuote || (peek() != '\r' && peek() != '\n')) { char c = peek(); next(); - if (c == '"') - inQuote = !inQuote; + if (c == '"') { + if (ready() && peek() == '"') { + b.append(c); + next(); + } else { + inQuote = 
!inQuote; + } + } + else if (!inQuote && c == ',') { res.add(b.toString().trim()); b = new StringBuilder();