From 2cb7c052af8cede2e24a1ce93bf4f800f9d3c8ca Mon Sep 17 00:00:00 2001
From: Grahame Grieve
Date: Wed, 5 Jul 2023 07:23:47 +1000
Subject: [PATCH] OMOP importer

---
Review notes (ignored by git am):
 - OMOPImporter now uses parameterised inserts, closes its JDBC/CSV resources, and reads the
   database password from OMOP_DB_PASSWORD. The password that was previously embedded in
   connect() should be treated as leaked and rotated.
 - CSVReader.cell() now lists the column names in its "no cell" error instead of the array identity.
 - NOTE(review): the blob/commit ids below are those of the original commit, and the whitespace of
   the context lines in the CSVReader/I18nConstants hunks was reconstructed; regenerate with
   git format-patch, or apply with --ignore-whitespace, if the hunks do not match.

 .../fhir/convertors/misc/OMOPImporter.java    | 266 ++++++++++++++++++
 .../org/hl7/fhir/utilities/CSVReader.java     |  17 +-
 .../fhir/utilities/i18n/I18nConstants.java    |   4 +
 3 files changed, 284 insertions(+), 3 deletions(-)
 create mode 100644 org.hl7.fhir.convertors/src/main/java/org/hl7/fhir/convertors/misc/OMOPImporter.java

diff --git a/org.hl7.fhir.convertors/src/main/java/org/hl7/fhir/convertors/misc/OMOPImporter.java b/org.hl7.fhir.convertors/src/main/java/org/hl7/fhir/convertors/misc/OMOPImporter.java
new file mode 100644
index 000000000..381b66b0d
--- /dev/null
+++ b/org.hl7.fhir.convertors/src/main/java/org/hl7/fhir/convertors/misc/OMOPImporter.java
@@ -0,0 +1,266 @@
+package org.hl7.fhir.convertors.misc;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.hl7.fhir.exceptions.FHIRException;
+import org.hl7.fhir.utilities.CSVReader;
+import org.hl7.fhir.utilities.Utilities;
+
+/**
+ * Loads the tab-delimited OMOP vocabulary download files from a folder into the tables of the
+ * MySQL omop schema, one table per file.
+ *
+ * The JDBC URL, user and password are read from the environment variables OMOP_JDBC_URL,
+ * OMOP_DB_USER and OMOP_DB_PASSWORD so that no credentials are kept in source control.
+ */
+public class OMOPImporter {
+
+  private static final String DEFAULT_FOLDER = "/Users/grahamegrieve/Downloads/vocabulary_download_v5_{97cc5432-0dc9-4f14-9da2-d0624129d2f7}_1688068174909";
+  private static final String DEFAULT_JDBC_URL = "jdbc:mysql://localhost:3306/omop?useSSL=false";
+  private static final String DEFAULT_USER = "root";
+  private static final int PROGRESS_INTERVAL = 1000;
+
+  /** Supplies the value to store for one column of the current CSV row. */
+  @FunctionalInterface
+  private interface CellSource {
+    String value(CSVReader csv, String column);
+  }
+
+  private Connection con;
+  // relationship_id -> relationship_concept_id, filled by loadRelationships
+  private Map<String, String> relationships = new HashMap<>();
+
+  /**
+   * Runs the import.
+   *
+   * @param args optional; args[0] is the folder holding the vocabulary files
+   */
+  public static void main(String[] args) throws Exception {
+    new OMOPImporter().process(args.length > 0 ? args[0] : DEFAULT_FOLDER);
+  }
+
+  /**
+   * Connects to the database and runs each table load in turn; the boolean passed to each step
+   * switches that load on or off. The connection is closed when the run ends.
+   */
+  private void process(String folder) throws ClassNotFoundException, SQLException, FHIRException, FileNotFoundException, IOException {
+    connect();
+    try {
+      loadRelationships(folder, true);
+      processVocabularies(folder, false);
+      processDomains(folder, false);
+      processConceptClasses(folder, false);
+      processDrugStrength(folder, false);
+      processConcepts(folder, false);
+      processConceptRelationships(folder, false);
+      processConceptSynonyms(folder, false);
+      processConceptAncestors(folder, false);
+    } finally {
+      con.close();
+    }
+  }
+
+  /**
+   * Reads RELATIONSHIP.csv into the relationships lookup and, when process is true, reloads the
+   * Relationships table from it. The lookup is always built because processConceptRelationships
+   * reads it; the table is only cleared when it is about to be reloaded.
+   */
+  private void loadRelationships(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
+    try (CSVReader csv = open(folder, "RELATIONSHIP.csv")) {
+      while (csv.line()) {
+        relationships.put(csv.cell("relationship_id"), csv.cell("relationship_concept_id"));
+      }
+    }
+    if (process) {
+      importTable(folder, "RELATIONSHIP.csv", "Relationships", true, CSVReader::cell,
+          "relationship_concept_id", "relationship_id", "relationship_name", "is_hierarchical", "defines_ancestry", "reverse_relationship_id");
+    }
+  }
+
+  /** Reloads the Vocabularies table from VOCABULARY.csv when process is true. */
+  private void processVocabularies(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
+    if (process) {
+      importTable(folder, "VOCABULARY.csv", "Vocabularies", true, CSVReader::cell,
+          "vocabulary_concept_id", "vocabulary_id", "vocabulary_name", "vocabulary_reference", "vocabulary_version");
+    }
+  }
+
+  /** Reloads the DrugStrengths table from DRUG_STRENGTH.csv when process is true. */
+  private void processDrugStrength(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
+    if (process) {
+      importTable(folder, "DRUG_STRENGTH.csv", "DrugStrengths", true, CSVReader::cell,
+          "drug_concept_id", "ingredient_concept_id", "amount_value", "amount_unit_concept_id", "numerator_value", "numerator_unit_concept_id",
+          "denominator_value", "denominator_unit_concept_id", "box_size", "valid_start_date", "valid_end_date", "invalid_reason");
+    }
+  }
+
+  /** Reloads the Domains table from DOMAIN.csv when process is true. */
+  private void processDomains(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
+    if (process) {
+      importTable(folder, "DOMAIN.csv", "Domains", true, CSVReader::cell,
+          "domain_concept_id", "domain_id", "domain_name");
+    }
+  }
+
+  /** Reloads the ConceptClasses table from CONCEPT_CLASS.csv when process is true. */
+  private void processConceptClasses(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
+    if (process) {
+      importTable(folder, "CONCEPT_CLASS.csv", "ConceptClasses", true, CSVReader::cell,
+          "concept_class_concept_id", "concept_class_id", "concept_class_name");
+    }
+  }
+
+  /**
+   * Appends the rows of CONCEPT.csv to the Concepts table when process is true.
+   * NOTE(review): unlike most other loads this one has never cleared the table first - confirm that is intended.
+   */
+  private void processConcepts(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
+    if (process) {
+      importTable(folder, "CONCEPT.csv", "Concepts", false, CSVReader::cell,
+          "concept_id", "concept_name", "domain_id", "vocabulary_id", "concept_class_id", "standard_concept", "concept_code",
+          "valid_start_date", "valid_end_date", "invalid_reason");
+    }
+  }
+
+  /** Reloads the ConceptSynonyms table from CONCEPT_SYNONYM.csv when process is true. */
+  private void processConceptSynonyms(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
+    if (process) {
+      importTable(folder, "CONCEPT_SYNONYM.csv", "ConceptSynonyms", true, CSVReader::cell,
+          "concept_id", "concept_synonym_name", "language_concept_id");
+    }
+  }
+
+  /** Reloads the ConceptAncestors table from CONCEPT_ANCESTOR.csv when process is true. */
+  private void processConceptAncestors(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
+    if (process) {
+      importTable(folder, "CONCEPT_ANCESTOR.csv", "ConceptAncestors", true, CSVReader::cell,
+          "ancestor_concept_id", "descendant_concept_id", "min_levels_of_separation", "max_levels_of_separation");
+    }
+  }
+
+  /**
+   * Appends the rows of CONCEPT_RELATIONSHIP.csv to the ConceptRelationships table when process is
+   * true. The relationship_id column receives the relationship_concept_id looked up in relationships
+   * (null when the id is unknown); the table is not cleared first.
+   */
+  private void processConceptRelationships(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
+    if (process) {
+      importTable(folder, "CONCEPT_RELATIONSHIP.csv", "ConceptRelationships", false,
+          (csv, column) -> "relationship_id".equals(column) ? relationships.get(csv.cell(column)) : csv.cell(column),
+          "concept_id_1", "concept_id_2", "relationship_id", "valid_start_date", "valid_end_date", "invalid_reason");
+    }
+  }
+
+  /**
+   * Opens one of the vocabulary files as a tab-delimited reader positioned after its header row.
+   * Quote handling is switched off for the data rows, after the headers have been read.
+   */
+  private CSVReader open(String folder, String fileName) throws FHIRException, FileNotFoundException, IOException {
+    CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, fileName)));
+    try {
+      csv.setDelimiter('\t');
+      csv.readHeaders();
+      csv.setDoingQuotes(false);
+      return csv;
+    } catch (IOException | RuntimeException e) {
+      csv.close(); // do not leak the file handle when the header row cannot be read
+      throw e;
+    }
+  }
+
+  /**
+   * Inserts every row of a vocabulary file into a table through one reusable parameterised
+   * statement, so cell values are never spliced into the SQL text.
+   *
+   * A row that fails to insert is reported and counted, and the load carries on with the next row.
+   *
+   * @param folder folder holding the vocabulary files
+   * @param fileName name of the file to read
+   * @param table name of the table in the omop schema to fill
+   * @param clearFirst whether to delete the existing rows once the file has been opened
+   * @param source supplies the value for each column of the current row
+   * @param columns column names, used both as CSV headers and as table columns
+   */
+  private void importTable(String folder, String fileName, String table, boolean clearFirst, CellSource source, String... columns)
+      throws FHIRException, FileNotFoundException, IOException, SQLException {
+    int rows = 0;
+    int errors = 0;
+    try (CSVReader csv = open(folder, fileName)) {
+      if (clearFirst) {
+        try (Statement stmt = con.createStatement()) {
+          stmt.executeUpdate("delete from "+table);
+        }
+      }
+      try (PreparedStatement insert = con.prepareStatement(insertSql(table, columns))) {
+        while (csv.line()) {
+          for (int c = 0; c < columns.length; c++) {
+            String value = source.value(csv, columns[c]);
+            if (value == null) {
+              insert.setNull(c + 1, Types.VARCHAR);
+            } else {
+              insert.setString(c + 1, value);
+            }
+          }
+          try {
+            insert.executeUpdate();
+          } catch (SQLException e) {
+            System.out.println("error: "+e.getMessage());
+            System.out.println("i: "+rows);
+            errors++;
+          }
+          rows++;
+          if (rows % PROGRESS_INTERVAL == 0) {
+            System.out.println(rows);
+          }
+        }
+      }
+    }
+    System.out.println("Finished. "+rows+" rows, "+errors+" errors");
+  }
+
+  /** Builds the INSERT statement for a table, with one placeholder per column. */
+  private static String insertSql(String table, String... columns) {
+    StringBuilder names = new StringBuilder();
+    StringBuilder params = new StringBuilder();
+    for (String column : columns) {
+      if (names.length() > 0) {
+        names.append(", ");
+        params.append(", ");
+      }
+      names.append('`').append(column).append('`');
+      params.append('?');
+    }
+    return "INSERT INTO `omop`.`"+table+"` ("+names+") VALUES ("+params+")";
+  }
+
+  /**
+   * Opens the database connection from OMOP_JDBC_URL, OMOP_DB_USER and OMOP_DB_PASSWORD. The URL
+   * and user fall back to the local defaults; the password has no default.
+   *
+   * @throws SQLException if OMOP_DB_PASSWORD is not set or the connection cannot be opened
+   */
+  private void connect() throws SQLException, ClassNotFoundException {
+    String password = System.getenv("OMOP_DB_PASSWORD");
+    if (password == null) {
+      throw new SQLException("OMOP_DB_PASSWORD is not set");
+    }
+    con = DriverManager.getConnection(env("OMOP_JDBC_URL", DEFAULT_JDBC_URL), env("OMOP_DB_USER", DEFAULT_USER), password);
+  }
+
+  /** Returns the named environment variable, or fallback when it is unset or empty. */
+  private static String env(String name, String fallback) {
+    String value = System.getenv(name);
+    return value == null || value.isEmpty() ? fallback : value;
+  }
+
+}
diff --git a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/CSVReader.java b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/CSVReader.java
index 60009a350..2b781a1e7 100644
--- a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/CSVReader.java
+++ b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/CSVReader.java
@@ -62,6 +62,7 @@ public class CSVReader extends InputStreamReader {
   private String[] cells;
   private char delimiter = ',';
   private boolean multiline;
+  private boolean doingQuotes = true;
 
   public void readHeaders() throws IOException, FHIRException {
     cols = parseLine();
@@ -86,11 +87,13 @@ public class CSVReader extends InputStreamReader {
   public String cell(String name) {
     int index = -1;
     for (int i = 0; i < cols.length; i++) {
-      if (name.equals(cols[i].trim()))
+      if (name.equals(cols[i].trim())) {
         index = i;
+        break;
+      }
     }
     if (index == -1)
-      throw new FHIRException("no cell "+name);
+      throw new FHIRException("no cell "+name+" in "+String.join(", ", cols));
     String s = cells.length > index ? cells[index] : null;
     if (Utilities.noString(s))
       return null;
@@ -143,7 +146,7 @@ public class CSVReader extends InputStreamReader {
     while (more() && !finished(inQuote, res.size())) {
       char c = peek();
       next();
-      if (c == '"') {
+      if (c == '"' && doingQuotes) {
         if (ready() && peek() == '"') {
           b.append(c);
           next();
@@ -238,5 +241,13 @@ public class CSVReader extends InputStreamReader {
     this.multiline = multiline;
   }
 
+  public boolean isDoingQuotes() {
+    return doingQuotes;
+  }
+
+  public void setDoingQuotes(boolean doingQuotes) {
+    this.doingQuotes = doingQuotes;
+  }
+
 
 }
\ No newline at end of file
diff --git a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/i18n/I18nConstants.java b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/i18n/I18nConstants.java
index b9e03ddf8..0365ab0e3 100644
--- a/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/i18n/I18nConstants.java
+++ b/org.hl7.fhir.utilities/src/main/java/org/hl7/fhir/utilities/i18n/I18nConstants.java
@@ -917,6 +917,10 @@ public class I18nConstants {
   public static final String ED_INVARIANT_EXPRESSION_CONFLICT = "ED_INVARIANT_EXPRESSION_CONFLICT";
   public static final String ED_INVARIANT_EXPRESSION_ERROR = "ED_INVARIANT_EXPRESSION_ERROR";
   public static final String SNAPSHOT_IS_EMPTY = "SNAPSHOT_IS_EMPTY";
+  public static final String EXTENSION_CONTEXT_UNABLE_TO_CHECK_PROFILE = "EXTENSION_CONTEXT_UNABLE_TO_CHECK_PROFILE";
+  public static final String EXTENSION_CONTEXT_UNABLE_TO_FIND_PROFILE = "EXTENSION_CONTEXT_UNABLE_TO_FIND_PROFILE";
+  public static final String TERMINOLOGY_TX_HINT = "TERMINOLOGY_TX_HINT";
+  public static final String TERMINOLOGY_TX_WARNING = "TERMINOLOGY_TX_WARNING";
 
 
 }