Finish off OMOP Importer

This commit is contained in:
Grahame Grieve 2023-07-18 11:05:21 +10:00
parent ced714305d
commit 27961d3da5
1 changed file with 555 additions and 254 deletions

View File

@ -1,15 +1,18 @@
package org.hl7.fhir.convertors.misc; package org.hl7.fhir.convertors.misc;
import java.sql.DriverManager; import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException; import java.sql.SQLException;
import java.sql.Statement; import java.sql.Statement;
import java.util.Map; import java.util.Map;
import java.util.HashMap; import java.util.HashMap;
import org.hl7.fhir.convertors.misc.OMOPImporter.Tracker;
import org.hl7.fhir.exceptions.FHIRException; import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.CSVReader; import org.hl7.fhir.utilities.CSVReader;
import org.hl7.fhir.utilities.Utilities; import org.hl7.fhir.utilities.Utilities;
import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
@ -17,41 +20,144 @@ import java.sql.Connection;
public class OMOPImporter { public class OMOPImporter {
/**
 * Console progress reporter for a single import step.
 *
 * <p>The constructor prints the step name and starts a timer. {@link #scan()} and
 * {@link #process()} each start a new phase and reset the row counter, {@link #step()}
 * counts one row and prints a dot every {@code blip} rows, and {@link #done()} prints a
 * summary line. All output goes to {@code System.out}.
 */
public class Tracker {
  // Number of rows between progress dots; always >= 1.
  private int blip;
  // Wall-clock time (ms) at construction; note that it is NOT reset per phase.
  private long start;
  // Rows counted in the current phase.
  private int counter = 0;
  // Set once process() has been called, i.e. the build phase was entered.
  boolean processed = false;

  /**
   * Starts tracking a step and prints its name (without a trailing newline).
   *
   * @param name label printed for this step
   * @param estimate expected row count; used only to choose how often a dot is printed
   *     (every row below 100, otherwise roughly 80 dots over the estimate)
   */
  public Tracker(String name, int estimate) {
    this.start = System.currentTimeMillis();
    this.blip = estimate < 100 ? 1 : estimate / 80;
    System.out.print(name);
  }

  /** Reports that this step was skipped. */
  public void skip() {
    System.out.println(" ... skipped");
  }

  /** Starts the scan phase: prints its label and resets the row counter. */
  public void scan() {
    System.out.println("");
    System.out.print(" Scan :");
    counter = 0;
  }

  /** Starts the build phase: prints its label, resets the row counter and marks the step as processed. */
  public void process() {
    System.out.println("");
    System.out.print(" Build:");
    counter = 0;
    processed = true;
  }

  /** Counts one row and prints a progress dot every {@code blip} rows. */
  public void step() {
    counter++;
    if (counter % blip == 0) {
      System.out.print(".");
    }
  }

  /**
   * Prints the summary line for the step. When the build phase ran, the elapsed time since
   * construction is included, and for runs longer than three seconds a throughput figure too.
   */
  public void done() {
    if (counter > 0) {
      System.out.println("");
    }
    if (processed) {
      long elapsed = System.currentTimeMillis()-start;
      if (elapsed > 3000) {
        // Rows per second. Multiply before dividing so the elapsed time is not truncated to
        // whole seconds first; elapsed > 3000 here, so the division cannot be by zero.
        long rowsPerSecond = (counter * 1000L) / elapsed;
        System.out.println(" Finished: "+counter+" rows, "+Utilities.describeDuration(elapsed)+" ("+rowsPerSecond+" rows/sec)");
      } else {
        System.out.println(" Finished: "+counter+" rows, "+Utilities.describeDuration(elapsed));
      }
    } else {
      System.out.println(" Finished: "+counter+" rows");
    }
  }

  /**
   * Reports a fatal problem with the current row and aborts the import.
   *
   * @param e description of the failure
   * @throws Error always, carrying {@code e} as its message
   */
  public void error(String e) {
    System.out.println("error: "+e);
    System.out.println("row: "+counter);
    throw new Error(e);
  }
}
private Connection con; private Connection con;
private Map<String, String> relationships = new HashMap<>(); private Map<String, String> relationships = new HashMap<>();
private Map<String, String> vocabularies = new HashMap<>();
private Map<String, String> domains = new HashMap<>();
private Map<String, String> classes = new HashMap<>();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
new OMOPImporter().process("/Users/grahamegrieve/Downloads/vocabulary_download_v5_{97cc5432-0dc9-4f14-9da2-d0624129d2f7}_1688068174909"); new OMOPImporter().process(args[0], args[1]);
// "/Users/grahamegrieve/Downloads/vocabulary_download_v5_{97cc5432-0dc9-4f14-9da2-d0624129d2f7}_1688068174909");
// /Users/grahamegrieve/temp/omop/omop.db
} }
private void process(String folder) throws ClassNotFoundException, SQLException, FHIRException, FileNotFoundException, IOException { private void process(String folder, String dest) throws ClassNotFoundException, SQLException, FHIRException, FileNotFoundException, IOException {
connect(); connect(dest);
loadRelationships(folder, true); processRelationships(folder, true);
processVocabularies(folder, false); processVocabularies(folder, true);
processDomains(folder, false); processDomains(folder, true);
processConceptClasses(folder, false); processConceptClasses(folder, true);
processConcepts(folder, true);
processConceptSynonyms(folder, true);
processConceptRelationships(folder, true);
// disabled - don't consume space that isn't required
processDrugStrength(folder, false); processDrugStrength(folder, false);
processConcepts(folder, false);
processConceptRelationships(folder, false);
processConceptSynonyms(folder, false);
processConceptAncestors(folder, false); processConceptAncestors(folder, false);
} }
private void loadRelationships(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
/**
 * Opens the SQLite database that the import writes to, starting from an empty file.
 *
 * <p>Any existing file at {@code dest} is removed first: the import steps in this class
 * create their tables with plain {@code CREATE TABLE} statements, which cannot succeed
 * against a database left over from a previous run.
 *
 * @param dest path of the SQLite database file to (re)create
 * @throws SQLException if an existing file at {@code dest} cannot be deleted, or the
 *     connection cannot be opened
 * @throws ClassNotFoundException declared for compatibility with callers; not thrown here
 */
private void connect(String dest) throws SQLException, ClassNotFoundException {
  // Remove the database we are about to open, not a fixed developer-specific path.
  File existing = new File(dest);
  if (existing.exists() && !existing.delete()) {
    throw new SQLException("Unable to delete existing database file: "+dest);
  }
  con = DriverManager.getConnection("jdbc:sqlite:"+dest);
}
private void processRelationships(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
Tracker t = new Tracker("Relationships", 700);
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "RELATIONSHIP.csv"))); CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "RELATIONSHIP.csv")));
csv.setDelimiter('\t'); csv.setDelimiter('\t');
csv.readHeaders(); csv.readHeaders();
csv.setDoingQuotes(false); csv.setDoingQuotes(false);
int i = 0; int lid = 0;
int lname = 0;
t.scan();
while (csv.line()) {
relationships.put(csv.cell("relationship_id"), csv.cell("relationship_concept_id"));
lid = max(lid, csv.cell("relationship_id"));
lname = max(lname, csv.cell("relationship_name"));
t.step();
}
csv.close();
if (process) {
t.process();
Statement stmt = con.createStatement(); Statement stmt = con.createStatement();
stmt.executeUpdate("delete from Relationships"); stmt.execute("CREATE TABLE Relationships (\r\n"+
"`relationship_concept_id` bigint NOT NULL,\r\n"+
"`relationship_id` varchar("+lid+") DEFAULT NULL,\r\n"+
"`relationship_name` varchar("+lname+") DEFAULT NULL,\r\n"+
"`is_hierarchical` int DEFAULT NULL,\r\n"+
"`defines_ancestry` int DEFAULT NULL,\r\n"+
"`reverse_relationship_id` varchar(45) DEFAULT NULL,\r\n"+
"PRIMARY KEY (`relationship_concept_id`))\r\n");
stmt.execute("Create Index `RelationshipsId` on Relationships (`relationship_id`)");
stmt.execute("Create Index`RelationshipsReverse` on Relationships (`reverse_relationship_id`)");
csv = new CSVReader(new FileInputStream(Utilities.path(folder, "RELATIONSHIP.csv")));
csv.setDelimiter('\t');
csv.readHeaders();
csv.setDoingQuotes(false);
while (csv.line()) { while (csv.line()) {
relationships.put(csv.cell("relationship_id"), csv.cell("relationship_concept_id")); relationships.put(csv.cell("relationship_id"), csv.cell("relationship_concept_id"));
if (process) { if (process) {
String sql = "INSERT INTO `omop`.`Relationships` (`relationship_concept_id`, `relationship_id`, `relationship_name`, `is_hierarchical`, `defines_ancestry`, `reverse_relationship_id`) VALUES ("+ String sql = "INSERT INTO `Relationships` (`relationship_concept_id`, `relationship_id`, `relationship_name`, `is_hierarchical`, `defines_ancestry`, `reverse_relationship_id`) VALUES ("+
sw(csv.cell("relationship_concept_id"))+", "+ sw(csv.cell("relationship_concept_id"))+", "+
sw(csv.cell("relationship_id"))+", "+ sw(csv.cell("relationship_id"))+", "+
sw(csv.cell("relationship_name"))+", "+ sw(csv.cell("relationship_name"))+", "+
@ -61,34 +167,64 @@ public class OMOPImporter {
try { try {
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
} catch (Exception e) { } catch (Exception e) {
System.out.println("error: "+e.getMessage()); t.error(e.getMessage());
System.out.println("i: "+i);
// System.out.println("sql: "+sql);
} }
} }
i++; t.step();
if (i % 1000 == 0) {
System.out.println(i);
}
} }
csv.close(); csv.close();
}
t.done();
}
private int max(int lid, String cell) {
int i = cell == null? 0 : cell.length();
return i > lid ? i : lid;
} }
private void processVocabularies(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException { private void processVocabularies(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
if (!process) { Tracker t = new Tracker("Vocabularies", 60);
return;
}
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "VOCABULARY.csv"))); CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "VOCABULARY.csv")));
csv.setDelimiter('\t'); csv.setDelimiter('\t');
csv.readHeaders(); csv.readHeaders();
csv.setDoingQuotes(false); csv.setDoingQuotes(false);
int i = 0; int lid = 0;
Statement stmt = con.createStatement(); int lname = 0;
stmt.executeUpdate("delete from Vocabularies"); int lref = 0;
int lver = 0;
t.scan();
while (csv.line()) { while (csv.line()) {
String sql = "INSERT INTO `omop`.`Vocabularies` (`vocabulary_concept_id`, `vocabulary_id`, `vocabulary_name`, `vocabulary_reference`, `vocabulary_version`) VALUES ("+ vocabularies.put(csv.cell("vocabulary_id"), csv.cell("vocabulary_concept_id"));
lid = max(lid, csv.cell("vocabulary_id"));
lname = max(lname, csv.cell("vocabulary_name"));
lref = max(lref, csv.cell("vocabulary_reference"));
lver = max(lver, csv.cell("vocabulary_version"));
t.step();
}
csv.close();
if (process) {
t.process();
Statement stmt = con.createStatement();
stmt.execute("CREATE TABLE `Vocabularies` (\r\n"+
" `vocabulary_concept_id` bigint NOT NULL,\r\n"+
" `vocabulary_id` varchar("+lid+") DEFAULT NULL,\r\n"+
" `vocabulary_name` varchar("+lname+") DEFAULT NULL,\r\n"+
" `vocabulary_reference` varchar("+lref+") DEFAULT NULL,\r\n"+
" `vocabulary_version` varchar("+lver+") DEFAULT NULL,\r\n"+
" PRIMARY KEY (`vocabulary_concept_id`)\r\n"+
") \r\n"+
"\r\n");
stmt.execute("CREATE INDEX `VocabulariesId` on Vocabularies (`vocabulary_id`)");
csv = new CSVReader(new FileInputStream(Utilities.path(folder, "VOCABULARY.csv")));
csv.setDelimiter('\t');
csv.readHeaders();
csv.setDoingQuotes(false);
while (csv.line()) {
String sql = "INSERT INTO `Vocabularies` (`vocabulary_concept_id`, `vocabulary_id`, `vocabulary_name`, `vocabulary_reference`, `vocabulary_version`) VALUES ("+
sw(csv.cell("vocabulary_concept_id"))+", "+ sw(csv.cell("vocabulary_concept_id"))+", "+
sw(csv.cell("vocabulary_id"))+", "+ sw(csv.cell("vocabulary_id"))+", "+
sw(csv.cell("vocabulary_name"))+", "+ sw(csv.cell("vocabulary_name"))+", "+
@ -97,21 +233,128 @@ public class OMOPImporter {
try { try {
stmt.executeUpdate(sql); stmt.executeUpdate(sql);
} catch (Exception e) { } catch (Exception e) {
System.out.println("error: "+e.getMessage()); t.error(e.getMessage());
System.out.println("i: "+i);
// System.out.println("sql: "+sql);
}
i++;
if (i % 1000 == 0) {
System.out.println(i);
} }
t.step();
} }
csv.close(); csv.close();
} }
t.done();
}
/**
 * Loads DOMAIN.csv from {@code folder}.
 *
 * <p>The file is read twice. The first pass always runs: it records every
 * {@code domain_id -> domain_concept_id} pair in {@code domains} and measures the longest
 * id and name so the varchar columns can be sized. The second pass runs only when
 * {@code process} is true: it creates the {@code Domains} table and its index, then inserts
 * one row per CSV line.
 *
 * @param folder directory containing DOMAIN.csv
 * @param process whether to create and populate the table, or only fill the lookup map
 * @throws SQLException if the table or index cannot be created, or the statement cannot be closed
 * @throws IOException if the CSV file cannot be read or closed
 */
private void processDomains(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
  Tracker t = new Tracker("Domains", 50);

  // Pass 1: fill the lookup map and find the column widths.
  int lid = 0;
  int lname = 0;
  CSVReader csv = openDomainsCsv(folder);
  try {
    t.scan();
    while (csv.line()) {
      domains.put(csv.cell("domain_id"), csv.cell("domain_concept_id"));
      lid = max(lid, csv.cell("domain_id"));
      lname = max(lname, csv.cell("domain_name"));
      t.step();
    }
  } finally {
    csv.close();
  }

  if (process) {
    t.process();
    // try-with-resources: the statement is released even when t.error() aborts the import.
    try (Statement stmt = con.createStatement()) {
      stmt.execute("CREATE TABLE `Domains` (\r\n"+
          " `domain_concept_id` bigint NOT NULL,\r\n"+
          " `domain_id` varchar("+lid+") DEFAULT NULL,\r\n"+
          " `domain_name` varchar("+lname+") DEFAULT NULL,\r\n"+
          " PRIMARY KEY (`domain_concept_id`)\r\n"+
          ") \r\n"+
          "\r\n");
      stmt.execute("CREATE INDEX `DomainId` on Domains (`domain_id`)");

      // Pass 2: re-read the file from the start and insert each row.
      csv = openDomainsCsv(folder);
      try {
        while (csv.line()) {
          // sw() renders each cell as a SQL literal ("null" for a missing value).
          String sql = "INSERT INTO `Domains` (`domain_concept_id`, `domain_id`, `domain_name`) VALUES ("+
              sw(csv.cell("domain_concept_id"))+", "+
              sw(csv.cell("domain_id"))+", "+
              sw(csv.cell("domain_name"))+")";
          try {
            stmt.executeUpdate(sql);
          } catch (Exception e) {
            // Reports the failing row, then throws to abort the import.
            t.error(e.getMessage());
          }
          t.step();
        }
      } finally {
        csv.close();
      }
    }
  }
  t.done();
}

/** Opens DOMAIN.csv in {@code folder} as a tab-delimited reader positioned after the header row, with quote handling off. */
private CSVReader openDomainsCsv(String folder) throws FHIRException, FileNotFoundException, IOException {
  CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "DOMAIN.csv")));
  csv.setDelimiter('\t');
  csv.readHeaders();
  csv.setDoingQuotes(false);
  return csv;
}
/**
 * Loads CONCEPT_CLASS.csv from {@code folder}.
 *
 * <p>The file is read twice. The first pass always runs: it records every
 * {@code concept_class_id -> concept_class_concept_id} pair in {@code classes} and measures
 * the longest id and name so the varchar columns can be sized. The second pass runs only
 * when {@code process} is true: it creates the {@code ConceptClasses} table and inserts one
 * row per CSV line.
 *
 * @param folder directory containing CONCEPT_CLASS.csv
 * @param process whether to create and populate the table, or only fill the lookup map
 * @throws SQLException if the table cannot be created, or the statement cannot be closed
 * @throws IOException if the CSV file cannot be read or closed
 */
private void processConceptClasses(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
  Tracker t = new Tracker("ConceptClasses", 400);

  // Pass 1: fill the lookup map and find the column widths.
  int lid = 0;
  int lname = 0;
  CSVReader csv = openConceptClassesCsv(folder);
  try {
    t.scan();
    while (csv.line()) {
      classes.put(csv.cell("concept_class_id"), csv.cell("concept_class_concept_id"));
      lid = max(lid, csv.cell("concept_class_id"));
      lname = max(lname, csv.cell("concept_class_name"));
      t.step();
    }
  } finally {
    csv.close();
  }

  if (process) {
    t.process();
    // try-with-resources: the statement is released even when t.error() aborts the import.
    try (Statement stmt = con.createStatement()) {
      stmt.execute("CREATE TABLE `ConceptClasses` (\r\n"+
          " `concept_class_concept_id` bigint NOT NULL,\r\n"+
          " `concept_class_id` varchar("+lid+") DEFAULT NULL,\r\n"+
          " `concept_class_name` varchar("+lname+") DEFAULT NULL,\r\n"+
          " PRIMARY KEY (`concept_class_concept_id`)\r\n"+
          ") \r\n"+
          "\r\n");

      // Pass 2: re-read the file from the start and insert each row.
      csv = openConceptClassesCsv(folder);
      try {
        while (csv.line()) {
          // sw() renders each cell as a SQL literal ("null" for a missing value).
          String sql = "INSERT INTO `ConceptClasses` (`concept_class_concept_id`, `concept_class_id`, `concept_class_name`) VALUES ("+
              sw(csv.cell("concept_class_concept_id"))+", "+
              sw(csv.cell("concept_class_id"))+", "+
              sw(csv.cell("concept_class_name"))+")";
          try {
            stmt.executeUpdate(sql);
          } catch (Exception e) {
            // Reports the failing row, then throws to abort the import.
            t.error(e.getMessage());
          }
          t.step();
        }
      } finally {
        csv.close();
      }
    }
  }
  t.done();
}

/** Opens CONCEPT_CLASS.csv in {@code folder} as a tab-delimited reader positioned after the header row, with quote handling off. */
private CSVReader openConceptClassesCsv(String folder) throws FHIRException, FileNotFoundException, IOException {
  CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_CLASS.csv")));
  csv.setDelimiter('\t');
  csv.readHeaders();
  csv.setDoingQuotes(false);
  return csv;
}
private void processDrugStrength(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException { private void processDrugStrength(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
Tracker t = new Tracker("DrugStrengths", 3000000);
if (!process) { if (!process) {
t.skip();
return; return;
} }
@ -119,254 +362,317 @@ public class OMOPImporter {
csv.setDelimiter('\t'); csv.setDelimiter('\t');
csv.readHeaders(); csv.readHeaders();
csv.setDoingQuotes(false); csv.setDoingQuotes(false);
int i = 0; int lreason = 0;
Statement stmt = con.createStatement(); int lamount1 = 0;
stmt.executeUpdate("delete from DrugStrengths"); int lnum1 = 0;
int lden1 = 0;
int lamount2 = 0;
int lnum2 = 0;
int lden2 = 0;
t.scan();
while (csv.line()) { while (csv.line()) {
String sql = "INSERT INTO `omop`.`DrugStrengths` (`drug_concept_id`, `ingredient_concept_id`, `amount_value`, `amount_unit_concept_id`, `numerator_value`, `numerator_unit_concept_id`, `denominator_value`, " lreason = max(lreason, csv.cell("invalid_reason"));
+ "`denominator_unit_concept_id`, `box_size`, `valid_start_date`, `valid_end_date`, `invalid_reason`) VALUES ("+ lamount1 = dmax1(lamount1, csv.cell("amount_value"));
sw(csv.cell("drug_concept_id"))+", "+ lamount2 = dmax2(lamount2, csv.cell("amount_value"));
sw(csv.cell("ingredient_concept_id"))+", "+ lnum1 = dmax1(lnum1, csv.cell("numerator_value"));
sw(csv.cell("amount_value"))+", "+ lnum2 = dmax2(lnum2, csv.cell("numerator_value"));
sw(csv.cell("amount_unit_concept_id"))+", "+ lden1 = dmax1(lden1, csv.cell("denominator_value"));
sw(csv.cell("numerator_value"))+", "+ lden2 = dmax2(lden2, csv.cell("denominator_value"));
sw(csv.cell("numerator_unit_concept_id"))+", "+ t.step();
sw(csv.cell("denominator_value"))+", "+
sw(csv.cell("denominator_unit_concept_id"))+", "+
sw(csv.cell("box_size"))+", "+
sw(csv.cell("valid_start_date"))+", "+
sw(csv.cell("valid_end_date"))+", "+
sw(csv.cell("invalid_reason"))+")";
try {
stmt.executeUpdate(sql);
} catch (Exception e) {
System.out.println("error: "+e.getMessage());
System.out.println("i: "+i);
System.out.println("sql: "+sql);
}
i++;
if (i % 100 == 0) {
System.out.println(i);
}
} }
csv.close(); csv.close();
} t.process();
Statement stmt = con.createStatement();
private void processDomains(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException { stmt.execute("CREATE TABLE `DrugStrengths` (\r\n"+
if (!process) { " `drug_concept_id` bigint NOT NULL,\r\n"+
return; " `ingredient_concept_id` bigint NOT NULL,\r\n"+
} " `amount_value` decimal("+lamount1+","+lamount2+") DEFAULT NULL,\r\n"+
" `amount_unit_concept_id` bigint DEFAULT NULL,\r\n"+
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "DOMAIN.csv"))); " `numerator_value` decimal("+lnum1+","+lnum2+") DEFAULT NULL,\r\n"+
" `numerator_unit_concept_id` bigint DEFAULT NULL,\r\n"+
" `denominator_value` decimal("+lden1+","+lden2+") DEFAULT NULL,\r\n"+
" `denominator_unit_concept_id` bigint DEFAULT NULL,\r\n"+
" `box_size` int DEFAULT NULL,\r\n"+
" `valid_start_date` date DEFAULT NULL,\r\n"+
" `valid_end_date` date DEFAULT NULL,\r\n"+
" `invalid_reason` varchar("+lreason+") DEFAULT NULL,\r\n"+
" PRIMARY KEY (`drug_concept_id`,`ingredient_concept_id`)\r\n"+
") \r\n"+
"\r\n");
csv = new CSVReader(new FileInputStream(Utilities.path(folder, "DRUG_STRENGTH.csv")));
csv.setDelimiter('\t'); csv.setDelimiter('\t');
csv.readHeaders(); csv.readHeaders();
csv.setDoingQuotes(false); csv.setDoingQuotes(false);
int i = 0; PreparedStatement pstmt = con.prepareStatement("INSERT INTO `DrugStrengths` (`drug_concept_id`, `ingredient_concept_id`, `amount_value`, `amount_unit_concept_id`, `numerator_value`, `numerator_unit_concept_id`, `denominator_value`, "
Statement stmt = con.createStatement(); + "`denominator_unit_concept_id`, `box_size`, `valid_start_date`, `valid_end_date`, `invalid_reason`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
stmt.executeUpdate("delete from Domains");
while (csv.line()) { while (csv.line()) {
String sql = "INSERT INTO `omop`.`Domains` (`domain_concept_id`, `domain_id`, `domain_name`) VALUES ("+
sw(csv.cell("domain_concept_id"))+", "+
sw(csv.cell("domain_id"))+", "+
sw(csv.cell("domain_name"))+")";
try { try {
stmt.executeUpdate(sql); pstmt.setString(1, csv.cell("drug_concept_id"));
pstmt.setString(2, csv.cell("ingredient_concept_id"));
pstmt.setString(3, csv.cell("amount_value"));
pstmt.setString(4, csv.cell("amount_unit_concept_id"));
pstmt.setString(5, csv.cell("numerator_value"));
pstmt.setString(6, csv.cell("numerator_unit_concept_id"));
pstmt.setString(7, csv.cell("denominator_value"));
pstmt.setString(8, csv.cell("denominator_unit_concept_id"));
pstmt.setString(9, csv.cell("box_size"));
pstmt.setString(10, date(csv.cell("valid_start_date")));
pstmt.setString(11, date(csv.cell("valid_end_date")));
pstmt.setString(11, csv.cell("invalid_reason"));
pstmt.executeUpdate();
} catch (Exception e) { } catch (Exception e) {
System.out.println("error: "+e.getMessage()); t.error(e.getMessage());
System.out.println("i: "+i);
// System.out.println("sql: "+sql);
}
i++;
if (i % 1000 == 0) {
System.out.println(i);
} }
t.step();
} }
csv.close(); csv.close();
t.done();
} }
private int dmax1(int lid, String cell) {
private void processConceptClasses(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException { int i = cell == null? 0 : cell.indexOf('.');
if (!process) { return i > lid ? i : lid;
return;
} }
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_CLASS.csv"))); private int dmax2(int lid, String cell) {
csv.setDelimiter('\t'); int i = cell == null? 0 : cell.length() - cell.indexOf('.') - 1;
csv.readHeaders(); return i > lid ? i : lid;
csv.setDoingQuotes(false);
int i = 0;
Statement stmt = con.createStatement();
stmt.executeUpdate("delete from ConceptClasses");
while (csv.line()) {
String sql = "INSERT INTO `omop`.`ConceptClasses` (`concept_class_concept_id`, `concept_class_id`, `concept_class_name`) VALUES ("+
sw(csv.cell("concept_class_concept_id"))+", "+
sw(csv.cell("concept_class_id"))+", "+
sw(csv.cell("concept_class_name"))+")";
try {
stmt.executeUpdate(sql);
} catch (Exception e) {
System.out.println("error: "+e.getMessage());
System.out.println("i: "+i);
// System.out.println("sql: "+sql);
} }
i++;
if (i % 1000 == 0) {
System.out.println(i);
}
}
csv.close();
}
private void processConcepts(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException { private void processConcepts(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
Tracker t = new Tracker("Concepts", 5617348);
if (!process) { if (!process) {
t.skip();
return; return;
} }
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT.csv"))); CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT.csv")));
csv.setDelimiter('\t'); csv.setDelimiter('\t');
csv.readHeaders(); csv.readHeaders();
csv.setDoingQuotes(false); csv.setDoingQuotes(false);
int i = 0; int lname = 0;
Statement stmt = con.createStatement(); int lstd = 0;
int lcode = 0;
int lreason = 0;
t.scan();
while (csv.line()) { while (csv.line()) {
String sql = "INSERT INTO `omop`.`Concepts` (`concept_id`, `concept_name`, `domain_id`, `vocabulary_id`, `concept_class_id`, `standard_concept`, `concept_code`, `valid_start_date`, `valid_end_date`, `invalid_reason`) VALUES ("+ lname = max(lname, csv.cell("concept_name"));
sw(csv.cell("concept_id"))+", "+ lstd = max(lstd, csv.cell("standard_concept"));
sw(csv.cell("concept_name"))+", "+ lcode = max(lcode, csv.cell("concept_code"));
sw(csv.cell("domain_id"))+", "+ lreason = max(lreason, csv.cell("invalid_reason"));
sw(csv.cell("vocabulary_id"))+", "+ t.step();
sw(csv.cell("concept_class_id"))+", "+
sw(csv.cell("standard_concept"))+", "+
sw(csv.cell("concept_code"))+", "+
sw(csv.cell("valid_start_date"))+", "+
sw(csv.cell("valid_end_date"))+", "+
sw(csv.cell("invalid_reason"))+")";
try {
stmt.executeUpdate(sql);
} catch (Exception e) {
System.out.println("error: "+e.getMessage());
System.out.println("i: "+i);
// System.out.println("sql: "+sql);
}
i++;
if (i % 1000 == 0) {
System.out.println(i);
}
} }
csv.close(); csv.close();
t.process();
Statement stmt = con.createStatement();
stmt.execute("CREATE TABLE `Concepts` (\r\n"+
" `concept_id` bigint NOT NULL,\r\n"+
" `concept_name` varchar("+lname+") DEFAULT NULL,\r\n"+
" `domain_id` bigint DEFAULT NULL,\r\n"+
" `vocabulary_id` bigint DEFAULT NULL,\r\n"+
" `concept_class_id` bigint DEFAULT NULL,\r\n"+
" `standard_concept` varchar("+lstd+") DEFAULT NULL,\r\n"+
" `concept_code` varchar("+lcode+") DEFAULT NULL,\r\n"+
" `valid_start_date` date DEFAULT NULL,\r\n"+
" `valid_end_date` date DEFAULT NULL,\r\n"+
" `invalid_reason` varchar("+lreason+") DEFAULT NULL,\r\n"+
" PRIMARY KEY (`concept_id`)\r\n"+
" ) \r\n"+
"\r\n");
stmt.execute("CREATE INDEX `ConceptDomain` on Concepts (`domain_id`)");
stmt.execute("CREATE INDEX `ConceptVocabulary` on Concepts (`vocabulary_id`,`concept_code`)");
stmt.execute("CREATE INDEX `ConceptClass` on Concepts (`concept_class_id`)");
csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT.csv")));
csv.setDelimiter('\t');
csv.readHeaders();
csv.setDoingQuotes(false);
PreparedStatement pstmt = con.prepareStatement(
"INSERT INTO `Concepts` (`concept_id`, `concept_name`, `domain_id`, `vocabulary_id`, `concept_class_id`, `standard_concept`, `concept_code`, `valid_start_date`, `valid_end_date`, `invalid_reason`) "+
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
while (csv.line()) {
try {
pstmt.setString(1, csv.cell("concept_id"));
pstmt.setString(2, csv.cell("concept_name"));
pstmt.setString(3, domains.get(csv.cell("domain_id")));
pstmt.setString(4, vocabularies.get(csv.cell("vocabulary_id")));
pstmt.setString(5, classes.get(csv.cell("concept_class_id")));
pstmt.setString(6, csv.cell("standard_concept"));
pstmt.setString(7, csv.cell("concept_code"));
pstmt.setString(8, date(csv.cell("valid_start_date")));
pstmt.setString(9, date(csv.cell("valid_end_date")));
pstmt.setString(10, csv.cell("invalid_reason"));
pstmt.executeUpdate();
} catch (Exception e) {
t.error(e.getMessage());
}
t.step();
}
csv.close();
t.done();
} }
private void processConceptSynonyms(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException { private void processConceptSynonyms(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
Tracker t = new Tracker("ConceptSynonyms", 1933498);
if (!process) { if (!process) {
t.skip();
return; return;
} }
t.scan();
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_SYNONYM.csv"))); CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_SYNONYM.csv")));
csv.setDelimiter('\t'); csv.setDelimiter('\t');
csv.readHeaders(); csv.readHeaders();
csv.setDoingQuotes(false); csv.setDoingQuotes(false);
int i = 0; int lname = 0;
int ec = 0;
Statement stmt = con.createStatement();
stmt.executeUpdate("delete from ConceptSynonyms");
while (csv.line()) { while (csv.line()) {
String sql = "INSERT INTO `omop`.`ConceptSynonyms` (`concept_id`, `concept_synonym_name`, `language_concept_id`) VALUES ("+ lname = max(lname, csv.cell("concept_synonym_name"));
sw(csv.cell("concept_id"))+", "+ t.step();
sw(csv.cell("concept_synonym_name"))+", "+
sw(csv.cell("language_concept_id"))+")";
try {
stmt.executeUpdate(sql);
} catch (Exception e) {
System.out.println("error: "+e.getMessage());
System.out.println("i: "+i);
System.out.println("sql: "+sql);
ec++;
}
i++;
if (i % 1000 == 0) {
System.out.println(i);
}
} }
csv.close(); csv.close();
System.out.println("Finished. "+i+" rows, "+ec+" errors"); t.process();
Statement stmt = con.createStatement();
stmt.execute("CREATE TABLE `ConceptSynonyms` (\r\n"+
" `concept_id` bigint NOT NULL,\r\n"+
" `concept_synonym_name` varchar("+lname+") DEFAULT NULL,\r\n"+
" `language_concept_id` bigint DEFAULT NULL\r\n"+
") \r\n"+
"\r\n");
stmt.execute("CREATE INDEX `SynonymId` on ConceptSynonyms (`concept_id`)");
stmt.execute("CREATE INDEX `SynonymLang` on ConceptSynonyms (`language_concept_id`)");
csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_SYNONYM.csv")));
csv.setDelimiter('\t');
csv.readHeaders();
csv.setDoingQuotes(false);
PreparedStatement pstmt = con.prepareStatement("INSERT INTO `ConceptSynonyms` (`concept_id`, `concept_synonym_name`, `language_concept_id`) VALUES (?, ?, ?)");
while (csv.line()) {
try {
pstmt.setString(1, csv.cell("concept_id"));
pstmt.setString(2, csv.cell("concept_synonym_name"));
pstmt.setString(3, csv.cell("language_concept_id"));
pstmt.executeUpdate();
} catch (Exception e) {
t.error(e.getMessage());
}
t.step();
}
csv.close();
t.done();
} }
private void processConceptAncestors(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException { private void processConceptAncestors(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
Tracker t = new Tracker("ConceptAncestors", 67425885);
if (!process) { if (!process) {
t.skip();
return; return;
} }
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_ANCESTOR"
+ ".csv"))); t.process();
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_ANCESTOR.csv")));
csv.setDelimiter('\t'); csv.setDelimiter('\t');
csv.readHeaders(); csv.readHeaders();
csv.setDoingQuotes(false); csv.setDoingQuotes(false);
int i = 0; con.createStatement().execute("CREATE TABLE `ConceptAncestors` (\r\n"+
int ec = 0; " `ancestor_concept_id` bigint NOT NULL,\r\n"+
Statement stmt = con.createStatement(); " `descendant_concept_id` bigint NOT NULL,\r\n"+
" `min_levels_of_separation` int DEFAULT NULL,\r\n"+
" `max_levels_of_separation` int DEFAULT NULL,\r\n"+
" PRIMARY KEY (`ancestor_concept_id`,`descendant_concept_id`)\r\n"+
" ) \r\n"+
"\r\n");
stmt.executeUpdate("delete from ConceptAncestors"); PreparedStatement pstmt = con.prepareStatement("INSERT INTO `ConceptAncestors` (`ancestor_concept_id`, `descendant_concept_id`, `min_levels_of_separation`, `max_levels_of_separation`) VALUES (?, ?, ?, ?)");
while (csv.line()) { while (csv.line()) {
String sql = "INSERT INTO `omop`.`ConceptAncestors` (`ancestor_concept_id`, `descendant_concept_id`, `min_levels_of_separation`, `max_levels_of_separation`) VALUES ("+
sw(csv.cell("ancestor_concept_id"))+", "+
sw(csv.cell("descendant_concept_id"))+", "+
sw(csv.cell("min_levels_of_separation"))+", "+
sw(csv.cell("max_levels_of_separation"))+")";
try { try {
stmt.executeUpdate(sql); pstmt.setString(1, csv.cell("ancestor_concept_id"));
pstmt.setString(2, csv.cell("descendant_concept_id"));
pstmt.setString(3, csv.cell("min_levels_of_separation"));
pstmt.setString(4, csv.cell("max_levels_of_separation"));
pstmt.executeUpdate();
} catch (Exception e) { } catch (Exception e) {
System.out.println("error: "+e.getMessage()); t.error(e.getMessage());
System.out.println("i: "+i);
System.out.println("sql: "+sql);
ec++;
}
i++;
if (i % 1000 == 0) {
System.out.println(i);
} }
t.step();
} }
csv.close(); csv.close();
System.out.println("Finished. "+i+" rows, "+ec+" errors"); t.done();
} }
private void processConceptRelationships(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException { private void processConceptRelationships(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
Tracker t = new Tracker("ConceptRelationships", 47000000);
if (!process) { if (!process) {
t.skip();
return; return;
} }
t.scan();
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_RELATIONSHIP.csv"))); CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_RELATIONSHIP.csv")));
csv.setDelimiter('\t'); csv.setDelimiter('\t');
csv.readHeaders(); csv.readHeaders();
csv.setDoingQuotes(false); csv.setDoingQuotes(false);
int i = 0; int lreason = 0;
Statement stmt = con.createStatement();
while (csv.line()) { while (csv.line()) {
String sql = "INSERT INTO `omop`.`ConceptRelationships` (`concept_id_1`, `concept_id_2`, `relationship_id`, `valid_start_date`, `valid_end_date`, `invalid_reason`) VALUES ("+ lreason = max(lreason, csv.cell("invalid_reason"));
sw(csv.cell("concept_id_1"))+", "+ t.step();
sw(csv.cell("concept_id_2"))+", "+
sw(relationships.get(csv.cell("relationship_id")))+", "+
sw(csv.cell("valid_start_date"))+", "+
sw(csv.cell("valid_end_date"))+", "+
sw(csv.cell("invalid_reason"))+")";
try {
stmt.executeUpdate(sql);
} catch (Exception e) {
System.out.println("error: "+e.getMessage());
System.out.println("i: "+i);
// System.out.println("sql: "+sql);
}
i++;
if (i % 100 == 0) {
System.out.println(i);
}
} }
csv.close(); csv.close();
t.process();
Statement stmt = con.createStatement();
stmt.execute("CREATE TABLE `ConceptRelationships` (\r\n"+
" `concept_id_1` bigint NOT NULL,\r\n"+
" `concept_id_2` bigint NOT NULL,\r\n"+
" `relationship_id` bigint NOT NULL,\r\n"+
" `valid_start_date` date DEFAULT NULL,\r\n"+
" `valid_end_date` date DEFAULT NULL,\r\n"+
" `invalid_reason` varchar("+lreason+") DEFAULT NULL)\r\n"+
" \r\n");
stmt.execute("CREATE INDEX `Reverse` on ConceptRelationships (`concept_id_2`,`concept_id_1`,`relationship_id`)");
stmt.execute("CREATE INDEX `Forward` on ConceptRelationships (`concept_id_1`,`concept_id_2`,`relationship_id`)");
// stmt.execute("CREATE INDEX `type1` on ConceptRelationships (`relationship_id`,`concept_id_1`,`concept_id_2`)");
// stmt.execute("CREATE INDEX `type2` on ConceptRelationships (`relationship_id`,`concept_id_2`,`concept_id_1`)");
csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_RELATIONSHIP.csv")));
csv.setDelimiter('\t');
csv.readHeaders();
csv.setDoingQuotes(false);
PreparedStatement pstmt = con.prepareStatement("INSERT INTO `ConceptRelationships` (`concept_id_1`, `concept_id_2`, `relationship_id`, `valid_start_date`, `valid_end_date`, `invalid_reason`) VALUES (?, ?, ?, ?, ?, ?)");
while (csv.line()) {
try {
pstmt.setString(1, csv.cell("concept_id_1"));
pstmt.setString(2, csv.cell("concept_id_2"));
pstmt.setString(3, relationships.get(csv.cell("relationship_id")));
pstmt.setString(4, csv.cell("valid_start_date"));
pstmt.setString(5, date(csv.cell("valid_end_date")));
pstmt.setString(6, date(csv.cell("invalid_reason")));
pstmt.executeUpdate();
} catch (Exception e) {
t.error(e.getMessage());
}
t.step();
}
csv.close();
t.done();
} }
/**
 * Conversion hook for date cells read from the CSV files.
 *
 * <p>Currently a pass-through: the cell text is handed back exactly as received
 * (including {@code null}), so dates are stored in whatever form the source file uses.
 *
 * @param cell raw cell text, possibly {@code null}
 * @return {@code cell}, unchanged
 */
private String date(final String cell) {
  final String unchanged = cell;
  return unchanged;
}
private String sw(String value) { private String sw(String value) {
if (value == null) { if (value == null) {
return "null"; return "null";
@ -383,10 +689,5 @@ public class OMOPImporter {
return b.toString(); return b.toString();
} }
private void connect() throws SQLException, ClassNotFoundException {
// Class.forName("com.mysql.jdbc.Driver");
con = DriverManager.getConnection("jdbc:mysql://localhost:3306/omop?useSSL=false","root","@AZEq|OzHLl1/[50v[CI");
}
} }