OMOP importer
This commit is contained in:
parent
09b653d9ed
commit
2cb7c052af
|
@ -0,0 +1,392 @@
|
|||
package org.hl7.fhir.convertors.misc;
|
||||
|
||||
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.CSVReader;
import org.hl7.fhir.utilities.Utilities;
|
||||
|
||||
public class OMOPImporter {
|
||||
|
||||
|
||||
private Connection con;
|
||||
private Map<String, String> relationships = new HashMap<>();
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
new OMOPImporter().process("/Users/grahamegrieve/Downloads/vocabulary_download_v5_{97cc5432-0dc9-4f14-9da2-d0624129d2f7}_1688068174909");
|
||||
}
|
||||
|
||||
private void process(String folder) throws ClassNotFoundException, SQLException, FHIRException, FileNotFoundException, IOException {
|
||||
connect();
|
||||
|
||||
loadRelationships(folder, true);
|
||||
processVocabularies(folder, false);
|
||||
processDomains(folder, false);
|
||||
processConceptClasses(folder, false);
|
||||
processDrugStrength(folder, false);
|
||||
processConcepts(folder, false);
|
||||
processConceptRelationships(folder, false);
|
||||
processConceptSynonyms(folder, false);
|
||||
processConceptAncestors(folder, false);
|
||||
}
|
||||
|
||||
private void loadRelationships(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
|
||||
|
||||
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "RELATIONSHIP.csv")));
|
||||
csv.setDelimiter('\t');
|
||||
csv.readHeaders();
|
||||
csv.setDoingQuotes(false);
|
||||
int i = 0;
|
||||
Statement stmt = con.createStatement();
|
||||
stmt.executeUpdate("delete from Relationships");
|
||||
while (csv.line()) {
|
||||
relationships.put(csv.cell("relationship_id"), csv.cell("relationship_concept_id"));
|
||||
if (process) {
|
||||
String sql = "INSERT INTO `omop`.`Relationships` (`relationship_concept_id`, `relationship_id`, `relationship_name`, `is_hierarchical`, `defines_ancestry`, `reverse_relationship_id`) VALUES ("+
|
||||
sw(csv.cell("relationship_concept_id"))+", "+
|
||||
sw(csv.cell("relationship_id"))+", "+
|
||||
sw(csv.cell("relationship_name"))+", "+
|
||||
sw(csv.cell("is_hierarchical"))+", "+
|
||||
sw(csv.cell("defines_ancestry"))+", "+
|
||||
sw(csv.cell("reverse_relationship_id"))+")";
|
||||
try {
|
||||
stmt.executeUpdate(sql);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: "+e.getMessage());
|
||||
System.out.println("i: "+i);
|
||||
// System.out.println("sql: "+sql);
|
||||
}
|
||||
}
|
||||
i++;
|
||||
if (i % 1000 == 0) {
|
||||
System.out.println(i);
|
||||
}
|
||||
}
|
||||
csv.close();
|
||||
|
||||
}
|
||||
|
||||
private void processVocabularies(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
|
||||
if (!process) {
|
||||
return;
|
||||
}
|
||||
|
||||
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "VOCABULARY.csv")));
|
||||
csv.setDelimiter('\t');
|
||||
csv.readHeaders();
|
||||
csv.setDoingQuotes(false);
|
||||
int i = 0;
|
||||
Statement stmt = con.createStatement();
|
||||
stmt.executeUpdate("delete from Vocabularies");
|
||||
while (csv.line()) {
|
||||
String sql = "INSERT INTO `omop`.`Vocabularies` (`vocabulary_concept_id`, `vocabulary_id`, `vocabulary_name`, `vocabulary_reference`, `vocabulary_version`) VALUES ("+
|
||||
sw(csv.cell("vocabulary_concept_id"))+", "+
|
||||
sw(csv.cell("vocabulary_id"))+", "+
|
||||
sw(csv.cell("vocabulary_name"))+", "+
|
||||
sw(csv.cell("vocabulary_reference"))+", "+
|
||||
sw(csv.cell("vocabulary_version"))+")";
|
||||
try {
|
||||
stmt.executeUpdate(sql);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: "+e.getMessage());
|
||||
System.out.println("i: "+i);
|
||||
// System.out.println("sql: "+sql);
|
||||
}
|
||||
i++;
|
||||
if (i % 1000 == 0) {
|
||||
System.out.println(i);
|
||||
}
|
||||
}
|
||||
csv.close();
|
||||
}
|
||||
|
||||
|
||||
private void processDrugStrength(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
|
||||
if (!process) {
|
||||
return;
|
||||
}
|
||||
|
||||
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "DRUG_STRENGTH.csv")));
|
||||
csv.setDelimiter('\t');
|
||||
csv.readHeaders();
|
||||
csv.setDoingQuotes(false);
|
||||
int i = 0;
|
||||
Statement stmt = con.createStatement();
|
||||
stmt.executeUpdate("delete from DrugStrengths");
|
||||
while (csv.line()) {
|
||||
String sql = "INSERT INTO `omop`.`DrugStrengths` (`drug_concept_id`, `ingredient_concept_id`, `amount_value`, `amount_unit_concept_id`, `numerator_value`, `numerator_unit_concept_id`, `denominator_value`, "
|
||||
+ "`denominator_unit_concept_id`, `box_size`, `valid_start_date`, `valid_end_date`, `invalid_reason`) VALUES ("+
|
||||
sw(csv.cell("drug_concept_id"))+", "+
|
||||
sw(csv.cell("ingredient_concept_id"))+", "+
|
||||
sw(csv.cell("amount_value"))+", "+
|
||||
sw(csv.cell("amount_unit_concept_id"))+", "+
|
||||
sw(csv.cell("numerator_value"))+", "+
|
||||
sw(csv.cell("numerator_unit_concept_id"))+", "+
|
||||
sw(csv.cell("denominator_value"))+", "+
|
||||
sw(csv.cell("denominator_unit_concept_id"))+", "+
|
||||
sw(csv.cell("box_size"))+", "+
|
||||
sw(csv.cell("valid_start_date"))+", "+
|
||||
sw(csv.cell("valid_end_date"))+", "+
|
||||
sw(csv.cell("invalid_reason"))+")";
|
||||
try {
|
||||
stmt.executeUpdate(sql);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: "+e.getMessage());
|
||||
System.out.println("i: "+i);
|
||||
System.out.println("sql: "+sql);
|
||||
}
|
||||
i++;
|
||||
if (i % 100 == 0) {
|
||||
System.out.println(i);
|
||||
}
|
||||
}
|
||||
csv.close();
|
||||
}
|
||||
|
||||
|
||||
private void processDomains(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
|
||||
if (!process) {
|
||||
return;
|
||||
}
|
||||
|
||||
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "DOMAIN.csv")));
|
||||
csv.setDelimiter('\t');
|
||||
csv.readHeaders();
|
||||
csv.setDoingQuotes(false);
|
||||
int i = 0;
|
||||
Statement stmt = con.createStatement();
|
||||
stmt.executeUpdate("delete from Domains");
|
||||
while (csv.line()) {
|
||||
String sql = "INSERT INTO `omop`.`Domains` (`domain_concept_id`, `domain_id`, `domain_name`) VALUES ("+
|
||||
sw(csv.cell("domain_concept_id"))+", "+
|
||||
sw(csv.cell("domain_id"))+", "+
|
||||
sw(csv.cell("domain_name"))+")";
|
||||
try {
|
||||
stmt.executeUpdate(sql);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: "+e.getMessage());
|
||||
System.out.println("i: "+i);
|
||||
// System.out.println("sql: "+sql);
|
||||
}
|
||||
i++;
|
||||
if (i % 1000 == 0) {
|
||||
System.out.println(i);
|
||||
}
|
||||
}
|
||||
csv.close();
|
||||
}
|
||||
|
||||
|
||||
private void processConceptClasses(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
|
||||
if (!process) {
|
||||
return;
|
||||
}
|
||||
|
||||
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_CLASS.csv")));
|
||||
csv.setDelimiter('\t');
|
||||
csv.readHeaders();
|
||||
csv.setDoingQuotes(false);
|
||||
int i = 0;
|
||||
Statement stmt = con.createStatement();
|
||||
stmt.executeUpdate("delete from ConceptClasses");
|
||||
while (csv.line()) {
|
||||
String sql = "INSERT INTO `omop`.`ConceptClasses` (`concept_class_concept_id`, `concept_class_id`, `concept_class_name`) VALUES ("+
|
||||
sw(csv.cell("concept_class_concept_id"))+", "+
|
||||
sw(csv.cell("concept_class_id"))+", "+
|
||||
sw(csv.cell("concept_class_name"))+")";
|
||||
try {
|
||||
stmt.executeUpdate(sql);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: "+e.getMessage());
|
||||
System.out.println("i: "+i);
|
||||
// System.out.println("sql: "+sql);
|
||||
}
|
||||
i++;
|
||||
if (i % 1000 == 0) {
|
||||
System.out.println(i);
|
||||
}
|
||||
}
|
||||
csv.close();
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void processConcepts(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
|
||||
if (!process) {
|
||||
return;
|
||||
}
|
||||
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT.csv")));
|
||||
csv.setDelimiter('\t');
|
||||
csv.readHeaders();
|
||||
csv.setDoingQuotes(false);
|
||||
int i = 0;
|
||||
Statement stmt = con.createStatement();
|
||||
while (csv.line()) {
|
||||
String sql = "INSERT INTO `omop`.`Concepts` (`concept_id`, `concept_name`, `domain_id`, `vocabulary_id`, `concept_class_id`, `standard_concept`, `concept_code`, `valid_start_date`, `valid_end_date`, `invalid_reason`) VALUES ("+
|
||||
sw(csv.cell("concept_id"))+", "+
|
||||
sw(csv.cell("concept_name"))+", "+
|
||||
sw(csv.cell("domain_id"))+", "+
|
||||
sw(csv.cell("vocabulary_id"))+", "+
|
||||
sw(csv.cell("concept_class_id"))+", "+
|
||||
sw(csv.cell("standard_concept"))+", "+
|
||||
sw(csv.cell("concept_code"))+", "+
|
||||
sw(csv.cell("valid_start_date"))+", "+
|
||||
sw(csv.cell("valid_end_date"))+", "+
|
||||
sw(csv.cell("invalid_reason"))+")";
|
||||
try {
|
||||
stmt.executeUpdate(sql);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: "+e.getMessage());
|
||||
System.out.println("i: "+i);
|
||||
// System.out.println("sql: "+sql);
|
||||
}
|
||||
i++;
|
||||
if (i % 1000 == 0) {
|
||||
System.out.println(i);
|
||||
}
|
||||
}
|
||||
csv.close();
|
||||
|
||||
}
|
||||
|
||||
private void processConceptSynonyms(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
|
||||
if (!process) {
|
||||
return;
|
||||
}
|
||||
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_SYNONYM.csv")));
|
||||
csv.setDelimiter('\t');
|
||||
csv.readHeaders();
|
||||
csv.setDoingQuotes(false);
|
||||
int i = 0;
|
||||
int ec = 0;
|
||||
Statement stmt = con.createStatement();
|
||||
|
||||
stmt.executeUpdate("delete from ConceptSynonyms");
|
||||
while (csv.line()) {
|
||||
String sql = "INSERT INTO `omop`.`ConceptSynonyms` (`concept_id`, `concept_synonym_name`, `language_concept_id`) VALUES ("+
|
||||
sw(csv.cell("concept_id"))+", "+
|
||||
sw(csv.cell("concept_synonym_name"))+", "+
|
||||
sw(csv.cell("language_concept_id"))+")";
|
||||
try {
|
||||
stmt.executeUpdate(sql);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: "+e.getMessage());
|
||||
System.out.println("i: "+i);
|
||||
System.out.println("sql: "+sql);
|
||||
ec++;
|
||||
}
|
||||
i++;
|
||||
if (i % 1000 == 0) {
|
||||
System.out.println(i);
|
||||
}
|
||||
}
|
||||
csv.close();
|
||||
System.out.println("Finished. "+i+" rows, "+ec+" errors");
|
||||
}
|
||||
|
||||
|
||||
private void processConceptAncestors(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
|
||||
if (!process) {
|
||||
return;
|
||||
}
|
||||
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_ANCESTOR"
|
||||
+ ".csv")));
|
||||
csv.setDelimiter('\t');
|
||||
csv.readHeaders();
|
||||
csv.setDoingQuotes(false);
|
||||
int i = 0;
|
||||
int ec = 0;
|
||||
Statement stmt = con.createStatement();
|
||||
|
||||
stmt.executeUpdate("delete from ConceptAncestors");
|
||||
while (csv.line()) {
|
||||
String sql = "INSERT INTO `omop`.`ConceptAncestors` (`ancestor_concept_id`, `descendant_concept_id`, `min_levels_of_separation`, `max_levels_of_separation`) VALUES ("+
|
||||
sw(csv.cell("ancestor_concept_id"))+", "+
|
||||
sw(csv.cell("descendant_concept_id"))+", "+
|
||||
sw(csv.cell("min_levels_of_separation"))+", "+
|
||||
sw(csv.cell("max_levels_of_separation"))+")";
|
||||
try {
|
||||
stmt.executeUpdate(sql);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: "+e.getMessage());
|
||||
System.out.println("i: "+i);
|
||||
System.out.println("sql: "+sql);
|
||||
ec++;
|
||||
}
|
||||
i++;
|
||||
if (i % 1000 == 0) {
|
||||
System.out.println(i);
|
||||
}
|
||||
}
|
||||
csv.close();
|
||||
System.out.println("Finished. "+i+" rows, "+ec+" errors");
|
||||
}
|
||||
|
||||
|
||||
private void processConceptRelationships(String folder, boolean process) throws FHIRException, FileNotFoundException, IOException, SQLException {
|
||||
if (!process) {
|
||||
return;
|
||||
}
|
||||
CSVReader csv = new CSVReader(new FileInputStream(Utilities.path(folder, "CONCEPT_RELATIONSHIP.csv")));
|
||||
csv.setDelimiter('\t');
|
||||
csv.readHeaders();
|
||||
csv.setDoingQuotes(false);
|
||||
int i = 0;
|
||||
Statement stmt = con.createStatement();
|
||||
while (csv.line()) {
|
||||
String sql = "INSERT INTO `omop`.`ConceptRelationships` (`concept_id_1`, `concept_id_2`, `relationship_id`, `valid_start_date`, `valid_end_date`, `invalid_reason`) VALUES ("+
|
||||
sw(csv.cell("concept_id_1"))+", "+
|
||||
sw(csv.cell("concept_id_2"))+", "+
|
||||
sw(relationships.get(csv.cell("relationship_id")))+", "+
|
||||
sw(csv.cell("valid_start_date"))+", "+
|
||||
sw(csv.cell("valid_end_date"))+", "+
|
||||
sw(csv.cell("invalid_reason"))+")";
|
||||
try {
|
||||
stmt.executeUpdate(sql);
|
||||
} catch (Exception e) {
|
||||
System.out.println("error: "+e.getMessage());
|
||||
System.out.println("i: "+i);
|
||||
// System.out.println("sql: "+sql);
|
||||
}
|
||||
i++;
|
||||
if (i % 100 == 0) {
|
||||
System.out.println(i);
|
||||
}
|
||||
}
|
||||
csv.close();
|
||||
|
||||
}
|
||||
|
||||
|
||||
private String sw(String value) {
|
||||
if (value == null) {
|
||||
return "null";
|
||||
}
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append('"');
|
||||
for (char ch : value.toCharArray()) {
|
||||
if (ch == '"') {
|
||||
b.append('"');
|
||||
}
|
||||
b.append(ch);
|
||||
}
|
||||
b.append('"');
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
private void connect() throws SQLException, ClassNotFoundException {
|
||||
// Class.forName("com.mysql.jdbc.Driver");
|
||||
con = DriverManager.getConnection("jdbc:mysql://localhost:3306/omop?useSSL=false","root","@AZEq|OzHLl1/[50v[CI");
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -62,6 +62,7 @@ public class CSVReader extends InputStreamReader {
|
|||
private String[] cells;
|
||||
private char delimiter = ',';
|
||||
private boolean multiline;
|
||||
private boolean doingQuotes = true;
|
||||
|
||||
public void readHeaders() throws IOException, FHIRException {
|
||||
cols = parseLine();
|
||||
|
@ -86,11 +87,13 @@ public class CSVReader extends InputStreamReader {
|
|||
public String cell(String name) {
|
||||
int index = -1;
|
||||
for (int i = 0; i < cols.length; i++) {
|
||||
if (name.equals(cols[i].trim()))
|
||||
if (name.equals(cols[i].trim())) {
|
||||
index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (index == -1)
|
||||
throw new FHIRException("no cell "+name);
|
||||
throw new FHIRException("no cell "+name+" in "+cols);
|
||||
String s = cells.length > index ? cells[index] : null;
|
||||
if (Utilities.noString(s))
|
||||
return null;
|
||||
|
@ -143,7 +146,7 @@ public class CSVReader extends InputStreamReader {
|
|||
while (more() && !finished(inQuote, res.size())) {
|
||||
char c = peek();
|
||||
next();
|
||||
if (c == '"') {
|
||||
if (c == '"' && doingQuotes) {
|
||||
if (ready() && peek() == '"') {
|
||||
b.append(c);
|
||||
next();
|
||||
|
@ -238,5 +241,13 @@ public class CSVReader extends InputStreamReader {
|
|||
this.multiline = multiline;
|
||||
}
|
||||
|
||||
public boolean isDoingQuotes() {
|
||||
return doingQuotes;
|
||||
}
|
||||
|
||||
public void setDoingQuotes(boolean doingQuotes) {
|
||||
this.doingQuotes = doingQuotes;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -917,6 +917,10 @@ public class I18nConstants {
|
|||
public static final String ED_INVARIANT_EXPRESSION_CONFLICT = "ED_INVARIANT_EXPRESSION_CONFLICT";
|
||||
public static final String ED_INVARIANT_EXPRESSION_ERROR = "ED_INVARIANT_EXPRESSION_ERROR";
|
||||
public static final String SNAPSHOT_IS_EMPTY = "SNAPSHOT_IS_EMPTY";
|
||||
public static final String EXTENSION_CONTEXT_UNABLE_TO_CHECK_PROFILE = "EXTENSION_CONTEXT_UNABLE_TO_CHECK_PROFILE";
|
||||
public static final String EXTENSION_CONTEXT_UNABLE_TO_FIND_PROFILE = "EXTENSION_CONTEXT_UNABLE_TO_FIND_PROFILE";
|
||||
public static final String TERMINOLOGY_TX_HINT = "TERMINOLOGY_TX_HINT";
|
||||
public static final String TERMINOLOGY_TX_WARNING = "TERMINOLOGY_TX_WARNING";
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue