Fix bugs in PHINVads importer

This commit is contained in:
Grahame Grieve 2020-08-22 08:09:50 +10:00
parent 904e600016
commit fe7efcf5a2
3 changed files with 44 additions and 18 deletions

View File

@ -26,6 +26,7 @@ import org.hl7.fhir.utilities.cache.ToolsVersion;
public class PhinVadsImporter { public class PhinVadsImporter {
public static void main(String[] args) throws FileNotFoundException, FHIRException, IOException, ParseException { public static void main(String[] args) throws FileNotFoundException, FHIRException, IOException, ParseException {
// new PhinVadsImporter().importValueSet(TextFile.fileToBytes("C:\\work\\org.hl7.fhir\\packages\\us.cdc.phinvads-source\\source\\PHVS_BirthDefectsLateralityatDiagnosis_HL7_V1.txt"));
PhinVadsImporter self = new PhinVadsImporter(); PhinVadsImporter self = new PhinVadsImporter();
self.init(); self.init();
self.process(args[0], args[1]); self.process(args[0], args[1]);
@ -61,17 +62,20 @@ public class PhinVadsImporter {
private ValueSet importValueSet(byte[] source) throws FHIRException, IOException, ParseException { private ValueSet importValueSet(byte[] source) throws FHIRException, IOException, ParseException {
// first thing do is split into 2 // first thing do is split into 2
List<byte[]> parts = Utilities.splitBytes(source, "\r\n\r\n".getBytes()); List<byte[]> parts = Utilities.splitBytes(source, "\r\n\r\n".getBytes());
if (parts.size() != 2) { if (parts.size() < 2) {
TextFile.bytesToFile(source, "c:\\temp\\phinvads.txt");
throw new FHIRException("Unable to parse phinvads value set: "+parts.size()+" parts found"); throw new FHIRException("Unable to parse phinvads value set: "+parts.size()+" parts found");
} }
CSVReader rdr = new CSVReader(new ByteArrayInputStream(parts.get(0))); CSVReader rdr = new CSVReader(new ByteArrayInputStream(parts.get(0)));
rdr.setDelimiter('\t'); rdr.setDelimiter('\t');
rdr.setMultiline(true);
rdr.readHeaders(); rdr.readHeaders();
rdr.line(); rdr.line();
ValueSet vs = new ValueSet(); ValueSet vs = new ValueSet();
vs.setId(rdr.cell("Value Set OID")); vs.setId(rdr.cell("Value Set OID"));
vs.setUrl("https://phinvads.cdc.gov/fhir/ValueSet/"+vs.getId()); vs.setUrl("https://phinvads.cdc.gov/fhir/ValueSet/"+vs.getId());
vs.getMeta().setSource("https://phinvads.cdc.gov/vads/ViewValueSet.action?oid="+vs.getId());
vs.setVersion(rdr.cell("Value Set Version")); vs.setVersion(rdr.cell("Value Set Version"));
vs.setTitle(rdr.cell("Value Set Name")); vs.setTitle(rdr.cell("Value Set Name"));
vs.setName(rdr.cell("Value Set Code")); vs.setName(rdr.cell("Value Set Code"));
@ -83,7 +87,8 @@ public class PhinVadsImporter {
vs.setDate(new SimpleDateFormat("mm/dd/yyyy").parse(rdr.cell("VS Last Updated Date"))); vs.setDate(new SimpleDateFormat("mm/dd/yyyy").parse(rdr.cell("VS Last Updated Date")));
} }
rdr = new CSVReader(new ByteArrayInputStream(parts.get(1))); rdr = new CSVReader(new ByteArrayInputStream(parts.get(parts.size()-1)));
rdr.setMultiline(true);
rdr.setDelimiter('\t'); rdr.setDelimiter('\t');
rdr.readHeaders(); rdr.readHeaders();
while (rdr.line()) { while (rdr.line()) {
@ -103,8 +108,14 @@ public class PhinVadsImporter {
private ConceptSetComponent getInclude(ValueSet vs, String url, String csver) { private ConceptSetComponent getInclude(ValueSet vs, String url, String csver) {
for (ConceptSetComponent t : vs.getCompose().getInclude()) { for (ConceptSetComponent t : vs.getCompose().getInclude()) {
if (t.getSystem().equals(url) && t.getVersion().equals(csver)) { if (csver == null) {
return t; if (t.getSystem().equals(url) && !t.hasVersion()) {
return t;
}
} else {
if (t.getSystem().equals(url) && t.hasVersion() && t.getVersion().equals(csver)) {
return t;
}
} }
} }
ConceptSetComponent c = vs.getCompose().addInclude(); ConceptSetComponent c = vs.getCompose().addInclude();

View File

@ -61,12 +61,12 @@ public class CSVReader extends InputStreamReader {
private String[] cols; private String[] cols;
private String[] cells; private String[] cells;
private char delimiter = ','; private char delimiter = ',';
private boolean multiline;
public void readHeaders() throws IOException, FHIRException { public void readHeaders() throws IOException, FHIRException {
cols = parseLine(); cols = parseLine();
} }
public boolean line() throws IOException, FHIRException { public boolean line() throws IOException, FHIRException {
if (ready()) { if (ready()) {
cells = parseLine(); cells = parseLine();
@ -128,7 +128,7 @@ public class CSVReader extends InputStreamReader {
/** /**
* Split one line in a CSV file into its rows. Comma's appearing in double quoted strings will * Split one line in a CSV file into its cells. Comma's appearing in double quoted strings will
* not be seen as a separator. * not be seen as a separator.
* @return * @return
* @throws IOException * @throws IOException
@ -140,7 +140,7 @@ public class CSVReader extends InputStreamReader {
StringBuilder b = new StringBuilder(); StringBuilder b = new StringBuilder();
boolean inQuote = false; boolean inQuote = false;
while (ready() && (inQuote || (peek() != '\r' && peek() != '\n'))) { while (more() && !finished(inQuote, res.size())) {
char c = peek(); char c = peek();
next(); next();
if (c == '"') { if (c == '"') {
@ -166,10 +166,21 @@ public class CSVReader extends InputStreamReader {
String[] r = new String[] {}; String[] r = new String[] {};
r = res.toArray(r); r = res.toArray(r);
return r; return r;
} }
private int state = 0; private boolean more() throws IOException {
return state == 1 || ready();
}
/**
 * Decides whether the row currently being parsed is complete.
 *
 * <p>When multiline mode is on and headers have been read, the row is complete as soon as
 * {@code size} equals the number of header columns. It is not complete while {@code size} is
 * anything other than one short of that number. At exactly one short, and in every non-multiline
 * case, the row is complete when the parser is outside a quoted string and the next character
 * is a carriage return or line feed.
 *
 * @param inQuote whether the parser is currently inside a double-quoted string
 * @param size number of cells collected so far for the current row
 * @return {@code true} if the caller should stop consuming characters for this row
 * @throws FHIRException if {@code peek()} fails
 * @throws IOException if {@code peek()} fails reading the underlying stream
 */
private boolean finished(boolean inQuote, int size) throws FHIRException, IOException {
  if (multiline && cols != null) {
    if (size == cols.length) {
      return true;
    }
    // NOTE(review): "one short" presumably accounts for the cell still being accumulated
    // by the caller and not yet added to its result list - confirm against parseLine.
    if (size != cols.length - 1) {
      return false;
    }
  }
  // Outside quotes, a CR or LF terminates the row; peek() is only consulted when not in a quote.
  return !inQuote && (peek() == '\r' || peek() == '\n');
}
private int state = 0;
private char pc; private char pc;
private char peek() throws FHIRException, IOException private char peek() throws FHIRException, IOException
@ -219,5 +230,13 @@ public class CSVReader extends InputStreamReader {
this.delimiter = delimiter; this.delimiter = delimiter;
} }
/**
 * Reports the current multiline setting of this reader.
 *
 * @return the value last passed to {@link #setMultiline(boolean)}, or {@code false} if it was
 *         never set
 */
public boolean isMultiline() {
  return this.multiline;
}
/**
 * Turns multiline mode on or off for this reader.
 *
 * @param multiline the new multiline setting
 */
public void setMultiline(final boolean multiline) {
  this.multiline = multiline;
}
} }

View File

@ -25,7 +25,7 @@ import com.google.gson.JsonObject;
public class PackageHacker { public class PackageHacker {
public static void main(String[] args) throws FileNotFoundException, IOException { public static void main(String[] args) throws FileNotFoundException, IOException {
new PackageHacker().edit("M:\\web\\hl7.org\\fhir\\us\\davinci-pdex-plan-net\\2020Feb\\package.tgz"); new PackageHacker().edit("M:\\web\\hl7.org\\fhir\\2020Sep\\hl7.fhir.r5.expansions.tgz");
} }
private void edit(String name) throws FileNotFoundException, IOException { private void edit(String name) throws FileNotFoundException, IOException {
@ -56,11 +56,11 @@ public class PackageHacker {
} }
private void change(JsonObject npm, Map<String, byte[]> content) throws FileNotFoundException, IOException { private void change(JsonObject npm, Map<String, byte[]> content) throws FileNotFoundException, IOException {
// fixVersions(npm); fixVersions(npm);
// npm.remove("url"); // npm.remove("url");
// npm.addProperty("url", url); // npm.addProperty("url", url);
// npm.remove("version"); npm.remove("version");
// npm.addProperty("version", "1.0.0"); npm.addProperty("version", "4.5.0");
// npm.remove("canonical"); // npm.remove("canonical");
// npm.addProperty("canonical", "http://hl7.org/fhir/us/davinci-drug-formulary"); // npm.addProperty("canonical", "http://hl7.org/fhir/us/davinci-drug-formulary");
//// npm.remove("description"); //// npm.remove("description");
@ -80,11 +80,7 @@ public class PackageHacker {
npm.remove("fhirVersions"); npm.remove("fhirVersions");
JsonArray a = new JsonArray(); JsonArray a = new JsonArray();
npm.add("fhirVersions", a); npm.add("fhirVersions", a);
a.add("4.2.0"); a.add("4.5.0");
npm.remove("fhir-version-list");
a = new JsonArray();
npm.add("fhir-version-list", a);
a.add("4.2.0");
} }
private void setProperty(JsonObject npm, String name, String value) { private void setProperty(JsonObject npm, String name, String value) {