Updates to PhinVads and VSAC value set importing

This commit is contained in:
Grahame Grieve 2020-08-26 10:34:36 +10:00
parent 3ff241bcf8
commit da91f17011
3 changed files with 200 additions and 49 deletions

View File

@ -0,0 +1,85 @@
package org.hl7.fhir.convertors.misc;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.r5.context.IWorkerContext;
import org.hl7.fhir.r5.context.SimpleWorkerContext;
import org.hl7.fhir.r5.model.ValueSet;
import org.hl7.fhir.r5.model.ValueSet.ConceptSetComponent;
import org.hl7.fhir.utilities.cache.FilesystemPackageCacheManager;
import org.hl7.fhir.utilities.cache.NpmPackage;
import org.hl7.fhir.utilities.cache.ToolsVersion;
import org.w3c.dom.Document;
/**
 * Common base for value set importers: provides a loaded worker context, code system
 * version normalisation, lookup/creation of compose includes, and XML loading.
 */
public class OIDBasedValueSetImporter {

  /** Worker context; {@code null} until {@link #init()} has been called. */
  protected IWorkerContext context;

  /**
   * Builds the worker context from the "hl7.fhir.r5.core" package (version "current") and
   * then loads "hl7.terminology" on top of it, with duplicate loading allowed.
   *
   * @throws FileNotFoundException propagated from package loading
   * @throws FHIRException propagated from package/context loading
   * @throws IOException propagated from package loading
   */
  protected void init() throws FileNotFoundException, FHIRException, IOException {
    FilesystemPackageCacheManager pcm = new FilesystemPackageCacheManager(true, ToolsVersion.TOOLS_VERSION);
    NpmPackage npm = pcm.loadPackage("hl7.fhir.r5.core", "current");
    SimpleWorkerContext ctxt = SimpleWorkerContext.fromPackage(npm);
    ctxt.setAllowLoadingDuplicates(true);
    ctxt.loadFromPackage(pcm.loadPackage("hl7.terminology"), null);
    context = ctxt;
  }

  /**
   * Rewrites a code system version string into the form used for the given code system.
   *
   * @param url   canonical URL of the code system
   * @param csver version as found in the source; may be {@code null}
   * @return the adjusted version, or {@code null} when {@code csver} is {@code null}
   */
  protected String fixVersionforSystem(String url, String csver) {
    // getInclude() treats a null version as "unversioned", so pass null through instead of
    // failing on csver.length() or producing a ".../version/null" SNOMED URI.
    if (csver == null) {
      return null;
    }
    if ("http://snomed.info/sct".equals(url)) {
      // NOTE(review): 731000124108 is hard-coded; presumably the SNOMED CT US Edition module - confirm.
      return "http://snomed.info/sct/731000124108/version/" + csver;
    }
    if ("http://www.nlm.nih.gov/research/umls/rxnorm".equals(url) && csver.length() == 8) {
      // Moves the last four characters in front of the first four
      // (presumably YYYYMMDD -> MMDDYYYY - confirm against the source data).
      return csver.substring(4, 8) + csver.substring(0, 4);
    }
    // Every other system (including http://loinc.org) keeps its version unchanged.
    return csver;
  }

  /**
   * Finds the include in {@code vs.compose} for the given system and version, adding a new
   * one when none matches.
   *
   * @param vs    value set whose compose is searched and, if necessary, extended
   * @param url   code system URL to match
   * @param csver version to match; {@code null} matches only includes without a version
   * @return the matching include, or a newly added include with system and version set
   */
  protected ConceptSetComponent getInclude(ValueSet vs, String url, String csver) {
    for (ConceptSetComponent t : vs.getCompose().getInclude()) {
      // Null-safe comparison: an include without a system must not abort the search.
      if (!java.util.Objects.equals(url, t.getSystem())) {
        continue;
      }
      boolean versionMatches = csver == null
          ? !t.hasVersion()
          : t.hasVersion() && csver.equals(t.getVersion());
      if (versionMatches) {
        return t;
      }
    }
    ConceptSetComponent c = vs.getCompose().addInclude();
    c.setSystem(url);
    c.setVersion(csver);
    return c;
  }

  /**
   * Parses an XML document with a namespace-aware parser that disallows DOCTYPE declarations
   * and has external entities, external DTD loading, XInclude and entity-reference expansion
   * switched off.
   *
   * @param fn stream to read the XML from; it is not closed here
   * @return the parsed DOM document
   * @throws Exception if the parser cannot be configured or the content cannot be parsed
   */
  protected Document loadXml(InputStream fn) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    factory.setXIncludeAware(false);
    factory.setExpandEntityReferences(false);
    factory.setNamespaceAware(true);
    DocumentBuilder builder = factory.newDocumentBuilder();
    return builder.parse(fn);
  }
}

View File

@ -23,28 +23,18 @@ import org.hl7.fhir.utilities.cache.FilesystemPackageCacheManager;
import org.hl7.fhir.utilities.cache.NpmPackage;
import org.hl7.fhir.utilities.cache.ToolsVersion;
public class PhinVadsImporter {
public class PhinVadsImporter extends OIDBasedValueSetImporter {
public static void main(String[] args) throws FileNotFoundException, FHIRException, IOException, ParseException {
// new PhinVadsImporter().importValueSet(TextFile.fileToBytes("C:\\work\\org.hl7.fhir\\packages\\us.cdc.phinvads-source\\source\\PHVS_BirthDefectsLateralityatDiagnosis_HL7_V1.txt"));
PhinVadsImporter self = new PhinVadsImporter();
self.init();
self.process(args[0], args[1]);
}
private IWorkerContext context;
public PhinVadsImporter() {
public PhinVadsImporter() throws FileNotFoundException, FHIRException, IOException {
super();
}
private void init() throws FileNotFoundException, FHIRException, IOException {
FilesystemPackageCacheManager pcm = new FilesystemPackageCacheManager(true, ToolsVersion.TOOLS_VERSION);
NpmPackage npm = pcm.loadPackage("hl7.fhir.r5.core", "current");
SimpleWorkerContext ctxt = SimpleWorkerContext.fromPackage(npm);
ctxt.setAllowLoadingDuplicates(true);
ctxt.loadFromPackage(pcm.loadPackage("hl7.terminology"), null);
context = ctxt;
init();
}
private void process(String source, String dest) {
@ -84,6 +74,8 @@ public class PhinVadsImporter {
vs.setDescription(rdr.cell("Value Set Definition"));
if ("Published".equals(rdr.cell("Value Set Status"))) {
vs.setStatus(PublicationStatus.ACTIVE);
} else {
vs.setStatus(PublicationStatus.DRAFT);
}
if (rdr.has("VS Last Updated Date")) {
vs.setDate(new SimpleDateFormat("mm/dd/yyyy").parse(rdr.cell("VS Last Updated Date")));
@ -109,40 +101,5 @@ public class PhinVadsImporter {
return vs;
}
private String fixVersionforSystem(String url, String csver) {
if ("http://snomed.info/sct".equals(url)) {
return "http://snomed.info/sct|http://snomed.info/sct/731000124108/"+csver;
}
if ("http://loinc.org".equals(url)) {
return csver;
}
if ("http://www.nlm.nih.gov/research/umls/rxnorm".equals(url)) {
if (csver.length() == 8) {
return csver.substring(4,6)+csver.substring(6,8)+csver.substring(0,4);
} else {
return csver;
}
}
return csver;
}
private ConceptSetComponent getInclude(ValueSet vs, String url, String csver) {
for (ConceptSetComponent t : vs.getCompose().getInclude()) {
if (csver == null) {
if (t.getSystem().equals(url) && !t.hasVersion()) {
return t;
}
} else {
if (t.getSystem().equals(url) && t.hasVersion() && t.getVersion().equals(csver)) {
return t;
}
}
}
ConceptSetComponent c = vs.getCompose().addInclude();
c.setSystem(url);
c.setVersion(csver);
return c;
}
}

View File

@ -0,0 +1,109 @@
package org.hl7.fhir.convertors.misc;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import javax.xml.bind.annotation.XmlElement;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.r5.context.IWorkerContext;
import org.hl7.fhir.r5.context.SimpleWorkerContext;
import org.hl7.fhir.r5.formats.JsonParser;
import org.hl7.fhir.r5.model.Enumerations.PublicationStatus;
import org.hl7.fhir.r5.model.ValueSet;
import org.hl7.fhir.r5.model.ValueSet.ConceptSetComponent;
import org.hl7.fhir.utilities.CSVReader;
import org.hl7.fhir.utilities.TextFile;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.utilities.cache.FilesystemPackageCacheManager;
import org.hl7.fhir.utilities.cache.NpmPackage;
import org.hl7.fhir.utilities.cache.ToolsVersion;
import org.hl7.fhir.utilities.xml.XMLUtil;
import org.w3c.dom.Element;
/**
 * Converts VSAC value set XML files into FHIR R5 {@link ValueSet} resources and writes each
 * one out as a JSON file.
 */
public class VSACImporter extends OIDBasedValueSetImporter {

  /**
   * Command line entry point.
   *
   * @param args {@code args[0]} is the folder containing the source files,
   *             {@code args[1]} is the folder the JSON files are written to
   */
  public static void main(String[] args) throws FileNotFoundException, FHIRException, IOException, ParseException {
    VSACImporter self = new VSACImporter();
    self.process(args[0], args[1]);
  }

  /**
   * Creates the importer and immediately loads the worker context via {@code init()}.
   */
  public VSACImporter() throws FileNotFoundException, FHIRException, IOException {
    super();
    init();
  }

  /**
   * Imports every file found directly in {@code source} and writes the resulting value sets
   * to {@code dest} as {@code ValueSet-<id>.json}. A failure on one file is printed and the
   * remaining files are still processed.
   *
   * @param source folder containing the source files
   * @param dest   folder the JSON files are written to
   * @throws IllegalArgumentException if {@code source} cannot be listed as a directory
   */
  private void process(String source, String dest) {
    // listFiles() returns null (not an empty array) when the path is not a directory or
    // cannot be read; report that clearly instead of failing with a NullPointerException.
    File[] files = new File(source).listFiles();
    if (files == null) {
      throw new IllegalArgumentException("Source is not a readable directory: " + source);
    }
    for (File f : files) {
      try {
        System.out.println("Process "+f.getName());
        List<ValueSet> vsl = importValueSet(TextFile.fileToBytes(f));
        for (ValueSet vs : vsl) {
          if (vs.getId() != null) {
            // Close the stream ourselves rather than relying on the serializer to do it.
            try (FileOutputStream out = new FileOutputStream(Utilities.path(dest, "ValueSet-"+vs.getId()+".json"))) {
              new JsonParser().compose(out, vs);
            }
          }
        }
      } catch (Exception e) {
        // Best effort: report the problem and carry on with the next file.
        e.printStackTrace();
      }
    }
  }

  /**
   * Builds one {@link ValueSet} per {@code DescribedValueSet} child of the document element.
   *
   * <p>Id, version and title come from the {@code ID}, {@code version} and
   * {@code displayName} attributes; description, status and date come from the
   * {@code Purpose}, {@code Status} and {@code RevisionDate} child elements. Each child of
   * {@code ConceptList} becomes a concept in the include matching its code system and
   * version.
   *
   * @param source raw XML content
   * @return the value sets found, in document order
   * @throws Exception if the XML cannot be parsed
   */
  private List<ValueSet> importValueSet(byte[] source) throws Exception {
    List<ValueSet> res = new ArrayList<ValueSet>();
    Element x = loadXml(new ByteArrayInputStream(source)).getDocumentElement();
    List<Element> vl = XMLUtil.getNamedChildren(x, "DescribedValueSet");
    for (Element v : vl) {
      ValueSet vs = new ValueSet();
      vs.setId(v.getAttribute("ID"));
      vs.setUrl("http://vsac.nlm.nih.gov/fhir/ValueSet/"+vs.getId());
      vs.getMeta().setSource("https://vsac.nlm.nih.gov/valueset/"+vs.getId()+"/expansion");
      vs.setVersion(v.getAttribute("version"));
      vs.setTitle(v.getAttribute("displayName"));
      vs.setName(Utilities.titleize(vs.getTitle()).replace(" ", ""));
      Element d = XMLUtil.getNamedChild(v, "Purpose");
      if (d != null) {
        vs.setDescription(d.getTextContent());
      }
      // Only an explicit "Active" status maps to ACTIVE; anything else, or no status, is DRAFT.
      Element s = XMLUtil.getNamedChild(v, "Status");
      if (s != null && "Active".equals(s.getTextContent())) {
        vs.setStatus(PublicationStatus.ACTIVE);
      } else {
        vs.setStatus(PublicationStatus.DRAFT);
      }
      Element dt = XMLUtil.getNamedChild(v, "RevisionDate");
      if (dt != null) {
        vs.getDateElement().setValueAsString(dt.getTextContent());
      }
      Element cl = XMLUtil.getNamedChild(v, "ConceptList");
      Element cc = XMLUtil.getFirstChild(cl);
      while (cc != null) {
        String code = cc.getAttribute("code");
        String display = cc.getAttribute("displayName");
        String csoid = cc.getAttribute("codeSystem");
        String csver = cc.getAttribute("codeSystemVersion");
        // The source identifies code systems by OID; fall back to a urn:oid: URI when the
        // context does not know the OID.
        String url = context.oid2Uri(csoid);
        if (url == null) {
          url = "urn:oid:"+csoid;
        }
        csver = fixVersionforSystem(url, csver);
        ConceptSetComponent inc = getInclude(vs, url, csver);
        inc.addConcept().setCode(code).setDisplay(display);
        cc = XMLUtil.getNextSibling(cc);
      }
      res.add(vs);
    }
    return res;
  }
}