Add support for ICD-10 International Version (Fixes #3734) (#3849)

* Update CLI upload examples for R4.

* Fixes #3734 Add support for ICD-10 (International)

Include all concept properties in ICD-10 loader.

Remove newlines and tabs from ICD Rubrics.

* Bump Msg code and use in Term Loader.

* Added a changelog entry for this addition.

* Update icd10-dummy-test-en.xml

* Added a default impl to the new interface method to avoid breaking upstream projects.

Co-authored-by: Kevin Dougan <kevin.dougan@smilecdr.com>
Co-authored-by: Kevin Dougan SmileCDR <72025369+KevinDougan-SmileCDR@users.noreply.github.com>
Co-authored-by: Kevin SmileCDR <kevinsmilecdr@Kevins-MacBook-Pro-2.local>
This commit is contained in:
Kai Kewley 2022-09-06 14:11:45 +01:00 committed by GitHub
parent 100b8f9190
commit dd5c49a9ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 325 additions and 8 deletions

View File

@ -25,7 +25,7 @@ public final class Msg {
/** /**
* IMPORTANT: Please update the following comment after you add a new code * IMPORTANT: Please update the following comment after you add a new code
* Last used code value: 2134 * Last used code value: 2135
*/ */
private Msg() {} private Msg() {}

View File

@ -1870,10 +1870,10 @@ public class XmlUtil {
} }
public static Document parseDocument(Reader reader) throws SAXException, IOException { public static Document parseDocument(Reader reader) throws SAXException, IOException {
return parseDocument(reader, true); return parseDocument(reader, true, false);
} }
public static Document parseDocument(Reader theReader, boolean theNamespaceAware) throws SAXException, IOException { public static Document parseDocument(Reader theReader, boolean theNamespaceAware, boolean allowDoctypeDeclaration) throws SAXException, IOException {
DocumentBuilder builder; DocumentBuilder builder;
try { try {
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
@ -1881,7 +1881,7 @@ public class XmlUtil {
docBuilderFactory.setXIncludeAware(false); docBuilderFactory.setXIncludeAware(false);
docBuilderFactory.setExpandEntityReferences(false); docBuilderFactory.setExpandEntityReferences(false);
try { try {
docBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); docBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", !allowDoctypeDeclaration);
docBuilderFactory.setFeature("http://xml.org/sax/features/external-general-entities", false); docBuilderFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
docBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); docBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
docBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); docBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

View File

@ -0,0 +1,4 @@
---
type: add
issue: 3734
title: "Added support for loading the International version of ICD-10. Thanks to kaicode for the contribution!"

View File

@ -74,19 +74,25 @@ Note that the path and exact filename of the terminology files will likely need
### SNOMED CT ### SNOMED CT
``` ```
./hapi-fhir-cli upload-terminology -d Downloads/SnomedCT_RF2Release_INT_20160131.zip -f dstu3 -t http://localhost:8080/baseDstu3 -u http://snomed.info/sct ./hapi-fhir-cli upload-terminology -d Downloads/SnomedCT_InternationalRF2_PRODUCTION_20220131T120000Z.zip -v r4 -t http://localhost:8080/fhir -u http://snomed.info/sct
``` ```
### LOINC ### LOINC
``` ```
./hapi-fhir-cli upload-terminology -d Downloads/LOINC_2.54_MULTI-AXIAL_HIERARCHY.zip -d Downloads/LOINC_2.54_Text.zip -f dstu3 -t http://localhost:8080/baseDstu3 -u http://loinc.org ./hapi-fhir-cli upload-terminology -d Downloads/LOINC_2.54_MULTI-AXIAL_HIERARCHY.zip -d Downloads/LOINC_2.54_Text.zip -v r4 -t http://localhost:8080/fhir -u http://loinc.org
```
### ICD-10 (International Version)
```
./hapi-fhir-cli upload-terminology -d Downloads/icdClaML2019ens.zip -v r4 -t http://localhost:8080/fhir -u http://hl7.org/fhir/sid/icd-10
``` ```
### ICD-10-CM ### ICD-10-CM
``` ```
./hapi-fhir-cli upload-terminology -d Downloads/LOINC_2.54_MULTI-AXIAL_HIERARCHY.zip -d icd10cm_tabular_2021.xml -f dstu3 -t http://localhost:8080/baseDstu3 -u http://hl7.org/fhir/sid/icd-10-cm ./hapi-fhir-cli upload-terminology -d Downloads/icd10cm_tabular_2021.xml -v r4 -t http://localhost:8080/fhir -u http://hl7.org/fhir/sid/icd-10-cm
``` ```
# Migrate Database # Migrate Database

View File

@ -129,6 +129,9 @@ public class TerminologyUploaderProvider extends BaseJpaProvider {
UploadStatistics stats; UploadStatistics stats;
switch (codeSystemUrl) { switch (codeSystemUrl) {
case ITermLoaderSvc.ICD10_URI:
stats = myTerminologyLoaderSvc.loadIcd10(localFiles, theRequestDetails);
break;
case ITermLoaderSvc.ICD10CM_URI: case ITermLoaderSvc.ICD10CM_URI:
stats = myTerminologyLoaderSvc.loadIcd10cm(localFiles, theRequestDetails); stats = myTerminologyLoaderSvc.loadIcd10cm(localFiles, theRequestDetails);
break; break;

View File

@ -10,6 +10,7 @@ import ca.uhn.fhir.jpa.term.api.ITermCodeSystemStorageSvc;
import ca.uhn.fhir.jpa.term.api.ITermDeferredStorageSvc; import ca.uhn.fhir.jpa.term.api.ITermDeferredStorageSvc;
import ca.uhn.fhir.jpa.term.api.ITermLoaderSvc; import ca.uhn.fhir.jpa.term.api.ITermLoaderSvc;
import ca.uhn.fhir.jpa.term.custom.CustomTerminologySet; import ca.uhn.fhir.jpa.term.custom.CustomTerminologySet;
import ca.uhn.fhir.jpa.term.icd10.Icd10Loader;
import ca.uhn.fhir.jpa.term.icd10cm.Icd10CmLoader; import ca.uhn.fhir.jpa.term.icd10cm.Icd10CmLoader;
import ca.uhn.fhir.jpa.term.loinc.LoincAnswerListHandler; import ca.uhn.fhir.jpa.term.loinc.LoincAnswerListHandler;
import ca.uhn.fhir.jpa.term.loinc.LoincAnswerListLinkHandler; import ca.uhn.fhir.jpa.term.loinc.LoincAnswerListLinkHandler;
@ -300,6 +301,39 @@ public class TermLoaderSvcImpl implements ITermLoaderSvc {
} }
} }
/**
 * Loads the ICD-10 (International Version) terminology from the supplied files and stores it
 * as a code system with the URL {@link #ICD10_URI}.
 * <p>
 * Every uncompressed entry whose filename ends with {@code .xml} (case-insensitive) is handed to a
 * fresh {@link Icd10Loader}; all other entries are skipped. The concept counts reported by the
 * individual loaders are summed into the returned statistics.
 *
 * @param theFiles          the uploaded file descriptors (may be compressed archives)
 * @param theRequestDetails the request details passed through to the storage layer
 * @return the number of concepts loaded together with the ID of the stored code system
 * @throws InternalErrorException if a file cannot be read or parsed; the original
 *                                {@link IOException} or {@link SAXException} is attached as the cause
 */
@Override
public UploadStatistics loadIcd10(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
	ourLog.info("Beginning ICD-10 processing");

	CodeSystem codeSystem = new CodeSystem();
	codeSystem.setUrl(ICD10_URI);
	codeSystem.setContent(CodeSystem.CodeSystemContentMode.NOTPRESENT);
	codeSystem.setStatus(Enumerations.PublicationStatus.ACTIVE);

	TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
	int count = 0;

	try (LoadedFileDescriptors compressedDescriptors = getLoadedFileDescriptors(theFiles)) {
		for (FileDescriptor nextDescriptor : compressedDescriptors.getUncompressedFileDescriptors()) {
			// Only XML entries are candidates for ClaML content; anything else is ignored.
			if (nextDescriptor.getFilename().toLowerCase(Locale.US).endsWith(".xml")) {
				try (InputStream inputStream = nextDescriptor.getInputStream();
					  InputStreamReader reader = new InputStreamReader(inputStream, Charsets.UTF_8)) {
					Icd10Loader loader = new Icd10Loader(codeSystem, codeSystemVersion);
					loader.load(reader);
					count += loader.getConceptCount();
				}
			}
		}
	} catch (IOException | SAXException e) {
		// Message text is unchanged; the exception is additionally chained as the cause so the
		// stack trace of the underlying I/O or parse failure is not lost.
		throw new InternalErrorException(Msg.code(2135) + e, e);
	}

	// The version ID is filled in by the loader from the ClaML Title element (if present).
	codeSystem.setVersion(codeSystemVersion.getCodeSystemVersionId());
	IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, codeSystem, null, null);
	return new UploadStatistics(count, target);
}
@Override @Override
public UploadStatistics loadIcd10cm(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) { public UploadStatistics loadIcd10cm(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
ourLog.info("Beginning ICD-10-cm processing"); ourLog.info("Beginning ICD-10-cm processing");

View File

@ -0,0 +1,121 @@
package ca.uhn.fhir.jpa.term.icd10;
/*-
* #%L
* HAPI FHIR JPA Server
* %%
* Copyright (C) 2014 - 2022 Smile CDR, Inc.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
import ca.uhn.fhir.jpa.entity.TermConcept;
import ca.uhn.fhir.jpa.entity.TermConceptParentChildLink;
import org.hl7.fhir.r4.model.CodeSystem;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static ca.uhn.fhir.util.XmlUtil.getChildrenByTagName;
import static ca.uhn.fhir.util.XmlUtil.parseDocument;
/**
 * Reads an ICD-10 ClaML XML document and populates a {@link CodeSystem} and a
 * {@link TermCodeSystemVersion} from it.
 * <ul>
 * <li>Each {@code Title} element supplies the code system name/title ({@code name} attribute),
 * the version ID ({@code version} attribute) and the description (element text).</li>
 * <li>Each {@code Class} element with a non-empty {@code code} attribute becomes a
 * {@link TermConcept}. Classes without a {@code SuperClass} child are added to the code system
 * version as root concepts; the others are attached to their parents with an ISA link.</li>
 * <li>For each {@code Rubric}, the text of its first {@code Label} (with newlines, carriage
 * returns and tabs removed) becomes the display when the rubric kind is {@code preferred},
 * otherwise a string property named after the rubric kind.</li>
 * </ul>
 */
public class Icd10Loader {

	public static final String EXPECTED_ROOT_NODE = "ClaML";

	private final CodeSystem codeSystem;
	private final TermCodeSystemVersion codeSystemVersion;
	private int conceptCount = 0;

	/**
	 * @param codeSystem        receives name, title and description
	 * @param codeSystemVersion receives the version ID and the root concepts
	 */
	public Icd10Loader(CodeSystem codeSystem, TermCodeSystemVersion codeSystemVersion) {
		this.codeSystem = codeSystem;
		this.codeSystemVersion = codeSystemVersion;
	}

	/**
	 * Parses the document (not namespace aware, DOCTYPE declarations allowed) and loads its
	 * content. A document whose root element is not {@value #EXPECTED_ROOT_NODE} is ignored
	 * without error.
	 * <p>
	 * A {@code SuperClass} reference to a class that appears later in the document is resolved
	 * once all classes have been read, so the hierarchy does not depend on document order.
	 * References to codes that never appear are ignored.
	 *
	 * @param reader source of the ClaML XML
	 * @throws IOException  if the input cannot be read
	 * @throws SAXException if the input is not well-formed XML
	 */
	public void load(Reader reader) throws IOException, SAXException {
		Document document = parseDocument(reader, false, true);
		Element documentElement = document.getDocumentElement();

		if (!EXPECTED_ROOT_NODE.equals(documentElement.getTagName())) {
			return;
		}

		readTitles(documentElement);

		Map<String, TermConcept> conceptMap = new HashMap<>();
		List<PendingLink> pendingLinks = new ArrayList<>();

		for (Element aClass : getChildrenByTagName(documentElement, "Class")) {
			String code = aClass.getAttribute("code");
			if (code.isEmpty()) {
				continue;
			}

			Collection<Element> superClasses = getChildrenByTagName(aClass, "SuperClass");
			boolean rootConcept = superClasses.isEmpty();
			TermConcept termConcept = rootConcept ? codeSystemVersion.addConcept() : new TermConcept();
			termConcept.setCode(code);

			readRubrics(aClass, termConcept);

			for (Element superClass : superClasses) {
				String parentCode = superClass.getAttribute("code");
				TermConcept parent = conceptMap.get(parentCode);
				if (parent != null) {
					parent.addChild(termConcept, TermConceptParentChildLink.RelationshipTypeEnum.ISA);
				} else {
					// Parent not seen yet: remember the link instead of silently dropping the concept.
					pendingLinks.add(new PendingLink(parentCode, termConcept));
				}
			}

			conceptMap.put(code, termConcept);
		}

		// Second chance for forward references now that every class has been read.
		for (PendingLink link : pendingLinks) {
			TermConcept parent = conceptMap.get(link.parentCode);
			// Skip links that would make a concept its own ancestor (self-reference or cycle).
			if (parent != null && !isSameOrDescendant(link.child, parent)) {
				parent.addChild(link.child, TermConceptParentChildLink.RelationshipTypeEnum.ISA);
			}
		}

		conceptCount = conceptMap.size();
	}

	/**
	 * @return the number of distinct concept codes read by the most recent {@link #load(Reader)} call
	 */
	public int getConceptCount() {
		return conceptCount;
	}

	/** Copies name, version and description from the {@code Title} elements. */
	private void readTitles(Element documentElement) {
		for (Element title : getChildrenByTagName(documentElement, "Title")) {
			String name = title.getAttribute("name");
			if (!name.isEmpty()) {
				codeSystem.setName(name);
				codeSystem.setTitle(name);
			}
			String version = title.getAttribute("version");
			if (!version.isEmpty()) {
				codeSystemVersion.setCodeSystemVersionId(version);
			}
			codeSystem.setDescription(title.getTextContent());
		}
	}

	/** Applies the preferred label and the other rubrics of a {@code Class} element to the concept. */
	private void readRubrics(Element aClass, TermConcept termConcept) {
		for (Element rubric : getChildrenByTagName(aClass, "Rubric")) {
			String kind = rubric.getAttribute("kind");
			Optional<Element> firstLabel = getChildrenByTagName(rubric, "Label").stream().findFirst();
			if (!firstLabel.isPresent()) {
				continue;
			}
			String textContent = firstLabel.get().getTextContent();
			if (textContent == null || textContent.isEmpty()) {
				continue;
			}
			// Line breaks and tabs are removed (not replaced by spaces).
			textContent = textContent.replace("\n", "").replace("\r", "").replace("\t", "");
			if (kind.equals("preferred")) {
				termConcept.setDisplay(textContent);
			} else {
				termConcept.addPropertyString(kind, textContent);
			}
		}
	}

	/** Returns true if {@code candidate} is {@code ancestor} itself or reachable through its children. */
	private static boolean isSameOrDescendant(TermConcept ancestor, TermConcept candidate) {
		if (ancestor == candidate) {
			return true;
		}
		for (TermConcept child : ancestor.getChildCodes()) {
			if (isSameOrDescendant(child, candidate)) {
				return true;
			}
		}
		return false;
	}

	/** A parent/child link whose parent code had not been read when the child was processed. */
	private static final class PendingLink {
		private final String parentCode;
		private final TermConcept child;

		private PendingLink(String parentCode, TermConcept child) {
			this.parentCode = parentCode;
			this.child = child;
		}
	}
}

View File

@ -57,7 +57,7 @@ public class Icd10CmLoader {
public void load(Reader theReader) throws IOException, SAXException { public void load(Reader theReader) throws IOException, SAXException {
myConceptCount = 0; myConceptCount = 0;
Document document = XmlUtil.parseDocument(theReader, false); Document document = XmlUtil.parseDocument(theReader, false, false);
Element documentElement = document.getDocumentElement(); Element documentElement = document.getDocumentElement();
// Extract version: Should only be 1 tag // Extract version: Should only be 1 tag

View File

@ -0,0 +1,78 @@
package ca.uhn.fhir.jpa.term.icd10;
import ca.uhn.fhir.jpa.entity.TermCodeSystemVersion;
import ca.uhn.fhir.jpa.entity.TermConcept;
import ca.uhn.fhir.jpa.entity.TermConceptProperty;
import ca.uhn.fhir.util.ClasspathUtil;
import org.hl7.fhir.r4.model.CodeSystem;
import org.junit.jupiter.api.Test;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Exercises {@link Icd10Loader} against the dummy ClaML resource
 * {@code icd/icd10-dummy-test-en.xml}.
 */
public class Icd10LoaderTest {

	/**
	 * Loads the dummy document and verifies the code system metadata, the properties of the
	 * first root concept and the complete concept hierarchy.
	 * NOTE(review): the method name mentions "Cm" although the loader under test is the
	 * ICD-10 (International) one; the name is kept unchanged here.
	 */
	@Test
	public void testLoadIcd10Cm() throws IOException, SAXException {
		String claml = ClasspathUtil.loadResource("icd/icd10-dummy-test-en.xml");
		CodeSystem targetCodeSystem = new CodeSystem();
		TermCodeSystemVersion targetVersion = new TermCodeSystemVersion();

		new Icd10Loader(targetCodeSystem, targetVersion).load(new StringReader(claml));

		// Metadata taken from the Title element
		assertEquals("ICD-10-EN", targetCodeSystem.getTitle());
		assertEquals("International Statistical Classification of Diseases and Related Health Problems 10th Revision", targetCodeSystem.getDescription());
		assertEquals("2022-tree-expanded", targetVersion.getCodeSystemVersionId());

		// Only the two chapters are root concepts
		List<TermConcept> roots = new ArrayList<>(targetVersion.getConcepts());
		assertEquals(2, roots.size());

		TermConcept firstChapter = roots.get(0);
		assertEquals("A", firstChapter.getCode());
		assertEquals("Fruit", firstChapter.getDisplay());

		// Non-preferred rubrics end up as string properties
		Collection<TermConceptProperty> firstChapterProperties = firstChapter.getProperties();
		assertEquals(2, firstChapterProperties.size());
		assertEquals("Include fruit", firstChapter.getStringProperty("inclusion"));
		assertEquals("Things that are not fruit", firstChapter.getStringProperty("exclusion"));

		String expectedTree = """
			A "Fruit"
			-A1-A3 "A1 to A3 type fruit"
			--A1 "Apples"
			--A2 "Pears"
			--A3 "Bananas"
			B "Trees"
			-B1-B2 "A group of trees"
			--B1 "Oak trees"
			--B2 "Ash trees"
			""";
		assertEquals(expectedTree, toTree(roots));
	}

	/** Renders the given concepts and all their descendants, one concept per line. */
	private String toTree(List<TermConcept> concepts) {
		StringBuilder rendered = new StringBuilder();
		concepts.forEach(root -> toTree(root, 0, rendered));
		return rendered.toString();
	}

	/**
	 * Appends one line for the concept (a dash per nesting level, the code and, if present,
	 * the quoted display) followed by the lines of its children one level deeper.
	 */
	private void toTree(TermConcept concept, int indent, StringBuilder buffer) {
		for (int level = 0; level < indent; level++) {
			buffer.append('-');
		}
		buffer.append(concept.getCode());

		String label = concept.getDisplay();
		if (label != null) {
			buffer.append(" \"" + label + "\"");
		}
		buffer.append('\n');

		for (TermConcept child : concept.getChildCodes()) {
			toTree(child, indent + 1, buffer);
		}
	}
}

View File

@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<ClaML version="2.0.0">
<!-- All dummy data -->
<Title date="2022-06-23" name="ICD-10-EN" version="2022-tree-expanded">International Statistical Classification of Diseases and Related Health Problems 10th Revision</Title>
<Class code="A" kind="chapter">
<Rubric kind="preferred">
<Label xml:lang="en" xml:space="default">Fruit</Label>
</Rubric>
<Rubric id="id-icd10_20190322-1587556953279-585" kind="inclusion">
<Label xml:lang="en" xml:space="default">Include fruit</Label>
</Rubric>
<Rubric id="id-icd10_20190322-1587556953279-587" kind="exclusion">
<Label xml:lang="en" xml:space="default">Things that are not fruit</Label>
</Rubric>
</Class>
<Class code="B" kind="chapter">
<Rubric kind="preferred">
<Label xml:lang="en" xml:space="default">Trees</Label>
</Rubric>
<Rubric id="id-icd10_20190322-1587556953279-586" kind="inclusion">
<Label xml:lang="en" xml:space="default">Anything that we think is a tree</Label>
</Rubric>
</Class>
<Class code="A1-A3" kind="block">
<SuperClass code="A"/>
<Rubric kind="preferred">
<Label xml:lang="en" xml:space="default">A1 to A3 type fruit</Label>
</Rubric>
</Class>
<Class code="A1" kind="category">
<SuperClass code="A1-A3"/>
<Rubric kind="preferred">
<Label xml:lang="en" xml:space="default">Apples</Label>
</Rubric>
</Class>
<Class code="A2" kind="category">
<SuperClass code="A1-A3"/>
<Rubric kind="preferred">
<Label xml:lang="en" xml:space="default">Pears</Label>
</Rubric>
</Class>
<Class code="A3" kind="category">
<SuperClass code="A1-A3"/>
<Rubric kind="preferred">
<Label xml:lang="en" xml:space="default">Bananas</Label>
</Rubric>
</Class>
<Class code="B1-B2" kind="block">
<SuperClass code="B"/>
<Rubric kind="preferred">
<Label xml:lang="en" xml:space="default">A group of trees</Label>
</Rubric>
</Class>
<Class code="B1" kind="category">
<SuperClass code="B1-B2"/>
<Rubric kind="preferred">
<Label xml:lang="en" xml:space="default">Oak trees</Label>
</Rubric>
</Class>
<Class code="B2" kind="category">
<SuperClass code="B1-B2"/>
<Rubric kind="preferred">
<Label xml:lang="en" xml:space="default">Ash trees</Label>
</Rubric>
</Class>
</ClaML>

View File

@ -36,6 +36,7 @@ public interface ITermLoaderSvc {
String IMGTHLA_URI = "http://www.ebi.ac.uk/ipd/imgt/hla"; String IMGTHLA_URI = "http://www.ebi.ac.uk/ipd/imgt/hla";
String LOINC_URI = "http://loinc.org"; String LOINC_URI = "http://loinc.org";
String SCT_URI = "http://snomed.info/sct"; String SCT_URI = "http://snomed.info/sct";
String ICD10_URI = "http://hl7.org/fhir/sid/icd-10";
String ICD10CM_URI = "http://hl7.org/fhir/sid/icd-10-cm"; String ICD10CM_URI = "http://hl7.org/fhir/sid/icd-10-cm";
String IEEE_11073_10101_URI = "urn:iso:std:iso:11073:10101"; String IEEE_11073_10101_URI = "urn:iso:std:iso:11073:10101";
@ -45,6 +46,10 @@ public interface ITermLoaderSvc {
UploadStatistics loadSnomedCt(List<FileDescriptor> theFiles, RequestDetails theRequestDetails); UploadStatistics loadSnomedCt(List<FileDescriptor> theFiles, RequestDetails theRequestDetails);
/**
 * Loads the ICD-10 (International Version) terminology from the given files.
 * <p>
 * This default implementation does nothing and always returns {@code null}; it exists so that
 * implementations of this interface written before this method was added keep compiling.
 * Implementations that support ICD-10 are expected to override it.
 * <p>
 * NOTE(review): callers that use the result without a null check will fail with a
 * {@link NullPointerException} when an implementation does not override this method - confirm
 * whether a null result is acceptable at the call sites.
 *
 * @param theFiles          the terminology files to load
 * @param theRequestDetails the details of the request that triggered the upload
 * @return {@code null} in this default implementation
 */
default UploadStatistics loadIcd10(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
return null;
}
UploadStatistics loadIcd10cm(List<FileDescriptor> theFiles, RequestDetails theRequestDetails); UploadStatistics loadIcd10cm(List<FileDescriptor> theFiles, RequestDetails theRequestDetails);
UploadStatistics loadCustom(String theSystem, List<FileDescriptor> theFiles, RequestDetails theRequestDetails); UploadStatistics loadCustom(String theSystem, List<FileDescriptor> theFiles, RequestDetails theRequestDetails);