add stub for HLA nomenclature terminology upload

This commit is contained in:
Joel Schneider 2019-05-31 14:01:24 -06:00 committed by James Agnew
parent 6d8465abe6
commit 1f5cbc36b3
4 changed files with 378 additions and 6 deletions

View File

@ -115,11 +115,17 @@ public abstract class BaseTerminologyUploaderProvider extends BaseJpaProvider {
url = defaultString(url);
UploadStatistics stats;
if (IHapiTerminologyLoaderSvc.SCT_URI.equals(url)) {
switch(url) {
case IHapiTerminologyLoaderSvc.SCT_URI:
stats = myTerminologyLoaderSvc.loadSnomedCt(localFiles, theRequestDetails);
} else if (IHapiTerminologyLoaderSvc.LOINC_URI.equals(url)) {
break;
case IHapiTerminologyLoaderSvc.LOINC_URI:
stats = myTerminologyLoaderSvc.loadLoinc(localFiles, theRequestDetails);
} else {
break;
case IHapiTerminologyLoaderSvc.IMGTHLA_URI:
stats = myTerminologyLoaderSvc.loadImgthla(localFiles, theRequestDetails);
break;
default:
throw new InvalidRequestException("Unknown URL: " + url);
}

View File

@ -28,10 +28,13 @@ import java.util.List;
public interface IHapiTerminologyLoaderSvc {
String IMGTHLA_URI = "http://www.ebi.ac.uk/ipd/imgt/hla";
String LOINC_URI = "http://loinc.org";
String SCT_URI = "http://snomed.info/sct";
String IEEE_11073_10101_URI = "urn:iso:std:iso:11073:10101";
UploadStatistics loadImgthla(List<FileDescriptor> theFiles, RequestDetails theRequestDetails);
UploadStatistics loadLoinc(List<FileDescriptor> theFiles, RequestDetails theRequestDetails);
UploadStatistics loadSnomedCt(List<FileDescriptor> theFiles, RequestDetails theRequestDetails);

View File

@ -63,6 +63,8 @@ public class TerminologyLoaderSvcImpl implements IHapiTerminologyLoaderSvc {
public static final String SCT_FILE_CONCEPT = "Terminology/sct2_Concept_Full_";
public static final String SCT_FILE_DESCRIPTION = "Terminology/sct2_Description_Full-en";
public static final String SCT_FILE_RELATIONSHIP = "Terminology/sct2_Relationship_Full";
// File names of the two inputs required for an IMGTHLA upload: loadImgthla passes them to
// verifyMandatoryFilesExist, and processImgthlaFiles compares each uploaded file name against them.
public static final String IMGTHLA_HLA_NOM_TXT = "hla_nom.txt";
public static final String IMGTHLA_HLA_XML = "hla.xml";
public static final String LOINC_ANSWERLIST_FILE = "AnswerList.csv";
public static final String LOINC_ANSWERLIST_LINK_FILE = "LoincAnswerListLink.csv";
public static final String LOINC_DOCUMENT_ONTOLOGY_FILE = "DocumentOntology.csv";
@ -187,6 +189,26 @@ public class TerminologyLoaderSvcImpl implements IHapiTerminologyLoaderSvc {
}
/**
 * Entry point for uploading the HLA nomenclature (IMGTHLA) terminology.
 * <p>
 * Wraps the supplied files in a {@code LoadedFileDescriptors}, checks that both
 * {@link #IMGTHLA_HLA_NOM_TXT} and {@link #IMGTHLA_HLA_XML} are present, and then delegates
 * to {@code processImgthlaFiles}.
 *
 * @param theFiles          the uploaded file descriptors
 * @param theRequestDetails request details, passed through unchanged to {@code processImgthlaFiles}
 * @return the statistics returned by {@code processImgthlaFiles}
 */
@Override
public UploadStatistics loadImgthla(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
	// Same resource handling as loadLoinc: the descriptors are closed automatically on
	// every exit path, including when verification or processing throws.
	try (LoadedFileDescriptors descriptors = new LoadedFileDescriptors(theFiles)) {
		descriptors.verifyMandatoryFilesExist(Arrays.asList(IMGTHLA_HLA_NOM_TXT, IMGTHLA_HLA_XML));

		ourLog.info("Beginning IMGTHLA processing");

		return processImgthlaFiles(descriptors, theRequestDetails);
	}
}
@Override
public UploadStatistics loadLoinc(List<FileDescriptor> theFiles, RequestDetails theRequestDetails) {
try (LoadedFileDescriptors descriptors = new LoadedFileDescriptors(theFiles)) {
@ -235,6 +257,125 @@ public class TerminologyLoaderSvcImpl implements IHapiTerminologyLoaderSvc {
}
}
/**
 * Stub implementation of the IMGTHLA (HLA nomenclature) upload.
 * <p>
 * Loads the bundled {@code imgthla.xml} CodeSystem template, collects its property codes and
 * types, then walks the uploaded files. For {@link #IMGTHLA_HLA_NOM_TXT} and
 * {@link #IMGTHLA_HLA_XML} it currently only counts and logs the number of lines; any other
 * file is skipped. No concepts are created yet, and the method always ends by throwing
 * {@link InternalErrorException} until the real handlers are written.
 *
 * @param theDescriptors    the uploaded files
 * @param theRequestDetails request details (not used yet; needed once the code system is stored)
 * @return never returns normally at present
 * @throws InvalidRequestException if either mandatory file is not among the uploaded files
 * @throws InternalErrorException  if the template or an uploaded file cannot be read, and
 *                                 unconditionally at the end while this remains a stub
 */
UploadStatistics processImgthlaFiles(LoadedFileDescriptors theDescriptors, RequestDetails theRequestDetails) {
	final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
	final Map<String, TermConcept> code2concept = new HashMap<>();
	final List<ValueSet> valueSets = new ArrayList<>();
	// Not populated yet; kept for the pending storeCodeSystem(...) call noted at the end.
	final List<ConceptMap> conceptMaps = new ArrayList<>();

	CodeSystem imgthlaCs;
	// try-with-resources so the classpath stream is closed; a null resource is permitted
	// here and is reported explicitly rather than being passed on to IOUtils.
	try (java.io.InputStream imgthlaCsStream = BaseHapiTerminologySvcImpl.class.getResourceAsStream("/ca/uhn/fhir/jpa/term/imgthla/imgthla.xml")) {
		if (imgthlaCsStream == null) {
			throw new InternalErrorException("Failed to load imgthla.xml");
		}
		String imgthlaCsString = IOUtils.toString(imgthlaCsStream, Charsets.UTF_8);
		imgthlaCs = FhirContext.forR4().newXmlParser().parseResource(CodeSystem.class, imgthlaCsString);
	} catch (IOException e) {
		throw new InternalErrorException("Failed to load imgthla.xml", e);
	}

	// Property code -> declared type, taken from the template; intended for the record handlers.
	Map<String, CodeSystem.PropertyType> propertyNamesToTypes = new HashMap<>();
	for (CodeSystem.PropertyComponent nextProperty : imgthlaCs.getProperty()) {
		String nextPropertyCode = nextProperty.getCode();
		CodeSystem.PropertyType nextPropertyType = nextProperty.getType();
		if (isNotBlank(nextPropertyCode)) {
			propertyNamesToTypes.put(nextPropertyCode, nextPropertyType);
		}
	}

	boolean foundHlaNom = false;
	boolean foundHlaXml = false;

	// NOTE(review): file names are compared with equals(), while loadImgthla calls
	// verifyMandatoryFilesExist with what it names "filename fragments" - confirm that both
	// checks accept the same names (e.g. entries inside a sub-directory of a zip).
	for (FileDescriptor nextZipBytes : theDescriptors.getUncompressedFileDescriptors()) {
		String nextFilename = nextZipBytes.getFilename();

		if (IMGTHLA_HLA_NOM_TXT.equals(nextFilename)) {
			// TODO: replace the line count with a real handler, e.g.
			//   new HlaNomTxtHandler(codeSystemVersion, code2concept, propertyNamesToTypes)
			logLineCountOfImgthlaFile(nextZipBytes);
			foundHlaNom = true;
		} else if (IMGTHLA_HLA_XML.equals(nextFilename)) {
			// TODO: replace the line count with a real handler, e.g.
			//   new HlaXmlHandler(codeSystemVersion, code2concept, propertyNamesToTypes)
			logLineCountOfImgthlaFile(nextZipBytes);
			foundHlaXml = true;
		} else {
			ourLog.info("Skipping unexpected file {}", nextFilename);
		}
	}

	if (!foundHlaNom) {
		throw new InvalidRequestException("Did not find file matching " + IMGTHLA_HLA_NOM_TXT);
	}

	if (!foundHlaXml) {
		throw new InvalidRequestException("Did not find file matching " + IMGTHLA_HLA_XML);
	}

	int valueSetCount = valueSets.size();
	int rootConceptCount = codeSystemVersion.getConcepts().size();
	int conceptCount = code2concept.size();
	ourLog.info("Have {} total concepts, {} root concepts, {} ValueSets", conceptCount, rootConceptCount, valueSetCount);

	// remove this when fully implemented ...
	throw new InternalErrorException("HLA nomenclature terminology upload not yet fully implemented.");

	// TODO once implemented:
	//   IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, imgthlaCs, valueSets, conceptMaps);
	//   return new UploadStatistics(conceptCount, target);
}

/**
 * Reads one uploaded IMGTHLA file to the end and logs how many lines it contains.
 * Placeholder for real parsing; shared by the hla_nom.txt and hla.xml branches.
 *
 * @param theFile the uploaded file to read
 * @throws InternalErrorException if the file cannot be read
 */
private void logLineCountOfImgthlaFile(FileDescriptor theFile) {
	String filename = theFile.getFilename();
	ourLog.info("Processing file {}", filename);

	// fileReader always refers to the reader over the upload stream, so the finally block
	// closes that stream even when the line count is taken from an in-memory copy.
	Reader fileReader = null;
	try {
		fileReader = new InputStreamReader(theFile.getInputStream(), Charsets.UTF_8);

		Reader lineSource = fileReader;
		if (ourLog.isTraceEnabled()) {
			// Dumping the contents drains fileReader, so lines are counted from the copy.
			// The dump itself is emitted at INFO level although it is guarded by the TRACE check.
			String contents = IOUtils.toString(fileReader);
			ourLog.info("File contents for: {}\n{}", filename, contents);
			lineSource = new StringReader(contents);
		}

		LineNumberReader lnr = new LineNumberReader(lineSource);
		while (lnr.readLine() != null) {
			// nothing to do per line; only the final line number is wanted
		}
		ourLog.warn("Lines read from {}: {}", filename, lnr.getLineNumber());
	} catch (IOException e) {
		throw new InternalErrorException(e);
	} finally {
		// Failures on close are deliberately ignored.
		IOUtils.closeQuietly(fileReader);
	}
}
UploadStatistics processLoincFiles(LoadedFileDescriptors theDescriptors, RequestDetails theRequestDetails) {
final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
final Map<String, TermConcept> code2concept = new HashMap<>();

View File

@ -0,0 +1,222 @@
<!--
Links to additional information on HLA nomenclature:
http://hla.alleles.org/nomenclature/
http://www.ebi.ac.uk/ipd/imgt/hla
New releases of the IPD-IMGT/HLA database are published periodically,
typically once every 3 months.
https://www.ebi.ac.uk/ipd/imgt/hla/docs/release.html
In some cases, a new database release may delete or modify the definition
of concepts introduced in a previous release. For that reason, the HLA
nomenclature is a versioned code system, using the IPD-IMGT/HLA release
as its CodeSystem.version.
Note that the following set of codes constitute the IMGTHLA code system:
- "allele" names, including ...
- allele-group names, e.g. HLA-A*01
- protein-level names, e.g. HLA-A*01:01
- exomic (coding-region level) names, e.g. HLA-A*01:01:01
- genomic (coding plus non-coding-region level) names, e.g. HLA-A*01:01:01:01
- P group names, e.g. HLA-A*01:01P
- G group names, e.g. HLA-A*01:01:01G
- serotype names, e.g. HLA-A1
-->
<!--
Version History of this specification
0.1 | published 2019-05-08
-->
<CodeSystem xmlns="http://hl7.org/fhir">
<id value="imgthla"/>
<!-- This url is unchanged for all versions of IMGTHLA. There
can only be one correct Code System resource for each value of the
version attribute (at least, only one per server) -->
<url value="http://www.ebi.ac.uk/ipd/imgt/hla"/>
<!-- the HL7 v3 OID assigned to IPD-IMGT/HLA nomenclature -->
<identifier>
<system value="urn:ietf:rfc:3986"/>
<value value="urn:oid:2.16.840.1.113883.6.341"/>
</identifier>
<!--
// if a version is specified:
<version value="3.36.0"/>
-->
<!-- if a specific version is specified, this information should be in the name (e.g. IMGTHLA_3360) and title -->
<name value="IMGTHLA"/>
<title value="HLA Nomenclature"/>
<status value="active"/>
<experimental value="true"/>
<publisher value="WHO Nomenclature Committee for Factors of the HLA System"/>
<contact>
<telecom>
<system value="url"/>
<value value="http://hla.alleles.org/nomenclature/committee.html"/>
</telecom>
<telecom>
<system value="url"/>
<value value="https://www.ebi.ac.uk/ipd/imgt/hla/"/>
</telecom>
<telecom>
<system value="url"/>
<value value="https://github.com/ANHIG/IMGTHLA"/>
</telecom>
<telecom>
<system value="other"/>
<value value="Professor Steven G. E. Marsh"/>
</telecom>
<telecom>
<system value="other"/>
<value value="hla [at] alleles [dot] org"/>
</telecom>
</contact>
<!--
<date value="[date for this version]"/>
-->
<description value="The IPD-IMGT/HLA Database provides a specialist database
for sequences of the human major histocompatibility complex (MHC) and includes
the official sequences named by the WHO Nomenclature Committee For Factors of
the HLA System. The IPD-IMGT/HLA Database is part of the international
ImMunoGeneTics project (IMGT).
The IPD and IMGT/HLA database is described in the following publications.
Robinson J, Halliwell JA, Hayhurst JD, Flicek P, Parham P, Marsh SGE:
The IPD and IMGT/HLA database: allele variant databases.
Nucleic Acids Research (2014) 43 Suppl 1:D423-31
&lt;https://doi.org/10.1093/nar/gku1161&gt;
Robinson J, Malik A, Parham P, Bodmer JG, Marsh SGE:
IMGT/HLA - a sequence database for the human major histocompatibility complex
Tissue Antigens (2000), 55:280-287
&lt;https://doi.org/10.1034/j.1399-0039.2000.550314.x&gt;
"/>
<copyright value="This content from the IPD-IMGT/HLA database is copyright © 2003 Anthony Nolan Research Institute and the WHO Nomenclature Committee for Factors of the HLA System, and available at no cost under a Creative Commons Attribution-NoDerivs License."/>
<caseSensitive value="true"/>
<valueSet value="http://www.ebi.ac.uk/ipd/imgt/hla/vs"/>
<hierarchyMeaning value="grouped-by"/>
<compositional value="false"/> <!-- no compositional grammar defined by IPD-IMGT/HLA -->
<versionNeeded value="true"/>
<content value="complete"/>
<!-- <count value="65000"/>... if working with a specific version, you could nominate a count of the total number of concepts -->
<!-- properties. There are 3 kinds of properties:
fhir: display, designation; these are not described here since they are inherent in the specification
infrastructural: defined by FHIR, but documented here for IMGTHLA
IMGTHLA properties: defined by the HLA nomenclature
-->
<!-- first, the infrastructural properties - inherited from FHIR, but documented here -->
<property>
<code value="inactive"/>
<uri value="http://hl7.org/fhir/concept-properties#inactive"/>
<description value="True if the concept is not considered active - e.g. not a valid concept any more. Property type is boolean, default value is false"/>
<type value="boolean"/>
</property>
<property>
<code value="deprecated"/>
<uri value="http://hl7.org/fhir/concept-properties#deprecated"/>
<description value="The date at which a concept was deprecated. Concepts that are deprecated but not inactive can still be used, but their use is discouraged, and they should be expected to be made inactive in a future release. Property type is dateTime"/>
<type value="dateTime"/>
</property>
<property>
<code value="parent"/>
<uri value="http://hl7.org/fhir/concept-properties#parent"/>
<description value="The concept identified in this property is a parent of the concept on which it is a property. The property type will be 'code'. The meaning of 'parent' is defined by the hierarchyMeaning attribute"/>
<type value="code"/>
</property>
<!--
IMGTHLA concept properties.
-->
<property>
<code value="allele_id"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/allele_id"/>
<description value="IPD-IMGT/HLA database accession number."/>
<type value="string"/>
</property>
<property>
<code value="expression_suffix"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/expression_suffix"/>
<description value="Expression suffix (if any) assigned to the concept. See http://hla.alleles.org/nomenclature/naming.html"/>
<type value="string"/>
</property>
<property>
<code value="hla_g_group"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/hla_g_group"/>
<description value="HLA G group containing this concept. See http://hla.alleles.org/alleles/g_groups.html"/>
<type value="code"/>
</property>
<property>
<code value="hla_p_group"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/hla_p_group"/>
<description value="HLA P group containing this concept. See http://hla.alleles.org/alleles/p_groups.html"/>
<type value="code"/>
</property>
<property>
<code value="is_allele_group_concept"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_allele_group_concept"/>
<description value="True if the concept is an allele group. See http://hla.alleles.org/nomenclature/naming.html"/>
<type value="boolean"/>
</property>
<property>
<code value="is_dna_concept"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_dna_concept"/>
<description value="True if the concept is DNA-level (as opposed to serology)."/>
<type value="boolean"/>
</property>
<property>
<code value="is_exomic_concept"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_exomic_concept"/>
<description value="True if the concept represents a distinct (within this CodeSystem.version) nucleotide sequence across all coding regions of the full gene. See http://hla.alleles.org/nomenclature/naming.html"/>
<type value="boolean"/>
</property>
<property>
<code value="is_hla_g_group_concept"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_hla_g_group_concept"/>
<description value="True if the concept is a HLA G group. See http://hla.alleles.org/alleles/g_groups.html"/>
<type value="boolean"/>
</property>
<property>
<code value="is_hla_p_group_concept"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_hla_p_group_concept"/>
<description value="True if the concept is a HLA P group. See http://hla.alleles.org/alleles/p_groups.html"/>
<type value="boolean"/>
</property>
<property>
<code value="is_genomic_concept"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_genomic_concept"/>
<description value="True if the concept represents a distinct (within this CodeSystem.version) nucleotide sequence across all coding and non-coding regions of the full gene. See http://hla.alleles.org/nomenclature/naming.html"/>
<type value="boolean"/>
</property>
<property>
<code value="is_protein_concept"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_protein_concept"/>
<description value="True if the concept represents a distinct amino acid sequence across the full gene. See http://hla.alleles.org/nomenclature/naming.html"/>
<type value="boolean"/>
</property>
<property>
<code value="is_serology_concept"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_serology_concept"/>
<description value="True if the concept is serology-level (as opposed to DNA)."/>
<type value="boolean"/>
</property>
<property>
<code value="locus_name"/>
<uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/locus_name"/>
<description value="DNA or serology locus name, e.g. HLA-A, HLA-DR"/>
<type value="string"/>
</property>
</CodeSystem>