From 1f5cbc36b36be7c3504f46f043f84e17c91854b0 Mon Sep 17 00:00:00 2001 From: Joel Schneider Date: Fri, 31 May 2019 14:01:24 -0600 Subject: [PATCH] add stub for HLA nomenclature terminology upload --- .../BaseTerminologyUploaderProvider.java | 18 +- .../jpa/term/IHapiTerminologyLoaderSvc.java | 3 + .../jpa/term/TerminologyLoaderSvcImpl.java | 141 +++++++++++ .../ca/uhn/fhir/jpa/term/imgthla/imgthla.xml | 222 ++++++++++++++++++ 4 files changed, 378 insertions(+), 6 deletions(-) create mode 100644 hapi-fhir-jpaserver-base/src/main/resources/ca/uhn/fhir/jpa/term/imgthla/imgthla.xml diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/provider/BaseTerminologyUploaderProvider.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/provider/BaseTerminologyUploaderProvider.java index 2671200e982..bc82e74ff9e 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/provider/BaseTerminologyUploaderProvider.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/provider/BaseTerminologyUploaderProvider.java @@ -115,12 +115,18 @@ public abstract class BaseTerminologyUploaderProvider extends BaseJpaProvider { url = defaultString(url); UploadStatistics stats; - if (IHapiTerminologyLoaderSvc.SCT_URI.equals(url)) { - stats = myTerminologyLoaderSvc.loadSnomedCt(localFiles, theRequestDetails); - } else if (IHapiTerminologyLoaderSvc.LOINC_URI.equals(url)) { - stats = myTerminologyLoaderSvc.loadLoinc(localFiles, theRequestDetails); - } else { - throw new InvalidRequestException("Unknown URL: " + url); + switch(url) { + case IHapiTerminologyLoaderSvc.SCT_URI: + stats = myTerminologyLoaderSvc.loadSnomedCt(localFiles, theRequestDetails); + break; + case IHapiTerminologyLoaderSvc.LOINC_URI: + stats = myTerminologyLoaderSvc.loadLoinc(localFiles, theRequestDetails); + break; + case IHapiTerminologyLoaderSvc.IMGTHLA_URI: + stats = myTerminologyLoaderSvc.loadImgthla(localFiles, theRequestDetails); + break; + default: + throw new 
InvalidRequestException("Unknown URL: " + url); } Parameters retVal = new Parameters(); diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/IHapiTerminologyLoaderSvc.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/IHapiTerminologyLoaderSvc.java index 3e73d2d43de..a07fb6b8801 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/IHapiTerminologyLoaderSvc.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/IHapiTerminologyLoaderSvc.java @@ -28,10 +28,13 @@ import java.util.List; public interface IHapiTerminologyLoaderSvc { + String IMGTHLA_URI = "http://www.ebi.ac.uk/ipd/imgt/hla"; String LOINC_URI = "http://loinc.org"; String SCT_URI = "http://snomed.info/sct"; String IEEE_11073_10101_URI = "urn:iso:std:iso:11073:10101"; + UploadStatistics loadImgthla(List theFiles, RequestDetails theRequestDetails); + UploadStatistics loadLoinc(List theFiles, RequestDetails theRequestDetails); UploadStatistics loadSnomedCt(List theFiles, RequestDetails theRequestDetails); diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvcImpl.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvcImpl.java index 1693c156539..f290b5478fa 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvcImpl.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TerminologyLoaderSvcImpl.java @@ -63,6 +63,8 @@ public class TerminologyLoaderSvcImpl implements IHapiTerminologyLoaderSvc { public static final String SCT_FILE_CONCEPT = "Terminology/sct2_Concept_Full_"; public static final String SCT_FILE_DESCRIPTION = "Terminology/sct2_Description_Full-en"; public static final String SCT_FILE_RELATIONSHIP = "Terminology/sct2_Relationship_Full"; + public static final String IMGTHLA_HLA_NOM_TXT = "hla_nom.txt"; + public static final String IMGTHLA_HLA_XML = "hla.xml"; public static final String LOINC_ANSWERLIST_FILE 
= "AnswerList.csv"; public static final String LOINC_ANSWERLIST_LINK_FILE = "LoincAnswerListLink.csv"; public static final String LOINC_DOCUMENT_ONTOLOGY_FILE = "DocumentOntology.csv"; @@ -187,6 +189,29 @@ public class TerminologyLoaderSvcImpl implements IHapiTerminologyLoaderSvc { }
+	/**
+	 * Loads the IPD-IMGT/HLA nomenclature from the uploaded files.
+	 *
+	 * @param theFiles the uploaded files; hla_nom.txt and hla.xml are checked via verifyMandatoryFilesExist
+	 * @param theRequestDetails the request that triggered the upload
+	 * @return the statistics reported by {@link #processImgthlaFiles}
+	 */
+	@Override
+	public UploadStatistics loadImgthla(List theFiles, RequestDetails theRequestDetails) {
+		// try-with-resources, as in loadLoinc, so the descriptors are released on every exit path
+		try (LoadedFileDescriptors descriptors = new LoadedFileDescriptors(theFiles)) {
+			List<String> mandatoryFilenameFragments = Arrays.asList(
+				IMGTHLA_HLA_NOM_TXT,
+				IMGTHLA_HLA_XML
+			);
+			descriptors.verifyMandatoryFilesExist(mandatoryFilenameFragments);
+
+			ourLog.info("Beginning IMGTHLA processing");
+
+			return processImgthlaFiles(descriptors, theRequestDetails);
+		}
+	}
+
 @Override public UploadStatistics loadLoinc(List theFiles, RequestDetails theRequestDetails) { try (LoadedFileDescriptors descriptors = new LoadedFileDescriptors(theFiles)) { @@ -235,6 +260,123 @@ public class TerminologyLoaderSvcImpl implements IHapiTerminologyLoaderSvc { } }
+	/**
+	 * Processes the uploaded IPD-IMGT/HLA nomenclature files.
+	 * <p>
+	 * This is currently a stub: it parses the bundled imgthla.xml CodeSystem definition, indexes its
+	 * declared property types, reads hla_nom.txt and hla.xml only to log their line counts, and then
+	 * always fails because concept extraction and storage are not implemented yet.
+	 *
+	 * @param theDescriptors the uploaded files; entries with any other filename are skipped
+	 * @param theRequestDetails the request that triggered the upload; unused until storage is implemented
+	 * @return never returns normally while this remains a stub
+	 * @throws InvalidRequestException if hla_nom.txt or hla.xml was not found among the descriptors
+	 * @throws InternalErrorException if a file cannot be read, and unconditionally once both files were read
+	 */
+	UploadStatistics processImgthlaFiles(LoadedFileDescriptors theDescriptors, RequestDetails theRequestDetails) {
+		final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion();
+		final Map code2concept = new HashMap<>();
+		final List valueSets = new ArrayList<>();
+		final List conceptMaps = new ArrayList<>();
+
+		CodeSystem imgthlaCs;
+		// Closing the reader also closes the underlying classpath stream.
+		try (Reader imgthlaCsReader = new InputStreamReader(
+			BaseHapiTerminologySvcImpl.class.getResourceAsStream("/ca/uhn/fhir/jpa/term/imgthla/imgthla.xml"), Charsets.UTF_8)) {
+			String imgthlaCsString = IOUtils.toString(imgthlaCsReader);
+			imgthlaCs = FhirContext.forR4().newXmlParser().parseResource(CodeSystem.class, imgthlaCsString);
+		} catch (IOException e) {
+			throw new InternalErrorException("Failed to load imgthla.xml", e);
+		}
+
+		Map<String, CodeSystem.PropertyType> propertyNamesToTypes = new HashMap<>();
+		for (CodeSystem.PropertyComponent nextProperty : imgthlaCs.getProperty()) {
+			String nextPropertyCode = nextProperty.getCode();
+			CodeSystem.PropertyType nextPropertyType = nextProperty.getType();
+			if (isNotBlank(nextPropertyCode)) {
+				propertyNamesToTypes.put(nextPropertyCode, nextPropertyType);
+			}
+		}
+
+		boolean foundHlaNom = false;
+		boolean foundHlaXml = false;
+		for (FileDescriptor nextZipBytes : theDescriptors.getUncompressedFileDescriptors()) {
+			String nextFilename = nextZipBytes.getFilename();
+
+			if (IMGTHLA_HLA_NOM_TXT.equals(nextFilename)) {
+				// process colon-delimited hla_nom.txt file
+				ourLog.info("Processing file {}", nextFilename);
+
+				// TODO: replace the line count below with the planned record handling:
+//				IRecordHandler handler = new HlaNomTxtHandler(codeSystemVersion, code2concept, propertyNamesToTypes);
+//				AntigenSource antigenSource = new WmdaAntigenSource(hlaNomFilename, relSerSerFilename, relDnaSerFilename);
+				logImgthlaLineCount(nextZipBytes, nextFilename);
+
+				foundHlaNom = true;
+			} else if (IMGTHLA_HLA_XML.equals(nextFilename)) {
+				// process hla.xml file
+				ourLog.info("Processing file {}", nextFilename);
+
+				// TODO: replace the line count below with the planned record handling:
+//				IRecordHandler handler = new HlaXmlHandler(codeSystemVersion, code2concept, propertyNamesToTypes);
+//				AlleleSource alleleSource = new HlaXmlAlleleSource(hlaXmlFilename);
+				logImgthlaLineCount(nextZipBytes, nextFilename);
+
+				foundHlaXml = true;
+			} else {
+				ourLog.info("Skipping unexpected file {}", nextFilename);
+			}
+		}
+
+		if (!foundHlaNom) {
+			throw new InvalidRequestException("Did not find file matching " + IMGTHLA_HLA_NOM_TXT);
+		}
+
+		if (!foundHlaXml) {
+			throw new InvalidRequestException("Did not find file matching " + IMGTHLA_HLA_XML);
+		}
+
+		int valueSetCount = valueSets.size();
+		int rootConceptCount = codeSystemVersion.getConcepts().size();
+		int conceptCount = code2concept.size();
+		ourLog.info("Have {} total concepts, {} root concepts, {} ValueSets", conceptCount, rootConceptCount, valueSetCount);
+
+		// remove this when fully implemented ...
+		throw new InternalErrorException("HLA nomenclature terminology upload not yet fully implemented.");
+
+//		IIdType target = storeCodeSystem(theRequestDetails, codeSystemVersion, imgthlaCs, valueSets, conceptMaps);
+//
+//		return new UploadStatistics(conceptCount, target);
+	}
+
+	/**
+	 * Reads the given file to its end and logs the number of lines it contains.
+	 * <p>
+	 * When trace logging is enabled the complete contents are logged at trace level as well.
+	 *
+	 * @param theFile the descriptor whose content is read as UTF-8
+	 * @param theFilename the filename used in the log output
+	 * @throws InternalErrorException if the content cannot be read
+	 */
+	private void logImgthlaLineCount(FileDescriptor theFile, String theFilename) {
+		try (Reader source = new InputStreamReader(theFile.getInputStream(), Charsets.UTF_8)) {
+			Reader reader = source;
+			if (ourLog.isTraceEnabled()) {
+				String contents = IOUtils.toString(source);
+				ourLog.trace("File contents for: {}\n{}", theFilename, contents);
+				reader = new StringReader(contents);
+			}
+
+			LineNumberReader lnr = new LineNumberReader(reader);
+			while (lnr.readLine() != null) {
+				// consume the content; only the resulting line count is needed for now
+			}
+			ourLog.info("Lines read from {}: {}", theFilename, lnr.getLineNumber());
+		} catch (IOException e) {
+			throw new InternalErrorException(e);
+		}
+	}
+
 UploadStatistics processLoincFiles(LoadedFileDescriptors theDescriptors, RequestDetails theRequestDetails) { final TermCodeSystemVersion codeSystemVersion = new TermCodeSystemVersion(); final Map code2concept = new HashMap<>(); diff --git a/hapi-fhir-jpaserver-base/src/main/resources/ca/uhn/fhir/jpa/term/imgthla/imgthla.xml b/hapi-fhir-jpaserver-base/src/main/resources/ca/uhn/fhir/jpa/term/imgthla/imgthla.xml new file mode 100644 index 00000000000..ddc16df089a --- /dev/null +++ b/hapi-fhir-jpaserver-base/src/main/resources/ca/uhn/fhir/jpa/term/imgthla/imgthla.xml @@ -0,0 +1,222 @@ + + + + + + + + + + + + + + + + + + + + <status value="active"/> + <experimental value="true"/> + + <publisher value="WHO Nomenclature Committee for Factors of the HLA System"/> + <contact> + <telecom> + <system value="url"/> + <value value="http://hla.alleles.org/nomenclature/committee.html"/> + </telecom> + <telecom> + <system value="url"/> + <value value="https://www.ebi.ac.uk/ipd/imgt/hla/"/> + </telecom> + <telecom> + <system value="url"/> + <value value="https://github.com/ANHIG/IMGTHLA"/> + </telecom> + <telecom> + <system value="other"/> + <value value="Professor Steven G. E. Marsh"/> + </telecom> + <telecom> + <system value="other"/> + <value value="hla [at] alleles [dot] org"/> + </telecom> + </contact> + + <!-- + <date value=[date for this version]"/> + --> + <description value="The IPD-IMGT/HLA Database provides a specialist database +for sequences of the human major histocompatibility complex (MHC) and includes +the official sequences named by the WHO Nomenclature Committee For Factors of +the HLA System. The IPD-IMGT/HLA Database is part of the international +ImMunoGeneTics project (IMGT). 
+ +The IPD and IMGT/HLA database is described in the following publications. + +Robinson J, Halliwell JA, Hayhurst JD, Flicek P, Parham P, Marsh SGE: +The IPD and IMGT/HLA database: allele variant databases. +Nucleic Acids Research (2014) 43 Suppl 1:D423-31 +&lt;https://doi.org/10.1093/nar/gku1161&gt; + +Robinson J, Malik A, Parham P, Bodmer JG, Marsh SGE: +IMGT/HLA - a sequence database for the human major histocompatibility complex +Tissue Antigens (2000), 55:280-287 +&lt;https://doi.org/10.1034/j.1399-0039.2000.550314.x&gt; +"/> + <copyright value="This content from the IPD-IMGT/HLA database is copyright © 2003 Anthony Nolan Research Institute and the WHO Nomenclature Committee for Factors of the HLA System, and available at no cost under a Creative Commons Attribution-NoDerivs License."/> + <caseSensitive value="true"/> + + <valueSet value="http://www.ebi.ac.uk/ipd/imgt/hla/vs"/> + + <hierarchyMeaning value="grouped-by"/> + <compositional value="false"/> <!-- no compositional grammar defined by IPD-IMGT/HLA --> + <versionNeeded value="true"/> + + <content value="complete"/> + +<!-- <count value="65000"/>... if working with a specific version, you could nominate a count of the total number of concepts --> + + <!-- properties. There are 3 kinds of properties: + fhir: display, designation; these are not described here since they are inherent in the specification + infrastructural: defined by FHIR, but documented here for IMGTHLA + IMGTHLA properties: defined by the HLA nomenclature + --> + <!-- first, the infrastructural properties - inherited from FHIR, but documented here --> + <property> + <code value="inactive"/> + <uri value="http://hl7.org/fhir/concept-properties#inactive"/> + <description value="True if the concept is not considered active - e.g. not a valid concept any more. 
Property type is boolean, default value is false"/> + <type value="boolean"/> + </property> + <property> + <code value="deprecated"/> + <uri value="http://hl7.org/fhir/concept-properties#deprecated"/> + <description value="The date at which a concept was deprecated. Concepts that are deprecated but not inactive can still be used, but their use is discouraged, and they should be expected to be made inactive in a future release. Property type is dateTime"/> + <type value="dateTime"/> + </property> + <property> + <code value="parent"/> + <uri value="http://hl7.org/fhir/concept-properties#parent"/> + <description value="The concept identified in this property is a parent of the concept on which it is a property. The property type will be 'code'. The meaning of 'parent' is defined by the hierarchyMeaning attribute"/> + <type value="code"/> + </property> + <!-- + IMGTHLA concept properties. + --> + <property> + <code value="allele_id"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/allele_id"/> + <description value="IPD-IMGT/HLA database accession number."/> + <type value="string"/> + </property> + <property> + <code value="expression_suffix"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/expression_suffix"/> + <description value="Expression suffix (if any) assigned to the concept. See http://hla.alleles.org/nomenclature/naming.html"/> + <type value="string"/> + </property> + <property> + <code value="hla_g_group"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/hla_g_group"/> + <description value="HLA G group containing this concept. See http://hla.alleles.org/alleles/g_groups.html"/> + <type value="code"/> + </property> + <property> + <code value="hla_p_group"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/hla_p_group"/> + <description value="HLA P group containing this concept. 
See http://hla.alleles.org/alleles/p_groups.html"/> + <type value="code"/> + </property> + <property> + <code value="is_allele_group_concept"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_allele_group_concept"/> + <description value="True if the concept is an allele group. See http://hla.alleles.org/nomenclature/naming.html"/> + <type value="boolean"/> + </property> + <property> + <code value="is_dna_concept"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_dna_concept"/> + <description value="True if the concept is DNA-level (as opposed to serology)."/> + <type value="boolean"/> + </property> + <property> + <code value="is_exomic_concept"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_exomic_concept"/> + <description value="True if the concept represents a distinct (within this CodeSystem.version) nucleotide sequence across all coding regions of the full gene. See http://hla.alleles.org/nomenclature/naming.html"/> + <type value="boolean"/> + </property> + <property> + <code value="is_hla_g_group_concept"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_hla_g_group_concept"/> + <description value="True if the concept is a HLA G group. See http://hla.alleles.org/alleles/g_groups.html"/> + <type value="boolean"/> + </property> + <property> + <code value="is_hla_p_group_concept"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_hla_p_group_concept"/> + <description value="True if the concept is a HLA P group. See http://hla.alleles.org/alleles/p_groups.html"/> + <type value="boolean"/> + </property> + <property> + <code value="is_genomic_concept"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_genomic_concept"/> + <description value="True if the concept represents a distinct (within this CodeSystem.version) nucleotide sequence across all coding and non-coding regions of the full gene. 
See http://hla.alleles.org/nomenclature/naming.html"/> + <type value="boolean"/> + </property> + <property> + <code value="is_protein_concept"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_protein_concept"/> + <description value="True if the concept represents a distinct amino acid sequence across the full gene. See http://hla.alleles.org/nomenclature/naming.html"/> + <type value="boolean"/> + </property> + <property> + <code value="is_serology_concept"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/is_serology_concept"/> + <description value="True if the concept is serology-level (as opposed to DNA)."/> + <type value="boolean"/> + </property> + <property> + <code value="locus_name"/> + <uri value="http://www.ebi.ac.uk/ipd/imgt/hla/property/locus_name"/> + <description value="DNA or serology locus name, e.g. HLA-A, HLA-DR"/> + <type value="string"/> + </property> + +</CodeSystem> +