From 2e5aed4016736812d8958f27c0d677cb8b7062fb Mon Sep 17 00:00:00 2001 From: Ben Li-Sauerwine Date: Sun, 17 Oct 2021 09:06:31 -0400 Subject: [PATCH] Adds a NDJSON parser, in preparation for adding NDJSON $import to HAPI (#3022) * NDJsonParser with one example test. * Add test for empty NDJson, and fix bug for empty NDJson. * Adds multi-Patient test, and fixes bug whereby all multi-line NDJSON would be put into the same line. * Adds test for NDJson with newlines in it. * Adds test for converting non-Bundle types to NDJSON failing. * Confirm that we can only extract to Bundle types in test. * Update hapi-fhir-base/src/main/java/ca/uhn/fhir/context/FhirContext.java Co-authored-by: James Agnew * Documents behavior of the NDJsonParser in FhirContext. * Attempt to fix failing build by using TestUtil to clear context in the manner of r4 parser tests instead of dstu Also clean up indentation. Co-authored-by: James Agnew --- .../java/ca/uhn/fhir/context/FhirContext.java | 41 ++++++ .../java/ca/uhn/fhir/parser/NDJsonParser.java | 120 ++++++++++++++++ .../java/ca/uhn/fhir/rest/api/Constants.java | 1 + .../ca/uhn/fhir/rest/api/EncodingEnum.java | 15 +- .../ca/uhn/fhir/parser/NDJsonParserTest.java | 128 ++++++++++++++++++ 5 files changed, 304 insertions(+), 1 deletion(-) create mode 100644 hapi-fhir-base/src/main/java/ca/uhn/fhir/parser/NDJsonParser.java create mode 100644 hapi-fhir-structures-r4/src/test/java/ca/uhn/fhir/parser/NDJsonParserTest.java diff --git a/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/FhirContext.java b/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/FhirContext.java index 73970ce82fe..5035e885ce9 100644 --- a/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/FhirContext.java +++ b/hapi-fhir-base/src/main/java/ca/uhn/fhir/context/FhirContext.java @@ -15,6 +15,7 @@ import ca.uhn.fhir.parser.IParser; import ca.uhn.fhir.parser.IParserErrorHandler; import ca.uhn.fhir.parser.JsonParser; import ca.uhn.fhir.parser.LenientErrorHandler; +import 
ca.uhn.fhir.parser.NDJsonParser; import ca.uhn.fhir.parser.RDFParser; import ca.uhn.fhir.parser.XmlParser; import ca.uhn.fhir.rest.api.IVersionSpecificBundleFactory; @@ -124,6 +125,7 @@ public class FhirContext { private volatile Set myResourceNames; private volatile Boolean myFormatXmlSupported; private volatile Boolean myFormatJsonSupported; + private volatile Boolean myFormatNDJsonSupported; private volatile Boolean myFormatRdfSupported; /** @@ -460,6 +462,7 @@ public class FhirContext { if (retVal == null) { retVal = scanResourceType(theResourceType); } + return retVal; } @@ -735,6 +738,21 @@ public class FhirContext { return retVal; } + /** + * @return Returns true if the NDJSON serialization format is supported, based on the + * available libraries on the classpath. + * + * @since 5.6.0 + */ + public boolean isFormatNDJsonSupported() { + Boolean retVal = myFormatNDJsonSupported; + if (retVal == null) { + retVal = tryToInitParser(() -> newNDJsonParser()); + myFormatNDJsonSupported = retVal; + } + return retVal; + } + /** * @return Returns true if the RDF serialization format is supported, based on the * available libraries on the classpath. @@ -801,6 +819,29 @@ public class FhirContext { return new JsonParser(this, myParserErrorHandler); } + /** + * Create and return a new NDJSON parser. + * + *

+ * Thread safety: Parsers are not guaranteed to be thread safe. Create a new parser instance for every thread + * or every message being parsed/encoded. + *

+ *

+ * Performance Note: This method is cheap to call, and may be called once for every message being processed + * without incurring any performance penalty. + *

+ *

+ * The NDJsonParser provided here is expected to translate between legal NDJSON and FHIR Bundles. + * In particular, it is able to encode the resources in a FHIR Bundle to NDJSON, as well as decode + * NDJSON into a FHIR "collection"-type Bundle populated with the resources described in the NDJSON. + * It will throw an exception if it is asked to encode anything other than a FHIR Bundle + * or to decode into anything other than a FHIR Bundle. + *

+ */ + public IParser newNDJsonParser() { + return new NDJsonParser(this, myParserErrorHandler); + } + /** * Create and return a new RDF parser. * diff --git a/hapi-fhir-base/src/main/java/ca/uhn/fhir/parser/NDJsonParser.java b/hapi-fhir-base/src/main/java/ca/uhn/fhir/parser/NDJsonParser.java new file mode 100644 index 00000000000..664685b80d1 --- /dev/null +++ b/hapi-fhir-base/src/main/java/ca/uhn/fhir/parser/NDJsonParser.java @@ -0,0 +1,120 @@ +package ca.uhn.fhir.parser; + +/* + * #%L + * HAPI FHIR - Core Library + * %% + * Copyright (C) 2014 - 2021 Smile CDR, Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import ca.uhn.fhir.context.*; +import ca.uhn.fhir.rest.api.EncodingEnum; +import ca.uhn.fhir.util.BundleBuilder; +import ca.uhn.fhir.util.BundleUtil; +import org.hl7.fhir.instance.model.api.*; + +import java.io.IOException; +import java.io.BufferedReader; +import java.io.Reader; +import java.io.Writer; +import java.util.List; + + +/** + * This class is the FHIR NDJSON parser/encoder. Users should not interact with this class directly, but should use + * {@link FhirContext#newNDJsonParser()} to get an instance. 
+ */ +public class NDJsonParser extends BaseParser { + + private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(NDJsonParser.class); + + private IParser myJsonParser; + private FhirContext myFhirContext; + + /** + * Do not use this constructor, the recommended way to obtain a new instance of the NDJSON parser is to invoke + * {@link FhirContext#newNDJsonParser()}. + * + * @param theParserErrorHandler + */ + public NDJsonParser(FhirContext theContext, IParserErrorHandler theParserErrorHandler) { + super(theContext, theParserErrorHandler); + myFhirContext = theContext; + + myJsonParser = theContext.newJsonParser(); + } + + @Override + public IParser setPrettyPrint(boolean thePrettyPrint) { + myJsonParser.setPrettyPrint(thePrettyPrint); + return this; + } + + @Override + public EncodingEnum getEncoding() { + return EncodingEnum.NDJSON; + } + + @Override + protected void doEncodeResourceToWriter(IBaseResource theResource, Writer theWriter, EncodeContext theEncodeContext) throws IOException { + // We only encode bundles to NDJSON. + if (!(IBaseBundle.class.isAssignableFrom(theResource.getClass()))) { + throw new IllegalArgumentException("NDJsonParser can only encode Bundle types. Received " + theResource.getClass().getName()); + } + + // Ok, convert the bundle to a list of resources. + List theBundleResources = BundleUtil.toListOfResources(myFhirContext, (IBaseBundle) theResource); + + // Now we write each one in turn. + // Use newline only as a line separator, not at the end of the file. + boolean isFirstResource = true; + for (IBaseResource theBundleEntryResource : theBundleResources) { + if (!(isFirstResource)) { + theWriter.write("\n"); + } + isFirstResource = false; + + myJsonParser.encodeResourceToWriter(theBundleEntryResource, theWriter); + } + } + + @Override + public T doParseResource(Class theResourceType, Reader theReader) throws DataFormatException { + // We can only parse to bundles. 
+ if ((theResourceType != null) && (!(IBaseBundle.class.isAssignableFrom(theResourceType)))) { + throw new DataFormatException("NDJsonParser can only parse to Bundle types. Received " + theResourceType.getName()); + } + + try { + // Now we go through line-by-line parsing the JSON and then stuffing it into a bundle. + BundleBuilder myBuilder = new BundleBuilder(myFhirContext); + myBuilder.setType("collection"); + BufferedReader myBufferedReader = new BufferedReader(theReader); + String jsonString = myBufferedReader.readLine(); + while (jsonString != null) { + // And add it to a collection in a Bundle. + // The string must be trimmed, as per the NDJson spec 3.2 + myBuilder.addCollectionEntry(myJsonParser.parseResource(jsonString.trim())); + // Try to read another line. + jsonString = myBufferedReader.readLine(); + } + + return (T) myBuilder.getBundle(); + } catch (IOException err) { + throw new DataFormatException(err.getMessage()); + } + } +} diff --git a/hapi-fhir-base/src/main/java/ca/uhn/fhir/rest/api/Constants.java b/hapi-fhir-base/src/main/java/ca/uhn/fhir/rest/api/Constants.java index 659c75b1209..87e0f340ad4 100644 --- a/hapi-fhir-base/src/main/java/ca/uhn/fhir/rest/api/Constants.java +++ b/hapi-fhir-base/src/main/java/ca/uhn/fhir/rest/api/Constants.java @@ -84,6 +84,7 @@ public class Constants { public static final String EXTOP_VALIDATE_RESOURCE = "resource"; public static final String FORMAT_HTML = "html"; public static final String FORMAT_JSON = "json"; + public static final String FORMAT_NDJSON = "ndjson"; public static final String FORMAT_XML = "xml"; public static final String CT_RDF_TURTLE_LEGACY = "text/turtle"; public static final String FORMAT_TURTLE = "ttl"; diff --git a/hapi-fhir-base/src/main/java/ca/uhn/fhir/rest/api/EncodingEnum.java b/hapi-fhir-base/src/main/java/ca/uhn/fhir/rest/api/EncodingEnum.java index 6196202a1a7..026641e1db4 100644 --- a/hapi-fhir-base/src/main/java/ca/uhn/fhir/rest/api/EncodingEnum.java +++ 
b/hapi-fhir-base/src/main/java/ca/uhn/fhir/rest/api/EncodingEnum.java @@ -53,7 +53,12 @@ public enum EncodingEnum { } }, - ; + NDJSON(Constants.CT_FHIR_NDJSON, Constants.CT_FHIR_NDJSON, Constants.FORMAT_NDJSON) { + @Override + public IParser newParser(FhirContext theContext) { + return theContext.newNDJsonParser(); + } + }; /** * "json" @@ -71,6 +76,11 @@ public enum EncodingEnum { */ public static final String XML_PLAIN_STRING = "xml"; + /** + * "ndjson" + */ + public static final String NDJSON_PLAIN_STRING = "ndjson"; + private static Map ourContentTypeToEncoding; private static Map ourContentTypeToEncodingLegacy; private static Map ourContentTypeToEncodingStrict; @@ -104,7 +114,9 @@ public enum EncodingEnum { ourContentTypeToEncoding.put("application/xml", XML); ourContentTypeToEncoding.put("application/fhir+turtle", RDF); ourContentTypeToEncoding.put("application/x-turtle", RDF); + ourContentTypeToEncoding.put("application/ndjson", NDJSON); ourContentTypeToEncoding.put("text/json", JSON); + ourContentTypeToEncoding.put("text/ndjson", NDJSON); ourContentTypeToEncoding.put("text/xml", XML); ourContentTypeToEncoding.put("text/turtle", RDF); @@ -114,6 +126,7 @@ public enum EncodingEnum { ourContentTypeToEncoding.put(JSON_PLAIN_STRING, JSON); ourContentTypeToEncoding.put(XML_PLAIN_STRING, XML); ourContentTypeToEncoding.put(RDF_PLAIN_STRING, RDF); + ourContentTypeToEncoding.put(NDJSON_PLAIN_STRING, NDJSON); ourContentTypeToEncoding.put(Constants.FORMAT_TURTLE, RDF); ourContentTypeToEncodingLegacy = Collections.unmodifiableMap(ourContentTypeToEncodingLegacy); diff --git a/hapi-fhir-structures-r4/src/test/java/ca/uhn/fhir/parser/NDJsonParserTest.java b/hapi-fhir-structures-r4/src/test/java/ca/uhn/fhir/parser/NDJsonParserTest.java new file mode 100644 index 00000000000..4fb0b43f936 --- /dev/null +++ b/hapi-fhir-structures-r4/src/test/java/ca/uhn/fhir/parser/NDJsonParserTest.java @@ -0,0 +1,128 @@ +package ca.uhn.fhir.parser; + + +import static 
org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import org.hl7.fhir.instance.model.api.IBaseResource; +import org.hl7.fhir.instance.model.api.IBaseBundle; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; + +import ca.uhn.fhir.context.FhirContext; +import ca.uhn.fhir.util.BundleBuilder; +import ca.uhn.fhir.util.TestUtil; + +import org.hl7.fhir.r4.model.Patient; + +public class NDJsonParserTest { + private static FhirContext ourCtx = FhirContext.forR4(); + private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(NDJsonParserTest.class); + + private String toNDJson(IBaseResource bundle) throws DataFormatException { + IParser p = ourCtx.newNDJsonParser(); + return p.encodeResourceToString(bundle); + } + + private IBaseResource fromNDJson(String ndjson) throws DataFormatException { + IParser p = ourCtx.newNDJsonParser(); + return p.parseResource(ndjson); + } + + private boolean fhirResourcesEqual(IBaseResource expected, IBaseResource actual) { + // I would prefer to use, e.g., EqualsBuilder to do this instead. 
+ String encoded_expected = ourCtx.newJsonParser().setPrettyPrint(true).encodeResourceToString(expected); + String encoded_actual = ourCtx.newJsonParser().setPrettyPrint(true).encodeResourceToString(actual); + + ourLog.info("Expected: {}", encoded_expected); + ourLog.info("Actual : {}", encoded_actual); + + return encoded_expected.equals(encoded_actual); + } + + @Test + public void testSinglePatientEncodeDecode() { + BundleBuilder myBuilder = new BundleBuilder(ourCtx); + + Patient p = new Patient(); + p.setId("Patient/P1"); + myBuilder.addCollectionEntry(p); + IBaseResource myBundle = myBuilder.getBundle(); + IBaseResource responseBundle = fromNDJson(toNDJson(myBundle)); + + assertTrue(fhirResourcesEqual(myBundle, responseBundle)); + } + + @Test + public void testEmptyBundleEncodeDecode() { + BundleBuilder myBuilder = new BundleBuilder(ourCtx); + + myBuilder.setType("collection"); + IBaseResource myBundle = myBuilder.getBundle(); + IBaseResource responseBundle = fromNDJson(toNDJson(myBundle)); + + assertTrue(fhirResourcesEqual(myBundle, responseBundle)); + } + + @Test + public void testThreePatientEncodeDecode() { + BundleBuilder myBuilder = new BundleBuilder(ourCtx); + + Patient p = new Patient(); + p.setId("Patient/P1"); + myBuilder.addCollectionEntry(p); + p = new Patient(); + p.setId("Patient/P2"); + myBuilder.addCollectionEntry(p); + p = new Patient(); + p.setId("Patient/P3"); + myBuilder.addCollectionEntry(p); + + IBaseResource myBundle = myBuilder.getBundle(); + IBaseResource responseBundle = fromNDJson(toNDJson(myBundle)); + + assertTrue(fhirResourcesEqual(myBundle, responseBundle)); + } + + @Test + public void testHasNewlinesEncodeDecode() { + BundleBuilder myBuilder = new BundleBuilder(ourCtx); + + Patient p = new Patient(); + p.setId("Patient/P1"); + p.addAddress().setText("1 Place Street\r\nOn Earth"); + myBuilder.addCollectionEntry(p); + IBaseResource myBundle = myBuilder.getBundle(); + IBaseResource responseBundle = fromNDJson(toNDJson(myBundle)); + + 
assertTrue(fhirResourcesEqual(myBundle, responseBundle)); + } + + @Test + public void testOnlyEncodesBundles() { + Patient p = new Patient(); + p.setId("Patient/P1"); + assertThrows(IllegalArgumentException.class, + ()->{toNDJson(p);}); + } + + @Test + public void testOnlyDecodesBundles() { + BundleBuilder myBuilder = new BundleBuilder(ourCtx); + + Patient p = new Patient(); + p.setId("Patient/P1"); + myBuilder.addCollectionEntry(p); + IBaseResource myBundle = myBuilder.getBundle(); + String myBundleJson = toNDJson(myBundle); + IParser parser = ourCtx.newNDJsonParser(); + assertThrows(DataFormatException.class, + ()->{parser.parseResource(Patient.class, myBundleJson);}); + } + + @AfterAll + public static void afterClassClearContext() { + TestUtil.randomizeLocaleAndTimezone(); + } +}