Adds an NDJSON parser, in preparation for adding NDJSON $import to HAPI (#3022)

* NDJsonParser with one example test.

* Add test for empty NDJson, and fix bug for empty NDJson.

* Adds multi-Patient test, and fixes bug whereby all multi-line NDJSON would be put into the same line.

* Adds test for NDJson with newlines in it.

* Adds test confirming that encoding non-Bundle types to NDJSON fails.

* Adds test confirming that NDJSON can only be decoded into Bundle types.

* Update hapi-fhir-base/src/main/java/ca/uhn/fhir/context/FhirContext.java

Co-authored-by: James Agnew <jamesagnew@gmail.com>

* Documents behavior of the NDJsonParser in FhirContext.

* Attempt to fix failing build by using TestUtil to clear context in the manner of r4 parser tests instead of dstu

Also clean up indentation.

Co-authored-by: James Agnew <jamesagnew@gmail.com>
This commit is contained in:
Ben Li-Sauerwine 2021-10-17 09:06:31 -04:00 committed by GitHub
parent 721b1817a7
commit 2e5aed4016
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 304 additions and 1 deletions

View File

@ -15,6 +15,7 @@ import ca.uhn.fhir.parser.IParser;
import ca.uhn.fhir.parser.IParserErrorHandler;
import ca.uhn.fhir.parser.JsonParser;
import ca.uhn.fhir.parser.LenientErrorHandler;
import ca.uhn.fhir.parser.NDJsonParser;
import ca.uhn.fhir.parser.RDFParser;
import ca.uhn.fhir.parser.XmlParser;
import ca.uhn.fhir.rest.api.IVersionSpecificBundleFactory;
@ -124,6 +125,7 @@ public class FhirContext {
private volatile Set<String> myResourceNames;
private volatile Boolean myFormatXmlSupported;
private volatile Boolean myFormatJsonSupported;
private volatile Boolean myFormatNDJsonSupported;
private volatile Boolean myFormatRdfSupported;
/**
@ -460,6 +462,7 @@ public class FhirContext {
if (retVal == null) {
retVal = scanResourceType(theResourceType);
}
return retVal;
}
@ -735,6 +738,21 @@ public class FhirContext {
return retVal;
}
/**
 * Reports whether the NDJSON serialization format can be used with this context.
 * <p>
 * The answer is determined the first time this method is called, by attempting to create a
 * parser through {@link #newNDJsonParser()}, and is then remembered for subsequent calls.
 * </p>
 *
 * @return Returns <code>true</code> if the NDJSON serialization format is supported, based on the
 * available libraries on the classpath.
 * @since 5.6.0
 */
public boolean isFormatNDJsonSupported() {
    Boolean supported = myFormatNDJsonSupported;
    if (supported != null) {
        // Already probed on an earlier call; reuse the remembered answer.
        return supported;
    }
    supported = tryToInitParser(() -> newNDJsonParser());
    myFormatNDJsonSupported = supported;
    return supported;
}
/**
* @return Returns <code>true</code> if the RDF serialization format is supported, based on the
* available libraries on the classpath.
@ -801,6 +819,29 @@ public class FhirContext {
return new JsonParser(this, myParserErrorHandler);
}
/**
 * Create and return a new NDJSON parser.
 *
 * <p>
 * Thread safety: <b>Parsers are not guaranteed to be thread safe</b>. Create a new parser instance for every thread
 * or every message being parsed/encoded.
 * </p>
 * <p>
 * Performance Note: <b>This method is cheap</b> to call, and may be called once for every message being processed
 * without incurring any performance penalty.
 * </p>
 * <p>
 * The parser returned here translates between NDJSON and FHIR Bundles:
 * </p>
 * <ul>
 * <li>Encoding writes each resource contained in a FHIR Bundle as one line of NDJSON.</li>
 * <li>Decoding produces a FHIR "collection"-type Bundle populated with the resources described in the NDJSON.</li>
 * <li>An exception is thrown when the parser is asked to encode anything other than a FHIR Bundle, or to
 * decode into anything other than a FHIR Bundle.</li>
 * </ul>
 *
 * @return a new {@link NDJsonParser} created with this context and its parser error handler
 */
public IParser newNDJsonParser() {
    final IParser retVal = new NDJsonParser(this, myParserErrorHandler);
    return retVal;
}
/**
* Create and return a new RDF parser.
*

View File

@ -0,0 +1,120 @@
package ca.uhn.fhir.parser;
/*
* #%L
* HAPI FHIR - Core Library
* %%
* Copyright (C) 2014 - 2021 Smile CDR, Inc.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import ca.uhn.fhir.context.*;
import ca.uhn.fhir.rest.api.EncodingEnum;
import ca.uhn.fhir.util.BundleBuilder;
import ca.uhn.fhir.util.BundleUtil;
import org.hl7.fhir.instance.model.api.*;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.Reader;
import java.io.Writer;
import java.util.List;
/**
 * This class is the FHIR NDJSON parser/encoder. Users should not interact with this class directly, but should use
 * {@link FhirContext#newNDJsonParser()} to get an instance.
 *
 * <p>
 * NDJSON is handled by delegating each individual line to a regular JSON parser obtained from the same
 * {@link FhirContext}:
 * </p>
 * <ul>
 * <li>Encoding accepts only Bundle resources and writes every resource contained in the Bundle as one line,
 * using a single <code>\n</code> between lines and no trailing newline.</li>
 * <li>Parsing reads the input line by line and returns a "collection" Bundle holding one entry per line.
 * Leading and trailing whitespace on a line is ignored, and lines that are empty or contain only whitespace
 * are silently skipped.</li>
 * </ul>
 */
public class NDJsonParser extends BaseParser {

    private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(NDJsonParser.class);

    /** Delegate used to encode/parse the individual JSON document on each line. */
    private final IParser myJsonParser;
    private final FhirContext myFhirContext;

    /**
     * Do not use this constructor, the recommended way to obtain a new instance of the NDJSON parser is to invoke
     * {@link FhirContext#newNDJsonParser()}.
     *
     * @param theContext            the context used to create the per-line JSON parser and to build/read Bundles
     * @param theParserErrorHandler the error handler handed to the base parser
     */
    public NDJsonParser(FhirContext theContext, IParserErrorHandler theParserErrorHandler) {
        super(theContext, theParserErrorHandler);
        myFhirContext = theContext;
        myJsonParser = theContext.newJsonParser();
    }

    /**
     * Pretty printing is not applicable to NDJSON: every resource has to occupy exactly one line, and
     * pretty-printed JSON spans several lines, which {@link #doParseResource(Class, Reader)} could not read back.
     * The requested value is therefore accepted but ignored, and the underlying JSON parser is always kept in
     * its compact (non-pretty-printed) mode.
     *
     * @param thePrettyPrint ignored
     * @return this parser, to allow call chaining
     */
    @Override
    public IParser setPrettyPrint(boolean thePrettyPrint) {
        myJsonParser.setPrettyPrint(false);
        return this;
    }

    /**
     * @return always {@link EncodingEnum#NDJSON}
     */
    @Override
    public EncodingEnum getEncoding() {
        return EncodingEnum.NDJSON;
    }

    /**
     * Writes every resource contained in the given Bundle to the writer, one JSON document per line.
     *
     * @param theResource      the resource to encode; must be a Bundle
     * @param theWriter        the destination for the NDJSON text
     * @param theEncodeContext not used by this implementation
     * @throws IllegalArgumentException if the resource is not a Bundle
     * @throws IOException              if writing to the writer fails
     */
    @Override
    protected void doEncodeResourceToWriter(IBaseResource theResource, Writer theWriter, EncodeContext theEncodeContext) throws IOException {
        // We only encode bundles to NDJSON.
        if (!(theResource instanceof IBaseBundle)) {
            throw new IllegalArgumentException("NDJsonParser can only encode Bundle types. Received " + theResource.getClass().getName());
        }

        List<IBaseResource> bundleResources = BundleUtil.toListOfResources(myFhirContext, (IBaseBundle) theResource);

        // The newline is used strictly as a separator between resources, so nothing is written
        // after the last one.
        boolean first = true;
        for (IBaseResource nextResource : bundleResources) {
            if (!first) {
                theWriter.write("\n");
            }
            first = false;
            myJsonParser.encodeResourceToWriter(nextResource, theWriter);
        }
    }

    /**
     * Reads NDJSON from the reader and returns a "collection" Bundle containing one entry per non-blank line.
     *
     * @param theResourceType the requested type; must be <code>null</code> or a Bundle type
     * @param theReader       the source of the NDJSON text; it is not closed by this method
     * @return the Bundle holding the parsed resources
     * @throws DataFormatException if a non-Bundle type is requested, if a line cannot be parsed as a resource,
     *                             or if reading from the reader fails
     */
    @Override
    public <T extends IBaseResource> T doParseResource(Class<T> theResourceType, Reader theReader) throws DataFormatException {
        // We can only parse to bundles.
        if (theResourceType != null && !IBaseBundle.class.isAssignableFrom(theResourceType)) {
            throw new DataFormatException("NDJsonParser can only parse to Bundle types. Received " + theResourceType.getName());
        }

        BundleBuilder bundleBuilder = new BundleBuilder(myFhirContext);
        bundleBuilder.setType("collection");

        // Deliberately not closed: closing the BufferedReader would also close theReader,
        // which is owned by the caller.
        BufferedReader lineReader = new BufferedReader(theReader);
        try {
            for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
                // Surrounding whitespace is not part of the JSON document on the line.
                String json = line.trim();
                if (json.isEmpty()) {
                    // Blank lines (for example a doubled trailing newline) carry no resource; skip them
                    // rather than handing an empty document to the JSON parser.
                    continue;
                }
                bundleBuilder.addCollectionEntry(myJsonParser.parseResource(json));
            }
        } catch (IOException e) {
            // Keep the original exception as the cause so the underlying I/O failure stays diagnosable.
            throw new DataFormatException(e.getMessage(), e);
        }

        // Safe: theResourceType was checked above to be null or a Bundle type, and the builder produces a Bundle.
        @SuppressWarnings("unchecked")
        T retVal = (T) bundleBuilder.getBundle();
        return retVal;
    }
}

View File

@ -84,6 +84,7 @@ public class Constants {
public static final String EXTOP_VALIDATE_RESOURCE = "resource";
public static final String FORMAT_HTML = "html";
public static final String FORMAT_JSON = "json";
public static final String FORMAT_NDJSON = "ndjson";
public static final String FORMAT_XML = "xml";
public static final String CT_RDF_TURTLE_LEGACY = "text/turtle";
public static final String FORMAT_TURTLE = "ttl";

View File

@ -53,7 +53,12 @@ public enum EncodingEnum {
}
},
;
/**
 * The NDJSON encoding. Parsers for this encoding are obtained from
 * {@link FhirContext#newNDJsonParser()}.
 */
NDJSON(Constants.CT_FHIR_NDJSON, Constants.CT_FHIR_NDJSON, Constants.FORMAT_NDJSON) {
@Override
public IParser newParser(FhirContext theContext) {
// Delegate to the context so the parser is created the same way as for direct callers.
return theContext.newNDJsonParser();
}
};
/**
* "json"
@ -71,6 +76,11 @@ public enum EncodingEnum {
*/
public static final String XML_PLAIN_STRING = "xml";
/**
* "ndjson"
*/
public static final String NDJSON_PLAIN_STRING = "ndjson";
private static Map<String, EncodingEnum> ourContentTypeToEncoding;
private static Map<String, EncodingEnum> ourContentTypeToEncodingLegacy;
private static Map<String, EncodingEnum> ourContentTypeToEncodingStrict;
@ -104,7 +114,9 @@ public enum EncodingEnum {
ourContentTypeToEncoding.put("application/xml", XML);
ourContentTypeToEncoding.put("application/fhir+turtle", RDF);
ourContentTypeToEncoding.put("application/x-turtle", RDF);
ourContentTypeToEncoding.put("application/ndjson", NDJSON);
ourContentTypeToEncoding.put("text/json", JSON);
ourContentTypeToEncoding.put("text/ndjson", NDJSON);
ourContentTypeToEncoding.put("text/xml", XML);
ourContentTypeToEncoding.put("text/turtle", RDF);
@ -114,6 +126,7 @@ public enum EncodingEnum {
ourContentTypeToEncoding.put(JSON_PLAIN_STRING, JSON);
ourContentTypeToEncoding.put(XML_PLAIN_STRING, XML);
ourContentTypeToEncoding.put(RDF_PLAIN_STRING, RDF);
ourContentTypeToEncoding.put(NDJSON_PLAIN_STRING, NDJSON);
ourContentTypeToEncoding.put(Constants.FORMAT_TURTLE, RDF);
ourContentTypeToEncodingLegacy = Collections.unmodifiableMap(ourContentTypeToEncodingLegacy);

View File

@ -0,0 +1,128 @@
package ca.uhn.fhir.parser;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.hl7.fhir.instance.model.api.IBaseResource;
import org.hl7.fhir.instance.model.api.IBaseBundle;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Test;
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.util.BundleBuilder;
import ca.uhn.fhir.util.TestUtil;
import org.hl7.fhir.r4.model.Patient;
/**
 * Round-trip tests for the NDJSON parser: Bundles are encoded to NDJSON, decoded again, and compared
 * against the original. Also verifies that only Bundle types are accepted for encoding and decoding.
 */
public class NDJsonParserTest {
    private static FhirContext ourCtx = FhirContext.forR4();
    private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(NDJsonParserTest.class);

    /** Encodes the given resource with a fresh NDJSON parser. */
    private String toNDJson(IBaseResource bundle) throws DataFormatException {
        return ourCtx.newNDJsonParser().encodeResourceToString(bundle);
    }

    /** Decodes the given NDJSON text with a fresh NDJSON parser. */
    private IBaseResource fromNDJson(String ndjson) throws DataFormatException {
        return ourCtx.newNDJsonParser().parseResource(ndjson);
    }

    /**
     * Compares two resources by their pretty-printed JSON encodings, logging both encodings.
     */
    private boolean fhirResourcesEqual(IBaseResource expected, IBaseResource actual) {
        // I would prefer to use, e.g., EqualsBuilder to do this instead.
        String expectedJson = ourCtx.newJsonParser().setPrettyPrint(true).encodeResourceToString(expected);
        String actualJson = ourCtx.newJsonParser().setPrettyPrint(true).encodeResourceToString(actual);

        ourLog.info("Expected: {}", expectedJson);
        ourLog.info("Actual : {}", actualJson);

        return expectedJson.equals(actualJson);
    }

    /** Creates a Patient carrying only the given id. */
    private static Patient patientWithId(String id) {
        Patient patient = new Patient();
        patient.setId(id);
        return patient;
    }

    /** Asserts that the resource is unchanged after being encoded to NDJSON and decoded again. */
    private void assertSurvivesRoundTrip(IBaseResource original) {
        IBaseResource decoded = fromNDJson(toNDJson(original));
        assertTrue(fhirResourcesEqual(original, decoded));
    }

    @Test
    public void testSinglePatientEncodeDecode() {
        BundleBuilder builder = new BundleBuilder(ourCtx);
        builder.addCollectionEntry(patientWithId("Patient/P1"));

        assertSurvivesRoundTrip(builder.getBundle());
    }

    @Test
    public void testEmptyBundleEncodeDecode() {
        BundleBuilder builder = new BundleBuilder(ourCtx);
        builder.setType("collection");

        assertSurvivesRoundTrip(builder.getBundle());
    }

    @Test
    public void testThreePatientEncodeDecode() {
        BundleBuilder builder = new BundleBuilder(ourCtx);
        for (String id : new String[] {"Patient/P1", "Patient/P2", "Patient/P3"}) {
            builder.addCollectionEntry(patientWithId(id));
        }

        assertSurvivesRoundTrip(builder.getBundle());
    }

    @Test
    public void testHasNewlinesEncodeDecode() {
        // A newline inside a string value must not be mistaken for an NDJSON line break.
        Patient patient = patientWithId("Patient/P1");
        patient.addAddress().setText("1 Place Street\r\nOn Earth");

        BundleBuilder builder = new BundleBuilder(ourCtx);
        builder.addCollectionEntry(patient);

        assertSurvivesRoundTrip(builder.getBundle());
    }

    @Test
    public void testOnlyEncodesBundles() {
        Patient patient = patientWithId("Patient/P1");

        assertThrows(IllegalArgumentException.class, () -> toNDJson(patient));
    }

    @Test
    public void testOnlyDecodesBundles() {
        BundleBuilder builder = new BundleBuilder(ourCtx);
        builder.addCollectionEntry(patientWithId("Patient/P1"));
        String encoded = toNDJson(builder.getBundle());

        IParser parser = ourCtx.newNDJsonParser();
        assertThrows(DataFormatException.class, () -> parser.parseResource(Patient.class, encoded));
    }

    @AfterAll
    public static void afterClassClearContext() {
        TestUtil.randomizeLocaleAndTimezone();
    }
}