Add inline resource mode (#3267)

* Start work on inline resource text storage mode

* Work on inlining

* Add changelog

* Test fixes

* Fix for hardcoded date that just passed
This commit is contained in:
James Agnew 2022-01-03 11:43:45 -05:00 committed by GitHub
parent 216a3d3878
commit d9820bfb89
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 243 additions and 53 deletions

View File

@ -0,0 +1,8 @@
---
type: perf
issue: 3267
title: "A new JPA setting called **Inline Resource Text Below Size** has been added to the DaoConfig. When
this setting is set to a positive value, any resource whose total serialized length is smaller than the given
threshold will be stored as an inline VARCHAR2 text value in the HFJ_RES_VER table, instead of using an external
LOB column. This improves read/write performance (often by a significant amount) at the expense of a slightly
larger amount of disk usage."

View File

@ -133,7 +133,7 @@ The HFJ_RESOURCE table indicates a single resource of any type in the database.
The HFJ_RES_VER table contains individual versions of a resource. If the resource `Patient/1` has 3 versions, there will be 3 rows in this table.
The complete raw contents of the resource is stored in the `RES_TEXT` column, using the encoding specified in the `RES_ENCODING` column.
The complete raw contents of the resource is stored in either the `RES_TEXT` or the `RES_TEXT_VC` column, using the encoding specified in the `RES_ENCODING` column.
## Columns
@ -206,10 +206,19 @@ The complete raw contents of the resource is stored in the `RES_TEXT` column, us
<tr>
<td>RES_TEXT</td>
<td></td>
<td>byte[]</td>
<td>byte[] (SQL LOB)</td>
<td></td>
<td>
Contains the actual full text of the resource being stored.
Contains the actual full text of the resource being stored, stored in a binary LOB.
</td>
</tr>
<tr>
<td>RES_TEXT_VC</td>
<td></td>
<td>String (SQL VARCHAR2)</td>
<td></td>
<td>
Contains the actual full text of the resource being stored, stored in a textual VARCHAR2 column. Only one of <code>RES_TEXT</code> and <code>RES_TEXT_VC</code> will be populated for any given row. The other column in either case will be <i>null</i>.
</td>
</tr>
</tbody>

View File

@ -92,6 +92,7 @@ import ca.uhn.fhir.util.XmlUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.collect.Sets;
import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import org.apache.commons.lang3.NotImplementedException;
@ -133,6 +134,7 @@ import javax.persistence.criteria.CriteriaQuery;
import javax.persistence.criteria.Root;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.XMLEvent;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@ -511,7 +513,8 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
theEntity.setResourceType(toResourceName(theResource));
}
byte[] bytes;
byte[] resourceBinary;
String resourceText;
ResourceEncodingEnum encoding;
boolean changed = false;
@ -574,7 +577,37 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
theEntity.setFhirVersion(myContext.getVersion().getVersion());
bytes = encodeResource(theResource, encoding, excludeElements, myContext);
HashFunction sha256 = Hashing.sha256();
HashCode hashCode;
String encodedResource = encodeResource(theResource, encoding, excludeElements, myContext);
if (getConfig().getInlineResourceTextBelowSize() > 0 && encodedResource.length() < getConfig().getInlineResourceTextBelowSize()) {
resourceText = encodedResource;
resourceBinary = null;
encoding = ResourceEncodingEnum.JSON;
hashCode = sha256.hashUnencodedChars(encodedResource);
} else {
resourceText = null;
switch (encoding) {
case JSON:
resourceBinary = encodedResource.getBytes(Charsets.UTF_8);
break;
case JSONC:
resourceBinary = GZipUtil.compress(encodedResource);
break;
default:
case DEL:
resourceBinary = new byte[0];
break;
}
hashCode = sha256.hashBytes(resourceBinary);
}
String hashSha256 = hashCode.toString();
if (hashSha256.equals(theEntity.getHashSha256()) == false) {
changed = true;
}
theEntity.setHashSha256(hashSha256);
if (sourceExtension != null) {
IBaseExtension<?, ?> newSourceExtension = ((IBaseHasExtensions) meta).addExtension();
@ -582,17 +615,11 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
newSourceExtension.setValue(sourceExtension.getValue());
}
HashFunction sha256 = Hashing.sha256();
String hashSha256 = sha256.hashBytes(bytes).toString();
if (hashSha256.equals(theEntity.getHashSha256()) == false) {
changed = true;
}
theEntity.setHashSha256(hashSha256);
} else {
encoding = null;
bytes = null;
resourceBinary = null;
resourceText = null;
}
@ -604,9 +631,12 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
}
} else {
theEntity.setHashSha256(null);
bytes = null;
resourceBinary = null;
resourceText = null;
encoding = ResourceEncodingEnum.DEL;
}
if (thePerformIndexing && !changed) {
@ -625,17 +655,18 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
if (currentHistoryVersion == null) {
currentHistoryVersion = myResourceHistoryTableDao.findForIdAndVersionAndFetchProvenance(theEntity.getId(), theEntity.getVersion());
}
if (currentHistoryVersion == null || currentHistoryVersion.getResource() == null) {
if (currentHistoryVersion == null || !currentHistoryVersion.hasResource()) {
changed = true;
} else {
changed = !Arrays.equals(currentHistoryVersion.getResource(), bytes);
changed = !Arrays.equals(currentHistoryVersion.getResource(), resourceBinary);
}
}
}
EncodedResource retVal = new EncodedResource();
retVal.setEncoding(encoding);
retVal.setResource(bytes);
retVal.setResourceBinary(resourceBinary);
retVal.setResourceText(resourceText);
retVal.setChanged(changed);
return retVal;
@ -905,6 +936,7 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
// 1. get resource, it's encoding and the tags if any
byte[] resourceBytes;
String resourceText;
ResourceEncodingEnum resourceEncoding;
@Nullable
Collection<? extends BaseTag> tagList = Collections.emptyList();
@ -915,6 +947,7 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
if (theEntity instanceof ResourceHistoryTable) {
ResourceHistoryTable history = (ResourceHistoryTable) theEntity;
resourceBytes = history.getResource();
resourceText = history.getResourceTextVc();
resourceEncoding = history.getEncoding();
switch (getConfig().getTagStorageMode()) {
case VERSIONED:
@ -952,6 +985,7 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
}
resourceBytes = history.getResource();
resourceEncoding = history.getEncoding();
resourceText = history.getResourceTextVc();
switch (getConfig().getTagStorageMode()) {
case VERSIONED:
case NON_VERSIONED:
@ -974,6 +1008,7 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
// This is the search View
ResourceSearchView view = (ResourceSearchView) theEntity;
resourceBytes = view.getResource();
resourceText = view.getResourceTextVc();
resourceEncoding = view.getEncoding();
version = view.getVersion();
provenanceRequestId = view.getProvenanceRequestId();
@ -997,7 +1032,12 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
}
// 2. get The text
String resourceText = decodeResource(resourceBytes, resourceEncoding);
String decodedResourceText;
if (resourceText != null) {
decodedResourceText = resourceText;
} else {
decodedResourceText = decodeResource(resourceBytes, resourceEncoding);
}
// 3. Use the appropriate custom type if one is specified in the context
Class<R> resourceType = theResourceType;
@ -1027,7 +1067,7 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
IParser parser = new TolerantJsonParser(getContext(theEntity.getFhirVersion()), errorHandler, theEntity.getId());
try {
retVal = parser.parseResource(resourceType, resourceText);
retVal = parser.parseResource(resourceType, decodedResourceText);
} catch (Exception e) {
StringBuilder b = new StringBuilder();
b.append("Failed to parse database resource[");
@ -1352,7 +1392,8 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
boolean versionedTags = getConfig().getTagStorageMode() == DaoConfig.TagStorageModeEnum.VERSIONED;
final ResourceHistoryTable historyEntry = theEntity.toHistory(versionedTags);
historyEntry.setEncoding(theChanged.getEncoding());
historyEntry.setResource(theChanged.getResource());
historyEntry.setResource(theChanged.getResourceBinary());
historyEntry.setResourceTextVc(theChanged.getResourceText());
ourLog.debug("Saving history entry {}", historyEntry.getIdDt());
myResourceHistoryTableDao.save(historyEntry);
@ -1694,28 +1735,10 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> extends BaseStora
return resourceText;
}
public static byte[] encodeResource(IBaseResource theResource, ResourceEncodingEnum theEncoding, List<String> theExcludeElements, FhirContext theContext) {
byte[] bytes;
public static String encodeResource(IBaseResource theResource, ResourceEncodingEnum theEncoding, List<String> theExcludeElements, FhirContext theContext) {
IParser parser = theEncoding.newParser(theContext);
parser.setDontEncodeElements(theExcludeElements);
String encoded = parser.encodeResourceToString(theResource);
switch (theEncoding) {
case JSON:
bytes = encoded.getBytes(Charsets.UTF_8);
break;
case JSONC:
bytes = GZipUtil.compress(encoded);
break;
default:
case DEL:
bytes = new byte[0];
break;
}
ourLog.debug("Encoded {} chars of resource body as {} bytes", encoded.length(), bytes.length);
return bytes;
return parser.encodeResourceToString(theResource);
}
private static String parseNarrativeTextIntoWords(IBaseResource theResource) {

View File

@ -27,6 +27,7 @@ class EncodedResource {
private boolean myChanged;
private byte[] myResource;
private ResourceEncodingEnum myEncoding;
private String myResourceText;
public ResourceEncodingEnum getEncoding() {
return myEncoding;
@ -36,11 +37,11 @@ class EncodedResource {
myEncoding = theEncoding;
}
public byte[] getResource() {
public byte[] getResourceBinary() {
return myResource;
}
public void setResource(byte[] theResource) {
public void setResourceBinary(byte[] theResource) {
myResource = theResource;
}
@ -52,4 +53,11 @@ class EncodedResource {
myChanged = theChanged;
}
/**
 * Returns the resource body serialized as inline text (used when the resource is
 * small enough to be stored in the RES_TEXT_VC column), or <code>null</code> if the
 * resource was stored in binary/LOB form instead.
 */
public String getResourceText() {
return myResourceText;
}
/**
 * Sets the inline textual form of the resource body. Callers are expected to set
 * either this or the binary form, but not both, for a given encoded resource.
 */
public void setResourceText(String theResourceText) {
myResourceText = theResourceText;
}
}

View File

@ -21,7 +21,6 @@ package ca.uhn.fhir.jpa.entity;
*/
import ca.uhn.fhir.context.FhirVersionEnum;
import ca.uhn.fhir.interceptor.model.RequestPartitionId;
import ca.uhn.fhir.jpa.model.entity.ForcedId;
import ca.uhn.fhir.jpa.model.entity.IBaseResourceEntity;
import ca.uhn.fhir.jpa.model.entity.PartitionablePartitionId;
@ -57,6 +56,7 @@ import java.util.Date;
" h.res_published as res_published, " +
" h.res_updated as res_updated, " +
" h.res_text as res_text, " +
" h.res_text_vc as res_text_vc, " +
" h.res_encoding as res_encoding, " +
" h.PARTITION_ID as PARTITION_ID, " +
" p.SOURCE_URI as PROV_SOURCE_URI," +
@ -100,6 +100,8 @@ public class ResourceSearchView implements IBaseResourceEntity, Serializable {
@Column(name = "RES_TEXT")
@Lob()
private byte[] myResource;
@Column(name = "RES_TEXT_VC")
private String myResourceTextVc;
@Column(name = "RES_ENCODING")
@Enumerated(EnumType.STRING)
private ResourceEncodingEnum myEncoding;
@ -111,6 +113,10 @@ public class ResourceSearchView implements IBaseResourceEntity, Serializable {
public ResourceSearchView() {
}
public String getResourceTextVc() {
return myResourceTextVc;
}
public String getProvenanceRequestId() {
return myProvenanceRequestId;
}

View File

@ -144,6 +144,13 @@ public class HapiFhirJpaMigrationTasks extends BaseMigrationTasks<VersionEnum> {
.onlyAppliesToPlatforms(NON_AUTOMATIC_FK_INDEX_PLATFORMS);
addIndexesForDeleteExpunge(version);
// Add inline resource text column
version.onTable("HFJ_RES_VER")
.addColumn("20220102.1", "RES_TEXT_VC")
.nullable()
.type(ColumnTypeEnum.STRING, 2000);
}
@ -229,6 +236,9 @@ public class HapiFhirJpaMigrationTasks extends BaseMigrationTasks<VersionEnum> {
// HFJ_SEARCH.SEARCH_QUERY_STRING
version.onTable("HFJ_SEARCH")
.migratePostgresTextClobToBinaryClob("20211003.3", "SEARCH_QUERY_STRING");
}
private void init540() {

View File

@ -4,6 +4,7 @@ import ca.uhn.fhir.jpa.api.config.DaoConfig;
import ca.uhn.fhir.jpa.api.model.DaoMethodOutcome;
import ca.uhn.fhir.jpa.model.entity.ModelConfig;
import ca.uhn.fhir.jpa.model.entity.NormalizedQuantitySearchLevel;
import ca.uhn.fhir.jpa.model.entity.ResourceHistoryTable;
import ca.uhn.fhir.jpa.model.entity.ResourceIndexedSearchParamQuantity;
import ca.uhn.fhir.jpa.model.entity.ResourceIndexedSearchParamQuantityNormalized;
import ca.uhn.fhir.jpa.model.entity.ResourceIndexedSearchParamString;
@ -55,6 +56,7 @@ import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.matchesPattern;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
@ -69,6 +71,7 @@ public class FhirResourceDaoR4CreateTest extends BaseJpaR4Test {
myModelConfig.setNormalizedQuantitySearchLevel(NormalizedQuantitySearchLevel.NORMALIZED_QUANTITY_SEARCH_NOT_SUPPORTED);
myModelConfig.setIndexOnContainedResources(new ModelConfig().isIndexOnContainedResources());
myModelConfig.setIndexOnContainedResourcesRecursively(new ModelConfig().isIndexOnContainedResourcesRecursively());
myDaoConfig.setInlineResourceTextBelowSize(new DaoConfig().getInlineResourceTextBelowSize());
}

View File

@ -0,0 +1,100 @@
package ca.uhn.fhir.jpa.dao.r4;
import ca.uhn.fhir.jpa.api.config.DaoConfig;
import ca.uhn.fhir.jpa.api.model.DaoMethodOutcome;
import ca.uhn.fhir.jpa.model.entity.ResourceHistoryTable;
import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
import org.apache.commons.lang3.StringUtils;
import org.hl7.fhir.r4.model.IdType;
import org.hl7.fhir.r4.model.Patient;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class FhirResourceDaoR4InlineResourceModeTest extends BaseJpaR4Test {

	@BeforeEach
	public void beforeSetDao() {
		// Enable inline text storage for any resource serializing to < 5000 chars
		myDaoConfig.setInlineResourceTextBelowSize(5000);
	}

	@AfterEach
	public void afterResetDao() {
		// Restore the out-of-the-box default so other tests are unaffected
		myDaoConfig.setInlineResourceTextBelowSize(new DaoConfig().getInlineResourceTextBelowSize());
	}

	@Test
	public void testCreateWithInlineResourceTextStorage() {
		// Create version 1, then update to produce version 2
		Patient pt = new Patient();
		pt.setActive(true);
		Long id = myPatientDao.create(pt).getId().getIdPartAsLong();

		Patient updated = new Patient();
		updated.setId("Patient/" + id);
		updated.setActive(false);
		myPatientDao.update(updated);

		runInTransaction(() -> {
			// Both versions should be stored inline (RES_TEXT_VC), leaving the LOB column null
			ResourceHistoryTable v1 = myResourceHistoryTableDao.findForIdAndVersionAndFetchProvenance(id, 1);
			assertNull(v1.getResource());
			assertEquals("{\"resourceType\":\"Patient\",\"active\":true}", v1.getResourceTextVc());

			ResourceHistoryTable v2 = myResourceHistoryTableDao.findForIdAndVersionAndFetchProvenance(id, 2);
			assertNull(v2.getResource());
			assertEquals("{\"resourceType\":\"Patient\",\"active\":false}", v2.getResourceTextVc());
		});

		// Both read and search paths should round-trip the inlined text
		Patient read = myPatientDao.read(new IdType("Patient/" + id));
		assertFalse(read.getActive());

		Patient searched = (Patient) myPatientDao.search(SearchParameterMap.newSynchronous()).getAllResources().get(0);
		assertFalse(searched.getActive());
	}

	@Test
	public void testDontUseInlineAboveThreshold() {
		// A family name longer than the 5000-char threshold forces LOB storage
		String hugeFamilyName = StringUtils.leftPad("", 6000, 'a');

		Patient pt = new Patient();
		pt.setActive(true);
		pt.addName().setFamily(hugeFamilyName);
		Long id = myPatientDao.create(pt).getId().getIdPartAsLong();

		runInTransaction(() -> {
			// Oversized resource lands in the binary column; inline column stays null
			ResourceHistoryTable v1 = myResourceHistoryTableDao.findForIdAndVersionAndFetchProvenance(id, 1);
			assertNotNull(v1.getResource());
			assertNull(v1.getResourceTextVc());
		});

		Patient read = myPatientDao.read(new IdType("Patient/" + id));
		assertEquals(hugeFamilyName, read.getNameFirstRep().getFamily());
	}

	@Test
	public void testNopOnUnchangedUpdate() {
		Patient pt = new Patient();
		pt.setActive(true);
		Long id = myPatientDao.create(pt).getId().getIdPartAsLong();

		// Updating with identical contents must be detected as a no-op: no new version
		Patient identical = new Patient();
		identical.setId("Patient/" + id);
		identical.setActive(true);
		DaoMethodOutcome outcome = myPatientDao.update(identical);
		assertEquals("1", outcome.getId().getVersionIdPart());
		assertTrue(outcome.isNop());
	}

}

View File

@ -2319,12 +2319,15 @@ public class FhirResourceDaoR4LegacySearchBuilderTest extends BaseJpaR4Test {
{
// Don't load synchronous
SearchParameterMap map = new SearchParameterMap();
map.setLastUpdated(new DateRangeParam().setUpperBound(new DateParam(ParamPrefixEnum.LESSTHAN, "2022-01-01")));
map.setLastUpdated(new DateRangeParam().setUpperBound(new DateParam(ParamPrefixEnum.LESSTHAN, "2042-01-01")));
myCaptureQueriesListener.clear();
IBundleProvider found = myPatientDao.search(map);
Set<String> dates = new HashSet<>();
String searchId = found.getUuid();
for (int i = 0; i < 9; i++) {
List<IBaseResource> resources = found.getResources(i, i + 1);
myCaptureQueriesListener.logSelectQueries();
if (resources.size() != 1) {
int finalI = i;
int finalI1 = i;

View File

@ -2452,7 +2452,7 @@ public class FhirResourceDaoR4SearchNoFtTest extends BaseJpaR4Test {
{
// Don't load synchronous
SearchParameterMap map = new SearchParameterMap();
map.setLastUpdated(new DateRangeParam().setUpperBound(new DateParam(ParamPrefixEnum.LESSTHAN, "2022-01-01")));
map.setLastUpdated(new DateRangeParam().setUpperBound(new DateParam(ParamPrefixEnum.LESSTHAN, "2042-01-01")));
IBundleProvider found = myPatientDao.search(map);
Set<String> dates = new HashSet<>();
String searchId = found.getUuid();

View File

@ -90,6 +90,7 @@ public class SyntheaPerfTest extends BaseJpaTest {
myDaoConfig.setMatchUrlCacheEnabled(true);
myDaoConfig.setDeleteEnabled(false);
myCtx.getParserOptions().setAutoContainReferenceTargetsWithNoId(false);
myDaoConfig.setInlineResourceTextBelowSize(4000);
assertTrue(myDaoConfig.isMassIngestionMode());

View File

@ -43,7 +43,6 @@ import javax.persistence.OneToMany;
import javax.persistence.OneToOne;
import javax.persistence.SequenceGenerator;
import javax.persistence.Table;
import javax.persistence.Transient;
import javax.persistence.UniqueConstraint;
import java.io.Serializable;
import java.util.ArrayList;
@ -97,10 +96,8 @@ public class ResourceHistoryTable extends BaseHasResource implements Serializabl
@OptimisticLock(excluded = true)
private byte[] myResource;
// TODO: JA For future use or removal
// @Column(name = "RES_TEXT_VC", length = RES_TEXT_VC_MAX_LENGTH, nullable = true)
// @OptimisticLock(excluded = true)
@Transient
@Column(name = "RES_TEXT_VC", length = RES_TEXT_VC_MAX_LENGTH, nullable = true)
@OptimisticLock(excluded = true)
private String myResourceTextVc;
@Column(name = "RES_ENCODING", nullable = false, length = ENCODING_COL_LENGTH)
@ -157,6 +154,20 @@ public class ResourceHistoryTable extends BaseHasResource implements Serializabl
return myId;
}
/**
 * Do not delete, required for java bean introspection
 * (exposes the {@code myId} field under the bean property name "myId").
 */
public Long getMyId() {
return myId;
}
/**
 * Do not delete, required for java bean introspection
 * (setter counterpart for the "myId" bean property).
 */
public void setMyId(Long theId) {
myId = theId;
}
public byte[] getResource() {
return myResource;
}
@ -240,4 +251,12 @@ public class ResourceHistoryTable extends BaseHasResource implements Serializabl
getResourceTable().setForcedId(theForcedId);
}
/**
 * Returns <code>true</code> if there is a populated resource text (i.e.
 * either {@link #getResource()} or {@link #getResourceTextVc()} returns a
 * non-null value).
 */
public boolean hasResource() {
return myResource != null || myResourceTextVc != null;
}
}

View File

@ -283,7 +283,7 @@ public class DaoConfig {
* of characters will be stored inline in the <code>HFJ_RES_VER</code> table instead of using a
* separate LOB column.
*
* @since 7.0.0
* @since 5.7.0
*/
private int myInlineResourceTextBelowSize = 0;
@ -325,7 +325,7 @@ public class DaoConfig {
* of characters will be stored inline in the <code>HFJ_RES_VER</code> table instead of using a
* separate LOB column.
*
* @since 7.0.0
* @since 5.7.0
*/
public int getInlineResourceTextBelowSize() {
return myInlineResourceTextBelowSize;
@ -336,7 +336,7 @@ public class DaoConfig {
* of characters will be stored inline in the <code>HFJ_RES_VER</code> table instead of using a
* separate LOB column.
*
* @since 7.0.0
* @since 5.7.0
*/
public void setInlineResourceTextBelowSize(int theInlineResourceTextBelowSize) {
myInlineResourceTextBelowSize = theInlineResourceTextBelowSize;