Reduce use of LOB columns (#5799)

* Reduce LOB usage in Batch2 and Search (#5748)

* Reduce LOB usage in Batch2 and Search

* Add changelog

* Rework a number of LOB columns

* Test fix

* Test fix

* Column fixes

* Test fix

* Formatting

* Fixes

* patching bad oracle test

* Apply spotless

---------

Co-authored-by: Tadgh <garygrantgraham@gmail.com>

* Clean up

* Revert change

* One more revert

* Resolve compile issue

---------

Co-authored-by: Tadgh <garygrantgraham@gmail.com>
This commit is contained in:
James Agnew 2024-03-27 14:19:07 -04:00 committed by GitHub
parent 2f9693a1e4
commit 6175807f58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 133 additions and 42 deletions

View File

@ -0,0 +1,8 @@
---
type: perf
issue: 5748
title: "In the JPA server, several database columns related to Batch2 jobs and searching
have been reworked so that they will no longer use LOB datatypes going forward. This
is a significant advantage on PostgreSQL databases as it removes a major use
of the inefficient `pg_largeobject` table, and should yield performance boosts for
MSSQL as well."

View File

@ -55,14 +55,15 @@ public interface IBatch2JobInstanceRepository
int updateWorkChunksPurgedTrue(@Param("id") String theInstanceId);
@Query(
"SELECT b from Batch2JobInstanceEntity b WHERE b.myDefinitionId = :defId AND b.myParamsJson = :params AND b.myStatus IN( :stats )")
"SELECT b from Batch2JobInstanceEntity b WHERE b.myDefinitionId = :defId AND (b.myParamsJson = :params OR b.myParamsJsonVc = :params) AND b.myStatus IN( :stats )")
List<Batch2JobInstanceEntity> findInstancesByJobIdParamsAndStatus(
@Param("defId") String theDefinitionId,
@Param("params") String theParams,
@Param("stats") Set<StatusEnum> theStatus,
Pageable thePageable);
@Query("SELECT b from Batch2JobInstanceEntity b WHERE b.myDefinitionId = :defId AND b.myParamsJson = :params")
@Query(
"SELECT b from Batch2JobInstanceEntity b WHERE b.myDefinitionId = :defId AND (b.myParamsJson = :params OR b.myParamsJsonVc = :params)")
List<Batch2JobInstanceEntity> findInstancesByJobIdAndParams(
@Param("defId") String theDefinitionId, @Param("params") String theParams, Pageable thePageable);

View File

@ -65,7 +65,7 @@ public interface IBatch2WorkChunkRepository
@Modifying
@Query("UPDATE Batch2WorkChunkEntity e SET e.myStatus = :status, e.myEndTime = :et, "
+ "e.myRecordsProcessed = :rp, e.myErrorCount = e.myErrorCount + :errorRetries, e.mySerializedData = null, "
+ "e.myRecordsProcessed = :rp, e.myErrorCount = e.myErrorCount + :errorRetries, e.mySerializedData = null, e.mySerializedDataVc = null, "
+ "e.myWarningMessage = :warningMessage WHERE e.myId = :id")
void updateChunkStatusAndClearDataForEndSuccess(
@Param("id") String theChunkId,
@ -77,7 +77,7 @@ public interface IBatch2WorkChunkRepository
@Modifying
@Query(
"UPDATE Batch2WorkChunkEntity e SET e.myStatus = :status, e.myEndTime = :et, e.mySerializedData = null, e.myErrorMessage = :em WHERE e.myId IN(:ids)")
"UPDATE Batch2WorkChunkEntity e SET e.myStatus = :status, e.myEndTime = :et, e.mySerializedData = null, e.mySerializedDataVc = null, e.myErrorMessage = :em WHERE e.myId IN(:ids)")
void updateAllChunksForInstanceStatusClearDataAndSetError(
@Param("ids") List<String> theChunkIds,
@Param("et") Date theEndTime,

View File

@ -36,6 +36,7 @@ import jakarta.persistence.TemporalType;
import jakarta.persistence.Version;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.hibernate.Length;
import java.io.Serializable;
import java.util.Date;
@ -95,13 +96,17 @@ public class Batch2JobInstanceEntity implements Serializable {
@Column(name = "FAST_TRACKING", nullable = true)
private Boolean myFastTracking;
// TODO: VC column added in 7.2.0 - Remove non-VC column later
@Column(name = "PARAMS_JSON", length = PARAMS_JSON_MAX_LENGTH, nullable = true)
private String myParamsJson;
@Lob
@Lob // TODO: VC column added in 7.2.0 - Remove non-VC column later
@Column(name = "PARAMS_JSON_LOB", nullable = true)
private String myParamsJsonLob;
@Column(name = "PARAMS_JSON_VC", nullable = true, length = Length.LONG32)
private String myParamsJsonVc;
@Column(name = "CMB_RECS_PROCESSED", nullable = true)
private Integer myCombinedRecordsProcessed;
@ -142,11 +147,14 @@ public class Batch2JobInstanceEntity implements Serializable {
* Any output from the job can be held in this column
* Even serialized json
*/
@Lob
@Lob // TODO: VC column added in 7.2.0 - Remove non-VC column later
@Basic(fetch = FetchType.LAZY)
@Column(name = "REPORT", nullable = true, length = Integer.MAX_VALUE - 1)
private String myReport;
@Column(name = "REPORT_VC", nullable = true, length = Length.LONG32)
private String myReportVc;
public String getCurrentGatedStepId() {
return myCurrentGatedStepId;
}
@ -260,6 +268,9 @@ public class Batch2JobInstanceEntity implements Serializable {
}
public String getParams() {
if (myParamsJsonVc != null) {
return myParamsJsonVc;
}
if (myParamsJsonLob != null) {
return myParamsJsonLob;
}
@ -267,13 +278,9 @@ public class Batch2JobInstanceEntity implements Serializable {
}
public void setParams(String theParams) {
myParamsJsonVc = theParams;
myParamsJsonLob = null;
myParamsJson = null;
if (theParams != null && theParams.length() > PARAMS_JSON_MAX_LENGTH) {
myParamsJsonLob = theParams;
} else {
myParamsJson = theParams;
}
}
public boolean getWorkChunksPurged() {
@ -309,11 +316,12 @@ public class Batch2JobInstanceEntity implements Serializable {
}
public String getReport() {
return myReport;
return myReportVc != null ? myReportVc : myReport;
}
public void setReport(String theReport) {
myReport = theReport;
myReportVc = theReport;
myReport = null;
}
public String getWarningMessages() {
@ -362,7 +370,7 @@ public class Batch2JobInstanceEntity implements Serializable {
.append("progress", myProgress)
.append("errorMessage", myErrorMessage)
.append("estimatedTimeRemaining", myEstimatedTimeRemaining)
.append("report", myReport)
.append("report", getReport())
.append("warningMessages", myWarningMessages)
.append("initiatingUsername", myTriggeringUsername)
.append("initiatingclientId", myTriggeringClientId)

View File

@ -38,6 +38,7 @@ import jakarta.persistence.TemporalType;
import jakarta.persistence.Version;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.hibernate.Length;
import java.io.Serializable;
import java.util.Date;
@ -92,11 +93,14 @@ public class Batch2WorkChunkEntity implements Serializable {
@Column(name = "TGT_STEP_ID", length = ID_MAX_LENGTH, nullable = false)
private String myTargetStepId;
@Lob
@Lob // TODO: VC column added in 7.2.0 - Remove non-VC column later
@Basic(fetch = FetchType.LAZY)
@Column(name = "CHUNK_DATA", nullable = true, length = Integer.MAX_VALUE - 1)
private String mySerializedData;
@Column(name = "CHUNK_DATA_VC", nullable = true, length = Length.LONG32)
private String mySerializedDataVc;
@Column(name = "STAT", length = STATUS_MAX_LENGTH, nullable = false)
@Enumerated(EnumType.STRING)
private WorkChunkStatusEnum myStatus;
@ -263,11 +267,12 @@ public class Batch2WorkChunkEntity implements Serializable {
}
public String getSerializedData() {
return mySerializedData;
return mySerializedDataVc != null ? mySerializedDataVc : mySerializedData;
}
public void setSerializedData(String theSerializedData) {
mySerializedData = theSerializedData;
mySerializedData = null;
mySerializedDataVc = theSerializedData;
}
public WorkChunkStatusEnum getStatus() {
@ -309,7 +314,7 @@ public class Batch2WorkChunkEntity implements Serializable {
.append("updateTime", myUpdateTime)
.append("recordsProcessed", myRecordsProcessed)
.append("targetStepId", myTargetStepId)
.append("serializedData", mySerializedData)
.append("serializedData", getSerializedData())
.append("status", myStatus)
.append("errorMessage", myErrorMessage)
.append("warningMessage", myWarningMessage)

View File

@ -32,6 +32,7 @@ import jakarta.persistence.Lob;
import jakarta.persistence.ManyToOne;
import jakarta.persistence.SequenceGenerator;
import jakarta.persistence.Table;
import org.hibernate.Length;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
@ -66,10 +67,13 @@ public class BulkImportJobFileEntity implements Serializable {
@Column(name = "FILE_DESCRIPTION", nullable = true, length = MAX_DESCRIPTION_LENGTH)
private String myFileDescription;
@Lob
@Column(name = "JOB_CONTENTS", nullable = false)
@Lob // TODO: VC column added in 7.2.0 - Remove non-VC column later
@Column(name = "JOB_CONTENTS", nullable = true)
private byte[] myContents;
@Column(name = "JOB_CONTENTS_VC", nullable = true, length = Length.LONG32)
private String myContentsVc;
@Column(name = "TENANT_NAME", nullable = true, length = PartitionEntity.MAX_NAME_LENGTH)
private String myTenantName;
@ -98,11 +102,16 @@ public class BulkImportJobFileEntity implements Serializable {
}
public String getContents() {
return new String(myContents, StandardCharsets.UTF_8);
if (myContentsVc != null) {
return myContentsVc;
} else {
return new String(myContents, StandardCharsets.UTF_8);
}
}
public void setContents(String theContents) {
myContents = theContents.getBytes(StandardCharsets.UTF_8);
myContentsVc = theContents;
myContents = null;
}
public BulkImportJobFileJson toJson() {

View File

@ -50,6 +50,7 @@ import jakarta.persistence.UniqueConstraint;
import jakarta.persistence.Version;
import org.apache.commons.lang3.SerializationUtils;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.hibernate.Length;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.annotations.OptimisticLock;
import org.hibernate.type.SqlTypes;
@ -141,14 +142,21 @@ public class Search implements ICachedSearchDetails, Serializable {
@Column(name = "RESOURCE_TYPE", length = 200, nullable = true)
private String myResourceType;
/**
* Note that this field may have the request partition IDs prepended to it
*/
@Lob()
@Lob // TODO: VC column added in 7.2.0 - Remove non-VC column later
@Basic(fetch = FetchType.LAZY)
@Column(name = "SEARCH_QUERY_STRING", nullable = true, updatable = false, length = MAX_SEARCH_QUERY_STRING)
private String mySearchQueryString;
/**
* Note that this field may have the request partition IDs prepended to it
*/
@Column(name = "SEARCH_QUERY_STRING_VC", nullable = true, length = Length.LONG32)
private String mySearchQueryStringVc;
@Column(name = "SEARCH_QUERY_STRING_HASH", nullable = true, updatable = false)
private Integer mySearchQueryStringHash;
@ -172,10 +180,13 @@ public class Search implements ICachedSearchDetails, Serializable {
@Column(name = "OPTLOCK_VERSION", nullable = true)
private Integer myVersion;
@Lob
@Lob // TODO: VC column added in 7.2.0 - Remove non-VC column later
@Column(name = "SEARCH_PARAM_MAP", nullable = true)
private byte[] mySearchParameterMap;
@Column(name = "SEARCH_PARAM_MAP_BIN", nullable = true, length = Length.LONG32)
private byte[] mySearchParameterMapBin;
@Transient
private transient SearchParameterMap mySearchParameterMapTransient;
@ -350,7 +361,7 @@ public class Search implements ICachedSearchDetails, Serializable {
* Note that this field may have the request partition IDs prepended to it
*/
public String getSearchQueryString() {
return mySearchQueryString;
return mySearchQueryStringVc != null ? mySearchQueryStringVc : mySearchQueryString;
}
public void setSearchQueryString(String theSearchQueryString, RequestPartitionId theRequestPartitionId) {
@ -362,12 +373,13 @@ public class Search implements ICachedSearchDetails, Serializable {
// We want this field to always have a wide distribution of values in order
// to avoid optimizers avoiding using it if it has lots of nulls, so in the
// case of null, just put a value that will never be hit
mySearchQueryString = UUID.randomUUID().toString();
mySearchQueryStringVc = UUID.randomUUID().toString();
} else {
mySearchQueryString = searchQueryString;
mySearchQueryStringVc = searchQueryString;
}
mySearchQueryStringHash = mySearchQueryString.hashCode();
mySearchQueryString = null;
mySearchQueryStringHash = mySearchQueryStringVc.hashCode();
}
public SearchTypeEnum getSearchType() {
@ -466,8 +478,12 @@ public class Search implements ICachedSearchDetails, Serializable {
return Optional.of(mySearchParameterMapTransient);
}
SearchParameterMap searchParameterMap = null;
if (mySearchParameterMap != null) {
searchParameterMap = SerializationUtils.deserialize(mySearchParameterMap);
byte[] searchParameterMapSerialized = mySearchParameterMapBin;
if (searchParameterMapSerialized == null) {
searchParameterMapSerialized = mySearchParameterMap;
}
if (searchParameterMapSerialized != null) {
searchParameterMap = SerializationUtils.deserialize(searchParameterMapSerialized);
mySearchParameterMapTransient = searchParameterMap;
}
return Optional.ofNullable(searchParameterMap);
@ -475,7 +491,8 @@ public class Search implements ICachedSearchDetails, Serializable {
public void setSearchParameterMap(SearchParameterMap theSearchParameterMap) {
mySearchParameterMapTransient = theSearchParameterMap;
mySearchParameterMap = SerializationUtils.serialize(theSearchParameterMap);
mySearchParameterMapBin = SerializationUtils.serialize(theSearchParameterMap);
mySearchParameterMap = null;
}
@Override

View File

@ -117,6 +117,7 @@ public class HapiFhirJpaMigrationTasks extends BaseMigrationTasks<VersionEnum> {
init640_after_20230126();
init660();
init680();
init680_Part2();
init700();
}
@ -218,6 +219,44 @@ public class HapiFhirJpaMigrationTasks extends BaseMigrationTasks<VersionEnum> {
version.addTask(new ForceIdMigrationFixTask(version.getRelease(), "20231222.1"));
}
/**
 * Registers a second group of schema migration tasks on the builder for
 * {@code VersionEnum.V6_8_0}.
 * <p>
 * The tasks add new nullable columns alongside existing ones on four tables:
 * <ul>
 * <li>BT2_JOB_INSTANCE: REPORT_VC and PARAMS_JSON_VC (TEXT)</li>
 * <li>BT2_WORK_CHUNK: CHUNK_DATA_VC (TEXT)</li>
 * <li>HFJ_SEARCH: SEARCH_QUERY_STRING_VC (TEXT) and SEARCH_PARAM_MAP_BIN (BINARY)</li>
 * <li>HFJ_BLK_IMPORT_JOBFILE: JOB_CONTENTS_VC (TEXT)</li>
 * </ul>
 * The final task modifies the existing JOB_CONTENTS column on
 * HFJ_BLK_IMPORT_JOBFILE to be a nullable BLOB.
 */
private void init680_Part2() {
Builder version = forVersion(VersionEnum.V6_8_0);
// Add additional LOB migration columns
// NOTE(review): the first string argument of each task (e.g. "20240227.1") looks like
// a unique, ordered task identifier - confirm against the Builder API.
version.onTable("BT2_JOB_INSTANCE")
.addColumn("20240227.1", "REPORT_VC")
.nullable()
.type(ColumnTypeEnum.TEXT);
version.onTable("BT2_JOB_INSTANCE")
.addColumn("20240227.2", "PARAMS_JSON_VC")
.nullable()
.type(ColumnTypeEnum.TEXT);
version.onTable("BT2_WORK_CHUNK")
.addColumn("20240227.3", "CHUNK_DATA_VC")
.nullable()
.type(ColumnTypeEnum.TEXT);
version.onTable("HFJ_SEARCH")
.addColumn("20240227.4", "SEARCH_QUERY_STRING_VC")
.nullable()
.type(ColumnTypeEnum.TEXT);
// Unlike the other new columns, this one holds binary data rather than text
version.onTable("HFJ_SEARCH")
.addColumn("20240227.5", "SEARCH_PARAM_MAP_BIN")
.nullable()
.type(ColumnTypeEnum.BINARY);
version.onTable("HFJ_BLK_IMPORT_JOBFILE")
.addColumn("20240227.6", "JOB_CONTENTS_VC")
.nullable()
.type(ColumnTypeEnum.TEXT);
// The existing BLOB column is altered (not added) so that it becomes nullable
version.onTable("HFJ_BLK_IMPORT_JOBFILE")
.modifyColumn("20240227.7", "JOB_CONTENTS")
.nullable()
.withType(ColumnTypeEnum.BLOB);
}
protected void init680() {
Builder version = forVersion(VersionEnum.V6_8_0);
// HAPI-FHIR #4801 - Add New Index On HFJ_RESOURCE

View File

@ -207,14 +207,7 @@ public class JdbcUtils {
case Types.BLOB:
return new ColumnType(ColumnTypeEnum.BLOB, length);
case Types.LONGVARBINARY:
if (DriverTypeEnum.MYSQL_5_7.equals(theConnectionProperties.getDriverType())) {
// See git
return new ColumnType(ColumnTypeEnum.BLOB, length);
} else {
throw new IllegalArgumentException(
Msg.code(32) + "Don't know how to handle datatype " + dataType
+ " for column " + theColumnName + " on table " + theTableName);
}
return new ColumnType(ColumnTypeEnum.BINARY, length);
case Types.VARBINARY:
if (DriverTypeEnum.MSSQL_2012.equals(theConnectionProperties.getDriverType())) {
// MS SQLServer seems to be mapping BLOB to VARBINARY under the covers, so we need

View File

@ -37,5 +37,7 @@ public enum ColumnTypeEnum {
* <code>@Column(length=Integer.MAX_VALUE)</code>
*/
TEXT,
/** Long inline binary */
BINARY,
BIG_DECIMAL;
}

View File

@ -131,6 +131,14 @@ public final class ColumnTypeToDriverTypeToSqlType {
setColumnType(ColumnTypeEnum.TEXT, DriverTypeEnum.POSTGRES_9_4, "text");
setColumnType(ColumnTypeEnum.TEXT, DriverTypeEnum.MSSQL_2012, "varchar(MAX)");
setColumnType(ColumnTypeEnum.BINARY, DriverTypeEnum.H2_EMBEDDED, "blob");
setColumnType(ColumnTypeEnum.BINARY, DriverTypeEnum.DERBY_EMBEDDED, "blob");
setColumnType(ColumnTypeEnum.BINARY, DriverTypeEnum.MARIADB_10_1, "longblob");
setColumnType(ColumnTypeEnum.BINARY, DriverTypeEnum.MYSQL_5_7, "longblob");
setColumnType(ColumnTypeEnum.BINARY, DriverTypeEnum.ORACLE_12C, "blob");
setColumnType(ColumnTypeEnum.BINARY, DriverTypeEnum.POSTGRES_9_4, "bytea");
setColumnType(ColumnTypeEnum.BINARY, DriverTypeEnum.MSSQL_2012, "varbinary(MAX)");
setColumnType(ColumnTypeEnum.BIG_DECIMAL, DriverTypeEnum.H2_EMBEDDED, "numeric(38,2)");
setColumnType(ColumnTypeEnum.BIG_DECIMAL, DriverTypeEnum.DERBY_EMBEDDED, "decimal(31,2)");
setColumnType(ColumnTypeEnum.BIG_DECIMAL, DriverTypeEnum.MARIADB_10_1, "decimal(38,2)");

View File

@ -133,9 +133,10 @@ public class ModifyColumnTask extends BaseTableColumnTypeTask {
}
break;
case ORACLE_12C:
String oracleNullableStmt = !alreadyCorrectNullable ? notNull : "";
sql = "alter table " + getTableName() + " modify ( " + getColumnName() + " " + type + oracleNullableStmt
+ " )";
String oracleNullableStmt = alreadyCorrectNullable ? "" : notNull;
String oracleTypeStmt = alreadyOfCorrectType ? "" : type;
sql = "alter table " + getTableName() + " modify ( " + getColumnName() + " " + oracleTypeStmt + " "
+ oracleNullableStmt + " )";
break;
case MSSQL_2012:
sql = "alter table " + getTableName() + " alter column " + getColumnName() + " " + type + notNull;