Make inline resource storage use a better column type (#4763)

* Use text column on postgres

* Add changelog

* Test fix

* Merge
James Agnew 2023-04-28 12:06:27 -04:00 committed by GitHub
parent e2717bd63a
commit 78ff58277a
13 changed files with 195 additions and 52 deletions

View File

@ -0,0 +1,7 @@
---
type: perf
issue: 4763
title: "The SQL column type used for inline resource storage mode in the JPA server has been
changed in order to avoid arbitrary database size limits. We now use `text` on PostgreSQL,
`long` on Oracle, and `varchar(MAX)` on MSSQL. Previously `varchar(4000)` was used
in all cases, requiring manual resizing in order to support longer values."
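For a concrete sense of the change, the migration corresponds to DDL along these lines on each platform (an illustrative sketch only; the exact statements are generated by the migrator from the type mappings added later in this commit):

-- PostgreSQL: switch to an unlimited-length text column
ALTER TABLE HFJ_RES_VER ALTER COLUMN RES_TEXT_VC TYPE text;
-- Oracle
ALTER TABLE HFJ_RES_VER MODIFY (RES_TEXT_VC LONG);
-- MSSQL
ALTER TABLE HFJ_RES_VER ALTER COLUMN RES_TEXT_VC varchar(MAX);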

View File

@ -445,7 +445,6 @@
<artifactId>hibernate56-ddl-maven-plugin</artifactId>
<configuration>
<dialects>
- <param>h2</param>
<param>derby_10_7</param>
<param>mysql57</param>
<param>mariadb</param>
@ -453,6 +452,7 @@
<param>sqlserver2012</param>
</dialects>
<customDialects>
+ <customDialect>ca.uhn.fhir.jpa.model.dialect.HapiFhirH2Dialect</customDialect>
<customDialect>ca.uhn.fhir.jpa.model.dialect.HapiFhirPostgres94Dialect</customDialect>
<customDialect>org.hibernate.dialect.CockroachDB201Dialect</customDialect>
</customDialects>
@ -508,6 +508,30 @@
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<phase>process-classes</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<concat destfile="${project.basedir}/target/classes/ca/uhn/hapi/fhir/jpa/docs/database/hapifhirpostgres94-complete.sql" force="yes">
<fileset dir="${project.basedir}/target/classes/ca/uhn/hapi/fhir/jpa/docs/database">
<include name="hapifhirpostgres94.sql"/>
</fileset>
<fileset dir="${project.basedir}/src/main/resources/ca/uhn/fhir/jpa/docs/database">
<include name="hapifhirpostgres94-init01.sql"/>
</fileset>
</concat>
</target>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>ca.uhn.hapi.fhir</groupId>
<artifactId>hapi-tinder-plugin</artifactId>

View File

@ -40,7 +40,9 @@ import ca.uhn.fhir.jpa.model.entity.ResourceIndexedSearchParamUri;
import ca.uhn.fhir.jpa.model.entity.ResourceTable;
import ca.uhn.fhir.jpa.model.entity.SearchParamPresentEntity;
import ca.uhn.fhir.jpa.model.entity.StorageSettings;
import ca.uhn.fhir.util.ClasspathUtil;
import ca.uhn.fhir.util.VersionEnum;
import software.amazon.awssdk.utils.StringUtils;
import java.util.Arrays;
import java.util.HashMap;
@ -51,6 +53,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import static ca.uhn.fhir.rest.api.Constants.UUID_LENGTH;
import static org.apache.commons.lang3.StringUtils.trim;
@SuppressWarnings({"SqlNoDataSourceInspection", "SpellCheckingInspection", "java:S1192"})
public class HapiFhirJpaMigrationTasks extends BaseMigrationTasks<VersionEnum> {
@ -265,44 +268,22 @@ public class HapiFhirJpaMigrationTasks extends BaseMigrationTasks<VersionEnum> {
// Postgres tuning.
version.executeRawSqls("20230402.1", Map.of(DriverTypeEnum.POSTGRES_9_4, List.of(
// we can't use convering index until the autovacuum runs for those rows, which kills index performance
"ALTER TABLE hfj_resource SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_forced_id SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_res_link SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_spidx_coords SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_spidx_date SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_spidx_number SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_spidx_quantity SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_spidx_quantity_nrml SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_spidx_string SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_spidx_token SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_spidx_uri SET (autovacuum_vacuum_scale_factor = 0.01)",
String postgresTuningStatementsAll = ClasspathUtil.loadResource("ca/uhn/fhir/jpa/docs/database/hapifhirpostgres94-init01.sql");
List<String> postgresTuningStatements = Arrays
.stream(postgresTuningStatementsAll.split("\\n"))
.map(org.apache.commons.lang3.StringUtils::trim)
.filter(StringUtils::isNotBlank)
.filter(t -> !t.startsWith("--"))
.collect(Collectors.toList());
version.executeRawSqls("20230402.1", Map.of(DriverTypeEnum.POSTGRES_9_4, postgresTuningStatements));
// PG by default tracks the most common 100 values. But our hashes cover 100s of SPs and need greater depth.
// Set stats depth to the max for hash_value columns, and 1000 for hash_identity (one per SP).
"alter table hfj_res_link alter column src_path set statistics 10000",
"alter table hfj_res_link alter column target_resource_id set statistics 10000",
"alter table hfj_res_link alter column src_resource_id set statistics 10000",
"alter table hfj_spidx_coords alter column hash_identity set statistics 1000",
"alter table hfj_spidx_date alter column hash_identity set statistics 1000",
"alter table hfj_spidx_number alter column hash_identity set statistics 1000",
"alter table hfj_spidx_quantity alter column hash_identity set statistics 1000",
"alter table hfj_spidx_quantity alter column hash_identity_and_units set statistics 10000",
"alter table hfj_spidx_quantity alter column hash_identity_sys_units set statistics 10000",
"alter table hfj_spidx_quantity_nrml alter column hash_identity set statistics 1000",
"alter table hfj_spidx_quantity_nrml alter column hash_identity_and_units set statistics 10000",
"alter table hfj_spidx_quantity_nrml alter column hash_identity_sys_units set statistics 10000",
"alter table hfj_spidx_string alter column hash_identity set statistics 1000",
"alter table hfj_spidx_string alter column hash_exact set statistics 10000",
"alter table hfj_spidx_string alter column hash_norm_prefix set statistics 10000",
"alter table hfj_spidx_token alter column hash_identity set statistics 1000",
"alter table hfj_spidx_token alter column hash_sys set statistics 10000",
"alter table hfj_spidx_token alter column hash_sys_and_value set statistics 10000",
"alter table hfj_spidx_token alter column hash_value set statistics 10000",
"alter table hfj_spidx_uri alter column hash_identity set statistics 1000",
"alter table hfj_spidx_uri alter column hash_uri set statistics 10000"
)));
// Use an unlimited length text column for RES_TEXT_VC
version
.onTable("HFJ_RES_VER")
.modifyColumn("20230421.1", "RES_TEXT_VC")
.nullable()
.failureAllowed()
.withType(ColumnTypeEnum.TEXT);
{
// add hash_norm to res_id to speed up joins on a second string.
@ -320,7 +301,6 @@ public class HapiFhirJpaMigrationTasks extends BaseMigrationTasks<VersionEnum> {
linkTable.addForeignKey("20230424.5", "FK_RESLINK_TARGET")
.toColumn("TARGET_RESOURCE_ID").references("HFJ_RESOURCE", "RES_ID");
}
- }
protected void init640() {

View File

@ -0,0 +1,37 @@
-- we can't use a covering index until the autovacuum runs for those rows, which kills index performance
ALTER TABLE hfj_resource SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_forced_id SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_res_link SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_spidx_coords SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_spidx_date SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_spidx_number SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_spidx_quantity SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_spidx_quantity_nrml SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_spidx_string SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_spidx_token SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_spidx_uri SET (autovacuum_vacuum_scale_factor = 0.01);
-- PG by default tracks the most common 100 values. But our hashes cover 100s of SPs and need greater depth.
-- Set stats depth to the max for hash_value columns, and 1000 for hash_identity (one per SP).
alter table hfj_res_link alter column src_path set statistics 10000;
alter table hfj_res_link alter column target_resource_id set statistics 10000;
alter table hfj_res_link alter column src_resource_id set statistics 10000;
alter table hfj_spidx_coords alter column hash_identity set statistics 1000;
alter table hfj_spidx_date alter column hash_identity set statistics 1000;
alter table hfj_spidx_number alter column hash_identity set statistics 1000;
alter table hfj_spidx_quantity alter column hash_identity set statistics 1000;
alter table hfj_spidx_quantity alter column hash_identity_and_units set statistics 10000;
alter table hfj_spidx_quantity alter column hash_identity_sys_units set statistics 10000;
alter table hfj_spidx_quantity_nrml alter column hash_identity set statistics 1000;
alter table hfj_spidx_quantity_nrml alter column hash_identity_and_units set statistics 10000;
alter table hfj_spidx_quantity_nrml alter column hash_identity_sys_units set statistics 10000;
alter table hfj_spidx_string alter column hash_identity set statistics 1000;
alter table hfj_spidx_string alter column hash_exact set statistics 10000;
alter table hfj_spidx_string alter column hash_norm_prefix set statistics 10000;
alter table hfj_spidx_token alter column hash_identity set statistics 1000;
alter table hfj_spidx_token alter column hash_sys set statistics 10000;
alter table hfj_spidx_token alter column hash_sys_and_value set statistics 10000;
alter table hfj_spidx_token alter column hash_value set statistics 10000;
alter table hfj_spidx_uri alter column hash_identity set statistics 1000;
alter table hfj_spidx_uri alter column hash_uri set statistics 10000;
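As a quick sanity check (an illustrative query, not part of this commit), the raised statistics targets can be inspected in the Postgres catalog after the script runs:

-- Columns whose statistics target was raised above the default
SELECT attrelid::regclass AS table_name, attname, attstattarget
FROM pg_attribute
WHERE attrelid = 'hfj_spidx_token'::regclass
  AND attstattarget > 0;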

View File

@ -21,11 +21,30 @@ package ca.uhn.fhir.jpa.model.dialect;
import org.hibernate.dialect.H2Dialect;
import java.sql.Types;
/**
* HAPI FHIR dialect for H2 database
*/
public class HapiFhirH2Dialect extends H2Dialect {
/**
* Constructor
*/
public HapiFhirH2Dialect() {
super();
/*
* These mappings are already defined in the super() constructor, but they
* are only applied if the dialect is able to connect to the database and
* determine that it is running a recent enough version of H2 to support
* them. As a result, the Maven plugin that performs offline schema
* generation never adds them, so this dialect forces the correct definitions.
*/
registerColumnType(Types.LONGVARCHAR, "character varying");
registerColumnType(Types.BINARY, "binary($l)");
}
/**
* Workaround until this bug is fixed:
* https://hibernate.atlassian.net/browse/HHH-15002
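To illustrate the effect (a sketch; the table and column follow the entity shown further down, and the exact DDL comes from the hibernate56-ddl-maven-plugin): with the forced registration in place, the offline-generated H2 schema declares a LONGVARCHAR-mapped property as character varying:

-- Illustrative H2 DDL for the RES_TEXT_VC mapping
create table HFJ_RES_VER (
    PID bigint not null,
    RES_TEXT_VC character varying,
    primary key (PID)
);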

View File

@ -20,6 +20,7 @@
package ca.uhn.fhir.jpa.model.entity;
import ca.uhn.fhir.jpa.model.dao.JpaPid;
import ca.uhn.fhir.jpa.model.util.JpaConstants;
import ca.uhn.fhir.model.primitive.IdDt;
import ca.uhn.fhir.rest.api.Constants;
import org.apache.commons.lang3.builder.ToStringBuilder;
@ -89,6 +90,7 @@ public class ResourceHistoryTable extends BaseHasResource implements Serializabl
@OptimisticLock(excluded = true)
private byte[] myResource;
@Column(name = "RES_TEXT_VC", length = RES_TEXT_VC_MAX_LENGTH, nullable = true)
@org.hibernate.annotations.Type(type = JpaConstants.ORG_HIBERNATE_TYPE_TEXT_TYPE)
@OptimisticLock(excluded = true)
private String myResourceTextVc;

View File

@ -295,6 +295,7 @@ public class JpaConstants {
* IPS Generation operation URL
*/
public static final String SUMMARY_OPERATION_URL = "http://hl7.org/fhir/uv/ips/OperationDefinition/summary";
public static final String ORG_HIBERNATE_TYPE_TEXT_TYPE = "org.hibernate.type.TextType";
public static final String BULK_META_EXTENSION_EXPORT_IDENTIFIER = "https://hapifhir.org/NamingSystem/bulk-export-identifier";
public static final String BULK_META_EXTENSION_JOB_ID = "https://hapifhir.org/NamingSystem/bulk-export-job-id";
public static final String BULK_META_EXTENSION_RESOURCE_TYPE = "https://hapifhir.org/NamingSystem/bulk-export-binary-resource-type";

View File

@ -86,7 +86,7 @@ public enum DriverTypeEnum {
String retval;
switch (this) {
case H2_EMBEDDED:
retval = "h2.sql";
retval = "hapifhirh2.sql";
break;
case DERBY_EMBEDDED:
retval = "derbytenseven.sql";
@ -96,7 +96,7 @@ public enum DriverTypeEnum {
retval = "mysql57.sql";
break;
case POSTGRES_9_4:
retval = "hapifhirpostgres94.sql";
retval = "hapifhirpostgres94-complete.sql";
break;
case ORACLE_12C:
retval = "oracle12c.sql";

View File

@ -31,5 +31,6 @@ public enum ColumnTypeEnum {
TINYINT,
BLOB,
CLOB,
- DOUBLE;
+ DOUBLE,
+ TEXT;
}

View File

@ -118,6 +118,14 @@ public final class ColumnTypeToDriverTypeToSqlType {
setColumnType(ColumnTypeEnum.CLOB, DriverTypeEnum.ORACLE_12C, "clob");
setColumnType(ColumnTypeEnum.CLOB, DriverTypeEnum.POSTGRES_9_4, "oid"); // the PG driver will write oid into a `text` column
setColumnType(ColumnTypeEnum.CLOB, DriverTypeEnum.MSSQL_2012, "varchar(MAX)");
setColumnType(ColumnTypeEnum.TEXT, DriverTypeEnum.H2_EMBEDDED, "character varying");
setColumnType(ColumnTypeEnum.TEXT, DriverTypeEnum.DERBY_EMBEDDED, "long varchar");
setColumnType(ColumnTypeEnum.TEXT, DriverTypeEnum.MARIADB_10_1, "longtext");
setColumnType(ColumnTypeEnum.TEXT, DriverTypeEnum.MYSQL_5_7, "longtext");
setColumnType(ColumnTypeEnum.TEXT, DriverTypeEnum.ORACLE_12C, "long");
setColumnType(ColumnTypeEnum.TEXT, DriverTypeEnum.POSTGRES_9_4, "text");
setColumnType(ColumnTypeEnum.TEXT, DriverTypeEnum.MSSQL_2012, "varchar(MAX)");
}
public static Map<ColumnTypeEnum, Map<DriverTypeEnum, String>> getColumnTypeToDriverTypeToSqlType() {

View File

@ -32,6 +32,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.apache.commons.lang3.StringUtils.trim;
public class ExecuteRawSqlTask extends BaseTask {
private static final Logger ourLog = LoggerFactory.getLogger(ExecuteRawSqlTask.class);
@ -48,7 +50,13 @@ public class ExecuteRawSqlTask extends BaseTask {
Validate.notBlank(theSql);
List<String> list = myDriverToSqls.computeIfAbsent(theDriverType, t -> new ArrayList<>());
- list.add(theSql);
+ String sql = trim(theSql);
+ // Trim the semicolon at the end if one is present
+ while (sql.endsWith(";")) {
+ 	sql = sql.substring(0, sql.length() - 1);
+ }
+ list.add(sql);
return this;
}
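The trimming matters because the registered statements are executed directly over JDBC, and some drivers (Oracle's, for example) reject a trailing semicolon. Illustration (hypothetical input): a statement registered as

ALTER TABLE hfj_resource SET (autovacuum_vacuum_scale_factor = 0.01);

is stored and executed without the terminator:

ALTER TABLE hfj_resource SET (autovacuum_vacuum_scale_factor = 0.01)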

View File

@ -23,6 +23,7 @@ import ca.uhn.fhir.context.ConfigurationException;
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.jpa.migrate.DriverTypeEnum;
import ca.uhn.fhir.jpa.migrate.tasks.api.ISchemaInitializationProvider;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.builder.HashCodeBuilder;
@ -35,6 +36,7 @@ import java.util.Collections;
import java.util.List;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.trim;
public class SchemaInitializationProvider implements ISchemaInitializationProvider {
@ -71,18 +73,29 @@ public class SchemaInitializationProvider implements ISchemaInitializationProvid
}
// Assumes no escaped semicolons...
String sqlString = IOUtils.toString(sqlFileInputStream, Charsets.UTF_8);
+ parseSqlFileIntoIndividualStatements(theDriverType, retval, sqlString);
+ } catch (IOException e) {
+ 	throw new ConfigurationException(Msg.code(50) + "Error reading schema initialization script " + initScript, e);
+ }
+ return retval;
+ }
+ @VisibleForTesting
+ void parseSqlFileIntoIndividualStatements(DriverTypeEnum theDriverType, List<String> retval, String theSqlString) {
+ 	String sqlString = theSqlString.replaceAll("--.*", "");
String sqlStringNoComments = preProcessSqlString(theDriverType, sqlString);
String[] statements = sqlStringNoComments.split("\\;");
for (String statement : statements) {
String cleanedStatement = preProcessSqlStatement(theDriverType, statement);
if (!isBlank(cleanedStatement)) {
- retval.add(cleanedStatement);
+ String next = trim(cleanedStatement);
+ next = next.replace('\n', ' ');
+ next = next.replace('\r', ' ');
+ next = next.replaceAll(" +", " ");
+ retval.add(next);
}
}
- } catch (IOException e) {
- 	throw new ConfigurationException(Msg.code(50) + "Error reading schema initialization script " + initScript, e);
- }
- return retval;
- }
protected String preProcessSqlString(DriverTypeEnum theDriverType, String sqlString) {

View File

@ -0,0 +1,43 @@
package ca.uhn.fhir.jpa.migrate.tasks;
import ca.uhn.fhir.jpa.migrate.DriverTypeEnum;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.List;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
public class SchemaInitializationProviderTest {
@Test
public void testParseSqlFileIntoIndividualStatements() {
SchemaInitializationProvider svc = new SchemaInitializationProvider(null, null, null, true);
String input = """
create sequence foo;
alter table if exists CDR_XACT_LOG_STEP
add constraint FK_XACTLOGSTEP_XACTLOG
foreign key (LOG_PID)
-- comment in a weird spot
references CDR_XACT_LOG;
-- we can't use a covering index until the autovacuum runs for those rows, which kills index performance
ALTER TABLE hfj_resource SET (autovacuum_vacuum_scale_factor = 0.01);
ALTER TABLE hfj_forced_id SET (autovacuum_vacuum_scale_factor = 0.01);
""";
List<String> listToPopulate = new ArrayList<>();
svc.parseSqlFileIntoIndividualStatements(DriverTypeEnum.POSTGRES_9_4, listToPopulate, input);
assertThat(listToPopulate.toString(), listToPopulate, contains(
"create sequence foo",
"alter table if exists CDR_XACT_LOG_STEP add constraint FK_XACTLOGSTEP_XACTLOG foreign key (LOG_PID) references CDR_XACT_LOG",
"ALTER TABLE hfj_resource SET (autovacuum_vacuum_scale_factor = 0.01)",
"ALTER TABLE hfj_forced_id SET (autovacuum_vacuum_scale_factor = 0.01)"
));
}
}