Merge the fhir_id copy migration with the fhir_id fix to avoid traversing hfj_resource twice. (#5552)
Turn off the original migration ForceIdMigrationCopyTask. Fix it anyway so nobody copies bad code. Also add another migration ForceIdMigrationFixTask that fixes the bad data, as well as fills in the fhir_id column for new migrations.
This commit is contained in:
parent
71b0987fc2
commit
cc294505cc
|
@ -0,0 +1,5 @@
|
|||
### Major Database Change
|
||||
|
||||
This release contains a migration that covers every resource.
|
||||
This may take several minutes on a larger system (e.g. 10 minutes for 100 million resources).
|
||||
For zero-downtime, or for larger systems, we recommend you upgrade the schema using the CLI tools.
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
type: fix
|
||||
issue: 5546
|
||||
backport: 6.10.1
|
||||
title: "A database migration added trailing spaces to server-assigned resource ids.
|
||||
This fix corrects the migration, and adds another migration to fix the errors."
|
||||
This fix removes the bad migration, and adds another migration to fix the errors."
|
||||
|
|
|
@ -115,10 +115,19 @@ public class HapiFhirJpaMigrationTasks extends BaseMigrationTasks<VersionEnum> {
|
|||
|
||||
// Move forced_id constraints to hfj_resource and the new fhir_id column
|
||||
// Note: we leave the HFJ_FORCED_ID.IDX_FORCEDID_TYPE_FID index in place to support old writers for a while.
|
||||
version.addTask(new ForceIdMigrationCopyTask(version.getRelease(), "20231018.1"));
|
||||
version.addTask(new ForceIdMigrationCopyTask(version.getRelease(), "20231018.1").setDoNothing(true));
|
||||
|
||||
Builder.BuilderWithTableName hfjResource = version.onTable("HFJ_RESOURCE");
|
||||
hfjResource.modifyColumn("20231018.2", "FHIR_ID").nonNullable();
|
||||
// commented out to make numeric space for the fix task below.
|
||||
// This constraint can't be enabled until the column is fully populated, and the shipped version of 20231018.1
|
||||
// was broken.
|
||||
// hfjResource.modifyColumn("20231018.2", "FHIR_ID").nonNullable();
|
||||
|
||||
// this was inserted after the release.
|
||||
version.addTask(new ForceIdMigrationFixTask(version.getRelease(), "20231018.3"));
|
||||
|
||||
// added back in place of 20231018.2. If 20231018.2 already ran, this is a no-op.
|
||||
hfjResource.modifyColumn("20231018.4", "FHIR_ID").nonNullable();
|
||||
|
||||
hfjResource.dropIndex("20231027.1", "IDX_RES_FHIR_ID");
|
||||
hfjResource
|
||||
|
|
|
@ -22,8 +22,8 @@ import org.springframework.jdbc.core.JdbcTemplate;
|
|||
import javax.sql.DataSource;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import static ca.uhn.fhir.jpa.embedded.HapiEmbeddedDatabasesExtension.FIRST_TESTED_VERSION;
|
||||
import static ca.uhn.fhir.jpa.migrate.SchemaMigrator.HAPI_FHIR_MIGRATION_TABLENAME;
|
||||
|
@ -73,7 +73,7 @@ public class HapiSchemaMigrationTest {
|
|||
|
||||
VersionEnum[] allVersions = VersionEnum.values();
|
||||
|
||||
Set<VersionEnum> dataVersions = Set.of(
|
||||
List<VersionEnum> dataVersions = List.of(
|
||||
VersionEnum.V5_2_0,
|
||||
VersionEnum.V5_3_0,
|
||||
VersionEnum.V5_4_0,
|
||||
|
@ -129,8 +129,10 @@ public class HapiSchemaMigrationTest {
|
|||
private void verifyForcedIdMigration(DataSource theDataSource) throws SQLException {
|
||||
JdbcTemplate jdbcTemplate = new JdbcTemplate(theDataSource);
|
||||
@SuppressWarnings("DataFlowIssue")
|
||||
int count = jdbcTemplate.queryForObject("select count(1) from hfj_resource where fhir_id <> trim(fhir_id)", Integer.class);
|
||||
assertEquals(0, count, "no fhir_id should contain a space");
|
||||
int nullCount = jdbcTemplate.queryForObject("select count(1) from hfj_resource where fhir_id is null", Integer.class);
|
||||
assertEquals(0, nullCount, "no fhir_id should be null");
|
||||
int trailingSpaceCount = jdbcTemplate.queryForObject("select count(1) from hfj_resource where fhir_id <> trim(fhir_id)", Integer.class);
|
||||
assertEquals(0, trailingSpaceCount, "no fhir_id should contain a space");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ import java.sql.SQLException;
|
|||
/**
|
||||
* Fix for bad version of {@link ForceIdMigrationCopyTask}
|
||||
* The earlier migration had used at cast to char instead of varchar, which is space-padded on Oracle.
|
||||
* This migration includes the copy action, but also adds a trim() call to fixup the bad server-assigned ids.
|
||||
*/
|
||||
public class ForceIdMigrationFixTask extends BaseTask {
|
||||
private static final Logger ourLog = LoggerFactory.getLogger(ForceIdMigrationFixTask.class);
|
||||
|
@ -62,13 +63,47 @@ public class ForceIdMigrationFixTask extends BaseTask {
|
|||
// run update in batches.
|
||||
int rowsPerBlock = 50; // hfj_resource has roughly 50 rows per 8k block.
|
||||
int batchSize = rowsPerBlock * 2000; // a few thousand IOPS gives a batch size around a second.
|
||||
ourLog.info(
|
||||
"About to migrate ids from {} to {} in batches of size {}",
|
||||
range.getLeft(),
|
||||
range.getRight(),
|
||||
batchSize);
|
||||
for (long batchStart = range.getLeft(); batchStart <= range.getRight(); batchStart = batchStart + batchSize) {
|
||||
long batchEnd = batchStart + batchSize;
|
||||
ourLog.info("Migrating client-assigned ids for pids: {}-{}", batchStart, batchEnd);
|
||||
|
||||
/*
|
||||
We have several cases. Two require no action:
|
||||
1. client-assigned id, with correct value in fhir_id and row in hfj_forced_id
|
||||
2. server-assigned id, with correct value in fhir_id, no row in hfj_forced_id
|
||||
And three require action:
|
||||
3. client-assigned id, no value in fhir_id, but row in hfj_forced_id
|
||||
4. server-assigned id, no value in fhir_id, and row in hfj_forced_id
|
||||
5. bad migration - server-assigned id, with wrong space-padded value in fhir_id, no row in hfj_forced_id
|
||||
*/
|
||||
|
||||
executeSql(
|
||||
"hfj_resource",
|
||||
"update hfj_resource set fhir_id = trim(fhir_id) where res_id >= ? and res_id < ?",
|
||||
"update hfj_resource " +
|
||||
// coalesce is varargs and chooses the first non-null value, like ||
|
||||
" set fhir_id = coalesce( "
|
||||
+
|
||||
// case 5.
|
||||
" trim(fhir_id), "
|
||||
+
|
||||
// case 3
|
||||
" (select f.forced_id from hfj_forced_id f where f.resource_pid = res_id), "
|
||||
+
|
||||
// case 4 - use pid as fhir_id
|
||||
" cast(res_id as varchar(64)) "
|
||||
+ " ) "
|
||||
+
|
||||
// avoid useless updates on engines that don't check
|
||||
// skip case 1, 2. Only check 3,4,5
|
||||
" where (fhir_id is null or fhir_id <> trim(fhir_id)) "
|
||||
+
|
||||
// chunk range.
|
||||
" and res_id >= ? and res_id < ?",
|
||||
batchStart,
|
||||
batchEnd);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue