Support retry of $reindex version conflict (#4603)
* Support retry of $reindex conflict * Notes and cleanup of Phaser
This commit is contained in:
parent
bf495e2d92
commit
ec13b751fe
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
type: change
|
||||
issue: 4603
|
||||
title: "Transaction retry will now also apply to ObjectOptimisticLockingFailureException. This enables retry of
|
||||
$reindex work chunks when they collide with a DELETE operation."
|
|
@ -8,8 +8,11 @@ import ca.uhn.fhir.rest.api.server.RequestDetails;
|
|||
import ca.uhn.fhir.rest.api.server.SystemRequestDetails;
|
||||
import ca.uhn.fhir.rest.api.server.storage.TransactionDetails;
|
||||
import ca.uhn.fhir.rest.server.exceptions.ResourceGoneException;
|
||||
import ca.uhn.fhir.rest.server.exceptions.ResourceVersionConflictException;
|
||||
import ca.uhn.fhir.storage.test.DaoTestDataBuilder;
|
||||
import ca.uhn.test.concurrency.LockstepEnumPhaser;
|
||||
import org.apache.commons.lang3.concurrent.BasicThreadFactory;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.hl7.fhir.instance.model.api.IIdType;
|
||||
import org.hl7.fhir.r4.model.Observation;
|
||||
import org.hl7.fhir.r4.model.SearchParameter;
|
||||
|
@ -21,15 +24,15 @@ import org.springframework.test.context.ContextConfiguration;
|
|||
import org.springframework.transaction.TransactionStatus;
|
||||
import org.springframework.transaction.annotation.Propagation;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
import java.util.function.BiFunction;
|
||||
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
|
||||
@ContextConfiguration(classes = {
|
||||
|
@ -49,12 +52,12 @@ class ReindexRaceBugTest extends BaseJpaR4Test {
|
|||
* The $reindex step processes several resources in a single tx.
|
||||
* The tested sequence here is: job step $reindexes a resource, then another thread DELETEs the resource,
|
||||
* then later, the $reindex step finishes the rest of the resources and commits AFTER the DELETE commits.
|
||||
*
|
||||
* This was inserting new index rows into HFJ_SPIDX_TOKEN even though the resource was gone.
|
||||
* This scenario could insert index rows into HFJ_SPIDX_TOKEN even though the resource was gone.
|
||||
* This is an illegal state for our index. Deleted resources should never have content in HFJ_SPIDX_*
|
||||
* Fixed by taking an optimistic lock on hfj_resource even though $reindex is read-only on that table.
|
||||
*/
|
||||
@Test
|
||||
void deleteOverlapsWithReindex_leavesIndexRowsP() throws InterruptedException, ExecutionException {
|
||||
void deleteOverlapsWithReindex_leavesIndexRowsP() {
|
||||
LockstepEnumPhaser<Steps> phaser = new LockstepEnumPhaser<>(2, Steps.class);
|
||||
|
||||
ourLog.info("An observation is created");
|
||||
|
@ -90,10 +93,8 @@ class ReindexRaceBugTest extends BaseJpaR4Test {
|
|||
|
||||
assertEquals(1, getSPIDXDateCount(observationPid), "still only one index row before reindex");
|
||||
|
||||
|
||||
// suppose reindex job step starts here and loads the resource and ResourceTable entity
|
||||
ThreadFactory loggingThreadFactory = getLoggingThreadFactory("Reindex-thread");
|
||||
ExecutorService backgroundReindexThread = Executors.newSingleThreadExecutor(loggingThreadFactory);
|
||||
ExecutorService backgroundReindexThread = Executors.newSingleThreadExecutor(new BasicThreadFactory.Builder().namingPattern("Reindex-thread-%d").build());
|
||||
Future<Integer> backgroundResult = backgroundReindexThread.submit(() -> {
|
||||
try {
|
||||
callInFreshTx((tx, rd) -> {
|
||||
|
@ -120,6 +121,7 @@ class ReindexRaceBugTest extends BaseJpaR4Test {
|
|||
return 0;
|
||||
});
|
||||
} finally {
|
||||
ourLog.info("$reindex commit complete");
|
||||
phaser.arriveAndAwaitSharedEndOf(Steps.COMMIT_REINDEX);
|
||||
}
|
||||
return 1;
|
||||
|
@ -131,9 +133,8 @@ class ReindexRaceBugTest extends BaseJpaR4Test {
|
|||
phaser.arriveAndAwaitSharedEndOf(Steps.RUN_REINDEX);
|
||||
|
||||
// then the resource is deleted
|
||||
phaser.assertInPhase(Steps.RUN_DELETE);
|
||||
|
||||
ourLog.info("Deleting observation");
|
||||
phaser.assertInPhase(Steps.RUN_DELETE);
|
||||
callInFreshTx((tx, rd) -> myObservationDao.delete(observationId, rd));
|
||||
assertResourceDeleted(observationId);
|
||||
assertEquals(0, getSPIDXDateCount(observationPid), "A deleted resource should have 0 index rows");
|
||||
|
@ -146,22 +147,12 @@ class ReindexRaceBugTest extends BaseJpaR4Test {
|
|||
phaser.arriveAndAwaitSharedEndOf(Steps.COMMIT_REINDEX);
|
||||
|
||||
assertEquals(0, getSPIDXDateCount(observationPid), "A deleted resource should still have 0 index rows, after $reindex completes");
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
static ThreadFactory getLoggingThreadFactory(String theThreadName) {
|
||||
ThreadFactory loggingThreadFactory = r -> new Thread(() -> {
|
||||
boolean success = false;
|
||||
try {
|
||||
r.run();
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
ourLog.error("Background thread failed");
|
||||
}
|
||||
}
|
||||
}, theThreadName);
|
||||
return loggingThreadFactory;
|
||||
// Verify the exception from $reindex
|
||||
// In a running server, we expect UserRequestRetryVersionConflictsInterceptor to cause a retry inside the ReindexStep
|
||||
// But here in the test, we have not configured any retry logic.
|
||||
ExecutionException e = assertThrows(ExecutionException.class, backgroundResult::get, "Optimistic locking detects the DELETE and rolls back");
|
||||
assertThat("Hapi maps conflict exception type", e.getCause(), Matchers.instanceOf(ResourceVersionConflictException.class));
|
||||
}
|
||||
|
||||
void assertResourceDeleted(IIdType observationId) {
|
||||
|
|
|
@ -45,6 +45,7 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.dao.DataIntegrityViolationException;
|
||||
import org.springframework.orm.ObjectOptimisticLockingFailureException;
|
||||
import org.springframework.transaction.PlatformTransactionManager;
|
||||
import org.springframework.transaction.TransactionStatus;
|
||||
import org.springframework.transaction.annotation.Isolation;
|
||||
|
@ -222,7 +223,7 @@ public class HapiTransactionService implements IHapiTransactionService {
|
|||
|
||||
return doExecuteCallback(theExecutionBuilder, theCallback);
|
||||
|
||||
} catch (ResourceVersionConflictException | DataIntegrityViolationException e) {
|
||||
} catch (ResourceVersionConflictException | DataIntegrityViolationException | ObjectOptimisticLockingFailureException e) {
|
||||
ourLog.debug("Version conflict detected", e);
|
||||
|
||||
if (theExecutionBuilder.myOnRollback != null) {
|
||||
|
|
|
@ -9,17 +9,16 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
|||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
|
||||
/**
|
||||
* Test helper to force a particular sequence on 2 or more threads.
|
||||
* Test helper to force a particular sequence on multiple threads.
|
||||
* Wraps Phaser with an Enum for better messages, and some test support.
|
||||
* The only use is to impose a particular execution sequence over multiple threads when reproducing bugs.
|
||||
*
|
||||
* <p>
|
||||
* The simplest usage is to declare the number of collaborators as theParticipantCount
|
||||
* in the constructor, and then have each participant thread call {@link #arriveAndAwaitSharedEndOf}
|
||||
* as they finish the work of every phase.
|
||||
* Every thread needs to confirm, even if they do no work in that phase.
|
||||
* <p>
|
||||
* Note: this is just a half-baked wrapper around Phaser.
|
||||
* The behaviour is not especially precise, or tested. Comments welcome: MB.
|
||||
*
|
||||
* @param <E> an enum used to name the phases.
|
||||
*/
|
||||
|
@ -35,60 +34,72 @@ public class LockstepEnumPhaser<E extends Enum<E>> {
|
|||
myEnumConstants = myEnumClass.getEnumConstants();
|
||||
}
|
||||
|
||||
public E arrive() {
|
||||
E result = phaseToEnum(myPhaser.arrive());
|
||||
ourLog.info("Arrive in phase {}", result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public void assertInPhase(E theStageEnum) {
|
||||
assertEquals(theStageEnum, getPhase(), "In stage " + theStageEnum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current phase.
|
||||
*/
|
||||
public E getPhase() {
|
||||
return phaseToEnum(myPhaser.getPhase());
|
||||
}
|
||||
|
||||
public E awaitAdvance(E thePhase) {
|
||||
/**
|
||||
* Declare that this thread-participant has finished the work of thePhase,
|
||||
* and then wait until all other participants have also finished.
|
||||
*
|
||||
* @param thePhase the phase the thread just completed
|
||||
* @return the new phase starting.
|
||||
*/
|
||||
public E arriveAndAwaitSharedEndOf(E thePhase) {
|
||||
checkAwait(thePhase);
|
||||
E current = arrive();
|
||||
assertEquals(thePhase, current);
|
||||
return doAwait(thePhase);
|
||||
}
|
||||
|
||||
/**
|
||||
* Like arrive(), but verify stage first
|
||||
* Finish a phase, and deregister so that later stages can complete
|
||||
* with a reduced participant count.
|
||||
*/
|
||||
public E arriveAtMyEndOf(E thePhase) {
|
||||
assertInPhase(thePhase);
|
||||
return arrive();
|
||||
}
|
||||
|
||||
public E arriveAndAwaitSharedEndOf(E thePhase) {
|
||||
checkAwait(thePhase);
|
||||
arrive();
|
||||
return doAwait(thePhase);
|
||||
}
|
||||
|
||||
public E arriveAndDeregister() {
|
||||
return phaseToEnum(myPhaser.arriveAndDeregister());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new participant to the pool.
|
||||
* Later await calls will wait for one more arrival before proceeding.
|
||||
*/
|
||||
public E register() {
|
||||
return phaseToEnum(myPhaser.register());
|
||||
}
|
||||
|
||||
E arrive() {
|
||||
E result = phaseToEnum(myPhaser.arrive());
|
||||
ourLog.info("Arrive to my end of phase {}", result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
private E doAwait(E thePhase) {
|
||||
ourLog.debug("Start doAwait - {}", thePhase);
|
||||
E phase = phaseToEnum(myPhaser.awaitAdvance(thePhase.ordinal()));
|
||||
ourLog.info("Finish doAwait - {}", thePhase);
|
||||
ourLog.info("Done waiting for end of {}. Now starting {}", thePhase, getPhase());
|
||||
return phase;
|
||||
}
|
||||
|
||||
/**
|
||||
* Defensively verify that the phase we are waiting to end is actually the current phase.
|
||||
*/
|
||||
private void checkAwait(E thePhase) {
|
||||
E currentPhase = getPhase();
|
||||
if (currentPhase.ordinal() < thePhase.ordinal()) {
|
||||
fail("Can't wait for end of phase " + thePhase + ", still in phase " + currentPhase);
|
||||
// Explicitly progressing lock-step is safer for most tests.
|
||||
// But we could allow waiting multiple phases with a loop here instead of failing. MB
|
||||
fail(String.format("Can't wait for end of phase %s, still in phase %s", thePhase, currentPhase));
|
||||
} else if (currentPhase.ordinal() > thePhase.ordinal()) {
|
||||
ourLog.warn("Skip waiting for phase {}, already in phase {}", thePhase, currentPhase);
|
||||
fail(String.format("Can't wait for end of phase %s, already in phase %s", thePhase, currentPhase));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ import org.junit.jupiter.api.Timeout;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
|
@ -26,6 +27,9 @@ import static ca.uhn.test.concurrency.LockstepEnumPhaserTest.Stages.TWO;
|
|||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
// All of these should run pretty quickly - 5s should be lots.
|
||||
// But if they deadlock, they will hang forever. Need @Timeout.
|
||||
@Timeout(5)
|
||||
class LockstepEnumPhaserTest {
|
||||
private static final Logger ourLog = LoggerFactory.getLogger(LockstepEnumPhaserTest.class);
|
||||
final ExecutorService myExecutorService = Executors.newFixedThreadPool(10);
|
||||
|
@ -39,7 +43,6 @@ class LockstepEnumPhaserTest {
|
|||
|
||||
LockstepEnumPhaser<Stages> myPhaser;
|
||||
|
||||
@Timeout(1)
|
||||
@Test
|
||||
void phaserWithOnePariticpant_worksFine() {
|
||||
// given
|
||||
|
@ -47,7 +50,7 @@ class LockstepEnumPhaserTest {
|
|||
|
||||
myPhaser.assertInPhase(ONE);
|
||||
|
||||
myPhaser.arriveAtMyEndOf(ONE);
|
||||
myPhaser.arriveAndAwaitSharedEndOf(ONE);
|
||||
|
||||
myPhaser.arriveAndAwaitSharedEndOf(TWO);
|
||||
|
||||
|
@ -56,7 +59,6 @@ class LockstepEnumPhaserTest {
|
|||
myPhaser.assertInPhase(FINISHED);
|
||||
}
|
||||
|
||||
@Timeout(5)
|
||||
@Test
|
||||
void phaserWithTwoThreads_runsInLockStep() throws InterruptedException, ExecutionException {
|
||||
// given
|
||||
|
@ -82,10 +84,7 @@ class LockstepEnumPhaserTest {
|
|||
myPhaser.assertInPhase(THREE);
|
||||
recordProgress(threadId);
|
||||
|
||||
Stages nextStage = myPhaser.awaitAdvance(TWO);
|
||||
assertEquals(THREE, nextStage);
|
||||
|
||||
myPhaser.arriveAtMyEndOf(THREE);
|
||||
myPhaser.arriveAndAwaitSharedEndOf(THREE);
|
||||
|
||||
ourLog.info("Finished");
|
||||
|
||||
|
@ -104,33 +103,13 @@ class LockstepEnumPhaserTest {
|
|||
myProgressEvents.add(Pair.of(threadId, myPhaser.getPhase()));
|
||||
}
|
||||
|
||||
@Timeout(5)
|
||||
@Test
|
||||
void phaserWithTwoThreads_canAddThird_sequencContinues() throws InterruptedException, ExecutionException {
|
||||
// given
|
||||
myPhaser = new LockstepEnumPhaser<>(2, Stages.class);
|
||||
|
||||
// run one simple schedule
|
||||
Callable<Integer> schedule1 = ()->{
|
||||
int threadId = 1;
|
||||
ourLog.info("Starting schedule1");
|
||||
myPhaser.assertInPhase(ONE);
|
||||
recordProgress(threadId);
|
||||
|
||||
myPhaser.arriveAndAwaitSharedEndOf(ONE);
|
||||
|
||||
recordProgress(threadId);
|
||||
|
||||
myPhaser.arriveAndAwaitSharedEndOf(TWO);
|
||||
|
||||
recordProgress(threadId);
|
||||
|
||||
myPhaser.arriveAndAwaitSharedEndOf(THREE);
|
||||
|
||||
ourLog.info("Finished schedule1");
|
||||
|
||||
return 1;
|
||||
};
|
||||
Callable<Integer> schedule1 = buildSimpleCountingSchedule(1);
|
||||
// this schedule will start half-way in
|
||||
Callable<Integer> schedule2 = ()->{
|
||||
int threadId = 2;
|
||||
|
@ -149,7 +128,7 @@ class LockstepEnumPhaserTest {
|
|||
|
||||
ourLog.info("Finished schedule2");
|
||||
|
||||
return 1;
|
||||
return 2;
|
||||
};
|
||||
// this schedule will start schedule 2 half-way
|
||||
Callable<Integer> schedule3 = ()->{
|
||||
|
@ -174,17 +153,83 @@ class LockstepEnumPhaserTest {
|
|||
|
||||
ourLog.info("Finished schedule3");
|
||||
|
||||
return 1;
|
||||
return 3;
|
||||
};
|
||||
Future<Integer> result1 = myExecutorService.submit(schedule1);
|
||||
Future<Integer> result2 = myExecutorService.submit(schedule3);
|
||||
Future<Integer> result3 = myExecutorService.submit(schedule3);
|
||||
|
||||
assertEquals(1, result1.get());
|
||||
assertEquals(1, result2.get());
|
||||
assertEquals(3, result3.get());
|
||||
|
||||
assertThat("progress is ordered", myProgressEvents, OrderMatchers.softOrdered(myProgressStageComparator));
|
||||
assertThat("all progress logged", myProgressEvents, Matchers.hasSize(8));
|
||||
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private Callable<Integer> buildSimpleCountingSchedule(int theThreadId) {
|
||||
Callable<Integer> schedule = ()->{
|
||||
ourLog.info("Starting schedule - {}", theThreadId);
|
||||
|
||||
myPhaser.assertInPhase(ONE);
|
||||
recordProgress(theThreadId);
|
||||
|
||||
myPhaser.arriveAndAwaitSharedEndOf(ONE);
|
||||
|
||||
recordProgress(theThreadId);
|
||||
|
||||
myPhaser.arriveAndAwaitSharedEndOf(TWO);
|
||||
|
||||
recordProgress(theThreadId);
|
||||
|
||||
myPhaser.arriveAndAwaitSharedEndOf(THREE);
|
||||
|
||||
ourLog.info("Finished schedule1");
|
||||
|
||||
return theThreadId;
|
||||
};
|
||||
return schedule;
|
||||
}
|
||||
|
||||
@Test
|
||||
void aShortScheduleDeregister_allowsRemainingParticipantsToContinue() throws ExecutionException, InterruptedException {
|
||||
// given
|
||||
myPhaser = new LockstepEnumPhaser<>(3, Stages.class);
|
||||
|
||||
// Three schedules, but with one that leaves early
|
||||
// schedules 1 and 2 are simple counting schedules that run through every phase
|
||||
// schedule 3 starts with the others, but deregisters during phase TWO and leaves early.
|
||||
Callable<Integer> schedule1 = buildSimpleCountingSchedule(1);
|
||||
Callable<Integer> schedule2 = buildSimpleCountingSchedule(2);
|
||||
Callable<Integer> schedule3 = () -> {
|
||||
int threadId = 3;
|
||||
ourLog.info("Starting schedule - {}", threadId);
|
||||
|
||||
myPhaser.assertInPhase(ONE);
|
||||
recordProgress(threadId);
|
||||
|
||||
myPhaser.arriveAndAwaitSharedEndOf(ONE);
|
||||
|
||||
recordProgress(threadId);
|
||||
|
||||
ourLog.info("Leaving schedule - {}", threadId);
|
||||
|
||||
Stages deregisterPhase = myPhaser.arriveAndDeregister();
|
||||
assertEquals(TWO, deregisterPhase);
|
||||
|
||||
return threadId;
|
||||
};
|
||||
Future<Integer> result1 = myExecutorService.submit(schedule1);
|
||||
Future<Integer> result2 = myExecutorService.submit(schedule2);
|
||||
Future<Integer> result3 = myExecutorService.submit(schedule3);
|
||||
|
||||
assertEquals(1, result1.get());
|
||||
assertEquals(2, result2.get());
|
||||
assertEquals(3, result3.get());
|
||||
|
||||
assertThat("progress is ordered", myProgressEvents, OrderMatchers.softOrdered(myProgressStageComparator));
|
||||
assertThat("all progress logged", myProgressEvents, Matchers.hasSize(2*3 + 2));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue