HDFS-12383. Re-encryption updater should handle canceled tasks better.

This commit is contained in:
Xiao Chen 2017-09-03 19:50:21 -07:00
parent 275980bb1e
commit 633c1ea455
2 changed files with 108 additions and 12 deletions

View File

@ -39,6 +39,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
@ -72,6 +73,7 @@ public final class ReencryptionUpdater implements Runnable {
private final StopWatch throttleTimerLocked = new StopWatch();
private volatile long faultRetryInterval = 60000;
private volatile boolean isRunning = false;
/**
* Class to track re-encryption submissions of a single zone. It contains
@ -201,6 +203,11 @@ void pauseForTestingAfterNthCheckpoint(final long zoneId, final int count) {
pauseZoneId = zoneId;
}
@VisibleForTesting
boolean isRunning() {
return isRunning;
}
private final FSDirectory dir;
private final CompletionService<ReencryptionTask> batchService;
private final ReencryptionHandler handler;
@ -242,6 +249,7 @@ void markZoneSubmissionDone(final long zoneId)
@Override
public void run() {
isRunning = true;
throttleTimerAll.start();
while (true) {
try {
@ -250,11 +258,13 @@ public void run() {
} catch (InterruptedException ie) {
LOG.warn("Re-encryption updater thread interrupted. Exiting.");
Thread.currentThread().interrupt();
isRunning = false;
return;
} catch (IOException ioe) {
LOG.warn("Re-encryption updater thread exception.", ioe);
} catch (IOException | CancellationException e) {
LOG.warn("Re-encryption updater thread exception.", e);
} catch (Throwable t) {
LOG.error("Re-encryption updater thread exiting.", t);
isRunning = false;
return;
}
}
@ -405,6 +415,7 @@ private void takeAndProcessTasks() throws Exception {
if (completed.isCancelled()) {
LOG.debug("Skipped canceled re-encryption task for zone {}, last: {}",
task.zoneId, task.lastFile);
return;
}
boolean shouldRetry;

View File

@ -30,6 +30,7 @@
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import com.google.common.base.Supplier;
@ -1086,12 +1087,9 @@ public void testRaceDeleteZoneHandler() throws Exception {
getEzManager().resumeReencryptForTesting();
Thread.sleep(3000);
EncryptionZoneManager ezm = getEzManager();
ReencryptionHandler handler = (ReencryptionHandler) Whitebox
.getInternalState(ezm, "reencryptionHandler");
Map<Long, ZoneSubmissionTracker> tasks =
(Map<Long, ZoneSubmissionTracker>) Whitebox
.getInternalState(handler, "submissions");
.getInternalState(getHandler(), "submissions");
List<Future> futures = new LinkedList<>();
for (ZoneSubmissionTracker zst : tasks.values()) {
for (Future f : zst.getTasks()) {
@ -1493,6 +1491,88 @@ public void testReencryptCancel() throws Exception {
assertEquals(5, getZoneStatus(zone.toString()).getFilesReencrypted());
}
@Test
public void testCancelFuture() throws Exception {
final AtomicBoolean callableRunning = new AtomicBoolean(false);
class MyInjector extends EncryptionFaultInjector {
private volatile int exceptionCount = 0;
MyInjector(int numFailures) {
exceptionCount = numFailures;
}
@Override
public void reencryptEncryptedKeys() throws IOException {
if (exceptionCount > 0) {
exceptionCount--;
try {
callableRunning.set(true);
Thread.sleep(Long.MAX_VALUE);
} catch (InterruptedException ie) {
LOG.info("Fault injector interrupted", ie);
}
}
}
}
final MyInjector injector = new MyInjector(1);
EncryptionFaultInjector.instance = injector;
/* Setup test dir:
* /zones/zone/[0-9]
* /dir/f
*/
final int len = 8196;
final Path zoneParent = new Path("/zones");
final Path zone = new Path(zoneParent, "zone");
fsWrapper.mkdir(zone, FsPermission.getDirDefault(), true);
dfsAdmin.createEncryptionZone(zone, TEST_KEY, NO_TRASH);
for (int i = 0; i < 10; ++i) {
DFSTestUtil
.createFile(fs, new Path(zone, Integer.toString(i)), len, (short) 1,
0xFEED);
}
final Path subdir = new Path("/dir");
fsWrapper.mkdir(subdir, FsPermission.getDirDefault(), true);
DFSTestUtil.createFile(fs, new Path(subdir, "f"), len, (short) 1, 0xFEED);
// re-encrypt 10 files, so 2 callables. Hang 1, pause the updater so the
// callable is taken from the executor but not processed.
fsn.getProvider().rollNewVersion(TEST_KEY);
fsn.getProvider().flush();
getEzManager().pauseReencryptForTesting();
dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START);
waitForQueuedZones(1);
getEzManager().resumeReencryptForTesting();
LOG.info("Waiting for re-encrypt callables to run");
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
return callableRunning.get();
}
}, 100, 10000);
getEzManager().pauseReencryptUpdaterForTesting();
dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.CANCEL);
// now resume updater and verify status.
getEzManager().resumeReencryptUpdaterForTesting();
waitForZoneCompletes(zone.toString());
RemoteIterator<ZoneReencryptionStatus> it =
dfsAdmin.listReencryptionStatus();
assertTrue(it.hasNext());
final ZoneReencryptionStatus zs = it.next();
assertEquals(zone.toString(), zs.getZoneName());
assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState());
assertTrue(zs.isCanceled());
assertTrue(zs.getCompletionTime() > 0);
assertTrue(zs.getCompletionTime() > zs.getSubmissionTime());
assertEquals(0, zs.getFilesReencrypted());
assertTrue(getUpdater().isRunning());
}
@Test
public void testReencryptCancelForUpdater() throws Exception {
/* Setup test dir:
@ -1822,12 +1902,7 @@ public void reencryptUpdaterProcessOneTask() throws IOException {
fsn.getProvider().rollNewVersion(TEST_KEY);
fsn.getProvider().flush();
final EncryptionZoneManager ezm = getEzManager();
final ReencryptionHandler handler = (ReencryptionHandler) Whitebox
.getInternalState(ezm, "reencryptionHandler");
final ReencryptionUpdater updater = (ReencryptionUpdater) Whitebox
.getInternalState(handler, "reencryptionUpdater");
Whitebox.setInternalState(updater, "faultRetryInterval", 50);
Whitebox.setInternalState(getUpdater(), "faultRetryInterval", 50);
dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START);
waitForReencryptedZones(1);
assertEquals(0, injector.exceptionCount);
@ -1844,4 +1919,14 @@ public void reencryptUpdaterProcessOneTask() throws IOException {
assertEquals(10, zs.getFilesReencrypted());
assertEquals(0, zs.getNumReencryptionFailures());
}
private ReencryptionHandler getHandler() {
return (ReencryptionHandler) Whitebox
.getInternalState(getEzManager(), "reencryptionHandler");
}
private ReencryptionUpdater getUpdater() {
return (ReencryptionUpdater) Whitebox
.getInternalState(getHandler(), "reencryptionUpdater");
}
}