HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang.
This commit is contained in:
parent
c210cede5c
commit
65c4660bcd
|
@ -599,7 +599,13 @@ public class ImageServlet extends HttpServlet {
|
|||
long timeDelta = TimeUnit.MILLISECONDS.toSeconds(
|
||||
now - lastCheckpointTime);
|
||||
|
||||
// Since the goal of the check below is to prevent overly
|
||||
// frequent upload from Standby, the check should only be done
|
||||
// for the periodical upload from Standby. For the other
|
||||
// scenarios such as rollback image and ckpt file, they skip
|
||||
// this check, see HDFS-15036 for more info.
|
||||
if (checkRecentImageEnable &&
|
||||
NameNodeFile.IMAGE.equals(parsedParams.getNameNodeFile()) &&
|
||||
timeDelta < checkpointPeriod &&
|
||||
txid - lastCheckpointTxid < checkpointTxnCount) {
|
||||
// only when at least one of two conditions are met we accept
|
||||
|
|
|
@ -292,10 +292,20 @@ public class StandbyCheckpointer {
|
|||
// TODO should there be some smarts here about retries nodes that
|
||||
// are not the active NN?
|
||||
CheckpointReceiverEntry receiverEntry = checkpointReceivers.get(url);
|
||||
if (upload.get() == TransferFsImage.TransferResult.SUCCESS) {
|
||||
TransferFsImage.TransferResult uploadResult = upload.get();
|
||||
if (uploadResult == TransferFsImage.TransferResult.SUCCESS) {
|
||||
receiverEntry.setLastUploadTime(monotonicNow());
|
||||
receiverEntry.setIsPrimary(true);
|
||||
} else {
|
||||
// Getting here means image upload is explicitly rejected
|
||||
// by the other node. This could happen if:
|
||||
// 1. the other is also a standby, or
|
||||
// 2. the other is active, but already accepted another
|
||||
// newer image, or
|
||||
// 3. the other is active but has a recent enough image.
|
||||
// All these are valid cases, just log for information.
|
||||
LOG.info("Image upload rejected by the other NameNode: {}",
|
||||
uploadResult);
|
||||
receiverEntry.setIsPrimary(false);
|
||||
}
|
||||
} catch (ExecutionException e) {
|
||||
|
|
|
@ -58,6 +58,7 @@ import org.apache.hadoop.test.GenericTestUtils;
|
|||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.apache.hadoop.hdfs.server.namenode.ImageServlet.RECENT_IMAGE_CHECK_ENABLED;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
|
@ -432,7 +433,22 @@ public class TestRollingUpgrade {
|
|||
testFinalize(3);
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
public void testFinalizeWithDeltaCheck() throws Exception {
|
||||
testFinalize(2, true);
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
public void testFinalizeWithMultipleNNDeltaCheck() throws Exception {
|
||||
testFinalize(3, true);
|
||||
}
|
||||
|
||||
private void testFinalize(int nnCount) throws Exception {
|
||||
testFinalize(nnCount, false);
|
||||
}
|
||||
|
||||
private void testFinalize(int nnCount, boolean skipImageDeltaCheck)
|
||||
throws Exception {
|
||||
final Configuration conf = new HdfsConfiguration();
|
||||
MiniQJMHACluster cluster = null;
|
||||
final Path foo = new Path("/foo");
|
||||
|
@ -451,6 +467,10 @@ public class TestRollingUpgrade {
|
|||
dfsCluster.restartNameNodes();
|
||||
|
||||
dfsCluster.transitionToActive(0);
|
||||
|
||||
dfsCluster.getNameNode(0).getHttpServer()
|
||||
.setAttribute(RECENT_IMAGE_CHECK_ENABLED, skipImageDeltaCheck);
|
||||
|
||||
DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
|
||||
dfs.mkdirs(foo);
|
||||
|
||||
|
|
Loading…
Reference in New Issue