HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang.
This commit is contained in:
parent
c210cede5c
commit
65c4660bcd
|
@ -599,7 +599,13 @@ public class ImageServlet extends HttpServlet {
|
||||||
long timeDelta = TimeUnit.MILLISECONDS.toSeconds(
|
long timeDelta = TimeUnit.MILLISECONDS.toSeconds(
|
||||||
now - lastCheckpointTime);
|
now - lastCheckpointTime);
|
||||||
|
|
||||||
|
// Since the goal of the check below is to prevent overly
|
||||||
|
// frequent upload from Standby, the check should only be done
|
||||||
|
// for the periodical upload from Standby. For the other
|
||||||
|
// scenarios such as rollback image and ckpt file, they skip
|
||||||
|
// this check, see HDFS-15036 for more info.
|
||||||
if (checkRecentImageEnable &&
|
if (checkRecentImageEnable &&
|
||||||
|
NameNodeFile.IMAGE.equals(parsedParams.getNameNodeFile()) &&
|
||||||
timeDelta < checkpointPeriod &&
|
timeDelta < checkpointPeriod &&
|
||||||
txid - lastCheckpointTxid < checkpointTxnCount) {
|
txid - lastCheckpointTxid < checkpointTxnCount) {
|
||||||
// only when at least one of the two conditions is met we accept
|
// only when at least one of the two conditions is met we accept
|
||||||
|
|
|
@ -292,10 +292,20 @@ public class StandbyCheckpointer {
|
||||||
// TODO: should there be some smarts here about retrying nodes that
|
// TODO: should there be some smarts here about retrying nodes that
|
||||||
// are not the active NN?
|
// are not the active NN?
|
||||||
CheckpointReceiverEntry receiverEntry = checkpointReceivers.get(url);
|
CheckpointReceiverEntry receiverEntry = checkpointReceivers.get(url);
|
||||||
if (upload.get() == TransferFsImage.TransferResult.SUCCESS) {
|
TransferFsImage.TransferResult uploadResult = upload.get();
|
||||||
|
if (uploadResult == TransferFsImage.TransferResult.SUCCESS) {
|
||||||
receiverEntry.setLastUploadTime(monotonicNow());
|
receiverEntry.setLastUploadTime(monotonicNow());
|
||||||
receiverEntry.setIsPrimary(true);
|
receiverEntry.setIsPrimary(true);
|
||||||
} else {
|
} else {
|
||||||
|
// Getting here means image upload is explicitly rejected
|
||||||
|
// by the other node. This could happen if:
|
||||||
|
// 1. the other is also a standby, or
|
||||||
|
// 2. the other is active, but already accepted another
|
||||||
|
// newer image, or
|
||||||
|
// 3. the other is active but has a recent enough image.
|
||||||
|
// All these are valid cases, just log for information.
|
||||||
|
LOG.info("Image upload rejected by the other NameNode: {}",
|
||||||
|
uploadResult);
|
||||||
receiverEntry.setIsPrimary(false);
|
receiverEntry.setIsPrimary(false);
|
||||||
}
|
}
|
||||||
} catch (ExecutionException e) {
|
} catch (ExecutionException e) {
|
||||||
|
|
|
@ -58,6 +58,7 @@ import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdfs.server.namenode.ImageServlet.RECENT_IMAGE_CHECK_ENABLED;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotEquals;
|
import static org.junit.Assert.assertNotEquals;
|
||||||
import static org.junit.Assert.assertNull;
|
import static org.junit.Assert.assertNull;
|
||||||
|
@ -432,7 +433,22 @@ public class TestRollingUpgrade {
|
||||||
testFinalize(3);
|
testFinalize(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 300000)
|
||||||
|
public void testFinalizeWithDeltaCheck() throws Exception {
|
||||||
|
testFinalize(2, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 300000)
|
||||||
|
public void testFinalizeWithMultipleNNDeltaCheck() throws Exception {
|
||||||
|
testFinalize(3, true);
|
||||||
|
}
|
||||||
|
|
||||||
private void testFinalize(int nnCount) throws Exception {
|
private void testFinalize(int nnCount) throws Exception {
|
||||||
|
testFinalize(nnCount, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testFinalize(int nnCount, boolean skipImageDeltaCheck)
|
||||||
|
throws Exception {
|
||||||
final Configuration conf = new HdfsConfiguration();
|
final Configuration conf = new HdfsConfiguration();
|
||||||
MiniQJMHACluster cluster = null;
|
MiniQJMHACluster cluster = null;
|
||||||
final Path foo = new Path("/foo");
|
final Path foo = new Path("/foo");
|
||||||
|
@ -451,6 +467,10 @@ public class TestRollingUpgrade {
|
||||||
dfsCluster.restartNameNodes();
|
dfsCluster.restartNameNodes();
|
||||||
|
|
||||||
dfsCluster.transitionToActive(0);
|
dfsCluster.transitionToActive(0);
|
||||||
|
|
||||||
|
dfsCluster.getNameNode(0).getHttpServer()
|
||||||
|
.setAttribute(RECENT_IMAGE_CHECK_ENABLED, skipImageDeltaCheck);
|
||||||
|
|
||||||
DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
|
DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
|
||||||
dfs.mkdirs(foo);
|
dfs.mkdirs(foo);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue