diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java index c36b46dbb82..5ed2a1686ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java @@ -98,6 +98,19 @@ public class ImageServlet extends HttpServlet { "recent.image.check.enabled"; public static final boolean RECENT_IMAGE_CHECK_ENABLED_DEFAULT = true; + /* + * Specify a relaxation for the time delta check, the relaxation is to account + * for the scenario that there are chances that minor time difference (e.g. + * due to image upload delay, or minor machine clock skew) can cause ANN to + * reject a fsImage too aggressively. + */ + private static double recentImageCheckTimePrecision = 0.75; + + @VisibleForTesting + static void setRecentImageCheckTimePrecision(double ratio) { + recentImageCheckTimePrecision = ratio; + } + @Override public void doGet(final HttpServletRequest request, final HttpServletResponse response) throws ServletException, IOException { @@ -566,6 +579,9 @@ public class ImageServlet extends HttpServlet { long checkpointPeriod = conf.getTimeDuration(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT, TimeUnit.SECONDS); + checkpointPeriod = Math.round( + checkpointPeriod * recentImageCheckTimePrecision); + long checkpointTxnCount = conf.getLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY, DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT); @@ -586,21 +602,24 @@ public class ImageServlet extends HttpServlet { // a new fsImage // 1. most recent image's txid is too far behind // 2. last checkpoint time was too old - response.sendError(HttpServletResponse.SC_CONFLICT, - "Most recent checkpoint is neither too far behind in " - + "txid, nor too old. 
New txnid cnt is " - + (txid - lastCheckpointTxid) - + ", expecting at least " + checkpointTxnCount - + " unless too long since last upload."); + String message = "Rejecting a fsimage due to small time delta " + + "and txnid delta. Time since previous checkpoint is " + + timeDelta + " expecting at least " + checkpointPeriod + + " txnid delta since previous checkpoint is " + + (txid - lastCheckpointTxid) + " expecting at least " + + checkpointTxnCount; + LOG.info(message); + response.sendError(HttpServletResponse.SC_CONFLICT, message); return null; } try { if (nnImage.getStorage().findImageFile(nnf, txid) != null) { - response.sendError(HttpServletResponse.SC_CONFLICT, - "Either current namenode has checkpointed or " - + "another checkpointer already uploaded an " - + "checkpoint for txid " + txid); + String message = "Either current namenode has checkpointed or " + + "another checkpointer already uploaded an " + + "checkpoint for txid " + txid; + LOG.info(message); + response.sendError(HttpServletResponse.SC_CONFLICT, message); return null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index 46f66949408..7bd2f9c245e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -2474,7 +2474,7 @@ public class TestCheckpoint { } @Test(timeout = 300000) - public void testActiveRejectSmallerDeltaImage() throws Exception { + public void testActiveRejectSmallerTxidDeltaImage() throws Exception { MiniDFSCluster cluster = null; Configuration conf = new HdfsConfiguration(); // Set the delta txid threshold to 10 @@ -2527,6 +2527,57 @@ public class TestCheckpoint { } + /** + * Test that even with txid and time delta threshold, by 
having time + * relaxation, SBN can still upload images to ANN. + * + * @throws Exception + */ + @Test + public void testActiveImageWithTimeDeltaRelaxation() throws Exception { + Configuration conf = new HdfsConfiguration(); + // Set the delta txid threshold to some arbitrarily large value, so + // it does not trigger a checkpoint during this test. + conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 1000000); + // Set the delta time threshold to some arbitrarily large value, so + // it does not trigger a checkpoint during this test. + conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 900000); + // Set relaxation to 0, means time delta = 0 from previous image is fine, + // this will effectively disable reject small delta image + ImageServlet.setRecentImageCheckTimePrecision(0); + + SecondaryNameNode secondary = null; + + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(0).format(true).build()) { + // enable small delta rejection + NameNode active = cluster.getNameNode(); + active.httpServer.getHttpServer() + .setAttribute(RECENT_IMAGE_CHECK_ENABLED, true); + + secondary = startSecondaryNameNode(conf); + + FileSystem fs = cluster.getFileSystem(); + assertEquals(0, active.getNamesystem().getFSImage() + .getMostRecentCheckpointTxId()); + + // create 5 dir. + for (int i = 0; i < 5; i++) { + fs.mkdirs(new Path("dir-" + i)); + } + + // Checkpoint 1st + secondary.doCheckpoint(); + // at this point, despite this is a small delta change, w.r.t both + // txid and time delta, due to we set relaxation to 0, this image + // still gets accepted + assertEquals(9, active.getNamesystem().getFSImage() + .getMostRecentCheckpointTxId()); + } finally { + cleanup(secondary); + } + } + private static void cleanup(SecondaryNameNode snn) { if (snn != null) { try {