HDFS-8229. LAZY_PERSIST file gets deleted after NameNode restart. (Contributed by Surendra Singh Lilhore)

This commit is contained in:
Arpit Agarwal 2015-05-01 16:30:51 -07:00
parent 32b3b8e2ef
commit 8e3fdd0a40
4 changed files with 43 additions and 1 deletions

View File

@ -277,6 +277,9 @@ Release 2.8.0 - UNRELEASED
HDFS-8276. LazyPersistFileScrubber should be disabled if scrubber interval
configured zero. (Surendra Singh Lilhore via Arpit Agarwal)
HDFS-8229. LAZY_PERSIST file gets deleted after NameNode restart.
(Surendra Singh Lilhore via Arpit Agarwal)
Release 2.7.1 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -4763,7 +4763,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
public void run() {
while (fsRunning && shouldRun) {
try {
clearCorruptLazyPersistFiles();
if (!isInSafeMode()) {
clearCorruptLazyPersistFiles();
} else {
if (FSNamesystem.LOG.isDebugEnabled()) {
FSNamesystem.LOG
.debug("Namenode is in safemode, skipping scrubbing of corrupted lazy-persist files.");
}
}
Thread.sleep(scrubIntervalSec * 1000);
} catch (InterruptedException e) {
FSNamesystem.LOG.info(

View File

@ -258,6 +258,7 @@ public abstract class LazyPersistTestCase {
LAZY_WRITER_INTERVAL_SEC);
conf.setLong(DFS_DATANODE_RAM_DISK_LOW_WATERMARK_BYTES,
evictionLowWatermarkReplicas * BLOCK_SIZE);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1);
if (useSCR) {
conf.setBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY, true);

View File

@ -16,6 +16,7 @@
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
import com.google.common.collect.Iterators;
import com.google.common.util.concurrent.Uninterruptibles;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.Path;
@ -145,6 +146,36 @@ public class TestLazyPersistFiles extends LazyPersistTestCase {
Assert.assertTrue(fs.exists(path1));
}
/**
* If NN restarted then lazyPersist files should not deleted
*/
@Test
public void testFileShouldNotDiscardedIfNNRestarted() throws IOException,
InterruptedException {
getClusterBuilder().setRamDiskReplicaCapacity(2).build();
final String METHOD_NAME = GenericTestUtils.getMethodName();
Path path1 = new Path("/" + METHOD_NAME + ".01.dat");
makeTestFile(path1, BLOCK_SIZE, true);
ensureFileReplicasOnStorageType(path1, RAM_DISK);
cluster.shutdownDataNodes();
cluster.restartNameNodes();
// wait for the replication monitor to mark the file as corrupt
Thread.sleep(2 * DFS_NAMENODE_REPLICATION_INTERVAL_DEFAULT * 1000);
Long corruptBlkCount = (long) Iterators.size(cluster.getNameNode()
.getNamesystem().getBlockManager().getCorruptReplicaBlockIterator());
// Check block detected as corrupted
assertThat(corruptBlkCount, is(1L));
// Ensure path1 exist.
Assert.assertTrue(fs.exists(path1));
}
/**
* Concurrent read from the same node and verify the contents.
*/