HDFS-10360. DataNode may format directory and lose blocks if current/VERSION is missing. (Wei-Chiu Chuang via lei)
(cherry picked from commit bae11b95d492e0be7c5576097424879c2539b474)
This commit is contained in:
parent
456fe08a6d
commit
2d1d2dde17
|
@ -25,7 +25,10 @@ import java.io.RandomAccessFile;
|
|||
import java.lang.management.ManagementFactory;
|
||||
import java.nio.channels.FileLock;
|
||||
import java.nio.channels.OverlappingFileLockException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.Files;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
@ -345,9 +348,12 @@ public abstract class Storage extends StorageInfo {
|
|||
*/
|
||||
public void clearDirectory() throws IOException {
|
||||
File curDir = this.getCurrentDir();
|
||||
if (curDir.exists())
|
||||
if (curDir.exists()) {
|
||||
File[] files = FileUtil.listFiles(curDir);
|
||||
LOG.info("Will remove files: " + Arrays.toString(files));
|
||||
if (!(FileUtil.fullyDelete(curDir)))
|
||||
throw new IOException("Cannot remove current directory: " + curDir);
|
||||
}
|
||||
if (!curDir.mkdirs())
|
||||
throw new IOException("Cannot create directory " + curDir);
|
||||
}
|
||||
|
@ -470,16 +476,60 @@ public abstract class Storage extends StorageInfo {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check consistency of the storage directory
|
||||
* Check to see if current/ directory is empty. This method is used
|
||||
* before determining to format the directory.
|
||||
*
|
||||
* @throws InconsistentFSStateException if not empty.
|
||||
* @throws IOException if unable to list files under the directory.
|
||||
*/
|
||||
private void checkEmptyCurrent() throws InconsistentFSStateException,
|
||||
IOException {
|
||||
File currentDir = getCurrentDir();
|
||||
if(!currentDir.exists()) {
|
||||
// if current/ does not exist, it's safe to format it.
|
||||
return;
|
||||
}
|
||||
try(DirectoryStream<java.nio.file.Path> dirStream =
|
||||
Files.newDirectoryStream(currentDir.toPath())) {
|
||||
if (dirStream.iterator().hasNext()) {
|
||||
throw new InconsistentFSStateException(root,
|
||||
"Can't format the storage directory because the current/ "
|
||||
+ "directory is not empty.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check consistency of the storage directory.
|
||||
*
|
||||
* @param startOpt a startup option.
|
||||
* @param storage The Storage object that manages this StorageDirectory.
|
||||
*
|
||||
* @return state {@link StorageState} of the storage directory
|
||||
* @throws InconsistentFSStateException if directory state is not
|
||||
* consistent and cannot be recovered.
|
||||
* @throws IOException
|
||||
*/
|
||||
public StorageState analyzeStorage(StartupOption startOpt, Storage storage)
|
||||
throws IOException {
|
||||
return analyzeStorage(startOpt, storage, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check consistency of the storage directory.
|
||||
*
|
||||
* @param startOpt a startup option.
|
||||
* @param storage The Storage object that manages this StorageDirectory.
|
||||
* @param checkCurrentIsEmpty if true, make sure current/ directory
|
||||
* is empty before determining to format it.
|
||||
*
|
||||
* @return state {@link StorageState} of the storage directory
|
||||
* @throws InconsistentFSStateException if directory state is not
|
||||
* consistent and cannot be recovered.
|
||||
* @throws IOException
|
||||
*/
|
||||
public StorageState analyzeStorage(StartupOption startOpt, Storage storage)
|
||||
public StorageState analyzeStorage(StartupOption startOpt, Storage storage,
|
||||
boolean checkCurrentIsEmpty)
|
||||
throws IOException {
|
||||
assert root != null : "root is null";
|
||||
boolean hadMkdirs = false;
|
||||
|
@ -516,8 +566,12 @@ public abstract class Storage extends StorageInfo {
|
|||
// If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory,
|
||||
// while it also checks the layout version.
|
||||
if (startOpt == HdfsServerConstants.StartupOption.FORMAT ||
|
||||
(startOpt == StartupOption.HOTSWAP && hadMkdirs))
|
||||
(startOpt == StartupOption.HOTSWAP && hadMkdirs)) {
|
||||
if (checkCurrentIsEmpty) {
|
||||
checkEmptyCurrent();
|
||||
}
|
||||
return StorageState.NOT_FORMATTED;
|
||||
}
|
||||
|
||||
if (startOpt != HdfsServerConstants.StartupOption.IMPORT) {
|
||||
storage.checkOldLayoutStorage(this);
|
||||
|
@ -542,6 +596,9 @@ public abstract class Storage extends StorageInfo {
|
|||
if (hasPrevious)
|
||||
throw new InconsistentFSStateException(root,
|
||||
"version file in current directory is missing.");
|
||||
if (checkCurrentIsEmpty) {
|
||||
checkEmptyCurrent();
|
||||
}
|
||||
return StorageState.NOT_FORMATTED;
|
||||
}
|
||||
|
||||
|
|
|
@ -267,7 +267,7 @@ public class DataStorage extends Storage {
|
|||
List<Callable<StorageDirectory>> callables) throws IOException {
|
||||
StorageDirectory sd = new StorageDirectory(dataDir, null, false);
|
||||
try {
|
||||
StorageState curState = sd.analyzeStorage(startOpt, this);
|
||||
StorageState curState = sd.analyzeStorage(startOpt, this, true);
|
||||
// sd is locked but not opened
|
||||
switch (curState) {
|
||||
case NORMAL:
|
||||
|
|
|
@ -22,6 +22,7 @@ import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
|||
import static org.hamcrest.core.Is.is;
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
@ -444,6 +445,41 @@ public class TestDataNodeVolumeFailureReporting {
|
|||
checkFailuresAtNameNode(dm, dns.get(1), true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAutoFormatEmptyDirectory() throws Exception {
|
||||
// remove the version file
|
||||
File dn1Vol1 = cluster.getStorageDir(0, 0);
|
||||
File current = new File(dn1Vol1, "current");
|
||||
File currentVersion = new File(current, "VERSION");
|
||||
currentVersion.delete();
|
||||
// restart the data node
|
||||
assertTrue(cluster.restartDataNodes(true));
|
||||
// the DN should tolerate one volume failure.
|
||||
cluster.waitActive();
|
||||
ArrayList<DataNode> dns = cluster.getDataNodes();
|
||||
DataNode dn = dns.get(0);
|
||||
assertFalse("DataNode should not reformat if VERSION is missing",
|
||||
currentVersion.exists());
|
||||
|
||||
// Make sure DN's JMX sees the failed volume
|
||||
final String[] expectedFailedVolumes = {dn1Vol1.getAbsolutePath()};
|
||||
DataNodeTestUtils.triggerHeartbeat(dn);
|
||||
FsDatasetSpi<?> fsd = dn.getFSDataset();
|
||||
assertEquals(expectedFailedVolumes.length, fsd.getNumFailedVolumes());
|
||||
assertArrayEquals(expectedFailedVolumes, fsd.getFailedStorageLocations());
|
||||
// there shouldn't be any more volume failures due to I/O failure
|
||||
checkFailuresAtDataNode(dn, 0, false, expectedFailedVolumes);
|
||||
|
||||
// The NN reports one volume failures
|
||||
final DatanodeManager dm = cluster.getNamesystem().getBlockManager().
|
||||
getDatanodeManager();
|
||||
long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
|
||||
DFSTestUtil.waitForDatanodeStatus(dm, 1, 0, 1,
|
||||
(1*dnCapacity), WAIT_FOR_HEARTBEATS);
|
||||
checkAggregateFailuresAtNameNode(false, 1);
|
||||
checkFailuresAtNameNode(dm, dns.get(0), false, dn1Vol1.getAbsolutePath());
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks the NameNode for correct values of aggregate counters tracking failed
|
||||
* volumes across all DataNodes.
|
||||
|
|
|
@ -166,6 +166,32 @@ public class TestDataStorage {
|
|||
assertEquals(6, storage.getNumStorageDirs());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMissingVersion() throws IOException,
|
||||
URISyntaxException {
|
||||
final int numLocations = 1;
|
||||
final int numNamespace = 1;
|
||||
List<StorageLocation> locations = createStorageLocations(numLocations);
|
||||
|
||||
StorageLocation firstStorage = locations.get(0);
|
||||
Storage.StorageDirectory sd = new Storage.StorageDirectory(
|
||||
firstStorage.getFile());
|
||||
// the directory is not initialized so VERSION does not exist
|
||||
// create a fake directory under current/
|
||||
File currentDir = new File(sd.getCurrentDir(),
|
||||
"BP-787466439-172.26.24.43-1462305406642");
|
||||
assertTrue("unable to mkdir " + currentDir.getName(), currentDir.mkdirs());
|
||||
|
||||
// Add volumes for multiple namespaces.
|
||||
List<NamespaceInfo> namespaceInfos = createNamespaceInfos(numNamespace);
|
||||
for (NamespaceInfo ni : namespaceInfos) {
|
||||
storage.addStorageLocations(mockDN, ni, locations, START_OPT);
|
||||
}
|
||||
|
||||
// It should not format the directory because VERSION is missing.
|
||||
assertTrue("Storage directory was formatted", currentDir.exists());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRecoverTransitionReadFailure() throws IOException {
|
||||
final int numLocations = 3;
|
||||
|
|
Loading…
Reference in New Issue