HDFS-14043. Tolerate corrupted seen_txid file. Contributed by Lukas Majercak.

(cherry picked from commit f3296501e0)
This commit is contained in:
Inigo Goiri 2018-11-05 16:48:37 -08:00
parent a1321d020a
commit 9bf4f3d614
2 changed files with 58 additions and 0 deletions

View File

@ -98,6 +98,8 @@ public class PersistentLongFile {
val = Long.parseLong(br.readLine());
br.close();
br = null;
} catch (NumberFormatException e) {
throw new IOException(e);
} finally {
IOUtils.cleanup(LOG, br);
}

View File

@ -28,8 +28,13 @@ import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.spy;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@ -37,6 +42,8 @@ import java.util.concurrent.Future;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -737,6 +744,55 @@ public class TestSaveNamespace {
}
}
/**
 * Checks that the namesystem can be reloaded when the seen_txid file in one
 * of two configured name directories contains non-numeric text, and that
 * after the reload the seen_txid files in both directories are identical.
 *
 * @throws Exception if formatting, loading, editing or file access fails
 */
@Test(timeout=30000)
public void testTxFaultTolerance() throws Exception {
  // Two storage directories, each used for both the image and the edits.
  String baseDir = MiniDFSCluster.getBaseDirectory();
  List<String> nameDirs = new ArrayList<>();
  nameDirs.add(fileAsURI(new File(baseDir, "name1")).toString());
  nameDirs.add(fileAsURI(new File(baseDir, "name2")).toString());

  Configuration conf = new HdfsConfiguration();
  String nameDirsStr = StringUtils.join(",", nameDirs);
  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDirsStr);
  conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, nameDirsStr);

  NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
  DFSTestUtil.formatNameNode(conf);
  FSNamesystem fsn = FSNamesystem.loadFromDisk(conf);

  try {
    // We have a BEGIN_LOG_SEGMENT txn to start
    assertEquals(1, fsn.getEditLog().getLastWrittenTxId());
    doAnEdit(fsn, 1);
    assertEquals(2, fsn.getEditLog().getLastWrittenTxId());

    // Shut down
    fsn.close();
    // Drop the reference so that, if the restart below throws, the finally
    // block does not close the already-closed instance a second time.
    fsn = null;

    // Corrupt one of the seen_txid files. append=false overwrites the
    // existing content; both writers are managed by try-with-resources.
    File txidFile0 = new File(new URI(nameDirs.get(0) +
        "/current/seen_txid"));
    try (FileWriter fw = new FileWriter(txidFile0, false);
        PrintWriter pw = new PrintWriter(fw)) {
      pw.print("corrupt____!");
    }

    // Restart
    fsn = FSNamesystem.loadFromDisk(conf);
    // NOTE(review): 4 presumably accounts for the segment-end txn written at
    // shutdown plus the segment-start txn written on reload -- confirm.
    assertEquals(4, fsn.getEditLog().getLastWrittenTxId());

    // Check seen_txid is same in both dirs
    File txidFile1 = new File(new URI(nameDirs.get(1) +
        "/current/seen_txid"));
    assertTrue(FileUtils.contentEquals(txidFile0, txidFile1));
  } finally {
    if (fsn != null) {
      fsn.close();
    }
  }
}
private void doAnEdit(FSNamesystem fsn, int id) throws IOException {
// Make an edit
fsn.mkdirs("/test" + id, new PermissionStatus("test", "Test",