HDFS-14043. Tolerate corrupted seen_txid file. Contributed by Lukas Majercak.
(cherry picked from commit f3296501e0)
This commit is contained in:
parent
a1321d020a
commit
9bf4f3d614
|
@ -98,6 +98,8 @@ public class PersistentLongFile {
|
|||
val = Long.parseLong(br.readLine());
|
||||
br.close();
|
||||
br = null;
|
||||
} catch (NumberFormatException e) {
|
||||
throw new IOException(e);
|
||||
} finally {
|
||||
IOUtils.cleanup(LOG, br);
|
||||
}
|
||||
|
|
|
@ -28,8 +28,13 @@ import static org.mockito.Mockito.doThrow;
|
|||
import static org.mockito.Mockito.spy;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.net.URI;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
@ -37,6 +42,8 @@ import java.util.concurrent.Future;
|
|||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
@ -737,6 +744,55 @@ public class TestSaveNamespace {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout=30000)
|
||||
public void testTxFaultTolerance() throws Exception {
|
||||
String baseDir = MiniDFSCluster.getBaseDirectory();
|
||||
List<String> nameDirs = new ArrayList<>();
|
||||
nameDirs.add(fileAsURI(new File(baseDir, "name1")).toString());
|
||||
nameDirs.add(fileAsURI(new File(baseDir, "name2")).toString());
|
||||
|
||||
Configuration conf = new HdfsConfiguration();
|
||||
String nameDirsStr = StringUtils.join(",", nameDirs);
|
||||
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDirsStr);
|
||||
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, nameDirsStr);
|
||||
|
||||
NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
|
||||
DFSTestUtil.formatNameNode(conf);
|
||||
FSNamesystem fsn = FSNamesystem.loadFromDisk(conf);
|
||||
try {
|
||||
// We have a BEGIN_LOG_SEGMENT txn to start
|
||||
assertEquals(1, fsn.getEditLog().getLastWrittenTxId());
|
||||
|
||||
doAnEdit(fsn, 1);
|
||||
|
||||
assertEquals(2, fsn.getEditLog().getLastWrittenTxId());
|
||||
|
||||
// Shut down
|
||||
fsn.close();
|
||||
|
||||
// Corrupt one of the seen_txid files
|
||||
File txidFile0 = new File(new URI(nameDirs.get(0) +
|
||||
"/current/seen_txid"));
|
||||
FileWriter fw = new FileWriter(txidFile0, false);
|
||||
try (PrintWriter pw = new PrintWriter(fw)) {
|
||||
pw.print("corrupt____!");
|
||||
}
|
||||
|
||||
// Restart
|
||||
fsn = FSNamesystem.loadFromDisk(conf);
|
||||
assertEquals(4, fsn.getEditLog().getLastWrittenTxId());
|
||||
|
||||
// Check seen_txid is same in both dirs
|
||||
File txidFile1 = new File(new URI(nameDirs.get(1) +
|
||||
"/current/seen_txid"));
|
||||
assertTrue(FileUtils.contentEquals(txidFile0, txidFile1));
|
||||
} finally {
|
||||
if (fsn != null) {
|
||||
fsn.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void doAnEdit(FSNamesystem fsn, int id) throws IOException {
|
||||
// Make an edit
|
||||
fsn.mkdirs("/test" + id, new PermissionStatus("test", "Test",
|
||||
|
|
Loading…
Reference in New Issue