HDFS-9308. Add truncateMeta() and deleteMeta() to MiniDFSCluster. (Tony Wu via lei)

This commit is contained in:
Lei Xu 2015-11-02 18:05:43 -08:00
parent 5ba2b98d0f
commit 8e05dbf2bd
4 changed files with 75 additions and 102 deletions

View File

@ -1661,6 +1661,8 @@ Release 2.8.0 - UNRELEASED
HDFS-9312. Fix TestReplication to be FsDataset-agnostic. (lei)
HDFS-9308. Add truncateMeta() and deleteMeta() to MiniDFSCluster. (Tony Wu via lei)
BUG FIXES
HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs.

View File

@ -2117,6 +2117,28 @@ public class MiniDFSCluster {
getMaterializedReplica(i, blk).corruptMeta();
}
/**
* Corrupt the metadata of a block by deleting it.
* @param i index of the datanode
* @param blk name of the block.
*/
public void deleteMeta(int i, ExtendedBlock blk)
throws IOException {
getMaterializedReplica(i, blk).deleteMeta();
}
/**
* Corrupt the metadata of a block by truncating it to a new size.
* @param i index of the datanode.
* @param blk name of the block.
* @param newSize the new size of the metadata file.
* @throws IOException if any I/O errors.
*/
public void truncateMeta(int i, ExtendedBlock blk, int newSize)
throws IOException {
getMaterializedReplica(i, blk).truncateMeta(newSize);
}
public boolean changeGenStampOfBlock(int dnIndex, ExtendedBlock blk,
long newGenStamp) throws IOException {
File blockFile = getBlockFile(dnIndex, blk);

View File

@ -22,11 +22,8 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
@ -35,12 +32,15 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
import org.apache.hadoop.io.IOUtils;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A JUnit test for corrupted file handling.
@ -70,6 +70,8 @@ import org.mockito.Mockito;
* replica was created from the non-corrupted replica.
*/
public class TestCrcCorruption {
public static final Logger LOG =
LoggerFactory.getLogger(TestCrcCorruption.class);
private DFSClientFaultInjector faultInjector;
@ -167,90 +169,26 @@ public class TestCrcCorruption {
// file disallows this Datanode to send data to another datanode.
// However, a client is alowed access to this block.
//
File storageDir = cluster.getInstanceStorageDir(0, 1);
String bpid = cluster.getNamesystem().getBlockPoolId();
File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
assertTrue("data directory does not exist", data_dir.exists());
File[] blocks = data_dir.listFiles();
assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
int num = 0;
for (int idx = 0; idx < blocks.length; idx++) {
if (blocks[idx].getName().startsWith(Block.BLOCK_FILE_PREFIX) &&
blocks[idx].getName().endsWith(".meta")) {
num++;
if (num % 3 == 0) {
//
// remove .meta file
//
System.out.println("Deliberately removing file " + blocks[idx].getName());
assertTrue("Cannot remove file.", blocks[idx].delete());
} else if (num % 3 == 1) {
//
// shorten .meta file
//
RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
FileChannel channel = file.getChannel();
int newsize = random.nextInt((int)channel.size()/2);
System.out.println("Deliberately truncating file " +
blocks[idx].getName() +
" to size " + newsize + " bytes.");
channel.truncate(newsize);
file.close();
} else {
//
// corrupt a few bytes of the metafile
//
RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
FileChannel channel = file.getChannel();
long position = 0;
//
// The very first time, corrupt the meta header at offset 0
//
if (num != 2) {
position = (long)random.nextInt((int)channel.size());
}
int length = random.nextInt((int)(channel.size() - position + 1));
byte[] buffer = new byte[length];
random.nextBytes(buffer);
channel.write(ByteBuffer.wrap(buffer), position);
System.out.println("Deliberately corrupting file " +
blocks[idx].getName() +
" at offset " + position +
" length " + length);
file.close();
}
}
}
//
// Now deliberately corrupt all meta blocks from the second
// directory of the first datanode
//
storageDir = cluster.getInstanceStorageDir(0, 1);
data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
assertTrue("data directory does not exist", data_dir.exists());
blocks = data_dir.listFiles();
assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
final int dnIdx = 0;
final DataNode dn = cluster.getDataNodes().get(dnIdx);
final String bpid = cluster.getNamesystem().getBlockPoolId();
List<FinalizedReplica> replicas =
dn.getFSDataset().getFinalizedBlocks(bpid);
assertTrue("Replicas do not exist", !replicas.isEmpty());
int count = 0;
File previous = null;
for (int idx = 0; idx < blocks.length; idx++) {
if (blocks[idx].getName().startsWith("blk_") &&
blocks[idx].getName().endsWith(".meta")) {
//
// Move the previous metafile into the current one.
//
count++;
if (count % 2 == 0) {
System.out.println("Deliberately insertimg bad crc into files " +
blocks[idx].getName() + " " + previous.getName());
assertTrue("Cannot remove file.", blocks[idx].delete());
assertTrue("Cannot corrupt meta file.", previous.renameTo(blocks[idx]));
assertTrue("Cannot recreate empty meta file.", previous.createNewFile());
previous = null;
} else {
previous = blocks[idx];
}
for (int idx = 0; idx < replicas.size(); idx++) {
FinalizedReplica replica = replicas.get(idx);
ExtendedBlock eb = new ExtendedBlock(bpid, replica);
if (idx % 3 == 0) {
LOG.info("Deliberately removing meta for block " + eb);
cluster.deleteMeta(dnIdx, eb);
} else if (idx % 3 == 1) {
final int newSize = 2; // bytes
LOG.info("Deliberately truncating meta file for block " +
eb + " to size " + newSize + " bytes.");
cluster.truncateMeta(dnIdx, eb, newSize);
} else {
cluster.corruptMeta(dnIdx, eb);
}
}
@ -260,7 +198,7 @@ public class TestCrcCorruption {
//
assertTrue("Corrupted replicas not handled properly.",
util.checkFiles(fs, "/srcdat"));
System.out.println("All File still have a valid replica");
LOG.info("All File still have a valid replica");
//
// set replication factor back to 1. This causes only one replica of
@ -273,7 +211,7 @@ public class TestCrcCorruption {
//System.out.println("All Files done with removing replicas");
//assertTrue("Excess replicas deleted. Corrupted replicas found.",
// util.checkFiles(fs, "/srcdat"));
System.out.println("The excess-corrupted-replica test is disabled " +
LOG.info("The excess-corrupted-replica test is disabled " +
" pending HADOOP-1557");
util.cleanup(fs, "/srcdat");

View File

@ -20,9 +20,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
@ -32,7 +30,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@ -178,8 +175,9 @@ public class TestLeaseRecovery {
Path file = new Path("/testRecoveryFile");
DistributedFileSystem dfs = cluster.getFileSystem();
FSDataOutputStream out = dfs.create(file);
final int FILE_SIZE = 2 * 1024 * 1024;
int count = 0;
while (count < 2 * 1024 * 1024) {
while (count < FILE_SIZE) {
out.writeBytes("Data");
count += 4;
}
@ -190,15 +188,23 @@ public class TestLeaseRecovery {
LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
file.toString(), 0, count);
ExtendedBlock block = locations.get(0).getBlock();
DataNode dn = cluster.getDataNodes().get(0);
BlockLocalPathInfo localPathInfo = dn.getBlockLocalPathInfo(block, null);
File metafile = new File(localPathInfo.getMetaPath());
assertTrue(metafile.exists());
// reduce the block meta file size
RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
raf.setLength(metafile.length() - 20);
raf.close();
// Calculate meta file size
// From DataNode.java, checksum size is given by:
// (length of data + BYTE_PER_CHECKSUM - 1)/BYTES_PER_CHECKSUM *
// CHECKSUM_SIZE
final int CHECKSUM_SIZE = 4; // CRC32 & CRC32C
final int bytesPerChecksum = conf.getInt(
DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY,
DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT);
final int metaFileSize =
(FILE_SIZE + bytesPerChecksum - 1) / bytesPerChecksum * CHECKSUM_SIZE +
8; // meta file header is 8 bytes
final int newMetaFileSize = metaFileSize - CHECKSUM_SIZE;
// Corrupt the block meta file by dropping checksum for bytesPerChecksum
// bytes. Lease recovery is expected to recover the uncorrupted file length.
cluster.truncateMeta(0, block, newMetaFileSize);
// restart DN to make replica to RWR
DataNodeProperties dnProp = cluster.stopDataNode(0);
@ -213,6 +219,11 @@ public class TestLeaseRecovery {
}
assertTrue("File should be closed", newdfs.recoverLease(file));
// Verify file length after lease recovery. The new file length should not
// include the bytes with corrupted checksum.
final long expectedNewFileLen = FILE_SIZE - bytesPerChecksum;
final long newFileLen = newdfs.getFileStatus(file).getLen();
assertEquals(newFileLen, expectedNewFileLen);
}
/**