HDFS-7587. Edit log corruption can happen if append fails with a quota violation. Contributed by Jing Zhao.

(cherry picked from commit c7c71cdba5)

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
This commit is contained in:
Jing Zhao 2015-03-18 18:51:14 -07:00
parent 6dcc79507d
commit 5a5b244648
3 changed files with 152 additions and 9 deletions

View File

@ -899,6 +899,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7943. Append cannot handle the last block with length greater than HDFS-7943. Append cannot handle the last block with length greater than
the preferred block size. (jing9) the preferred block size. (jing9)
HDFS-7587. Edit log corruption can happen if append fails with a quota
violation. (jing9)
BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
HDFS-7720. Quota by Storage Type API, tools and ClientNameNode HDFS-7720. Quota by Storage Type API, tools and ClientNameNode

View File

@ -2672,6 +2672,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
String leaseHolder, String clientMachine, boolean newBlock, String leaseHolder, String clientMachine, boolean newBlock,
boolean writeToEditLog, boolean logRetryCache) throws IOException { boolean writeToEditLog, boolean logRetryCache) throws IOException {
final INodeFile file = iip.getLastINode().asFile(); final INodeFile file = iip.getLastINode().asFile();
final QuotaCounts delta = verifyQuotaForUCBlock(file, iip);
file.recordModification(iip.getLatestSnapshotId()); file.recordModification(iip.getLatestSnapshotId());
file.toUnderConstruction(leaseHolder, clientMachine); file.toUnderConstruction(leaseHolder, clientMachine);
@ -2681,10 +2683,15 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
LocatedBlock ret = null; LocatedBlock ret = null;
if (!newBlock) { if (!newBlock) {
ret = blockManager.convertLastBlockToUnderConstruction(file, 0); ret = blockManager.convertLastBlockToUnderConstruction(file, 0);
if (ret != null) { if (ret != null && delta != null) {
// update the quota: use the preferred block size for UC block Preconditions.checkState(delta.getStorageSpace() >= 0,
final long diff = file.getPreferredBlockSize() - ret.getBlockSize(); "appending to a block with size larger than the preferred block size");
dir.updateSpaceConsumed(iip, 0, diff, file.getBlockReplication()); dir.writeLock();
try {
dir.updateCountNoQuotaCheck(iip, iip.length() - 1, delta);
} finally {
dir.writeUnlock();
}
} }
} else { } else {
BlockInfoContiguous lastBlock = file.getLastBlock(); BlockInfoContiguous lastBlock = file.getLastBlock();
@ -2700,6 +2707,52 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
return ret; return ret;
} }
/**
 * Checks, before any state is changed, that accounting the last block of
 * {@code file} at the preferred block size (as is done for an
 * under-construction block) stays within quota. This is usually used by
 * append and truncate.
 *
 * @param file the file whose last block is about to become a UC block
 * @param iip the resolved path of {@code file}; quota is verified against
 *            the first {@code iip.length() - 1} inodes of this path
 * @return expected quota usage update. null means no change or no need to
 *         update quota usage later
 * @throws QuotaExceededException when violating the storage quota
 */
private QuotaCounts verifyQuotaForUCBlock(INodeFile file, INodesInPath iip)
    throws QuotaExceededException {
  // Do not check quota if editlog is still being processed
  final boolean skipCheck = !isImageLoaded() || dir.shouldSkipQuotaChecks();
  if (skipCheck || file.getLastBlock() == null) {
    return null;
  }

  final QuotaCounts expectedChange = computeQuotaDeltaForUCBlock(file);
  // Verification only reads the directory tree, so the read lock is enough.
  dir.readLock();
  try {
    FSDirectory.verifyQuota(iip, iip.length() - 1, expectedChange, null);
  } finally {
    dir.readUnlock();
  }
  return expectedChange;
}
/**
 * Compute quota change for converting a complete block to a UC block.
 *
 * The storage-space change is the gap between the file's preferred block
 * size and the current length of its last block, multiplied by the file's
 * replication. The same gap is added once for each storage type chosen by
 * the file's storage policy that supports type quota.
 *
 * @param file the file whose last block is being converted
 * @return the computed change; all-default counts when the file has no
 *         last block
 */
private QuotaCounts computeQuotaDeltaForUCBlock(INodeFile file) {
  final QuotaCounts change = new QuotaCounts.Builder().build();
  final BlockInfoContiguous tail = file.getLastBlock();
  if (tail == null) {
    return change;
  }

  final long headroom = file.getPreferredBlockSize() - tail.getNumBytes();
  final short replication = file.getBlockReplication();
  change.addStorageSpace(headroom * replication);

  final List<StorageType> chosenTypes = dir.getBlockStoragePolicySuite()
      .getPolicy(file.getStoragePolicyID())
      .chooseStorageTypes(replication);
  for (StorageType type : chosenTypes) {
    if (!type.supportTypeQuota()) {
      continue;
    }
    change.addTypeSpace(type, headroom);
  }
  return change;
}
/** /**
* Recover lease; * Recover lease;
* Immediately revoke the lease of the current lease holder and start lease * Immediately revoke the lease of the current lease holder and start lease
@ -3106,7 +3159,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
// doesn't match up with what we think is the last block. There are // doesn't match up with what we think is the last block. There are
// four possibilities: // four possibilities:
// 1) This is the first block allocation of an append() pipeline // 1) This is the first block allocation of an append() pipeline
// which started appending exactly at a block boundary. // which started appending exactly at or exceeding the block boundary.
// In this case, the client isn't passed the previous block, // In this case, the client isn't passed the previous block,
// so it makes the allocateBlock() call with previous=null. // so it makes the allocateBlock() call with previous=null.
// We can distinguish this since the last block of the file // We can distinguish this since the last block of the file
@ -3131,7 +3184,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
BlockInfoContiguous penultimateBlock = pendingFile.getPenultimateBlock(); BlockInfoContiguous penultimateBlock = pendingFile.getPenultimateBlock();
if (previous == null && if (previous == null &&
lastBlockInFile != null && lastBlockInFile != null &&
lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() && lastBlockInFile.getNumBytes() >= pendingFile.getPreferredBlockSize() &&
lastBlockInFile.isComplete()) { lastBlockInFile.isComplete()) {
// Case 1 // Case 1
if (NameNode.stateChangeLog.isDebugEnabled()) { if (NameNode.stateChangeLog.isDebugEnabled()) {

View File

@ -26,13 +26,18 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSOutputStream; import org.apache.hadoop.hdfs.DFSOutputStream;
import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.ipc.RemoteException;
import org.junit.After; import org.junit.After;
import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -148,12 +153,11 @@ public class TestDiskspaceQuotaUpdate {
final Path foo = new Path("/foo"); final Path foo = new Path("/foo");
final Path bar = new Path(foo, "bar"); final Path bar = new Path(foo, "bar");
DFSTestUtil.createFile(dfs, bar, BLOCKSIZE, REPLICATION, 0L); DFSTestUtil.createFile(dfs, bar, BLOCKSIZE, REPLICATION, 0L);
dfs.setQuota(foo, Long.MAX_VALUE-1, Long.MAX_VALUE-1); dfs.setQuota(foo, Long.MAX_VALUE - 1, Long.MAX_VALUE - 1);
FSDataOutputStream out = dfs.append(bar); FSDataOutputStream out = dfs.append(bar);
out.write(new byte[BLOCKSIZE / 4]); out.write(new byte[BLOCKSIZE / 4]);
((DFSOutputStream) out.getWrappedStream()).hsync(EnumSet ((DFSOutputStream) out.getWrappedStream()).hsync(EnumSet.of(HdfsDataOutputStream.SyncFlag.UPDATE_LENGTH));
.of(HdfsDataOutputStream.SyncFlag.UPDATE_LENGTH));
INodeDirectory fooNode = fsdir.getINode4Write(foo.toString()).asDirectory(); INodeDirectory fooNode = fsdir.getINode4Write(foo.toString()).asDirectory();
QuotaCounts quota = fooNode.getDirectoryWithQuotaFeature() QuotaCounts quota = fooNode.getDirectoryWithQuotaFeature()
@ -182,4 +186,87 @@ public class TestDiskspaceQuotaUpdate {
assertEquals(2, ns); // foo and bar assertEquals(2, ns); // foo and bar
assertEquals((BLOCKSIZE * 2 + BLOCKSIZE / 2) * REPLICATION, ds); assertEquals((BLOCKSIZE * 2 + BLOCKSIZE / 2) * REPLICATION, ds);
} }
/**
 * Test append over storage quota does not mark file as UC or create lease.
 *
 * Creates a file holding half a block, lowers the directory's storage space
 * quota to 1, and attempts an append that must be rejected with
 * {@link DSQuotaExceededException}. Afterwards the file must still exist,
 * must not be under construction, must have no lease, and the directory's
 * consumed storage space must be unchanged. Finally the NameNodes are
 * restarted to make sure edits aren't corrupted.
 */
@Test (timeout=60000)
public void testAppendOverStorageQuota() throws Exception {
  final Path dir = new Path("/TestAppendOverQuota");
  final Path file = new Path(dir, "file");

  // create partial block file
  dfs.mkdirs(dir);
  DFSTestUtil.createFile(dfs, file, BLOCKSIZE/2, REPLICATION, seed);

  // lower quota to cause exception when appending to partial block
  dfs.setQuota(dir, Long.MAX_VALUE - 1, 1);
  final INodeDirectory dirNode = fsdir.getINode4Write(dir.toString())
      .asDirectory();
  final long spaceUsed = dirNode.getDirectoryWithQuotaFeature()
      .getSpaceConsumed().getStorageSpace();
  try {
    DFSTestUtil.appendFile(dfs, file, BLOCKSIZE);
    Assert.fail("append didn't fail");
  } catch (DSQuotaExceededException expected) {
    // ignore: the quota violation is exactly what this test provokes
  }

  // check that the file exists, isn't UC, and has no dangling lease.
  // The existence check must come before asFile(); otherwise a missing
  // inode would surface as a NullPointerException instead of a failure.
  Assert.assertNotNull("file should still exist",
      fsdir.getINode(file.toString()));
  INodeFile inode = fsdir.getINode(file.toString()).asFile();
  Assert.assertFalse("should not be UC", inode.isUnderConstruction());
  Assert.assertNull("should not have a lease", cluster.getNamesystem()
      .getLeaseManager().getLeaseByPath(file.toString()));

  // make sure the quota usage is unchanged
  final long newSpaceUsed = dirNode.getDirectoryWithQuotaFeature()
      .getSpaceConsumed().getStorageSpace();
  assertEquals(spaceUsed, newSpaceUsed);

  // make sure edits aren't corrupted
  dfs.recoverLease(file);
  cluster.restartNameNodes();
}
/**
 * Test append over a specific type of storage quota does not mark file as
 * UC or create a lease.
 *
 * Sets the ONESSD storage policy on the directory, creates a file holding
 * half a block, sets the SSD type quota to 1, and attempts an append that
 * must be rejected with a RemoteException wrapping
 * QuotaByStorageTypeExceededException. Afterwards the file must still
 * exist, must not be under construction, must have no lease, and the
 * directory's consumed storage space must be unchanged. Finally the
 * NameNodes are restarted to make sure edits aren't corrupted.
 */
@Test (timeout=60000)
public void testAppendOverTypeQuota() throws Exception {
  final Path dir = new Path("/TestAppendOverTypeQuota");
  final Path file = new Path(dir, "file");

  // create partial block file
  dfs.mkdirs(dir);
  // set the storage policy on dir
  dfs.setStoragePolicy(dir, HdfsConstants.ONESSD_STORAGE_POLICY_NAME);
  DFSTestUtil.createFile(dfs, file, BLOCKSIZE/2, REPLICATION, seed);

  // set quota of SSD to 1L
  dfs.setQuotaByStorageType(dir, StorageType.SSD, 1L);
  final INodeDirectory dirNode = fsdir.getINode4Write(dir.toString())
      .asDirectory();
  final long spaceUsed = dirNode.getDirectoryWithQuotaFeature()
      .getSpaceConsumed().getStorageSpace();
  try {
    DFSTestUtil.appendFile(dfs, file, BLOCKSIZE);
    Assert.fail("append didn't fail");
  } catch (RemoteException e) {
    // the type-quota violation arrives wrapped, so match on the class name
    assertTrue(e.getClassName().contains("QuotaByStorageTypeExceededException"));
  }

  // check that the file exists, isn't UC, and has no dangling lease.
  // The existence check must come before asFile(); otherwise a missing
  // inode would surface as a NullPointerException instead of a failure.
  Assert.assertNotNull("file should still exist",
      fsdir.getINode(file.toString()));
  INodeFile inode = fsdir.getINode(file.toString()).asFile();
  Assert.assertFalse("should not be UC", inode.isUnderConstruction());
  Assert.assertNull("should not have a lease", cluster.getNamesystem()
      .getLeaseManager().getLeaseByPath(file.toString()));

  // make sure the quota usage is unchanged
  final long newSpaceUsed = dirNode.getDirectoryWithQuotaFeature()
      .getSpaceConsumed().getStorageSpace();
  assertEquals(spaceUsed, newSpaceUsed);

  // make sure edits aren't corrupted
  dfs.recoverLease(file);
  cluster.restartNameNodes();
}
} }