HDFS-4150. Update the inode in the block map when a snapshotted file or a snapshot file is deleted. Contributed by Jing Zhao

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1406763 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2012-11-07 19:30:36 +00:00
parent 7160c01f61
commit 8a577a16f9
6 changed files with 312 additions and 54 deletions

View File

@ -56,3 +56,6 @@ Branch-2802 Snapshot (Unreleased)
HDFS-4147. When there is a snapshot in a subtree, deletion of the subtree
should fail. (Jing Zhao via szetszwo)
HDFS-4150. Update the inode in the block map when a snapshotted file or a
snapshot file is deleted. (Jing Zhao via szetszwo)

View File

@ -145,6 +145,7 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
@ -159,6 +160,7 @@ import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapINodeUpdateEntry;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory.INodesInPath;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
@ -2718,21 +2720,48 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
* of blocks that need to be removed from blocksMap
*/
private void removeBlocks(BlocksMapUpdateInfo blocks) {
int start = 0;
int end = 0;
List<Block> toDeleteList = blocks.getToDeleteList();
while (start < toDeleteList.size()) {
end = BLOCK_DELETION_INCREMENT + start;
end = end > toDeleteList.size() ? toDeleteList.size() : end;
Iterator<Map.Entry<Block, BlocksMapINodeUpdateEntry>> iter = blocks
.iterator();
while (iter.hasNext()) {
writeLock();
try {
for (int i = start; i < end; i++) {
blockManager.removeBlock(toDeleteList.get(i));
for (int numberToHandle = BLOCK_DELETION_INCREMENT; iter.hasNext()
&& numberToHandle > 0; numberToHandle--) {
Map.Entry<Block, BlocksMapINodeUpdateEntry> entry = iter.next();
updateBlocksMap(entry);
}
} finally {
writeUnlock();
}
start = end;
}
}
/**
* Update the blocksMap for a given block.
*
* @param entry
* The update entry containing both the block and its new INode. The
* block should be removed from the blocksMap if the INode is null,
* otherwise the INode for the block will be updated in the
* blocksMap.
*/
private void updateBlocksMap(
Map.Entry<Block, BlocksMapINodeUpdateEntry> entry) {
Block block = entry.getKey();
BlocksMapINodeUpdateEntry value = entry.getValue();
if (value == null) {
blockManager.removeBlock(block);
} else {
BlockCollection toDelete = value.getToDelete();
BlockInfo originalBlockInfo = blockManager.getStoredBlock(block);
// The FSDirectory tree and the blocksMap share the same INode reference.
// Thus we use "==" to check if the INode for the block belongs to the
// current file (instead of the INode from a snapshot file).
if (originalBlockInfo != null
&& toDelete == originalBlockInfo.getBlockCollection()) {
blockManager.addBlockCollection(originalBlockInfo,
value.getToReplace());
}
}
}
@ -2754,7 +2783,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
boolean trackBlockCounts = isSafeModeTrackingBlocks();
int numRemovedComplete = 0, numRemovedSafe = 0;
for (Block b : blocks.getToDeleteList()) {
Iterator<Map.Entry<Block, BlocksMapINodeUpdateEntry>> blockIter =
blocks.iterator();
while (blockIter.hasNext()) {
Map.Entry<Block, BlocksMapINodeUpdateEntry> entry = blockIter.next();
Block b = entry.getKey();
if (trackBlockCounts) {
BlockInfo bi = blockManager.getStoredBlock(b);
if (bi.isComplete()) {
@ -2764,8 +2797,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
}
}
blockManager.removeBlock(b);
updateBlocksMap(entry);
}
if (trackBlockCounts) {
if (LOG.isDebugEnabled()) {
LOG.debug("Adjusting safe-mode totals for deletion of " + src + ":" +

View File

@ -22,7 +22,10 @@ import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.ContentSummary;
@ -31,6 +34,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.util.StringUtils;
@ -194,7 +198,8 @@ public abstract class INode implements Comparable<byte[]> {
*
* @param info
* Containing all the blocks collected from the children of this
* INode. These blocks later should be removed from the blocksMap.
* INode. These blocks later should be removed/updated in the
* blocksMap.
*/
abstract int collectSubtreeBlocksAndClear(BlocksMapUpdateInfo info);
@ -501,44 +506,84 @@ public abstract class INode implements Comparable<byte[]> {
/**
* Information used for updating the blocksMap when deleting files.
*/
public static class BlocksMapUpdateInfo {
/**
* The list of blocks that need to be removed from blocksMap
*/
private List<Block> toDeleteList;
public BlocksMapUpdateInfo(List<Block> toDeleteList) {
this.toDeleteList = toDeleteList == null ? new ArrayList<Block>()
: toDeleteList;
}
public static class BlocksMapUpdateInfo implements
Iterable<Map.Entry<Block, BlocksMapINodeUpdateEntry>> {
private final Map<Block, BlocksMapINodeUpdateEntry> updateMap;
public BlocksMapUpdateInfo() {
toDeleteList = new ArrayList<Block>();
updateMap = new HashMap<Block, BlocksMapINodeUpdateEntry>();
}
/**
* @return The list of blocks that need to be removed from blocksMap
*/
public List<Block> getToDeleteList() {
return toDeleteList;
}
/**
* Add a to-be-deleted block into the
* {@link BlocksMapUpdateInfo#toDeleteList}
* Add a to-be-deleted block. This block should belongs to a file without
* snapshots. We thus only need to put a block-null pair into the updateMap.
*
* @param toDelete the to-be-deleted block
*/
public void addDeleteBlock(Block toDelete) {
if (toDelete != null) {
toDeleteList.add(toDelete);
updateMap.put(toDelete, null);
}
}
/**
* Clear {@link BlocksMapUpdateInfo#toDeleteList}
* Add a given block, as well as its old and new BlockCollection
* information, into the updateMap.
*
* @param toUpdateBlock
* The given block
* @param entry
* The BlocksMapINodeUpdateEntry instance containing both the
* original BlockCollection of the given block and the new
* BlockCollection of the given block for updating the blocksMap.
* The new BlockCollection should be the INode of one of the
* corresponding file's snapshot.
*/
public void addUpdateBlock(Block toUpdateBlock,
BlocksMapINodeUpdateEntry entry) {
updateMap.put(toUpdateBlock, entry);
}
/**
* Clear {@link BlocksMapUpdateInfo#updateMap}
*/
public void clear() {
toDeleteList.clear();
updateMap.clear();
}
@Override
public Iterator<Map.Entry<Block, BlocksMapINodeUpdateEntry>> iterator() {
return updateMap.entrySet().iterator();
}
}
/**
* When deleting a file with snapshot, we cannot directly remove its record
* from blocksMap. Instead, we should consider replacing the original record
* in blocksMap with INode of snapshot.
*/
public static class BlocksMapINodeUpdateEntry {
/**
* The BlockCollection of the file to be deleted
*/
private final BlockCollection toDelete;
/**
* The BlockCollection of the to-be-deleted file's snapshot
*/
private final BlockCollection toReplace;
public BlocksMapINodeUpdateEntry(BlockCollection toDelete,
BlockCollection toReplace) {
this.toDelete = toDelete;
this.toReplace = toReplace;
}
public BlockCollection getToDelete() {
return toDelete;
}
public BlockCollection getToReplace() {
return toReplace;
}
}
}

View File

@ -91,7 +91,7 @@ public class INodeFile extends INode implements BlockCollection {
return getFileReplication();
}
void setFileReplication(short replication) {
protected void setFileReplication(short replication) {
if(replication <= 0)
throw new IllegalArgumentException("Unexpected value for the replication");
header = ((long)replication << BLOCKBITS) | (header & ~HEADERMASK);

View File

@ -17,10 +17,7 @@
*/
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
@ -61,7 +58,8 @@ public class INodeFileWithLink extends INodeFile {
@Override
public short getBlockReplication() {
short max = getFileReplication();
for(INodeFileWithLink i = next; i != this; i = i.getNext()) {
// i may be null since next will be set to null when the INode is deleted
for(INodeFileWithLink i = next; i != this && i != null; i = i.getNext()) {
final short replication = i.getFileReplication();
if (replication > max) {
max = replication;
@ -78,35 +76,46 @@ public class INodeFileWithLink extends INodeFile {
* any other inode, collect them and update the block list.
*/
@Override
protected int collectSubtreeBlocksAndClear(List<Block> v) {
protected int collectSubtreeBlocksAndClear(BlocksMapUpdateInfo info) {
if (next == this) {
//this is the only remaining inode.
super.collectSubtreeBlocksAndClear(v);
// this is the only remaining inode.
super.collectSubtreeBlocksAndClear(info);
} else {
//There are other inode(s) using the blocks.
//Compute max file size excluding this and find the last inode.
// There are other inode(s) using the blocks.
// Compute max file size excluding this and find the last inode.
long max = next.computeFileSize(true);
short maxReplication = next.getFileReplication();
INodeFileWithLink last = next;
for(INodeFileWithLink i = next.getNext(); i != this; i = i.getNext()) {
final long size = i.computeFileSize(true);
if (size > max) {
max = size;
}
final short rep = i.getFileReplication();
if (rep > maxReplication) {
maxReplication = rep;
}
last = i;
}
collectBlocksBeyondMaxAndClear(max, v);
collectBlocksBeyondMaxAndClear(max, info);
//remove this from the circular linked list.
// remove this from the circular linked list.
last.next = this.next;
// Set the replication of the current INode to the max of all the other
// linked INodes, so that in case the current INode is retrieved from the
// blocksMap before it is removed or updated, the correct replication
// number can be retrieved.
this.setFileReplication(maxReplication);
this.next = null;
//clear parent
// clear parent
parent = null;
}
return 1;
}
private void collectBlocksBeyondMaxAndClear(final long max, final List<Block> v) {
private void collectBlocksBeyondMaxAndClear(final long max,
final BlocksMapUpdateInfo info) {
final BlockInfo[] oldBlocks = getBlocks();
if (oldBlocks != null) {
//find the minimum n such that the size of the first n blocks > max
@ -115,9 +124,18 @@ public class INodeFileWithLink extends INodeFile {
size += oldBlocks[n].getNumBytes();
}
//starting from block n, the data is beyond max.
// Replace the INode for all the remaining blocks in blocksMap
BlocksMapINodeUpdateEntry entry = new BlocksMapINodeUpdateEntry(this,
next);
if (info != null) {
for (int i = 0; i < n; i++) {
info.addUpdateBlock(oldBlocks[i], entry);
}
}
// starting from block n, the data is beyond max.
if (n < oldBlocks.length) {
//resize the array.
// resize the array.
final BlockInfo[] newBlocks;
if (n == 0) {
newBlocks = null;
@ -129,10 +147,10 @@ public class INodeFileWithLink extends INodeFile {
i.setBlocks(newBlocks);
}
//collect the blocks beyond max.
if (v != null) {
// collect the blocks beyond max.
if (info != null) {
for(; n < oldBlocks.length; n++) {
v.add(oldBlocks[n]);
info.addDeleteBlock(oldBlocks[n]);
}
}
}

View File

@ -0,0 +1,158 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
* Test cases for snapshot-related information in blocksMap.
*/
public class TestSnapshotBlocksMap {
private static final long seed = 0;
private static final short REPLICATION = 3;
private static final int BLOCKSIZE = 1024;
private final Path dir = new Path("/TestSnapshot");
private final Path sub1 = new Path(dir, "sub1");
protected Configuration conf;
protected MiniDFSCluster cluster;
protected FSNamesystem fsn;
protected DistributedFileSystem hdfs;
@Before
public void setUp() throws Exception {
conf = new Configuration();
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCKSIZE);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
.build();
cluster.waitActive();
fsn = cluster.getNamesystem();
hdfs = cluster.getFileSystem();
}
@After
public void tearDown() throws Exception {
if (cluster != null) {
cluster.shutdown();
}
}
/**
* Test deleting a file with snapshots. Need to check the blocksMap to make
* sure the corresponding record is updated correctly.
*/
@Test
public void testDeletionWithSnapshots() throws Exception {
Path file0 = new Path(sub1, "file0");
Path file1 = new Path(sub1, "file1");
Path subsub1 = new Path(sub1, "sub1");
Path subfile0 = new Path(subsub1, "file0");
// Create file under sub1
DFSTestUtil.createFile(hdfs, file0, BLOCKSIZE, REPLICATION, seed);
DFSTestUtil.createFile(hdfs, file1, BLOCKSIZE, REPLICATION, seed);
DFSTestUtil.createFile(hdfs, subfile0, BLOCKSIZE, REPLICATION, seed);
BlockManager bm = fsn.getBlockManager();
FSDirectory dir = fsn.getFSDirectory();
INodeFile inodeForDeletedFile = INodeFile.valueOf(
dir.getINode(subfile0.toString()), subfile0.toString());
BlockInfo[] blocksForDeletedFile = inodeForDeletedFile.getBlocks();
assertEquals(blocksForDeletedFile.length, 1);
BlockCollection bcForDeletedFile = bm
.getBlockCollection(blocksForDeletedFile[0]);
assertNotNull(bcForDeletedFile);
// Normal deletion
hdfs.delete(subsub1, true);
bcForDeletedFile = bm.getBlockCollection(blocksForDeletedFile[0]);
// The INode should have been removed from the blocksMap
assertNull(bcForDeletedFile);
// Create snapshots for sub1
for (int i = 0; i < 2; i++) {
SnapshotTestHelper.createSnapshot(hdfs, sub1, "s" + i);
}
// Check the block information for file0
// Get the INode for file0
INodeFile inode = INodeFile.valueOf(dir.getINode(file0.toString()),
file0.toString());
BlockInfo[] blocks = inode.getBlocks();
// Get the INode for the first block from blocksMap
BlockCollection bc = bm.getBlockCollection(blocks[0]);
// The two INode should be the same one
assertTrue(bc == inode);
// Also check the block information for snapshot of file0
Path snapshotFile0 = SnapshotTestHelper.getSnapshotPath(sub1, "s0",
file0.getName());
INodeFile ssINode0 = INodeFile.valueOf(dir.getINode(snapshotFile0.toString()),
snapshotFile0.toString());
BlockInfo[] ssBlocks = ssINode0.getBlocks();
// The snapshot of file1 should contain 1 block
assertEquals(ssBlocks.length, 1);
// Delete file0
hdfs.delete(file0, true);
// Make sure the first block of file0 is still in blocksMap
BlockInfo blockInfoAfterDeletion = bm.getStoredBlock(blocks[0]);
assertNotNull(blockInfoAfterDeletion);
// Check the INode information
BlockCollection bcAfterDeletion = blockInfoAfterDeletion
.getBlockCollection();
// The INode in the blocksMap should be no longer the original INode for
// file0
assertFalse(bcAfterDeletion == inode);
// Compare the INode in the blocksMap with INodes for snapshots
Path snapshot1File0 = SnapshotTestHelper.getSnapshotPath(sub1, "s1",
file0.getName());
INodeFile ssINode1 = INodeFile.valueOf(
dir.getINode(snapshot1File0.toString()), snapshot1File0.toString());
assertTrue(bcAfterDeletion == ssINode0 || bcAfterDeletion == ssINode1);
assertEquals(bcAfterDeletion.getBlocks().length, 1);
}
// TODO: test for deletion file which was appended after taking snapshots
}