HDFS-7339. Allocating and persisting block groups in NameNode. Contributed by Zhe Zhang
This commit is contained in:
parent
f166e67a23
commit
bc2833b1c9
|
@ -159,6 +159,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
||||||
public static final int DFS_NAMENODE_REPLICATION_INTERVAL_DEFAULT = 3;
|
public static final int DFS_NAMENODE_REPLICATION_INTERVAL_DEFAULT = 3;
|
||||||
public static final String DFS_NAMENODE_REPLICATION_MIN_KEY = "dfs.namenode.replication.min";
|
public static final String DFS_NAMENODE_REPLICATION_MIN_KEY = "dfs.namenode.replication.min";
|
||||||
public static final int DFS_NAMENODE_REPLICATION_MIN_DEFAULT = 1;
|
public static final int DFS_NAMENODE_REPLICATION_MIN_DEFAULT = 1;
|
||||||
|
public static final String DFS_NAMENODE_STRIPE_MIN_KEY = "dfs.namenode.stripe.min";
|
||||||
|
public static final int DFS_NAMENODE_STRIPE_MIN_DEFAULT = 1;
|
||||||
public static final String DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY = "dfs.namenode.replication.pending.timeout-sec";
|
public static final String DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY = "dfs.namenode.replication.pending.timeout-sec";
|
||||||
public static final int DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_DEFAULT = -1;
|
public static final int DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_DEFAULT = -1;
|
||||||
public static final String DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY = "dfs.namenode.replication.max-streams";
|
public static final String DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY = "dfs.namenode.replication.max-streams";
|
||||||
|
|
|
@ -54,10 +54,12 @@ public class BlockIdManager {
|
||||||
* The global block ID space for this file system.
|
* The global block ID space for this file system.
|
||||||
*/
|
*/
|
||||||
private final SequentialBlockIdGenerator blockIdGenerator;
|
private final SequentialBlockIdGenerator blockIdGenerator;
|
||||||
|
private final SequentialBlockGroupIdGenerator blockGroupIdGenerator;
|
||||||
|
|
||||||
public BlockIdManager(BlockManager blockManager) {
|
public BlockIdManager(BlockManager blockManager) {
|
||||||
this.generationStampV1Limit = HdfsConstants.GRANDFATHER_GENERATION_STAMP;
|
this.generationStampV1Limit = HdfsConstants.GRANDFATHER_GENERATION_STAMP;
|
||||||
this.blockIdGenerator = new SequentialBlockIdGenerator(blockManager);
|
this.blockIdGenerator = new SequentialBlockIdGenerator(blockManager);
|
||||||
|
this.blockGroupIdGenerator = new SequentialBlockGroupIdGenerator(blockManager);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -191,6 +193,10 @@ public class BlockIdManager {
|
||||||
return blockIdGenerator.nextValue();
|
return blockIdGenerator.nextValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long nextBlockGroupId() {
|
||||||
|
return blockGroupIdGenerator.nextValue();
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isGenStampInFuture(Block block) {
|
public boolean isGenStampInFuture(Block block) {
|
||||||
if (isLegacyBlock(block)) {
|
if (isLegacyBlock(block)) {
|
||||||
return block.getGenerationStamp() > getGenerationStampV1();
|
return block.getGenerationStamp() > getGenerationStampV1();
|
||||||
|
@ -206,4 +212,4 @@ public class BlockIdManager {
|
||||||
.LAST_RESERVED_BLOCK_ID);
|
.LAST_RESERVED_BLOCK_ID);
|
||||||
generationStampV1Limit = HdfsConstants.GRANDFATHER_GENERATION_STAMP;
|
generationStampV1Limit = HdfsConstants.GRANDFATHER_GENERATION_STAMP;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.blockmanagement;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
|
import org.apache.hadoop.util.SequentialNumber;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate the next valid block group ID by incrementing the maximum block
|
||||||
|
* group ID allocated so far, with the first 2^10 block group IDs reserved.
|
||||||
|
* HDFS-EC introduces a hierarchical protocol to name blocks and groups:
|
||||||
|
* Contiguous: {reserved block IDs | flag | block ID}
|
||||||
|
* Striped: {reserved block IDs | flag | block group ID | index in group}
|
||||||
|
*
|
||||||
|
* Following n bits of reserved block IDs, The (n+1)th bit in an ID
|
||||||
|
* distinguishes contiguous (0) and striped (1) blocks. For a striped block,
|
||||||
|
* bits (n+2) to (64-m) represent the ID of its block group, while the last m
|
||||||
|
* bits represent its index of the group. The value m is determined by the
|
||||||
|
* maximum number of blocks in a group (MAX_BLOCKS_IN_GROUP).
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class SequentialBlockGroupIdGenerator extends SequentialNumber {
|
||||||
|
|
||||||
|
private final BlockManager blockManager;
|
||||||
|
|
||||||
|
SequentialBlockGroupIdGenerator(BlockManager blockManagerRef) {
|
||||||
|
super(Long.MIN_VALUE);
|
||||||
|
this.blockManager = blockManagerRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override // NumberGenerator
|
||||||
|
public long nextValue() {
|
||||||
|
// Skip to next legitimate block group ID based on the naming protocol
|
||||||
|
while (super.getCurrentValue() % HdfsConstants.MAX_BLOCKS_IN_GROUP > 0) {
|
||||||
|
super.nextValue();
|
||||||
|
}
|
||||||
|
// Make sure there's no conflict with existing random block IDs
|
||||||
|
while (hasValidBlockInRange(super.getCurrentValue())) {
|
||||||
|
super.skipTo(super.getCurrentValue() +
|
||||||
|
HdfsConstants.MAX_BLOCKS_IN_GROUP);
|
||||||
|
}
|
||||||
|
if (super.getCurrentValue() >= 0) {
|
||||||
|
BlockManager.LOG.warn("All negative block group IDs are used, " +
|
||||||
|
"growing into positive IDs, " +
|
||||||
|
"which might conflict with non-erasure coded blocks.");
|
||||||
|
}
|
||||||
|
return super.getCurrentValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param id The starting ID of the range
|
||||||
|
* @return true if any ID in the range
|
||||||
|
* {id, id+HdfsConstants.MAX_BLOCKS_IN_GROUP} is pointed-to by a file
|
||||||
|
*/
|
||||||
|
private boolean hasValidBlockInRange(long id) {
|
||||||
|
for (int i = 0; i < HdfsConstants.MAX_BLOCKS_IN_GROUP; i++) {
|
||||||
|
Block b = new Block(id + i);
|
||||||
|
if (blockManager.getBlockCollection(b) != null) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
|
@ -19,7 +19,6 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hdfs.protocol.Block;
|
import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
|
||||||
import org.apache.hadoop.util.SequentialNumber;
|
import org.apache.hadoop.util.SequentialNumber;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -54,6 +53,11 @@ public class SequentialBlockIdGenerator extends SequentialNumber {
|
||||||
while(isValidBlock(b)) {
|
while(isValidBlock(b)) {
|
||||||
b.setBlockId(super.nextValue());
|
b.setBlockId(super.nextValue());
|
||||||
}
|
}
|
||||||
|
if (b.getBlockId() < 0) {
|
||||||
|
BlockManager.LOG.warn("All positive block IDs are used, " +
|
||||||
|
"wrapping to negative IDs, " +
|
||||||
|
"which might conflict with erasure coded block groups.");
|
||||||
|
}
|
||||||
return b.getBlockId();
|
return b.getBlockId();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2093,7 +2093,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
BlockInfoContiguous oldBlock = file.getLastBlock();
|
BlockInfoContiguous oldBlock = file.getLastBlock();
|
||||||
boolean shouldCopyOnTruncate = shouldCopyOnTruncate(file, oldBlock);
|
boolean shouldCopyOnTruncate = shouldCopyOnTruncate(file, oldBlock);
|
||||||
if(newBlock == null) {
|
if(newBlock == null) {
|
||||||
newBlock = (shouldCopyOnTruncate) ? createNewBlock() :
|
newBlock = (shouldCopyOnTruncate) ? createNewBlock(file.isStriped()) :
|
||||||
new Block(oldBlock.getBlockId(), oldBlock.getNumBytes(),
|
new Block(oldBlock.getBlockId(), oldBlock.getNumBytes(),
|
||||||
nextGenerationStamp(blockIdManager.isLegacyBlock(oldBlock)));
|
nextGenerationStamp(blockIdManager.isLegacyBlock(oldBlock)));
|
||||||
}
|
}
|
||||||
|
@ -3044,10 +3044,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create new block with a unique block id and a new generation stamp.
|
* Create new block with a unique block id and a new generation stamp.
|
||||||
|
* @param isStriped is the file under striping or contiguous layout?
|
||||||
*/
|
*/
|
||||||
Block createNewBlock() throws IOException {
|
Block createNewBlock() throws IOException {
|
||||||
assert hasWriteLock();
|
assert hasWriteLock();
|
||||||
Block b = new Block(nextBlockId(), 0, 0);
|
Block b = new Block(nextBlockId(isStriped), 0, 0);
|
||||||
// Increment the generation stamp for every new block.
|
// Increment the generation stamp for every new block.
|
||||||
b.setGenerationStamp(nextGenerationStamp(false));
|
b.setGenerationStamp(nextGenerationStamp(false));
|
||||||
return b;
|
return b;
|
||||||
|
@ -5610,11 +5611,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Increments, logs and then returns the block ID
|
* Increments, logs and then returns the block ID
|
||||||
|
* @param isStriped is the file under striping or contiguous layout?
|
||||||
*/
|
*/
|
||||||
private long nextBlockId() throws IOException {
|
private long nextBlockId(boolean isStriped) throws IOException {
|
||||||
assert hasWriteLock();
|
assert hasWriteLock();
|
||||||
checkNameNodeSafeMode("Cannot get next block ID");
|
checkNameNodeSafeMode("Cannot get next block ID");
|
||||||
final long blockId = blockIdManager.nextBlockId();
|
final long blockId = isStriped ?
|
||||||
|
blockIdManager.nextBlockGroupId() : blockIdManager.nextBlockId();
|
||||||
getEditLog().logAllocateBlockId(blockId);
|
getEditLog().logAllocateBlockId(blockId);
|
||||||
// NB: callers sync the log
|
// NB: callers sync the log
|
||||||
return blockId;
|
return blockId;
|
||||||
|
|
|
@ -34,12 +34,14 @@ import org.apache.hadoop.fs.permission.PermissionStatus;
|
||||||
import org.apache.hadoop.fs.StorageType;
|
import org.apache.hadoop.fs.StorageType;
|
||||||
import org.apache.hadoop.hdfs.protocol.Block;
|
import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
|
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
|
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguousUnderConstruction;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguousUnderConstruction;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiff;
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiff;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
|
||||||
|
@ -924,4 +926,13 @@ public class INodeFile extends INodeWithAdditionalFields
|
||||||
return snapshotBlocks != null &&
|
return snapshotBlocks != null &&
|
||||||
Arrays.asList(snapshotBlocks).contains(block);
|
Arrays.asList(snapshotBlocks).contains(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
/**
|
||||||
|
* @return true if the file is in the striping layout.
|
||||||
|
*/
|
||||||
|
// TODO: move erasure coding policy to file XAttr (HDFS-7337)
|
||||||
|
public boolean isStriped() {
|
||||||
|
return getStoragePolicyID() == HdfsConstants.EC_STORAGE_POLICY_ID;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,84 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
|
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||||
|
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
public class TestAddBlockgroup {
|
||||||
|
|
||||||
|
public static final Log LOG = LogFactory.getLog(TestAddBlockgroup.class);
|
||||||
|
|
||||||
|
private final short GROUP_SIZE = HdfsConstants.NUM_DATA_BLOCKS +
|
||||||
|
HdfsConstants.NUM_PARITY_BLOCKS;
|
||||||
|
private final short NUM_DATANODES = GROUP_SIZE;
|
||||||
|
|
||||||
|
private static final int BLOCKSIZE = 1024;
|
||||||
|
private static final short REPLICATION = 3;
|
||||||
|
|
||||||
|
private MiniDFSCluster cluster;
|
||||||
|
private Configuration conf;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() throws IOException {
|
||||||
|
conf = new Configuration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCKSIZE);
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES)
|
||||||
|
.build();
|
||||||
|
cluster.waitActive();
|
||||||
|
cluster.getFileSystem().setStoragePolicy(new Path("/"),
|
||||||
|
HdfsConstants.EC_STORAGE_POLICY_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddBlockGroup() throws Exception {
|
||||||
|
DistributedFileSystem fs = cluster.getFileSystem();
|
||||||
|
FSDirectory fsdir = cluster.getNamesystem().getFSDirectory();
|
||||||
|
|
||||||
|
final Path file1 = new Path("/file1");
|
||||||
|
DFSTestUtil.createFile(fs, file1, BLOCKSIZE * 2, REPLICATION, 0L);
|
||||||
|
INodeFile file1Node = fsdir.getINode4Write(file1.toString()).asFile();
|
||||||
|
BlockInfo[] file1Blocks = file1Node.getBlocks();
|
||||||
|
assertEquals(2, file1Blocks.length);
|
||||||
|
assertEquals(GROUP_SIZE, file1Blocks[0].numNodes());
|
||||||
|
assertEquals(HdfsConstants.MAX_BLOCKS_IN_GROUP,
|
||||||
|
file1Blocks[1].getBlockId() - file1Blocks[0].getBlockId());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue