HDFS-7339. Allocating and persisting block groups in NameNode. Contributed by Zhe Zhang
This commit is contained in:
parent
f166e67a23
commit
bc2833b1c9
|
@ -159,6 +159,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
|||
public static final int DFS_NAMENODE_REPLICATION_INTERVAL_DEFAULT = 3;
|
||||
public static final String DFS_NAMENODE_REPLICATION_MIN_KEY = "dfs.namenode.replication.min";
|
||||
public static final int DFS_NAMENODE_REPLICATION_MIN_DEFAULT = 1;
|
||||
public static final String DFS_NAMENODE_STRIPE_MIN_KEY = "dfs.namenode.stripe.min";
|
||||
public static final int DFS_NAMENODE_STRIPE_MIN_DEFAULT = 1;
|
||||
public static final String DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY = "dfs.namenode.replication.pending.timeout-sec";
|
||||
public static final int DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_DEFAULT = -1;
|
||||
public static final String DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY = "dfs.namenode.replication.max-streams";
|
||||
|
|
|
@ -54,10 +54,12 @@ public class BlockIdManager {
|
|||
* The global block ID space for this file system.
|
||||
*/
|
||||
private final SequentialBlockIdGenerator blockIdGenerator;
|
||||
private final SequentialBlockGroupIdGenerator blockGroupIdGenerator;
|
||||
|
||||
/**
 * Creates the ID manager and its two sequential ID generators.
 *
 * The legacy (V1) generation stamp limit is initialised to
 * HdfsConstants.GRANDFATHER_GENERATION_STAMP, and both the block ID
 * generator and the block group ID generator are constructed with the
 * given block manager.
 *
 * @param blockManager the block manager handed to both ID generators
 */
public BlockIdManager(BlockManager blockManager) {
|
||||
this.generationStampV1Limit = HdfsConstants.GRANDFATHER_GENERATION_STAMP;
|
||||
this.blockIdGenerator = new SequentialBlockIdGenerator(blockManager);
|
||||
this.blockGroupIdGenerator = new SequentialBlockGroupIdGenerator(blockManager);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -191,6 +193,10 @@ public class BlockIdManager {
|
|||
return blockIdGenerator.nextValue();
|
||||
}
|
||||
|
||||
/**
 * Returns the next block group ID by delegating to the block group ID
 * generator.
 *
 * @return the value produced by blockGroupIdGenerator.nextValue()
 */
public long nextBlockGroupId() {
|
||||
return blockGroupIdGenerator.nextValue();
|
||||
}
|
||||
|
||||
public boolean isGenStampInFuture(Block block) {
|
||||
if (isLegacyBlock(block)) {
|
||||
return block.getGenerationStamp() > getGenerationStampV1();
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.blockmanagement;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import org.apache.hadoop.util.SequentialNumber;
|
||||
|
||||
/**
|
||||
* Generate the next valid block group ID by incrementing the maximum block
|
||||
* group ID allocated so far, with the first 2^10 block group IDs reserved.
|
||||
* HDFS-EC introduces a hierarchical protocol to name blocks and groups:
|
||||
* Contiguous: {reserved block IDs | flag | block ID}
|
||||
* Striped: {reserved block IDs | flag | block group ID | index in group}
|
||||
*
|
||||
* Following n bits of reserved block IDs, The (n+1)th bit in an ID
|
||||
* distinguishes contiguous (0) and striped (1) blocks. For a striped block,
|
||||
* bits (n+2) to (64-m) represent the ID of its block group, while the last m
|
||||
* bits represent its index of the group. The value m is determined by the
|
||||
* maximum number of blocks in a group (MAX_BLOCKS_IN_GROUP).
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class SequentialBlockGroupIdGenerator extends SequentialNumber {
|
||||
|
||||
private final BlockManager blockManager;
|
||||
|
||||
SequentialBlockGroupIdGenerator(BlockManager blockManagerRef) {
|
||||
super(Long.MIN_VALUE);
|
||||
this.blockManager = blockManagerRef;
|
||||
}
|
||||
|
||||
@Override // NumberGenerator
|
||||
public long nextValue() {
|
||||
// Skip to next legitimate block group ID based on the naming protocol
|
||||
while (super.getCurrentValue() % HdfsConstants.MAX_BLOCKS_IN_GROUP > 0) {
|
||||
super.nextValue();
|
||||
}
|
||||
// Make sure there's no conflict with existing random block IDs
|
||||
while (hasValidBlockInRange(super.getCurrentValue())) {
|
||||
super.skipTo(super.getCurrentValue() +
|
||||
HdfsConstants.MAX_BLOCKS_IN_GROUP);
|
||||
}
|
||||
if (super.getCurrentValue() >= 0) {
|
||||
BlockManager.LOG.warn("All negative block group IDs are used, " +
|
||||
"growing into positive IDs, " +
|
||||
"which might conflict with non-erasure coded blocks.");
|
||||
}
|
||||
return super.getCurrentValue();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param id The starting ID of the range
|
||||
* @return true if any ID in the range
|
||||
* {id, id+HdfsConstants.MAX_BLOCKS_IN_GROUP} is pointed-to by a file
|
||||
*/
|
||||
private boolean hasValidBlockInRange(long id) {
|
||||
for (int i = 0; i < HdfsConstants.MAX_BLOCKS_IN_GROUP; i++) {
|
||||
Block b = new Block(id + i);
|
||||
if (blockManager.getBlockCollection(b) != null) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -19,7 +19,6 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
|
|||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||
import org.apache.hadoop.util.SequentialNumber;
|
||||
|
||||
/**
|
||||
|
@ -54,6 +53,11 @@ public class SequentialBlockIdGenerator extends SequentialNumber {
|
|||
while(isValidBlock(b)) {
|
||||
b.setBlockId(super.nextValue());
|
||||
}
|
||||
if (b.getBlockId() < 0) {
|
||||
BlockManager.LOG.warn("All positive block IDs are used, " +
|
||||
"wrapping to negative IDs, " +
|
||||
"which might conflict with erasure coded block groups.");
|
||||
}
|
||||
return b.getBlockId();
|
||||
}
|
||||
|
||||
|
|
|
@ -2093,7 +2093,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
|||
BlockInfoContiguous oldBlock = file.getLastBlock();
|
||||
boolean shouldCopyOnTruncate = shouldCopyOnTruncate(file, oldBlock);
|
||||
if(newBlock == null) {
|
||||
newBlock = (shouldCopyOnTruncate) ? createNewBlock() :
|
||||
newBlock = (shouldCopyOnTruncate) ? createNewBlock(file.isStriped()) :
|
||||
new Block(oldBlock.getBlockId(), oldBlock.getNumBytes(),
|
||||
nextGenerationStamp(blockIdManager.isLegacyBlock(oldBlock)));
|
||||
}
|
||||
|
@ -3044,10 +3044,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
|||
|
||||
/**
|
||||
* Create new block with a unique block id and a new generation stamp.
|
||||
* @param isStriped is the file under striping or contiguous layout?
|
||||
*/
|
||||
Block createNewBlock() throws IOException {
|
||||
assert hasWriteLock();
|
||||
Block b = new Block(nextBlockId(), 0, 0);
|
||||
Block b = new Block(nextBlockId(isStriped), 0, 0);
|
||||
// Increment the generation stamp for every new block.
|
||||
b.setGenerationStamp(nextGenerationStamp(false));
|
||||
return b;
|
||||
|
@ -5610,11 +5611,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
|||
|
||||
/**
|
||||
* Increments, logs and then returns the block ID
|
||||
* @param isStriped is the file under striping or contiguous layout?
|
||||
*/
|
||||
private long nextBlockId() throws IOException {
|
||||
private long nextBlockId(boolean isStriped) throws IOException {
|
||||
assert hasWriteLock();
|
||||
checkNameNodeSafeMode("Cannot get next block ID");
|
||||
final long blockId = blockIdManager.nextBlockId();
|
||||
final long blockId = isStriped ?
|
||||
blockIdManager.nextBlockGroupId() : blockIdManager.nextBlockId();
|
||||
getEditLog().logAllocateBlockId(blockId);
|
||||
// NB: callers sync the log
|
||||
return blockId;
|
||||
|
|
|
@ -34,12 +34,14 @@ import org.apache.hadoop.fs.permission.PermissionStatus;
|
|||
import org.apache.hadoop.fs.StorageType;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguousUnderConstruction;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
|
||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiff;
|
||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
|
||||
|
@ -924,4 +926,13 @@ public class INodeFile extends INodeWithAdditionalFields
|
|||
return snapshotBlocks != null &&
|
||||
Arrays.asList(snapshotBlocks).contains(block);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
/**
|
||||
* @return true if the file is in the striping layout.
|
||||
*/
|
||||
// TODO: move erasure coding policy to file XAttr (HDFS-7337)
|
||||
public boolean isStriped() {
|
||||
return getStoragePolicyID() == HdfsConstants.EC_STORAGE_POLICY_ID;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
public class TestAddBlockgroup {
|
||||
|
||||
public static final Log LOG = LogFactory.getLog(TestAddBlockgroup.class);
|
||||
|
||||
private final short GROUP_SIZE = HdfsConstants.NUM_DATA_BLOCKS +
|
||||
HdfsConstants.NUM_PARITY_BLOCKS;
|
||||
private final short NUM_DATANODES = GROUP_SIZE;
|
||||
|
||||
private static final int BLOCKSIZE = 1024;
|
||||
private static final short REPLICATION = 3;
|
||||
|
||||
private MiniDFSCluster cluster;
|
||||
private Configuration conf;
|
||||
|
||||
@Before
|
||||
public void setup() throws IOException {
|
||||
conf = new Configuration();
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCKSIZE);
|
||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES)
|
||||
.build();
|
||||
cluster.waitActive();
|
||||
cluster.getFileSystem().setStoragePolicy(new Path("/"),
|
||||
HdfsConstants.EC_STORAGE_POLICY_NAME);
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddBlockGroup() throws Exception {
|
||||
DistributedFileSystem fs = cluster.getFileSystem();
|
||||
FSDirectory fsdir = cluster.getNamesystem().getFSDirectory();
|
||||
|
||||
final Path file1 = new Path("/file1");
|
||||
DFSTestUtil.createFile(fs, file1, BLOCKSIZE * 2, REPLICATION, 0L);
|
||||
INodeFile file1Node = fsdir.getINode4Write(file1.toString()).asFile();
|
||||
BlockInfo[] file1Blocks = file1Node.getBlocks();
|
||||
assertEquals(2, file1Blocks.length);
|
||||
assertEquals(GROUP_SIZE, file1Blocks[0].numNodes());
|
||||
assertEquals(HdfsConstants.MAX_BLOCKS_IN_GROUP,
|
||||
file1Blocks[1].getBlockId() - file1Blocks[0].getBlockId());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue