HDFS-8791. block ID-based DN storage layout can be very slow for datanode on ext4. Contributed by Chris Trezzo.

(cherry picked from commit b8dbffbef87c0ee0bee5e17649586fc9f0e1f55b)
This commit is contained in:
Kihwal Lee 2016-03-01 14:56:16 -06:00
parent 9fa300ad1f
commit 9bc9e13a97
7 changed files with 67 additions and 9 deletions

View File

@ -38,6 +38,9 @@ Release 2.7.3 - UNRELEASED
HDFS-9395. Make HDFS audit logging consistant (Kuhu Shukla via kihwal) HDFS-9395. Make HDFS audit logging consistant (Kuhu Shukla via kihwal)
HDFS-8791. block ID-based DN storage layout can be very slow for datanode
on ext4 (Chris Trezzo via kihwal)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-8845. DiskChecker should not traverse the entire tree (Chang Li via HDFS-8845. DiskChecker should not traverse the entire tree (Chang Li via

View File

@ -65,7 +65,10 @@ public class DataNodeLayoutVersion {
FIRST_LAYOUT(-55, -53, "First datanode layout", false), FIRST_LAYOUT(-55, -53, "First datanode layout", false),
BLOCKID_BASED_LAYOUT(-56, BLOCKID_BASED_LAYOUT(-56,
"The block ID of a finalized block uniquely determines its position " + "The block ID of a finalized block uniquely determines its position " +
"in the directory structure"); "in the directory structure"),
BLOCKID_BASED_LAYOUT_32_by_32(-57,
"Identical to the block id based layout (-56) except it uses a smaller"
+ " directory structure (32x32)");
private final FeatureInfo info; private final FeatureInfo info;

View File

@ -1117,10 +1117,13 @@ public class DataStorage extends Storage {
LOG.info("Start linking block files from " + from + " to " + to); LOG.info("Start linking block files from " + from + " to " + to);
boolean upgradeToIdBasedLayout = false; boolean upgradeToIdBasedLayout = false;
// If we are upgrading from a version older than the one where we introduced // If we are upgrading from a version older than the one where we introduced
// block ID-based layout AND we're working with the finalized directory, // block ID-based layout (32x32) AND we're working with the finalized
// we'll need to upgrade from the old flat layout to the block ID-based one // directory, we'll need to upgrade from the old layout to the new one. The
if (oldLV > DataNodeLayoutVersion.Feature.BLOCKID_BASED_LAYOUT.getInfo(). // upgrade path from pre-blockid based layouts (>-56) and blockid based
getLayoutVersion() && to.getName().equals(STORAGE_DIR_FINALIZED)) { // 256x256 layouts (-56) is fortunately the same.
if (oldLV > DataNodeLayoutVersion.Feature.BLOCKID_BASED_LAYOUT_32_by_32
.getInfo().getLayoutVersion()
&& to.getName().equals(STORAGE_DIR_FINALIZED)) {
upgradeToIdBasedLayout = true; upgradeToIdBasedLayout = true;
} }

View File

@ -113,8 +113,8 @@ public class DatanodeUtil {
* @return * @return
*/ */
public static File idToBlockDir(File root, long blockId) { public static File idToBlockDir(File root, long blockId) {
int d1 = (int)((blockId >> 16) & 0xff); int d1 = (int) ((blockId >> 16) & 0x1F);
int d2 = (int)((blockId >> 8) & 0xff); int d2 = (int) ((blockId >> 8) & 0x1F);
String path = DataStorage.BLOCK_SUBDIR_PREFIX + d1 + SEP + String path = DataStorage.BLOCK_SUBDIR_PREFIX + d1 + SEP +
DataStorage.BLOCK_SUBDIR_PREFIX + d2; DataStorage.BLOCK_SUBDIR_PREFIX + d2;
return new File(root, path); return new File(root, path);

View File

@ -28,10 +28,16 @@ public class TestDatanodeLayoutUpgrade {
private static final String HADOOP_DATANODE_DIR_TXT = private static final String HADOOP_DATANODE_DIR_TXT =
"hadoop-datanode-dir.txt"; "hadoop-datanode-dir.txt";
private static final String HADOOP24_DATANODE = "hadoop-24-datanode-dir.tgz"; private static final String HADOOP24_DATANODE = "hadoop-24-datanode-dir.tgz";
private static final String HADOOP_56_DN_LAYOUT_TXT =
"hadoop-to-57-dn-layout-dir.txt";
private static final String HADOOP_56_DN_LAYOUT =
"hadoop-56-layout-datanode-dir.tgz";
/**
* Upgrade from LDir-based layout to 32x32 block ID-based layout (-57) --
* change described in HDFS-6482 and HDFS-8791
*/
@Test @Test
// Upgrade from LDir-based layout to block ID-based layout -- change described
// in HDFS-6482
public void testUpgradeToIdBasedLayout() throws IOException { public void testUpgradeToIdBasedLayout() throws IOException {
TestDFSUpgradeFromImage upgrade = new TestDFSUpgradeFromImage(); TestDFSUpgradeFromImage upgrade = new TestDFSUpgradeFromImage();
upgrade.unpackStorage(HADOOP24_DATANODE, HADOOP_DATANODE_DIR_TXT); upgrade.unpackStorage(HADOOP24_DATANODE, HADOOP_DATANODE_DIR_TXT);
@ -45,4 +51,23 @@ public class TestDatanodeLayoutUpgrade {
upgrade.upgradeAndVerify(new MiniDFSCluster.Builder(conf).numDataNodes(1) upgrade.upgradeAndVerify(new MiniDFSCluster.Builder(conf).numDataNodes(1)
.manageDataDfsDirs(false).manageNameDfsDirs(false), null); .manageDataDfsDirs(false).manageNameDfsDirs(false), null);
} }
/**
 * Test upgrade from the 256x256 block ID-based layout (-56) to the 32x32
 * block ID-based layout (-57).
 */
@Test
public void testUpgradeFrom256To32Layout() throws IOException {
  final TestDFSUpgradeFromImage upgrader = new TestDFSUpgradeFromImage();
  upgrader.unpackStorage(HADOOP_56_DN_LAYOUT, HADOOP_56_DN_LAYOUT_TXT);

  final Configuration layoutConf =
      new Configuration(TestDFSUpgradeFromImage.upgradeConf);
  final String buildData = System.getProperty("test.build.data");

  // Datanode data directory: <test.build.data>/dfs/data
  final String dataDirUri =
      new File(buildData, "dfs" + File.separator + "data").toURI().toString();
  layoutConf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDirUri);

  // Namenode name directory: <test.build.data>/dfs/name
  final String nameDirUri =
      new File(buildData, "dfs" + File.separator + "name").toURI().toString();
  layoutConf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDirUri);

  // The cluster must not manage (and so overwrite) the directories
  // configured above, hence both manage* flags are turned off.
  upgrader.upgradeAndVerify(
      new MiniDFSCluster.Builder(layoutConf)
          .numDataNodes(1)
          .manageDataDfsDirs(false)
          .manageNameDfsDirs(false),
      null);
}
} }

View File

@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Similar to hadoop-dfs-dir.txt, except this is used for a datanode layout
# upgrade test to 32x32 (layout -57)
# Uncomment the following line to produce checksum info for a new DFS image.
#printChecksums
/blocks/part1 286881285
/blocks/part12922 1068680946
/blocks/part972 2479788008
/blocks/part973 1221039573
overallCRC 1902127725