HDFS-15781. Add metrics for how blocks are moved in replaceBlock. (#2704)
This commit is contained in:
parent
e9a3c2950e
commit
940c780feb
|
@ -1250,6 +1250,8 @@ class DataXceiver extends Receiver implements Runnable {
|
||||||
|
|
||||||
LOG.info("Moved {} from {}, delHint={}",
|
LOG.info("Moved {} from {}, delHint={}",
|
||||||
block, peer.getRemoteAddressString(), delHint);
|
block, peer.getRemoteAddressString(), delHint);
|
||||||
|
|
||||||
|
datanode.metrics.incrReplaceBlockOpToOtherHost();
|
||||||
}
|
}
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
opStatus = ERROR;
|
opStatus = ERROR;
|
||||||
|
|
|
@ -1121,6 +1121,10 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
moveBlock(block, replicaInfo, volumeRef, useVolumeOnSameMount);
|
moveBlock(block, replicaInfo, volumeRef, useVolumeOnSameMount);
|
||||||
|
datanode.getMetrics().incrReplaceBlockOpOnSameHost();
|
||||||
|
if (useVolumeOnSameMount) {
|
||||||
|
datanode.getMetrics().incrReplaceBlockOpOnSameMount();
|
||||||
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (volumeRef != null) {
|
if (volumeRef != null) {
|
||||||
volumeRef.close();
|
volumeRef.close();
|
||||||
|
|
|
@ -188,6 +188,15 @@ public class DataNodeMetrics {
|
||||||
@Metric MutableCounterLong packetsSlowWriteToDisk;
|
@Metric MutableCounterLong packetsSlowWriteToDisk;
|
||||||
@Metric MutableCounterLong packetsSlowWriteToOsCache;
|
@Metric MutableCounterLong packetsSlowWriteToOsCache;
|
||||||
|
|
||||||
|
@Metric("Number of replaceBlock ops between" +
|
||||||
|
" storage types on same host with local copy")
|
||||||
|
private MutableCounterLong replaceBlockOpOnSameHost;
|
||||||
|
@Metric("Number of replaceBlock ops between" +
|
||||||
|
" storage types on same disk mount with same disk tiering feature")
|
||||||
|
private MutableCounterLong replaceBlockOpOnSameMount;
|
||||||
|
@Metric("Number of replaceBlock ops to another node")
|
||||||
|
private MutableCounterLong replaceBlockOpToOtherHost;
|
||||||
|
|
||||||
final MetricsRegistry registry = new MetricsRegistry("datanode");
|
final MetricsRegistry registry = new MetricsRegistry("datanode");
|
||||||
@Metric("Milliseconds spent on calling NN rpc")
|
@Metric("Milliseconds spent on calling NN rpc")
|
||||||
private MutableRatesWithAggregation
|
private MutableRatesWithAggregation
|
||||||
|
@ -711,4 +720,17 @@ public class DataNodeMetrics {
|
||||||
public void incrPacketsSlowWriteToOsCache() {
|
public void incrPacketsSlowWriteToOsCache() {
|
||||||
packetsSlowWriteToOsCache.incr();
|
packetsSlowWriteToOsCache.incr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Increments the {@code replaceBlockOpOnSameMount} counter, whose metric
 * description is "Number of replaceBlock ops between storage types on same
 * disk mount with same disk tiering feature".
 */
public void incrReplaceBlockOpOnSameMount() {
|
||||||
|
replaceBlockOpOnSameMount.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Increments the {@code replaceBlockOpOnSameHost} counter, whose metric
 * description is "Number of replaceBlock ops between storage types on same
 * host with local copy".
 */
public void incrReplaceBlockOpOnSameHost() {
|
||||||
|
replaceBlockOpOnSameHost.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Increments the {@code replaceBlockOpToOtherHost} counter, whose metric
 * description is "Number of replaceBlock ops to another node".
 */
public void incrReplaceBlockOpToOtherHost() {
|
||||||
|
replaceBlockOpToOtherHost.incr();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY;
|
||||||
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DATA_TRANSFER_PROTECTION_KEY;
|
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DATA_TRANSFER_PROTECTION_KEY;
|
||||||
|
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
|
||||||
|
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -83,6 +85,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||||
import org.apache.hadoop.hdfs.server.mover.Mover.MLocation;
|
import org.apache.hadoop.hdfs.server.mover.Mover.MLocation;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
|
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
|
||||||
import org.apache.hadoop.http.HttpConfig;
|
import org.apache.hadoop.http.HttpConfig;
|
||||||
|
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||||
import org.apache.hadoop.minikdc.MiniKdc;
|
import org.apache.hadoop.minikdc.MiniKdc;
|
||||||
import org.apache.hadoop.security.SecurityUtil;
|
import org.apache.hadoop.security.SecurityUtil;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
@ -170,14 +173,26 @@ public class TestMover {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testWithinSameNode(Configuration conf) throws Exception {
|
private void testMovementWithLocalityOption(Configuration conf,
|
||||||
final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
|
boolean sameNode) throws Exception {
|
||||||
|
final MiniDFSCluster cluster;
|
||||||
|
if (sameNode) {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf)
|
||||||
.numDataNodes(3)
|
.numDataNodes(3)
|
||||||
.storageTypes(
|
.storageTypes(
|
||||||
new StorageType[] {StorageType.DISK, StorageType.ARCHIVE})
|
new StorageType[] {StorageType.DISK, StorageType.ARCHIVE})
|
||||||
.build();
|
.build();
|
||||||
|
} else {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf)
|
||||||
|
.numDataNodes(2)
|
||||||
|
.storageTypes(
|
||||||
|
new StorageType[][] {{StorageType.DISK}, {StorageType.ARCHIVE}})
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
cluster.waitActive();
|
cluster.waitActive();
|
||||||
|
|
||||||
final DistributedFileSystem dfs = cluster.getFileSystem();
|
final DistributedFileSystem dfs = cluster.getFileSystem();
|
||||||
final String file = "/testScheduleWithinSameNode/file";
|
final String file = "/testScheduleWithinSameNode/file";
|
||||||
Path dir = new Path("/testScheduleWithinSameNode");
|
Path dir = new Path("/testScheduleWithinSameNode");
|
||||||
|
@ -201,12 +216,37 @@ public class TestMover {
|
||||||
Assert.assertEquals("Movement to ARCHIVE should be successful", 0, rc);
|
Assert.assertEquals("Movement to ARCHIVE should be successful", 0, rc);
|
||||||
|
|
||||||
// Wait till namenode notified about the block location details
|
// Wait till namenode notified about the block location details
|
||||||
waitForLocatedBlockWithArchiveStorageType(dfs, file, 3);
|
waitForLocatedBlockWithArchiveStorageType(dfs, file, sameNode ? 3 : 1);
|
||||||
|
|
||||||
|
MetricsRecordBuilder rb =
|
||||||
|
getMetrics(cluster.getDataNodes().get(1).getMetrics().name());
|
||||||
|
|
||||||
|
if (!sameNode) {
|
||||||
|
testReplaceBlockOpLocalityMetrics(0, 0, 1, rb);
|
||||||
|
} else if (conf.getBoolean(
|
||||||
|
DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, false)) {
|
||||||
|
testReplaceBlockOpLocalityMetrics(1, 1, 0, rb);
|
||||||
|
} else {
|
||||||
|
testReplaceBlockOpLocalityMetrics(1, 0, 0, rb);
|
||||||
|
}
|
||||||
} finally {
|
} finally {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Asserts the three replaceBlock locality counters in the given metrics
 * record against the expected values.
 *
 * @param sameHost expected value of the "ReplaceBlockOpOnSameHost" counter
 * @param sameMount expected value of the "ReplaceBlockOpOnSameMount" counter
 * @param otherHost expected value of the "ReplaceBlockOpToOtherHost" counter
 * @param rb metrics record the counters are read from
 */
private void testReplaceBlockOpLocalityMetrics(
|
||||||
|
long sameHost,
|
||||||
|
long sameMount,
|
||||||
|
long otherHost,
|
||||||
|
MetricsRecordBuilder rb) {
|
||||||
|
assertCounter("ReplaceBlockOpOnSameHost",
|
||||||
|
sameHost, rb);
|
||||||
|
assertCounter("ReplaceBlockOpOnSameMount",
|
||||||
|
sameMount, rb);
|
||||||
|
assertCounter("ReplaceBlockOpToOtherHost",
|
||||||
|
otherHost, rb);
|
||||||
|
}
|
||||||
|
|
||||||
private void setupStoragePoliciesAndPaths(DistributedFileSystem dfs1,
|
private void setupStoragePoliciesAndPaths(DistributedFileSystem dfs1,
|
||||||
DistributedFileSystem dfs2,
|
DistributedFileSystem dfs2,
|
||||||
Path dir, String file)
|
Path dir, String file)
|
||||||
|
@ -441,17 +481,27 @@ public class TestMover {
|
||||||
}, 100, 3000);
|
}, 100, 3000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests block movement under different block locality scenarios.
|
||||||
|
* 1) The block is copied locally on the same host,
|
||||||
|
* if the target storage type is available on the same datanode.
|
||||||
|
* 2) The block is moved within the local mount using a hardlink,
|
||||||
|
* if disk/archive share a mount and same-disk tiering is enabled.
|
||||||
|
* 3) The block is moved to another datanode,
|
||||||
|
* if the target storage type is not available on the local datanode.
|
||||||
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testScheduleBlockWithinSameNode() throws Exception {
|
public void testScheduleBlockLocality() throws Exception {
|
||||||
final Configuration conf = new HdfsConfiguration();
|
final Configuration conf = new HdfsConfiguration();
|
||||||
initConf(conf);
|
initConf(conf);
|
||||||
testWithinSameNode(conf);
|
testMovementWithLocalityOption(conf, true);
|
||||||
// Test movement with hardlink, when same disk tiering is enabled.
|
// Test movement with hardlink, when same disk tiering is enabled.
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, true);
|
conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, true);
|
||||||
conf.setDouble(DFSConfigKeys
|
conf.setDouble(DFSConfigKeys
|
||||||
.DFS_DATANODE_RESERVE_FOR_ARCHIVE_DEFAULT_PERCENTAGE, 0.5);
|
.DFS_DATANODE_RESERVE_FOR_ARCHIVE_DEFAULT_PERCENTAGE, 0.5);
|
||||||
testWithinSameNode(conf);
|
testMovementWithLocalityOption(conf, true);
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, false);
|
conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, false);
|
||||||
|
testMovementWithLocalityOption(conf, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkMovePaths(List<Path> actual, Path... expected) {
|
private void checkMovePaths(List<Path> actual, Path... expected) {
|
||||||
|
@ -1006,7 +1056,8 @@ public class TestMover {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Mover runs fine when logging in with a keytab in kerberized env.
|
* Test Mover runs fine when logging in with a keytab in kerberized env.
|
||||||
* Reusing testWithinSameNode here for basic functionality testing.
|
* Reusing testMovementWithLocalityOption
|
||||||
|
* here for basic functionality testing.
|
||||||
*/
|
*/
|
||||||
@Test(timeout = 300000)
|
@Test(timeout = 300000)
|
||||||
public void testMoverWithKeytabs() throws Exception {
|
public void testMoverWithKeytabs() throws Exception {
|
||||||
|
@ -1020,7 +1071,7 @@ public class TestMover {
|
||||||
@Override
|
@Override
|
||||||
public Void run() throws Exception {
|
public Void run() throws Exception {
|
||||||
// verify that mover runs Ok.
|
// verify that mover runs Ok.
|
||||||
testWithinSameNode(conf);
|
testMovementWithLocalityOption(conf, true);
|
||||||
// verify that UGI was logged in using keytab.
|
// verify that UGI was logged in using keytab.
|
||||||
Assert.assertTrue(UserGroupInformation.isLoginKeytabBased());
|
Assert.assertTrue(UserGroupInformation.isLoginKeytabBased());
|
||||||
return null;
|
return null;
|
||||||
|
|
Loading…
Reference in New Issue