HDDS-1783 : Latency metric for applyTransaction in ContainerStateMachine (#1363).
This commit is contained in:
parent
5ff76cb8bc
commit
b53d19a343
|
@ -60,6 +60,9 @@ public class CSMMetrics {
|
||||||
private @Metric MutableCounterLong numStartTransactionVerifyFailures;
|
private @Metric MutableCounterLong numStartTransactionVerifyFailures;
|
||||||
private @Metric MutableCounterLong numContainerNotOpenVerifyFailures;
|
private @Metric MutableCounterLong numContainerNotOpenVerifyFailures;
|
||||||
|
|
||||||
|
private @Metric MutableRate applyTransaction;
|
||||||
|
private @Metric MutableRate writeStateMachineData;
|
||||||
|
|
||||||
public CSMMetrics() {
|
public CSMMetrics() {
|
||||||
int numCmdTypes = ContainerProtos.Type.values().length;
|
int numCmdTypes = ContainerProtos.Type.values().length;
|
||||||
this.opsLatency = new MutableRate[numCmdTypes];
|
this.opsLatency = new MutableRate[numCmdTypes];
|
||||||
|
@ -186,6 +189,10 @@ public class CSMMetrics {
|
||||||
return numBytesCommittedCount.value();
|
return numBytesCommittedCount.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public MutableRate getApplyTransactionLatency() {
|
||||||
|
return applyTransaction;
|
||||||
|
}
|
||||||
|
|
||||||
public void incPipelineLatency(ContainerProtos.Type type, long latencyNanos) {
|
public void incPipelineLatency(ContainerProtos.Type type, long latencyNanos) {
|
||||||
opsLatency[type.ordinal()].add(latencyNanos);
|
opsLatency[type.ordinal()].add(latencyNanos);
|
||||||
transactionLatency.add(latencyNanos);
|
transactionLatency.add(latencyNanos);
|
||||||
|
@ -199,6 +206,13 @@ public class CSMMetrics {
|
||||||
numContainerNotOpenVerifyFailures.incr();
|
numContainerNotOpenVerifyFailures.incr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void recordApplyTransactionCompletion(long latencyNanos) {
|
||||||
|
applyTransaction.add(latencyNanos);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void recordWriteStateMachineCompletion(long latencyNanos) {
|
||||||
|
writeStateMachineData.add(latencyNanos);
|
||||||
|
}
|
||||||
|
|
||||||
public void unRegister() {
|
public void unRegister() {
|
||||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||||
|
|
|
@ -411,7 +411,8 @@ public class ContainerStateMachine extends BaseStateMachine {
|
||||||
}
|
}
|
||||||
|
|
||||||
private CompletableFuture<Message> handleWriteChunk(
|
private CompletableFuture<Message> handleWriteChunk(
|
||||||
ContainerCommandRequestProto requestProto, long entryIndex, long term) {
|
ContainerCommandRequestProto requestProto, long entryIndex, long term,
|
||||||
|
long startTime) {
|
||||||
final WriteChunkRequestProto write = requestProto.getWriteChunk();
|
final WriteChunkRequestProto write = requestProto.getWriteChunk();
|
||||||
RaftServer server = ratisServer.getServer();
|
RaftServer server = ratisServer.getServer();
|
||||||
Preconditions.checkState(server instanceof RaftServerProxy);
|
Preconditions.checkState(server instanceof RaftServerProxy);
|
||||||
|
@ -461,6 +462,8 @@ public class ContainerStateMachine extends BaseStateMachine {
|
||||||
write.getBlockID() + " logIndex " + entryIndex + " chunkName " +
|
write.getBlockID() + " logIndex " + entryIndex + " chunkName " +
|
||||||
write.getChunkData().getChunkName());
|
write.getChunkData().getChunkName());
|
||||||
raftFuture.complete(r::toByteString);
|
raftFuture.complete(r::toByteString);
|
||||||
|
metrics.recordWriteStateMachineCompletion(
|
||||||
|
Time.monotonicNowNanos() - startTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
writeChunkFutureMap.remove(entryIndex);
|
writeChunkFutureMap.remove(entryIndex);
|
||||||
|
@ -477,6 +480,7 @@ public class ContainerStateMachine extends BaseStateMachine {
|
||||||
public CompletableFuture<Message> writeStateMachineData(LogEntryProto entry) {
|
public CompletableFuture<Message> writeStateMachineData(LogEntryProto entry) {
|
||||||
try {
|
try {
|
||||||
metrics.incNumWriteStateMachineOps();
|
metrics.incNumWriteStateMachineOps();
|
||||||
|
long writeStateMachineStartTime = Time.monotonicNowNanos();
|
||||||
ContainerCommandRequestProto requestProto =
|
ContainerCommandRequestProto requestProto =
|
||||||
getContainerCommandRequestProto(
|
getContainerCommandRequestProto(
|
||||||
entry.getStateMachineLogEntry().getLogData());
|
entry.getStateMachineLogEntry().getLogData());
|
||||||
|
@ -493,7 +497,7 @@ public class ContainerStateMachine extends BaseStateMachine {
|
||||||
switch (cmdType) {
|
switch (cmdType) {
|
||||||
case WriteChunk:
|
case WriteChunk:
|
||||||
return handleWriteChunk(requestProto, entry.getIndex(),
|
return handleWriteChunk(requestProto, entry.getIndex(),
|
||||||
entry.getTerm());
|
entry.getTerm(), writeStateMachineStartTime);
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("Cmd Type:" + cmdType
|
throw new IllegalStateException("Cmd Type:" + cmdType
|
||||||
+ " should not have state machine data");
|
+ " should not have state machine data");
|
||||||
|
@ -673,6 +677,7 @@ public class ContainerStateMachine extends BaseStateMachine {
|
||||||
.setTerm(trx.getLogEntry().getTerm())
|
.setTerm(trx.getLogEntry().getTerm())
|
||||||
.setLogIndex(index);
|
.setLogIndex(index);
|
||||||
|
|
||||||
|
long applyTxnStartTime = Time.monotonicNowNanos();
|
||||||
try {
|
try {
|
||||||
applyTransactionSemaphore.acquire();
|
applyTransactionSemaphore.acquire();
|
||||||
metrics.incNumApplyTransactionsOps();
|
metrics.incNumApplyTransactionsOps();
|
||||||
|
@ -740,7 +745,11 @@ public class ContainerStateMachine extends BaseStateMachine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return applyTransactionFuture;
|
return applyTransactionFuture;
|
||||||
}).whenComplete((r, t) -> applyTransactionSemaphore.release());
|
}).whenComplete((r, t) -> {
|
||||||
|
applyTransactionSemaphore.release();
|
||||||
|
metrics.recordApplyTransactionCompletion(
|
||||||
|
Time.monotonicNowNanos() - applyTxnStartTime);
|
||||||
|
});
|
||||||
return applyTransactionFuture;
|
return applyTransactionFuture;
|
||||||
} catch (IOException | InterruptedException e) {
|
} catch (IOException | InterruptedException e) {
|
||||||
metrics.incNumApplyTransactionsFails();
|
metrics.incNumApplyTransactionsFails();
|
||||||
|
|
|
@ -47,7 +47,7 @@ HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:1019
|
||||||
HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:1012
|
HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:1012
|
||||||
CORE-SITE.XML_dfs.data.transfer.protection=authentication
|
CORE-SITE.XML_dfs.data.transfer.protection=authentication
|
||||||
CORE-SITE.XML_hadoop.security.authentication=kerberos
|
CORE-SITE.XML_hadoop.security.authentication=kerberos
|
||||||
COER-SITE.XML_hadoop.security.auth_to_local=RULE:[2:$1@$0](.*@EXAMPLE.COM)s/@.*///L
|
CORE-SITE.XML_hadoop.security.auth_to_local=RULE:[2:$1@$0](.*@EXAMPLE.COM)s/@.*///L
|
||||||
CORE-SITE.XML_hadoop.security.key.provider.path=kms://http@kms:9600/kms
|
CORE-SITE.XML_hadoop.security.key.provider.path=kms://http@kms:9600/kms
|
||||||
|
|
||||||
#temporary disable authorization as org.apache.hadoop.yarn.server.api.ResourceTrackerPB is not properly annotated to support it
|
#temporary disable authorization as org.apache.hadoop.yarn.server.api.ResourceTrackerPB is not properly annotated to support it
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
|
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
|
||||||
|
|
||||||
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
|
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
|
||||||
|
import static org.apache.hadoop.test.MetricsAsserts.getDoubleGauge;
|
||||||
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
@ -49,6 +50,8 @@ import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
|
|
||||||
import static org.apache.ratis.rpc.SupportedRpcType.GRPC;
|
import static org.apache.ratis.rpc.SupportedRpcType.GRPC;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import org.apache.ratis.protocol.RaftGroupId;
|
import org.apache.ratis.protocol.RaftGroupId;
|
||||||
import org.apache.ratis.util.function.CheckedBiConsumer;
|
import org.apache.ratis.util.function.CheckedBiConsumer;
|
||||||
|
|
||||||
|
@ -118,6 +121,12 @@ public class TestCSMMetrics {
|
||||||
assertCounter("NumStartTransactionVerifyFailures", 0L, metric);
|
assertCounter("NumStartTransactionVerifyFailures", 0L, metric);
|
||||||
assertCounter("NumContainerNotOpenVerifyFailures", 0L, metric);
|
assertCounter("NumContainerNotOpenVerifyFailures", 0L, metric);
|
||||||
assertCounter("WriteChunkNumOps", 0L, metric);
|
assertCounter("WriteChunkNumOps", 0L, metric);
|
||||||
|
double applyTransactionLatency = getDoubleGauge(
|
||||||
|
"ApplyTransactionAvgTime", metric);
|
||||||
|
assertTrue(applyTransactionLatency == 0.0);
|
||||||
|
double writeStateMachineLatency = getDoubleGauge(
|
||||||
|
"WriteStateMachineDataAvgTime", metric);
|
||||||
|
assertTrue(writeStateMachineLatency == 0.0);
|
||||||
|
|
||||||
// Write Chunk
|
// Write Chunk
|
||||||
BlockID blockID = ContainerTestHelper.getTestBlockID(ContainerTestHelper.
|
BlockID blockID = ContainerTestHelper.getTestBlockID(ContainerTestHelper.
|
||||||
|
@ -152,6 +161,13 @@ public class TestCSMMetrics {
|
||||||
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
|
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
|
||||||
assertCounter("NumQueryStateMachineOps", 1L, metric);
|
assertCounter("NumQueryStateMachineOps", 1L, metric);
|
||||||
assertCounter("NumApplyTransactionOps", 1L, metric);
|
assertCounter("NumApplyTransactionOps", 1L, metric);
|
||||||
|
applyTransactionLatency = getDoubleGauge(
|
||||||
|
"ApplyTransactionAvgTime", metric);
|
||||||
|
assertTrue(applyTransactionLatency > 0.0);
|
||||||
|
writeStateMachineLatency = getDoubleGauge(
|
||||||
|
"WriteStateMachineDataAvgTime", metric);
|
||||||
|
assertTrue(writeStateMachineLatency > 0.0);
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
if (client != null) {
|
if (client != null) {
|
||||||
client.close();
|
client.close();
|
||||||
|
|
Loading…
Reference in New Issue