HDDS-1935. Improve the visibility with Ozone Insight tool (#1255)

Elek, Márton 2019-08-30 02:07:55 +02:00 committed by Anu Engineer
parent 7b3fa4fcaa
commit 4f5f46eb4a
45 changed files with 2441 additions and 191 deletions

View File

@@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.protocolPB;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.metrics2.MetricsCollector;
import org.apache.hadoop.metrics2.MetricsInfo;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import com.google.protobuf.ProtocolMessageEnum;
/**
* Metrics to count all the subtypes of a specific message.
*/
public class ProtocolMessageMetrics implements MetricsSource {
private String name;
private String description;
private Map<ProtocolMessageEnum, AtomicLong> counters =
new ConcurrentHashMap<>();
public static ProtocolMessageMetrics create(String name,
String description, ProtocolMessageEnum[] types) {
ProtocolMessageMetrics protocolMessageMetrics =
new ProtocolMessageMetrics(name, description,
types);
return protocolMessageMetrics;
}
public ProtocolMessageMetrics(String name, String description,
ProtocolMessageEnum[] values) {
this.name = name;
this.description = description;
for (ProtocolMessageEnum value : values) {
counters.put(value, new AtomicLong(0));
}
}
public void increment(ProtocolMessageEnum key) {
counters.get(key).incrementAndGet();
}
public void register() {
DefaultMetricsSystem.instance()
.register(name, description, this);
}
public void unregister() {
DefaultMetricsSystem.instance().unregisterSource(name);
}
@Override
public void getMetrics(MetricsCollector collector, boolean all) {
MetricsRecordBuilder builder = collector.addRecord(name);
counters.forEach((key, value) -> {
builder.addCounter(new MetricName(key.toString(), ""), value.longValue());
});
builder.endRecord();
}
/**
* Simple metrics info implementation.
*/
public static class MetricName implements MetricsInfo {
private String name;
private String description;
public MetricName(String name, String description) {
this.name = name;
this.description = description;
}
@Override
public String name() {
return name;
}
@Override
public String description() {
return description;
}
}
}
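Usage note (editor's sketch): the class above is wired up later in this same patch by SCMBlockProtocolServer; a minimal sequence looks like this, using the SCM block protocol message enum that appears below.

ProtocolMessageMetrics metrics = ProtocolMessageMetrics.create(
    "ScmBlockLocationProtocol",
    "SCM Block location protocol counters",
    ScmBlockLocationProtocolProtos.Type.values());
metrics.register();                    // publish through DefaultMetricsSystem
metrics.increment(ScmBlockLocationProtocolProtos.Type.AllocateScmBlock);
metrics.unregister();                  // on server shutdown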

View File

@@ -17,15 +17,25 @@
 */
package org.apache.hadoop.ozone.protocolPB;

-import com.google.protobuf.RpcController;
-import com.google.protobuf.ServiceException;
-import io.opentracing.Scope;
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .AllocateBlockResponse;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateBlockResponse;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockResponseProto;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.DeleteKeyBlocksResultProto;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.DeleteScmKeyBlocksRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.DeleteScmKeyBlocksResponseProto;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationRequest;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationResponse;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SortDatanodesRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SortDatanodesResponseProto;
+import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.Status;
import org.apache.hadoop.hdds.scm.ScmInfo;
import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock;
import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
@@ -34,34 +44,15 @@
import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolPB;
import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB;
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .AllocateScmBlockRequestProto;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .AllocateScmBlockResponseProto;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .DeleteKeyBlocksResultProto;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .DeleteScmKeyBlocksRequestProto;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .DeleteScmKeyBlocksResponseProto;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .SCMBlockLocationResponse;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .SCMBlockLocationRequest;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .Status;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .SortDatanodesRequestProto;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos
-    .SortDatanodesResponseProto;
import org.apache.hadoop.hdds.tracing.TracingUtil;
import org.apache.hadoop.ozone.common.BlockGroup;
import org.apache.hadoop.ozone.common.DeleteBlockGroupResult;
-import java.io.IOException;
-import java.util.List;
-import java.util.stream.Collectors;
+import com.google.protobuf.RpcController;
+import com.google.protobuf.ServiceException;
+import io.opentracing.Scope;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

/**
 * This class is the server-side translator that forwards requests received on
@@ -74,14 +65,22 @@ public final class ScmBlockLocationProtocolServerSideTranslatorPB
  private final ScmBlockLocationProtocol impl;

+  private static final Logger LOG = LoggerFactory
+      .getLogger(ScmBlockLocationProtocolServerSideTranslatorPB.class);
+
+  private final ProtocolMessageMetrics
+      protocolMessageMetrics;
+
  /**
   * Creates a new ScmBlockLocationProtocolServerSideTranslatorPB.
   *
   * @param impl {@link ScmBlockLocationProtocol} server implementation
   */
  public ScmBlockLocationProtocolServerSideTranslatorPB(
-      ScmBlockLocationProtocol impl) throws IOException {
+      ScmBlockLocationProtocol impl,
+      ProtocolMessageMetrics metrics) throws IOException {
    this.impl = impl;
+    this.protocolMessageMetrics = metrics;
  }

  private SCMBlockLocationResponse.Builder createSCMBlockResponse(
@@ -97,15 +96,45 @@ public SCMBlockLocationResponse send(RpcController controller,
      SCMBlockLocationRequest request) throws ServiceException {
    String traceId = request.getTraceID();

+    if (LOG.isTraceEnabled()) {
+      LOG.trace("BlockLocationProtocol {} request is received: <json>{}</json>",
+          request.getCmdType().toString(),
+          request.toString().replaceAll("\n", "\\\\n"));
+    } else if (LOG.isDebugEnabled()) {
+      LOG.debug("BlockLocationProtocol {} request is received",
+          request.getCmdType().toString());
+    }
+    protocolMessageMetrics.increment(request.getCmdType());
+
+    try (Scope scope = TracingUtil
+        .importAndCreateScope(
+            "ScmBlockLocationProtocol." + request.getCmdType(),
+            request.getTraceID())) {
+      SCMBlockLocationResponse response =
+          processMessage(request, traceId);
+
+      if (LOG.isTraceEnabled()) {
+        LOG.trace(
+            "BlockLocationProtocol {} request is processed. Response: "
+                + "<json>{}</json>",
+            request.getCmdType().toString(),
+            response.toString().replaceAll("\n", "\\\\n"));
+      }
+      return response;
+    }
+  }
+
+  private SCMBlockLocationResponse processMessage(
+      SCMBlockLocationRequest request, String traceId) throws ServiceException {
    SCMBlockLocationResponse.Builder response = createSCMBlockResponse(
        request.getCmdType(),
        traceId);
    response.setSuccess(true);
    response.setStatus(Status.OK);
-    try(Scope scope = TracingUtil
-        .importAndCreateScope("ScmBlockLocationProtocol."+request.getCmdType(),
-            request.getTraceID())) {
+
+    try {
      switch (request.getCmdType()) {
      case AllocateScmBlock:
        response.setAllocateScmBlockResponse(
@@ -125,7 +154,7 @@ public SCMBlockLocationResponse send(RpcController controller,
        break;
      default:
        // Should never happen
-        throw new IOException("Unknown Operation "+request.getCmdType()+
+        throw new IOException("Unknown Operation " + request.getCmdType() +
            " in ScmBlockLocationProtocol");
      }
    } catch (IOException e) {
@@ -135,6 +164,7 @@ public SCMBlockLocationResponse send(RpcController controller,
        response.setMessage(e.getMessage());
      }
    }
+
    return response.build();
  }

@@ -182,12 +212,12 @@ public DeleteScmKeyBlocksResponseProto deleteScmKeyBlocks(
        .map(BlockGroup::getFromProto).collect(Collectors.toList());
    final List<DeleteBlockGroupResult> results =
        impl.deleteKeyBlocks(infoList);
-    for (DeleteBlockGroupResult result: results) {
+    for (DeleteBlockGroupResult result : results) {
      DeleteKeyBlocksResultProto.Builder deleteResult =
          DeleteKeyBlocksResultProto
              .newBuilder()
              .setObjectKey(result.getObjectKey())
              .addAllBlockResults(result.getBlockResultProtoList());
      resp.addResults(deleteResult.build());
    }
    return resp.build();
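For reference: with TRACE enabled the send() wrapper above logs the full protobuf payload between <json> markers (newlines escaped to \n), producing a line like "BlockLocationProtocol AllocateScmBlock request is received: <json>...</json>"; at DEBUG only the command type is logged, and every message additionally bumps its per-type counter in ProtocolMessageMetrics.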

View File

@@ -331,7 +331,7 @@
  <property>
    <name>hdds.prometheus.endpoint.enabled</name>
-    <value>false</value>
+    <value>true</value>
    <tag>OZONE, MANAGEMENT</tag>
    <description>Enable prometheus compatible metric page on the HTTP
      servers.
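Since the default flips to enabled, the metrics page can be checked without any extra configuration; a minimal sketch (the /prom path and the SCM HTTP port 9876 are assumptions, not shown in this diff):

URL prom = new URL("http://localhost:9876/prom");   // assumed endpoint/port
try (BufferedReader reader = new BufferedReader(
    new InputStreamReader(prom.openStream(), StandardCharsets.UTF_8))) {
  reader.lines().limit(10).forEach(System.out::println);  // first few samples
}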

View File

@@ -93,15 +93,16 @@ public boolean process(Set<? extends TypeElement> annotations,
        }
      }
+
+      FileObject resource = filer
+          .createResource(StandardLocation.CLASS_OUTPUT, "",
+              OUTPUT_FILE_NAME);
+      try (Writer writer = new OutputStreamWriter(
+          resource.openOutputStream(), StandardCharsets.UTF_8)) {
+        appender.write(writer);
+      }
      }
-      FileObject resource = filer
-          .createResource(StandardLocation.CLASS_OUTPUT, "",
-              OUTPUT_FILE_NAME);
-      try (Writer writer = new OutputStreamWriter(
-          resource.openOutputStream(), StandardCharsets.UTF_8)) {
-        appender.write(writer);
-      }
    } catch (IOException e) {
      processingEnv.getMessager().printMessage(Kind.ERROR,
          "Can't generate the config file from annotation: " + e.getMessage());
@@ -109,5 +110,4 @@ public boolean process(Set<? extends TypeElement> annotations,
    return false;
  }
}
-

View File

@@ -26,6 +26,7 @@
import org.apache.hadoop.http.HttpServer2;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.net.NetUtils;
+
import org.eclipse.jetty.webapp.WebAppContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -92,8 +93,9 @@ public BaseHttpServer(Configuration conf, String name) throws IOException {
      httpServer = builder.build();
      httpServer.addServlet("conf", "/conf", HddsConfServlet.class);
+      httpServer.addServlet("logstream", "/logstream", LogStreamServlet.class);
      prometheusSupport =
-          conf.getBoolean(HddsConfigKeys.HDDS_PROMETHEUS_ENABLED, false);
+          conf.getBoolean(HddsConfigKeys.HDDS_PROMETHEUS_ENABLED, true);
      profilerSupport =
          conf.getBoolean(HddsConfigKeys.HDDS_PROFILER_ENABLED, false);

View File

@@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.server;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.apache.log4j.WriterAppender;
/**
* Servlet to stream the current logs to the response.
*/
public class LogStreamServlet extends HttpServlet {
private static final String PATTERN = "%d [%p|%c|%C{1}] %m%n";
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp)
throws ServletException, IOException {
WriterAppender appender =
new WriterAppender(new PatternLayout(PATTERN), resp.getWriter());
appender.setThreshold(Level.TRACE);
try {
Logger.getRootLogger().addAppender(appender);
try {
Thread.sleep(Integer.MAX_VALUE);
} catch (InterruptedException e) {
//interrupted
}
} finally {
Logger.getRootLogger().removeAppender(appender);
}
}
}
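The servlet holds the connection open and attaches a WriterAppender to the root logger, so any plain HTTP client can tail a component's logs; a hedged client sketch (host and port are placeholders, the /logstream path is the mount point added in BaseHttpServer below):

URL logs = new URL("http://scm-host:9876/logstream");   // placeholder address
try (BufferedReader reader = new BufferedReader(
    new InputStreamReader(logs.openStream(), StandardCharsets.UTF_8))) {
  String line;
  while ((line = reader.readLine()) != null) {
    System.out.println(line);   // streams until the client disconnects
  }
}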

View File

@@ -112,6 +112,10 @@ public String prometheusName(String recordName,
    String baseName = StringUtils.capitalize(recordName)
        + StringUtils.capitalize(metricName);
+    return normalizeName(baseName);
+  }
+
+  public static String normalizeName(String baseName) {
    String[] parts = SPLIT_PATTERN.split(baseName);
    String result = String.join("_", parts).toLowerCase();
    return REPLACE_PATTERN.matcher(result).replaceAll("_");
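The refactoring only extracts normalizeName so other components (the insight tool below) can reuse the Prometheus naming; assuming the existing camel-case SPLIT_PATTERN, it behaves like:

// e.g. protobuf message type names become Prometheus-style metric names
String n = PrometheusMetricsSink.normalizeName("AllocateScmBlock");
// n == "allocate_scm_block" (split on camel case, joined with '_', lowercased)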

View File

@@ -23,6 +23,8 @@
import org.apache.hadoop.util.Time;

import com.google.common.base.Preconditions;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -57,6 +59,8 @@ public class EventQueue implements EventPublisher, AutoCloseable {

  private boolean isRunning = true;

+  private static final Gson TRACING_SERIALIZER = new GsonBuilder().create();
+
  public <PAYLOAD, EVENT_TYPE extends Event<PAYLOAD>> void addHandler(
      EVENT_TYPE event, EventHandler<PAYLOAD> handler) {
    this.addHandler(event, handler, generateHandlerName(handler));
@@ -129,8 +133,6 @@ public <PAYLOAD, EVENT_TYPE extends Event<PAYLOAD>> void addHandler(
    executors.get(event).get(executor).add(handler);
  }
-
-
  /**
   * Route an event with payload to the right listener(s).
   *
@@ -159,11 +161,17 @@ public <PAYLOAD, EVENT_TYPE extends Event<PAYLOAD>> void fireEvent(
      for (EventHandler handler : executorAndHandlers.getValue()) {
        queuedCount.incrementAndGet();
-        if (LOG.isDebugEnabled()) {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(
+              "Delivering event {} to executor/handler {}: <json>{}</json>",
+              event.getName(),
+              executorAndHandlers.getKey().getName(),
+              TRACING_SERIALIZER.toJson(payload).replaceAll("\n", "\\\\n"));
+        } else if (LOG.isDebugEnabled()) {
          LOG.debug("Delivering event {} to executor/handler {}: {}",
              event.getName(),
              executorAndHandlers.getKey().getName(),
-              payload);
+              payload.getClass().getSimpleName());
        }
        executorAndHandlers.getKey()
            .onMessage(handler, payload, this);
@@ -232,6 +240,7 @@ public void processAll(long timeout) {
      }
    }
  }
+
  @Override
  public void close() {
@@ -250,5 +259,4 @@ public void close() {
    });
  }
}
-
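A minimal sketch of the new trace-level payload serialization (the payload object here is an arbitrary stand-in, not a real event payload):

Gson tracingSerializer = new GsonBuilder().create();
Object payload = java.util.Collections.singletonMap("containerID", 42);
String json = tracingSerializer.toJson(payload).replaceAll("\n", "\\\\n");
// the JSON ends up between <json>...</json> markers in the trace line;
// at DEBUG level only payload.getClass().getSimpleName() is logged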

View File

@@ -18,21 +18,34 @@
package org.apache.hadoop.hdds.scm.container;

-import com.google.common.annotations.VisibleForTesting;
-import com.google.protobuf.GeneratedMessage;
-import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.StringJoiner;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Consumer;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+
-import org.apache.hadoop.hdds.conf.ConfigType;
-import org.apache.hadoop.hdds.conf.ConfigGroup;
import org.apache.hadoop.hdds.conf.Config;
+import org.apache.hadoop.hdds.conf.ConfigGroup;
+import org.apache.hadoop.hdds.conf.ConfigType;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
-import org.apache.hadoop.hdds.scm.container.placement.algorithms
-    .ContainerPlacementPolicy;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
+import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.server.events.EventPublisher;
+import org.apache.hadoop.metrics2.MetricsCollector;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.MetricsSource;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.ozone.lock.LockManager;
import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand;
import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
@@ -42,35 +55,27 @@
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.Time;

-import static org.apache.hadoop.hdds.conf.ConfigTag.*;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.GeneratedMessage;
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
+
+import static org.apache.hadoop.hdds.conf.ConfigTag.OZONE;
+import static org.apache.hadoop.hdds.conf.ConfigTag.SCM;
import org.apache.ratis.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.UUID;
-import java.util.function.Consumer;
-import java.util.function.Predicate;
-import java.util.stream.Collectors;
-
/**
 * Replication Manager (RM) is the one which is responsible for making sure
 * that the containers are properly replicated. Replication Manager deals only
 * with Quasi Closed / Closed container.
 */
-public class ReplicationManager {
+public class ReplicationManager implements MetricsSource {

  private static final Logger LOG =
      LoggerFactory.getLogger(ReplicationManager.class);

+  public static final String METRICS_SOURCE_NAME = "SCMReplicationManager";
+
  /**
   * Reference to the ContainerManager.
   */
@@ -140,15 +145,20 @@ public ReplicationManager(final ReplicationManagerConfiguration conf,
    this.lockManager = lockManager;
    this.conf = conf;
    this.running = false;
-    this.inflightReplication = new HashMap<>();
-    this.inflightDeletion = new HashMap<>();
+    this.inflightReplication = new ConcurrentHashMap<>();
+    this.inflightDeletion = new ConcurrentHashMap<>();
  }

  /**
   * Starts Replication Monitor thread.
   */
  public synchronized void start() {
    if (!isRunning()) {
+      DefaultMetricsSystem.instance().register(METRICS_SOURCE_NAME,
+          "SCM Replication manager (closed container replication) related "
+              + "metrics",
+          this);
      LOG.info("Starting Replication Monitor Thread.");
      running = true;
      replicationMonitor = new Thread(this::run);
@@ -472,6 +482,8 @@ private void forceCloseContainer(final ContainerInfo container,
   */
  private void handleUnderReplicatedContainer(final ContainerInfo container,
      final Set<ContainerReplica> replicas) {
+    LOG.debug("Handling underreplicated container: {}",
+        container.getContainerID());
    try {
      final ContainerID id = container.containerID();
      final List<DatanodeDetails> deletionInFlight = inflightDeletion
@@ -748,6 +760,16 @@ private static boolean compareState(final LifeCycleState containerState,
    }
  }

+  @Override
+  public void getMetrics(MetricsCollector collector, boolean all) {
+    collector.addRecord(ReplicationManager.class.getSimpleName())
+        .addGauge(ReplicationManagerMetrics.INFLIGHT_REPLICATION,
+            inflightReplication.size())
+        .addGauge(ReplicationManagerMetrics.INFLIGHT_DELETION,
+            inflightDeletion.size())
+        .endRecord();
+  }
+
  /**
   * Wrapper class to hold the InflightAction with its start time.
   */
@@ -822,4 +844,32 @@ public long getEventTimeout() {
      return eventTimeout;
    }
  }
+
+  /**
+   * Metric name definitions for Replication manager.
+   */
+  public enum ReplicationManagerMetrics implements MetricsInfo {
+
+    INFLIGHT_REPLICATION("Tracked inflight container replication requests."),
+    INFLIGHT_DELETION("Tracked inflight container deletion requests.");
+
+    private final String desc;
+
+    ReplicationManagerMetrics(String desc) {
+      this.desc = desc;
+    }
+
+    @Override
+    public String description() {
+      return desc;
+    }
+
+    @Override
+    public String toString() {
+      return new StringJoiner(", ", this.getClass().getSimpleName() + "{", "}")
+          .add("name=" + name())
+          .add("description=" + desc)
+          .toString();
+    }
+  }
}
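A hypothetical test-side check of the two new gauges, using Hadoop's MetricsAsserts helper (assumed available from the hadoop-common test jar; replicationManager stands for a started instance):

import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;

MetricsRecordBuilder rb = getMetrics(replicationManager);
assertGauge("INFLIGHT_REPLICATION", 0, rb);
assertGauge("INFLIGHT_DELETION", 0, rb);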

View File

@@ -48,7 +48,6 @@ public void onMessage(NodeReportFromDatanode nodeReportFromDatanode,
    DatanodeDetails dn = nodeReportFromDatanode.getDatanodeDetails();
    Preconditions.checkNotNull(dn, "NodeReport is "
        + "missing DatanodeDetails.");
-    LOGGER.trace("Processing node report for dn: {}", dn);
    nodeManager
        .processNodeReport(dn, nodeReportFromDatanode.getReport());
  }

View File

@@ -17,52 +17,6 @@
 */
package org.apache.hadoop.hdds.scm.node;

-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
-import org.apache.hadoop.hdds.scm.container.ContainerID;
-import org.apache.hadoop.hdds.scm.net.NetworkTopology;
-import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
-import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
-import org.apache.hadoop.hdds.scm.node.states.NodeAlreadyExistsException;
-import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
-import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
-import org.apache.hadoop.hdds.scm.VersionInfo;
-import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric;
-import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat;
-import org.apache.hadoop.hdds.server.events.EventPublisher;
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos.NodeReportProto;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto
-    .ErrorCode;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos.StorageReportProto;
-import org.apache.hadoop.hdds.protocol.proto
-    .StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
-import org.apache.hadoop.ipc.Server;
-import org.apache.hadoop.metrics2.util.MBeans;
-import org.apache.hadoop.net.CachedDNSToSwitchMapping;
-import org.apache.hadoop.net.DNSToSwitchMapping;
-import org.apache.hadoop.net.TableMapping;
-import org.apache.hadoop.ozone.OzoneConsts;
-import org.apache.hadoop.ozone.protocol.VersionResponse;
-import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
-import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand;
-import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import javax.management.ObjectName;
import java.io.IOException;
import java.net.InetAddress;
@@ -76,6 +30,45 @@
import java.util.concurrent.ScheduledFuture;
import java.util.stream.Collectors;

+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto.ErrorCode;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
+import org.apache.hadoop.hdds.scm.VersionInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric;
+import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat;
+import org.apache.hadoop.hdds.scm.net.NetworkTopology;
+import org.apache.hadoop.hdds.scm.node.states.NodeAlreadyExistsException;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
+import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
+import org.apache.hadoop.hdds.server.events.EventPublisher;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.ipc.Server;
+import org.apache.hadoop.metrics2.util.MBeans;
+import org.apache.hadoop.net.CachedDNSToSwitchMapping;
+import org.apache.hadoop.net.DNSToSwitchMapping;
+import org.apache.hadoop.net.TableMapping;
+import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.protocol.VersionResponse;
+import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
+import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand;
+import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
 * Maintains information about the Datanodes on SCM side.
 * <p>
@@ -322,6 +315,15 @@ public Boolean isNodeRegistered(DatanodeDetails datanodeDetails) {
  @Override
  public void processNodeReport(DatanodeDetails datanodeDetails,
      NodeReportProto nodeReport) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Processing node report from [datanode={}]",
+          datanodeDetails.getHostName());
+    }
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("HB is received from [datanode={}]: <json>{}</json>",
+          datanodeDetails.getHostName(),
+          nodeReport.toString().replaceAll("\n", "\\\\n"));
+    }
    try {
      DatanodeInfo datanodeInfo = nodeStateManager.getNode(datanodeDetails);
      if (nodeReport != null) {

View File

@@ -21,9 +21,14 @@
 */
package org.apache.hadoop.hdds.scm.server;

-import com.google.common.collect.Maps;
-import com.google.protobuf.BlockingService;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -50,29 +55,20 @@
import org.apache.hadoop.ozone.audit.Auditor;
import org.apache.hadoop.ozone.audit.SCMAction;
import org.apache.hadoop.ozone.common.BlockGroup;
-import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.ozone.common.DeleteBlockGroupResult;
-import org.apache.hadoop.ozone.protocolPB
-    .ScmBlockLocationProtocolServerSideTranslatorPB;
+import org.apache.hadoop.ozone.protocolPB.ProtocolMessageMetrics;
+import org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB;
+import com.google.common.collect.Maps;
+import com.google.protobuf.BlockingService;
+
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_CLIENT_ADDRESS_KEY;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_DEFAULT;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_KEY;
+import static org.apache.hadoop.hdds.scm.server.StorageContainerManager.startRpcServer;
+import static org.apache.hadoop.hdds.server.ServerUtils.updateRPCListenAddress;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.io.IOException;
-import java.net.InetSocketAddress;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import static org.apache.hadoop.hdds.scm.ScmConfigKeys
-    .OZONE_SCM_BLOCK_CLIENT_ADDRESS_KEY;
-import static org.apache.hadoop.hdds.scm.ScmConfigKeys
-    .OZONE_SCM_HANDLER_COUNT_DEFAULT;
-import static org.apache.hadoop.hdds.scm.ScmConfigKeys
-    .OZONE_SCM_HANDLER_COUNT_KEY;
-import static org.apache.hadoop.hdds.server.ServerUtils.updateRPCListenAddress;
-import static org.apache.hadoop.hdds.scm.server.StorageContainerManager
-    .startRpcServer;
-
/**
 * SCM block protocol is the protocol used by Namenode and OzoneManager to get
 * blocks from the SCM.
@@ -89,6 +85,8 @@ public class SCMBlockProtocolServer implements
  private final OzoneConfiguration conf;
  private final RPC.Server blockRpcServer;
  private final InetSocketAddress blockRpcAddress;
+  private final ProtocolMessageMetrics
+      protocolMessageMetrics;

  /**
   * The RPC server that listens to requests from block service clients.
@@ -103,11 +101,18 @@ public SCMBlockProtocolServer(OzoneConfiguration conf,
    RPC.setProtocolEngine(conf, ScmBlockLocationProtocolPB.class,
        ProtobufRpcEngine.class);

+    protocolMessageMetrics =
+        ProtocolMessageMetrics.create("ScmBlockLocationProtocol",
+            "SCM Block location protocol counters",
+            ScmBlockLocationProtocolProtos.Type.values());
+
    // SCM Block Service RPC.
    BlockingService blockProtoPbService =
        ScmBlockLocationProtocolProtos.ScmBlockLocationProtocolService
            .newReflectiveBlockingService(
-                new ScmBlockLocationProtocolServerSideTranslatorPB(this));
+                new ScmBlockLocationProtocolServerSideTranslatorPB(this,
+                    protocolMessageMetrics));

    final InetSocketAddress scmBlockAddress = HddsServerUtil
        .getScmBlockClientBindAddress(conf);
@@ -137,6 +142,7 @@ public InetSocketAddress getBlockRpcAddress() {
  }

  public void start() {
+    protocolMessageMetrics.register();
    LOG.info(
        StorageContainerManager.buildRpcServerStartMessage(
            "RPC server for Block Protocol", getBlockRpcAddress()));
@@ -145,6 +151,7 @@ public void start() {

  public void stop() {
    try {
+      protocolMessageMetrics.unregister();
      LOG.info("Stopping the RPC server for Block Protocol");
      getBlockRpcServer().stop();
    } catch (Exception ex) {

View File

@@ -24,13 +24,16 @@
import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos;
import org.apache.hadoop.hdds.scm.TestUtils;
import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.ozone.protocolPB.ProtocolMessageMetrics;
import org.apache.hadoop.ozone.protocolPB
    .ScmBlockLocationProtocolServerSideTranslatorPB;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+import org.mockito.Mockito;

import java.io.File;
import java.util.ArrayList;
@@ -39,7 +42,7 @@

/**
 * Test class for @{@link SCMBlockProtocolServer}.
- * */
+ */
public class TestSCMBlockProtocolServer {
  private OzoneConfiguration config;
  private SCMBlockProtocolServer server;
@@ -64,7 +67,8 @@ public void setUp() throws Exception {
    }
    server = scm.getBlockProtocolServer();
-    service = new ScmBlockLocationProtocolServerSideTranslatorPB(server);
+    service = new ScmBlockLocationProtocolServerSideTranslatorPB(server,
+        Mockito.mock(ProtocolMessageMetrics.class));
  }

  @After

View File

@@ -51,6 +51,7 @@ function hadoop_usage
  hadoop_add_subcommand "scmcli" client "run the CLI of the Storage Container Manager"
  hadoop_add_subcommand "sh" client "command line interface for object store operations"
  hadoop_add_subcommand "s3" client "command line interface for s3 related operations"
+  hadoop_add_subcommand "insight" client "tool to get runtime operation information"
  hadoop_add_subcommand "version" client "print the version"
  hadoop_add_subcommand "dtutil" client "operations related to delegation tokens"
  hadoop_add_subcommand "upgrade" client "HDFS to Ozone in-place upgrade tool"
@@ -175,6 +176,11 @@ function ozonecmd_case
      HADOOP_OPTS="${HADOOP_OPTS} ${HDFS_SCM_CLI_OPTS}"
      OZONE_RUN_ARTIFACT_NAME="hadoop-hdds-tools"
    ;;
+    insight)
+      HADOOP_CLASSNAME=org.apache.hadoop.ozone.insight.Insight
+      HADOOP_OPTS="${HADOOP_OPTS} ${HDFS_SCM_CLI_OPTS}"
+      OZONE_RUN_ARTIFACT_NAME="hadoop-ozone-insight"
+    ;;
    version)
      HADOOP_CLASSNAME=org.apache.hadoop.ozone.util.OzoneVersionInfo
      OZONE_RUN_ARTIFACT_NAME="hadoop-ozone-common"
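With this wiring the tool runs as a regular Ozone subcommand, for example "ozone insight list" to enumerate the registered insight points or "ozone insight log scm.node-manager" to stream a component's logs (subcommand names follow the Insight CLI added by this patch; only the launcher hook is shown here).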

View File

@@ -63,4 +63,8 @@
    <name>hdds.datanode.storage.utilization.critical.threshold</name>
    <value>0.99</value>
  </property>
+  <property>
+    <name>hdds.prometheus.endpoint.enabled</name>
+    <value>true</value>
+  </property>
</configuration>

View File

@@ -136,6 +136,14 @@
                <type>cp</type>
                <destFileName>hadoop-ozone-upgrade.classpath</destFileName>
              </artifactItem>
+              <artifactItem>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-ozone-insight</artifactId>
+                <version>${ozone.version}</version>
+                <classifier>classpath</classifier>
+                <type>cp</type>
+                <destFileName>hadoop-ozone-insight.classpath</destFileName>
+              </artifactItem>
            </artifactItems>
          </configuration>
        </execution>
@@ -326,6 +334,10 @@
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-ozone-upgrade</artifactId>
    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-ozone-insight</artifactId>
+    </dependency>
  </dependencies>
  <profiles>
    <profile>

View File

@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<FindBugsFilter>
</FindBugsFilter>

View File

@@ -0,0 +1,132 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-ozone</artifactId>
<version>0.5.0-SNAPSHOT</version>
</parent>
<artifactId>hadoop-ozone-insight</artifactId>
<version>0.5.0-SNAPSHOT</version>
<description>Apache Hadoop Ozone Insight Tool</description>
<name>Apache Hadoop Ozone Insight Tool</name>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-ozone-ozone-manager</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-ozone-common</artifactId>
</dependency>
<!-- Genesis requires server side components -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdds-server-scm</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-ozone-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-ozone-filesystem</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdds-server-framework</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-core</artifactId>
</dependency>
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
</dependency>
<dependency>
<groupId>javax.activation</groupId>
<artifactId>activation</artifactId>
</dependency>
<dependency>
<groupId>io.dropwizard.metrics</groupId>
<artifactId>metrics-core</artifactId>
<version>3.2.4</version>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>1.19</version>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>1.19</version>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>findbugs</artifactId>
<version>3.0.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-ozone-integration-test</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<configuration>
<excludeFilterFile>${basedir}/dev-support/findbugsExcludeFile.xml
</excludeFilterFile>
<fork>true</fork>
<maxHeap>2048</maxHeap>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,188 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.StorageUnit;
import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.XceiverClientManager;
import org.apache.hadoop.hdds.scm.client.ContainerOperationClient;
import org.apache.hadoop.hdds.scm.client.ScmClient;
import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB;
import org.apache.hadoop.hdds.server.PrometheusMetricsSink;
import org.apache.hadoop.hdds.tracing.TracingUtil;
import org.apache.hadoop.ipc.Client;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.insight.LoggerSource.Level;
import org.apache.hadoop.security.UserGroupInformation;
import com.google.protobuf.ProtocolMessageEnum;
import static org.apache.hadoop.hdds.HddsUtils.getScmAddressForClients;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT;
/**
* Default implementation of Insight point logic.
*/
public abstract class BaseInsightPoint implements InsightPoint {
/**
* List the related metrics.
*/
@Override
public List<MetricGroupDisplay> getMetrics() {
return new ArrayList<>();
}
/**
* List the related configuration.
*/
@Override
public List<Class> getConfigurationClasses() {
return new ArrayList<>();
}
/**
* List the related loggers.
*
* @param verbose true if verbose logging is requested.
*/
@Override
public List<LoggerSource> getRelatedLoggers(boolean verbose) {
List<LoggerSource> loggers = new ArrayList<>();
return loggers;
}
/**
* Create scm client.
*/
public ScmClient createScmClient(OzoneConfiguration ozoneConf)
throws IOException {
if (!HddsUtils.getHostNameFromConfigKeys(ozoneConf,
ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY).isPresent()) {
throw new IllegalArgumentException(
ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY
+ " should be set in ozone-site.xml");
}
long version = RPC.getProtocolVersion(
StorageContainerLocationProtocolPB.class);
InetSocketAddress scmAddress =
getScmAddressForClients(ozoneConf);
int containerSizeGB = (int) ozoneConf.getStorageSize(
OZONE_SCM_CONTAINER_SIZE, OZONE_SCM_CONTAINER_SIZE_DEFAULT,
StorageUnit.GB);
ContainerOperationClient
.setContainerSizeB(containerSizeGB * OzoneConsts.GB);
RPC.setProtocolEngine(ozoneConf, StorageContainerLocationProtocolPB.class,
ProtobufRpcEngine.class);
StorageContainerLocationProtocol client =
TracingUtil.createProxy(
new StorageContainerLocationProtocolClientSideTranslatorPB(
RPC.getProxy(StorageContainerLocationProtocolPB.class, version,
scmAddress, UserGroupInformation.getCurrentUser(),
ozoneConf,
NetUtils.getDefaultSocketFactory(ozoneConf),
Client.getRpcTimeout(ozoneConf))),
StorageContainerLocationProtocol.class, ozoneConf);
return new ContainerOperationClient(
client, new XceiverClientManager(ozoneConf));
}
/**
* Convenient method to define default log levels.
*/
public Level defaultLevel(boolean verbose) {
return verbose ? Level.TRACE : Level.DEBUG;
}
/**
* Default metrics for any message type based RPC ServerSide translators.
*/
public void addProtocolMessageMetrics(List<MetricGroupDisplay> metrics,
String prefix,
Component.Type component,
ProtocolMessageEnum[] types) {
MetricGroupDisplay messageTypeCounters =
new MetricGroupDisplay(component, "Message type counters");
for (ProtocolMessageEnum type : types) {
String typeName = type.toString();
MetricDisplay metricDisplay = new MetricDisplay("Number of " + typeName,
prefix + "_" + PrometheusMetricsSink
.normalizeName(typeName));
messageTypeCounters.addMetrics(metricDisplay);
}
metrics.add(messageTypeCounters);
}
/**
* Rpc metrics for any hadoop rpc endpoint.
*/
public void addRpcMetrics(List<MetricGroupDisplay> metrics,
Component.Type component,
Map<String, String> filter) {
MetricGroupDisplay connection =
new MetricGroupDisplay(component, "RPC connections");
connection.addMetrics(new MetricDisplay("Open connections",
"rpc_num_open_connections", filter));
connection.addMetrics(
new MetricDisplay("Dropped connections", "rpc_num_dropped_connections",
filter));
connection.addMetrics(
new MetricDisplay("Received bytes", "rpc_received_bytes",
filter));
connection.addMetrics(
new MetricDisplay("Sent bytes", "rpc_sent_bytes",
filter));
metrics.add(connection);
MetricGroupDisplay queue = new MetricGroupDisplay(component, "RPC queue");
queue.addMetrics(new MetricDisplay("RPC average queue time",
"rpc_rpc_queue_time_avg_time", filter));
queue.addMetrics(
new MetricDisplay("RPC call queue length", "rpc_call_queue_length",
filter));
metrics.add(queue);
MetricGroupDisplay performance =
new MetricGroupDisplay(component, "RPC performance");
performance.addMetrics(new MetricDisplay("RPC processing time average",
"rpc_rpc_processing_time_avg_time", filter));
performance.addMetrics(
new MetricDisplay("Number of slow calls", "rpc_rpc_slow_calls",
filter));
metrics.add(performance);
}
}
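An illustrative subclass, showing how a concrete insight point can combine the helpers above (the component type, logger class, and description are placeholders; the real implementations registered below follow the same shape):

public class ExampleInsight extends BaseInsightPoint {

  @Override
  public String getDescription() {
    return "Example insight point";   // placeholder description
  }

  @Override
  public List<LoggerSource> getRelatedLoggers(boolean verbose) {
    List<LoggerSource> loggers = new ArrayList<>();
    // expose one component logger at DEBUG, or TRACE when verbose is requested
    loggers.add(new LoggerSource(Component.Type.SCM,
        ReplicationManager.class, defaultLevel(verbose)));
    return loggers;
  }
}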

View File

@@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;
import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.ozone.insight.Component.Type;
import org.apache.hadoop.ozone.insight.om.KeyManagerInsight;
import org.apache.hadoop.ozone.insight.om.OmProtocolInsight;
import org.apache.hadoop.ozone.insight.scm.EventQueueInsight;
import org.apache.hadoop.ozone.insight.scm.NodeManagerInsight;
import org.apache.hadoop.ozone.insight.scm.ReplicaManagerInsight;
import org.apache.hadoop.ozone.insight.scm.ScmProtocolBlockLocationInsight;
import org.apache.hadoop.ozone.om.OMConfigKeys;
import picocli.CommandLine;
/**
* Parent class for all the insight subcommands.
*/
public class BaseInsightSubCommand {
@CommandLine.ParentCommand
private Insight insightCommand;
public InsightPoint getInsight(OzoneConfiguration configuration,
String selection) {
Map<String, InsightPoint> insights = createInsightPoints(configuration);
if (!insights.containsKey(selection)) {
throw new RuntimeException(String
.format("No such component; %s. Available components: %s", selection,
insights.keySet()));
}
return insights.get(selection);
}
/**
* Utility to get the host base on a component.
*/
public String getHost(OzoneConfiguration conf, Component component) {
if (component.getHostname() != null) {
return "http://" + component.getHostname() + ":" + component.getPort();
} else if (component.getName() == Type.SCM) {
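      // Fall back to the SCM address from the configuration; 9876 is the
      // default SCM HTTP port.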
Optional<String> scmHost =
HddsUtils.getHostNameFromConfigKeys(conf,
ScmConfigKeys.OZONE_SCM_BLOCK_CLIENT_ADDRESS_KEY,
ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY);
return "http://" + scmHost.get() + ":9876";
} else if (component.getName() == Type.OM) {
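      // Fall back to the OM address from the configuration; 9874 is the
      // default OM HTTP port.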
Optional<String> omHost =
HddsUtils.getHostNameFromConfigKeys(conf,
OMConfigKeys.OZONE_OM_ADDRESS_KEY);
return "http://" + omHost.get() + ":9874";
} else {
throw new IllegalArgumentException(
"Component type is not supported: " + component.getName());
}
}
public Map<String, InsightPoint> createInsightPoints(
OzoneConfiguration configuration) {
Map<String, InsightPoint> insights = new LinkedHashMap<>();
insights.put("scm.node-manager", new NodeManagerInsight());
insights.put("scm.replica-manager", new ReplicaManagerInsight());
insights.put("scm.event-queue", new EventQueueInsight());
insights.put("scm.protocol.block-location",
new ScmProtocolBlockLocationInsight());
insights.put("om.key-manager", new KeyManagerInsight());
insights.put("om.protocol.client", new OmProtocolInsight());
return insights;
}
public Insight getInsightCommand() {
return insightCommand;
}
}

View File

@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import java.util.Objects;
/**
* Identifies an ozone component.
*/
public class Component {
/**
* The type of the component (e.g. scm, s3g, ...).
*/
private Type name;
/**
* Unique identifier of the instance (uuid or index). Can be null for a
* non-HA server component.
*/
private String id;
/**
* Hostname of the component. Optional; may help to find the right host.
*/
private String hostname;
/**
* HTTP service port. Optional.
*/
private int port;
public Component(Type name) {
this.name = name;
}
public Component(Type name, String id) {
this.name = name;
this.id = id;
}
public Component(Type name, String id, String hostname) {
this.name = name;
this.id = id;
this.hostname = hostname;
}
public Component(Type name, String id, String hostname, int port) {
this.name = name;
this.id = id;
this.hostname = hostname;
this.port = port;
}
public Type getName() {
return name;
}
public String getId() {
return id;
}
public String getHostname() {
return hostname;
}
public int getPort() {
return port;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
Component that = (Component) o;
return Objects.equals(name, that.name) &&
Objects.equals(id, that.id);
}
public String prefix() {
return name + (id != null && id.length() > 0 ? "-" + id : "");
}
@Override
public int hashCode() {
return Objects.hash(name, id);
}
/**
* Ozone component types.
*/
public enum Type {
SCM, OM, DATANODE, S3G, RECON;
}
}
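
A hypothetical sketch of the identity rules above: equals() and hashCode() use only the type and the id, while prefix() is what the log streaming uses to tag merged output lines (the class name, ids, hosts and ports below are made up for illustration).

import org.apache.hadoop.ozone.insight.Component;
import org.apache.hadoop.ozone.insight.Component.Type;

public class ComponentIdentityExample {
  public static void main(String[] args) {
    // Host and port are informational only; identity is (type, id).
    Component a = new Component(Type.DATANODE, "dn-1", "host-a", 9882);
    Component b = new Component(Type.DATANODE, "dn-1", "host-b", 19882);
    System.out.println(a.equals(b)); // true: same type and id
    System.out.println(a.prefix());  // DATANODE-dn-1
    // A null or empty id is skipped in the prefix:
    System.out.println(new Component(Type.SCM).prefix()); // SCM
  }
}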

View File

@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.conf.Config;
import org.apache.hadoop.hdds.conf.ConfigGroup;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.insight.Component.Type;
import picocli.CommandLine;
import java.lang.reflect.Method;
import java.util.concurrent.Callable;
/**
* Subcommand to show configuration values/documentation.
*/
@CommandLine.Command(
name = "config",
description = "Show configuration for a specific subcomponents",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class)
public class ConfigurationSubCommand extends BaseInsightSubCommand
implements Callable<Void> {
@CommandLine.Parameters(description = "Name of the insight point (use list "
+ "to check the available options)")
private String insightName;
@Override
public Void call() throws Exception {
InsightPoint insight =
getInsight(getInsightCommand().createOzoneConfiguration(), insightName);
System.out.println(
"Configuration for `" + insightName + "` (" + insight.getDescription()
+ ")");
System.out.println();
for (Class clazz : insight.getConfigurationClasses()) {
showConfig(clazz);
}
return null;
}
private void showConfig(Class clazz) {
OzoneConfiguration conf = new OzoneConfiguration();
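    // Read the current runtime values from the /conf servlet of the
    // running SCM.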
conf.addResource(getHost(conf, new Component(Type.SCM)) + "/conf");
ConfigGroup configGroup =
(ConfigGroup) clazz.getAnnotation(ConfigGroup.class);
if (configGroup == null) {
return;
}
String prefix = configGroup.prefix();
for (Method method : clazz.getMethods()) {
if (method.isAnnotationPresent(Config.class)) {
Config config = method.getAnnotation(Config.class);
String key = prefix + "." + config.key();
System.out.println(">>> " + key);
System.out.println(" default: " + config.defaultValue());
System.out.println(" current: " + conf.get(key));
System.out.println();
System.out.println(config.description());
System.out.println();
System.out.println();
}
}
}
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import org.apache.hadoop.hdds.cli.GenericCli;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import picocli.CommandLine;
/**
* Command line utility to check logs/metrics of internal ozone components.
*/
@CommandLine.Command(name = "ozone insight",
hidden = true, description = "Show debug information about a selected "
+ "Ozone component",
versionProvider = HddsVersionProvider.class,
subcommands = {ListSubCommand.class, LogSubcommand.class,
MetricsSubCommand.class, ConfigurationSubCommand.class},
mixinStandardHelpOptions = true)
public class Insight extends GenericCli {
public static void main(String[] args) throws Exception {
new Insight().run(args);
}
}
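
Combined with the insight points registered in BaseInsightSubCommand, the annotations above imply invocations such as "ozone insight list", "ozone insight log -v scm.node-manager" or "ozone insight metrics om.key-manager" (illustrative; the exact shell wrapper depends on how the distribution wires in this hidden subcommand).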

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import java.util.List;
/**
* Definition of a specific insight point.
*/
public interface InsightPoint {
/**
* Human readable description.
*/
String getDescription();
/**
* List of the related loggers.
*/
List<LoggerSource> getRelatedLoggers(boolean verbose);
/**
* List of the related metrics.
*/
List<MetricGroupDisplay> getMetrics();
/**
* List of the configuration classes.
*/
List<Class> getConfigurationClasses();
}

View File

@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import picocli.CommandLine;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.Callable;
/**
* Subcommand to list the available insight points.
*/
@CommandLine.Command(
name = "list",
description = "Show available insight points.",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class)
public class ListSubCommand extends BaseInsightSubCommand
implements Callable<Void> {
@CommandLine.Parameters(defaultValue = "")
private String insightPrefix;
@Override
public Void call() throws Exception {
System.out.println("Available insight points:\n\n");
Map<String, InsightPoint> insightPoints =
createInsightPoints(new OzoneConfiguration());
for (Entry<String, InsightPoint> entry : insightPoints.entrySet()) {
if (insightPrefix == null || entry.getKey().startsWith(insightPrefix)) {
System.out.println(String.format(" %-33s %s", entry.getKey(),
entry.getValue().getDescription()));
}
}
return null;
}
}

View File

@ -0,0 +1,167 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.insight.LoggerSource.Level;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClientBuilder;
import picocli.CommandLine;
/**
* Subcommand to display logs.
*/
@CommandLine.Command(
name = "log",
aliases = "logs",
description = "Show log4j events related to the insight point",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class)
public class LogSubcommand extends BaseInsightSubCommand
implements Callable<Void> {
@CommandLine.Parameters(description = "Name of the insight point (use list "
+ "to check the available options)")
private String insightName;
@CommandLine.Option(names = "-v", description = "Enable verbose mode to "
+ "show more information / detailed message")
private boolean verbose;
@Override
public Void call() throws Exception {
OzoneConfiguration conf =
getInsightCommand().createOzoneConfiguration();
InsightPoint insight =
getInsight(conf, insightName);
List<LoggerSource> loggers = insight.getRelatedLoggers(verbose);
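    // Raise the level of each related logger first, stream the events, then
    // restore INFO in the finally block below.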
for (LoggerSource logger : loggers) {
setLogLevel(conf, logger.getLoggerName(), logger.getComponent(),
logger.getLevel());
}
Set<Component> sources = loggers.stream().map(LoggerSource::getComponent)
.collect(Collectors.toSet());
try {
streamLog(conf, sources, loggers);
} finally {
for (LoggerSource logger : loggers) {
setLogLevel(conf, logger.getLoggerName(), logger.getComponent(),
Level.INFO);
}
}
return null;
}
private void streamLog(OzoneConfiguration conf, Set<Component> sources,
List<LoggerSource> relatedLoggers) {
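    // Stream the logs of every source component from a separate thread and
    // merge the output on stdout.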
List<Thread> loggers = new ArrayList<>();
for (Component sourceComponent : sources) {
loggers.add(new Thread(
() -> streamLog(conf, sourceComponent, relatedLoggers)));
}
for (Thread thread : loggers) {
thread.start();
}
for (Thread thread : loggers) {
try {
thread.join();
      } catch (InterruptedException e) {
        // Restore the interrupt status instead of swallowing it.
        Thread.currentThread().interrupt();
        e.printStackTrace();
}
}
}
private void streamLog(OzoneConfiguration conf, Component logComponent,
List<LoggerSource> loggers) {
HttpClient client = HttpClientBuilder.create().build();
HttpGet get = new HttpGet(getHost(conf, logComponent) + "/logstream");
try {
HttpResponse execute = client.execute(get);
try (BufferedReader bufferedReader = new BufferedReader(
new InputStreamReader(execute.getEntity().getContent(),
StandardCharsets.UTF_8))) {
bufferedReader.lines()
.filter(line -> {
for (LoggerSource logger : loggers) {
if (line.contains(logger.getLoggerName())) {
return true;
}
}
return false;
})
.map(this::processLogLine)
.map(l -> "[" + logComponent.prefix() + "] " + l)
.forEach(System.out::println);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public String processLogLine(String line) {
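    // Unwrap the <json>...</json> payload and turn the escaped \n sequences
    // into real line breaks.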
Pattern p = Pattern.compile("<json>(.*)</json>");
Matcher m = p.matcher(line);
StringBuffer sb = new StringBuffer();
while (m.find()) {
m.appendReplacement(sb, "\n" + m.group(1).replaceAll("\\\\n", "\n"));
}
m.appendTail(sb);
return sb.toString();
}
private void setLogLevel(OzoneConfiguration conf, String name,
Component component, LoggerSource.Level level) {
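    // The standard Hadoop /logLevel servlet is used to change the level on
    // the remote component.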
HttpClient client = HttpClientBuilder.create().build();
String request = String
.format("/logLevel?log=%s&level=%s", name,
level);
String hostName = getHost(conf, component);
HttpGet get = new HttpGet(hostName + request);
try {
HttpResponse execute = client.execute(get);
if (execute.getStatusLine().getStatusCode() != 200) {
throw new RuntimeException(
"Can't set the log level: " + hostName + " -> HTTP " + execute
.getStatusLine().getStatusCode());
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

View File

@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import org.apache.hadoop.ozone.insight.Component.Type;
/**
* Definition of a log source.
*/
public class LoggerSource {
/**
* Id of the component where the log is generated.
*/
private Component component;
/**
* Log4j/slf4j logger name.
*/
private String loggerName;
/**
* Log level.
*/
private Level level;
public LoggerSource(Component component, String loggerName, Level level) {
this.component = component;
this.loggerName = loggerName;
this.level = level;
}
public LoggerSource(Type componentType, Class<?> loggerClass,
Level level) {
this(new Component(componentType), loggerClass.getCanonicalName(), level);
}
public Component getComponent() {
return component;
}
public String getLoggerName() {
return loggerName;
}
public Level getLevel() {
return level;
}
/**
* Log level definition.
*/
public enum Level {
TRACE, DEBUG, INFO, WARN, ERROR
}
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import java.util.HashMap;
import java.util.Map;
/**
* Definition of one displayable Hadoop metric.
*/
public class MetricDisplay {
/**
* Prometheus metric name.
*/
private String id;
/**
* Human readable description of the metric.
*/
private String description;
/**
* Prometheus metric tags to select the right metric.
*/
private Map<String, String> filter;
public MetricDisplay(String description, String id) {
this(description, id, new HashMap<>());
}
public MetricDisplay(String description, String id,
Map<String, String> filter) {
this.id = id;
this.description = description;
this.filter = filter;
}
public String getId() {
return id;
}
public String getDescription() {
return description;
}
public Map<String, String> getFilter() {
return filter;
}
public boolean checkLine(String line) {
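    // Placeholder: per-line matching is not implemented yet; every line is
    // treated as a non-match.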
return false;
}
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.ozone.insight.Component.Type;
/**
* Definition of a group of metrics which can be displayed.
*/
public class MetricGroupDisplay {
/**
* List of the included metrics.
*/
private List<MetricDisplay> metrics = new ArrayList<>();
/**
* Name of the component which includes the metrics (scm, om,...).
*/
private Component component;
/**
* Human readable description.
*/
private String description;
public MetricGroupDisplay(Component component, String description) {
this.component = component;
this.description = description;
}
public MetricGroupDisplay(Type componentType, String description) {
  this(new Component(componentType), description);
}
public List<MetricDisplay> getMetrics() {
return metrics;
}
public void addMetrics(MetricDisplay item) {
this.metrics.add(item);
}
public String getDescription() {
return description;
}
public Component getComponent() {
return component;
}
}

View File

@ -0,0 +1,132 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClientBuilder;
import picocli.CommandLine;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
/**
* Command line interface to show metrics for a specific component.
*/
@CommandLine.Command(
name = "metrics",
aliases = "metric",
description = "Show available metrics.",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class)
public class MetricsSubCommand extends BaseInsightSubCommand
implements Callable<Void> {
@CommandLine.Parameters(description = "Name of the insight point (use list "
+ "to check the available options)")
private String insightName;
@Override
public Void call() throws Exception {
OzoneConfiguration conf =
getInsightCommand().createOzoneConfiguration();
InsightPoint insight =
getInsight(conf, insightName);
Set<Component> sources =
insight.getMetrics().stream().map(MetricGroupDisplay::getComponent)
.collect(Collectors.toSet());
Map<Component, List<String>> metrics = getMetrics(conf, sources);
System.out.println(
"Metrics for `" + insightName + "` (" + insight.getDescription() + ")");
System.out.println();
for (MetricGroupDisplay group : insight.getMetrics()) {
System.out.println(group.getDescription());
System.out.println();
for (MetricDisplay display : group.getMetrics()) {
System.out.println(" " + display.getDescription() + ": " + selectValue(
metrics.get(group.getComponent()), display));
}
System.out.println();
System.out.println();
}
return null;
}
private Map<Component, List<String>> getMetrics(OzoneConfiguration conf,
Collection<Component> sources) {
Map<Component, List<String>> result = new HashMap<>();
for (Component source : sources) {
result.put(source, getMetrics(conf, source));
}
return result;
}
private String selectValue(List<String> metrics,
MetricDisplay metricDisplay) {
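    // A Prometheus line has the form: name{tag1="value1",...} value.
    // Return the value of the first line which matches the id and all the
    // tag filters; "???" signals that no matching line was found.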
for (String line : metrics) {
if (line.startsWith(metricDisplay.getId())) {
boolean filtered = false;
for (Entry<String, String> filter : metricDisplay.getFilter()
.entrySet()) {
if (!line
.contains(filter.getKey() + "=\"" + filter.getValue() + "\"")) {
filtered = true;
}
}
if (!filtered) {
return line.split(" ")[1];
}
}
}
return "???";
}
private List<String> getMetrics(OzoneConfiguration conf,
Component component) {
HttpClient client = HttpClientBuilder.create().build();
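    // Fetch every exported metric line from the Prometheus endpoint of the
    // component.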
HttpGet get = new HttpGet(getHost(conf, component) + "/prom");
try {
HttpResponse execute = client.execute(get);
if (execute.getStatusLine().getStatusCode() != 200) {
throw new RuntimeException(
"Can't read prometheus metrics endpoint" + execute.getStatusLine()
.getStatusCode());
}
try (BufferedReader bufferedReader = new BufferedReader(
new InputStreamReader(execute.getEntity().getContent(),
StandardCharsets.UTF_8))) {
return bufferedReader.lines().collect(Collectors.toList());
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.datanode;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.scm.client.ScmClient;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.ozone.insight.BaseInsightPoint;
import org.apache.hadoop.ozone.insight.Component;
import org.apache.hadoop.ozone.insight.Component.Type;
import org.apache.hadoop.ozone.insight.InsightPoint;
import org.apache.hadoop.ozone.insight.LoggerSource;
/**
* Insight definition for datanode/pipeline metrics.
*/
public class RatisInsight extends BaseInsightPoint implements InsightPoint {
private OzoneConfiguration conf;
public RatisInsight(OzoneConfiguration conf) {
this.conf = conf;
}
@Override
public List<LoggerSource> getRelatedLoggers(boolean verbose) {
List<LoggerSource> result = new ArrayList<>();
try {
ScmClient scmClient = createScmClient(conf);
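      // Pick the first multi-node (Ratis) pipeline and add a logger source
      // for each member datanode.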
Pipeline pipeline = scmClient.listPipelines()
.stream()
.filter(d -> d.getNodes().size() > 1)
.findFirst()
.orElseThrow(() -> new IOException("No multi-node pipeline was found"));
for (DatanodeDetails datanode : pipeline.getNodes()) {
Component dn =
new Component(Type.DATANODE, datanode.getUuid().toString(),
datanode.getHostName(), 9882);
result
.add(new LoggerSource(dn, "org.apache.ratis.server.impl",
defaultLevel(verbose)));
}
} catch (IOException e) {
throw new RuntimeException("Can't enumerate required logs", e);
}
return result;
}
@Override
public String getDescription() {
return "More information about one ratis datanode ring.";
}
}

View File

@ -0,0 +1,23 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.datanode;
/**
* Insight points for the ozone datanodes.
*/

View File

@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.om;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.ozone.insight.BaseInsightPoint;
import org.apache.hadoop.ozone.insight.Component.Type;
import org.apache.hadoop.ozone.insight.LoggerSource;
import org.apache.hadoop.ozone.insight.MetricDisplay;
import org.apache.hadoop.ozone.insight.MetricGroupDisplay;
import org.apache.hadoop.ozone.om.KeyManagerImpl;
/**
* Insight implementation for the key management related operations.
*/
public class KeyManagerInsight extends BaseInsightPoint {
@Override
public List<MetricGroupDisplay> getMetrics() {
List<MetricGroupDisplay> display = new ArrayList<>();
MetricGroupDisplay state =
new MetricGroupDisplay(Type.OM, "Key related metrics");
state
.addMetrics(new MetricDisplay("Number of keys", "om_metrics_num_keys"));
state.addMetrics(new MetricDisplay("Number of key operations",
"om_metrics_num_key_ops"));
display.add(state);
MetricGroupDisplay key =
new MetricGroupDisplay(Type.OM, "Key operation stats");
for (String operation : new String[] {"allocate", "commit", "lookup",
"list", "delete"}) {
key.addMetrics(new MetricDisplay(
"Number of key " + operation + "s (failure + success)",
"om_metrics_num_key_" + operation));
key.addMetrics(
new MetricDisplay("Number of failed key " + operation + "s",
"om_metrics_num_key_" + operation + "_fails"));
}
display.add(key);
return display;
}
@Override
public List<LoggerSource> getRelatedLoggers(boolean verbose) {
List<LoggerSource> loggers = new ArrayList<>();
loggers.add(
new LoggerSource(Type.OM, KeyManagerImpl.class,
defaultLevel(verbose)));
return loggers;
}
@Override
public String getDescription() {
return "OM Key Manager";
}
}

View File

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.om;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.ozone.insight.BaseInsightPoint;
import org.apache.hadoop.ozone.insight.Component.Type;
import org.apache.hadoop.ozone.insight.LoggerSource;
import org.apache.hadoop.ozone.insight.MetricGroupDisplay;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
import org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB;
/**
* Insight definition for the OM RPC server.
*/
public class OmProtocolInsight extends BaseInsightPoint {
@Override
public List<LoggerSource> getRelatedLoggers(boolean verbose) {
List<LoggerSource> loggers = new ArrayList<>();
loggers.add(
new LoggerSource(Type.OM,
OzoneManagerProtocolServerSideTranslatorPB.class,
defaultLevel(verbose)));
return loggers;
}
@Override
public List<MetricGroupDisplay> getMetrics() {
List<MetricGroupDisplay> metrics = new ArrayList<>();
Map<String, String> filter = new HashMap<>();
filter.put("servername", "OzoneManagerService");
addRpcMetrics(metrics, Type.OM, filter);
addProtocolMessageMetrics(metrics, "om_client_protocol", Type.OM,
OzoneManagerProtocolProtos.Type.values());
return metrics;
}
@Override
public String getDescription() {
return "Ozone Manager RPC endpoint";
}
}

View File

@ -0,0 +1,23 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.om;
/**
* Insight points for the Ozone Manager.
*/

View File

@ -0,0 +1,24 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
/**
* Framework to collect log/metrics and configuration for specified ozone
* components.
*/

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.scm;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.ozone.insight.BaseInsightPoint;
import org.apache.hadoop.ozone.insight.Component.Type;
import org.apache.hadoop.ozone.insight.LoggerSource;
/**
* Insight definition to check internal events.
*/
public class EventQueueInsight extends BaseInsightPoint {
@Override
public List<LoggerSource> getRelatedLoggers(boolean verbose) {
List<LoggerSource> loggers = new ArrayList<>();
loggers
.add(new LoggerSource(Type.SCM, EventQueue.class,
defaultLevel(verbose)));
return loggers;
}
@Override
public String getDescription() {
return "Information about the internal async event delivery";
}
}

View File

@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.scm;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hdds.scm.node.SCMNodeManager;
import org.apache.hadoop.ozone.insight.BaseInsightPoint;
import org.apache.hadoop.ozone.insight.Component.Type;
import org.apache.hadoop.ozone.insight.LoggerSource;
import org.apache.hadoop.ozone.insight.MetricDisplay;
import org.apache.hadoop.ozone.insight.MetricGroupDisplay;
/**
* Insight definition to check node manager / node report events.
*/
public class NodeManagerInsight extends BaseInsightPoint {
@Override
public List<LoggerSource> getRelatedLoggers(boolean verbose) {
List<LoggerSource> loggers = new ArrayList<>();
loggers.add(
new LoggerSource(Type.SCM, SCMNodeManager.class,
defaultLevel(verbose)));
return loggers;
}
@Override
public List<MetricGroupDisplay> getMetrics() {
List<MetricGroupDisplay> display = new ArrayList<>();
MetricGroupDisplay nodes =
new MetricGroupDisplay(Type.SCM, "Node counters");
nodes.addMetrics(
new MetricDisplay("Healthy Nodes", "scm_node_manager_healthy_nodes"));
nodes.addMetrics(
new MetricDisplay("Dead Nodes", "scm_node_manager_dead_nodes"));
display.add(nodes);
MetricGroupDisplay hb =
new MetricGroupDisplay(Type.SCM, "HB processing stats");
hb.addMetrics(
new MetricDisplay("HB processed", "scm_node_manager_num_hb_processed"));
hb.addMetrics(new MetricDisplay("HB processing failed",
"scm_node_manager_num_hb_processing_failed"));
display.add(hb);
return display;
}
@Override
public String getDescription() {
return "SCM Datanode management related information.";
}
}

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.scm;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hdds.scm.container.ReplicationManager;
import org.apache.hadoop.ozone.insight.BaseInsightPoint;
import org.apache.hadoop.ozone.insight.Component.Type;
import org.apache.hadoop.ozone.insight.LoggerSource;
import org.apache.hadoop.ozone.insight.MetricGroupDisplay;
/**
* Insight definition to check the replication manager internal state.
*/
public class ReplicaManagerInsight extends BaseInsightPoint {
@Override
public List<LoggerSource> getRelatedLoggers(boolean verbose) {
List<LoggerSource> loggers = new ArrayList<>();
loggers.add(new LoggerSource(Type.SCM, ReplicationManager.class,
defaultLevel(verbose)));
return loggers;
}
@Override
public List<MetricGroupDisplay> getMetrics() {
List<MetricGroupDisplay> display = new ArrayList<>();
return display;
}
@Override
public List<Class> getConfigurationClasses() {
List<Class> result = new ArrayList<>();
result.add(ReplicationManager.ReplicationManagerConfiguration.class);
return result;
}
@Override
public String getDescription() {
return "SCM closed container replication manager";
}
}

View File

@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.scm;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos;
import org.apache.hadoop.hdds.scm.server.SCMBlockProtocolServer;
import org.apache.hadoop.ozone.insight.BaseInsightPoint;
import org.apache.hadoop.ozone.insight.Component.Type;
import org.apache.hadoop.ozone.insight.LoggerSource;
import org.apache.hadoop.ozone.insight.MetricGroupDisplay;
import org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB;
/**
* Insight definition to check the SCM block location protocol behaviour.
*/
public class ScmProtocolBlockLocationInsight extends BaseInsightPoint {
@Override
public List<LoggerSource> getRelatedLoggers(boolean verbose) {
List<LoggerSource> loggers = new ArrayList<>();
loggers.add(
    new LoggerSource(Type.SCM,
        ScmBlockLocationProtocolServerSideTranslatorPB.class,
        defaultLevel(verbose)));
loggers.add(
    new LoggerSource(Type.SCM,
        SCMBlockProtocolServer.class,
        defaultLevel(verbose)));
return loggers;
}
@Override
public List<MetricGroupDisplay> getMetrics() {
List<MetricGroupDisplay> metrics = new ArrayList<>();
Map<String, String> filter = new HashMap<>();
filter.put("servername", "StorageContainerLocationProtocolService");
addRpcMetrics(metrics, Type.SCM, filter);
addProtocolMessageMetrics(metrics, "scm_block_location_protocol",
Type.SCM, ScmBlockLocationProtocolProtos.Type.values());
return metrics;
}
@Override
public String getDescription() {
return "SCM Block location protocol endpoint";
}
}

View File

@ -0,0 +1,23 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight.scm;
/**
* Insight points for the Storage Container Manager.
*/

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.insight;
import org.junit.Assert;
import org.junit.Test;
/**
* Testing utility methods of the log subcommand.
*/
public class LogSubcommandTest {
@Test
public void filterLog() {
LogSubcommand logSubcommand = new LogSubcommand();
String result = logSubcommand.processLogLine(
"2019-08-04 12:27:08,648 [TRACE|org.apache.hadoop.hdds.scm.node"
+ ".SCMNodeManager|SCMNodeManager] HB is received from "
+ "[datanode=localhost]: <json>storageReport {\\n storageUuid: "
+ "\"DS-29204db6-a615-4106-9dd4-ce294c2f4cf6\"\\n "
+ "storageLocation: \"/tmp/hadoop-elek/dfs/data\"\\n capacity: "
+ "8348086272\\n scmUsed: 4096\\n remaining: 8246956032n "
+ "storageType: DISK\\n failed: falsen}\\n</json>\n");
Assert.assertEquals(3, result.split("\n").length);
}
}

View File

@ -35,6 +35,7 @@
import java.util.Objects;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.StorageUnit;
import org.apache.hadoop.crypto.key.KeyProvider;
@ -82,9 +83,11 @@
import org.apache.hadoop.ozone.om.protocol.OzoneManagerServerProtocol;
import org.apache.hadoop.ozone.om.ratis.OMRatisSnapshotInfo;
import org.apache.hadoop.ozone.om.snapshot.OzoneManagerSnapshotProvider;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DBUpdatesRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
    .KeyArgs;
import org.apache.hadoop.ozone.protocolPB.ProtocolMessageMetrics;
import org.apache.hadoop.ozone.security.OzoneSecurityException;
import org.apache.hadoop.ozone.security.OzoneTokenIdentifier;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
@ -150,6 +153,7 @@
import org.apache.hadoop.utils.db.SequenceNumberNotFoundException;
import org.apache.hadoop.utils.db.DBCheckpoint;
import org.apache.hadoop.utils.db.DBStore;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.ratis.util.FileUtils;
import org.apache.ratis.util.LifeCycle;
@ -253,6 +257,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
  private S3BucketManager s3BucketManager;
  private final OMMetrics metrics;
  private final ProtocolMessageMetrics omClientProtocolMetrics;
  private OzoneManagerHttpServer httpServer;
  private final OMStorage omStorage;
  private final ScmBlockLocationProtocol scmBlockClient;
@ -294,14 +299,12 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
  // execution, we can get from ozoneManager.
  private long maxUserVolumeCount;
  private final ScmClient scmClient;
  private final long scmBlockSize;
  private final int preallocateBlocksMax;
  private final boolean grpcBlockTokenEnabled;
  private final boolean useRatisForReplication;
  private OzoneManager(OzoneConfiguration conf) throws IOException,
      AuthenticationException {
    super(OzoneVersionInfo.OZONE_VERSION_INFO);
@ -412,6 +415,10 @@ private OzoneManager(OzoneConfiguration conf) throws IOException,
    metrics = OMMetrics.create();
    omClientProtocolMetrics = ProtocolMessageMetrics
        .create("OmClientProtocol", "Ozone Manager RPC endpoint",
            OzoneManagerProtocolProtos.Type.values());
    // Start Om Rpc Server.
    omRpcServer = getRpcServer(conf);
    omRpcAddress = updateRPCListenAddress(configuration,
@ -984,10 +991,11 @@ private static StorageContainerLocationProtocol getScmContainerClient(
    StorageContainerLocationProtocol scmContainerClient =
        TracingUtil.createProxy(
            new StorageContainerLocationProtocolClientSideTranslatorPB(
                RPC.getProxy(StorageContainerLocationProtocolPB.class,
                    scmVersion,
                    scmAddr, UserGroupInformation.getCurrentUser(), conf,
                    NetUtils.getDefaultSocketFactory(conf),
                    Client.getRpcTimeout(conf))),
            StorageContainerLocationProtocol.class, conf);
    return scmContainerClient;
  }
@ -1253,6 +1261,8 @@ public OMMetrics getMetrics() {
   */
  public void start() throws IOException {
    omClientProtocolMetrics.register();
    LOG.info(buildRpcServerStartMessage("OzoneManager RPC server",
        omRpcAddress));
@ -1377,7 +1387,7 @@ private RPC.Server getRpcServer(OzoneConfiguration conf) throws IOException {
    RPC.setProtocolEngine(configuration, OzoneManagerProtocolPB.class,
        ProtobufRpcEngine.class);
    this.omServerProtocol = new OzoneManagerProtocolServerSideTranslatorPB(
        this, omRatisServer, omClientProtocolMetrics, isRatisEnabled);
    BlockingService omService = newReflectiveBlockingService(omServerProtocol);
@ -1471,6 +1481,7 @@ public void stop() {
    }
    metadataManager.stop();
    metrics.unRegister();
    omClientProtocolMetrics.unregister();
    unregisterMXBean();
    if (jvmPauseMonitor != null) {
      jvmPauseMonitor.stop();

View File

@ -16,7 +16,6 @@
 */
package org.apache.hadoop.ozone.protocolPB;
import org.apache.hadoop.hdds.tracing.TracingUtil;
import org.apache.hadoop.ozone.OmUtils;
import org.apache.hadoop.ozone.om.OzoneManager;
@ -54,6 +53,7 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
  private final boolean isRatisEnabled;
  private final OzoneManager ozoneManager;
  private final OzoneManagerDoubleBuffer ozoneManagerDoubleBuffer;
  private final ProtocolMessageMetrics protocolMessageMetrics;
  /**
   * Constructs an instance of the server handler.
@ -61,12 +61,15 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
   * @param impl OzoneManagerProtocolPB
   */
  public OzoneManagerProtocolServerSideTranslatorPB(
      OzoneManager impl,
      OzoneManagerRatisServer ratisServer,
      ProtocolMessageMetrics metrics,
      boolean enableRatis) {
    this.ozoneManager = impl;
    handler = new OzoneManagerRequestHandler(impl);
    this.omRatisServer = ratisServer;
    this.isRatisEnabled = enableRatis;
    this.protocolMessageMetrics = metrics;
    this.ozoneManagerDoubleBuffer =
        new OzoneManagerDoubleBuffer(ozoneManager.getMetadataManager(), (i) -> {
          // Do nothing.
@ -82,48 +85,77 @@ public OzoneManagerProtocolServerSideTranslatorPB(
   * translator for OM protocol.
   */
  @Override
  public OMResponse submitRequest(RpcController controller,
      OMRequest request) throws ServiceException {
    Scope scope = TracingUtil
        .importAndCreateScope(request.getCmdType().name(),
            request.getTraceID());
    try {
      if (LOG.isTraceEnabled()) {
        LOG.trace(
            "OzoneManagerProtocol {} request is received: <json>{}</json>",
            request.getCmdType().toString(),
            request.toString().replaceAll("\n", "\\\\n"));
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("OzoneManagerProtocol {} request is received",
            request.getCmdType().toString());
      }
      protocolMessageMetrics.increment(request.getCmdType());
      OMResponse omResponse = processRequest(request);
      if (LOG.isTraceEnabled()) {
        LOG.trace(
            "OzoneManagerProtocol {} request is processed. Response: "
                + "<json>{}</json>",
            request.getCmdType().toString(),
            omResponse.toString().replaceAll("\n", "\\\\n"));
      }
      return omResponse;
    } finally {
      scope.close();
    }
  }
  private OMResponse processRequest(OMRequest request) throws
      ServiceException {
    if (isRatisEnabled) {
      // Check if the request is a read only request
      if (OmUtils.isReadOnly(request)) {
        return submitReadRequestToOM(request);
      } else {
        if (omRatisServer.isLeader()) {
          try {
            OMClientRequest omClientRequest =
                OzoneManagerRatisUtils.createClientRequest(request);
            if (omClientRequest != null) {
              request = omClientRequest.preExecute(ozoneManager);
            }
          } catch (IOException ex) {
            // Some of the preExecute calls return an error; handle it here.
            return createErrorResponse(request, ex);
          }
          return submitRequestToRatis(request);
        } else {
          // Throw a not-leader exception. This is done to avoid unnecessary
          // execution of preExecute on follower OMs, which reduces, for
          // example, the chance of allocating blocks on follower OMs. Right
          // now the leader status is updated every second.
          throw createNotLeaderException();
        }
      }
    } else {
      return submitRequestDirectlyToOM(request);
    }
  }
  /**
   * Create OMResponse from the specified OMRequest and exception.
   *
   * @param omRequest
   * @param exception
   * @return OMResponse
@ -153,6 +185,7 @@ private OMResponse createErrorResponse(
      return null;
    }
  }
  /**
   * Submits request to OM's Ratis server.
   */

View File

@ -54,6 +54,7 @@
    <module>upgrade</module>
    <module>csi</module>
    <module>fault-injection-test</module>
    <module>insight</module>
  </modules>
  <repositories>
@ -168,6 +169,11 @@
      <artifactId>hadoop-hdds-tools</artifactId>
      <version>${hdds.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-ozone-insight</artifactId>
      <version>${hdds.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-ozone-recon</artifactId>