HBASE-19769 Remove ZK metrics because of classloader issues

When we run MapReduce jobs via `yarn jar`, the special classloader
which is set up by YARN creates a situation where our invocation of
package-private Hadoop classes throws an IllegalAccessError. It's
easiest to just remove the ZooKeeper metrics and rethink how to avoid
further Hadoop metrics2 issues.

Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
Josh Elser 2018-01-11 18:14:32 -05:00
parent 6bacb643bc
commit 057e80c163
4 changed files with 0 additions and 357 deletions

View File

@ -82,7 +82,6 @@ public class RecoverableZooKeeper {
private Watcher watcher;
private int sessionTimeout;
private String quorumServers;
private final ZKMetricsListener metrics;
public RecoverableZooKeeper(String quorumServers, int sessionTimeout,
Watcher watcher, int maxRetries, int retryIntervalMillis, int maxSleepTime)
@ -112,7 +111,6 @@ public class RecoverableZooKeeper {
this.watcher = watcher;
this.sessionTimeout = sessionTimeout;
this.quorumServers = quorumServers;
this.metrics = new ZKMetrics();
try {
checkZk();
@ -166,11 +164,8 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
checkZk().delete(path, version);
this.metrics.registerWriteOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case NONODE:
if (isRetry) {
@ -182,11 +177,9 @@ public class RecoverableZooKeeper {
throw e;
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "delete");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "delete");
break;
@ -211,18 +204,13 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
Stat nodeStat = checkZk().exists(path, watcher);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return nodeStat;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "exists");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "exists");
break;
@ -246,18 +234,13 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
Stat nodeStat = checkZk().exists(path, watch);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return nodeStat;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "exists");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "exists");
break;
@ -293,18 +276,13 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
List<String> children = checkZk().getChildren(path, watcher);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return children;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "getChildren");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "getChildren");
break;
@ -329,18 +307,13 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
List<String> children = checkZk().getChildren(path, watch);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return children;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "getChildren");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "getChildren");
break;
@ -365,18 +338,13 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
byte[] revData = checkZk().getData(path, watcher, stat);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return ZKMetadata.removeMetaData(revData);
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "getData");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "getData");
break;
@ -401,18 +369,13 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
byte[] revData = checkZk().getData(path, watch, stat);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return ZKMetadata.removeMetaData(revData);
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "getData");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "getData");
break;
@ -442,18 +405,13 @@ public class RecoverableZooKeeper {
try {
startTime = EnvironmentEdgeManager.currentTime();
Stat nodeStat = checkZk().setData(path, newData, version);
this.metrics.registerWriteOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return nodeStat;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "setData");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "setData");
break;
case BADVERSION:
@ -463,14 +421,11 @@ public class RecoverableZooKeeper {
Stat stat = new Stat();
startTime = EnvironmentEdgeManager.currentTime();
byte[] revData = checkZk().getData(path, false, stat);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
if(Bytes.compareTo(revData, newData) == 0) {
// the bad version is caused by previous successful setData
return stat;
}
} catch(KeeperException keeperException){
this.metrics.registerFailedZKCall();
// the ZK is not reliable at this moment. just throwing exception
throw keeperException;
}
@ -498,18 +453,13 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
List<ACL> nodeACL = checkZk().getACL(path, stat);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return nodeACL;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "getAcl");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "getAcl");
break;
@ -534,18 +484,13 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
Stat nodeStat = checkZk().setACL(path, acls, version);
this.metrics.registerWriteOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return nodeStat;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "setAcl");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "setAcl");
break;
@ -603,11 +548,8 @@ public class RecoverableZooKeeper {
try {
startTime = EnvironmentEdgeManager.currentTime();
String nodePath = checkZk().create(path, data, acl, createMode);
this.metrics.registerWriteOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return nodePath;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case NODEEXISTS:
if (isRetry) {
@ -616,8 +558,6 @@ public class RecoverableZooKeeper {
// so we read the node and compare.
startTime = EnvironmentEdgeManager.currentTime();
byte[] currentData = checkZk().getData(path, false, null);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
if (currentData != null &&
Bytes.compareTo(currentData, data) == 0) {
// We successfully created a non-sequential node
@ -632,11 +572,9 @@ public class RecoverableZooKeeper {
throw e;
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "create");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "create");
break;
@ -667,18 +605,13 @@ public class RecoverableZooKeeper {
first = false;
long startTime = EnvironmentEdgeManager.currentTime();
String nodePath = checkZk().create(newPath, data, acl, createMode);
this.metrics.registerWriteOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return nodePath;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "create");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "create");
break;
@ -730,18 +663,13 @@ public class RecoverableZooKeeper {
try {
long startTime = EnvironmentEdgeManager.currentTime();
List<OpResult> opResults = checkZk().multi(multiOps);
this.metrics.registerWriteOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
return opResults;
} catch (KeeperException e) {
this.metrics.registerFailedZKCall();
switch (e.code()) {
case CONNECTIONLOSS:
this.metrics.registerConnectionLossException();
retryOrThrow(retryCounter, e, "multi");
break;
case OPERATIONTIMEOUT:
this.metrics.registerOperationTimeoutException();
retryOrThrow(retryCounter, e, "multi");
break;
@ -762,15 +690,11 @@ public class RecoverableZooKeeper {
String nodePrefix = path.substring(lastSlashIdx+1);
long startTime = EnvironmentEdgeManager.currentTime();
List<String> nodes = checkZk().getChildren(parent, false);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
List<String> matching = filterByPrefix(nodes, nodePrefix);
for (String node : matching) {
String nodePath = parent + "/" + node;
startTime = EnvironmentEdgeManager.currentTime();
Stat stat = checkZk().exists(nodePath, false);
this.metrics.registerReadOperationLatency(
Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
if (stat != null) {
return nodePath;
}
@ -803,8 +727,6 @@ public class RecoverableZooKeeper {
public void sync(String path, AsyncCallback.VoidCallback cb, Object ctx) throws KeeperException {
  // Only the submission of the request is timed here; the outcome is presumably delivered
  // later through cb (TODO confirm against the ZooKeeper client in use).
  long startTime = EnvironmentEdgeManager.currentTime();
  // Forward the caller-supplied ctx. Previously null was passed, so any context object the
  // caller provided was silently dropped before reaching the callback.
  checkZk().sync(path, cb, ctx);
  // NOTE(review): Math.min(elapsed, 1) caps every recorded sample at 1, which looks
  // unintended (Math.max?). Left as-is so sync stays consistent with the other operations
  // in this class that record latency the same way.
  this.metrics.registerSyncOperationLatency(
    Math.min(EnvironmentEdgeManager.currentTime() - startTime, 1));
}
/**

View File

@ -1,108 +0,0 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.zookeeper;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
/**
 * Adapter that pushes ZooKeeper numbers into the metrics subsystem. Each
 * {@link ZKMetricsListener} callback is forwarded to the matching counter or latency recorder
 * on a {@link MetricsZooKeeperSource}.
 */
@InterfaceAudience.Private
public class ZKMetrics implements ZKMetricsListener {

  /** Destination for every counter increment and latency sample. */
  private final MetricsZooKeeperSource zkSource;

  /**
   * Builds an instance backed by the {@link MetricsZooKeeperSource} returned by
   * {@link CompatibilitySingletonFactory#getInstance(Class)}.
   */
  public ZKMetrics() {
    this(CompatibilitySingletonFactory.getInstance(MetricsZooKeeperSource.class));
  }

  /**
   * Builds an instance that reports to the given source.
   *
   * @param s the source that receives all updates
   */
  @VisibleForTesting
  public ZKMetrics(MetricsZooKeeperSource s) {
    zkSource = s;
  }

  // ---- exception counters ----

  @Override
  public void registerAuthFailedException() {
    zkSource.incrementAuthFailedCount();
  }

  @Override
  public void registerConnectionLossException() {
    zkSource.incrementConnectionLossCount();
  }

  @Override
  public void registerDataInconsistencyException() {
    zkSource.incrementDataInconsistencyCount();
  }

  @Override
  public void registerInvalidACLException() {
    zkSource.incrementInvalidACLCount();
  }

  @Override
  public void registerNoAuthException() {
    zkSource.incrementNoAuthCount();
  }

  @Override
  public void registerOperationTimeoutException() {
    zkSource.incrementOperationTimeoutCount();
  }

  @Override
  public void registerRuntimeInconsistencyException() {
    zkSource.incrementRuntimeInconsistencyCount();
  }

  @Override
  public void registerSessionExpiredException() {
    zkSource.incrementSessionExpiredCount();
  }

  @Override
  public void registerSystemErrorException() {
    zkSource.incrementSystemErrorCount();
  }

  // ---- overall failure counter ----

  @Override
  public void registerFailedZKCall() {
    zkSource.incrementTotalFailedZKCalls();
  }

  // ---- latency recorders ----

  @Override
  public void registerReadOperationLatency(long latency) {
    zkSource.recordReadOperationLatency(latency);
  }

  @Override
  public void registerWriteOperationLatency(long latency) {
    zkSource.recordWriteOperationLatency(latency);
  }

  @Override
  public void registerSyncOperationLatency(long latency) {
    zkSource.recordSyncOperationLatency(latency);
  }
}

View File

@ -1,91 +0,0 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.zookeeper;
import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
public interface ZKMetricsListener {

  // Callback contract for ZooKeeper client events: one hook per tracked exception code, one
  // for failed calls overall, and one per latency category (read, write, sync).

  /** Called when an {@code AUTHFAILED} exception has been observed. */
  void registerAuthFailedException();

  /** Called when a {@code CONNECTIONLOSS} exception has been observed. */
  void registerConnectionLossException();

  /** Called when a {@code DATAINCONSISTENCY} exception has been observed. */
  void registerDataInconsistencyException();

  /** Called when an {@code INVALIDACL} exception has been observed. */
  void registerInvalidACLException();

  /** Called when a {@code NOAUTH} exception has been observed. */
  void registerNoAuthException();

  /** Called when an {@code OPERATIONTIMEOUT} exception has been observed. */
  void registerOperationTimeoutException();

  /** Called when a {@code RUNTIMEINCONSISTENCY} exception has been observed. */
  void registerRuntimeInconsistencyException();

  /** Called when a {@code SESSIONEXPIRED} exception has been observed. */
  void registerSessionExpiredException();

  /** Called when a {@code SYSTEMERROR} exception has been observed. */
  void registerSystemErrorException();

  /** Called when a ZooKeeper API call has failed. */
  void registerFailedZKCall();

  /**
   * Reports the latency incurred by a read operation.
   *
   * @param latency the observed latency (presumably milliseconds; TODO confirm with callers)
   */
  void registerReadOperationLatency(long latency);

  /**
   * Reports the latency incurred by a write operation.
   *
   * @param latency the observed latency (presumably milliseconds; TODO confirm with callers)
   */
  void registerWriteOperationLatency(long latency);

  /**
   * Reports the latency incurred by a sync operation.
   *
   * @param latency the observed latency (presumably milliseconds; TODO confirm with callers)
   */
  void registerSyncOperationLatency(long latency);
}

View File

@ -1,80 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.zookeeper;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.testclassification.ZKTests;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({ ZKTests.class, SmallTests.class })
public class TestZKMetrics {

  /**
   * Every exception hook and the failed-call hook must increment its matching counter on the
   * source exactly once per invocation.
   */
  @Test
  public void testRegisterExceptions() {
    MetricsZooKeeperSource mockSource = mock(MetricsZooKeeperSourceImpl.class);
    ZKMetrics metrics = new ZKMetrics(mockSource);

    // Fire the hooks; connection loss, operation timeout and system error are fired twice.
    metrics.registerAuthFailedException();
    metrics.registerConnectionLossException();
    metrics.registerConnectionLossException();
    metrics.registerDataInconsistencyException();
    metrics.registerInvalidACLException();
    metrics.registerNoAuthException();
    metrics.registerOperationTimeoutException();
    metrics.registerOperationTimeoutException();
    metrics.registerRuntimeInconsistencyException();
    metrics.registerSessionExpiredException();
    metrics.registerSystemErrorException();
    metrics.registerSystemErrorException();
    metrics.registerFailedZKCall();

    // The source must have seen the same number of increments per counter.
    verify(mockSource, times(1)).incrementAuthFailedCount();
    verify(mockSource, times(2)).incrementConnectionLossCount();
    verify(mockSource, times(1)).incrementDataInconsistencyCount();
    verify(mockSource, times(1)).incrementInvalidACLCount();
    verify(mockSource, times(1)).incrementNoAuthCount();
    verify(mockSource, times(2)).incrementOperationTimeoutCount();
    verify(mockSource, times(1)).incrementRuntimeInconsistencyCount();
    verify(mockSource, times(1)).incrementSessionExpiredCount();
    verify(mockSource, times(2)).incrementSystemErrorCount();
    verify(mockSource, times(1)).incrementTotalFailedZKCalls();
  }

  /**
   * Latency hooks must pass the reported value through to the matching recorder on the source.
   */
  @Test
  public void testLatencyHistogramUpdates() {
    MetricsZooKeeperSource mockSource = mock(MetricsZooKeeperSourceImpl.class);
    ZKMetrics metrics = new ZKMetrics(mockSource);
    long sample = 100;

    // Two read samples, one write sample, one sync sample.
    metrics.registerReadOperationLatency(sample);
    metrics.registerReadOperationLatency(sample);
    metrics.registerWriteOperationLatency(sample);
    metrics.registerSyncOperationLatency(sample);

    verify(mockSource, times(2)).recordReadOperationLatency(sample);
    verify(mockSource, times(1)).recordWriteOperationLatency(sample);
    verify(mockSource, times(1)).recordSyncOperationLatency(sample);
  }
}