HDDS-1318. Fix MalformedTracerStateStringException on DN logs. Contributed by Xiaoyu Yao.

This closes #641
This commit is contained in:
Xiaoyu Yao 2019-03-28 12:00:58 -07:00 committed by GitHub
parent f3f51284d5
commit ca5e4ce036
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 114 additions and 58 deletions

View File

@ -33,6 +33,8 @@
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
import org.apache.hadoop.hdds.security.x509.SecurityConfig;
import org.apache.hadoop.hdds.tracing.GrpcClientInterceptor;
import org.apache.hadoop.hdds.tracing.TracingUtil;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.security.UserGroupInformation;
@ -136,7 +138,8 @@ private void connectToDatanode(DatanodeDetails dn, String encodedToken)
NettyChannelBuilder channelBuilder = NettyChannelBuilder.forAddress(dn
.getIpAddress(), port).usePlaintext()
.maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE)
.intercept(new ClientCredentialInterceptor(userName, encodedToken));
.intercept(new ClientCredentialInterceptor(userName, encodedToken),
new GrpcClientInterceptor());
if (secConfig.isGrpcTlsEnabled()) {
File trustCertCollectionFile = secConfig.getTrustStoreFile();
File privateKeyFile = secConfig.getClientPrivateKeyFile();
@ -204,7 +207,7 @@ public ContainerCommandResponseProto sendCommand(
ContainerCommandRequestProto request) throws IOException {
try {
XceiverClientReply reply;
reply = sendCommandWithRetry(request, null);
reply = sendCommandWithTraceIDAndRetry(request, null);
ContainerCommandResponseProto responseProto = reply.getResponse().get();
return responseProto;
} catch (ExecutionException | InterruptedException e) {
@ -217,7 +220,21 @@ public XceiverClientReply sendCommand(
ContainerCommandRequestProto request, List<DatanodeDetails> excludeDns)
throws IOException {
Preconditions.checkState(HddsUtils.isReadOnly(request));
return sendCommandWithRetry(request, excludeDns);
return sendCommandWithTraceIDAndRetry(request, excludeDns);
}
private XceiverClientReply sendCommandWithTraceIDAndRetry(
ContainerCommandRequestProto request, List<DatanodeDetails> excludeDns)
throws IOException {
try (Scope scope = GlobalTracer.get()
.buildSpan("XceiverClientGrpc." + request.getCmdType().name())
.startActive(true)) {
ContainerCommandRequestProto finalPayload =
ContainerCommandRequestProto.newBuilder(request)
.setTraceID(TracingUtil.exportCurrentSpan())
.build();
return sendCommandWithRetry(finalPayload, excludeDns);
}
}
private XceiverClientReply sendCommandWithRetry(

View File

@ -25,20 +25,27 @@
import io.jaegertracing.internal.exceptions.TraceIdOutOfBoundException;
import io.jaegertracing.spi.Codec;
import io.opentracing.propagation.Format;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A jaeger codec to save the current tracing context t a string.
* A jaeger codec to save the current tracing context as a string.
*/
public class StringCodec implements Codec<StringBuilder> {
public static final Logger LOG = LoggerFactory.getLogger(StringCodec.class);
public static final StringFormat FORMAT = new StringFormat();
@Override
public JaegerSpanContext extract(StringBuilder s) {
if (s == null) {
throw new EmptyTracerStateStringException();
}
String value = s.toString();
if (value != null && !value.equals("")) {
String[] parts = value.split(":");
if (parts.length != 4) {
LOG.trace("MalformedTracerStateString: {}", value);
throw new MalformedTracerStateStringException(value);
} else {
String traceId = parts[0];

View File

@ -0,0 +1,52 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.tracing;
import io.jaegertracing.internal.JaegerSpanContext;
import io.jaegertracing.internal.exceptions.EmptyTracerStateStringException;
import io.jaegertracing.internal.exceptions.MalformedTracerStateStringException;
import org.apache.hadoop.test.LambdaTestUtils;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertTrue;
class TestStringCodec {
@Test
void testExtract() throws Exception {
StringCodec codec = new StringCodec();
LambdaTestUtils.intercept(EmptyTracerStateStringException.class,
() -> codec.extract(null));
StringBuilder sb = new StringBuilder().append("123");
LambdaTestUtils.intercept(MalformedTracerStateStringException.class,
"String does not match tracer state format",
() -> codec.extract(sb));
sb.append(":456:789");
LambdaTestUtils.intercept(MalformedTracerStateStringException.class,
"String does not match tracer state format",
() -> codec.extract(sb));
sb.append(":66");
JaegerSpanContext context = codec.extract(sb);
String expectedContextString = new String("123:456:789:66");
assertTrue(context.getTraceId().equals("123"));
assertTrue(context.toString().equals(expectedContextString));
}
}

View File

@ -0,0 +1,21 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.tracing;
/**
Test cases for ozone tracing.
*/

View File

@ -545,7 +545,6 @@ public static ContainerCommandRequestProto getBlockRequest(
*/
public static void verifyGetBlock(ContainerCommandRequestProto request,
ContainerCommandResponseProto response, int expectedChunksCount) {
Assert.assertEquals(request.getTraceID(), response.getTraceID());
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
Assert.assertEquals(expectedChunksCount,
response.getGetBlock().getBlockData().getChunksCount());

View File

@ -112,8 +112,6 @@ public void testContainerReplication() throws Exception {
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
Assert.assertTrue(
putBlockRequest.getTraceID().equals(response.getTraceID()));
HddsDatanodeService destinationDatanode =
chooseDatanodeWithoutContainer(sourcePipelines,

View File

@ -123,7 +123,6 @@ public void testContainerMetrics() throws Exception {
ContainerCommandRequestProto request = ContainerTestHelper
.getCreateContainerRequest(containerID, pipeline);
ContainerCommandResponseProto response = client.sendCommand(request);
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
Assert.assertEquals(ContainerProtos.Result.SUCCESS,
response.getResult());

View File

@ -158,7 +158,6 @@ static void runTestOzoneContainerViaDataNode(
response = client.sendCommand(request);
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
// Put Block
putBlockRequest = ContainerTestHelper.getPutBlockRequest(
@ -168,8 +167,6 @@ static void runTestOzoneContainerViaDataNode(
response = client.sendCommand(putBlockRequest);
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
Assert.assertTrue(putBlockRequest.getTraceID()
.equals(response.getTraceID()));
// Get Block
request = ContainerTestHelper.
@ -187,7 +184,6 @@ static void runTestOzoneContainerViaDataNode(
response = client.sendCommand(request);
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
//Delete Chunk
request = ContainerTestHelper.getDeleteChunkRequest(
@ -196,7 +192,6 @@ static void runTestOzoneContainerViaDataNode(
response = client.sendCommand(request);
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
//Update an existing container
Map<String, String> containerUpdate = new HashMap<String, String>();
@ -259,8 +254,6 @@ static void runTestBothGetandPutSmallFile(
ContainerProtos.ContainerCommandResponseProto response
= client.sendCommand(smallFileRequest);
Assert.assertNotNull(response);
Assert.assertTrue(smallFileRequest.getTraceID()
.equals(response.getTraceID()));
final ContainerProtos.ContainerCommandRequestProto getSmallFileRequest
= ContainerTestHelper.getReadSmallFileRequest(client.getPipeline(),
@ -310,8 +303,6 @@ public void testCloseContainer() throws Exception {
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS,
response.getResult());
Assert.assertTrue(
putBlockRequest.getTraceID().equals(response.getTraceID()));
// Close the contianer.
request = ContainerTestHelper.getCloseContainer(
@ -319,7 +310,6 @@ public void testCloseContainer() throws Exception {
response = client.sendCommand(request);
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
// Assert that none of the write operations are working after close.
@ -330,8 +320,6 @@ public void testCloseContainer() throws Exception {
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.CLOSED_CONTAINER_IO,
response.getResult());
Assert.assertTrue(
writeChunkRequest.getTraceID().equals(response.getTraceID()));
// Read chunk must work on a closed container.
request = ContainerTestHelper.getReadChunkRequest(client.getPipeline(),
@ -339,16 +327,12 @@ public void testCloseContainer() throws Exception {
response = client.sendCommand(request);
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
// Put block will fail on a closed container.
response = client.sendCommand(putBlockRequest);
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.CLOSED_CONTAINER_IO,
response.getResult());
Assert.assertTrue(putBlockRequest.getTraceID()
.equals(response.getTraceID()));
// Get block must work on the closed container.
request = ContainerTestHelper.getBlockRequest(client.getPipeline(),
@ -366,7 +350,6 @@ public void testCloseContainer() throws Exception {
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.CLOSED_CONTAINER_IO,
response.getResult());
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
} finally {
if (client != null) {
client.close();
@ -407,8 +390,6 @@ public void testDeleteContainer() throws Exception {
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS,
response.getResult());
Assert.assertTrue(
putBlockRequest.getTraceID().equals(response.getTraceID()));
// Container cannot be deleted because force flag is set to false and
// the container is still open
@ -419,7 +400,6 @@ public void testDeleteContainer() throws Exception {
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.DELETE_ON_OPEN_CONTAINER,
response.getResult());
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
// Container can be deleted, by setting force flag, even with out closing
request = ContainerTestHelper.getDeleteContainer(
@ -429,7 +409,6 @@ public void testDeleteContainer() throws Exception {
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS,
response.getResult());
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
} finally {
if (client != null) {
@ -524,7 +503,6 @@ public static void createContainerForTesting(XceiverClientSpi client,
ContainerProtos.ContainerCommandResponseProto response =
client.sendCommand(request);
Assert.assertNotNull(response);
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
}
public static ContainerProtos.ContainerCommandRequestProto
@ -539,30 +517,6 @@ public static void createContainerForTesting(XceiverClientSpi client,
client.sendCommand(writeChunkRequest);
Assert.assertNotNull(response);
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
Assert.assertTrue(response.getTraceID().equals(response.getTraceID()));
return writeChunkRequest;
}
static void runRequestWithoutTraceId(
long containerID, XceiverClientSpi client) throws Exception {
try {
client.connect();
createContainerForTesting(client, containerID);
BlockID blockID = ContainerTestHelper.getTestBlockID(containerID);
final ContainerProtos.ContainerCommandRequestProto smallFileRequest
= ContainerTestHelper.getWriteSmallFileRequest(
client.getPipeline(), blockID, 1024);
ContainerProtos.ContainerCommandResponseProto response
= client.sendCommand(smallFileRequest);
Assert.assertNotNull(response);
Assert.assertTrue(smallFileRequest.getTraceID()
.equals(response.getTraceID()));
} finally {
if (client != null) {
client.close();
}
}
}
}

View File

@ -178,7 +178,6 @@ public static void createContainerForTesting(XceiverClientSpi client,
ContainerProtos.ContainerCommandResponseProto response =
client.sendCommand(request);
Assert.assertNotNull(response);
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
}
private StateContext getContext(DatanodeDetails datanodeDetails) {

View File

@ -212,7 +212,6 @@ public static void createContainerForTesting(XceiverClientSpi client,
ContainerProtos.ContainerCommandResponseProto response =
client.sendCommand(request);
Assert.assertNotNull(response);
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
}
private StateContext getContext(DatanodeDetails datanodeDetails) {

View File

@ -188,7 +188,6 @@ static void runTestClientServer(
Assert.assertNotNull(request.getTraceID());
ContainerCommandResponseProto response = client.sendCommand(request);
Assert.assertEquals(request.getTraceID(), response.getTraceID());
} finally {
if (client != null) {
client.close();
@ -245,7 +244,6 @@ caClient, createReplicationService(
ContainerTestHelper.getCreateContainerRequest(
ContainerTestHelper.getTestContainerID(), pipeline);
ContainerCommandResponseProto response = client.sendCommand(request);
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
} finally {
if (client != null) {

View File

@ -40,6 +40,7 @@
import org.apache.hadoop.hdds.client.ReplicationFactor;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.tracing.StringCodec;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.OzoneAcl;
@ -80,9 +81,12 @@
import org.junit.Assert;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.slf4j.event.Level.TRACE;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
@ -884,6 +888,9 @@ public void testPutKey() throws Exception {
@Test
public void testGetKey() throws Exception {
GenericTestUtils.LogCapturer logs = GenericTestUtils.LogCapturer
.captureLogs(StringCodec.LOG);
GenericTestUtils.setLogLevel(StringCodec.LOG, TRACE);
LOG.info("Running testGetKey");
String keyName = "key" + RandomStringUtils.randomNumeric(5);
OzoneBucket bucket = creatBucket();
@ -895,6 +902,9 @@ public void testGetKey() throws Exception {
bucket.createKey(keyName, dataStr.length());
keyOutputStream.write(dataStr.getBytes());
keyOutputStream.close();
assertFalse("put key without malformed tracing",
logs.getOutput().contains("MalformedTracerStateString"));
logs.clearOutput();
String tmpPath = baseDir.getAbsolutePath() + "/testfile-"
+ UUID.randomUUID().toString();
@ -902,6 +912,9 @@ public void testGetKey() throws Exception {
url + "/" + volumeName + "/" + bucketName + "/" + keyName,
tmpPath};
execute(shell, args);
assertFalse("get key without malformed tracing",
logs.getOutput().contains("MalformedTracerStateString"));
logs.clearOutput();
byte[] dataBytes = new byte[dataStr.length()];
try (FileInputStream randFile = new FileInputStream(new File(tmpPath))) {