HDFS-16942. Send error to datanode if FBR is rejected due to bad lease (#5460)
parent 734f7abfb8
commit ca6f5afb6d
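In brief (summarizing the hunks below): when the NameNode finds no valid full block report (FBR) lease for a reporting datanode, it now throws the new InvalidBlockReportLeaseException back to the datanode over RPC, and the datanode's BPServiceActor recognizes that exception class in its RemoteException handler and resets its cached lease id to 0 so that a fresh lease is requested on the next heartbeat. A minimal sketch of the datanode-side handling, assuming the field and variable names used in the hunks below and eliding the surrounding offerService loop:

import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
import org.apache.hadoop.ipc.RemoteException;

class FbrLeaseHandlingSketch {
  // Lease id handed out by the NameNode in a heartbeat response;
  // 0 means "no lease held, request one with the next heartbeat".
  private long fullBlockReportLeaseId;

  // Sketch of the new branch added to the catch (RemoteException re)
  // block of BPServiceActor#offerService.
  void onRemoteException(RemoteException re) {
    String reClass = re.getClassName();
    if (InvalidBlockReportLeaseException.class.getName().equals(reClass)) {
      // The NameNode rejected the FBR lease (expired or unknown), so drop
      // the cached id; the next heartbeat will ask for a new lease.
      fullBlockReportLeaseId = 0;
    }
  }
}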
@@ -126,6 +126,12 @@
 <exclude>org/apache/hadoop/yarn/client/api/package-info.class</exclude>
 </excludes>
 </filter>
+<filter>
+<artifact>org.apache.hadoop:*</artifact>
+<excludes>
+<exclude>org/apache/hadoop/hdfs/server/protocol/package-info.class</exclude>
+</excludes>
+</filter>
 </filters>
 <relocations>
 <relocation>

@@ -63,6 +63,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
+import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports;

@@ -791,6 +792,9 @@ class BPServiceActor implements Runnable {
           shouldServiceRun = false;
           return;
         }
+        if (InvalidBlockReportLeaseException.class.getName().equals(reClass)) {
+          fullBlockReportLeaseId = 0;
+        }
         LOG.warn("RemoteException in offerService", re);
         sleepAfterException();
       } catch (IOException e) {

@@ -172,6 +172,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
 import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand;
 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
+import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;

@@ -1651,6 +1652,8 @@ public class NameNodeRpcServer implements NamenodeProtocols {
               bm.processReport(nodeReg, reports[index].getStorage(),
                   blocks, context));
         }
+      } else {
+        throw new InvalidBlockReportLeaseException(context.getReportId(), context.getLeaseId());
       }
     } catch (UnregisteredNodeException une) {
       LOG.warn("Datanode {} is attempting to report but not register yet.",

@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.protocol;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * This exception is thrown when a datanode sends a full block report but it is
+ * rejected by the Namenode due to an invalid lease (expired or otherwise).
+ *
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class InvalidBlockReportLeaseException extends IOException {
+  /** for java.io.Serializable. */
+  private static final long serialVersionUID = 1L;
+
+  public InvalidBlockReportLeaseException(long blockReportID, long leaseID) {
+    super("Block report 0x" + Long.toHexString(blockReportID) + " was rejected as lease 0x"
+        + Long.toHexString(leaseID) + " is invalid");
+  }
+}

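For reference, the message this constructor produces looks like the following (the ids here are arbitrary example values, not from the patch):

  InvalidBlockReportLeaseException ex =
      new InvalidBlockReportLeaseException(0x1f4aL, 0x0L);
  // ex.getMessage():
  // Block report 0x1f4a was rejected as lease 0x0 is invalid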
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package provides classes for the namenode server protocol.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+package org.apache.hadoop.hdfs.server.protocol;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;

@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand;
 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
+import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.RegisterCommand;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;

@@ -41,12 +42,14 @@ import org.junit.Test;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.spy;

@@ -137,6 +140,72 @@ public class TestBlockReportLease {
     }
   }
 
+  @Test
+  public void testExceptionThrownWhenFBRLeaseExpired() throws Exception {
+    HdfsConfiguration conf = new HdfsConfiguration();
+    Random rand = new Random();
+
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(1).build()) {
+      cluster.waitActive();
+
+      FSNamesystem fsn = cluster.getNamesystem();
+      BlockManager blockManager = fsn.getBlockManager();
+      BlockManager spyBlockManager = spy(blockManager);
+      fsn.setBlockManagerForTesting(spyBlockManager);
+      String poolId = cluster.getNamesystem().getBlockPoolId();
+
+      NamenodeProtocols rpcServer = cluster.getNameNodeRpc();
+
+      // Test based on one DataNode report to Namenode
+      DataNode dn = cluster.getDataNodes().get(0);
+      DatanodeDescriptor datanodeDescriptor = spyBlockManager
+          .getDatanodeManager().getDatanode(dn.getDatanodeId());
+
+      DatanodeRegistration dnRegistration = dn.getDNRegistrationForBP(poolId);
+      StorageReport[] storages = dn.getFSDataset().getStorageReports(poolId);
+
+      // Send heartbeat and request full block report lease
+      HeartbeatResponse hbResponse = rpcServer.sendHeartbeat(
+          dnRegistration, storages, 0, 0, 0, 0, 0, null, true,
+          SlowPeerReports.EMPTY_REPORT, SlowDiskReports.EMPTY_REPORT);
+
+      // Remove full block report lease about dn
+      spyBlockManager.getBlockReportLeaseManager()
+          .removeLease(datanodeDescriptor);
+
+      ExecutorService pool = Executors.newFixedThreadPool(1);
+
+      // Trigger sendBlockReport
+      BlockReportContext brContext = new BlockReportContext(1, 0,
+          rand.nextLong(), hbResponse.getFullBlockReportLeaseId());
+      Future<DatanodeCommand> sendBRfuturea = pool.submit(() -> {
+        // Build every storage with 100 blocks for sending report
+        DatanodeStorage[] datanodeStorages
+            = new DatanodeStorage[storages.length];
+        for (int i = 0; i < storages.length; i++) {
+          datanodeStorages[i] = storages[i].getStorage();
+        }
+        StorageBlockReport[] reports = createReports(datanodeStorages, 100);
+
+        // Send blockReport
+        return rpcServer.blockReport(dnRegistration, poolId, reports,
+            brContext);
+      });
+
+      // The lease was removed, so the report should be rejected with an
+      // ExecutionException wrapping InvalidBlockReportLeaseException
+      ExecutionException exception = null;
+      try {
+        sendBRfuturea.get();
+      } catch (ExecutionException e) {
+        exception = e;
+      }
+      assertNotNull(exception);
+      assertEquals(InvalidBlockReportLeaseException.class,
+          exception.getCause().getClass());
+    }
+  }
+
   @Test
   public void testCheckBlockReportLeaseWhenDnUnregister() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration();

@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.StorageType;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 
 import static org.apache.hadoop.test.MetricsAsserts.assertCounter;

@@ -39,7 +40,6 @@ import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.IOException;
-import java.net.ConnectException;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.Collections;

@@ -1187,8 +1187,9 @@ public class TestBPOfferService {
       // just reject and wait until DN request for a new leaseId
       if(leaseId == 1) {
         firstLeaseId = leaseId;
-        throw new ConnectException(
-            "network is not reachable for test. ");
+        InvalidBlockReportLeaseException e =
+            new InvalidBlockReportLeaseException(context.getReportId(), 1);
+        throw new RemoteException(e.getClass().getName(), e.getMessage());
       } else {
         secondLeaseId = leaseId;
         return null;