HBASE-24480: Deflake TestRSGroupsBasics#testClearDeadServers (#1821)

See the comments on the JIRA issue (HBASE-24480) for more details about the flakiness.

Signed-off-by: Reid Chan <reidchan@apache.org>
Signed-off-by: Viraj Jasani <vjasani@apache.org>
This commit is contained in:
Bharath Vissapragada 2020-06-01 09:11:27 -07:00 committed by GitHub
parent cd0aaed929
commit d889c7b442
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 7 deletions

View File

@ -1166,13 +1166,16 @@ public class RSGroupAdminEndpoint extends RSGroupAdminService
public void postClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx, public void postClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx,
List<ServerName> servers, List<ServerName> notClearedServers) List<ServerName> servers, List<ServerName> notClearedServers)
throws IOException { throws IOException {
Set<Address> clearedServer = Sets.newHashSet(); Set<Address> clearedServers = Sets.newHashSet();
for (ServerName server: servers) { for (ServerName server: servers) {
if (!notClearedServers.contains(server)) { if (!notClearedServers.contains(server)) {
clearedServer.add(server.getAddress()); clearedServers.add(server.getAddress());
} }
} }
groupAdminServer.removeServers(clearedServer); if (clearedServers.isEmpty()) {
return;
}
groupAdminServer.removeServers(clearedServers);
} }
@Override @Override

View File

@ -26,7 +26,6 @@ import static org.junit.Assert.assertTrue;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -212,9 +211,15 @@ public class TestRSGroupsBasics extends TestRSGroupsBase {
assertTrue(newGroup.getServers().contains(serverToStop.getAddress())); assertTrue(newGroup.getServers().contains(serverToStop.getAddress()));
// clear dead servers list // clear dead servers list
List<ServerName> notClearedServers = admin.clearDeadServers(Lists.newArrayList(serverToStop)); // We need to retry here because the clearDeadServers() RPC may race with currently processing
assertEquals(0, notClearedServers.size()); // dead servers in the ServerManager and might not succeed.
final ServerName finalServerToStop = serverToStop;
TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return admin.clearDeadServers(Lists.newArrayList(finalServerToStop)).isEmpty();
}
});
// verify if the stopped region server gets cleared and removed from the group // verify if the stopped region server gets cleared and removed from the group
Set<Address> newGroupServers = rsGroupAdmin.getRSGroupInfo(newGroup.getName()).getServers(); Set<Address> newGroupServers = rsGroupAdmin.getRSGroupInfo(newGroup.getName()).getServers();
assertFalse(newGroupServers.contains(serverToStop.getAddress())); assertFalse(newGroupServers.contains(serverToStop.getAddress()));