HBASE-27793 Make HBCK be able to report unknown servers (#5229)

Co-authored-by: Rajeshbabu Chintaguntla <rajeshbabu@apache.org>
This commit is contained in:
Rajeshbabu Chintaguntla 2023-05-25 08:58:26 +05:30 committed by GitHub
parent 256e5e9cd8
commit d7b2fb463e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 38 additions and 1 deletions

View File

@ -655,6 +655,8 @@ public class HBaseFsck extends Configured implements Closeable {
loadDeployedRegions();
// check whether hbase:meta is deployed and online
recordMetaRegion();
// Report inconsistencies if there are any unknown servers.
reportUnknownServers();
// Check if hbase:meta is found only once and in the right place
if (!checkMetaRegion()) {
String errorMsg = "hbase:meta table is not consistent. ";
@ -707,6 +709,18 @@ public class HBaseFsck extends Configured implements Closeable {
return errors.getErrorList().size();
}
/**
 * Reports each server returned by {@code admin.listUnknownServers()} as an
 * {@link ERROR_CODE#UNKNOWN_SERVER} error, one error per server. Each message includes the HBCK2
 * {@code scheduleRecoveries} command for that server.
 * @throws IOException if {@code admin.listUnknownServers()} fails
 */
private void reportUnknownServers() throws IOException {
  List<ServerName> unknownServers = admin.listUnknownServers();
  // Iterating an empty list reports nothing, so no separate emptiness check is needed.
  for (ServerName serverName : unknownServers) {
    errors.reportError(ERROR_CODE.UNKNOWN_SERVER,
      // Note the space after the comma: the two fragments are concatenated into one sentence.
      "Found unknown server, "
        + "some of the regions held by this server may not get assigned. "
        + String.format("Use HBCK2 scheduleRecoveries %s to recover.", serverName));
  }
}
/**
* This method maintains an ephemeral znode. If the creation fails we return false or throw
* exception

View File

@ -65,7 +65,8 @@ public interface HbckErrorReporter {
UNDELETED_REPLICATION_QUEUE,
DUPE_ENDKEYS,
UNSUPPORTED_OPTION,
INVALID_TABLE
INVALID_TABLE,
UNKNOWN_SERVER
}
void clear();

View File

@ -40,6 +40,9 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
@ -552,4 +555,23 @@ public class TestAdmin extends TestAdminBase {
ADMIN.listTableDescriptors();
assertFalse(ADMIN.tableExists(tableName));
}
/**
 * Creates a table, temporarily rewrites the location recorded in the first region's
 * {@link RegionStateNode} to a made-up server named "dummyserver", and checks that
 * {@code ADMIN.listUnknownServers()} lists that host first. The original location is restored
 * afterwards and the list is then expected to be empty.
 */
@Test
public void testUnknownServers() throws Exception {
  TableName table = TableName.valueOf(name.getMethodName());
  ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY);
  ADMIN.createTable(TableDescriptorBuilder.newBuilder(table).setColumnFamily(cfd).build());
  final List<RegionInfo> regions = ADMIN.getRegions(table);
  HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
  final AssignmentManager am = master.getAssignmentManager();
  RegionStateNode rsNode = am.getRegionStates().getRegionStateNode(regions.get(0));
  // Remember the real location so it can be put back in the finally block.
  ServerName regionLocation = rsNode.getRegionLocation();
  rsNode.setRegionLocation(ServerName.valueOf("dummyserver", 1234, System.currentTimeMillis()));
  try {
    List<ServerName> unknownServers = ADMIN.listUnknownServers();
    // Fail with an assertion (not an IndexOutOfBoundsException) when nothing is reported.
    assertFalse("Expected dummyserver to be listed as an unknown server",
      unknownServers.isEmpty());
    assertTrue("dummyserver".equals(unknownServers.get(0).getHostname()));
  } finally {
    // Always restore the original location, even when an assertion above fails.
    rsNode.setRegionLocation(regionLocation);
  }
  assertTrue(ADMIN.listUnknownServers().isEmpty());
}
}