HBASE-22723 Have CatalogJanitor report holes and overlaps; i.e. problems it sees when doing its regular scan of hbase:meta
Refactor of CatalogJanitor so it generates a Report on the state of hbase:meta when it runs. Also refactor so CJ runs even if RIT (previous it would punt on running if RIT) so it can generate a 'Report' on the interval regardless. If RIT, it just doesn't go on to do the merge/split GC as it used to. If report finds an issue, dump as a WARN message to the master log. Follow-on is to make the Report actionable/available for the Master to pull when it goes to draw the hbck UI page (could also consider shipping the Report as part of ClusterMetrics?) Adds new, fatter Visitor to CJ, one that generates Report on each run keeping around more findings as it runs. Moved some methods around so class reads better; previous methods were randomly ordered in the class. M hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java Make a few handy methods public. M hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionInfo.java Add utility as defaults on the Inteface; i.e. is this the first region in table, is it last, does a passed region come next, or does passed region overlap this region (added tests for this new stuff). M hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java Bugfix... handle case where buffer passed is null. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java Lots of change, reorg., but mostly adding consistency checking to the visitor used scanning hbase:meta on a period and the generation of a Report on what the scan has found traversing hbase:meta. Added a main so could try the CatalogJanitor against a running cluster. A hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java Fat ugly test for CatalogJanitor consistency checking. M hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionInfo.java Add tests for new functionality in RI. M hbase-shell/src/main/ruby/hbase/table.rb Bug fix for case where meta has a null regioninfo; scan was aborting. Signed-off-by: Andrew Purtell <apurtell@apache.org> Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
This commit is contained in:
parent
f68cda32e1
commit
0c80d5b42b
|
@ -810,7 +810,7 @@ public class MetaTableAccessor {
|
|||
* Returns the column family used for meta columns.
|
||||
* @return HConstants.CATALOG_FAMILY.
|
||||
*/
|
||||
private static byte[] getCatalogFamily() {
|
||||
public static byte[] getCatalogFamily() {
|
||||
return HConstants.CATALOG_FAMILY;
|
||||
}
|
||||
|
||||
|
@ -826,7 +826,7 @@ public class MetaTableAccessor {
|
|||
* Returns the column qualifier for serialized region info
|
||||
* @return HConstants.REGIONINFO_QUALIFIER
|
||||
*/
|
||||
private static byte[] getRegionInfoColumn() {
|
||||
public static byte[] getRegionInfoColumn() {
|
||||
return HConstants.REGIONINFO_QUALIFIER;
|
||||
}
|
||||
|
||||
|
@ -1049,7 +1049,7 @@ public class MetaTableAccessor {
|
|||
* @return An RegionInfo instance or null.
|
||||
*/
|
||||
@Nullable
|
||||
private static RegionInfo getRegionInfo(final Result r, byte [] qualifier) {
|
||||
public static RegionInfo getRegionInfo(final Result r, byte [] qualifier) {
|
||||
Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier);
|
||||
if (cell == null) return null;
|
||||
return RegionInfo.parseFromOrNull(cell.getValueArray(),
|
||||
|
|
|
@ -70,6 +70,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
|
|||
*/
|
||||
@InterfaceAudience.Public
|
||||
public interface RegionInfo {
|
||||
public static final RegionInfo UNDEFINED =
|
||||
RegionInfoBuilder.newBuilder(TableName.valueOf("__UNDEFINED__")).build();
|
||||
/**
|
||||
* Separator used to demarcate the encodedName in a region name
|
||||
* in the new format. See description on new format above.
|
||||
|
@ -775,4 +777,55 @@ public interface RegionInfo {
|
|||
}
|
||||
return ris;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return True if this is first Region in Table
|
||||
*/
|
||||
default boolean isFirst() {
|
||||
return Bytes.equals(getStartKey(), HConstants.EMPTY_START_ROW);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if this is last Region in Table
|
||||
*/
|
||||
default boolean isLast() {
|
||||
return Bytes.equals(getEndKey(), HConstants.EMPTY_START_ROW);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if regions are adjacent, if 'after' next. Does not do tablename compare.
|
||||
*/
|
||||
default boolean isNext(RegionInfo after) {
|
||||
return Bytes.equals(getEndKey(), after.getStartKey());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if RegionInfo is degenerate... if startKey > endKey.
|
||||
*/
|
||||
default boolean isDegenerate() {
|
||||
return !isLast() && Bytes.compareTo(getStartKey(), getEndKey()) > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if an overlap in region range. Does not do tablename compare.
|
||||
* Does not check if <code>other</code> has degenerate range.
|
||||
* @see #isDegenerate()
|
||||
*/
|
||||
default boolean isOverlap(RegionInfo other) {
|
||||
int startKeyCompare = Bytes.compareTo(getStartKey(), other.getStartKey());
|
||||
if (startKeyCompare == 0) {
|
||||
return true;
|
||||
}
|
||||
if (startKeyCompare < 0) {
|
||||
if (isLast()) {
|
||||
return true;
|
||||
}
|
||||
return Bytes.compareTo(getEndKey(), other.getStartKey()) > 0;
|
||||
}
|
||||
if (other.isLast()) {
|
||||
return true;
|
||||
}
|
||||
return Bytes.compareTo(getStartKey(), other.getEndKey()) < 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1359,7 +1359,7 @@ public class Bytes implements Comparable<Bytes> {
|
|||
*/
|
||||
public static int compareTo(final byte [] left, final byte [] right) {
|
||||
return LexicographicalComparerHolder.BEST_COMPARER.
|
||||
compareTo(left, 0, left.length, right, 0, right.length);
|
||||
compareTo(left, 0, left == null? 0: left.length, right, 0, right == null? 0: right.length);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/**
|
||||
/*
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
|
@ -18,26 +18,35 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionLocation;
|
||||
import org.apache.hadoop.hbase.MetaTableAccessor;
|
||||
import org.apache.hadoop.hbase.RegionLocations;
|
||||
import org.apache.hadoop.hbase.ScheduledChore;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
||||
import org.apache.hadoop.hbase.client.Connection;
|
||||
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||
import org.apache.hadoop.hbase.client.TableState;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.GCMergedRegionsProcedure;
|
||||
import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
|
||||
|
@ -45,51 +54,59 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
|||
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
import org.apache.hadoop.hbase.util.PairOfSameType;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.apache.hadoop.hbase.util.Triple;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog
|
||||
* table on a period looking for unused regions to garbage collect.
|
||||
* table on a period. Makes a lastReport on state of hbase:meta. Looks for unused
|
||||
* regions to garbage collect. Scan of hbase:meta runs if we are NOT in maintenance
|
||||
* mode, if we are NOT shutting down, AND if the assignmentmanager is loaded.
|
||||
* Playing it safe, we will garbage collect no-longer needed region references
|
||||
* only if there are no regions-in-transition (RIT).
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
// TODO: Only works with single hbase:meta region currently. Fix.
|
||||
// TODO: Should it start over every time? Could it continue if runs into problem? Only if
|
||||
// problem does not mess up 'results'.
|
||||
@org.apache.yetus.audience.InterfaceAudience.Private
|
||||
public class CatalogJanitor extends ScheduledChore {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName());
|
||||
|
||||
private final AtomicBoolean alreadyRunning = new AtomicBoolean(false);
|
||||
private final AtomicBoolean enabled = new AtomicBoolean(true);
|
||||
private final MasterServices services;
|
||||
private final Connection connection;
|
||||
// PID of the last Procedure launched herein. Keep around for Tests.
|
||||
|
||||
/**
|
||||
* Saved report from last hbase:meta scan to completion. May be stale if having trouble
|
||||
* completing scan. Check its date.
|
||||
*/
|
||||
private volatile Report lastReport;
|
||||
|
||||
CatalogJanitor(final MasterServices services) {
|
||||
super("CatalogJanitor-" + services.getServerName().toShortString(), services,
|
||||
services.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000));
|
||||
this.services = services;
|
||||
this.connection = services.getConnection();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean initialChore() {
|
||||
try {
|
||||
if (this.enabled.get()) scan();
|
||||
if (getEnabled()) {
|
||||
scan();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Failed initial scan of catalog table", e);
|
||||
LOG.warn("Failed initial janitorial scan of hbase:meta table", e);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param enabled
|
||||
*/
|
||||
public boolean setEnabled(final boolean enabled) {
|
||||
boolean setEnabled(final boolean enabled) {
|
||||
boolean alreadyEnabled = this.enabled.getAndSet(enabled);
|
||||
// If disabling is requested on an already enabled chore, we could have an active
|
||||
// scan still going on, callers might not be aware of that and do further action thinkng
|
||||
|
@ -111,93 +128,129 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
protected void chore() {
|
||||
try {
|
||||
AssignmentManager am = this.services.getAssignmentManager();
|
||||
if (this.enabled.get() && !this.services.isInMaintenanceMode() &&
|
||||
!this.services.getServerManager().isClusterShutdown() && am != null &&
|
||||
am.isMetaLoaded() && !am.hasRegionsInTransition()) {
|
||||
if (getEnabled() && !this.services.isInMaintenanceMode() &&
|
||||
!this.services.getServerManager().isClusterShutdown() &&
|
||||
isMetaLoaded(am)) {
|
||||
scan();
|
||||
} else {
|
||||
LOG.warn("CatalogJanitor is disabled! Enabled=" + this.enabled.get() +
|
||||
LOG.warn("CatalogJanitor is disabled! Enabled=" + getEnabled() +
|
||||
", maintenanceMode=" + this.services.isInMaintenanceMode() + ", am=" + am +
|
||||
", metaLoaded=" + (am != null && am.isMetaLoaded()) + ", hasRIT=" +
|
||||
(am != null && am.hasRegionsInTransition()) + " clusterShutDown=" + this.services
|
||||
.getServerManager().isClusterShutdown());
|
||||
", metaLoaded=" + isMetaLoaded(am) + ", hasRIT=" + isRIT(am) +
|
||||
" clusterShutDown=" + this.services.getServerManager().isClusterShutdown());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Failed scan of catalog table", e);
|
||||
LOG.warn("Failed janitorial scan of hbase:meta table", e);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isMetaLoaded(AssignmentManager am) {
|
||||
return am != null && am.isMetaLoaded();
|
||||
}
|
||||
|
||||
private static boolean isRIT(AssignmentManager am) {
|
||||
return isMetaLoaded(am) && am.hasRegionsInTransition();
|
||||
}
|
||||
|
||||
/**
|
||||
* Run janitorial scan of catalog <code>hbase:meta</code> table looking for
|
||||
* garbage to collect.
|
||||
* @return How many items gc'd whether for merge or split.
|
||||
*/
|
||||
int scan() throws IOException {
|
||||
int gcs = 0;
|
||||
try {
|
||||
if (!alreadyRunning.compareAndSet(false, true)) {
|
||||
LOG.debug("CatalogJanitor already running");
|
||||
return gcs;
|
||||
}
|
||||
Report report = scanForReport();
|
||||
this.lastReport = report;
|
||||
if (!report.isEmpty()) {
|
||||
LOG.warn(report.toString());
|
||||
}
|
||||
|
||||
if (isRIT(this.services.getAssignmentManager())) {
|
||||
LOG.warn("Playing-it-safe skipping merge/split gc'ing of regions from hbase:meta while " +
|
||||
"regions-in-transition (RIT)");
|
||||
}
|
||||
Map<RegionInfo, Result> mergedRegions = report.mergedRegions;
|
||||
for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
|
||||
if (this.services.isInMaintenanceMode()) {
|
||||
// Stop cleaning if the master is in maintenance mode
|
||||
break;
|
||||
}
|
||||
|
||||
PairOfSameType<RegionInfo> p = MetaTableAccessor.getMergeRegions(e.getValue());
|
||||
RegionInfo regionA = p.getFirst();
|
||||
RegionInfo regionB = p.getSecond();
|
||||
if (regionA == null || regionB == null) {
|
||||
LOG.warn("Unexpected references regionA="
|
||||
+ (regionA == null ? "null" : regionA.getShortNameToLog())
|
||||
+ ",regionB="
|
||||
+ (regionB == null ? "null" : regionB.getShortNameToLog())
|
||||
+ " in merged region " + e.getKey().getShortNameToLog());
|
||||
} else {
|
||||
if (cleanMergeRegion(e.getKey(), regionA, regionB)) {
|
||||
gcs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Clean split parents
|
||||
Map<RegionInfo, Result> splitParents = report.splitParents;
|
||||
|
||||
// Now work on our list of found parents. See if any we can clean up.
|
||||
// regions whose parents are still around
|
||||
HashSet<String> parentNotCleaned = new HashSet<>();
|
||||
for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) {
|
||||
if (this.services.isInMaintenanceMode()) {
|
||||
// Stop cleaning if the master is in maintenance mode
|
||||
break;
|
||||
}
|
||||
|
||||
if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
|
||||
cleanParent(e.getKey(), e.getValue())) {
|
||||
gcs++;
|
||||
} else {
|
||||
// We could not clean the parent, so it's daughters should not be
|
||||
// cleaned either (HBASE-6160)
|
||||
PairOfSameType<RegionInfo> daughters =
|
||||
MetaTableAccessor.getDaughterRegions(e.getValue());
|
||||
parentNotCleaned.add(daughters.getFirst().getEncodedName());
|
||||
parentNotCleaned.add(daughters.getSecond().getEncodedName());
|
||||
}
|
||||
}
|
||||
return gcs;
|
||||
} finally {
|
||||
alreadyRunning.set(false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scans hbase:meta and returns a number of scanned rows, and a map of merged
|
||||
* regions, and an ordered map of split parents.
|
||||
* @return triple of scanned rows, map of merged regions and map of split
|
||||
* parent regioninfos
|
||||
* @throws IOException
|
||||
* Scan hbase:meta.
|
||||
* @return Return generated {@link Report}
|
||||
*/
|
||||
Triple<Integer, Map<RegionInfo, Result>, Map<RegionInfo, Result>>
|
||||
getMergedRegionsAndSplitParents() throws IOException {
|
||||
return getMergedRegionsAndSplitParents(null);
|
||||
Report scanForReport() throws IOException {
|
||||
ReportMakingVisitor visitor = new ReportMakingVisitor(this.services);
|
||||
// Null tablename means scan all of meta.
|
||||
MetaTableAccessor.scanMetaForTableRegions(this.services.getConnection(), visitor, null);
|
||||
return visitor.getReport();
|
||||
}
|
||||
|
||||
/**
|
||||
* Scans hbase:meta and returns a number of scanned rows, and a map of merged
|
||||
* regions, and an ordered map of split parents. if the given table name is
|
||||
* null, return merged regions and split parents of all tables, else only the
|
||||
* specified table
|
||||
* @param tableName null represents all tables
|
||||
* @return triple of scanned rows, and map of merged regions, and map of split
|
||||
* parent regioninfos
|
||||
* @throws IOException
|
||||
* @return Returns last published Report that comes of last successful scan
|
||||
* of hbase:meta.
|
||||
*/
|
||||
Triple<Integer, Map<RegionInfo, Result>, Map<RegionInfo, Result>>
|
||||
getMergedRegionsAndSplitParents(final TableName tableName) throws IOException {
|
||||
final boolean isTableSpecified = (tableName != null);
|
||||
// TODO: Only works with single hbase:meta region currently. Fix.
|
||||
final AtomicInteger count = new AtomicInteger(0);
|
||||
// Keep Map of found split parents. There are candidates for cleanup.
|
||||
// Use a comparator that has split parents come before its daughters.
|
||||
final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator());
|
||||
final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR);
|
||||
// This visitor collects split parents and counts rows in the hbase:meta table
|
||||
|
||||
MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
|
||||
@Override
|
||||
public boolean visit(Result r) throws IOException {
|
||||
if (r == null || r.isEmpty()) return true;
|
||||
count.incrementAndGet();
|
||||
RegionInfo info = MetaTableAccessor.getRegionInfo(r);
|
||||
if (info == null) return true; // Keep scanning
|
||||
if (isTableSpecified
|
||||
&& info.getTable().compareTo(tableName) > 0) {
|
||||
// Another table, stop scanning
|
||||
return false;
|
||||
}
|
||||
if (LOG.isTraceEnabled()) LOG.trace("" + info + " IS-SPLIT_PARENT=" + info.isSplitParent());
|
||||
if (info.isSplitParent()) splitParents.put(info, r);
|
||||
if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) {
|
||||
mergedRegions.put(info, r);
|
||||
}
|
||||
// Returning true means "keep scanning"
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Run full scan of hbase:meta catalog table passing in our custom visitor with
|
||||
// the start row
|
||||
MetaTableAccessor.scanMetaForTableRegions(this.connection, visitor, tableName);
|
||||
|
||||
return new Triple<>(count.get(), mergedRegions, splitParents);
|
||||
Report getLastReport() {
|
||||
return this.lastReport;
|
||||
}
|
||||
|
||||
/**
|
||||
* If merged region no longer holds reference to the merge regions, archive
|
||||
* merge region on hdfs and perform deleting references in hbase:meta
|
||||
* @param mergedRegion
|
||||
* @return true if we delete references in merged region on hbase:meta and archive
|
||||
* the files on the file system
|
||||
* @throws IOException
|
||||
*/
|
||||
boolean cleanMergeRegion(final RegionInfo mergedRegion,
|
||||
private boolean cleanMergeRegion(final RegionInfo mergedRegion,
|
||||
final RegionInfo regionA, final RegionInfo regionB) throws IOException {
|
||||
FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
|
||||
Path rootdir = this.services.getMasterFileSystem().getRootDir();
|
||||
|
@ -228,81 +281,7 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
}
|
||||
|
||||
/**
|
||||
* Run janitorial scan of catalog <code>hbase:meta</code> table looking for
|
||||
* garbage to collect.
|
||||
* @return number of archiving jobs started.
|
||||
* @throws IOException
|
||||
*/
|
||||
int scan() throws IOException {
|
||||
int result = 0;
|
||||
|
||||
try {
|
||||
if (!alreadyRunning.compareAndSet(false, true)) {
|
||||
LOG.debug("CatalogJanitor already running");
|
||||
return result;
|
||||
}
|
||||
Triple<Integer, Map<RegionInfo, Result>, Map<RegionInfo, Result>> scanTriple =
|
||||
getMergedRegionsAndSplitParents();
|
||||
/**
|
||||
* clean merge regions first
|
||||
*/
|
||||
Map<RegionInfo, Result> mergedRegions = scanTriple.getSecond();
|
||||
for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
|
||||
if (this.services.isInMaintenanceMode()) {
|
||||
// Stop cleaning if the master is in maintenance mode
|
||||
break;
|
||||
}
|
||||
|
||||
PairOfSameType<RegionInfo> p = MetaTableAccessor.getMergeRegions(e.getValue());
|
||||
RegionInfo regionA = p.getFirst();
|
||||
RegionInfo regionB = p.getSecond();
|
||||
if (regionA == null || regionB == null) {
|
||||
LOG.warn("Unexpected references regionA="
|
||||
+ (regionA == null ? "null" : regionA.getShortNameToLog())
|
||||
+ ",regionB="
|
||||
+ (regionB == null ? "null" : regionB.getShortNameToLog())
|
||||
+ " in merged region " + e.getKey().getShortNameToLog());
|
||||
} else {
|
||||
if (cleanMergeRegion(e.getKey(), regionA, regionB)) {
|
||||
result++;
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* clean split parents
|
||||
*/
|
||||
Map<RegionInfo, Result> splitParents = scanTriple.getThird();
|
||||
|
||||
// Now work on our list of found parents. See if any we can clean up.
|
||||
// regions whose parents are still around
|
||||
HashSet<String> parentNotCleaned = new HashSet<>();
|
||||
for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) {
|
||||
if (this.services.isInMaintenanceMode()) {
|
||||
// Stop cleaning if the master is in maintenance mode
|
||||
break;
|
||||
}
|
||||
|
||||
if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
|
||||
cleanParent(e.getKey(), e.getValue())) {
|
||||
result++;
|
||||
} else {
|
||||
// We could not clean the parent, so it's daughters should not be
|
||||
// cleaned either (HBASE-6160)
|
||||
PairOfSameType<RegionInfo> daughters =
|
||||
MetaTableAccessor.getDaughterRegions(e.getValue());
|
||||
parentNotCleaned.add(daughters.getFirst().getEncodedName());
|
||||
parentNotCleaned.add(daughters.getSecond().getEncodedName());
|
||||
}
|
||||
}
|
||||
return result;
|
||||
} finally {
|
||||
alreadyRunning.set(false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare HRegionInfos in a way that has split parents sort BEFORE their
|
||||
* daughters.
|
||||
* Compare HRegionInfos in a way that has split parents sort BEFORE their daughters.
|
||||
*/
|
||||
static class SplitParentFirstComparator implements Comparator<RegionInfo> {
|
||||
Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
|
||||
|
@ -310,14 +289,22 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
public int compare(RegionInfo left, RegionInfo right) {
|
||||
// This comparator differs from the one RegionInfo in that it sorts
|
||||
// parent before daughters.
|
||||
if (left == null) return -1;
|
||||
if (right == null) return 1;
|
||||
if (left == null) {
|
||||
return -1;
|
||||
}
|
||||
if (right == null) {
|
||||
return 1;
|
||||
}
|
||||
// Same table name.
|
||||
int result = left.getTable().compareTo(right.getTable());
|
||||
if (result != 0) return result;
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
// Compare start keys.
|
||||
result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
|
||||
if (result != 0) return result;
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
// Compare end keys, but flip the operands so parent comes first
|
||||
result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
|
||||
|
||||
|
@ -332,7 +319,6 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
* <code>metaRegionName</code>
|
||||
* @return True if we removed <code>parent</code> from meta table and from
|
||||
* the filesystem.
|
||||
* @throws IOException
|
||||
*/
|
||||
boolean cleanParent(final RegionInfo parent, Result rowContent)
|
||||
throws IOException {
|
||||
|
@ -383,9 +369,9 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
* @return A pair where the first boolean says whether or not the daughter
|
||||
* region directory exists in the filesystem and then the second boolean says
|
||||
* whether the daughter has references to the parent.
|
||||
* @throws IOException
|
||||
*/
|
||||
Pair<Boolean, Boolean> checkDaughterInFs(final RegionInfo parent, final RegionInfo daughter)
|
||||
private Pair<Boolean, Boolean> checkDaughterInFs(final RegionInfo parent,
|
||||
final RegionInfo daughter)
|
||||
throws IOException {
|
||||
if (daughter == null) {
|
||||
return new Pair<>(Boolean.FALSE, Boolean.FALSE);
|
||||
|
@ -397,7 +383,7 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
|
||||
Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());
|
||||
|
||||
HRegionFileSystem regionFs = null;
|
||||
HRegionFileSystem regionFs;
|
||||
|
||||
try {
|
||||
if (!FSUtils.isExists(fs, daughterRegionDir)) {
|
||||
|
@ -425,23 +411,19 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
+ ", to: " + parent.getEncodedName() + " assuming has references", e);
|
||||
return new Pair<>(Boolean.TRUE, Boolean.TRUE);
|
||||
}
|
||||
return new Pair<>(Boolean.TRUE, Boolean.valueOf(references));
|
||||
return new Pair<>(Boolean.TRUE, references);
|
||||
}
|
||||
|
||||
private TableDescriptor getDescriptor(final TableName tableName)
|
||||
throws FileNotFoundException, IOException {
|
||||
private TableDescriptor getDescriptor(final TableName tableName) throws IOException {
|
||||
return this.services.getTableDescriptors().get(tableName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the specified region has merge qualifiers, if so, try to clean
|
||||
* them
|
||||
* @param region
|
||||
* @return true if the specified region doesn't have merge qualifier now
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean cleanMergeQualifier(final RegionInfo region)
|
||||
throws IOException {
|
||||
public boolean cleanMergeQualifier(final RegionInfo region) throws IOException {
|
||||
// Get merge regions if it is a merged region and already has merge
|
||||
// qualifier
|
||||
Pair<RegionInfo, RegionInfo> mergeRegions = MetaTableAccessor
|
||||
|
@ -458,7 +440,319 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
+ " has only one merge qualifier in META.");
|
||||
return false;
|
||||
}
|
||||
return cleanMergeRegion(region, mergeRegions.getFirst(),
|
||||
mergeRegions.getSecond());
|
||||
return cleanMergeRegion(region, mergeRegions.getFirst(), mergeRegions.getSecond());
|
||||
}
|
||||
|
||||
/**
|
||||
* Report made by {@link ReportMakingVisitor}.
|
||||
*/
|
||||
static class Report {
|
||||
private final long now = EnvironmentEdgeManager.currentTime();
|
||||
|
||||
// Keep Map of found split parents. These are candidates for cleanup.
|
||||
// Use a comparator that has split parents come before its daughters.
|
||||
final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator());
|
||||
final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR);
|
||||
|
||||
final List<Pair<MetaRow, MetaRow>> holes = new ArrayList<>();
|
||||
final List<Pair<MetaRow, MetaRow>> overlaps = new ArrayList<>();
|
||||
final Map<ServerName, RegionInfo> unknownServers = new HashMap<ServerName, RegionInfo>();
|
||||
final List<byte []> emptyRegionInfo = new ArrayList<>();
|
||||
int count = 0;
|
||||
|
||||
@VisibleForTesting
|
||||
Report() {}
|
||||
|
||||
/**
|
||||
* @return True if an 'empty' lastReport -- no problems found.
|
||||
*/
|
||||
boolean isEmpty() {
|
||||
return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() &&
|
||||
this.emptyRegionInfo.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (Pair<MetaRow, MetaRow> p: this.holes) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append(", ");
|
||||
}
|
||||
sb.append("hole=" + Bytes.toString(p.getFirst().metaRow) + "/" +
|
||||
Bytes.toString(p.getSecond().metaRow));
|
||||
}
|
||||
for (Pair<MetaRow, MetaRow> p: this.overlaps) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append(", ");
|
||||
}
|
||||
sb.append("overlap=").append(Bytes.toString(p.getFirst().metaRow)).append("/").
|
||||
append(Bytes.toString(p.getSecond().metaRow));
|
||||
}
|
||||
for (byte [] r: this.emptyRegionInfo) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append(", ");
|
||||
}
|
||||
sb.append("empty=").append(Bytes.toString(r));
|
||||
}
|
||||
for (Map.Entry<ServerName, RegionInfo> e: this.unknownServers.entrySet()) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append(", ");
|
||||
}
|
||||
sb.append("unknown_server=").append(e.getKey()).append("/").
|
||||
append(e.getValue().getRegionNameAsString());
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple datastructure to hold a MetaRow content.
|
||||
*/
|
||||
static class MetaRow {
|
||||
/**
|
||||
* A marker for use in case where there is a hole at the very
|
||||
* first row in hbase:meta. Should never happen.
|
||||
*/
|
||||
private static final MetaRow UNDEFINED =
|
||||
new MetaRow(HConstants.EMPTY_START_ROW, RegionInfo.UNDEFINED);
|
||||
|
||||
/**
|
||||
* Row from hbase:meta table.
|
||||
*/
|
||||
final byte [] metaRow;
|
||||
|
||||
/**
|
||||
* The decoded RegionInfo gotten from hbase:meta.
|
||||
*/
|
||||
final RegionInfo regionInfo;
|
||||
|
||||
MetaRow(byte [] metaRow, RegionInfo regionInfo) {
|
||||
this.metaRow = metaRow;
|
||||
this.regionInfo = regionInfo;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Visitor we use in here in CatalogJanitor to go against hbase:meta table.
|
||||
* Generates a Report made of a collection of split parents and counts of rows
|
||||
* in the hbase:meta table. Also runs hbase:meta consistency checks to
|
||||
* generate more report. Report is NOT ready until after this visitor has been
|
||||
* {@link #close()}'d.
|
||||
*/
|
||||
static class ReportMakingVisitor implements MetaTableAccessor.CloseableVisitor {
|
||||
private final MasterServices services;
|
||||
private volatile boolean closed;
|
||||
|
||||
/**
|
||||
* Report is not done until after the close has been called.
|
||||
* @see #close()
|
||||
* @see #getReport()
|
||||
*/
|
||||
private Report report = new Report();
|
||||
|
||||
/**
|
||||
* RegionInfo from previous row.
|
||||
*/
|
||||
private MetaRow previous = null;
|
||||
|
||||
ReportMakingVisitor(MasterServices services) {
|
||||
this.services = services;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not call until after {@link #close()}.
|
||||
* Will throw a {@link RuntimeException} if you do.
|
||||
*/
|
||||
Report getReport() {
|
||||
if (!this.closed) {
|
||||
throw new RuntimeException("Report not ready until after close()");
|
||||
}
|
||||
return this.report;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean visit(Result r) {
|
||||
if (r == null || r.isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
this.report.count++;
|
||||
RegionInfo regionInfo = metaTableConsistencyCheck(r);
|
||||
if (regionInfo != null) {
|
||||
LOG.trace(regionInfo.toString());
|
||||
if (regionInfo.isSplitParent()) { // splitParent means split and offline.
|
||||
this.report.splitParents.put(regionInfo, r);
|
||||
}
|
||||
if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) {
|
||||
this.report.mergedRegions.put(regionInfo, r);
|
||||
}
|
||||
}
|
||||
// Returning true means "keep scanning"
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check row.
|
||||
* @param metaTableRow Row from hbase:meta table.
|
||||
* @return Returns default regioninfo found in row parse as a convenience to save
|
||||
* on having to do a double-parse of Result.
|
||||
*/
|
||||
private RegionInfo metaTableConsistencyCheck(Result metaTableRow) {
|
||||
RegionInfo ri;
|
||||
// Locations comes back null if the RegionInfo field is empty.
|
||||
// If locations is null, ensure the regioninfo is for sure empty before progressing.
|
||||
// If really empty, report as missing regioninfo! Otherwise, can run server check
|
||||
// and get RegionInfo from locations.
|
||||
RegionLocations locations = MetaTableAccessor.getRegionLocations(metaTableRow);
|
||||
if (locations == null) {
|
||||
ri = MetaTableAccessor.getRegionInfo(metaTableRow,
|
||||
MetaTableAccessor.getRegionInfoColumn());
|
||||
} else {
|
||||
ri = locations.getDefaultRegionLocation().getRegion();
|
||||
checkServer(locations);
|
||||
}
|
||||
|
||||
if (ri == null) {
|
||||
this.report.emptyRegionInfo.add(metaTableRow.getRow());
|
||||
return ri;
|
||||
}
|
||||
MetaRow mrri = new MetaRow(metaTableRow.getRow(), ri);
|
||||
// If table is disabled, skip integrity check.
|
||||
if (!isTableDisabled(ri)) {
|
||||
if (isTableTransition(ri)) {
|
||||
// On table transition, look to see if last region was last in table
|
||||
// and if this is the first. Report 'hole' if neither is true.
|
||||
// HBCK1 used to have a special category for missing start or end keys.
|
||||
// We'll just lump them in as 'holes'.
|
||||
if ((this.previous != null && !this.previous.regionInfo.isLast()) || !ri.isFirst()) {
|
||||
addHole(this.previous == null? MetaRow.UNDEFINED: this.previous, mrri);
|
||||
}
|
||||
} else {
|
||||
if (!this.previous.regionInfo.isNext(ri)) {
|
||||
if (this.previous.regionInfo.isOverlap(ri)) {
|
||||
addOverlap(this.previous, mrri);
|
||||
} else {
|
||||
addHole(this.previous, mrri);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
this.previous = mrri;
|
||||
return ri;
|
||||
}
|
||||
|
||||
private void addOverlap(MetaRow a, MetaRow b) {
|
||||
this.report.overlaps.add(new Pair<>(a, b));
|
||||
}
|
||||
|
||||
private void addHole(MetaRow a, MetaRow b) {
|
||||
this.report.holes.add(new Pair<>(a, b));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if table is disabled or disabling; defaults false!
|
||||
*/
|
||||
boolean isTableDisabled(RegionInfo ri) {
|
||||
if (ri == null) {
|
||||
return false;
|
||||
}
|
||||
if (this.services == null) {
|
||||
return false;
|
||||
}
|
||||
if (this.services.getTableStateManager() == null) {
|
||||
return false;
|
||||
}
|
||||
TableState state = null;
|
||||
try {
|
||||
state = this.services.getTableStateManager().getTableState(ri.getTable());
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Failed getting table state", e);
|
||||
}
|
||||
return state != null && state.isDisabledOrDisabling();
|
||||
}
|
||||
|
||||
/**
|
||||
* Run through referenced servers and save off unknown and the dead.
|
||||
*/
|
||||
private void checkServer(RegionLocations locations) {
|
||||
if (this.services == null) {
|
||||
// Can't do this test if no services.
|
||||
return;
|
||||
}
|
||||
if (locations == null) {
|
||||
return;
|
||||
}
|
||||
// Check referenced servers are known/online.
|
||||
for (HRegionLocation location: locations.getRegionLocations()) {
|
||||
ServerName sn = location.getServerName();
|
||||
if (sn == null) {
|
||||
continue;
|
||||
}
|
||||
ServerManager.ServerLiveState state = this.services.getServerManager().
|
||||
isServerKnownAndOnline(sn);
|
||||
switch (state) {
|
||||
case UNKNOWN:
|
||||
this.report.unknownServers.put(sn, location.getRegion());
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True iff first row in hbase:meta or if we've broached a new table in hbase:meta
|
||||
*/
|
||||
private boolean isTableTransition(RegionInfo ri) {
|
||||
return this.previous == null ||
|
||||
!this.previous.regionInfo.getTable().equals(ri.getTable());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
this.closed = true;
|
||||
}
|
||||
}
|
||||
|
||||
private static void checkLog4jProperties() {
|
||||
String filename = "log4j.properties";
|
||||
try {
|
||||
final InputStream inStream =
|
||||
CatalogJanitor.class.getClassLoader().getResourceAsStream(filename);
|
||||
if (inStream != null) {
|
||||
new Properties().load(inStream);
|
||||
} else {
|
||||
System.out.println("No " + filename + " on classpath; Add one else no logging output!");
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.error("Log4j check failed", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing against a cluster.
|
||||
* Doesn't have a MasterServices context so does not report on good vs bad servers.
|
||||
*/
|
||||
public static void main(String [] args) throws IOException {
|
||||
checkLog4jProperties();
|
||||
ReportMakingVisitor visitor = new ReportMakingVisitor(null);
|
||||
try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create())) {
|
||||
/* Used to generate an overlap.
|
||||
Get g = new Get(Bytes.toBytes("t2,40,1563939166317.5a8be963741d27e9649e5c67a34259d9."));
|
||||
g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
|
||||
try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
|
||||
Result r = t.get(g);
|
||||
byte [] row = g.getRow();
|
||||
row[row.length - 3] <<= ((byte)row[row.length -3]);
|
||||
Put p = new Put(g.getRow());
|
||||
p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
|
||||
r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
|
||||
t.put(p);
|
||||
}
|
||||
*/
|
||||
MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
|
||||
Report report = visitor.getReport();
|
||||
LOG.info(report != null? report.toString(): "empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,14 +27,12 @@ import java.net.InetSocketAddress;
|
|||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.ClusterMetricsBuilder;
|
||||
import org.apache.hadoop.hbase.DoNotRetryIOException;
|
||||
|
@ -1493,7 +1491,8 @@ public class MasterRpcServices extends RSRpcServices
|
|||
RunCatalogScanRequest req) throws ServiceException {
|
||||
rpcPreCheck("runCatalogScan");
|
||||
try {
|
||||
return ResponseConverter.buildRunCatalogScanResponse(master.catalogJanitorChore.scan());
|
||||
return ResponseConverter.buildRunCatalogScanResponse(
|
||||
this.master.catalogJanitorChore.scan());
|
||||
} catch (IOException ioe) {
|
||||
throw new ServiceException(ioe);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/**
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
|
@ -59,7 +59,6 @@ import org.apache.hadoop.hbase.testclassification.SmallTests;
|
|||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
||||
import org.apache.hadoop.hbase.util.Triple;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
@ -309,8 +308,13 @@ public class TestCatalogJanitor {
|
|||
|
||||
final Map<HRegionInfo, Result> mergedRegions = new TreeMap<>();
|
||||
CatalogJanitor spy = spy(this.janitor);
|
||||
doReturn(new Triple<>(10, mergedRegions, splitParents)).when(spy).
|
||||
getMergedRegionsAndSplitParents();
|
||||
|
||||
CatalogJanitor.Report report = new CatalogJanitor.Report();
|
||||
report.count = 10;
|
||||
report.mergedRegions.putAll(mergedRegions);
|
||||
report.splitParents.putAll(splitParents);
|
||||
|
||||
doReturn(report).when(spy).scanForReport();
|
||||
|
||||
// Create ref from splita to parent
|
||||
LOG.info("parent=" + parent.getShortNameToLog() + ", splita=" + splita.getShortNameToLog());
|
||||
|
@ -319,14 +323,16 @@ public class TestCatalogJanitor {
|
|||
LOG.info("Created reference " + splitaRef);
|
||||
|
||||
// Parent and splita should not be removed because a reference from splita to parent.
|
||||
assertEquals(0, spy.scan());
|
||||
int gcs = spy.scan();
|
||||
assertEquals(0, gcs);
|
||||
|
||||
// Now delete the ref
|
||||
FileSystem fs = FileSystem.get(HTU.getConfiguration());
|
||||
assertTrue(fs.delete(splitaRef, true));
|
||||
|
||||
//now, both parent, and splita can be deleted
|
||||
assertEquals(2, spy.scan());
|
||||
gcs = spy.scan();
|
||||
assertEquals(2, gcs);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.MetaTableAccessor;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
||||
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.junit.rules.TestName;
|
||||
|
||||
@Category({MasterTests.class, LargeTests.class})
|
||||
public class TestCatalogJanitorCluster {
|
||||
@ClassRule
|
||||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestCatalogJanitorCluster.class);
|
||||
|
||||
@Rule
|
||||
public final TestName name = new TestName();
|
||||
|
||||
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||
private static final TableName T1 = TableName.valueOf("t1");
|
||||
private static final TableName T2 = TableName.valueOf("t2");
|
||||
private static final TableName T3 = TableName.valueOf("t3");
|
||||
|
||||
@Before
|
||||
public void before() throws Exception {
|
||||
TEST_UTIL.startMiniCluster();
|
||||
TEST_UTIL.createMultiRegionTable(T1, new byte [][] {HConstants.CATALOG_FAMILY});
|
||||
TEST_UTIL.createMultiRegionTable(T2, new byte [][] {HConstants.CATALOG_FAMILY});
|
||||
TEST_UTIL.createMultiRegionTable(T3, new byte [][] {HConstants.CATALOG_FAMILY});
|
||||
}
|
||||
|
||||
@After
|
||||
public void after() throws Exception {
|
||||
TEST_UTIL.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
/**
|
||||
* Fat method where we start with a fat hbase:meta and then gradually intro
|
||||
* problems running catalogjanitor for each to ensure it triggers complaint.
|
||||
* Do one big method because takes a while to build up the context we need.
|
||||
* We create three tables and then make holes, overlaps, add unknown servers
|
||||
* and empty out regioninfo columns. Each should up counts in the
|
||||
* CatalogJanitor.Report produced.
|
||||
*/
|
||||
@Test
|
||||
public void testConsistency() throws IOException {
|
||||
CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().getCatalogJanitor();
|
||||
int gc = janitor.scan();
|
||||
CatalogJanitor.Report report = janitor.getLastReport();
|
||||
// Assert no problems.
|
||||
assertTrue(report.isEmpty());
|
||||
// Now remove first region in table t2 to see if catalogjanitor scan notices.
|
||||
List<RegionInfo> t2Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T2);
|
||||
MetaTableAccessor.deleteRegion(TEST_UTIL.getConnection(), t2Ris.get(0));
|
||||
gc = janitor.scan();
|
||||
report = janitor.getLastReport();
|
||||
assertFalse(report.isEmpty());
|
||||
assertEquals(1, report.holes.size());
|
||||
assertTrue(report.holes.get(0).getFirst().regionInfo.getTable().equals(T1));
|
||||
assertTrue(report.holes.get(0).getFirst().regionInfo.isLast());
|
||||
assertTrue(report.holes.get(0).getSecond().regionInfo.getTable().equals(T2));
|
||||
assertEquals(0, report.overlaps.size());
|
||||
// Next, add overlaps to first row in t3
|
||||
List<RegionInfo> t3Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T3);
|
||||
RegionInfo ri = t3Ris.get(0);
|
||||
RegionInfo newRi1 = RegionInfoBuilder.newBuilder(ri.getTable()).
|
||||
setStartKey(incrementRow(ri.getStartKey())).
|
||||
setEndKey(incrementRow(ri.getEndKey())).build();
|
||||
Put p1 = MetaTableAccessor.makePutFromRegionInfo(newRi1, System.currentTimeMillis());
|
||||
RegionInfo newRi2 = RegionInfoBuilder.newBuilder(newRi1.getTable()).
|
||||
setStartKey(incrementRow(newRi1.getStartKey())).
|
||||
setEndKey(incrementRow(newRi1.getEndKey())).build();
|
||||
Put p2 = MetaTableAccessor.makePutFromRegionInfo(newRi2, System.currentTimeMillis());
|
||||
MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(p1, p2));
|
||||
gc = janitor.scan();
|
||||
report = janitor.getLastReport();
|
||||
assertFalse(report.isEmpty());
|
||||
// We added two overlaps so total three.
|
||||
assertEquals(3, report.overlaps.size());
|
||||
// Assert hole is still there.
|
||||
assertEquals(1, report.holes.size());
|
||||
// Assert other attributes are empty still.
|
||||
assertTrue(report.emptyRegionInfo.isEmpty());
|
||||
assertTrue(report.unknownServers.isEmpty());
|
||||
// Now make bad server in t1.
|
||||
List<RegionInfo> t1Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T1);
|
||||
RegionInfo t1Ri1 = t1Ris.get(1);
|
||||
Put pServer = new Put(t1Ri1.getRegionName());
|
||||
pServer.addColumn(MetaTableAccessor.getCatalogFamily(),
|
||||
MetaTableAccessor.getServerColumn(0), Bytes.toBytes("bad.server.example.org:1234"));
|
||||
MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pServer));
|
||||
gc = janitor.scan();
|
||||
report = janitor.getLastReport();
|
||||
assertFalse(report.isEmpty());
|
||||
assertEquals(1, report.unknownServers.size());
|
||||
// Finally, make an empty regioninfo in t1.
|
||||
RegionInfo t1Ri2 = t1Ris.get(2);
|
||||
Put pEmptyRI = new Put(t1Ri2.getRegionName());
|
||||
pEmptyRI.addColumn(MetaTableAccessor.getCatalogFamily(),
|
||||
MetaTableAccessor.getRegionInfoColumn(), HConstants.EMPTY_BYTE_ARRAY);
|
||||
MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pEmptyRI));
|
||||
gc = janitor.scan();
|
||||
report = janitor.getLastReport();
|
||||
assertEquals(1, report.emptyRegionInfo.size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Take last byte and add one to it.
|
||||
*/
|
||||
private static byte [] incrementRow(byte [] row) {
|
||||
if (row.length == 0) {
|
||||
return new byte []{'0'};
|
||||
}
|
||||
row[row.length - 1] = (byte)(((int)row[row.length - 1]) + 1);
|
||||
return row;
|
||||
}
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
/**
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
|
@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
|
|||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||
import org.apache.hadoop.hbase.exceptions.DeserializationException;
|
||||
import org.apache.hadoop.hbase.master.RegionState;
|
||||
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
|
||||
|
@ -50,7 +51,6 @@ import org.junit.rules.TestName;
|
|||
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
|
||||
|
||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
|
||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo;
|
||||
|
||||
@Category({RegionServerTests.class, SmallTests.class})
|
||||
public class TestHRegionInfo {
|
||||
|
@ -62,6 +62,73 @@ public class TestHRegionInfo {
|
|||
@Rule
|
||||
public TestName name = new TestName();
|
||||
|
||||
@Test
|
||||
public void testIsStart() {
|
||||
assertTrue(RegionInfoBuilder.FIRST_META_REGIONINFO.isFirst());
|
||||
org.apache.hadoop.hbase.client.RegionInfo ri =
|
||||
org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME).
|
||||
setStartKey(Bytes.toBytes("not_start")).build();
|
||||
assertFalse(ri.isFirst());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIsEnd() {
|
||||
assertTrue(RegionInfoBuilder.FIRST_META_REGIONINFO.isFirst());
|
||||
org.apache.hadoop.hbase.client.RegionInfo ri =
|
||||
org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME).
|
||||
setEndKey(Bytes.toBytes("not_end")).build();
|
||||
assertFalse(ri.isLast());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIsNext() {
|
||||
byte [] bytes = Bytes.toBytes("row");
|
||||
org.apache.hadoop.hbase.client.RegionInfo ri =
|
||||
org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME).
|
||||
setEndKey(bytes).build();
|
||||
org.apache.hadoop.hbase.client.RegionInfo ri2 =
|
||||
org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME).
|
||||
setStartKey(bytes).build();
|
||||
assertFalse(ri.isNext(RegionInfoBuilder.FIRST_META_REGIONINFO));
|
||||
assertTrue(ri.isNext(ri2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIsOverlap() {
|
||||
byte [] a = Bytes.toBytes("a");
|
||||
byte [] b = Bytes.toBytes("b");
|
||||
byte [] c = Bytes.toBytes("c");
|
||||
byte [] d = Bytes.toBytes("d");
|
||||
org.apache.hadoop.hbase.client.RegionInfo all =
|
||||
RegionInfoBuilder.FIRST_META_REGIONINFO;
|
||||
org.apache.hadoop.hbase.client.RegionInfo ari =
|
||||
org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME).
|
||||
setEndKey(a).build();
|
||||
org.apache.hadoop.hbase.client.RegionInfo abri =
|
||||
org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME).
|
||||
setStartKey(a).setEndKey(b).build();
|
||||
org.apache.hadoop.hbase.client.RegionInfo adri =
|
||||
org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME).
|
||||
setStartKey(a).setEndKey(d).build();
|
||||
org.apache.hadoop.hbase.client.RegionInfo cdri =
|
||||
org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME).
|
||||
setStartKey(c).setEndKey(d).build();
|
||||
org.apache.hadoop.hbase.client.RegionInfo dri =
|
||||
org.apache.hadoop.hbase.client.RegionInfoBuilder.newBuilder(TableName.META_TABLE_NAME).
|
||||
setStartKey(d).build();
|
||||
assertTrue(all.isOverlap(all));
|
||||
assertTrue(all.isOverlap(abri));
|
||||
assertFalse(abri.isOverlap(cdri));
|
||||
assertTrue(all.isOverlap(ari));
|
||||
assertFalse(ari.isOverlap(abri));
|
||||
assertFalse(ari.isOverlap(abri));
|
||||
assertTrue(ari.isOverlap(all));
|
||||
assertTrue(dri.isOverlap(all));
|
||||
assertTrue(abri.isOverlap(adri));
|
||||
assertFalse(dri.isOverlap(ari));
|
||||
assertTrue(abri.isOverlap(adri));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPb() throws DeserializationException {
|
||||
HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
|
||||
|
@ -279,7 +346,7 @@ public class TestHRegionInfo {
|
|||
assertEquals(hri, convertedHri);
|
||||
|
||||
// test convert RegionInfo without replicaId
|
||||
RegionInfo info = RegionInfo.newBuilder()
|
||||
HBaseProtos.RegionInfo info = HBaseProtos.RegionInfo.newBuilder()
|
||||
.setTableName(HBaseProtos.TableName.newBuilder()
|
||||
.setQualifier(UnsafeByteOperations.unsafeWrap(tableName.getQualifier()))
|
||||
.setNamespace(UnsafeByteOperations.unsafeWrap(tableName.getNamespace()))
|
||||
|
|
|
@ -741,7 +741,7 @@ EOF
|
|||
if column == 'info:regioninfo' || column == 'info:splitA' || column == 'info:splitB'
|
||||
hri = org.apache.hadoop.hbase.HRegionInfo.parseFromOrNull(kv.getValueArray,
|
||||
kv.getValueOffset, kv.getValueLength)
|
||||
return format('timestamp=%d, value=%s', kv.getTimestamp, hri.toString)
|
||||
return format('timestamp=%d, value=%s', kv.getTimestamp, hri.nil? ? '' : hri.toString)
|
||||
end
|
||||
if column == 'info:serverstartcode'
|
||||
if kv.getValueLength > 0
|
||||
|
|
Loading…
Reference in New Issue