HBASE-23213 Backport HBASE-22460 to branch-1 (#761)
Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
parent
090780c5bd
commit
5e414f2d46
|
@ -165,6 +165,13 @@ public class ClusterStatus extends VersionedWritable {
|
|||
return deadServers != null ? deadServers.size() : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return map of the names of region servers on the live list with associated ServerLoad
|
||||
*/
|
||||
public Map<ServerName, ServerLoad> getLiveServersLoad() {
|
||||
return Collections.unmodifiableMap(liveServers);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the average cluster load
|
||||
*/
|
||||
|
|
|
@ -194,6 +194,14 @@ public class RegionLoad {
|
|||
return regionLoadPB.getStoreRefCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the max reference count for any store file among all stores files
|
||||
* of this region
|
||||
*/
|
||||
public int getMaxStoreFileRefCount() {
|
||||
return regionLoadPB.getMaxStoreFileRefCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* @see java.lang.Object#toString()
|
||||
*/
|
||||
|
@ -204,6 +212,7 @@ public class RegionLoad {
|
|||
sb = Strings.appendKeyValue(sb, "numberOfStorefiles",
|
||||
this.getStorefiles());
|
||||
sb = Strings.appendKeyValue(sb, "storeRefCount", this.getStoreRefCount());
|
||||
sb = Strings.appendKeyValue(sb, "maxStoreFileRefCount", this.getMaxStoreFileRefCount());
|
||||
sb = Strings.appendKeyValue(sb, "storefileUncompressedSizeMB",
|
||||
this.getStoreUncompressedSizeMB());
|
||||
sb = Strings.appendKeyValue(sb, "lastMajorCompactionTimestamp",
|
||||
|
|
|
@ -1341,6 +1341,13 @@ public final class HConstants {
|
|||
// User defined Default TTL config key
|
||||
public static final String DEFAULT_SNAPSHOT_TTL_CONFIG_KEY = "hbase.master.snapshot.ttl";
|
||||
|
||||
/**
 * Config key for the store file reference count threshold used by regions recovery.
 * Regions whose max store file ref count is above the configured value are considered
 * for reopening.
 */
public static final String STORE_FILE_REF_COUNT_THRESHOLD =
    "hbase.regions.recovery.store.file.ref.count";

/**
 * Default for {@link #STORE_FILE_REF_COUNT_THRESHOLD}. The value -1 indicates there is no
 * threshold on high store file ref count, i.e. the regions recovery feature is turned off.
 */
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
|
||||
|
||||
/**
|
||||
* Configurations for master executor services.
|
||||
*/
|
||||
|
|
|
@ -1643,4 +1643,33 @@ possible configurations would overwhelm and obscure the important.
|
|||
automatically deleted until it is manually deleted
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hbase.master.regions.recovery.check.interval</name>
|
||||
<value>1200000</value>
|
||||
<description>
|
||||
Regions Recovery Chore interval in milliseconds.
|
||||
This chore keeps running at this interval to
|
||||
find all regions whose max store file ref count exceeds the configured threshold
|
||||
and reopens them.
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hbase.regions.recovery.store.file.ref.count</name>
|
||||
<value>-1</value>
|
||||
<description>
|
||||
Very large ref count on a file indicates
|
||||
a reference leak on that file. Such files
|
||||
cannot be removed even after they are invalidated
|
||||
via compaction. Only way to recover in such
|
||||
scenario is to reopen the region which can
|
||||
release all resources, like the refcount, leases, etc.
|
||||
This config represents Store files Ref Count threshold
|
||||
value considered for reopening regions.
|
||||
Any region whose max store file ref count exceeds this value
|
||||
would be eligible for reopening by master.
|
||||
Default value -1 indicates this feature is turned off.
|
||||
Only positive integer value should be provided to enable
|
||||
this feature.
|
||||
</description>
|
||||
</property>
|
||||
</configuration>
|
||||
|
|
|
@ -231,6 +231,7 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
|
|||
String STOREFILE_COUNT_DESC = "Number of Store Files";
|
||||
String STORE_REF_COUNT = "storeRefCount";
|
||||
String STORE_REF_COUNT_DESC = "Store reference count";
|
||||
String MAX_STORE_FILE_REF_COUNT = "maxStoreFileRefCount";
|
||||
String MEMSTORE_SIZE = "memStoreSize";
|
||||
String MEMSTORE_SIZE_DESC = "Size of the memstore";
|
||||
String STOREFILE_SIZE = "storeFileSize";
|
||||
|
|
|
@ -147,4 +147,10 @@ public interface MetricsRegionWrapper {
|
|||
* @return the number of references active on the store
|
||||
*/
|
||||
long getStoreRefCount();
|
||||
|
||||
/**
 * @return the max reference count for any store file among all store files
 *   of this region
 */
int getMaxStoreFileRefCount();
|
||||
}
|
||||
|
|
|
@ -217,6 +217,10 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
|
|||
regionNamePrefix + MetricsRegionServerSource.STORE_REF_COUNT,
|
||||
MetricsRegionServerSource.STORE_REF_COUNT),
|
||||
this.regionWrapper.getStoreRefCount());
|
||||
mrb.addGauge(Interns.info(
|
||||
regionNamePrefix + MetricsRegionServerSource.MAX_STORE_FILE_REF_COUNT,
|
||||
MetricsRegionServerSource.MAX_STORE_FILE_REF_COUNT),
|
||||
this.regionWrapper.getMaxStoreFileRefCount());
|
||||
mrb.addGauge(Interns.info(
|
||||
regionNamePrefix + MetricsRegionServerSource.MEMSTORE_SIZE,
|
||||
MetricsRegionServerSource.MEMSTORE_SIZE_DESC),
|
||||
|
|
|
@ -96,6 +96,11 @@ public class TestMetricsRegionSourceImpl {
|
|||
return 0;
|
||||
}
|
||||
|
||||
@Override
public int getMaxStoreFileRefCount() {
  // Stub: always reports a max store file ref count of 0
  return 0;
}
|
||||
|
||||
@Override
|
||||
public long getMemstoreSize() {
|
||||
return 0;
|
||||
|
|
|
@ -3631,6 +3631,28 @@ public final class ClusterStatusProtos {
|
|||
* </pre>
|
||||
*/
|
||||
int getStoreRefCount();
|
||||
|
||||
// optional int32 max_store_file_ref_count = 22 [default = 0];
|
||||
/**
|
||||
* <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
|
||||
*
|
||||
* <pre>
|
||||
**
|
||||
* The max number of references active on single store file among all store files
|
||||
* that belong to given region
|
||||
* </pre>
|
||||
*/
|
||||
boolean hasMaxStoreFileRefCount();
|
||||
/**
|
||||
* <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
|
||||
*
|
||||
* <pre>
|
||||
**
|
||||
* The max number of references active on single store file among all store files
|
||||
* that belong to given region
|
||||
* </pre>
|
||||
*/
|
||||
int getMaxStoreFileRefCount();
|
||||
}
|
||||
/**
|
||||
* Protobuf type {@code hbase.pb.RegionLoad}
|
||||
|
@ -3789,6 +3811,11 @@ public final class ClusterStatusProtos {
|
|||
storeRefCount_ = input.readInt32();
|
||||
break;
|
||||
}
|
||||
case 176: {
|
||||
bitField0_ |= 0x00040000;
|
||||
maxStoreFileRefCount_ = input.readInt32();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
|
||||
|
@ -4330,6 +4357,34 @@ public final class ClusterStatusProtos {
|
|||
return storeRefCount_;
|
||||
}
|
||||
|
||||
// optional int32 max_store_file_ref_count = 22 [default = 0];
|
||||
public static final int MAX_STORE_FILE_REF_COUNT_FIELD_NUMBER = 22;
|
||||
private int maxStoreFileRefCount_;
|
||||
/**
|
||||
* <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
|
||||
*
|
||||
* <pre>
|
||||
**
|
||||
* The max number of references active on single store file among all store files
|
||||
* that belong to given region
|
||||
* </pre>
|
||||
*/
|
||||
public boolean hasMaxStoreFileRefCount() {
|
||||
return ((bitField0_ & 0x00040000) == 0x00040000);
|
||||
}
|
||||
/**
|
||||
* <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
|
||||
*
|
||||
* <pre>
|
||||
**
|
||||
* The max number of references active on single store file among all store files
|
||||
* that belong to given region
|
||||
* </pre>
|
||||
*/
|
||||
public int getMaxStoreFileRefCount() {
|
||||
return maxStoreFileRefCount_;
|
||||
}
|
||||
|
||||
private void initFields() {
|
||||
regionSpecifier_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.getDefaultInstance();
|
||||
stores_ = 0;
|
||||
|
@ -4350,6 +4405,7 @@ public final class ClusterStatusProtos {
|
|||
lastMajorCompactionTs_ = 0L;
|
||||
storeCompleteSequenceId_ = java.util.Collections.emptyList();
|
||||
storeRefCount_ = 0;
|
||||
maxStoreFileRefCount_ = 0;
|
||||
}
|
||||
private byte memoizedIsInitialized = -1;
|
||||
public final boolean isInitialized() {
|
||||
|
@ -4434,6 +4490,9 @@ public final class ClusterStatusProtos {
|
|||
if (((bitField0_ & 0x00020000) == 0x00020000)) {
|
||||
output.writeInt32(21, storeRefCount_);
|
||||
}
|
||||
if (((bitField0_ & 0x00040000) == 0x00040000)) {
|
||||
output.writeInt32(22, maxStoreFileRefCount_);
|
||||
}
|
||||
getUnknownFields().writeTo(output);
|
||||
}
|
||||
|
||||
|
@ -4519,6 +4578,10 @@ public final class ClusterStatusProtos {
|
|||
size += com.google.protobuf.CodedOutputStream
|
||||
.computeInt32Size(21, storeRefCount_);
|
||||
}
|
||||
if (((bitField0_ & 0x00040000) == 0x00040000)) {
|
||||
size += com.google.protobuf.CodedOutputStream
|
||||
.computeInt32Size(22, maxStoreFileRefCount_);
|
||||
}
|
||||
size += getUnknownFields().getSerializedSize();
|
||||
memoizedSerializedSize = size;
|
||||
return size;
|
||||
|
@ -4633,6 +4696,11 @@ public final class ClusterStatusProtos {
|
|||
result = result && (getStoreRefCount()
|
||||
== other.getStoreRefCount());
|
||||
}
|
||||
result = result && (hasMaxStoreFileRefCount() == other.hasMaxStoreFileRefCount());
|
||||
if (hasMaxStoreFileRefCount()) {
|
||||
result = result && (getMaxStoreFileRefCount()
|
||||
== other.getMaxStoreFileRefCount());
|
||||
}
|
||||
result = result &&
|
||||
getUnknownFields().equals(other.getUnknownFields());
|
||||
return result;
|
||||
|
@ -4723,6 +4791,10 @@ public final class ClusterStatusProtos {
|
|||
hash = (37 * hash) + STORE_REF_COUNT_FIELD_NUMBER;
|
||||
hash = (53 * hash) + getStoreRefCount();
|
||||
}
|
||||
if (hasMaxStoreFileRefCount()) {
|
||||
hash = (37 * hash) + MAX_STORE_FILE_REF_COUNT_FIELD_NUMBER;
|
||||
hash = (53 * hash) + getMaxStoreFileRefCount();
|
||||
}
|
||||
hash = (29 * hash) + getUnknownFields().hashCode();
|
||||
memoizedHashCode = hash;
|
||||
return hash;
|
||||
|
@ -4880,6 +4952,8 @@ public final class ClusterStatusProtos {
|
|||
}
|
||||
storeRefCount_ = 0;
|
||||
bitField0_ = (bitField0_ & ~0x00040000);
|
||||
maxStoreFileRefCount_ = 0;
|
||||
bitField0_ = (bitField0_ & ~0x00080000);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -4993,6 +5067,10 @@ public final class ClusterStatusProtos {
|
|||
to_bitField0_ |= 0x00020000;
|
||||
}
|
||||
result.storeRefCount_ = storeRefCount_;
|
||||
if (((from_bitField0_ & 0x00080000) == 0x00080000)) {
|
||||
to_bitField0_ |= 0x00040000;
|
||||
}
|
||||
result.maxStoreFileRefCount_ = maxStoreFileRefCount_;
|
||||
result.bitField0_ = to_bitField0_;
|
||||
onBuilt();
|
||||
return result;
|
||||
|
@ -5089,6 +5167,9 @@ public final class ClusterStatusProtos {
|
|||
if (other.hasStoreRefCount()) {
|
||||
setStoreRefCount(other.getStoreRefCount());
|
||||
}
|
||||
if (other.hasMaxStoreFileRefCount()) {
|
||||
setMaxStoreFileRefCount(other.getMaxStoreFileRefCount());
|
||||
}
|
||||
this.mergeUnknownFields(other.getUnknownFields());
|
||||
return this;
|
||||
}
|
||||
|
@ -6428,6 +6509,63 @@ public final class ClusterStatusProtos {
|
|||
return this;
|
||||
}
|
||||
|
||||
// optional int32 max_store_file_ref_count = 22 [default = 0];
|
||||
private int maxStoreFileRefCount_ ;
|
||||
/**
|
||||
* <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
|
||||
*
|
||||
* <pre>
|
||||
**
|
||||
* The max number of references active on single store file among all store files
|
||||
* that belong to given region
|
||||
* </pre>
|
||||
*/
|
||||
public boolean hasMaxStoreFileRefCount() {
|
||||
return ((bitField0_ & 0x00080000) == 0x00080000);
|
||||
}
|
||||
/**
|
||||
* <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
|
||||
*
|
||||
* <pre>
|
||||
**
|
||||
* The max number of references active on single store file among all store files
|
||||
* that belong to given region
|
||||
* </pre>
|
||||
*/
|
||||
public int getMaxStoreFileRefCount() {
|
||||
return maxStoreFileRefCount_;
|
||||
}
|
||||
/**
|
||||
* <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
|
||||
*
|
||||
* <pre>
|
||||
**
|
||||
* The max number of references active on single store file among all store files
|
||||
* that belong to given region
|
||||
* </pre>
|
||||
*/
|
||||
public Builder setMaxStoreFileRefCount(int value) {
|
||||
bitField0_ |= 0x00080000;
|
||||
maxStoreFileRefCount_ = value;
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* <code>optional int32 max_store_file_ref_count = 22 [default = 0];</code>
|
||||
*
|
||||
* <pre>
|
||||
**
|
||||
* The max number of references active on single store file among all store files
|
||||
* that belong to given region
|
||||
* </pre>
|
||||
*/
|
||||
public Builder clearMaxStoreFileRefCount() {
|
||||
bitField0_ = (bitField0_ & ~0x00080000);
|
||||
maxStoreFileRefCount_ = 0;
|
||||
onChanged();
|
||||
return this;
|
||||
}
|
||||
|
||||
// @@protoc_insertion_point(builder_scope:hbase.pb.RegionLoad)
|
||||
}
|
||||
|
||||
|
@ -14845,7 +14983,7 @@ public final class ClusterStatusProtos {
|
|||
"e\030\001 \002(\014\022\023\n\013sequence_id\030\002 \002(\004\"p\n\026RegionSt" +
|
||||
"oreSequenceIds\022 \n\030last_flushed_sequence_" +
|
||||
"id\030\001 \002(\004\0224\n\021store_sequence_id\030\002 \003(\0132\031.hb" +
|
||||
"ase.pb.StoreSequenceId\"\360\004\n\nRegionLoad\0223\n" +
|
||||
"ase.pb.StoreSequenceId\"\225\005\n\nRegionLoad\0223\n" +
|
||||
"\020region_specifier\030\001 \002(\0132\031.hbase.pb.Regio" +
|
||||
"nSpecifier\022\016\n\006stores\030\002 \001(\r\022\022\n\nstorefiles",
|
||||
"\030\003 \001(\r\022\"\n\032store_uncompressed_size_MB\030\004 \001" +
|
||||
|
@ -14861,38 +14999,38 @@ public final class ClusterStatusProtos {
|
|||
"\002\022#\n\030last_major_compaction_ts\030\021 \001(\004:\0010\022=" +
|
||||
"\n\032store_complete_sequence_id\030\022 \003(\0132\031.hba" +
|
||||
"se.pb.StoreSequenceId\022\032\n\017store_ref_count" +
|
||||
"\030\025 \001(\005:\0010\"T\n\023ReplicationLoadSink\022\032\n\022ageO" +
|
||||
"fLastAppliedOp\030\001 \002(\004\022!\n\031timeStampsOfLast" +
|
||||
"AppliedOp\030\002 \002(\004\"\225\001\n\025ReplicationLoadSourc" +
|
||||
"e\022\016\n\006peerID\030\001 \002(\t\022\032\n\022ageOfLastShippedOp\030" +
|
||||
"\002 \002(\004\022\026\n\016sizeOfLogQueue\030\003 \002(\r\022 \n\030timeSta" +
|
||||
"mpOfLastShippedOp\030\004 \002(\004\022\026\n\016replicationLa" +
|
||||
"g\030\005 \002(\004\"\212\003\n\nServerLoad\022\032\n\022number_of_requ",
|
||||
"ests\030\001 \001(\004\022 \n\030total_number_of_requests\030\002" +
|
||||
" \001(\004\022\024\n\014used_heap_MB\030\003 \001(\r\022\023\n\013max_heap_M" +
|
||||
"B\030\004 \001(\r\022*\n\014region_loads\030\005 \003(\0132\024.hbase.pb" +
|
||||
".RegionLoad\022+\n\014coprocessors\030\006 \003(\0132\025.hbas" +
|
||||
"e.pb.Coprocessor\022\031\n\021report_start_time\030\007 " +
|
||||
"\001(\004\022\027\n\017report_end_time\030\010 \001(\004\022\030\n\020info_ser" +
|
||||
"ver_port\030\t \001(\r\0227\n\016replLoadSource\030\n \003(\0132\037" +
|
||||
".hbase.pb.ReplicationLoadSource\0223\n\014replL" +
|
||||
"oadSink\030\013 \001(\0132\035.hbase.pb.ReplicationLoad" +
|
||||
"Sink\"a\n\016LiveServerInfo\022$\n\006server\030\001 \002(\0132\024",
|
||||
".hbase.pb.ServerName\022)\n\013server_load\030\002 \002(" +
|
||||
"\0132\024.hbase.pb.ServerLoad\"\250\003\n\rClusterStatu" +
|
||||
"s\0228\n\rhbase_version\030\001 \001(\0132!.hbase.pb.HBas" +
|
||||
"eVersionFileContent\022.\n\014live_servers\030\002 \003(" +
|
||||
"\0132\030.hbase.pb.LiveServerInfo\022*\n\014dead_serv" +
|
||||
"ers\030\003 \003(\0132\024.hbase.pb.ServerName\022;\n\025regio" +
|
||||
"ns_in_transition\030\004 \003(\0132\034.hbase.pb.Region" +
|
||||
"InTransition\022\'\n\ncluster_id\030\005 \001(\0132\023.hbase" +
|
||||
".pb.ClusterId\0222\n\023master_coprocessors\030\006 \003" +
|
||||
"(\0132\025.hbase.pb.Coprocessor\022$\n\006master\030\007 \001(",
|
||||
"\0132\024.hbase.pb.ServerName\022,\n\016backup_master" +
|
||||
"s\030\010 \003(\0132\024.hbase.pb.ServerName\022\023\n\013balance" +
|
||||
"r_on\030\t \001(\010BF\n*org.apache.hadoop.hbase.pr" +
|
||||
"otobuf.generatedB\023ClusterStatusProtosH\001\240" +
|
||||
"\001\001"
|
||||
"\030\025 \001(\005:\0010\022#\n\030max_store_file_ref_count\030\026 " +
|
||||
"\001(\005:\0010\"T\n\023ReplicationLoadSink\022\032\n\022ageOfLa" +
|
||||
"stAppliedOp\030\001 \002(\004\022!\n\031timeStampsOfLastApp" +
|
||||
"liedOp\030\002 \002(\004\"\225\001\n\025ReplicationLoadSource\022\016" +
|
||||
"\n\006peerID\030\001 \002(\t\022\032\n\022ageOfLastShippedOp\030\002 \002" +
|
||||
"(\004\022\026\n\016sizeOfLogQueue\030\003 \002(\r\022 \n\030timeStampO" +
|
||||
"fLastShippedOp\030\004 \002(\004\022\026\n\016replicationLag\030\005",
|
||||
" \002(\004\"\212\003\n\nServerLoad\022\032\n\022number_of_request" +
|
||||
"s\030\001 \001(\004\022 \n\030total_number_of_requests\030\002 \001(" +
|
||||
"\004\022\024\n\014used_heap_MB\030\003 \001(\r\022\023\n\013max_heap_MB\030\004" +
|
||||
" \001(\r\022*\n\014region_loads\030\005 \003(\0132\024.hbase.pb.Re" +
|
||||
"gionLoad\022+\n\014coprocessors\030\006 \003(\0132\025.hbase.p" +
|
||||
"b.Coprocessor\022\031\n\021report_start_time\030\007 \001(\004" +
|
||||
"\022\027\n\017report_end_time\030\010 \001(\004\022\030\n\020info_server" +
|
||||
"_port\030\t \001(\r\0227\n\016replLoadSource\030\n \003(\0132\037.hb" +
|
||||
"ase.pb.ReplicationLoadSource\0223\n\014replLoad" +
|
||||
"Sink\030\013 \001(\0132\035.hbase.pb.ReplicationLoadSin",
|
||||
"k\"a\n\016LiveServerInfo\022$\n\006server\030\001 \002(\0132\024.hb" +
|
||||
"ase.pb.ServerName\022)\n\013server_load\030\002 \002(\0132\024" +
|
||||
".hbase.pb.ServerLoad\"\250\003\n\rClusterStatus\0228" +
|
||||
"\n\rhbase_version\030\001 \001(\0132!.hbase.pb.HBaseVe" +
|
||||
"rsionFileContent\022.\n\014live_servers\030\002 \003(\0132\030" +
|
||||
".hbase.pb.LiveServerInfo\022*\n\014dead_servers" +
|
||||
"\030\003 \003(\0132\024.hbase.pb.ServerName\022;\n\025regions_" +
|
||||
"in_transition\030\004 \003(\0132\034.hbase.pb.RegionInT" +
|
||||
"ransition\022\'\n\ncluster_id\030\005 \001(\0132\023.hbase.pb" +
|
||||
".ClusterId\0222\n\023master_coprocessors\030\006 \003(\0132",
|
||||
"\025.hbase.pb.Coprocessor\022$\n\006master\030\007 \001(\0132\024" +
|
||||
".hbase.pb.ServerName\022,\n\016backup_masters\030\010" +
|
||||
" \003(\0132\024.hbase.pb.ServerName\022\023\n\013balancer_o" +
|
||||
"n\030\t \001(\010BF\n*org.apache.hadoop.hbase.proto" +
|
||||
"buf.generatedB\023ClusterStatusProtosH\001\240\001\001"
|
||||
};
|
||||
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
|
||||
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
|
||||
|
@ -14928,7 +15066,7 @@ public final class ClusterStatusProtos {
|
|||
internal_static_hbase_pb_RegionLoad_fieldAccessorTable = new
|
||||
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
|
||||
internal_static_hbase_pb_RegionLoad_descriptor,
|
||||
new java.lang.String[] { "RegionSpecifier", "Stores", "Storefiles", "StoreUncompressedSizeMB", "StorefileSizeMB", "MemstoreSizeMB", "StorefileIndexSizeMB", "ReadRequestsCount", "WriteRequestsCount", "TotalCompactingKVs", "CurrentCompactedKVs", "RootIndexSizeKB", "TotalStaticIndexSizeKB", "TotalStaticBloomSizeKB", "CompleteSequenceId", "DataLocality", "LastMajorCompactionTs", "StoreCompleteSequenceId", "StoreRefCount", });
|
||||
new java.lang.String[] { "RegionSpecifier", "Stores", "Storefiles", "StoreUncompressedSizeMB", "StorefileSizeMB", "MemstoreSizeMB", "StorefileIndexSizeMB", "ReadRequestsCount", "WriteRequestsCount", "TotalCompactingKVs", "CurrentCompactedKVs", "RootIndexSizeKB", "TotalStaticIndexSizeKB", "TotalStaticBloomSizeKB", "CompleteSequenceId", "DataLocality", "LastMajorCompactionTs", "StoreCompleteSequenceId", "StoreRefCount", "MaxStoreFileRefCount", });
|
||||
internal_static_hbase_pb_ReplicationLoadSink_descriptor =
|
||||
getDescriptor().getMessageTypes().get(5);
|
||||
internal_static_hbase_pb_ReplicationLoadSink_fieldAccessorTable = new
|
||||
|
|
|
@ -145,6 +145,12 @@ message RegionLoad {
|
|||
|
||||
/** the number of references active on the store */
|
||||
optional int32 store_ref_count = 21 [ default = 0 ];
|
||||
|
||||
/**
 * The max number of references active on a single store file among all store files
 * that belong to the given region
 */
optional int32 max_store_file_ref_count = 22 [default = 0];
|
||||
}
|
||||
|
||||
/* Server-level protobufs */
|
||||
|
|
|
@ -842,6 +842,16 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve HRegionInfo for given region name
|
||||
*
|
||||
* @param regionName Region name in byte[]
|
||||
* @return HRegionInfo
|
||||
*/
|
||||
public HRegionInfo getRegionInfo(final byte[] regionName) {
|
||||
return regionStates.getRegionInfo(regionName);
|
||||
}
|
||||
|
||||
/**
|
||||
* This call is invoked only (1) master assign meta;
|
||||
* (2) during failover mode startup, zk assignment node processing.
|
||||
|
|
|
@ -117,6 +117,7 @@ import org.apache.hadoop.hbase.master.procedure.DeleteNamespaceProcedure;
|
|||
import org.apache.hadoop.hbase.master.procedure.DeleteTableProcedure;
|
||||
import org.apache.hadoop.hbase.master.procedure.DisableTableProcedure;
|
||||
import org.apache.hadoop.hbase.master.procedure.EnableTableProcedure;
|
||||
import org.apache.hadoop.hbase.master.procedure.MasterDDLOperationHelper;
|
||||
import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
|
||||
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||
import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.ProcedureEvent;
|
||||
|
@ -302,6 +303,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
|
|||
// manager of assignment nodes in zookeeper
|
||||
AssignmentManager assignmentManager;
|
||||
|
||||
private RegionsRecoveryChore regionsRecoveryChore = null;
|
||||
|
||||
// buffer for "fatal error" notices from region servers
|
||||
// in the cluster. This is only used for assisting
|
||||
// operations/debugging.
|
||||
|
@ -1261,6 +1264,20 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
|
|||
getMasterFileSystem().getFileSystem(), archiveDir, cleanerPool, params);
|
||||
getChoreService().scheduleChore(hfileCleaner);
|
||||
|
||||
// Regions Reopen based on very high storeFileRefCount is considered enabled
// only if hbase.regions.recovery.store.file.ref.count has value > 0
final int maxStoreFileRefCount = conf.getInt(
  HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
  HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
if (maxStoreFileRefCount > 0) {
  // Feature enabled: create the chore and hand it to the chore service
  this.regionsRecoveryChore = new RegionsRecoveryChore(this, conf, this);
  getChoreService().scheduleChore(this.regionsRecoveryChore);
} else {
  // Feature disabled: regionsRecoveryChore keeps its initial null value.
  // The message previously ended with a stray escaped quote character.
  LOG.info("Reopening regions with very high storeFileRefCount is disabled. "
    + "Provide threshold value > 0 for " + HConstants.STORE_FILE_REF_COUNT_THRESHOLD
    + " to enable it.");
}
|
||||
|
||||
final boolean isSnapshotChoreEnabled = this.snapshotCleanupTracker
|
||||
.isSnapshotCleanupEnabled();
|
||||
this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager());
|
||||
|
@ -1409,6 +1426,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
|
|||
choreService.cancelChore(this.replicationZKLockCleanerChore);
|
||||
choreService.cancelChore(this.replicationZKNodeCleanerChore);
|
||||
choreService.cancelChore(this.snapshotCleanerChore);
|
||||
choreService.cancelChore(this.regionsRecoveryChore);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3263,6 +3281,46 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reopen regions provided in the argument
|
||||
*
|
||||
* @param tableName The current table name
|
||||
* @param hRegionInfos List of HRegionInfo of the regions to reopen
|
||||
* @param nonceGroup Identifier for the source of the request, a client or process
|
||||
* @param nonce A unique identifier for this operation from the client or process identified by
|
||||
* <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
|
||||
* @return procedure Id
|
||||
* @throws IOException if reopening region fails while running procedure
|
||||
*/
|
||||
long reopenRegions(final TableName tableName, final List<HRegionInfo> hRegionInfos,
|
||||
final long nonceGroup, final long nonce)
|
||||
throws IOException {
|
||||
|
||||
return MasterProcedureUtil
|
||||
.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
|
||||
|
||||
@Override
|
||||
protected void run() throws IOException {
|
||||
boolean areAllRegionsReopened = MasterDDLOperationHelper.reOpenAllRegions(
|
||||
procedureExecutor.getEnvironment(), tableName, hRegionInfos);
|
||||
if (areAllRegionsReopened) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("All required regions reopened for table: " + tableName);
|
||||
}
|
||||
} else {
|
||||
LOG.warn("Error while reopening regions of table: " + tableName);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getDescription() {
|
||||
return "ReopenTableRegionsProcedure";
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
|
||||
return getClusterStatusWithoutCoprocessor().getLastMajorCompactionTsForTable(table);
|
||||
|
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.collections.MapUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.ClusterStatus;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.RegionLoad;
|
||||
import org.apache.hadoop.hbase.ScheduledChore;
|
||||
import org.apache.hadoop.hbase.ServerLoad;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.Stoppable;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
/**
|
||||
* This chore, every time it runs, will try to recover regions with high store ref count
|
||||
* by reopening them
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class RegionsRecoveryChore extends ScheduledChore {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryChore.class);

// Name passed to ScheduledChore for this chore
private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";

// Config key for the interval between two runs of this chore
private static final String REGIONS_RECOVERY_INTERVAL =
    "hbase.master.regions.recovery.check.interval";

// Default interval: 1200 * 1000 ms = 20 minutes
private static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000;

// Prefix of the error logged when a reopen request for a table fails.
// NOTE(review): the identifier misspells "REGIONS"; rename it together with its use in chore().
private static final String ERROR_REOPEN_REIONS_MSG =
    "Error reopening regions with high storeRefCount. ";

// Master used to fetch the cluster status and to submit region reopen requests
private final HMaster hMaster;
// Threshold read from HConstants.STORE_FILE_REF_COUNT_THRESHOLD at construction time
private final int storeFileRefCountThreshold;

// Supplies the nonce group and nonces for the reopen requests submitted by this chore
private static final PerClientRandomNonceGenerator NONCE_GENERATOR =
    new PerClientRandomNonceGenerator();
|
||||
|
||||
/**
|
||||
* Construct RegionsRecoveryChore with provided params
|
||||
*
|
||||
* @param stopper When {@link Stoppable#isStopped()} is true, this chore will cancel and cleanup
|
||||
* @param configuration The configuration params to be used
|
||||
* @param hMaster HMaster instance to initiate RegionTableRegions
|
||||
*/
|
||||
RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
|
||||
final HMaster hMaster) {
|
||||
|
||||
super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(REGIONS_RECOVERY_INTERVAL,
|
||||
DEFAULT_REGIONS_RECOVERY_INTERVAL));
|
||||
this.hMaster = hMaster;
|
||||
this.storeFileRefCountThreshold = configuration.getInt(
|
||||
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
|
||||
HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void chore() {
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace(
|
||||
"Starting up Regions Recovery chore for reopening regions based on storeFileRefCount...");
|
||||
}
|
||||
try {
|
||||
// only if storeFileRefCountThreshold > 0, consider the feature turned on
|
||||
if (storeFileRefCountThreshold > 0) {
|
||||
final ClusterStatus clusterStatus = hMaster.getClusterStatus();
|
||||
final Map<ServerName, ServerLoad> serverMetricsMap =
|
||||
clusterStatus.getLiveServersLoad();
|
||||
final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap =
|
||||
getTableToRegionsByRefCount(serverMetricsMap);
|
||||
if (MapUtils.isNotEmpty(tableToReopenRegionsMap)) {
|
||||
for (Map.Entry<TableName, List<HRegionInfo>> tableRegionEntry :
|
||||
tableToReopenRegionsMap.entrySet()) {
|
||||
TableName tableName = tableRegionEntry.getKey();
|
||||
List<HRegionInfo> hRegionInfos = tableRegionEntry.getValue();
|
||||
try {
|
||||
LOG.warn("Reopening regions due to high storeFileRefCount. " +
|
||||
"TableName: {} , noOfRegions: {}", tableName, hRegionInfos.size());
|
||||
hMaster.reopenRegions(tableName, hRegionInfos, NONCE_GENERATOR.getNonceGroup(),
|
||||
NONCE_GENERATOR.newNonce());
|
||||
} catch (IOException e) {
|
||||
List<String> regionNames = new ArrayList<>();
|
||||
for (HRegionInfo hRegionInfo : hRegionInfos) {
|
||||
regionNames.add(hRegionInfo.getRegionNameAsString());
|
||||
}
|
||||
LOG.error("{} tableName: {}, regionNames: {}", ERROR_REOPEN_REIONS_MSG,
|
||||
tableName, regionNames, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Reopening regions with very high storeFileRefCount is disabled. " +
|
||||
"Provide threshold value > 0 for {} to enable it.",
|
||||
HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error while reopening regions based on storeRefCount threshold", e);
|
||||
}
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace(
|
||||
"Exiting Regions Recovery chore for reopening regions based on storeFileRefCount...");
|
||||
}
|
||||
}
|
||||
|
||||
private Map<TableName, List<HRegionInfo>> getTableToRegionsByRefCount(
|
||||
final Map<ServerName, ServerLoad> serverMetricsMap) {
|
||||
|
||||
final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap = new HashMap<>();
|
||||
for (ServerLoad serverLoad : serverMetricsMap.values()) {
|
||||
Map<byte[], RegionLoad> regionLoadsMap = serverLoad.getRegionsLoad();
|
||||
for (RegionLoad regionLoad : regionLoadsMap.values()) {
|
||||
// For each region, each store file can have different ref counts
|
||||
// We need to find maximum of all such ref counts and if that max count
|
||||
// is beyond a threshold value, we should reopen the region.
|
||||
// Here, we take max ref count of all store files and not the cumulative
|
||||
// count of all store files
|
||||
final int maxStoreFileRefCount = regionLoad.getMaxStoreFileRefCount();
|
||||
|
||||
if (maxStoreFileRefCount > storeFileRefCountThreshold) {
|
||||
final byte[] regionName = regionLoad.getName();
|
||||
prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName,
|
||||
maxStoreFileRefCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
return tableToReopenRegionsMap;
|
||||
|
||||
}
|
||||
|
||||
private void prepareTableToReopenRegionsMap(
|
||||
final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap,
|
||||
final byte[] regionName, final int regionStoreRefCount) {
|
||||
|
||||
final HRegionInfo hRegionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName);
|
||||
final TableName tableName = hRegionInfo.getTable();
|
||||
if (TableName.META_TABLE_NAME.equals(tableName)) {
|
||||
// Do not reopen regions of meta table even if it has
|
||||
// high store file reference count
|
||||
return;
|
||||
}
|
||||
LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..",
|
||||
hRegionInfo.getRegionNameAsString(), tableName, regionStoreRefCount);
|
||||
if (!tableToReopenRegionsMap.containsKey(tableName)) {
|
||||
tableToReopenRegionsMap.put(tableName, new ArrayList<HRegionInfo>());
|
||||
}
|
||||
tableToReopenRegionsMap.get(tableName).add(hRegionInfo);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -1578,6 +1578,8 @@ public class HRegionServer extends HasThread implements
|
|||
byte[] name = r.getRegionInfo().getRegionName();
|
||||
int stores = 0;
|
||||
int storefiles = 0;
|
||||
int storeRefCount = 0;
|
||||
int maxStoreFileRefCount = 0;
|
||||
int storeUncompressedSizeMB = 0;
|
||||
int storefileSizeMB = 0;
|
||||
int memstoreSizeMB = (int) (r.getMemstoreSize() / 1024 / 1024);
|
||||
|
@ -1591,6 +1593,13 @@ public class HRegionServer extends HasThread implements
|
|||
stores += storeList.size();
|
||||
for (Store store : storeList) {
|
||||
storefiles += store.getStorefilesCount();
|
||||
if (store instanceof HStore) {
|
||||
HStore hStore = (HStore) store;
|
||||
int currentStoreRefCount = hStore.getStoreRefCount();
|
||||
storeRefCount += currentStoreRefCount;
|
||||
int currentMaxStoreFileRefCount = hStore.getMaxStoreFileRefCount();
|
||||
maxStoreFileRefCount = Math.max(maxStoreFileRefCount, currentMaxStoreFileRefCount);
|
||||
}
|
||||
storeUncompressedSizeMB += (int) (store.getStoreSizeUncompressed() / 1024 / 1024);
|
||||
storefileSizeMB += (int) (store.getStorefilesSize() / 1024 / 1024);
|
||||
storefileIndexSizeMB += (int) (store.getStorefilesIndexSize() / 1024 / 1024);
|
||||
|
@ -1617,6 +1626,8 @@ public class HRegionServer extends HasThread implements
|
|||
regionLoadBldr.setRegionSpecifier(regionSpecifier.build())
|
||||
.setStores(stores)
|
||||
.setStorefiles(storefiles)
|
||||
.setStoreRefCount(storeRefCount)
|
||||
.setMaxStoreFileRefCount(maxStoreFileRefCount)
|
||||
.setStoreUncompressedSizeMB(storeUncompressedSizeMB)
|
||||
.setStorefileSizeMB(storefileSizeMB)
|
||||
.setMemstoreSizeMB(memstoreSizeMB)
|
||||
|
|
|
@ -2868,4 +2868,22 @@ public class HStore implements Store {
|
|||
}
|
||||
return refCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return get maximum ref count of storeFile among all HStore Files
|
||||
* for the HStore
|
||||
*/
|
||||
public int getMaxStoreFileRefCount() {
|
||||
int maxStoreFileRefCount = 0;
|
||||
for (StoreFile store : storeEngine.getStoreFileManager().getStorefiles()) {
|
||||
if (store.isHFile()) {
|
||||
StoreFile.Reader storeReader = store.getReader();
|
||||
if (storeReader != null) {
|
||||
maxStoreFileRefCount = Math.max(maxStoreFileRefCount, storeReader.getRefCount());
|
||||
}
|
||||
}
|
||||
}
|
||||
return maxStoreFileRefCount;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -55,6 +55,7 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
|
|||
private long numReferenceFiles;
|
||||
private long maxFlushQueueSize;
|
||||
private long maxCompactionQueueSize;
|
||||
private int maxStoreFileRefCount;
|
||||
|
||||
private ScheduledFuture<?> regionMetricsUpdateTask;
|
||||
|
||||
|
@ -123,6 +124,11 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
|
|||
return storeRefCount;
|
||||
}
|
||||
|
||||
// Returns the cached maxStoreFileRefCount field; the value is assigned by the periodic
// metrics update task's run() from the max of HStore.getMaxStoreFileRefCount() over stores.
@Override
|
||||
public int getMaxStoreFileRefCount() {
|
||||
return maxStoreFileRefCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getReadRequestCount() {
|
||||
return this.region.getReadRequestsCount();
|
||||
|
@ -216,6 +222,7 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
|
|||
public void run() {
|
||||
long tempNumStoreFiles = 0;
|
||||
int tempStoreRefCount = 0;
|
||||
int tempMaxStoreFileRefCount = 0;
|
||||
long tempMemstoreSize = 0;
|
||||
long tempStoreFileSize = 0;
|
||||
long tempMaxStoreFileAge = 0;
|
||||
|
@ -247,13 +254,18 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable
|
|||
|
||||
if (store instanceof HStore) {
|
||||
// Cast here to avoid interface changes to Store
|
||||
tempStoreRefCount += ((HStore)store).getStoreRefCount();
|
||||
HStore hStore = ((HStore) store);
|
||||
tempStoreRefCount += hStore.getStoreRefCount();
|
||||
int currentMaxStoreFileRefCount = hStore.getMaxStoreFileRefCount();
|
||||
tempMaxStoreFileRefCount = Math.max(tempMaxStoreFileRefCount,
|
||||
currentMaxStoreFileRefCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
numStoreFiles = tempNumStoreFiles;
|
||||
storeRefCount = tempStoreRefCount;
|
||||
maxStoreFileRefCount = tempMaxStoreFileRefCount;
|
||||
memstoreSize = tempMemstoreSize;
|
||||
storeFileSize = tempStoreFileSize;
|
||||
maxStoreFileAge = tempMaxStoreFileAge;
|
||||
|
|
|
@ -65,6 +65,11 @@ public class MetricsRegionWrapperStub implements MetricsRegionWrapper {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Stub implementation: always reports a max store file ref count of 0.
@Override
|
||||
public int getMaxStoreFileRefCount() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemstoreSize() {
|
||||
return 103;
|
||||
|
|
|
@ -2208,3 +2208,43 @@ The percent of region server RPC threads failed to abort RS.
|
|||
+
|
||||
.Default
|
||||
`0`
|
||||
|
||||
|
||||
[[hbase.master.regions.recovery.check.interval]]
|
||||
*`hbase.master.regions.recovery.check.interval`*::
|
||||
+
|
||||
.Description
|
||||
|
||||
Regions Recovery Chore interval in milliseconds.
|
||||
This chore keeps running at this interval to
|
||||
find all regions whose max store file ref count exceeds the configured threshold
|
||||
and reopens them.
|
||||
|
||||
+
|
||||
.Default
|
||||
`1200000`
|
||||
|
||||
|
||||
[[hbase.regions.recovery.store.file.ref.count]]
|
||||
*`hbase.regions.recovery.store.file.ref.count`*::
|
||||
+
|
||||
.Description
|
||||
|
||||
Very large ref count on a file indicates
|
||||
that it is a ref leak on that object. Such files
|
||||
can not be removed even after they are invalidated
|
||||
via compaction. The only way to recover in such a
|
||||
scenario is to reopen the region which can
|
||||
release all resources, like the refcount, leases, etc.
|
||||
This config represents Store files Ref Count threshold
|
||||
value considered for reopening regions.
|
||||
Any region with max store file ref count > this value
|
||||
would be eligible for reopening by master.
|
||||
Default value -1 indicates this feature is turned off.
|
||||
Only positive integer value should be provided to enable
|
||||
this feature.
|
||||
|
||||
+
|
||||
.Default
|
||||
`-1`
|
||||
|
||||
|
|
|
@ -2365,3 +2365,26 @@ void rename(Admin admin, String oldTableName, String newTableName) {
|
|||
admin.deleteTable(oldTableName);
|
||||
}
|
||||
----
|
||||
|
||||
|
||||
|
||||
[[auto_reopen_regions]]
|
||||
== Auto Region Reopen
|
||||
|
||||
We can leak store reader references if a coprocessor or core function somehow
|
||||
opens a scanner, or wraps one, and then does not take care to call close on the
|
||||
scanner or the wrapped instance. Leaked store files can not be removed even
|
||||
after they are invalidated via compaction.
|
||||
A reasonable mitigation for a reader reference
|
||||
leak would be a fast reopen of the region on the same server.
|
||||
This will release all resources, like the refcount, leases, etc.
|
||||
The clients should gracefully ride over this like any other region in
|
||||
transition.
|
||||
By default this auto reopen of region feature would be disabled.
|
||||
To enable it, please provide a high ref count value for config
|
||||
`hbase.regions.recovery.store.file.ref.count`.
|
||||
|
||||
Please refer to config descriptions for
|
||||
`hbase.master.regions.recovery.check.interval` and
|
||||
`hbase.regions.recovery.store.file.ref.count`.
|
||||
|
||||
|
|
Loading…
Reference in New Issue