From 7be181e45562908146de7ecf33cd2901070ac0c7 Mon Sep 17 00:00:00 2001 From: Enis Soztutar Date: Sat, 15 Mar 2014 23:24:26 +0000 Subject: [PATCH] HBASE-9721 RegionServer should not accept regionOpen RPC intended for another(previous) server git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1577951 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop/hbase/client/HBaseAdmin.java | 22 +- .../hadoop/hbase/protobuf/ProtobufUtil.java | 25 +- .../hbase/protobuf/RequestConverter.java | 29 +- .../hbase/protobuf/generated/AdminProtos.java | 392 ++++++++++++++---- hbase-protocol/src/main/protobuf/Admin.proto | 4 + .../hadoop/hbase/master/ServerManager.java | 8 +- .../hbase/regionserver/HRegionServer.java | 18 + .../hadoop/hbase/util/HBaseFsckRepair.java | 2 +- .../client/TestScannersFromClientSide.java | 4 +- .../hbase/master/TestAssignmentManager.java | 38 +- .../TestAssignmentManagerOnCluster.java | 26 +- .../hbase/master/TestMasterFailover.java | 16 +- .../master/TestZKBasedOpenCloseRegion.java | 6 +- .../TestRegionServerNoMaster.java | 56 ++- .../hadoop/hbase/util/TestHBaseFsck.java | 2 +- 15 files changed, 505 insertions(+), 143 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java index 409d274d2dc..387947a1bd5 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java @@ -39,7 +39,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.ClusterStatus; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HColumnDescriptor; @@ -47,12 +46,13 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RegionException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableExistsException; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotEnabledException; import org.apache.hadoop.hbase.TableNotFoundException; @@ -63,12 +63,7 @@ import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase; import org.apache.hadoop.hbase.exceptions.DeserializationException; -import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; -import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException; import org.apache.hadoop.hbase.exceptions.MergeRegionException; -import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; -import org.apache.hadoop.hbase.snapshot.SnapshotCreationException; -import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException; import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel; import org.apache.hadoop.hbase.ipc.MasterCoprocessorRpcChannel; import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController; @@ -135,7 +130,12 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureRequ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureResponse; import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneRequest; import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneResponse; +import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; +import org.apache.hadoop.hbase.snapshot.SnapshotCreationException; +import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException; import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; @@ -1394,7 +1394,7 @@ public class HBaseAdmin implements Abortable, Closeable { AdminService.BlockingInterface admin = this.connection.getAdmin(sn); // Close the region without updating zk state. CloseRegionRequest request = - RequestConverter.buildCloseRegionRequest(encodedRegionName, false); + RequestConverter.buildCloseRegionRequest(sn, encodedRegionName, false); try { CloseRegionResponse response = admin.closeRegion(null, request); boolean isRegionClosed = response.getClosed(); @@ -1418,7 +1418,7 @@ public class HBaseAdmin implements Abortable, Closeable { throws IOException { AdminService.BlockingInterface admin = this.connection.getAdmin(sn); // Close the region without updating zk state. - ProtobufUtil.closeRegion(admin, hri.getRegionName(), false); + ProtobufUtil.closeRegion(admin, sn, hri.getRegionName(), false); } /** @@ -1758,9 +1758,9 @@ public class HBaseAdmin implements Abortable, Closeable { * region as in unassign. This API can be used when a region not served by any region server and * still online as per Master's in memory state. If this API is incorrectly used on active region * then master will loose track of that region. - * + * * This is a special method that should be used by experts or hbck. - * + * * @param regionName * Region to offline. * @throws IOException diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java index 1a0525e9b89..09dc4360b4e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java @@ -642,7 +642,7 @@ public final class ProtobufUtil { * @param cellScanner * @param proto the protocol buffer Mutate to convert * @return the converted client Append - * @throws IOException + * @throws IOException */ public static Append toAppend(final MutationProto proto, final CellScanner cellScanner) throws IOException { @@ -1548,9 +1548,9 @@ public final class ProtobufUtil { * @throws IOException */ public static void closeRegion(final AdminService.BlockingInterface admin, - final byte[] regionName, final boolean transitionInZK) throws IOException { + final ServerName server, final byte[] regionName, final boolean transitionInZK) throws IOException { CloseRegionRequest closeRegionRequest = - RequestConverter.buildCloseRegionRequest(regionName, transitionInZK); + RequestConverter.buildCloseRegionRequest(server, regionName, transitionInZK); try { admin.closeRegion(null, closeRegionRequest); } catch (ServiceException se) { @@ -1569,11 +1569,12 @@ public final class ProtobufUtil { * @throws IOException */ public static boolean closeRegion(final AdminService.BlockingInterface admin, + final ServerName server, final byte[] regionName, final int versionOfClosingNode, final ServerName destinationServer, final boolean transitionInZK) throws IOException { CloseRegionRequest closeRegionRequest = - RequestConverter.buildCloseRegionRequest( + RequestConverter.buildCloseRegionRequest(server, regionName, versionOfClosingNode, destinationServer, transitionInZK); try { CloseRegionResponse response = admin.closeRegion(null, closeRegionRequest); @@ -1591,9 +1592,9 @@ public final class ProtobufUtil { * @throws IOException */ public static void openRegion(final AdminService.BlockingInterface admin, - final HRegionInfo region) throws IOException { + ServerName server, final HRegionInfo region) throws IOException { OpenRegionRequest request = - RequestConverter.buildOpenRegionRequest(region, -1, null); + RequestConverter.buildOpenRegionRequest(server, region, -1, null); try { admin.openRegion(null, request); } catch (ServiceException se) { @@ -2489,7 +2490,7 @@ public final class ProtobufUtil { /** * Convert a protocol buffer CellVisibility to a client CellVisibility - * + * * @param proto * @return the converted client CellVisibility */ @@ -2500,7 +2501,7 @@ public final class ProtobufUtil { /** * Convert a protocol buffer CellVisibility bytes to a client CellVisibility - * + * * @param protoBytes * @return the converted client CellVisibility * @throws DeserializationException @@ -2519,7 +2520,7 @@ public final class ProtobufUtil { /** * Create a protocol buffer CellVisibility based on a client CellVisibility. - * + * * @param cellVisibility * @return a protocol buffer CellVisibility */ @@ -2531,7 +2532,7 @@ public final class ProtobufUtil { /** * Convert a protocol buffer Authorizations to a client Authorizations - * + * * @param proto * @return the converted client Authorizations */ @@ -2542,7 +2543,7 @@ public final class ProtobufUtil { /** * Convert a protocol buffer Authorizations bytes to a client Authorizations - * + * * @param protoBytes * @return the converted client Authorizations * @throws DeserializationException @@ -2561,7 +2562,7 @@ public final class ProtobufUtil { /** * Create a protocol buffer Authorizations based on a client Authorizations. - * + * * @param authorizations * @return a protocol buffer Authorizations */ diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/RequestConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/RequestConverter.java index 49a7a44e4be..685fd4722a2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/RequestConverter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/RequestConverter.java @@ -23,13 +23,13 @@ import java.util.List; import com.google.protobuf.HBaseZeroCopyByteString; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.CellScannable; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Action; import org.apache.hadoop.hbase.client.Append; import org.apache.hadoop.hbase.client.Delete; @@ -719,15 +719,19 @@ public final class RequestConverter { /** * Create a protocol buffer OpenRegionRequest for a given region * + * @param server the serverName for the RPC * @param region the region to open - * @param versionOfOfflineNode that needs to be present in the offline node - * @param favoredNodes + * @param versionOfOfflineNode that needs to be present in the offline node + * @param favoredNodes * @return a protocol buffer OpenRegionRequest */ - public static OpenRegionRequest buildOpenRegionRequest( + public static OpenRegionRequest buildOpenRegionRequest(ServerName server, final HRegionInfo region, final int versionOfOfflineNode, List favoredNodes) { OpenRegionRequest.Builder builder = OpenRegionRequest.newBuilder(); builder.addOpenInfo(buildRegionOpenInfo(region, versionOfOfflineNode, favoredNodes)); + if (server != null) { + builder.setServerStartCode(server.getStartcode()); + } return builder.build(); } @@ -757,17 +761,20 @@ public final class RequestConverter { * @param transitionInZK indicator if to transition in ZK * @return a CloseRegionRequest */ - public static CloseRegionRequest buildCloseRegionRequest( + public static CloseRegionRequest buildCloseRegionRequest(ServerName server, final byte[] regionName, final boolean transitionInZK) { CloseRegionRequest.Builder builder = CloseRegionRequest.newBuilder(); RegionSpecifier region = buildRegionSpecifier( RegionSpecifierType.REGION_NAME, regionName); builder.setRegion(region); builder.setTransitionInZK(transitionInZK); + if (server != null) { + builder.setServerStartCode(server.getStartcode()); + } return builder.build(); } - public static CloseRegionRequest buildCloseRegionRequest( + public static CloseRegionRequest buildCloseRegionRequest(ServerName server, final byte[] regionName, final int versionOfClosingNode, ServerName destinationServer, final boolean transitionInZK) { CloseRegionRequest.Builder builder = CloseRegionRequest.newBuilder(); @@ -779,6 +786,9 @@ public final class RequestConverter { if (destinationServer != null){ builder.setDestinationServer(ProtobufUtil.toServerName( destinationServer) ); } + if (server != null) { + builder.setServerStartCode(server.getStartcode()); + } return builder.build(); } @@ -790,7 +800,7 @@ public final class RequestConverter { * @return a CloseRegionRequest */ public static CloseRegionRequest - buildCloseRegionRequest(final String encodedRegionName, + buildCloseRegionRequest(ServerName server, final String encodedRegionName, final boolean transitionInZK) { CloseRegionRequest.Builder builder = CloseRegionRequest.newBuilder(); RegionSpecifier region = buildRegionSpecifier( @@ -798,6 +808,9 @@ public final class RequestConverter { Bytes.toBytes(encodedRegionName)); builder.setRegion(region); builder.setTransitionInZK(transitionInZK); + if (server != null) { + builder.setServerStartCode(server.getStartcode()); + } return builder.build(); } diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/AdminProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/AdminProtos.java index 54e8f32cc55..0ad10ad7370 100644 --- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/AdminProtos.java +++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/AdminProtos.java @@ -3856,6 +3856,24 @@ public final class AdminProtos { */ org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest.RegionOpenInfoOrBuilder getOpenInfoOrBuilder( int index); + + // optional uint64 serverStartCode = 2; + /** + * optional uint64 serverStartCode = 2; + * + *
+     * the intended server for this RPC.
+     * 
+ */ + boolean hasServerStartCode(); + /** + * optional uint64 serverStartCode = 2; + * + *
+     * the intended server for this RPC.
+     * 
+ */ + long getServerStartCode(); } /** * Protobuf type {@code OpenRegionRequest} @@ -3916,6 +3934,11 @@ public final class AdminProtos { openInfo_.add(input.readMessage(org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest.RegionOpenInfo.PARSER, extensionRegistry)); break; } + case 16: { + bitField0_ |= 0x00000001; + serverStartCode_ = input.readUInt64(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -4989,6 +5012,7 @@ public final class AdminProtos { // @@protoc_insertion_point(class_scope:OpenRegionRequest.RegionOpenInfo) } + private int bitField0_; // repeated .OpenRegionRequest.RegionOpenInfo open_info = 1; public static final int OPEN_INFO_FIELD_NUMBER = 1; private java.util.List openInfo_; @@ -5025,8 +5049,33 @@ public final class AdminProtos { return openInfo_.get(index); } + // optional uint64 serverStartCode = 2; + public static final int SERVERSTARTCODE_FIELD_NUMBER = 2; + private long serverStartCode_; + /** + * optional uint64 serverStartCode = 2; + * + *
+     * the intended server for this RPC.
+     * 
+ */ + public boolean hasServerStartCode() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional uint64 serverStartCode = 2; + * + *
+     * the intended server for this RPC.
+     * 
+ */ + public long getServerStartCode() { + return serverStartCode_; + } + private void initFields() { openInfo_ = java.util.Collections.emptyList(); + serverStartCode_ = 0L; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -5049,6 +5098,9 @@ public final class AdminProtos { for (int i = 0; i < openInfo_.size(); i++) { output.writeMessage(1, openInfo_.get(i)); } + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeUInt64(2, serverStartCode_); + } getUnknownFields().writeTo(output); } @@ -5062,6 +5114,10 @@ public final class AdminProtos { size += com.google.protobuf.CodedOutputStream .computeMessageSize(1, openInfo_.get(i)); } + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeUInt64Size(2, serverStartCode_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -5087,6 +5143,11 @@ public final class AdminProtos { boolean result = true; result = result && getOpenInfoList() .equals(other.getOpenInfoList()); + result = result && (hasServerStartCode() == other.hasServerStartCode()); + if (hasServerStartCode()) { + result = result && (getServerStartCode() + == other.getServerStartCode()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -5104,6 +5165,10 @@ public final class AdminProtos { hash = (37 * hash) + OPEN_INFO_FIELD_NUMBER; hash = (53 * hash) + getOpenInfoList().hashCode(); } + if (hasServerStartCode()) { + hash = (37 * hash) + SERVERSTARTCODE_FIELD_NUMBER; + hash = (53 * hash) + hashLong(getServerStartCode()); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -5220,6 +5285,8 @@ public final class AdminProtos { } else { openInfoBuilder_.clear(); } + serverStartCode_ = 0L; + bitField0_ = (bitField0_ & ~0x00000002); return this; } @@ -5247,6 +5314,7 @@ public final class AdminProtos { public org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest buildPartial() { org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest result = new org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest(this); int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; if (openInfoBuilder_ == null) { if (((bitField0_ & 0x00000001) == 0x00000001)) { openInfo_ = java.util.Collections.unmodifiableList(openInfo_); @@ -5256,6 +5324,11 @@ public final class AdminProtos { } else { result.openInfo_ = openInfoBuilder_.build(); } + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000001; + } + result.serverStartCode_ = serverStartCode_; + result.bitField0_ = to_bitField0_; onBuilt(); return result; } @@ -5297,6 +5370,9 @@ public final class AdminProtos { } } } + if (other.hasServerStartCode()) { + setServerStartCode(other.getServerStartCode()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -5570,6 +5646,55 @@ public final class AdminProtos { return openInfoBuilder_; } + // optional uint64 serverStartCode = 2; + private long serverStartCode_ ; + /** + * optional uint64 serverStartCode = 2; + * + *
+       * the intended server for this RPC.
+       * 
+ */ + public boolean hasServerStartCode() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * optional uint64 serverStartCode = 2; + * + *
+       * the intended server for this RPC.
+       * 
+ */ + public long getServerStartCode() { + return serverStartCode_; + } + /** + * optional uint64 serverStartCode = 2; + * + *
+       * the intended server for this RPC.
+       * 
+ */ + public Builder setServerStartCode(long value) { + bitField0_ |= 0x00000002; + serverStartCode_ = value; + onChanged(); + return this; + } + /** + * optional uint64 serverStartCode = 2; + * + *
+       * the intended server for this RPC.
+       * 
+ */ + public Builder clearServerStartCode() { + bitField0_ = (bitField0_ & ~0x00000002); + serverStartCode_ = 0L; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:OpenRegionRequest) } @@ -6243,6 +6368,24 @@ public final class AdminProtos { * optional .ServerName destination_server = 4; */ org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getDestinationServerOrBuilder(); + + // optional uint64 serverStartCode = 5; + /** + * optional uint64 serverStartCode = 5; + * + *
+     * the intended server for this RPC.
+     * 
+ */ + boolean hasServerStartCode(); + /** + * optional uint64 serverStartCode = 5; + * + *
+     * the intended server for this RPC.
+     * 
+ */ + long getServerStartCode(); } /** * Protobuf type {@code CloseRegionRequest} @@ -6337,6 +6480,11 @@ public final class AdminProtos { bitField0_ |= 0x00000008; break; } + case 40: { + bitField0_ |= 0x00000010; + serverStartCode_ = input.readUInt64(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -6453,11 +6601,36 @@ public final class AdminProtos { return destinationServer_; } + // optional uint64 serverStartCode = 5; + public static final int SERVERSTARTCODE_FIELD_NUMBER = 5; + private long serverStartCode_; + /** + * optional uint64 serverStartCode = 5; + * + *
+     * the intended server for this RPC.
+     * 
+ */ + public boolean hasServerStartCode() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + /** + * optional uint64 serverStartCode = 5; + * + *
+     * the intended server for this RPC.
+     * 
+ */ + public long getServerStartCode() { + return serverStartCode_; + } + private void initFields() { region_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.getDefaultInstance(); versionOfClosingNode_ = 0; transitionInZK_ = true; destinationServer_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance(); + serverStartCode_ = 0L; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -6497,6 +6670,9 @@ public final class AdminProtos { if (((bitField0_ & 0x00000008) == 0x00000008)) { output.writeMessage(4, destinationServer_); } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + output.writeUInt64(5, serverStartCode_); + } getUnknownFields().writeTo(output); } @@ -6522,6 +6698,10 @@ public final class AdminProtos { size += com.google.protobuf.CodedOutputStream .computeMessageSize(4, destinationServer_); } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + size += com.google.protobuf.CodedOutputStream + .computeUInt64Size(5, serverStartCode_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -6565,6 +6745,11 @@ public final class AdminProtos { result = result && getDestinationServer() .equals(other.getDestinationServer()); } + result = result && (hasServerStartCode() == other.hasServerStartCode()); + if (hasServerStartCode()) { + result = result && (getServerStartCode() + == other.getServerStartCode()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -6594,6 +6779,10 @@ public final class AdminProtos { hash = (37 * hash) + DESTINATION_SERVER_FIELD_NUMBER; hash = (53 * hash) + getDestinationServer().hashCode(); } + if (hasServerStartCode()) { + hash = (37 * hash) + SERVERSTARTCODE_FIELD_NUMBER; + hash = (53 * hash) + hashLong(getServerStartCode()); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -6727,6 +6916,8 @@ public final class AdminProtos { destinationServerBuilder_.clear(); } bitField0_ = (bitField0_ & ~0x00000008); + serverStartCode_ = 0L; + bitField0_ = (bitField0_ & ~0x00000010); return this; } @@ -6779,6 +6970,10 @@ public final class AdminProtos { } else { result.destinationServer_ = destinationServerBuilder_.build(); } + if (((from_bitField0_ & 0x00000010) == 0x00000010)) { + to_bitField0_ |= 0x00000010; + } + result.serverStartCode_ = serverStartCode_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -6807,6 +7002,9 @@ public final class AdminProtos { if (other.hasDestinationServer()) { mergeDestinationServer(other.getDestinationServer()); } + if (other.hasServerStartCode()) { + setServerStartCode(other.getServerStartCode()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -7148,6 +7346,55 @@ public final class AdminProtos { return destinationServerBuilder_; } + // optional uint64 serverStartCode = 5; + private long serverStartCode_ ; + /** + * optional uint64 serverStartCode = 5; + * + *
+       * the intended server for this RPC.
+       * 
+ */ + public boolean hasServerStartCode() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + /** + * optional uint64 serverStartCode = 5; + * + *
+       * the intended server for this RPC.
+       * 
+ */ + public long getServerStartCode() { + return serverStartCode_; + } + /** + * optional uint64 serverStartCode = 5; + * + *
+       * the intended server for this RPC.
+       * 
+ */ + public Builder setServerStartCode(long value) { + bitField0_ |= 0x00000010; + serverStartCode_ = value; + onChanged(); + return this; + } + /** + * optional uint64 serverStartCode = 5; + * + *
+       * the intended server for this RPC.
+       * 
+ */ + public Builder clearServerStartCode() { + bitField0_ = (bitField0_ & ~0x00000010); + serverStartCode_ = 0L; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:CloseRegionRequest) } @@ -20919,76 +21166,77 @@ public final class AdminProtos { "FileResponse\022\022\n\nstore_file\030\001 \003(\t\"\030\n\026GetO" + "nlineRegionRequest\";\n\027GetOnlineRegionRes" + "ponse\022 \n\013region_info\030\001 \003(\0132\013.RegionInfo\"" + - "\275\001\n\021OpenRegionRequest\0224\n\topen_info\030\001 \003(\013" + - "2!.OpenRegionRequest.RegionOpenInfo\032r\n\016R" + - "egionOpenInfo\022\033\n\006region\030\001 \002(\0132\013.RegionIn" + - "fo\022\037\n\027version_of_offline_node\030\002 \001(\r\022\"\n\rf" + - "avored_nodes\030\003 \003(\0132\013.ServerName\"\235\001\n\022Open" + - "RegionResponse\022=\n\ropening_state\030\001 \003(\0162&.", - "OpenRegionResponse.RegionOpeningState\"H\n" + - "\022RegionOpeningState\022\n\n\006OPENED\020\000\022\022\n\016ALREA" + - "DY_OPENED\020\001\022\022\n\016FAILED_OPENING\020\002\"\240\001\n\022Clos" + - "eRegionRequest\022 \n\006region\030\001 \002(\0132\020.RegionS" + - "pecifier\022\037\n\027version_of_closing_node\030\002 \001(" + - "\r\022\036\n\020transition_in_ZK\030\003 \001(\010:\004true\022\'\n\022des" + - "tination_server\030\004 \001(\0132\013.ServerName\"%\n\023Cl" + - "oseRegionResponse\022\016\n\006closed\030\001 \002(\010\"P\n\022Flu" + - "shRegionRequest\022 \n\006region\030\001 \002(\0132\020.Region" + - "Specifier\022\030\n\020if_older_than_ts\030\002 \001(\004\"?\n\023F", - "lushRegionResponse\022\027\n\017last_flush_time\030\001 " + - "\002(\004\022\017\n\007flushed\030\002 \001(\010\"K\n\022SplitRegionReque" + - "st\022 \n\006region\030\001 \002(\0132\020.RegionSpecifier\022\023\n\013" + - "split_point\030\002 \001(\014\"\025\n\023SplitRegionResponse" + - "\"W\n\024CompactRegionRequest\022 \n\006region\030\001 \002(\013" + - "2\020.RegionSpecifier\022\r\n\005major\030\002 \001(\010\022\016\n\006fam" + - "ily\030\003 \001(\014\"\027\n\025CompactRegionResponse\"\262\001\n\031U" + - "pdateFavoredNodesRequest\022@\n\013update_info\030" + - "\001 \003(\0132+.UpdateFavoredNodesRequest.Region" + - "UpdateInfo\032S\n\020RegionUpdateInfo\022\033\n\006region", - "\030\001 \002(\0132\013.RegionInfo\022\"\n\rfavored_nodes\030\002 \003" + - "(\0132\013.ServerName\".\n\032UpdateFavoredNodesRes" + - "ponse\022\020\n\010response\030\001 \001(\r\"v\n\023MergeRegionsR" + - "equest\022\"\n\010region_a\030\001 \002(\0132\020.RegionSpecifi" + - "er\022\"\n\010region_b\030\002 \002(\0132\020.RegionSpecifier\022\027" + - "\n\010forcible\030\003 \001(\010:\005false\"\026\n\024MergeRegionsR" + - "esponse\"X\n\010WALEntry\022\024\n\003key\030\001 \002(\0132\007.WALKe" + - "y\022\027\n\017key_value_bytes\030\002 \003(\014\022\035\n\025associated" + - "_cell_count\030\003 \001(\005\"4\n\030ReplicateWALEntryRe" + - "quest\022\030\n\005entry\030\001 \003(\0132\t.WALEntry\"\033\n\031Repli", - "cateWALEntryResponse\"\026\n\024RollWALWriterReq" + - "uest\"0\n\025RollWALWriterResponse\022\027\n\017region_" + - "to_flush\030\001 \003(\014\"#\n\021StopServerRequest\022\016\n\006r" + - "eason\030\001 \002(\t\"\024\n\022StopServerResponse\"\026\n\024Get" + - "ServerInfoRequest\"B\n\nServerInfo\022 \n\013serve" + - "r_name\030\001 \002(\0132\013.ServerName\022\022\n\nwebui_port\030" + - "\002 \001(\r\"9\n\025GetServerInfoResponse\022 \n\013server" + - "_info\030\001 \002(\0132\013.ServerInfo2\306\007\n\014AdminServic" + - "e\022>\n\rGetRegionInfo\022\025.GetRegionInfoReques" + - "t\032\026.GetRegionInfoResponse\022;\n\014GetStoreFil", - "e\022\024.GetStoreFileRequest\032\025.GetStoreFileRe" + - "sponse\022D\n\017GetOnlineRegion\022\027.GetOnlineReg" + - "ionRequest\032\030.GetOnlineRegionResponse\0225\n\n" + - "OpenRegion\022\022.OpenRegionRequest\032\023.OpenReg" + - "ionResponse\0228\n\013CloseRegion\022\023.CloseRegion" + - "Request\032\024.CloseRegionResponse\0228\n\013FlushRe" + - "gion\022\023.FlushRegionRequest\032\024.FlushRegionR" + - "esponse\0228\n\013SplitRegion\022\023.SplitRegionRequ" + - "est\032\024.SplitRegionResponse\022>\n\rCompactRegi" + - "on\022\025.CompactRegionRequest\032\026.CompactRegio", - "nResponse\022;\n\014MergeRegions\022\024.MergeRegions" + - "Request\032\025.MergeRegionsResponse\022J\n\021Replic" + - "ateWALEntry\022\031.ReplicateWALEntryRequest\032\032" + - ".ReplicateWALEntryResponse\022?\n\006Replay\022\031.R" + - "eplicateWALEntryRequest\032\032.ReplicateWALEn" + - "tryResponse\022>\n\rRollWALWriter\022\025.RollWALWr" + - "iterRequest\032\026.RollWALWriterResponse\022>\n\rG" + - "etServerInfo\022\025.GetServerInfoRequest\032\026.Ge" + - "tServerInfoResponse\0225\n\nStopServer\022\022.Stop" + - "ServerRequest\032\023.StopServerResponse\022M\n\022Up", - "dateFavoredNodes\022\032.UpdateFavoredNodesReq" + - "uest\032\033.UpdateFavoredNodesResponseBA\n*org" + - ".apache.hadoop.hbase.protobuf.generatedB" + - "\013AdminProtosH\001\210\001\001\240\001\001" + "\326\001\n\021OpenRegionRequest\0224\n\topen_info\030\001 \003(\013" + + "2!.OpenRegionRequest.RegionOpenInfo\022\027\n\017s" + + "erverStartCode\030\002 \001(\004\032r\n\016RegionOpenInfo\022\033" + + "\n\006region\030\001 \002(\0132\013.RegionInfo\022\037\n\027version_o" + + "f_offline_node\030\002 \001(\r\022\"\n\rfavored_nodes\030\003 " + + "\003(\0132\013.ServerName\"\235\001\n\022OpenRegionResponse\022", + "=\n\ropening_state\030\001 \003(\0162&.OpenRegionRespo" + + "nse.RegionOpeningState\"H\n\022RegionOpeningS" + + "tate\022\n\n\006OPENED\020\000\022\022\n\016ALREADY_OPENED\020\001\022\022\n\016" + + "FAILED_OPENING\020\002\"\271\001\n\022CloseRegionRequest\022" + + " \n\006region\030\001 \002(\0132\020.RegionSpecifier\022\037\n\027ver" + + "sion_of_closing_node\030\002 \001(\r\022\036\n\020transition" + + "_in_ZK\030\003 \001(\010:\004true\022\'\n\022destination_server" + + "\030\004 \001(\0132\013.ServerName\022\027\n\017serverStartCode\030\005" + + " \001(\004\"%\n\023CloseRegionResponse\022\016\n\006closed\030\001 " + + "\002(\010\"P\n\022FlushRegionRequest\022 \n\006region\030\001 \002(", + "\0132\020.RegionSpecifier\022\030\n\020if_older_than_ts\030" + + "\002 \001(\004\"?\n\023FlushRegionResponse\022\027\n\017last_flu" + + "sh_time\030\001 \002(\004\022\017\n\007flushed\030\002 \001(\010\"K\n\022SplitR" + + "egionRequest\022 \n\006region\030\001 \002(\0132\020.RegionSpe" + + "cifier\022\023\n\013split_point\030\002 \001(\014\"\025\n\023SplitRegi" + + "onResponse\"W\n\024CompactRegionRequest\022 \n\006re" + + "gion\030\001 \002(\0132\020.RegionSpecifier\022\r\n\005major\030\002 " + + "\001(\010\022\016\n\006family\030\003 \001(\014\"\027\n\025CompactRegionResp" + + "onse\"\262\001\n\031UpdateFavoredNodesRequest\022@\n\013up" + + "date_info\030\001 \003(\0132+.UpdateFavoredNodesRequ", + "est.RegionUpdateInfo\032S\n\020RegionUpdateInfo" + + "\022\033\n\006region\030\001 \002(\0132\013.RegionInfo\022\"\n\rfavored" + + "_nodes\030\002 \003(\0132\013.ServerName\".\n\032UpdateFavor" + + "edNodesResponse\022\020\n\010response\030\001 \001(\r\"v\n\023Mer" + + "geRegionsRequest\022\"\n\010region_a\030\001 \002(\0132\020.Reg" + + "ionSpecifier\022\"\n\010region_b\030\002 \002(\0132\020.RegionS" + + "pecifier\022\027\n\010forcible\030\003 \001(\010:\005false\"\026\n\024Mer" + + "geRegionsResponse\"X\n\010WALEntry\022\024\n\003key\030\001 \002" + + "(\0132\007.WALKey\022\027\n\017key_value_bytes\030\002 \003(\014\022\035\n\025" + + "associated_cell_count\030\003 \001(\005\"4\n\030Replicate", + "WALEntryRequest\022\030\n\005entry\030\001 \003(\0132\t.WALEntr" + + "y\"\033\n\031ReplicateWALEntryResponse\"\026\n\024RollWA" + + "LWriterRequest\"0\n\025RollWALWriterResponse\022" + + "\027\n\017region_to_flush\030\001 \003(\014\"#\n\021StopServerRe" + + "quest\022\016\n\006reason\030\001 \002(\t\"\024\n\022StopServerRespo" + + "nse\"\026\n\024GetServerInfoRequest\"B\n\nServerInf" + + "o\022 \n\013server_name\030\001 \002(\0132\013.ServerName\022\022\n\nw" + + "ebui_port\030\002 \001(\r\"9\n\025GetServerInfoResponse" + + "\022 \n\013server_info\030\001 \002(\0132\013.ServerInfo2\306\007\n\014A" + + "dminService\022>\n\rGetRegionInfo\022\025.GetRegion", + "InfoRequest\032\026.GetRegionInfoResponse\022;\n\014G" + + "etStoreFile\022\024.GetStoreFileRequest\032\025.GetS" + + "toreFileResponse\022D\n\017GetOnlineRegion\022\027.Ge" + + "tOnlineRegionRequest\032\030.GetOnlineRegionRe" + + "sponse\0225\n\nOpenRegion\022\022.OpenRegionRequest" + + "\032\023.OpenRegionResponse\0228\n\013CloseRegion\022\023.C" + + "loseRegionRequest\032\024.CloseRegionResponse\022" + + "8\n\013FlushRegion\022\023.FlushRegionRequest\032\024.Fl" + + "ushRegionResponse\0228\n\013SplitRegion\022\023.Split" + + "RegionRequest\032\024.SplitRegionResponse\022>\n\rC", + "ompactRegion\022\025.CompactRegionRequest\032\026.Co" + + "mpactRegionResponse\022;\n\014MergeRegions\022\024.Me" + + "rgeRegionsRequest\032\025.MergeRegionsResponse" + + "\022J\n\021ReplicateWALEntry\022\031.ReplicateWALEntr" + + "yRequest\032\032.ReplicateWALEntryResponse\022?\n\006" + + "Replay\022\031.ReplicateWALEntryRequest\032\032.Repl" + + "icateWALEntryResponse\022>\n\rRollWALWriter\022\025" + + ".RollWALWriterRequest\032\026.RollWALWriterRes" + + "ponse\022>\n\rGetServerInfo\022\025.GetServerInfoRe" + + "quest\032\026.GetServerInfoResponse\0225\n\nStopSer", + "ver\022\022.StopServerRequest\032\023.StopServerResp" + + "onse\022M\n\022UpdateFavoredNodes\022\032.UpdateFavor" + + "edNodesRequest\032\033.UpdateFavoredNodesRespo" + + "nseBA\n*org.apache.hadoop.hbase.protobuf." + + "generatedB\013AdminProtosH\001\210\001\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -21036,7 +21284,7 @@ public final class AdminProtos { internal_static_OpenRegionRequest_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_OpenRegionRequest_descriptor, - new java.lang.String[] { "OpenInfo", }); + new java.lang.String[] { "OpenInfo", "ServerStartCode", }); internal_static_OpenRegionRequest_RegionOpenInfo_descriptor = internal_static_OpenRegionRequest_descriptor.getNestedTypes().get(0); internal_static_OpenRegionRequest_RegionOpenInfo_fieldAccessorTable = new @@ -21054,7 +21302,7 @@ public final class AdminProtos { internal_static_CloseRegionRequest_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_CloseRegionRequest_descriptor, - new java.lang.String[] { "Region", "VersionOfClosingNode", "TransitionInZK", "DestinationServer", }); + new java.lang.String[] { "Region", "VersionOfClosingNode", "TransitionInZK", "DestinationServer", "ServerStartCode", }); internal_static_CloseRegionResponse_descriptor = getDescriptor().getMessageTypes().get(9); internal_static_CloseRegionResponse_fieldAccessorTable = new diff --git a/hbase-protocol/src/main/protobuf/Admin.proto b/hbase-protocol/src/main/protobuf/Admin.proto index c881851e84d..5b889cd10b5 100644 --- a/hbase-protocol/src/main/protobuf/Admin.proto +++ b/hbase-protocol/src/main/protobuf/Admin.proto @@ -68,6 +68,8 @@ message GetOnlineRegionResponse { message OpenRegionRequest { repeated RegionOpenInfo open_info = 1; + // the intended server for this RPC. + optional uint64 serverStartCode = 2; message RegionOpenInfo { required RegionInfo region = 1; @@ -95,6 +97,8 @@ message CloseRegionRequest { optional uint32 version_of_closing_node = 2; optional bool transition_in_ZK = 3 [default = true]; optional ServerName destination_server = 4; + // the intended server for this RPC. + optional uint64 serverStartCode = 5; } message CloseRegionResponse { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 72f7dc437f8..09cfee1c21f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -628,7 +628,7 @@ public class ServerManager { return RegionOpeningState.FAILED_OPENING; } OpenRegionRequest request = - RequestConverter.buildOpenRegionRequest(region, versionOfOfflineNode, favoredNodes); + RequestConverter.buildOpenRegionRequest(server, region, versionOfOfflineNode, favoredNodes); try { OpenRegionResponse response = admin.openRegion(null, request); return ResponseConverter.getRegionOpeningState(response); @@ -690,7 +690,7 @@ public class ServerManager { region.getRegionNameAsString() + " failed because no RPC connection found to this server"); } - return ProtobufUtil.closeRegion(admin, region.getRegionName(), + return ProtobufUtil.closeRegion(admin, server, region.getRegionName(), versionOfClosingNode, dest, transitionInZK); } @@ -881,7 +881,7 @@ public class ServerManager { Map getRequeuedDeadServers() { return Collections.unmodifiableMap(this.requeuedDeadServers); } - + public boolean isServerOnline(ServerName serverName) { return serverName != null && onlineServers.containsKey(serverName); } @@ -969,7 +969,7 @@ public class ServerManager { } } } - + /** * To clear any dead server with same host name and port of any online server */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index e5eec504a7a..d94f029c496 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -3606,6 +3606,15 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa throw new ServiceException(ie); } requestCount.increment(); + if (request.hasServerStartCode() && this.serverNameFromMasterPOV != null) { + // check that we are the same server that this RPC is intended for. + long serverStartCode = request.getServerStartCode(); + if (this.serverNameFromMasterPOV.getStartcode() != serverStartCode) { + throw new ServiceException(new DoNotRetryIOException("This RPC was intended for a " + + "different server with startCode: " + serverStartCode + ", this server is: " + + this.serverNameFromMasterPOV)); + } + } OpenRegionResponse.Builder builder = OpenRegionResponse.newBuilder(); final int regionCount = request.getOpenInfoCount(); final Map htds = @@ -3763,6 +3772,15 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa try { checkOpen(); + if (request.hasServerStartCode() && this.serverNameFromMasterPOV != null) { + // check that we are the same server that this RPC is intended for. + long serverStartCode = request.getServerStartCode(); + if (this.serverNameFromMasterPOV.getStartcode() != serverStartCode) { + throw new ServiceException(new DoNotRetryIOException("This RPC was intended for a " + + "different server with startCode: " + serverStartCode + ", this server is: " + + this.serverNameFromMasterPOV)); + } + } final String encodedRegionName = ProtobufUtil.getRegionEncodedName(request.getRegion()); // Can be null if we're calling close on a region that's not online diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java index 1b97d6903c3..727c0d41def 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java @@ -149,7 +149,7 @@ public class HBaseFsckRepair { HConnection connection = admin.getConnection(); AdminService.BlockingInterface rs = connection.getAdmin(server); try { - ProtobufUtil.closeRegion(rs, region.getRegionName(), false); + ProtobufUtil.closeRegion(rs, server, region.getRegionName(), false); } catch (IOException e) { LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestScannersFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestScannersFromClientSide.java index 17a56b6e46e..46b02a003df 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestScannersFromClientSide.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestScannersFromClientSide.java @@ -472,7 +472,7 @@ public class TestScannersFromClientSide { byte[] regionName = hri.getRegionName(); int i = cluster.getServerWith(regionName); HRegionServer rs = cluster.getRegionServer(i); - ProtobufUtil.closeRegion(rs, regionName, false); + ProtobufUtil.closeRegion(rs, rs.getServerName(), regionName, false); long startTime = EnvironmentEdgeManager.currentTimeMillis(); long timeOut = 300000; while (true) { @@ -492,7 +492,7 @@ public class TestScannersFromClientSide { states.regionOffline(hri); states.updateRegionState(hri, State.OPENING); ZKAssign.createNodeOffline(zkw, hri, loc.getServerName()); - ProtobufUtil.openRegion(rs, hri); + ProtobufUtil.openRegion(rs, rs.getServerName(), hri); startTime = EnvironmentEdgeManager.currentTimeMillis(); while (true) { if (rs.getOnlineRegion(regionName) != null) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java index 4a06b6dadb6..455105a9246 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java @@ -32,6 +32,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.hbase.CellScannable; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; @@ -56,8 +57,8 @@ import org.apache.hadoop.hbase.executor.ExecutorType; import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager; -import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer; import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory; +import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; @@ -611,6 +612,7 @@ public class TestAssignmentManager { Mockito.when(implementation.scan( (RpcController)Mockito.any(), (ScanRequest)Mockito.any())). thenAnswer(new Answer() { + @Override public ScanResponse answer(InvocationOnMock invocation) throws Throwable { PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation .getArguments()[0]; @@ -716,6 +718,7 @@ public class TestAssignmentManager { this.server, this.serverManager); Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest", null) { + @Override public RecoverableZooKeeper getRecoverableZooKeeper() { return recoverableZk; } @@ -1111,6 +1114,7 @@ public class TestAssignmentManager { final List rows = new ArrayList(1); rows.add(r); Answer ans = new Answer() { + @Override public ScanResponse answer(InvocationOnMock invocation) throws Throwable { PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation .getArguments()[0]; @@ -1240,6 +1244,7 @@ public class TestAssignmentManager { // Thats ok because we make a new zk watcher for each test. watcher.registerListenerFirst(am); Thread t = new Thread("RunAmJoinCluster") { + @Override public void run() { // Call the joinCluster function as though we were doing a master // failover at this point. It will stall just before we go to add @@ -1362,4 +1367,35 @@ public class TestAssignmentManager { assertFalse("The region should not in transition", regionStates.isRegionInTransition(hri)); } + + /** + * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port + * for openRegion. AM should assign this somewhere else. (HBASE-9721) + */ + @SuppressWarnings("unchecked") + @Test + public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception { + Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO), + Mockito.anyInt(), (List)Mockito.any())) + .thenThrow(new DoNotRetryIOException()); + this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 100); + + HRegionInfo hri = REGIONINFO; + CatalogTracker ct = Mockito.mock(CatalogTracker.class); + LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer( + server.getConfiguration()); + // Create an AM. + AssignmentManager am = new AssignmentManager(this.server, + this.serverManager, ct, balancer, null, null, master.getTableLockManager()); + RegionStates regionStates = am.getRegionStates(); + try { + am.regionPlans.put(REGIONINFO.getEncodedName(), + new RegionPlan(REGIONINFO, null, SERVERNAME_B)); + + // Should fail once, but succeed on the second attempt for the SERVERNAME_A + am.assign(hri, true, false); + } finally { + assertEquals(SERVERNAME_A, regionStates.getRegionState(REGIONINFO).getServerName()); + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index 0611ed57be0..baa458d85be 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownRegionException; +import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; @@ -137,9 +138,13 @@ public class TestAssignmentManagerOnCluster { /** * This tests region assignment on a simulated restarted server */ - @Test (timeout=60000) + @Test (timeout=120000) public void testAssignRegionOnRestartedServer() throws Exception { String table = "testAssignRegionOnRestartedServer"; + TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 40); + TEST_UTIL.getMiniHBaseCluster().stopMaster(0); + TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect + ServerName deadServer = null; HMaster master = null; try { @@ -148,7 +153,7 @@ public class TestAssignmentManagerOnCluster { admin.createTable(desc); HTable meta = new HTable(conf, TableName.META_TABLE_NAME); - HRegionInfo hri = new HRegionInfo( + final HRegionInfo hri = new HRegionInfo( desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z")); MetaEditor.addRegionToMeta(meta, hri); @@ -164,7 +169,7 @@ public class TestAssignmentManagerOnCluster { destServer.getPort(), destServer.getStartcode() - 100L); master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD); - AssignmentManager am = master.getAssignmentManager(); + final AssignmentManager am = master.getAssignmentManager(); RegionPlan plan = new RegionPlan(hri, null, deadServer); am.addPlan(hri.getEncodedName(), plan); master.assignRegion(hri); @@ -174,15 +179,14 @@ public class TestAssignmentManagerOnCluster { EventType.RS_ZK_REGION_OPENING, 0); assertEquals("TansitionNode should fail", -1, version); - // Give region 2 seconds to assign, which may not be enough. - // However, if HBASE-8545 is broken, this test will be flaky. - // Otherwise, this test should never be flaky. - Thread.sleep(2000); + TEST_UTIL.waitFor(60000, new Waiter.Predicate() { + @Override + public boolean evaluate() throws Exception { + return ! am.getRegionStates().isRegionInTransition(hri); + } + }); - assertTrue("Region should still be in transition", - am.getRegionStates().isRegionInTransition(hri)); - assertEquals("Assign node should still be in version 0", 0, - ZKAssign.getVersion(master.getZooKeeper(), hri)); + assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri)); } finally { if (deadServer != null) { master.serverManager.expireServer(deadServer); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java index f10f91e4564..07be41d2d3f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java @@ -337,7 +337,7 @@ public class TestMasterFailover { region = enabledRegions.remove(0); regionsThatShouldBeOnline.add(region); ZKAssign.createNodeOffline(zkw, region, serverName); - ProtobufUtil.openRegion(hrs, region); + ProtobufUtil.openRegion(hrs, hrs.getServerName(), region); while (true) { byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); RegionTransition rt = RegionTransition.parseFrom(bytes); @@ -352,7 +352,7 @@ public class TestMasterFailover { region = disabledRegions.remove(0); regionsThatShouldBeOffline.add(region); ZKAssign.createNodeOffline(zkw, region, serverName); - ProtobufUtil.openRegion(hrs, region); + ProtobufUtil.openRegion(hrs, hrs.getServerName(), region); while (true) { byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); RegionTransition rt = RegionTransition.parseFrom(bytes); @@ -736,7 +736,7 @@ public class TestMasterFailover { region = enabledRegions.remove(0); regionsThatShouldBeOnline.add(region); ZKAssign.createNodeOffline(zkw, region, deadServerName); - ProtobufUtil.openRegion(hrsDead, region); + ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region); while (true) { byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); RegionTransition rt = RegionTransition.parseFrom(bytes); @@ -752,7 +752,7 @@ public class TestMasterFailover { region = disabledRegions.remove(0); regionsThatShouldBeOffline.add(region); ZKAssign.createNodeOffline(zkw, region, deadServerName); - ProtobufUtil.openRegion(hrsDead, region); + ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region); while (true) { byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); RegionTransition rt = RegionTransition.parseFrom(bytes); @@ -772,7 +772,7 @@ public class TestMasterFailover { region = enabledRegions.remove(0); regionsThatShouldBeOnline.add(region); ZKAssign.createNodeOffline(zkw, region, deadServerName); - ProtobufUtil.openRegion(hrsDead, region); + ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region); while (true) { byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); RegionTransition rt = RegionTransition.parseFrom(bytes); @@ -790,7 +790,7 @@ public class TestMasterFailover { region = disabledRegions.remove(0); regionsThatShouldBeOffline.add(region); ZKAssign.createNodeOffline(zkw, region, deadServerName); - ProtobufUtil.openRegion(hrsDead, region); + ProtobufUtil.openRegion(hrsDead, hrsDead.getServerName(), region); while (true) { byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName()); RegionTransition rt = RegionTransition.parseFrom(bytes); @@ -824,12 +824,12 @@ public class TestMasterFailover { log("Waiting for master to be ready"); assertTrue(cluster.waitForActiveAndReadyMaster()); log("Master is ready"); - + // Wait until SSH processing completed for dead server. while (master.getServerManager().areDeadServersInProgress()) { Thread.sleep(10); } - + // Failover should be completed, now wait for no RIT log("Waiting for no more RIT"); ZKAssign.blockUntilNoRIT(zkw); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java index a16a4b27be9..c11cbc89dd8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java @@ -30,19 +30,19 @@ import java.util.Collection; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.TableDescriptors; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; @@ -230,7 +230,7 @@ public class TestZKBasedOpenCloseRegion { Whitebox.setInternalState(regionServer, "tableDescriptors", htd); Mockito.doThrow(new IOException()).when(htd).get((TableName) Mockito.any()); try { - ProtobufUtil.openRegion(regionServer, REGIONINFO); + ProtobufUtil.openRegion(regionServer, regionServer.getServerName(), REGIONINFO); fail("It should throw IOException "); } catch (IOException e) { } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java index 5d70b97f19e..42e9e8f65cf 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java @@ -19,17 +19,21 @@ package org.apache.hadoop.hbase.regionserver; +import java.io.IOException; + import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.protobuf.RequestConverter; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos; +import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest; import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.ZKAssign; @@ -104,7 +108,7 @@ public class TestRegionServerNoMaster { // We reopen. We need a ZK node here, as a open is always triggered by a master. ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName()); // first version is '0' - AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null); + AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(getRS().getServerName(), hri, 0, null); AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr); Assert.assertTrue(responseOpen.getOpeningStateCount() == 1); Assert.assertTrue(responseOpen.getOpeningState(0). @@ -150,7 +154,7 @@ public class TestRegionServerNoMaster { private void closeNoZK() throws Exception { // no transition in ZK AdminProtos.CloseRegionRequest crr = - RequestConverter.buildCloseRegionRequest(regionName, false); + RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false); AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr); Assert.assertTrue(responseClose.getClosed()); @@ -170,7 +174,7 @@ public class TestRegionServerNoMaster { // Transition in ZK on. This should fail, as there is no znode AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest( - regionName, true); + getRS().getServerName(), regionName, true); AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr); Assert.assertTrue(responseClose.getClosed()); @@ -189,7 +193,7 @@ public class TestRegionServerNoMaster { ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName()); AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest( - regionName, true); + getRS().getServerName(), regionName, true); AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr); Assert.assertTrue(responseClose.getClosed()); @@ -223,7 +227,7 @@ public class TestRegionServerNoMaster { // We're sending multiple requests in a row. The region server must handle this nicely. for (int i = 0; i < 10; i++) { - AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null); + AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(getRS().getServerName(), hri, 0, null); AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr); Assert.assertTrue(responseOpen.getOpeningStateCount() == 1); @@ -244,7 +248,7 @@ public class TestRegionServerNoMaster { try { // fake region to be closing now, need to clear state afterwards getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), Boolean.FALSE); - AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null); + AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(getRS().getServerName(), hri, 0, null); getRS().openRegion(null, orr); Assert.fail("The closing region should not be opened"); } catch (ServiceException se) { @@ -262,7 +266,7 @@ public class TestRegionServerNoMaster { ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName()); for (int i = 0; i < 10; i++) { AdminProtos.CloseRegionRequest crr = - RequestConverter.buildCloseRegionRequest(regionName, 0, null, true); + RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, 0, null, true); try { AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr); Assert.assertEquals("The first request should succeeds", 0, i); @@ -298,7 +302,7 @@ public class TestRegionServerNoMaster { // That's a close without ZK. AdminProtos.CloseRegionRequest crr = - RequestConverter.buildCloseRegionRequest(regionName, false); + RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false); try { getRS().closeRegion(null, crr); Assert.assertTrue(false); @@ -341,7 +345,7 @@ public class TestRegionServerNoMaster { // That's a close without ZK. ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName()); AdminProtos.CloseRegionRequest crr = - RequestConverter.buildCloseRegionRequest(regionName, false); + RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false); try { getRS().closeRegion(null, crr); Assert.assertTrue(false); @@ -375,4 +379,38 @@ public class TestRegionServerNoMaster { reopenRegion(); } + + /** + * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port + * for openRegion. The region server should reject this RPC. (HBASE-9721) + */ + @Test + public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception { + Assert.assertTrue(getRS().getRegion(regionName).isAvailable()); + + ServerName sn = getRS().getServerName(); + ServerName earlierServerName = ServerName.valueOf(sn.getHostname(), sn.getPort(), 1); + + try { + CloseRegionRequest request = RequestConverter.buildCloseRegionRequest(earlierServerName, regionName, true); + getRS().closeRegion(null, request); + Assert.fail("The closeRegion should have been rejected"); + } catch (ServiceException se) { + Assert.assertTrue(se.getCause() instanceof IOException); + Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server")); + } + + //actual close + closeNoZK(); + try { + AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(earlierServerName, hri, 0, null); + getRS().openRegion(null, orr); + Assert.fail("The openRegion should have been rejected"); + } catch (ServiceException se) { + Assert.assertTrue(se.getCause() instanceof IOException); + Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server")); + } finally { + reopenRegion(); + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index f37b0b0f447..a75678358a0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -2137,7 +2137,7 @@ public class TestHBaseFsck { HConnection connection = HConnectionManager.getConnection(conf); HRegionLocation metaLocation = connection.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW); - ServerName hsa = ServerName.valueOf(metaLocation.getHostnamePort(), 0L); + ServerName hsa = metaLocation.getServerName(); HRegionInfo hri = metaLocation.getRegionInfo(); if (unassign) { LOG.info("Undeploying meta region " + hri + " from server " + hsa);