From 529223130ab61cfd059197be5cba7e08e9dadc0d Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Wed, 27 Apr 2011 23:12:42 +0000 Subject: [PATCH] HBASE-1502 Remove need for heartbeats in HBase git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1097275 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + src/docbkx/troubleshooting.xml | 838 +++++++++--------- .../apache/hadoop/hbase/ClusterStatus.java | 109 ++- .../org/apache/hadoop/hbase/HConstants.java | 6 + .../java/org/apache/hadoop/hbase/HMsg.java | 235 ----- .../apache/hadoop/hbase/HRegionLocation.java | 64 +- .../apache/hadoop/hbase/HServerAddress.java | 136 ++- .../org/apache/hadoop/hbase/HServerInfo.java | 256 ++---- .../org/apache/hadoop/hbase/HServerLoad.java | 127 +-- .../hadoop/hbase/LocalHBaseCluster.java | 10 +- .../hadoop/hbase/MasterAddressTracker.java | 15 +- .../java/org/apache/hadoop/hbase/Server.java | 8 +- .../org/apache/hadoop/hbase/ServerName.java | 221 +++++ .../apache/hadoop/hbase/avro/AvroUtil.java | 51 +- .../hadoop/hbase/catalog/CatalogTracker.java | 54 +- .../hadoop/hbase/catalog/MetaEditor.java | 36 +- .../hadoop/hbase/catalog/MetaReader.java | 134 ++- .../hbase/catalog/RootLocationEditor.java | 8 +- .../hadoop/hbase/client/HBaseAdmin.java | 72 +- .../hadoop/hbase/client/HConnection.java | 33 +- .../hbase/client/HConnectionManager.java | 192 ++-- .../apache/hadoop/hbase/client/HTable.java | 22 +- .../hadoop/hbase/client/MetaScanner.java | 57 +- .../RetriesExhaustedWithDetailsException.java | 33 +- .../hbase/coprocessor/BaseMasterObserver.java | 2 +- .../hbase/coprocessor/MasterObserver.java | 9 +- .../hadoop/hbase/executor/EventHandler.java | 2 +- .../hbase/executor/RegionTransitionData.java | 33 +- .../hadoop/hbase/io/HbaseObjectWritable.java | 14 +- .../apache/hadoop/hbase/ipc/HBaseServer.java | 12 +- .../hbase/ipc/HMasterRegionInterface.java | 49 +- .../hadoop/hbase/ipc/HRegionInterface.java | 4 +- .../hadoop/hbase/ipc/WritableRpcEngine.java | 5 +- .../hbase/master/ActiveMasterManager.java | 36 +- .../hbase/master/AssignmentManager.java | 369 ++++---- .../hadoop/hbase/master/DeadServer.java | 68 +- .../apache/hadoop/hbase/master/HMaster.java | 236 +++-- .../hadoop/hbase/master/LoadBalancer.java | 265 +++--- .../hbase/master/MasterCoprocessorHost.java | 4 +- .../hadoop/hbase/master/MasterFileSystem.java | 18 +- .../hadoop/hbase/master/ServerManager.java | 400 +++------ .../hadoop/hbase/master/SplitLogManager.java | 11 +- .../handler/MetaServerShutdownHandler.java | 8 +- .../master/handler/OpenedRegionHandler.java | 12 +- .../master/handler/ServerShutdownHandler.java | 24 +- .../master/handler/SplitRegionHandler.java | 10 +- .../hadoop/hbase/regionserver/HRegion.java | 4 +- .../hbase/regionserver/HRegionServer.java | 702 ++++++++------- .../regionserver/RegionServerServices.java | 7 - .../hbase/regionserver/SplitLogWorker.java | 2 +- .../hbase/regionserver/SplitTransaction.java | 9 +- .../hadoop/hbase/regionserver/wal/HLog.java | 36 +- .../hbase/replication/ReplicationPeer.java | 9 +- .../replication/ReplicationZookeeper.java | 48 +- .../regionserver/ReplicationSource.java | 6 +- .../rest/StorageClusterStatusResource.java | 18 +- .../apache/hadoop/hbase/util/Addressing.java | 75 ++ .../apache/hadoop/hbase/util/HBaseFsck.java | 119 +-- .../hadoop/hbase/util/HBaseFsckRepair.java | 11 +- .../hadoop/hbase/util/JVMClusterUtil.java | 2 +- .../hbase/zookeeper/RegionServerTracker.java | 64 +- .../hbase/zookeeper/RootRegionTracker.java | 26 +- .../hadoop/hbase/zookeeper/ZKAssign.java | 29 +- 
.../apache/hadoop/hbase/zookeeper/ZKUtil.java | 117 +-- .../resources/hbase-webapps/master/master.jsp | 35 +- .../resources/hbase-webapps/master/table.jsp | 22 +- .../apache/hadoop/hbase/MiniHBaseCluster.java | 143 +-- .../hadoop/hbase/TestHRegionLocation.java | 67 ++ .../hadoop/hbase/TestHServerAddress.java | 83 ++ .../apache/hadoop/hbase/TestHServerInfo.java | 80 ++ .../hadoop/hbase/TestRegionRebalancing.java | 35 +- .../hadoop/hbase/TestSerialization.java | 15 +- .../apache/hadoop/hbase/TestServerName.java | 56 ++ .../hbase/catalog/TestCatalogTracker.java | 25 +- .../catalog/TestCatalogTrackerOnCluster.java | 5 +- .../hbase/catalog/TestMetaReaderEditor.java | 4 +- .../hbase/client/TestFromClientSide.java | 293 +++--- .../hbase/coprocessor/TestMasterObserver.java | 38 +- ...ROKE_FIX_TestKillingServersFromMaster.java | 103 --- .../hadoop/hbase/master/OOMEHMaster.java | 58 -- .../hbase/master/TestActiveMasterManager.java | 22 +- .../hbase/master/TestCatalogJanitor.java | 7 +- .../hbase/master/TestClockSkewDetection.java | 21 +- .../hadoop/hbase/master/TestDeadServer.java | 25 +- .../master/TestDistributedLogSplitting.java | 21 +- .../hbase/master/TestHMasterRPCException.java | 13 +- .../hadoop/hbase/master/TestLoadBalancer.java | 249 ++++-- .../hadoop/hbase/master/TestLogsCleaner.java | 8 +- .../hadoop/hbase/master/TestMaster.java | 8 +- .../hbase/master/TestMasterFailover.java | 23 +- .../hbase/master/TestRestartCluster.java | 11 +- .../hbase/master/TestRollingRestart.java | 5 +- .../TestMasterAddressManager.java | 10 +- .../hbase/regionserver/TestScanner.java | 9 +- .../TestSplitTransactionOnCluster.java | 5 +- .../handler/TestOpenRegionHandler.java | 17 +- .../TestReplicationSourceManager.java | 17 +- .../hadoop/hbase/util/TestHBaseFsck.java | 10 +- 98 files changed, 3514 insertions(+), 3622 deletions(-) delete mode 100644 src/main/java/org/apache/hadoop/hbase/HMsg.java create mode 100644 src/main/java/org/apache/hadoop/hbase/ServerName.java create mode 100644 src/main/java/org/apache/hadoop/hbase/util/Addressing.java create mode 100644 src/test/java/org/apache/hadoop/hbase/TestHRegionLocation.java create mode 100644 src/test/java/org/apache/hadoop/hbase/TestHServerAddress.java create mode 100644 src/test/java/org/apache/hadoop/hbase/TestHServerInfo.java create mode 100644 src/test/java/org/apache/hadoop/hbase/TestServerName.java delete mode 100644 src/test/java/org/apache/hadoop/hbase/master/BROKE_FIX_TestKillingServersFromMaster.java delete mode 100644 src/test/java/org/apache/hadoop/hbase/master/OOMEHMaster.java diff --git a/CHANGES.txt b/CHANGES.txt index eccc9d27705..85238c27f8f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -9,6 +9,7 @@ Release 0.91.0 - Unreleased HBASE-3762 HTableFactory.releaseHTableInterface() should throw IOException instead of wrapping in RuntimeException (Ted Yu via garyh) HBASE-3629 Update our thrift to 0.6 (Moaz Reyad) + HBASE-1502 Remove need for heartbeats in HBase BUG FIXES HBASE-3280 YouAreDeadException being swallowed in HRS getMaster diff --git a/src/docbkx/troubleshooting.xml b/src/docbkx/troubleshooting.xml index 1e640c03c68..67b67419faa 100644 --- a/src/docbkx/troubleshooting.xml +++ b/src/docbkx/troubleshooting.xml @@ -1,419 +1,419 @@ - - - Troubleshooting and Debugging HBase -
- General Guidelines - - Always start with the master log (TODO: Which lines?). - Normally it’s just printing the same lines over and over again. - If not, then there’s an issue. - Google or search-hadoop.com - should return some hits for those exceptions you’re seeing. - - - An error rarely comes alone in HBase, usually when something gets screwed up what will - follow may be hundreds of exceptions and stack traces coming from all over the place. - The best way to approach this type of problem is to walk the log up to where it all - began, for example one trick with RegionServers is that they will print some - metrics when aborting so grepping for Dump - should get you around the start of the problem. - - - RegionServer suicides are “normal”, as this is what they do when something goes wrong. - For example, if ulimit and xcievers (the two most important initial settings, see ) - aren’t changed, it will make it impossible at some point for datanodes to create new threads - that from the HBase point of view is seen as if HDFS was gone. Think about what would happen if your - MySQL database was suddenly unable to access files on your local file system, well it’s the same with - HBase and HDFS. Another very common reason to see RegionServers committing seppuku is when they enter - prolonged garbage collection pauses that last longer than the default ZooKeeper session timeout. - For more information on GC pauses, see the - 3 part blog post by Todd Lipcon - and above. - -
-
- Logs - - The key process logs are as follows... (replace <user> with the user that started the service, and <hostname> for the machine name) - - - NameNode: $HADOOP_HOME/logs/hadoop-<user>-namenode-<hostname>.log - - - DataNode: $HADOOP_HOME/logs/hadoop-<user>-datanode-<hostname>.log - - - JobTracker: $HADOOP_HOME/logs/hadoop-<user>-jobtracker-<hostname>.log - - - TaskTracker: $HADOOP_HOME/logs/hadoop-<user>-jobtracker-<hostname>.log - - - HMaster: $HBASE_HOME/logs/hbase-<user>-master-<hostname>.log - - - RegionServer: $HBASE_HOME/logs/hbase-<user>-regionserver-<hostname>.log - - - ZooKeeper: TODO - -
- Log Locations - For stand-alone deployments the logs are obviously going to be on a single machine, however this is a development configuration only. - Production deployments need to run on a cluster. -
- NameNode - The NameNode log is on the NameNode server. The HBase Master is typically run on the NameNode server, and well as ZooKeeper. - For smaller clusters the JobTracker is typically run on the NameNode server as well. -
-
- DataNode - Each DataNode server will have a DataNode log for HDFS, as well as a RegionServer log for HBase. - Additionally, each DataNode server will also have a TaskTracker log for MapReduce task execution. -
-
- -
-
- Tools -
- search-hadoop.com - - search-hadoop.com indexes all the mailing lists and JIRA, it’s really helpful when looking for Hadoop/HBase-specific issues. - -
-
- tail - - tail is the command line tool that lets you look at the end of a file. Add the “-f” option and it will refresh when new data is available. It’s useful when you are wondering what’s happening, for example, when a cluster is taking a long time to shutdown or startup as you can just fire a new terminal and tail the master log (and maybe a few RegionServers). - -
-
- top - - top is probably one of the most important tool when first trying to see what’s running on a machine and how the resources are consumed. Here’s an example from production system: - -top - 14:46:59 up 39 days, 11:55, 1 user, load average: 3.75, 3.57, 3.84 -Tasks: 309 total, 1 running, 308 sleeping, 0 stopped, 0 zombie -Cpu(s): 4.5%us, 1.6%sy, 0.0%ni, 91.7%id, 1.4%wa, 0.1%hi, 0.6%si, 0.0%st -Mem: 24414432k total, 24296956k used, 117476k free, 7196k buffers -Swap: 16008732k total, 14348k used, 15994384k free, 11106908k cached - - PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND -15558 hadoop 18 -2 3292m 2.4g 3556 S 79 10.4 6523:52 java -13268 hadoop 18 -2 8967m 8.2g 4104 S 21 35.1 5170:30 java - 8895 hadoop 18 -2 1581m 497m 3420 S 11 2.1 4002:32 java -… - - - - Here we can see that the system load average during the last five minutes is 3.75, which very roughly means that on average 3.75 threads were waiting for CPU time during these 5 minutes. In general, the “perfect” utilization equals to the number of cores, under that number the machine is under utilized and over that the machine is over utilized. This is an important concept, see this article to understand it more: http://www.linuxjournal.com/article/9001. - - - Apart from load, we can see that the system is using almost all its available RAM but most of it is used for the OS cache (which is good). The swap only has a few KBs in it and this is wanted, high numbers would indicate swapping activity which is the nemesis of performance of Java systems. Another way to detect swapping is when the load average goes through the roof (although this could also be caused by things like a dying disk, among others). - - - The list of processes isn’t super useful by default, all we know is that 3 java processes are using about 111% of the CPUs. To know which is which, simply type “c” and each line will be expanded. Typing “1” will give you the detail of how each CPU is used instead of the average for all of them like shown here. - -
-
- jps - - jps is shipped with every JDK and gives the java process ids for the current user (if root, then it gives the ids for all users). Example: - -hadoop@sv4borg12:~$ jps -1322 TaskTracker -17789 HRegionServer -27862 Child -1158 DataNode -25115 HQuorumPeer -2950 Jps -19750 ThriftServer -18776 jmx - - In order, we see a: - - Hadoop TaskTracker, manages the local Childs - HBase RegionServer, serves regions - Child, its MapReduce task, cannot tell which type exactly - Hadoop TaskTracker, manages the local Childs - Hadoop DataNode, serves blocks - HQuorumPeer, a zookeeper ensemble member - Jps, well… it’s the current process - ThriftServer, it’s a special one will be running only if thrift was started - jmx, this is a local process that’s part of our monitoring platform ( poorly named maybe). You probably don’t have that. - - - - You can then do stuff like checking out the full command line that started the process: - -hadoop@sv4borg12:~$ ps aux | grep HRegionServer -hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/jdk1.6.0_14/bin/java -Xmx8000m -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -XX:+UseConcMarkSweepGC -XX:NewSize=64m -XX:MaxNewSize=64m -XX:CMSInitiatingOccupancyFraction=88 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/export1/hadoop/logs/gc-hbase.log -Dcom.sun.management.jmxremote.port=10102 -Dcom.sun.management.jmxremote.authenticate=true -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.password.file=/home/hadoop/hbase/conf/jmxremote.password -Dcom.sun.management.jmxremote -Dhbase.log.dir=/export1/hadoop/logs -Dhbase.log.file=hbase-hadoop-regionserver-sv4borg12.log -Dhbase.home.dir=/home/hadoop/hbase -Dhbase.id.str=hadoop -Dhbase.root.logger=INFO,DRFA -Djava.library.path=/home/hadoop/hbase/lib/native/Linux-amd64-64 -classpath /home/hadoop/hbase/bin/../conf:[many jars]:/home/hadoop/hadoop/conf org.apache.hadoop.hbase.regionserver.HRegionServer start - - -
-
- jstack - - jstack is one of the most important tools when trying to figure out what a java process is doing apart from looking at the logs. It has to be used in conjunction with jps in order to give it a process id. It shows a list of threads, each one has a name, and they appear in the order that they were created (so the top ones are the most recent threads). Here’s a few example: - - - The main thread of a RegionServer that’s waiting for something to do from the master: - - "regionserver60020" prio=10 tid=0x0000000040ab4000 nid=0x45cf waiting on condition [0x00007f16b6a96000..0x00007f16b6a96a70] - java.lang.Thread.State: TIMED_WAITING (parking) - at sun.misc.Unsafe.park(Native Method) - - parking to wait for <0x00007f16cd5c2f30> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) - at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:198) - at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:1963) - at java.util.concurrent.LinkedBlockingQueue.poll(LinkedBlockingQueue.java:395) - at org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:647) - at java.lang.Thread.run(Thread.java:619) - - The MemStore flusher thread that is currently flushing to a file: -"regionserver60020.cacheFlusher" daemon prio=10 tid=0x0000000040f4e000 nid=0x45eb in Object.wait() [0x00007f16b5b86000..0x00007f16b5b87af0] - java.lang.Thread.State: WAITING (on object monitor) - at java.lang.Object.wait(Native Method) - at java.lang.Object.wait(Object.java:485) - at org.apache.hadoop.ipc.Client.call(Client.java:803) - - locked <0x00007f16cb14b3a8> (a org.apache.hadoop.ipc.Client$Call) - at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:221) - at $Proxy1.complete(Unknown Source) - at sun.reflect.GeneratedMethodAccessor38.invoke(Unknown Source) - at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) - at java.lang.reflect.Method.invoke(Method.java:597) - at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82) - at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59) - at $Proxy1.complete(Unknown Source) - at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.closeInternal(DFSClient.java:3390) - - locked <0x00007f16cb14b470> (a org.apache.hadoop.hdfs.DFSClient$DFSOutputStream) - at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.close(DFSClient.java:3304) - at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:61) - at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:86) - at org.apache.hadoop.hbase.io.hfile.HFile$Writer.close(HFile.java:650) - at org.apache.hadoop.hbase.regionserver.StoreFile$Writer.close(StoreFile.java:853) - at org.apache.hadoop.hbase.regionserver.Store.internalFlushCache(Store.java:467) - - locked <0x00007f16d00e6f08> (a java.lang.Object) - at org.apache.hadoop.hbase.regionserver.Store.flushCache(Store.java:427) - at org.apache.hadoop.hbase.regionserver.Store.access$100(Store.java:80) - at org.apache.hadoop.hbase.regionserver.Store$StoreFlusherImpl.flushCache(Store.java:1359) - at org.apache.hadoop.hbase.regionserver.HRegion.internalFlushcache(HRegion.java:907) - at org.apache.hadoop.hbase.regionserver.HRegion.internalFlushcache(HRegion.java:834) - at org.apache.hadoop.hbase.regionserver.HRegion.flushcache(HRegion.java:786) - at 
org.apache.hadoop.hbase.regionserver.MemStoreFlusher.flushRegion(MemStoreFlusher.java:250) - at org.apache.hadoop.hbase.regionserver.MemStoreFlusher.flushRegion(MemStoreFlusher.java:224) - at org.apache.hadoop.hbase.regionserver.MemStoreFlusher.run(MemStoreFlusher.java:146) - - - - A handler thread that’s waiting for stuff to do (like put, delete, scan, etc): - -"IPC Server handler 16 on 60020" daemon prio=10 tid=0x00007f16b011d800 nid=0x4a5e waiting on condition [0x00007f16afefd000..0x00007f16afefd9f0] - java.lang.Thread.State: WAITING (parking) - at sun.misc.Unsafe.park(Native Method) - - parking to wait for <0x00007f16cd3f8dd8> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) - at java.util.concurrent.locks.LockSupport.park(LockSupport.java:158) - at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1925) - at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:358) - at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1013) - - - - And one that’s busy doing an increment of a counter (it’s in the phase where it’s trying to create a scanner in order to read the last value): - -"IPC Server handler 66 on 60020" daemon prio=10 tid=0x00007f16b006e800 nid=0x4a90 runnable [0x00007f16acb77000..0x00007f16acb77cf0] - java.lang.Thread.State: RUNNABLE - at org.apache.hadoop.hbase.regionserver.KeyValueHeap.<init>(KeyValueHeap.java:56) - at org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:79) - at org.apache.hadoop.hbase.regionserver.Store.getScanner(Store.java:1202) - at org.apache.hadoop.hbase.regionserver.HRegion$RegionScanner.<init>(HRegion.java:2209) - at org.apache.hadoop.hbase.regionserver.HRegion.instantiateInternalScanner(HRegion.java:1063) - at org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1055) - at org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1039) - at org.apache.hadoop.hbase.regionserver.HRegion.getLastIncrement(HRegion.java:2875) - at org.apache.hadoop.hbase.regionserver.HRegion.incrementColumnValue(HRegion.java:2978) - at org.apache.hadoop.hbase.regionserver.HRegionServer.incrementColumnValue(HRegionServer.java:2433) - at sun.reflect.GeneratedMethodAccessor20.invoke(Unknown Source) - at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) - at java.lang.reflect.Method.invoke(Method.java:597) - at org.apache.hadoop.hbase.ipc.HBaseRPC$Server.call(HBaseRPC.java:560) - at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1027) - - - - A thread that receives data from HDFS: - -"IPC Client (47) connection to sv4borg9/10.4.24.40:9000 from hadoop" daemon prio=10 tid=0x00007f16a02d0000 nid=0x4fa3 runnable [0x00007f16b517d000..0x00007f16b517dbf0] - java.lang.Thread.State: RUNNABLE - at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method) - at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:215) - at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:65) - at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:69) - - locked <0x00007f17d5b68c00> (a sun.nio.ch.Util$1) - - locked <0x00007f17d5b68be8> (a java.util.Collections$UnmodifiableSet) - - locked <0x00007f1877959b50> (a sun.nio.ch.EPollSelectorImpl) - at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:80) - at org.apache.hadoop.net.SocketIOWithTimeout$SelectorPool.select(SocketIOWithTimeout.java:332) - at 
org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:157) - at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:155) - at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:128) - at java.io.FilterInputStream.read(FilterInputStream.java:116) - at org.apache.hadoop.ipc.Client$Connection$PingInputStream.read(Client.java:304) - at java.io.BufferedInputStream.fill(BufferedInputStream.java:218) - at java.io.BufferedInputStream.read(BufferedInputStream.java:237) - - locked <0x00007f1808539178> (a java.io.BufferedInputStream) - at java.io.DataInputStream.readInt(DataInputStream.java:370) - at org.apache.hadoop.ipc.Client$Connection.receiveResponse(Client.java:569) - at org.apache.hadoop.ipc.Client$Connection.run(Client.java:477) - - - - And here is a master trying to recover a lease after a region server died: - -"LeaseChecker" daemon prio=10 tid=0x00000000407ef800 nid=0x76cd waiting on condition [0x00007f6d0eae2000..0x00007f6d0eae2a70] --- - java.lang.Thread.State: WAITING (on object monitor) - at java.lang.Object.wait(Native Method) - at java.lang.Object.wait(Object.java:485) - at org.apache.hadoop.ipc.Client.call(Client.java:726) - - locked <0x00007f6d1cd28f80> (a org.apache.hadoop.ipc.Client$Call) - at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220) - at $Proxy1.recoverBlock(Unknown Source) - at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.processDatanodeError(DFSClient.java:2636) - at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.<init>(DFSClient.java:2832) - at org.apache.hadoop.hdfs.DFSClient.append(DFSClient.java:529) - at org.apache.hadoop.hdfs.DistributedFileSystem.append(DistributedFileSystem.java:186) - at org.apache.hadoop.fs.FileSystem.append(FileSystem.java:530) - at org.apache.hadoop.hbase.util.FSUtils.recoverFileLease(FSUtils.java:619) - at org.apache.hadoop.hbase.regionserver.wal.HLog.splitLog(HLog.java:1322) - at org.apache.hadoop.hbase.regionserver.wal.HLog.splitLog(HLog.java:1210) - at org.apache.hadoop.hbase.master.HMaster.splitLogAfterStartup(HMaster.java:648) - at org.apache.hadoop.hbase.master.HMaster.joinCluster(HMaster.java:572) - at org.apache.hadoop.hbase.master.HMaster.run(HMaster.java:503) - - -
-
- OpenTSDB - - OpenTSDB is an excellent alternative to Ganglia as it uses HBase to store all the time series and doesn’t have to downsample. Monitoring your own HBase cluster that hosts OpenTSDB is a good exercise. - - - Here’s an example of a cluster that’s suffering from hundreds of compactions launched almost all around the same time, which severely affects the IO performance: (TODO: insert graph plotting compactionQueueSize) - - - It’s a good practice to build dashboards with all the important graphs per machine and per cluster so that debugging issues can be done with a single quick look. For example, at StumbleUpon there’s one dashboard per cluster with the most important metrics from both the OS and HBase. You can then go down at the machine level and get even more detailed metrics. - -
-
- clusterssh+top - - clusterssh+top, it’s like a poor man’s monitoring system and it can be quite useful when you have only a few machines as it’s very easy to setup. Starting clusterssh will give you one terminal per machine and another terminal in which whatever you type will be retyped in every window. This means that you can type “top” once and it will start it for all of your machines at the same time giving you full view of the current state of your cluster. You can also tail all the logs at the same time, edit files, etc. - -
-
- -
- Client -
- ScannerTimeoutException - This is thrown if the time between RPC calls from the client to RegionServer exceeds the scan timeout. - For example, if Scan.setCaching is set to 500, then there will be an RPC call to fetch the next batch of rows every 500 .next() calls on the ResultScanner - because data is being transferred in blocks of 500 rows to the client. Reducing the setCaching value may be an option, but setting this value too low makes for inefficient - processing on numbers of rows. - -
- -
-
- RegionServer -
- Startup Errors -
- Compression Link Errors - - Since compression algorithms such as LZO need to be installed and configured on each cluster this is a frequent source of startup error. If you see messages like this... - -11/02/20 01:32:15 ERROR lzo.GPLNativeCodeLoader: Could not load native gpl library -java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path - at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1734) - at java.lang.Runtime.loadLibrary0(Runtime.java:823) - at java.lang.System.loadLibrary(System.java:1028) - - .. then there is a path issue with the compression libraries. See the Configuration section on LZO compression configuration. - -
-
-
- Runtime Errors - -
- java.io.IOException...(Too many open files) - - See the Getting Started section on ulimit and nproc configuration. - -
-
- xceiverCount 258 exceeds the limit of concurrent xcievers 256 - - This typically shows up in the DataNode logs. - - - See the Getting Started section on xceivers configuration. - -
-
- System instability, and the presence of "java.lang.OutOfMemoryError: unable to create new native thread in exceptions" HDFS datanode logs or that of any system daemon - - See the Getting Started section on ulimit and nproc configuration. - -
-
- DFS instability and/or RegionServer lease timeouts - - If you see warning messages like this... - -2009-02-24 10:01:33,516 WARN org.apache.hadoop.hbase.util.Sleeper: We slept xxx ms, ten times longer than scheduled: 10000 -2009-02-24 10:01:33,516 WARN org.apache.hadoop.hbase.util.Sleeper: We slept xxx ms, ten times longer than scheduled: 15000 -2009-02-24 10:01:36,472 WARN org.apache.hadoop.hbase.regionserver.HRegionServer: unable to report to master for xxx milliseconds - retrying - - ... or see full GC compactions then you may be experiencing full GC's. - -
-
- "No live nodes contain current block" and/or YouAreDeadException - - These errors can happen either when running out of OS file handles or in periods of severe network problems where the nodes are unreachable. - - - See the Getting Started section on ulimit and nproc configuration and check your network. - -
- -
-
- Shutdown Errors - -
- -
-
- Master -
- Startup Errors - -
-
- Shutdown Errors - -
- -
-
+ + + Troubleshooting and Debugging HBase +
+ General Guidelines + + Always start with the master log (TODO: Which lines?). + Normally it’s just printing the same lines over and over again. + If not, then there’s an issue. + Google or search-hadoop.com + should return some hits for the exceptions you’re seeing. + + + An error rarely comes alone in HBase; usually when something goes wrong, what follows + may be hundreds of exceptions and stack traces coming from all over the place. + The best way to approach this type of problem is to walk the log back to where it all + began. For example, one trick with RegionServers is that they will print some + metrics when aborting, so grepping for Dump + should get you close to the start of the problem. + + + RegionServer suicides are “normal”, as this is what they do when something goes wrong. + For example, if ulimit and xcievers (the two most important initial settings, see ) + aren’t changed, at some point it will become impossible for DataNodes to create new threads, + which from the HBase point of view looks as if HDFS were gone. Think about what would happen if your + MySQL database were suddenly unable to access files on your local file system; well, it’s the same with + HBase and HDFS. Another very common reason to see RegionServers committing seppuku is when they enter + prolonged garbage collection pauses that last longer than the default ZooKeeper session timeout. + For more information on GC pauses, see the + 3-part blog post by Todd Lipcon + and above. +
+
+ Logs + + The key process logs are as follows... (replace <user> with the user that started the service, and <hostname> with the machine name) + + + NameNode: $HADOOP_HOME/logs/hadoop-<user>-namenode-<hostname>.log + + + DataNode: $HADOOP_HOME/logs/hadoop-<user>-datanode-<hostname>.log + + + JobTracker: $HADOOP_HOME/logs/hadoop-<user>-jobtracker-<hostname>.log + + + TaskTracker: $HADOOP_HOME/logs/hadoop-<user>-tasktracker-<hostname>.log + + + HMaster: $HBASE_HOME/logs/hbase-<user>-master-<hostname>.log + + + RegionServer: $HBASE_HOME/logs/hbase-<user>-regionserver-<hostname>.log + + + ZooKeeper: TODO + 
+ Log Locations + For stand-alone deployments the logs are obviously going to be on a single machine; however, this is a development configuration only. + Production deployments need to run on a cluster. +
+ NameNode + The NameNode log is on the NameNode server. The HBase Master is typically run on the NameNode server, as well as ZooKeeper. + For smaller clusters the JobTracker is typically run on the NameNode server as well. +
+
+ DataNode + Each DataNode server will have a DataNode log for HDFS, as well as a RegionServer log for HBase. + Additionally, each DataNode server will have a TaskTracker log for MapReduce task execution. +
+
+ +
+
+ Tools +
+ search-hadoop.com + + search-hadoop.com indexes all the mailing lists and JIRA; it’s really helpful when looking for Hadoop/HBase-specific issues. + +
+
+ tail + + tail is the command-line tool that lets you look at the end of a file. Add the “-f” option and it will refresh when new data is available. It’s useful when you are wondering what’s happening, for example, when a cluster is taking a long time to shut down or start up, as you can just fire up a new terminal and tail the master log (and maybe a few RegionServers). + +
+
+ top + + top is probably one of the most important tools when first trying to see what’s running on a machine and how its resources are consumed. Here’s an example from a production system: +
+top - 14:46:59 up 39 days, 11:55, 1 user, load average: 3.75, 3.57, 3.84
+Tasks: 309 total, 1 running, 308 sleeping, 0 stopped, 0 zombie
+Cpu(s): 4.5%us, 1.6%sy, 0.0%ni, 91.7%id, 1.4%wa, 0.1%hi, 0.6%si, 0.0%st
+Mem: 24414432k total, 24296956k used, 117476k free, 7196k buffers
+Swap: 16008732k total, 14348k used, 15994384k free, 11106908k cached
+
+ PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
+15558 hadoop 18 -2 3292m 2.4g 3556 S 79 10.4 6523:52 java
+13268 hadoop 18 -2 8967m 8.2g 4104 S 21 35.1 5170:30 java
+ 8895 hadoop 18 -2 1581m 497m 3420 S 11 2.1 4002:32 java
+…
+ + + + Here we can see that the system load average during the last five minutes is 3.75, which very roughly means that on average 3.75 threads were waiting for CPU time during these 5 minutes. In general, the “perfect” utilization equals the number of cores; below that number the machine is underutilized, and above it the machine is overutilized. This is an important concept; see this article to understand it more: http://www.linuxjournal.com/article/9001. + + + Apart from load, we can see that the system is using almost all its available RAM, but most of it is used for the OS cache (which is good). The swap has only a few KBs in it, and this is wanted; high numbers would indicate swapping activity, which is the nemesis of performance for Java systems. Another way to detect swapping is when the load average goes through the roof (although this could also be caused by things like a dying disk, among others). + + + The list of processes isn’t super useful by default; all we know is that 3 Java processes are using about 111% of the CPUs. To know which is which, simply type “c” and each line will be expanded. Typing “1” will give you the detail of how each CPU is used instead of the average for all of them as shown here. +
+
+ jps + + jps is shipped with every JDK and gives the Java process ids for the current user (if root, then it gives the ids for all users). Example: +
+hadoop@sv4borg12:~$ jps
+1322 TaskTracker
+17789 HRegionServer
+27862 Child
+1158 DataNode
+25115 HQuorumPeer
+2950 Jps
+19750 ThriftServer
+18776 jmx
+ In order, we see a: + + Hadoop TaskTracker, manages the local Childs + HBase RegionServer, serves regions + Child, a MapReduce task; we cannot tell which type exactly + Hadoop DataNode, serves blocks + HQuorumPeer, a ZooKeeper ensemble member + Jps, well… it’s the current process + ThriftServer, a special one that will be running only if Thrift was started + jmx, a local process that’s part of our monitoring platform (poorly named, maybe). You probably don’t have that. + + + + You can then do stuff like checking out the full command line that started the process: +
+hadoop@sv4borg12:~$ ps aux | grep HRegionServer
+hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/jdk1.6.0_14/bin/java -Xmx8000m -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -XX:+UseConcMarkSweepGC -XX:NewSize=64m -XX:MaxNewSize=64m -XX:CMSInitiatingOccupancyFraction=88 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/export1/hadoop/logs/gc-hbase.log -Dcom.sun.management.jmxremote.port=10102 -Dcom.sun.management.jmxremote.authenticate=true -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.password.file=/home/hadoop/hbase/conf/jmxremote.password -Dcom.sun.management.jmxremote -Dhbase.log.dir=/export1/hadoop/logs -Dhbase.log.file=hbase-hadoop-regionserver-sv4borg12.log -Dhbase.home.dir=/home/hadoop/hbase -Dhbase.id.str=hadoop -Dhbase.root.logger=INFO,DRFA -Djava.library.path=/home/hadoop/hbase/lib/native/Linux-amd64-64 -classpath /home/hadoop/hbase/bin/../conf:[many jars]:/home/hadoop/hadoop/conf org.apache.hadoop.hbase.regionserver.HRegionServer start
+ +
+
+ jstack + + jstack is one of the most important tools when trying to figure out what a Java process is doing apart from looking at the logs. It has to be used in conjunction with jps in order to give it a process id. It shows a list of threads, each one has a name, and they appear in the order that they were created (so the top ones are the most recent threads). Here are a few examples: + + + The main thread of a RegionServer that’s waiting for something to do from the master: + + "regionserver60020" prio=10 tid=0x0000000040ab4000 nid=0x45cf waiting on condition [0x00007f16b6a96000..0x00007f16b6a96a70] + java.lang.Thread.State: TIMED_WAITING (parking) + at sun.misc.Unsafe.park(Native Method) + - parking to wait for <0x00007f16cd5c2f30> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) + at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:198) + at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:1963) + at java.util.concurrent.LinkedBlockingQueue.poll(LinkedBlockingQueue.java:395) + at org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:647) + at java.lang.Thread.run(Thread.java:619) + + The MemStore flusher thread that is currently flushing to a file: +"regionserver60020.cacheFlusher" daemon prio=10 tid=0x0000000040f4e000 nid=0x45eb in Object.wait() [0x00007f16b5b86000..0x00007f16b5b87af0] + java.lang.Thread.State: WAITING (on object monitor) + at java.lang.Object.wait(Native Method) + at java.lang.Object.wait(Object.java:485) + at org.apache.hadoop.ipc.Client.call(Client.java:803) + - locked <0x00007f16cb14b3a8> (a org.apache.hadoop.ipc.Client$Call) + at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:221) + at $Proxy1.complete(Unknown Source) + at sun.reflect.GeneratedMethodAccessor38.invoke(Unknown Source) + at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) + at java.lang.reflect.Method.invoke(Method.java:597) + at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82) + at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59) + at $Proxy1.complete(Unknown Source) + at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.closeInternal(DFSClient.java:3390) + - locked <0x00007f16cb14b470> (a org.apache.hadoop.hdfs.DFSClient$DFSOutputStream) + at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.close(DFSClient.java:3304) + at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:61) + at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:86) + at org.apache.hadoop.hbase.io.hfile.HFile$Writer.close(HFile.java:650) + at org.apache.hadoop.hbase.regionserver.StoreFile$Writer.close(StoreFile.java:853) + at org.apache.hadoop.hbase.regionserver.Store.internalFlushCache(Store.java:467) + - locked <0x00007f16d00e6f08> (a java.lang.Object) + at org.apache.hadoop.hbase.regionserver.Store.flushCache(Store.java:427) + at org.apache.hadoop.hbase.regionserver.Store.access$100(Store.java:80) + at org.apache.hadoop.hbase.regionserver.Store$StoreFlusherImpl.flushCache(Store.java:1359) + at org.apache.hadoop.hbase.regionserver.HRegion.internalFlushcache(HRegion.java:907) + at org.apache.hadoop.hbase.regionserver.HRegion.internalFlushcache(HRegion.java:834) + at org.apache.hadoop.hbase.regionserver.HRegion.flushcache(HRegion.java:786) + at 
org.apache.hadoop.hbase.regionserver.MemStoreFlusher.flushRegion(MemStoreFlusher.java:250) + at org.apache.hadoop.hbase.regionserver.MemStoreFlusher.flushRegion(MemStoreFlusher.java:224) + at org.apache.hadoop.hbase.regionserver.MemStoreFlusher.run(MemStoreFlusher.java:146) + + + + A handler thread that’s waiting for stuff to do (like put, delete, scan, etc): + +"IPC Server handler 16 on 60020" daemon prio=10 tid=0x00007f16b011d800 nid=0x4a5e waiting on condition [0x00007f16afefd000..0x00007f16afefd9f0] + java.lang.Thread.State: WAITING (parking) + at sun.misc.Unsafe.park(Native Method) + - parking to wait for <0x00007f16cd3f8dd8> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) + at java.util.concurrent.locks.LockSupport.park(LockSupport.java:158) + at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1925) + at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:358) + at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1013) + + + + And one that’s busy doing an increment of a counter (it’s in the phase where it’s trying to create a scanner in order to read the last value): + +"IPC Server handler 66 on 60020" daemon prio=10 tid=0x00007f16b006e800 nid=0x4a90 runnable [0x00007f16acb77000..0x00007f16acb77cf0] + java.lang.Thread.State: RUNNABLE + at org.apache.hadoop.hbase.regionserver.KeyValueHeap.<init>(KeyValueHeap.java:56) + at org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:79) + at org.apache.hadoop.hbase.regionserver.Store.getScanner(Store.java:1202) + at org.apache.hadoop.hbase.regionserver.HRegion$RegionScanner.<init>(HRegion.java:2209) + at org.apache.hadoop.hbase.regionserver.HRegion.instantiateInternalScanner(HRegion.java:1063) + at org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1055) + at org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1039) + at org.apache.hadoop.hbase.regionserver.HRegion.getLastIncrement(HRegion.java:2875) + at org.apache.hadoop.hbase.regionserver.HRegion.incrementColumnValue(HRegion.java:2978) + at org.apache.hadoop.hbase.regionserver.HRegionServer.incrementColumnValue(HRegionServer.java:2433) + at sun.reflect.GeneratedMethodAccessor20.invoke(Unknown Source) + at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) + at java.lang.reflect.Method.invoke(Method.java:597) + at org.apache.hadoop.hbase.ipc.HBaseRPC$Server.call(HBaseRPC.java:560) + at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1027) + + + + A thread that receives data from HDFS: + +"IPC Client (47) connection to sv4borg9/10.4.24.40:9000 from hadoop" daemon prio=10 tid=0x00007f16a02d0000 nid=0x4fa3 runnable [0x00007f16b517d000..0x00007f16b517dbf0] + java.lang.Thread.State: RUNNABLE + at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method) + at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:215) + at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:65) + at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:69) + - locked <0x00007f17d5b68c00> (a sun.nio.ch.Util$1) + - locked <0x00007f17d5b68be8> (a java.util.Collections$UnmodifiableSet) + - locked <0x00007f1877959b50> (a sun.nio.ch.EPollSelectorImpl) + at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:80) + at org.apache.hadoop.net.SocketIOWithTimeout$SelectorPool.select(SocketIOWithTimeout.java:332) + at 
org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:157) + at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:155) + at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:128) + at java.io.FilterInputStream.read(FilterInputStream.java:116) + at org.apache.hadoop.ipc.Client$Connection$PingInputStream.read(Client.java:304) + at java.io.BufferedInputStream.fill(BufferedInputStream.java:218) + at java.io.BufferedInputStream.read(BufferedInputStream.java:237) + - locked <0x00007f1808539178> (a java.io.BufferedInputStream) + at java.io.DataInputStream.readInt(DataInputStream.java:370) + at org.apache.hadoop.ipc.Client$Connection.receiveResponse(Client.java:569) + at org.apache.hadoop.ipc.Client$Connection.run(Client.java:477) + + + + And here is a master trying to recover a lease after a region server died: + +"LeaseChecker" daemon prio=10 tid=0x00000000407ef800 nid=0x76cd waiting on condition [0x00007f6d0eae2000..0x00007f6d0eae2a70] +-- + java.lang.Thread.State: WAITING (on object monitor) + at java.lang.Object.wait(Native Method) + at java.lang.Object.wait(Object.java:485) + at org.apache.hadoop.ipc.Client.call(Client.java:726) + - locked <0x00007f6d1cd28f80> (a org.apache.hadoop.ipc.Client$Call) + at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220) + at $Proxy1.recoverBlock(Unknown Source) + at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.processDatanodeError(DFSClient.java:2636) + at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.<init>(DFSClient.java:2832) + at org.apache.hadoop.hdfs.DFSClient.append(DFSClient.java:529) + at org.apache.hadoop.hdfs.DistributedFileSystem.append(DistributedFileSystem.java:186) + at org.apache.hadoop.fs.FileSystem.append(FileSystem.java:530) + at org.apache.hadoop.hbase.util.FSUtils.recoverFileLease(FSUtils.java:619) + at org.apache.hadoop.hbase.regionserver.wal.HLog.splitLog(HLog.java:1322) + at org.apache.hadoop.hbase.regionserver.wal.HLog.splitLog(HLog.java:1210) + at org.apache.hadoop.hbase.master.HMaster.splitLogAfterStartup(HMaster.java:648) + at org.apache.hadoop.hbase.master.HMaster.joinCluster(HMaster.java:572) + at org.apache.hadoop.hbase.master.HMaster.run(HMaster.java:503) + + +
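+ If jstack is not available on a box, a similar (if less detailed) dump can be produced from inside the JVM itself. The following is a minimal standalone sketch using only the standard java.lang.Thread API; it is not an HBase utility: +
+import java.util.Map;
+
+public class ThreadDump {
+  // Prints a jstack-like listing of every thread in the current JVM,
+  // e.g. from a test or a shutdown hook.
+  public static void main(String[] args) {
+    for (Map.Entry<Thread, StackTraceElement[]> e :
+        Thread.getAllStackTraces().entrySet()) {
+      Thread t = e.getKey();
+      System.out.println("\"" + t.getName() + "\" daemon=" + t.isDaemon()
+          + " state=" + t.getState());
+      for (StackTraceElement frame : e.getValue()) {
+        System.out.println("    at " + frame);
+      }
+    }
+  }
+}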
+
+ OpenTSDB + + OpenTSDB is an excellent alternative to Ganglia as it uses HBase to store all the time series and doesn’t have to downsample. Monitoring your own HBase cluster that hosts OpenTSDB is a good exercise. + + + Here’s an example of a cluster that’s suffering from hundreds of compactions launched at almost the same time, which severely affects the IO performance: (TODO: insert graph plotting compactionQueueSize) + + + It’s a good practice to build dashboards with all the important graphs per machine and per cluster so that debugging issues can be done with a single quick look. For example, at StumbleUpon there’s one dashboard per cluster with the most important metrics from both the OS and HBase. You can then go down to the machine level and get even more detailed metrics. + +
+
+ clusterssh+top + + clusterssh+top is like a poor man’s monitoring system, and it can be quite useful when you have only a few machines, as it’s very easy to set up. Starting clusterssh will give you one terminal per machine and another terminal in which whatever you type will be retyped in every window. This means that you can type “top” once and it will start it for all of your machines at the same time, giving you a full view of the current state of your cluster. You can also tail all the logs at the same time, edit files, etc. + +
+
+ +
+ Client +
+ ScannerTimeoutException + This is thrown if the time between RPC calls from the client to the RegionServer exceeds the scan timeout. + For example, if Scan.setCaching is set to 500, then there will be an RPC call to fetch the next batch of rows every 500 .next() calls on the ResultScanner + because data is being transferred in blocks of 500 rows to the client. Reducing the setCaching value may be an option, but setting this value too low makes for inefficient + processing over large numbers of rows. A client-side sketch of the trade-off is shown below. + 
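+ A minimal client sketch of the trade-off (the table name is hypothetical); the caching value is the knob that trades RPC count against how long the client may spend on a batch before the scanner lease expires: +
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+
+public class ScanCachingExample {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = HBaseConfiguration.create();
+    HTable table = new HTable(conf, "mytable"); // hypothetical table
+    Scan scan = new Scan();
+    // Rows fetched per RPC: larger means fewer round trips, but the
+    // client must get back to .next() before the scanner lease expires.
+    scan.setCaching(100);
+    ResultScanner scanner = table.getScanner(scan);
+    try {
+      for (Result result : scanner) {
+        // per-row processing; if this is slow, lower setCaching
+      }
+    } finally {
+      scanner.close(); // always release the server-side scanner
+    }
+  }
+}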
+ +
+
+ RegionServer +
+ Startup Errors +
+ Compression Link Errors + + Since compression algorithms such as LZO need to be installed and configured on each cluster, this is a frequent source of startup errors. If you see messages like this... +
+11/02/20 01:32:15 ERROR lzo.GPLNativeCodeLoader: Could not load native gpl library
+java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path
+ at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1734)
+ at java.lang.Runtime.loadLibrary0(Runtime.java:823)
+ at java.lang.System.loadLibrary(System.java:1028)
+ + ... then there is a path issue with the compression libraries. See the Configuration section on LZO compression configuration. + +
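+ A quick way to check the native library load outside of HBase is this pure-JDK sketch, which fails with the same UnsatisfiedLinkError when the library is missing from java.library.path: +
+public class NativeLibCheck {
+  public static void main(String[] args) {
+    // Throws java.lang.UnsatisfiedLinkError if gplcompression is not
+    // on java.library.path, reproducing the startup failure above.
+    System.loadLibrary("gplcompression");
+    System.out.println("loaded; java.library.path="
+        + System.getProperty("java.library.path"));
+  }
+}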
+
+
+ Runtime Errors + +
+ java.io.IOException...(Too many open files) + + See the Getting Started section on ulimit and nproc configuration. + +
+
+ xceiverCount 258 exceeds the limit of concurrent xcievers 256 + + This typically shows up in the DataNode logs. + + + See the Getting Started section on xceivers configuration. + +
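+ The limit is a DataNode setting; a commonly recommended value for HBase clusters of this era (treat the exact number as a starting point, not a rule) goes in hdfs-site.xml on every DataNode: +
+<property>
+  <name>dfs.datanode.max.xcievers</name>
+  <value>4096</value>
+</property>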
+
+ System instability, and the presence of "java.lang.OutOfMemoryError: unable to create new native thread" exceptions in the HDFS DataNode logs or those of any system daemon + + See the Getting Started section on ulimit and nproc configuration. + +
+
+ DFS instability and/or RegionServer lease timeouts + + If you see warning messages like this... +
+2009-02-24 10:01:33,516 WARN org.apache.hadoop.hbase.util.Sleeper: We slept xxx ms, ten times longer than scheduled: 10000
+2009-02-24 10:01:33,516 WARN org.apache.hadoop.hbase.util.Sleeper: We slept xxx ms, ten times longer than scheduled: 15000
+2009-02-24 10:01:36,472 WARN org.apache.hadoop.hbase.regionserver.HRegionServer: unable to report to master for xxx milliseconds - retrying
+ + ... or see "Full GC" entries in the GC logs, then you may be experiencing full GCs. + +
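+ To confirm that the pauses line up with garbage collection, turn on GC logging. A sketch for hbase-env.sh, reusing the flags and log path already visible in the ps output above: +
+# Log GC activity so pauses matching the Sleeper warnings can be confirmed.
+export HBASE_OPTS="$HBASE_OPTS -verbose:gc -XX:+PrintGCDetails \
+  -XX:+PrintGCTimeStamps -Xloggc:/export1/hadoop/logs/gc-hbase.log"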
+
+ "No live nodes contain current block" and/or YouAreDeadException + + These errors can happen either when running out of OS file handles or in periods of severe network problems where the nodes are unreachable. + + + See the Getting Started section on ulimit and nproc configuration and check your network. + +
+ +
+
+ Shutdown Errors + +
+ +
+
+ Master +
+ Startup Errors + +
+
+ Shutdown Errors + +
+ +
+
diff --git a/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java b/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java index 26a8bef794d..01bc1dd6cbc 100644 --- a/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java +++ b/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.Map; import java.util.TreeMap; @@ -55,13 +56,14 @@ public class ClusterStatus extends VersionedWritable { *
 *   0: initial version
 *   1: added cluster ID
+ *   2: Added Map of ServerName to ServerLoad
 *
*/ - private static final byte VERSION = 1; + private static final byte VERSION = 2; private String hbaseVersion; - private Collection liveServerInfo; - private Collection deadServers; + private Map liveServers; + private Collection deadServers; private Map intransition; private String clusterId; @@ -72,18 +74,28 @@ public class ClusterStatus extends VersionedWritable { super(); } + public ClusterStatus(final String hbaseVersion, final String clusterid, + final Map servers, + final Collection deadServers, final Map rit) { + this.hbaseVersion = hbaseVersion; + this.liveServers = servers; + this.deadServers = deadServers; + this.intransition = rit; + this.clusterId = clusterid; + } + /** * @return the names of region servers on the dead list */ - public Collection getDeadServerNames() { + public Collection getDeadServerNames() { return Collections.unmodifiableCollection(deadServers); } /** * @return the number of region servers in the cluster */ - public int getServers() { - return liveServerInfo.size(); + public int getServersSize() { + return liveServers.size(); } /** @@ -97,11 +109,8 @@ public class ClusterStatus extends VersionedWritable { * @return the average cluster load */ public double getAverageLoad() { - int load = 0; - for (HServerInfo server: liveServerInfo) { - load += server.getLoad().getLoad(); - } - return (double)load / (double)liveServerInfo.size(); + int load = getRegionsCount(); + return (double)load / (double)getServersSize(); } /** @@ -109,8 +118,8 @@ public class ClusterStatus extends VersionedWritable { */ public int getRegionsCount() { int count = 0; - for (HServerInfo server: liveServerInfo) { - count += server.getLoad().getNumberOfRegions(); + for (Map.Entry e: this.liveServers.entrySet()) { + count += e.getValue().getNumberOfRegions(); } return count; } @@ -120,8 +129,8 @@ public class ClusterStatus extends VersionedWritable { */ public int getRequestsCount() { int count = 0; - for (HServerInfo server: liveServerInfo) { - count += server.getLoad().getNumberOfRequests(); + for (Map.Entry e: this.liveServers.entrySet()) { + count += e.getValue().getNumberOfRequests(); } return count; } @@ -133,13 +142,6 @@ public class ClusterStatus extends VersionedWritable { return hbaseVersion; } - /** - * @param version the HBase version string - */ - public void setHBaseVersion(String version) { - hbaseVersion = version; - } - /** * @see java.lang.Object#equals(java.lang.Object) */ @@ -152,7 +154,7 @@ public class ClusterStatus extends VersionedWritable { } return (getVersion() == ((ClusterStatus)o).getVersion()) && getHBaseVersion().equals(((ClusterStatus)o).getHBaseVersion()) && - liveServerInfo.equals(((ClusterStatus)o).liveServerInfo) && + this.liveServers.equals(((ClusterStatus)o).liveServers) && deadServers.equals(((ClusterStatus)o).deadServers); } @@ -160,7 +162,7 @@ public class ClusterStatus extends VersionedWritable { * @see java.lang.Object#hashCode() */ public int hashCode() { - return VERSION + hbaseVersion.hashCode() + liveServerInfo.hashCode() + + return VERSION + hbaseVersion.hashCode() + this.liveServers.hashCode() + deadServers.hashCode(); } @@ -175,43 +177,34 @@ public class ClusterStatus extends VersionedWritable { /** * Returns detailed region server information: A list of - * {@link HServerInfo}, containing server load and resource usage - * statistics as {@link HServerLoad}, containing per-region - * statistics as {@link HServerLoad.RegionLoad}. + * {@link ServerName}. 
* @return region server information + * @deprecated Use {@link #getServers()} */ - public Collection getServerInfo() { - return Collections.unmodifiableCollection(liveServerInfo); + public Collection getServerInfo() { + return getServers(); } - // - // Setters - // - - public void setServerInfo(Collection serverInfo) { - this.liveServerInfo = serverInfo; + public Collection getServers() { + return Collections.unmodifiableCollection(this.liveServers.keySet()); } - public void setDeadServers(Collection deadServers) { - this.deadServers = deadServers; + /** + * @param sn + * @return Server's load or null if not found. + */ + public HServerLoad getLoad(final ServerName sn) { + return this.liveServers.get(sn); } public Map getRegionsInTransition() { return this.intransition; } - public void setRegionsInTransition(final Map m) { - this.intransition = m; - } - public String getClusterId() { return clusterId; } - public void setClusterId(String id) { - this.clusterId = id; - } - // // Writable // @@ -219,13 +212,14 @@ public class ClusterStatus extends VersionedWritable { public void write(DataOutput out) throws IOException { super.write(out); out.writeUTF(hbaseVersion); - out.writeInt(liveServerInfo.size()); - for (HServerInfo server: liveServerInfo) { - server.write(out); + out.writeInt(getServersSize()); + for (Map.Entry e: this.liveServers.entrySet()) { + out.writeUTF(e.getKey().toString()); + e.getValue().write(out); } out.writeInt(deadServers.size()); - for (String server: deadServers) { - out.writeUTF(server); + for (ServerName server: deadServers) { + out.writeUTF(server.toString()); } out.writeInt(this.intransition.size()); for (Map.Entry e: this.intransition.entrySet()) { @@ -239,16 +233,17 @@ public class ClusterStatus extends VersionedWritable { super.readFields(in); hbaseVersion = in.readUTF(); int count = in.readInt(); - liveServerInfo = new ArrayList(count); + this.liveServers = new HashMap(count); for (int i = 0; i < count; i++) { - HServerInfo info = new HServerInfo(); - info.readFields(in); - liveServerInfo.add(info); + String str = in.readUTF(); + HServerLoad hsl = new HServerLoad(); + hsl.readFields(in); + this.liveServers.put(new ServerName(str), hsl); } count = in.readInt(); - deadServers = new ArrayList(count); + deadServers = new ArrayList(count); for (int i = 0; i < count; i++) { - deadServers.add(in.readUTF()); + deadServers.add(new ServerName(in.readUTF())); } count = in.readInt(); this.intransition = new TreeMap(); @@ -260,4 +255,4 @@ public class ClusterStatus extends VersionedWritable { } this.clusterId = in.readUTF(); } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/HConstants.java b/src/main/java/org/apache/hadoop/hbase/HConstants.java index 5701639d769..0911375e052 100644 --- a/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -373,6 +373,12 @@ public final class HConstants { /** HBCK special code name used as server name when manipulating ZK nodes */ public static final String HBCK_CODE_NAME = "HBCKServerName"; + public static final ServerName HBCK_CODE_SERVERNAME = + new ServerName(HBCK_CODE_NAME, -1, -1L); + + public static final String KEY_FOR_HOSTNAME_SEEN_BY_MASTER = + "hbase.regionserver.hostname.seen.by.master"; + public static final String HBASE_MASTER_LOGCLEANER_PLUGINS = "hbase.master.logcleaner.plugins"; diff --git a/src/main/java/org/apache/hadoop/hbase/HMsg.java b/src/main/java/org/apache/hadoop/hbase/HMsg.java deleted file mode 100644 index 
87beb008dab..00000000000 --- a/src/main/java/org/apache/hadoop/hbase/HMsg.java +++ /dev/null @@ -1,235 +0,0 @@ -/** - * Copyright 2010 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.io.Writable; - -/** - * HMsg is used to send messages between master and regionservers. Messages are - * sent as payload on the regionserver-to-master heartbeats. Region assignment - * does not use this mechanism. It goes via zookeeper. - * - *

Most of the time the messages are simple but some messages are accompanied - * by the region affected. HMsg may also carry an optional message. - * - *

TODO: Clean out all messages that go from master to regionserver; by - * design, these are to go via zk from here on out. - */ -public class HMsg implements Writable { - public static final HMsg [] EMPTY_HMSG_ARRAY = new HMsg[0]; - - public static enum Type { - /** - * When RegionServer receives this message, it goes into a sleep that only - * an exit will cure. This message is sent by unit tests simulating - * pathological states. - */ - TESTING_BLOCK_REGIONSERVER, - } - - private Type type = null; - private HRegionInfo info = null; - private byte[] message = null; - private HRegionInfo daughterA = null; - private HRegionInfo daughterB = null; - - /** Default constructor. Used during deserialization */ - public HMsg() { - this(null); - } - - /** - * Construct a message with the specified message and empty HRegionInfo - * @param type Message type - */ - public HMsg(final HMsg.Type type) { - this(type, new HRegionInfo(), null); - } - - /** - * Construct a message with the specified message and HRegionInfo - * @param type Message type - * @param hri Region to which message type applies - */ - public HMsg(final HMsg.Type type, final HRegionInfo hri) { - this(type, hri, null); - } - - /** - * Construct a message with the specified message and HRegionInfo - * - * @param type Message type - * @param hri Region to which message type applies. Cannot be - * null. If no info associated, used other Constructor. - * @param msg Optional message (Stringified exception, etc.) - */ - public HMsg(final HMsg.Type type, final HRegionInfo hri, final byte[] msg) { - this(type, hri, null, null, msg); - } - - /** - * Construct a message with the specified message and HRegionInfo - * - * @param type Message type - * @param hri Region to which message type applies. Cannot be - * null. If no info associated, used other Constructor. - * @param daughterA - * @param daughterB - * @param msg Optional message (Stringified exception, etc.) - */ - public HMsg(final HMsg.Type type, final HRegionInfo hri, - final HRegionInfo daughterA, final HRegionInfo daughterB, final byte[] msg) { - this.type = type; - if (hri == null) { - throw new NullPointerException("Region cannot be null"); - } - this.info = hri; - this.message = msg; - this.daughterA = daughterA; - this.daughterB = daughterB; - } - - /** - * @return Region info or null if none associated with this message type. - */ - public HRegionInfo getRegionInfo() { - return this.info; - } - - /** @return the type of message */ - public Type getType() { - return this.type; - } - - /** - * @param other Message type to compare to - * @return True if we are of same message type as other - */ - public boolean isType(final HMsg.Type other) { - return this.type.equals(other); - } - - /** @return the message type */ - public byte[] getMessage() { - return this.message; - } - - /** - * @return First daughter if Type is MSG_REPORT_SPLIT_INCLUDES_DAUGHTERS else - * null - */ - public HRegionInfo getDaughterA() { - return this.daughterA; - } - - /** - * @return Second daughter if Type is MSG_REPORT_SPLIT_INCLUDES_DAUGHTERS else - * null - */ - public HRegionInfo getDaughterB() { - return this.daughterB; - } - - /** - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(this.type.toString()); - // If null or empty region, don't bother printing it out. 
- if (this.info != null && this.info.getRegionName().length > 0) { - sb.append(": "); - sb.append(this.info.getRegionNameAsString()); - } - if (this.message != null && this.message.length > 0) { - sb.append(": " + Bytes.toString(this.message)); - } - return sb.toString(); - } - - /** - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - HMsg that = (HMsg)obj; - return this.type.equals(that.type) && - (this.info != null)? this.info.equals(that.info): - that.info == null; - } - - /** - * @see java.lang.Object#hashCode() - */ - @Override - public int hashCode() { - int result = this.type.hashCode(); - if (this.info != null) { - result ^= this.info.hashCode(); - } - return result; - } - - // //////////////////////////////////////////////////////////////////////////// - // Writable - ////////////////////////////////////////////////////////////////////////////// - - /** - * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput) - */ - public void write(DataOutput out) throws IOException { - out.writeInt(this.type.ordinal()); - this.info.write(out); - if (this.message == null || this.message.length == 0) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - Bytes.writeByteArray(out, this.message); - } - } - - /** - * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput) - */ - public void readFields(DataInput in) throws IOException { - int ordinal = in.readInt(); - this.type = HMsg.Type.values()[ordinal]; - this.info.readFields(in); - boolean hasMessage = in.readBoolean(); - if (hasMessage) { - this.message = Bytes.readByteArray(in); - } - } -} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/HRegionLocation.java b/src/main/java/org/apache/hadoop/hbase/HRegionLocation.java index bd353b82649..43ea0196ca4 100644 --- a/src/main/java/org/apache/hadoop/hbase/HRegionLocation.java +++ b/src/main/java/org/apache/hadoop/hbase/HRegionLocation.java @@ -19,24 +19,30 @@ */ package org.apache.hadoop.hbase; +import java.net.InetSocketAddress; + +import org.apache.hadoop.hbase.util.Addressing; + /** - * Contains the HRegionInfo for the region and the HServerAddress for the - * HRegionServer serving the region + * Data structure to hold HRegionInfo and the address for the hosting + * HRegionServer. Immutable. */ public class HRegionLocation implements Comparable { - // TODO: Is this class necessary? Why not just have a Pair? 
- private HRegionInfo regionInfo; - private HServerAddress serverAddress; + private final HRegionInfo regionInfo; + private final String hostname; + private final int port; /** * Constructor - * * @param regionInfo the HRegionInfo for the region - * @param serverAddress the HServerAddress for the region server + * @param hostname Hostname + * @param port port */ - public HRegionLocation(HRegionInfo regionInfo, HServerAddress serverAddress) { + public HRegionLocation(HRegionInfo regionInfo, final String hostname, + final int port) { this.regionInfo = regionInfo; - this.serverAddress = serverAddress; + this.hostname = hostname; + this.port = port; } /** @@ -44,8 +50,8 @@ public class HRegionLocation implements Comparable { */ @Override public String toString() { - return "address: " + this.serverAddress.toString() + ", regioninfo: " + - this.regionInfo.getRegionNameAsString(); + return "region=" + this.regionInfo.getRegionNameAsString() + + ", hostname=" + this.hostname + ", port=" + this.port; } /** @@ -71,7 +77,8 @@ public class HRegionLocation implements Comparable { @Override public int hashCode() { int result = this.regionInfo.hashCode(); - result ^= this.serverAddress.hashCode(); + result ^= this.hostname.hashCode(); + result ^= this.port; return result; } @@ -80,9 +87,30 @@ public class HRegionLocation implements Comparable { return regionInfo; } - /** @return HServerAddress */ + /** @return HServerAddress + * @deprecated Use {@link #getHostnamePort} + */ public HServerAddress getServerAddress(){ - return serverAddress; + return new HServerAddress(this.hostname, this.port); + } + + public String getHostname() { + return this.hostname; + } + + public int getPort() { + return this.port; + } + + /** + * @return String made of hostname and port formatted as per {@link Addressing#createHostAndPortStr(String, int)} + */ + public String getHostnamePort() { + return Addressing.createHostAndPortStr(this.hostname, this.port); + } + + public InetSocketAddress getInetSocketAddress() { + return new InetSocketAddress(this.hostname, this.port); } // @@ -91,9 +119,9 @@ public class HRegionLocation implements Comparable { public int compareTo(HRegionLocation o) { int result = this.regionInfo.compareTo(o.regionInfo); - if(result == 0) { - result = this.serverAddress.compareTo(o.serverAddress); - } - return result; + if (result != 0) return result; + result = this.hostname.compareTo(o.getHostname()); + if (result != 0) return result; + return this.port - o.getPort(); } } \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/HServerAddress.java b/src/main/java/org/apache/hadoop/hbase/HServerAddress.java index 11c845a93a7..166fa57f62c 100644 --- a/src/main/java/org/apache/hadoop/hbase/HServerAddress.java +++ b/src/main/java/org/apache/hadoop/hbase/HServerAddress.java @@ -19,25 +19,38 @@ */ package org.apache.hadoop.hbase; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.io.WritableComparable; - import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.net.InetSocketAddress; import java.net.InetAddress; +import java.net.InetSocketAddress; + +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.WritableComparable; /** - * HServerAddress is a "label" for a HBase server made of host and port number. + * HServerAddress hosts a {@link InetSocketAddress} and makes it + * {@link WritableComparable}. 
Resolves on construction AND on + * deserialization -- since we're internally creating an InetSocketAddress -- + * so could end up with different results if the two ends of serialization have + * different resolvers. Be careful where you use it. Should only be used when + * you need to pass an InetSocketAddress across an RPC. Even then it's a bad + * idea because of the above resolve issue. + * @deprecated Use {@link InetSocketAddress} or {@link ServerName} or + * a hostname String and port. */ public class HServerAddress implements WritableComparable { - private InetSocketAddress address; - String stringValue; + // Hard to deprecate this class. It's in the API as an internal class, + // in particular as an inner class of HRegionLocation. Besides, sometimes + // we do want to serialize an InetSocketAddress; this class can be used then. + private InetSocketAddress address = null; + private String cachedToString = ""; + /** + * Constructor for deserialization use only. + */ public HServerAddress() { - this.address = null; - this.stringValue = null; + super(); } /** @@ -46,34 +59,20 @@ public class HServerAddress implements WritableComparable { */ public HServerAddress(InetSocketAddress address) { this.address = address; - this.stringValue = address.getAddress().getHostName() + ":" + - address.getPort(); checkBindAddressCanBeResolved(); + this.cachedToString = createCachedToString(); + } + + private String createCachedToString() { + return this.address.toString(); } /** - * @param hostAndPort Hostname and port formatted as <hostname> ':' <port> - */ - public HServerAddress(String hostAndPort) { - int colonIndex = hostAndPort.lastIndexOf(':'); - if (colonIndex < 0) { - throw new IllegalArgumentException("Not a host:port pair: " + hostAndPort); - } - String host = hostAndPort.substring(0, colonIndex); - int port = Integer.parseInt(hostAndPort.substring(colonIndex + 1)); - this.address = new InetSocketAddress(host, port); - this.stringValue = address.getHostName() + ":" + port; - checkBindAddressCanBeResolved(); - } - - /** - * @param bindAddress Hostname + * @param hostname Hostname * @param port Port number */ - public HServerAddress(String bindAddress, int port) { - this.address = new InetSocketAddress(bindAddress, port); - this.stringValue = address.getHostName() + ":" + port; - checkBindAddressCanBeResolved(); + public HServerAddress(final String hostname, final int port) { + this(new InetSocketAddress(hostname, port)); } /** @@ -81,45 +80,48 @@ * @param other HServerAddress to copy from */ public HServerAddress(HServerAddress other) { - String bindAddress = other.getBindAddress(); - int port = other.getPort(); - this.address = new InetSocketAddress(bindAddress, port); - stringValue = other.stringValue; - checkBindAddressCanBeResolved(); + this(new InetSocketAddress(other.getHostname(), other.getPort())); } - /** @return Bind address */ + /** @return Bind address -- the raw IP, the result of a call to + * {@link InetSocketAddress#getAddress()}.getHostAddress() -- + * or null if it cannot be resolved */ public String getBindAddress() { - final InetAddress addr = address.getAddress(); - if (addr != null) { - return addr.getHostAddress(); - } else { - LogFactory.getLog(HServerAddress.class).error("Could not resolve the" - + " DNS name of " + stringValue); - return null; - } + // This returns null if the address is not resolved.
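+ // (Descriptive note: a java.net.InetSocketAddress constructed from a
+ // hostname that does not resolve is "unresolved" -- getAddress() then
+ // returns null even though getHostName() still returns the hostname
+ // that was passed in.)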
+ final InetAddress addr = this.address.getAddress(); + if (addr != null) return addr.getHostAddress(); + LogFactory.getLog(HServerAddress.class).error("Could not resolve the" + + " DNS name of " + this.address.toString()); + return null; } private void checkBindAddressCanBeResolved() { if (getBindAddress() == null) { throw new IllegalArgumentException("Could not resolve the" - + " DNS name of " + stringValue); + + " DNS name of " + this.address.toString()); } } /** @return Port number */ public int getPort() { - return address.getPort(); + return this.address.getPort(); } /** @return Hostname */ public String getHostname() { - return address.getHostName(); + return this.address.getHostName(); + } + + /** + * @return Returns ':' + */ + public String getHostnameAndPort() { + return getHostname() + ":" + getPort(); } /** @return The InetSocketAddress */ public InetSocketAddress getInetSocketAddress() { - return address; + return this.address; } /** @@ -127,21 +129,15 @@ public class HServerAddress implements WritableComparable { */ @Override public String toString() { - return stringValue == null ? "" : stringValue; + return this.cachedToString; } @Override public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null) { - return false; - } - if (getClass() != o.getClass()) { - return false; - } - return compareTo((HServerAddress) o) == 0; + if (this == o) return true; + if (o == null) return false; + if (getClass() != o.getClass()) return false; + return compareTo((HServerAddress)o) == 0; } @Override @@ -158,24 +154,20 @@ public class HServerAddress implements WritableComparable { public void readFields(DataInput in) throws IOException { String hostname = in.readUTF(); int port = in.readInt(); - - if (hostname == null || hostname.length() == 0) { - address = null; - stringValue = null; - } else { - address = new InetSocketAddress(hostname, port); - stringValue = hostname + ":" + port; + if (hostname != null && hostname.length() > 0) { + this.address = new InetSocketAddress(hostname, port); checkBindAddressCanBeResolved(); + createCachedToString(); } } public void write(DataOutput out) throws IOException { - if (address == null) { + if (this.address == null) { out.writeUTF(""); out.writeInt(0); } else { - out.writeUTF(address.getAddress().getHostName()); - out.writeInt(address.getPort()); + out.writeUTF(this.address.getAddress().getHostName()); + out.writeInt(this.address.getPort()); } } @@ -187,7 +179,7 @@ public class HServerAddress implements WritableComparable { // Addresses as Strings may not compare though address is for the one // server with only difference being that one address has hostname // resolved whereas other only has IP. 
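+ // For example, "example.org:1234" and "10.0.0.1:1234" can name the same
+ // server yet differ as strings, which is why equality of the resolved
+ // addresses is checked before falling back to the string comparison below.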
- if (address.equals(o.address)) return 0; + if (this.address.equals(o.address)) return 0; return toString().compareTo(o.toString()); } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/HServerInfo.java b/src/main/java/org/apache/hadoop/hbase/HServerInfo.java index 0b5bd946948..8f6abd05ee8 100644 --- a/src/main/java/org/apache/hadoop/hbase/HServerInfo.java +++ b/src/main/java/org/apache/hadoop/hbase/HServerInfo.java @@ -23,77 +23,48 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.net.InetSocketAddress; -import java.util.Comparator; -import java.util.Set; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.io.VersionedWritable; -import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; /** - * HServerInfo is meta info about an {@link HRegionServer}. It is the token - * by which a master distingushes a particular regionserver from the rest. - * It holds hostname, ports, regionserver startcode, and load. Each server has - * a servername where servername is made up of a concatenation of - * hostname, port, and regionserver startcode. This servername is used in - * various places identifying this regionserver. Its even used as part of - * a pathname in the filesystem. As part of the initialization, - * master will pass the regionserver the address that it knows this regionserver - * by. In subsequent communications, the regionserver will pass a HServerInfo - * with the master-supplied address. + * HServerInfo is meta info about an {@link HRegionServer}. It hosts the + * {@link HServerAddress}, its webui port, and its server startcode. It was + * used to pass meta info about a server across an RPC but we've since made + * it so regionserver info is up in ZooKeeper and so this class is on its + * way out. It used to carry {@link HServerLoad} but as of HBase 0.92.0, the + * HServerLoad is passed independently of this class. Also, we now no longer pass + * the webui port from regionserver to master (TODO: Fix). + * @deprecated Use {@link InetSocketAddress} and/or {@link ServerName} and/or + * {@link HServerLoad} */ public class HServerInfo extends VersionedWritable - implements WritableComparable { - private static final byte VERSION = 0; - - /* - * This character is used as separator between server hostname and port and - * its startcode. Servername is formatted as - * <hostname> '{@ink #SERVERNAME_SEPARATOR"}' <port> '{@ink #SERVERNAME_SEPARATOR"}' <startcode>. - */ - private static final String SERVERNAME_SEPARATOR = ","; - - private HServerAddress serverAddress; +implements WritableComparable { + private static final byte VERSION = 1; + private HServerAddress serverAddress = new HServerAddress(); private long startCode; - private HServerLoad load; - private int infoPort; - // Servername is made of hostname, port and startcode. - private String serverName = null; - // Hostname of the regionserver. - private String hostname; - private String cachedHostnamePort = null; - - /** @return the object version number */ - public byte getVersion() { - return VERSION; - } + private int webuiport; public HServerInfo() { - this(new HServerAddress(), 0, HConstants.DEFAULT_REGIONSERVER_INFOPORT, - "default name"); + super(); } /** - * Constructor that creates a HServerInfo with a generated startcode and an - * empty load. - * @param serverAddress An {@link InetSocketAddress} encased in a {@link Writable} - * @param infoPort Port the webui runs on.
- * @param hostname Server hostname. + * Constructor that creates a HServerInfo with a generated startcode + * @param serverAddress + * @param webuiport Port the webui runs on. */ - public HServerInfo(HServerAddress serverAddress, final int infoPort, - final String hostname) { - this(serverAddress, System.currentTimeMillis(), infoPort, hostname); + public HServerInfo(final HServerAddress serverAddress, final int webuiport) { + this(serverAddress, System.currentTimeMillis(), webuiport); } public HServerInfo(HServerAddress serverAddress, long startCode, - final int infoPort, String hostname) { + final int webuiport) { this.serverAddress = serverAddress; this.startCode = startCode; - this.load = new HServerLoad(); - this.infoPort = infoPort; - this.hostname = hostname; + this.webuiport = webuiport; } /** @@ -103,106 +74,32 @@ public class HServerInfo extends VersionedWritable public HServerInfo(HServerInfo other) { this.serverAddress = new HServerAddress(other.getServerAddress()); this.startCode = other.getStartCode(); - this.load = other.getLoad(); - this.infoPort = other.getInfoPort(); - this.hostname = other.hostname; + this.webuiport = other.getInfoPort(); } - public HServerLoad getLoad() { - return load; - } - - public void setLoad(HServerLoad load) { - this.load = load; + /** @return the object version number */ + public byte getVersion() { + return VERSION; } public synchronized HServerAddress getServerAddress() { return new HServerAddress(serverAddress); } - public synchronized void setServerAddress(HServerAddress serverAddress) { - this.serverAddress = serverAddress; - this.hostname = serverAddress.getHostname(); - this.serverName = null; - } - public synchronized long getStartCode() { return startCode; } public int getInfoPort() { - return this.infoPort; + return getWebuiPort(); + } + + public int getWebuiPort() { + return this.webuiport; } public String getHostname() { - return this.hostname; - } - - /** - * @return The hostname and port concatenated with a ':' as separator. - */ - public synchronized String getHostnamePort() { - if (this.cachedHostnamePort == null) { - this.cachedHostnamePort = getHostnamePort(this.hostname, this.serverAddress.getPort()); - } - return this.cachedHostnamePort; - } - - /** - * @param hostname - * @param port - * @return The hostname and port concatenated with a ':' as separator. - */ - public static String getHostnamePort(final String hostname, final int port) { - return hostname + ":" + port; - } - - /** - * Gets the unique server instance name. Includes the hostname, port, and - * start code. 
- * @return Server name made of the concatenation of hostname, port and - * startcode formatted as <hostname> ',' <port> ',' <startcode> - */ - public synchronized String getServerName() { - if (this.serverName == null) { - this.serverName = getServerName(this.hostname, - this.serverAddress.getPort(), this.startCode); - } - return this.serverName; - } - - public static synchronized String getServerName(final String hostAndPort, - final long startcode) { - int index = hostAndPort.indexOf(":"); - if (index <= 0) throw new IllegalArgumentException("Expected ':' "); - return getServerName(hostAndPort.substring(0, index), - Integer.parseInt(hostAndPort.substring(index + 1)), startcode); - } - - /** - * @param address Server address - * @param startCode Server startcode - * @return Server name made of the concatenation of hostname, port and - * startcode formatted as <hostname> ',' <port> ',' <startcode> - */ - public static String getServerName(HServerAddress address, long startCode) { - return getServerName(address.getHostname(), address.getPort(), startCode); - } - - /* - * @param hostName - * @param port - * @param startCode - * @return Server name made of the concatenation of hostname, port and - * startcode formatted as <hostname> ',' <port> ',' <startcode> - */ - public static String getServerName(String hostName, int port, long startCode) { - StringBuilder name = new StringBuilder(hostName); - name.append(SERVERNAME_SEPARATOR); - name.append(port); - name.append(SERVERNAME_SEPARATOR); - name.append(startCode); - return name.toString(); + return this.serverAddress.getHostname(); } /** @@ -211,97 +108,46 @@ public class HServerInfo extends VersionedWritable * @see #getLoad() */ @Override - public String toString() { - return "serverName=" + getServerName() + - ", load=(" + this.load.toString() + ")"; + public synchronized String toString() { + return ServerName.getServerName(this.serverAddress.getHostnameAndPort(), + this.startCode); } @Override public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; return compareTo((HServerInfo)obj) == 0; } @Override public int hashCode() { - return this.getServerName().hashCode(); + int code = this.serverAddress.hashCode(); + code ^= this.webuiport; + code ^= this.startCode; + return code; } public void readFields(DataInput in) throws IOException { + super.readFields(in); this.serverAddress.readFields(in); this.startCode = in.readLong(); - this.load.readFields(in); - this.infoPort = in.readInt(); - this.hostname = in.readUTF(); + this.webuiport = in.readInt(); } public void write(DataOutput out) throws IOException { + super.write(out); this.serverAddress.write(out); out.writeLong(this.startCode); - this.load.write(out); - out.writeInt(this.infoPort); - out.writeUTF(hostname); + out.writeInt(this.webuiport); } public int compareTo(HServerInfo o) { - return this.getServerName().compareTo(o.getServerName()); + int compare = this.serverAddress.compareTo(o.getServerAddress()); + if (compare != 0) return compare; + if (this.webuiport != o.getInfoPort()) return this.webuiport - o.getInfoPort(); + if (this.startCode != o.getStartCode()) return (int)(this.startCode - o.getStartCode()); + return 0; } - - /** - * Orders HServerInfos by load then name. Natural/ascending order. 
- */ - public static class LoadComparator implements Comparator { - @Override - public int compare(HServerInfo left, HServerInfo right) { - int loadCompare = left.getLoad().compareTo(right.getLoad()); - return loadCompare != 0 ? loadCompare : left.compareTo(right); - } - } - - /** - * Utility method that does a find of a servername or a hostandport combination - * in the passed Set. - * @param servers Set of server names - * @param serverName Name to look for - * @param hostAndPortOnly If serverName is a - * hostname ':' port - * or hostname , port , startcode. - * @return True if serverName found in servers - */ - public static boolean isServer(final Set servers, - final String serverName, final boolean hostAndPortOnly) { - if (!hostAndPortOnly) return servers.contains(serverName); - String serverNameColonReplaced = - serverName.replaceFirst(":", SERVERNAME_SEPARATOR); - for (String hostPortStartCode: servers) { - int index = hostPortStartCode.lastIndexOf(SERVERNAME_SEPARATOR); - String hostPortStrippedOfStartCode = hostPortStartCode.substring(0, index); - if (hostPortStrippedOfStartCode.equals(serverNameColonReplaced)) return true; - } - return false; - } - - /** - * Utility method to excise the start code from a server name - * @param inServerName full server name - * @return server name less its start code - */ - public static String getServerNameLessStartCode(String inServerName) { - if (inServerName != null && inServerName.length() > 0) { - int index = inServerName.lastIndexOf(SERVERNAME_SEPARATOR); - if (index > 0) { - return inServerName.substring(0, index); - } - } - return inServerName; - } - -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/HServerLoad.java b/src/main/java/org/apache/hadoop/hbase/HServerLoad.java index 23720537389..4007ceb24c8 100644 --- a/src/main/java/org/apache/hadoop/hbase/HServerLoad.java +++ b/src/main/java/org/apache/hadoop/hbase/HServerLoad.java @@ -29,29 +29,31 @@ import java.util.TreeMap; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Strings; import org.apache.hadoop.io.VersionedWritable; -import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; /** - * This class encapsulates metrics for determining the load on a HRegionServer + * This class is used for exporting the current state of load on a RegionServer. */ public class HServerLoad extends VersionedWritable - implements WritableComparable { - private static final byte VERSION = 0; +implements WritableComparable { + private static final byte VERSION = 1; + // Empty load instance. + public static final HServerLoad EMPTY_HSERVERLOAD = new HServerLoad(); + + /** Number of requests since last report + */ + // TODO: Instead build this up out of region counters.
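+ // A rough sketch of that TODO, illustrative only and not part of this
+ // patch (assumes the per-region read/write request counters carried by
+ // RegionLoad):
+ //   int requests = 0;
+ //   for (RegionLoad rl: this.regionLoad.values()) {
+ //     requests += rl.getReadRequestsCount() + rl.getWriteRequestsCount();
+ //   }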
+ private int numberOfRequests = 0; - /** number of regions */ - // could just use regionLoad.size() but master.RegionManager likes to play - // around with this value while passing HServerLoad objects around during - // balancer calculations - private int numberOfRegions; - /** number of requests since last report */ - private int numberOfRequests; /** the amount of used heap, in MB */ - private int usedHeapMB; + private int usedHeapMB = 0; + /** the maximum allowable size of the heap, in MB */ - private int maxHeapMB; + private int maxHeapMB = 0; + /** per-region load metrics */ - private Map regionLoad = new TreeMap(Bytes.BYTES_COMPARATOR); + private Map regionLoad = + new TreeMap(Bytes.BYTES_COMPARATOR); /** @return the object version number */ public byte getVersion() { @@ -61,7 +63,14 @@ public class HServerLoad extends VersionedWritable /** * Encapsulates per-region loading metrics. */ - public static class RegionLoad implements Writable { + public static class RegionLoad extends VersionedWritable { + private static final byte VERSION = 0; + + /** @return the object version number */ + public byte getVersion() { + return VERSION; + } + /** the region name */ private byte[] name; /** the number of stores for the region */ @@ -236,6 +245,9 @@ public class HServerLoad extends VersionedWritable // Writable public void readFields(DataInput in) throws IOException { + super.readFields(in); + int version = getVersion(); + if (version != VERSION) throw new IOException("Version mismatch; " + version); int namelen = in.readInt(); this.name = new byte[namelen]; in.readFully(this.name); @@ -249,6 +261,7 @@ public class HServerLoad extends VersionedWritable } public void write(DataOutput out) throws IOException { + super.write(out); out.writeInt(name.length); out.write(name); out.writeInt(stores); @@ -308,10 +321,11 @@ public class HServerLoad extends VersionedWritable * @param maxHeapMB */ public HServerLoad(final int numberOfRequests, final int usedHeapMB, - final int maxHeapMB) { + final int maxHeapMB, final Map regionLoad) { this.numberOfRequests = numberOfRequests; this.usedHeapMB = usedHeapMB; this.maxHeapMB = maxHeapMB; + this.regionLoad = regionLoad; } /** @@ -319,7 +333,7 @@ public class HServerLoad extends VersionedWritable * @param hsl the template HServerLoad */ public HServerLoad(final HServerLoad hsl) { - this(hsl.numberOfRequests, hsl.usedHeapMB, hsl.maxHeapMB); + this(hsl.numberOfRequests, hsl.usedHeapMB, hsl.maxHeapMB, hsl.getRegionsLoad()); for (Map.Entry e : hsl.regionLoad.entrySet()) { this.regionLoad.put(e.getKey(), e.getValue()); } @@ -338,7 +352,7 @@ public class HServerLoad extends VersionedWritable // int load = numberOfRequests == 0 ? 1 : numberOfRequests; // load *= numberOfRegions == 0 ? 
1 : numberOfRegions; // return load; - return numberOfRegions; + return this.regionLoad.size(); } /** @@ -356,6 +370,7 @@ public class HServerLoad extends VersionedWritable * @return The load as a String */ public String toString(int msgInterval) { + int numberOfRegions = this.regionLoad.size(); StringBuilder sb = new StringBuilder(); sb = Strings.appendKeyValue(sb, "requests", Integer.valueOf(numberOfRequests/msgInterval)); @@ -384,23 +399,13 @@ public class HServerLoad extends VersionedWritable return compareTo((HServerLoad)o) == 0; } - /** - * @see java.lang.Object#hashCode() - */ - @Override - public int hashCode() { - int result = Integer.valueOf(numberOfRequests).hashCode(); - result ^= Integer.valueOf(numberOfRegions).hashCode(); - return result; - } - // Getters /** * @return the numberOfRegions */ public int getNumberOfRegions() { - return numberOfRegions; + return this.regionLoad.size(); } /** @@ -471,69 +476,16 @@ public class HServerLoad extends VersionedWritable return count; } - // Setters - - /** - * @param numberOfRegions the number of regions - */ - public void setNumberOfRegions(int numberOfRegions) { - this.numberOfRegions = numberOfRegions; - } - - /** - * @param numberOfRequests the number of requests to set - */ - public void setNumberOfRequests(int numberOfRequests) { - this.numberOfRequests = numberOfRequests; - } - - /** - * @param usedHeapMB the amount of heap in use, in MB - */ - public void setUsedHeapMB(int usedHeapMB) { - this.usedHeapMB = usedHeapMB; - } - - /** - * @param maxHeapMB the maximum allowable heap size, in MB - */ - public void setMaxHeapMB(int maxHeapMB) { - this.maxHeapMB = maxHeapMB; - } - - /** - * @param load Instance of HServerLoad - */ - public void addRegionInfo(final HServerLoad.RegionLoad load) { - this.numberOfRegions++; - this.regionLoad.put(load.getName(), load); - } - - /** - * @param name - * @param stores - * @param storefiles - * @param memstoreSizeMB - * @param storefileIndexSizeMB - * @param requestsCount - * @deprecated Use {@link #addRegionInfo(RegionLoad)} - */ - @Deprecated - public void addRegionInfo(final byte[] name, final int stores, - final int storefiles, final int storefileSizeMB, - final int memstoreSizeMB, final int storefileIndexSizeMB, - final int readRequestsCount, final int writeRequestsCount) { - this.regionLoad.put(name, new HServerLoad.RegionLoad(name, stores, storefiles, - storefileSizeMB, memstoreSizeMB, storefileIndexSizeMB, readRequestsCount, writeRequestsCount)); - } - // Writable public void readFields(DataInput in) throws IOException { + super.readFields(in); + int version = getVersion(); + if (version != VERSION) throw new IOException("Version mismatch; " + version); numberOfRequests = in.readInt(); usedHeapMB = in.readInt(); maxHeapMB = in.readInt(); - numberOfRegions = in.readInt(); + int numberOfRegions = in.readInt(); for (int i = 0; i < numberOfRegions; i++) { RegionLoad rl = new RegionLoad(); rl.readFields(in); @@ -542,10 +494,11 @@ public class HServerLoad extends VersionedWritable } public void write(DataOutput out) throws IOException { + super.write(out); out.writeInt(numberOfRequests); out.writeInt(usedHeapMB); out.writeInt(maxHeapMB); - out.writeInt(numberOfRegions); + out.writeInt(this.regionLoad.size()); for (RegionLoad rl: regionLoad.values()) rl.write(out); } diff --git a/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java b/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java index 0d696ab4b9b..5bc3bb02d25 100644 --- 
a/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java +++ b/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java @@ -254,12 +254,10 @@ public class LocalHBaseCluster { while (regionServerThread.isAlive()) { try { LOG.info("Waiting on " + - regionServerThread.getRegionServer().getHServerInfo().toString()); + regionServerThread.getRegionServer().toString()); regionServerThread.join(); } catch (InterruptedException e) { e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); } } return regionServerThread.getName(); @@ -275,12 +273,10 @@ public class LocalHBaseCluster { while (rst.isAlive()) { try { LOG.info("Waiting on " + - rst.getRegionServer().getHServerInfo().toString()); + rst.getRegionServer().toString()); rst.join(); } catch (InterruptedException e) { e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); } } for (int i=0;i<hostname> ',' <port> ',' <startcode>. - * If the master, it returns <hostname> ':' <port>'. - * @return unique server name + * @return The unique server name for this server. */ - public String getServerName(); + public ServerName getServerName(); } \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/ServerName.java b/src/main/java/org/apache/hadoop/hbase/ServerName.java new file mode 100644 index 00000000000..75ae78f437f --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/ServerName.java @@ -0,0 +1,221 @@ +/** + * Copyright 2011 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.util.Collection; + +import org.apache.hadoop.hbase.util.Addressing; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Instance of an HBase ServerName. + * A server name is used to uniquely identify a server instance and is made + * of the combination of hostname, port, and startcode. The startcode + * distinguishes restarted servers on the same hostname and port (startcode is + * usually the timestamp of server startup). The {@link #toString()} format of + * ServerName is safe to use in the filesystem and as a znode name up in + * ZooKeeper. Its format is: + * <hostname> '{@link #SERVERNAME_SEPARATOR}' <port> '{@link #SERVERNAME_SEPARATOR}' <startcode>. + * For example, if hostname is example.org, port is 1234, + * and the startcode for the regionserver is 1212121212, then + * the {@link #toString()} would be example.org,1234,1212121212. + * + *
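+ * 

A short usage sketch (illustrative; relies only on this class):
+ * 
+ *   ServerName sn = new ServerName("example.org", 1234, 1212121212L);
+ *   String s = sn.toString();      // "example.org,1234,1212121212"
+ *   ServerName roundTripped = new ServerName(s);
+ *   assert sn.equals(roundTripped);
+ * 
+ *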

Immutable. + */ +public class ServerName implements Comparable { + /** + * This character is used as separator between server hostname, port and + * startcode. + */ + public static final String SERVERNAME_SEPARATOR = ","; + + private final String servername; + private final String hostname; + private final int port; + private final long startcode; + private byte [] bytes; + + public ServerName(final String hostname, final int port, final long startcode) { + this.hostname = hostname; + this.port = port; + this.startcode = startcode; + this.servername = getServerName(hostname, port, startcode); + } + + public ServerName(final String serverName) { + this(parseHostname(serverName), parsePort(serverName), + parseStartcode(serverName)); + } + + public ServerName(final byte [] bytes) { + this(Bytes.toString(bytes)); + } + + public ServerName(final String hostAndPort, final long startCode) { + this(Addressing.parseHostname(hostAndPort), + Addressing.parsePort(hostAndPort), startCode); + } + + public static String parseHostname(final String serverName) { + if (serverName == null || serverName.length() <= 0) { + throw new IllegalArgumentException("Passed hostname is null or empty"); + } + int index = serverName.indexOf(SERVERNAME_SEPARATOR); + return serverName.substring(0, index); + } + + public static int parsePort(final String serverName) { + String [] split = serverName.split(SERVERNAME_SEPARATOR); + return Integer.parseInt(split[1]); + } + + public static long parseStartcode(final String serverName) { + int index = serverName.lastIndexOf(SERVERNAME_SEPARATOR); + return Long.parseLong(serverName.substring(index + 1)); + } + + @Override + public String toString() { + return getServerName(); + } + + /** + * @return {@link #getServerName()} as bytes + */ + public synchronized byte [] getBytes() { + if (this.bytes == null) this.bytes = Bytes.toBytes(getServerName()); + return this.bytes; + } + + public String getServerName() { + return servername; + } + + public String getHostname() { + return hostname; + } + + public int getPort() { + return port; + } + + public long getStartcode() { + return startcode; + } + + /** + * @param hostName + * @param port + * @param startcode + * @return Server name made of the concatenation of hostname, port and + * startcode formatted as <hostname> ',' <port> ',' <startcode> + */ + public static String getServerName(String hostName, int port, long startcode) { + StringBuilder name = new StringBuilder(hostName); + name.append(SERVERNAME_SEPARATOR); + name.append(port); + name.append(SERVERNAME_SEPARATOR); + name.append(startcode); + return name.toString(); + } + + /** + * @param hostAndPort String in form of <hostname> ':' <port> + * @param startcode + * @return Server name made of the concatenation of hostname, port and + * startcode formatted as <hostname> ',' <port> ',' <startcode> + */ + public static synchronized String getServerName(final String hostAndPort, + final long startcode) { + int index = hostAndPort.indexOf(":"); + if (index <= 0) throw new IllegalArgumentException("Expected ':' "); + return getServerName(hostAndPort.substring(0, index), + Integer.parseInt(hostAndPort.substring(index + 1)), startcode); + } + + /** + * @return Hostname and port formatted as described at + * {@link Addressing#createHostAndPortStr(String, int)} + */ + public String getHostAndPort() { + return Addressing.createHostAndPortStr(this.hostname, this.port); + } + + /** + * @param serverName ServerName in form specified by {@link #getServerName()} + * @return The server start code 
parsed from servername + */ + public static long getServerStartcodeFromServerName(final String serverName) { + int index = serverName.lastIndexOf(SERVERNAME_SEPARATOR); + return Long.parseLong(serverName.substring(index + 1)); + } + + /** + * Utility method to excise the start code from a server name + * @param inServerName full server name + * @return server name less its start code + */ + public static String getServerNameLessStartCode(String inServerName) { + if (inServerName != null && inServerName.length() > 0) { + int index = inServerName.lastIndexOf(SERVERNAME_SEPARATOR); + if (index > 0) { + return inServerName.substring(0, index); + } + } + return inServerName; + } + + @Override + public int compareTo(ServerName other) { + int compare = this.getHostname().compareTo(other.getHostname()); + if (compare != 0) return compare; + compare = this.getPort() - other.getPort(); + if (compare != 0) return compare; + return (int)(this.getStartcode() - other.getStartcode()); + } + + @Override + public int hashCode() { + return getServerName().hashCode(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null) return false; + if (!(o instanceof ServerName)) return false; + return this.compareTo((ServerName)o) == 0; + } + + + /** + * @return ServerName with matching hostname and port. + */ + public static ServerName findServerWithSameHostnamePort(final Collection names, + final ServerName serverName) { + for (ServerName sn: names) { + if (sn.getHostname().equals(serverName.getHostname()) && + sn.getPort() == serverName.getPort()) { + return sn; + } + } + return null; + } +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/avro/AvroUtil.java b/src/main/java/org/apache/hadoop/hbase/avro/AvroUtil.java index d7a1e67c307..529334dfaef 100644 --- a/src/main/java/org/apache/hadoop/hbase/avro/AvroUtil.java +++ b/src/main/java/org/apache/hadoop/hbase/avro/AvroUtil.java @@ -23,20 +23,17 @@ import java.nio.ByteBuffer; import java.util.Collection; import java.util.List; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericArray; +import org.apache.avro.generic.GenericData; +import org.apache.avro.util.Utf8; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HServerLoad; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.client.Delete; -import org.apache.hadoop.hbase.client.Get; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.io.hfile.Compression; -import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.avro.generated.AClusterStatus; import org.apache.hadoop.hbase.avro.generated.AColumn; import org.apache.hadoop.hbase.avro.generated.AColumnValue; @@ -54,11 +51,13 @@ import org.apache.hadoop.hbase.avro.generated.AServerAddress; import org.apache.hadoop.hbase.avro.generated.AServerInfo; import org.apache.hadoop.hbase.avro.generated.AServerLoad; import org.apache.hadoop.hbase.avro.generated.ATableDescriptor; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericArray; -import org.apache.avro.generic.GenericData; -import org.apache.avro.util.Utf8; +import 
org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.hfile.Compression; +import org.apache.hadoop.hbase.util.Bytes; public class AvroUtil { @@ -113,26 +112,26 @@ public class AvroUtil { return asl; } - static public AServerInfo hsiToASI(HServerInfo hsi) throws IOException { + static public AServerInfo hsiToASI(ServerName sn, HServerLoad hsl) throws IOException { AServerInfo asi = new AServerInfo(); - asi.infoPort = hsi.getInfoPort(); - asi.load = hslToASL(hsi.getLoad()); - asi.serverAddress = hsaToASA(hsi.getServerAddress()); - asi.serverName = new Utf8(hsi.getServerName()); - asi.startCode = hsi.getStartCode(); + asi.infoPort = -1; + asi.load = hslToASL(hsl); + asi.serverAddress = hsaToASA(new HServerAddress(sn.getHostname(), sn.getPort())); + asi.serverName = new Utf8(sn.toString()); + asi.startCode = sn.getStartcode(); return asi; } static public AClusterStatus csToACS(ClusterStatus cs) throws IOException { AClusterStatus acs = new AClusterStatus(); acs.averageLoad = cs.getAverageLoad(); - Collection deadServerNames = cs.getDeadServerNames(); + Collection deadServerNames = cs.getDeadServerNames(); Schema stringArraySchema = Schema.createArray(Schema.create(Schema.Type.STRING)); GenericData.Array adeadServerNames = null; if (deadServerNames != null) { adeadServerNames = new GenericData.Array(deadServerNames.size(), stringArraySchema); - for (String deadServerName : deadServerNames) { - adeadServerNames.add(new Utf8(deadServerName)); + for (ServerName deadServerName : deadServerNames) { + adeadServerNames.add(new Utf8(deadServerName.toString())); } } else { adeadServerNames = new GenericData.Array(0, stringArraySchema); @@ -142,19 +141,19 @@ public class AvroUtil { acs.hbaseVersion = new Utf8(cs.getHBaseVersion()); acs.regionsCount = cs.getRegionsCount(); acs.requestsCount = cs.getRequestsCount(); - Collection hserverInfos = cs.getServerInfo(); + Collection hserverInfos = cs.getServers(); Schema s = Schema.createArray(AServerInfo.SCHEMA$); GenericData.Array aserverInfos = null; if (hserverInfos != null) { aserverInfos = new GenericData.Array(hserverInfos.size(), s); - for (HServerInfo hsi : hserverInfos) { - aserverInfos.add(hsiToASI(hsi)); + for (ServerName hsi : hserverInfos) { + aserverInfos.add(hsiToASI(hsi, cs.getLoad(hsi))); } } else { aserverInfos = new GenericData.Array(0, s); } acs.serverInfos = aserverInfos; - acs.servers = cs.getServers(); + acs.servers = cs.getServers().size(); return acs; } diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java index be311797c81..feed777bc10 100644 --- a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java +++ b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java @@ -30,9 +30,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.ipc.HRegionInterface; @@ 
-63,12 +63,11 @@ public class CatalogTracker { private final MetaNodeTracker metaNodeTracker; private final AtomicBoolean metaAvailable = new AtomicBoolean(false); /** - * Do not clear this address once set. Let it be cleared by - * {@link #setMetaLocation(HServerAddress)} only. Its needed when we do + * Do not clear this address once set. It's needed when we do * server shutdown processing -- we need to know who had .META. last. If you * want to know if the address is good, rely on {@link #metaAvailable} value. */ - private HServerAddress metaLocation; + private ServerName metaLocation; private final int defaultTimeout; private boolean stopped = false; @@ -155,17 +154,18 @@ public class CatalogTracker { /** * Gets the current location for -ROOT- or null if location is * not currently available. - * @return location of root, null if not available + * @return server name of the server hosting root, or null if not available * @throws InterruptedException */ - public HServerAddress getRootLocation() throws InterruptedException { + public ServerName getRootLocation() throws InterruptedException { return this.rootRegionTracker.getRootRegionLocation(); } /** - * @return Location of meta or null if not yet available. + * @return Location of server hosting meta region formatted as per + * {@link ServerName}, or null if none available */ - public HServerAddress getMetaLocation() { + public ServerName getMetaLocation() { return this.metaLocation; } @@ -184,18 +184,19 @@ * for up to the specified timeout if not immediately available. Returns null * if the timeout elapses before root is available. * @param timeout maximum time to wait for root availability, in milliseconds - * @return location of root + * @return Location of server hosting root region, + * or null if none available * @throws InterruptedException if interrupted while waiting * @throws NotAllMetaRegionsOnlineException if root not available before * timeout */ - HServerAddress waitForRoot(final long timeout) + ServerName waitForRoot(final long timeout) throws InterruptedException, NotAllMetaRegionsOnlineException { - HServerAddress address = rootRegionTracker.waitRootRegionLocation(timeout); - if (address == null) { + ServerName sn = rootRegionTracker.waitRootRegionLocation(timeout); + if (sn == null) { throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms"); } - return address; + return sn; } /** @@ -238,11 +239,11 @@ */ private HRegionInterface getRootServerConnection() throws IOException, InterruptedException { - HServerAddress address = this.rootRegionTracker.getRootRegionLocation(); - if (address == null) { + ServerName sn = this.rootRegionTracker.getRootRegionLocation(); + if (sn == null) { return null; } - return getCachedConnection(address); + return getCachedConnection(sn); } /** @@ -278,7 +279,7 @@ if (rootConnection == null) { return null; } - HServerAddress newLocation = MetaReader.readMetaLocation(rootConnection); + ServerName newLocation = MetaReader.readMetaLocation(rootConnection); if (newLocation == null) { return null; } @@ -317,7 +318,7 @@ * @throws NotAllMetaRegionsOnlineException if meta not available before * timeout */ - public HServerAddress waitForMeta(long timeout) + public ServerName waitForMeta(long timeout) throws InterruptedException, IOException, NotAllMetaRegionsOnlineException { long stop = System.currentTimeMillis() + timeout; synchronized (metaAvailable) { @@ -372,18 +373,18 @@
this.metaAvailable.set(false); } - private void setMetaLocation(HServerAddress metaLocation) { + private void setMetaLocation(final ServerName metaLocation) { metaAvailable.set(true); this.metaLocation = metaLocation; // no synchronization because these are private and already under lock - metaAvailable.notifyAll(); + this.metaAvailable.notifyAll(); } - private HRegionInterface getCachedConnection(HServerAddress address) + private HRegionInterface getCachedConnection(ServerName sn) throws IOException { HRegionInterface protocol = null; try { - protocol = connection.getHRegionConnection(address, false); + protocol = connection.getHRegionConnection(sn.getHostname(), sn.getPort()); } catch (RetriesExhaustedException e) { if (e.getCause() != null && e.getCause() instanceof ConnectException) { // Catch this; presume it means the cached connection has gone bad. @@ -392,10 +393,10 @@ public class CatalogTracker { } } catch (SocketTimeoutException e) { // Return 'protocol' == null. - LOG.debug("Timed out connecting to " + address); + LOG.debug("Timed out connecting to " + sn); } catch (SocketException e) { // Return 'protocol' == null. - LOG.debug("Exception connecting to " + address); + LOG.debug("Exception connecting to " + sn); } catch (IOException ioe) { Throwable cause = ioe.getCause(); if (cause != null && cause instanceof EOFException) { @@ -412,7 +413,7 @@ public class CatalogTracker { } private boolean verifyRegionLocation(HRegionInterface metaServer, - final HServerAddress address, + final ServerName address, byte [] regionName) throws IOException { if (metaServer == null) { @@ -469,7 +470,8 @@ public class CatalogTracker { throw e; } return (connection == null)? false: - verifyRegionLocation(connection,this.rootRegionTracker.getRootRegionLocation(), + verifyRegionLocation(connection, + this.rootRegionTracker.getRootRegionLocation(), HRegionInfo.ROOT_REGIONINFO.getRegionName()); } diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java b/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java index c2ee0312c04..5d616078e60 100644 --- a/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java +++ b/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java @@ -26,8 +26,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.ipc.HRegionInterface; @@ -87,18 +87,17 @@ public class MetaEditor { } public static void addDaughter(final CatalogTracker catalogTracker, - final HRegionInfo regionInfo, final HServerInfo serverInfo) + final HRegionInfo regionInfo, final ServerName sn) throws NotAllMetaRegionsOnlineException, IOException { HRegionInterface server = catalogTracker.waitForMetaServerConnectionDefault(); byte [] catalogRegionName = CatalogTracker.META_REGION; Put put = new Put(regionInfo.getRegionName()); addRegionInfo(put, regionInfo); - if (serverInfo != null) addLocation(put, serverInfo); + if (sn != null) addLocation(put, sn); server.put(catalogRegionName, put); LOG.info("Added daughter " + regionInfo.getRegionNameAsString() + " in region " + Bytes.toString(catalogRegionName) + - (serverInfo == null? 
- ", serverInfo=null": ", serverInfo=" + serverInfo.getServerName())); + (sn == null? ", serverName=null": ", serverName=" + sn.toString())); } /** @@ -110,18 +109,18 @@ public class MetaEditor { * * @param catalogTracker catalog tracker * @param regionInfo region to update location of - * @param serverInfo server the region is located on + * @param sn Server name * @throws IOException * @throws ConnectException Usually because the regionserver carrying .META. * is down. * @throws NullPointerException Because no -ROOT- server connection */ public static void updateMetaLocation(CatalogTracker catalogTracker, - HRegionInfo regionInfo, HServerInfo serverInfo) + HRegionInfo regionInfo, ServerName sn) throws IOException, ConnectException { HRegionInterface server = catalogTracker.waitForRootServerConnectionDefault(); if (server == null) throw new IOException("No server for -ROOT-"); - updateLocation(server, CatalogTracker.ROOT_REGION, regionInfo, serverInfo); + updateLocation(server, CatalogTracker.ROOT_REGION, regionInfo, sn); } /** @@ -133,14 +132,14 @@ public class MetaEditor { * * @param catalogTracker catalog tracker * @param regionInfo region to update location of - * @param serverInfo server the region is located on + * @param sn Server name * @throws IOException */ public static void updateRegionLocation(CatalogTracker catalogTracker, - HRegionInfo regionInfo, HServerInfo serverInfo) + HRegionInfo regionInfo, ServerName sn) throws IOException { updateLocation(catalogTracker.waitForMetaServerConnectionDefault(), - CatalogTracker.META_REGION, regionInfo, serverInfo); + CatalogTracker.META_REGION, regionInfo, sn); } /** @@ -152,20 +151,19 @@ public class MetaEditor { * @param server connection to server hosting catalog region * @param catalogRegionName name of catalog region being updated * @param regionInfo region to update location of - * @param serverInfo server the region is located on + * @param sn Server name * @throws IOException In particular could throw {@link java.net.ConnectException} * if the server is down on other end. 
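+ * 

Illustratively, the edit amounts to a single Put against the
+ * region's row in the catalog table, carrying two cells:
+ * info:server = <hostname> ':' <port> and
+ * info:startcode = <startcode> (see addLocation below).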
*/ private static void updateLocation(HRegionInterface server, - byte [] catalogRegionName, HRegionInfo regionInfo, HServerInfo serverInfo) + byte [] catalogRegionName, HRegionInfo regionInfo, ServerName sn) throws IOException { Put put = new Put(regionInfo.getRegionName()); - addLocation(put, serverInfo); + addLocation(put, sn); server.put(catalogRegionName, put); LOG.info("Updated row " + regionInfo.getRegionNameAsString() + " in region " + Bytes.toString(catalogRegionName) + " with " + - "server=" + serverInfo.getHostnamePort() + ", " + - "startcode=" + serverInfo.getStartCode()); + "serverName=" + sn.toString()); } /** @@ -228,11 +226,11 @@ public class MetaEditor { return p; } - private static Put addLocation(final Put p, final HServerInfo hsi) { + private static Put addLocation(final Put p, final ServerName sn) { p.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, - Bytes.toBytes(hsi.getHostnamePort())); + Bytes.toBytes(sn.getHostAndPort())); p.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, - Bytes.toBytes(hsi.getStartCode())); + Bytes.toBytes(sn.getStartcode())); return p; } } diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java b/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java index 6e22cf5e449..eb57197c8aa 100644 --- a/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java +++ b/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java @@ -30,11 +30,10 @@ import java.util.TreeSet; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; @@ -125,10 +124,11 @@ public class MetaReader { * to META. If the region does not have an assignment it will have a null * value in the map. 
* - * @return map of regions to their currently assigned server + * @return map of regions to their currently assigned server where server is + * a String of <host> ':' <port> * @throws IOException */ - public static Map fullScan( + public static Map fullScan( CatalogTracker catalogTracker) throws IOException { return fullScan(catalogTracker, new TreeSet()); @@ -147,7 +147,7 @@ public class MetaReader { * @return map of regions to their currently assigned server * @throws IOException */ - public static Map fullScan( + public static Map fullScan( CatalogTracker catalogTracker, final Set disabledTables) throws IOException { return fullScan(catalogTracker, disabledTables, false); @@ -168,17 +168,17 @@ public class MetaReader { * @return map of regions to their currently assigned server * @throws IOException */ - public static Map fullScan( + public static Map fullScan( CatalogTracker catalogTracker, final Set disabledTables, final boolean excludeOfflinedSplitParents) throws IOException { - final Map regions = - new TreeMap(); + final Map regions = + new TreeMap(); Visitor v = new Visitor() { @Override public boolean visit(Result r) throws IOException { if (r == null || r.isEmpty()) return true; - Pair region = metaRowToRegionPair(r); + Pair region = metaRowToRegionPair(r); if (region == null) return true; HRegionInfo hri = region.getFirst(); if (disabledTables.contains( @@ -199,8 +199,6 @@ public class MetaReader { * Returns a map of every region to it's currently assigned server, according * to META. If the region does not have an assignment it will have a null * value in the map. - *
- * Returns HServerInfo which includes server startcode. * * @return map of regions to their currently assigned server * @throws IOException @@ -273,10 +271,10 @@ public class MetaReader { /** * Reads the location of META from ROOT. * @param metaServer connection to server hosting ROOT - * @return location of META in ROOT, null if not available + * @return location of META in ROOT where location, or null if not available * @throws IOException */ - public static HServerAddress readMetaLocation(HRegionInterface metaServer) + public static ServerName readMetaLocation(HRegionInterface metaServer) throws IOException { return readLocation(metaServer, CatalogTracker.ROOT_REGION, CatalogTracker.META_REGION); @@ -286,10 +284,10 @@ public class MetaReader { * Reads the location of the specified region from META. * @param catalogTracker * @param regionName region to read location of - * @return location of region in META, null if not available + * @return location of META in ROOT where location is, or null if not available * @throws IOException */ - public static HServerAddress readRegionLocation(CatalogTracker catalogTracker, + public static ServerName readRegionLocation(CatalogTracker catalogTracker, byte [] regionName) throws IOException { if (isMetaRegion(regionName)) throw new IllegalArgumentException("See readMetaLocation"); @@ -297,14 +295,17 @@ public class MetaReader { CatalogTracker.META_REGION, regionName); } - private static HServerAddress readLocation(HRegionInterface metaServer, + private static ServerName readLocation(HRegionInterface metaServer, byte [] catalogRegionName, byte [] regionName) throws IOException { Result r = null; try { r = metaServer.get(catalogRegionName, - new Get(regionName).addColumn(HConstants.CATALOG_FAMILY, - HConstants.SERVER_QUALIFIER)); + new Get(regionName). + addColumn(HConstants.CATALOG_FAMILY, + HConstants.SERVER_QUALIFIER). + addColumn(HConstants.CATALOG_FAMILY, + HConstants.STARTCODE_QUALIFIER)); } catch (java.net.SocketTimeoutException e) { // Treat this exception + message as unavailable catalog table. Catch it // and fall through to return a null @@ -334,78 +335,57 @@ public class MetaReader { if (r == null || r.isEmpty()) { return null; } - byte [] value = r.getValue(HConstants.CATALOG_FAMILY, - HConstants.SERVER_QUALIFIER); - return new HServerAddress(Bytes.toString(value)); + return getServerNameFromResult(r); } /** * Gets the region info and assignment for the specified region from META. * @param catalogTracker * @param regionName - * @return region info and assignment from META, null if not available + * @return location of META in ROOT where location is + * a String of <host> ':' <port>, or null if not available * @throws IOException */ - public static Pair getRegion( + public static Pair getRegion( CatalogTracker catalogTracker, byte [] regionName) throws IOException { Get get = new Get(regionName); get.addFamily(HConstants.CATALOG_FAMILY); byte [] meta = getCatalogRegionNameForRegion(regionName); Result r = catalogTracker.waitForMetaServerConnectionDefault().get(meta, get); - if(r == null || r.isEmpty()) { - return null; - } - return metaRowToRegionPair(r); + return (r == null || r.isEmpty())? null: metaRowToRegionPair(r); } /** * @param data A .META. table row. - * @return A pair of the regioninfo and the server address from data - * or null for server address if no address set in .META. or null for a result - * if no HRegionInfo found. 
- * @throws IOException - */ - public static Pair metaRowToRegionPair( - Result data) throws IOException { - byte [] bytes = - data.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); - if (bytes == null) return null; - HRegionInfo info = Writables.getHRegionInfo(bytes); - final byte[] value = data.getValue(HConstants.CATALOG_FAMILY, - HConstants.SERVER_QUALIFIER); - if (value != null && value.length > 0) { - HServerAddress server = new HServerAddress(Bytes.toString(value)); - return new Pair(info, server); - } else { - return new Pair(info, null); - } - } - - /** - * @param data A .META. table row. - * @return A pair of the regioninfo and the server info from data + * @return A pair of the regioninfo and the ServerName * (or null for server address if no address set in .META.). * @throws IOException */ - public static Pair metaRowToRegionPairWithInfo( - Result data) throws IOException { + public static Pair metaRowToRegionPair(Result data) + throws IOException { byte [] bytes = data.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); if (bytes == null) return null; HRegionInfo info = Writables.getHRegionInfo(bytes); - final byte[] value = data.getValue(HConstants.CATALOG_FAMILY, + ServerName sn = getServerNameFromResult(data); + // sn can be null in case where no server inof. + return new Pair(info, sn); + } + + /** + * @param data Result to interrogate. + * @return A ServerName instance or null if necessary fields not found or empty. + */ + private static ServerName getServerNameFromResult(final Result data) { + byte[] value = data.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); - if (value != null && value.length > 0) { - final long startCode = Bytes.toLong(data.getValue(HConstants.CATALOG_FAMILY, - HConstants.STARTCODE_QUALIFIER)); - HServerAddress server = new HServerAddress(Bytes.toString(value)); - HServerInfo hsi = new HServerInfo(server, startCode, 0, - server.getHostname()); - return new Pair(info, hsi); - } else { - return new Pair(info, null); - } + if (value == null || value.length == 0) return null; + String hostAndPort = Bytes.toString(value); + value = data.getValue(HConstants.CATALOG_FAMILY, + HConstants.STARTCODE_QUALIFIER); + if (value == null || value.length == 0) return null; + return new ServerName(hostAndPort, Bytes.toLong(value)); } /** @@ -528,26 +508,27 @@ public class MetaReader { /** * @param catalogTracker * @param tableName - * @return Return list of regioninfos and server addresses. + * @return Return list of regioninfos and server. * @throws IOException * @throws InterruptedException */ - public static List> + public static List> getTableRegionsAndLocations(CatalogTracker catalogTracker, String tableName) throws IOException, InterruptedException { byte [] tableNameBytes = Bytes.toBytes(tableName); if (Bytes.equals(tableNameBytes, HConstants.ROOT_TABLE_NAME)) { // If root, do a bit of special handling. 
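Reading an assignment back out is the mirror image of the write; the following usage sketch applies the same two-column rule that getServerNameFromResult() codifies, the Result assumed fetched from a catalog region as in readLocation() above:

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class CatalogLocationRead {
  /** @return the hosting ServerName, or null if the region is unassigned. */
  public static ServerName serverOf(final Result catalogRow) {
    byte [] server = catalogRow.getValue(HConstants.CATALOG_FAMILY,
      HConstants.SERVER_QUALIFIER);
    if (server == null || server.length == 0) return null;
    byte [] startcode = catalogRow.getValue(HConstants.CATALOG_FAMILY,
      HConstants.STARTCODE_QUALIFIER);
    if (startcode == null || startcode.length == 0) return null;
    return new ServerName(Bytes.toString(server), Bytes.toLong(startcode));
  }
}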
- HServerAddress hsa = catalogTracker.getRootLocation(); - List> list = - new ArrayList>(); - list.add(new Pair(HRegionInfo.ROOT_REGIONINFO, hsa)); + ServerName serverName = catalogTracker.getRootLocation(); + List> list = + new ArrayList>(); + list.add(new Pair(HRegionInfo.ROOT_REGIONINFO, + serverName)); return list; } HRegionInterface metaServer = getCatalogRegionInterface(catalogTracker, tableNameBytes); - List> regions = - new ArrayList>(); + List> regions = + new ArrayList>(); Scan scan = getScanForTableName(tableNameBytes); scan.addFamily(HConstants.CATALOG_FAMILY); long scannerid = @@ -556,7 +537,7 @@ public class MetaReader { Result data; while((data = metaServer.next(scannerid)) != null) { if (data != null && data.size() > 0) { - Pair region = metaRowToRegionPair(data); + Pair region = metaRowToRegionPair(data); if (region == null) continue; regions.add(region); } @@ -575,7 +556,7 @@ public class MetaReader { * @throws IOException */ public static NavigableMap - getServerUserRegions(CatalogTracker catalogTracker, final HServerInfo hsi) + getServerUserRegions(CatalogTracker catalogTracker, final ServerName serverName) throws IOException { HRegionInterface metaServer = catalogTracker.waitForMetaServerConnectionDefault(); @@ -588,10 +569,9 @@ public class MetaReader { Result result; while((result = metaServer.next(scannerid)) != null) { if (result != null && result.size() > 0) { - Pair pair = - metaRowToRegionPairWithInfo(result); + Pair pair = metaRowToRegionPair(result); if (pair == null) continue; - if (pair.getSecond() == null || !pair.getSecond().equals(hsi)) { + if (pair.getSecond() == null || !serverName.equals(pair.getSecond())) { continue; } hris.put(pair.getFirst(), result); diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/RootLocationEditor.java b/src/main/java/org/apache/hadoop/hbase/catalog/RootLocationEditor.java index aee64c5bf1e..1cbf1b6561b 100644 --- a/src/main/java/org/apache/hadoop/hbase/catalog/RootLocationEditor.java +++ b/src/main/java/org/apache/hadoop/hbase/catalog/RootLocationEditor.java @@ -21,7 +21,7 @@ package org.apache.hadoop.hbase.catalog; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; @@ -53,11 +53,11 @@ public class RootLocationEditor { * Sets the location of -ROOT- in ZooKeeper to the * specified server address. 
* @param zookeeper zookeeper reference - * @param location server address hosting root + * @param location The server hosting -ROOT- * @throws KeeperException unexpected zookeeper exception */ public static void setRootLocation(ZooKeeperWatcher zookeeper, - HServerAddress location) + final ServerName location) throws KeeperException { LOG.info("Setting ROOT region location in ZooKeeper as " + location); try { @@ -69,4 +69,4 @@ public class RootLocationEditor { Bytes.toBytes(location.toString())); } } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java index afb666a8357..fa7448bca8b 100644 --- a/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java +++ b/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RegionException; import org.apache.hadoop.hbase.RemoteExceptionHandler; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableExistsException; import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; @@ -46,6 +47,7 @@ import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.ipc.HMasterInterface; import org.apache.hadoop.hbase.ipc.HRegionInterface; +import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.ipc.RemoteException; @@ -371,7 +373,7 @@ public class HBaseAdmin implements Abortable { } // Wait until all regions deleted HRegionInterface server = - connection.getHRegionConnection(firstMetaServer.getServerAddress()); + connection.getHRegionConnection(firstMetaServer.getHostname(), firstMetaServer.getPort()); for (int tries = 0; tries < (this.numRetries * this.retryLongerMultiplier); tries++) { long scannerId = -1L; try { @@ -762,18 +764,15 @@ public class HBaseAdmin implements Abortable { CatalogTracker ct = getCatalogTracker(); try { if (hostAndPort != null) { - HServerAddress hsa = new HServerAddress(hostAndPort); - Pair pair = - MetaReader.getRegion(ct, regionname); + Pair pair = MetaReader.getRegion(ct, regionname); if (pair == null || pair.getSecond() == null) { LOG.info("No server in .META. for " + Bytes.toString(regionname) + "; pair=" + pair); } else { - closeRegion(hsa, pair.getFirst()); + closeRegion(pair.getSecond(), pair.getFirst()); } } else { - Pair pair = - MetaReader.getRegion(ct, regionname); + Pair pair = MetaReader.getRegion(ct, regionname); if (pair == null || pair.getSecond() == null) { LOG.info("No server in .META. for " + Bytes.toString(regionname) + "; pair=" + pair); @@ -786,9 +785,10 @@ public class HBaseAdmin implements Abortable { } } - private void closeRegion(final HServerAddress hsa, final HRegionInfo hri) + private void closeRegion(final ServerName sn, final HRegionInfo hri) throws IOException { - HRegionInterface rs = this.connection.getHRegionConnection(hsa); + HRegionInterface rs = + this.connection.getHRegionConnection(sn.getHostname(), sn.getPort()); // Close the region without updating zk state. 
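Note the payload setRootLocation() above writes: the -ROOT- znode holds the bytes of ServerName.toString(), the comma-delimited host,port,startcode form. A hedged sketch of the reverse parse, assuming ServerName's String constructor accepts that form as the RegionTransitionData deserialization later in this patch does:

import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.Bytes;

public class RootLocationParse {
  /**
   * @param znodeData content of the -ROOT- location znode, e.g. the bytes
   * of "host187.example.com,60020,1289493121758"
   * @return the parsed ServerName, or null when the znode is empty
   */
  public static ServerName parse(final byte [] znodeData) {
    if (znodeData == null || znodeData.length == 0) return null;
    return new ServerName(Bytes.toString(znodeData));
  }
}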
rs.closeRegion(hri, false); } @@ -820,7 +820,7 @@ public class HBaseAdmin implements Abortable { CatalogTracker ct = getCatalogTracker(); try { if (isRegionName) { - Pair pair = + Pair pair = MetaReader.getRegion(ct, tableNameOrRegionName); if (pair == null || pair.getSecond() == null) { LOG.info("No server in .META. for " + @@ -829,10 +829,10 @@ public class HBaseAdmin implements Abortable { flush(pair.getSecond(), pair.getFirst()); } } else { - List> pairs = + List> pairs = MetaReader.getTableRegionsAndLocations(ct, Bytes.toString(tableNameOrRegionName)); - for (Pair pair: pairs) { + for (Pair pair: pairs) { if (pair.getFirst().isOffline()) continue; if (pair.getSecond() == null) continue; try { @@ -850,9 +850,10 @@ public class HBaseAdmin implements Abortable { } } - private void flush(final HServerAddress hsa, final HRegionInfo hri) + private void flush(final ServerName sn, final HRegionInfo hri) throws IOException { - HRegionInterface rs = this.connection.getHRegionConnection(hsa); + HRegionInterface rs = + this.connection.getHRegionConnection(sn.getHostname(), sn.getPort()); rs.flushRegion(hri); } @@ -922,7 +923,7 @@ public class HBaseAdmin implements Abortable { CatalogTracker ct = getCatalogTracker(); try { if (isRegionName(tableNameOrRegionName)) { - Pair pair = + Pair pair = MetaReader.getRegion(ct, tableNameOrRegionName); if (pair == null || pair.getSecond() == null) { LOG.info("No server in .META. for " + @@ -931,10 +932,10 @@ public class HBaseAdmin implements Abortable { compact(pair.getSecond(), pair.getFirst(), major); } } else { - List> pairs = + List> pairs = MetaReader.getTableRegionsAndLocations(ct, Bytes.toString(tableNameOrRegionName)); - for (Pair pair: pairs) { + for (Pair pair: pairs) { if (pair.getFirst().isOffline()) continue; if (pair.getSecond() == null) continue; try { @@ -953,10 +954,11 @@ public class HBaseAdmin implements Abortable { } } - private void compact(final HServerAddress hsa, final HRegionInfo hri, + private void compact(final ServerName sn, final HRegionInfo hri, final boolean major) throws IOException { - HRegionInterface rs = this.connection.getHRegionConnection(hsa); + HRegionInterface rs = + this.connection.getHRegionConnection(sn.getHostname(), sn.getPort()); rs.compactRegion(hri, major); } @@ -969,7 +971,7 @@ public class HBaseAdmin implements Abortable { * @param destServerName The servername of the destination regionserver. If * passed the empty byte array we'll assign to a random server. A server name * is made of host, port and startcode. Here is an example: - * host187.example.com,60020,1289493121758. + * host187.example.com,60020,1289493121758 * @throws UnknownRegionException Thrown if we can't find a region named * encodedRegionName * @throws ZooKeeperConnectionException @@ -1077,7 +1079,7 @@ public class HBaseAdmin implements Abortable { try { if (isRegionName(tableNameOrRegionName)) { // Its a possible region name. - Pair pair = + Pair pair = MetaReader.getRegion(ct, tableNameOrRegionName); if (pair == null || pair.getSecond() == null) { LOG.info("No server in .META. 
for " + @@ -1086,10 +1088,10 @@ public class HBaseAdmin implements Abortable { split(pair.getSecond(), pair.getFirst(), splitPoint); } } else { - List> pairs = + List> pairs = MetaReader.getTableRegionsAndLocations(ct, Bytes.toString(tableNameOrRegionName)); - for (Pair pair: pairs) { + for (Pair pair: pairs) { // May not be a server for a particular row if (pair.getSecond() == null) continue; HRegionInfo r = pair.getFirst(); @@ -1106,9 +1108,10 @@ public class HBaseAdmin implements Abortable { } } - private void split(final HServerAddress hsa, final HRegionInfo hri, + private void split(final ServerName sn, final HRegionInfo hri, byte[] splitPoint) throws IOException { - HRegionInterface rs = this.connection.getHRegionConnection(hsa); + HRegionInterface rs = + this.connection.getHRegionConnection(sn.getHostname(), sn.getPort()); rs.splitRegion(hri, splitPoint); } @@ -1179,10 +1182,27 @@ public class HBaseAdmin implements Abortable { /** * Stop the designated regionserver. * @throws IOException if a remote or network exception occurs + * @deprecated Use {@link #stopRegionServer(String)} */ public synchronized void stopRegionServer(final HServerAddress hsa) throws IOException { - HRegionInterface rs = this.connection.getHRegionConnection(hsa); + HRegionInterface rs = + this.connection.getHRegionConnection(hsa); + rs.stop("Called by admin client " + this.connection.toString()); + } + + /** + * Stop the designated regionserver + * @param hostnamePort Hostname and port delimited by a : as in + * example.org:1234 + * @throws IOException if a remote or network exception occurs + */ + public synchronized void stopRegionServer(final String hostnamePort) + throws IOException { + String hostname = Addressing.parseHostname(hostnamePort); + int port = Addressing.parsePort(hostnamePort); + HRegionInterface rs = + this.connection.getHRegionConnection(hostname, port); rs.stop("Called by admin client " + this.connection.toString()); } diff --git a/src/main/java/org/apache/hadoop/hbase/client/HConnection.java b/src/main/java/org/apache/hadoop/hbase/client/HConnection.java index 2bb4725228a..1beedafb933 100644 --- a/src/main/java/org/apache/hadoop/hbase/client/HConnection.java +++ b/src/main/java/org/apache/hadoop/hbase/client/HConnection.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import java.net.InetSocketAddress; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; @@ -188,19 +189,44 @@ public interface HConnection extends Abortable { * @param regionServer - the server to connect to * @return proxy for HRegionServer * @throws IOException if a remote or network exception occurs + * @deprecated Use {@link #getHRegionConnection(InetSocketAddress)} */ public HRegionInterface getHRegionConnection(HServerAddress regionServer) throws IOException; + /** + * Establishes a connection to the region server at the specified address. + * @param hostname RegionServer hostname + * @param port RegionServer port + * @return proxy for HRegionServer + * @throws IOException if a remote or network exception occurs + * + */ + public HRegionInterface getHRegionConnection(final String hostname, final int port) + throws IOException; + /** * Establishes a connection to the region server at the specified address. 
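All these admin helpers share one new pattern: carry a ServerName and ask the connection for a regionserver proxy by hostname and port. Distilled to a sketch, with flushRegion standing in for closeRegion, compactRegion, and splitRegion:

import java.io.IOException;

import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.ipc.HRegionInterface;

public class RegionOpSketch {
  /** Resolve a proxy from the ServerName, then invoke one region op on it. */
  public static void flush(final HConnection connection, final ServerName sn,
      final HRegionInfo hri)
  throws IOException {
    HRegionInterface rs =
      connection.getHRegionConnection(sn.getHostname(), sn.getPort());
    rs.flushRegion(hri);
  }
}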
* @param regionServer - the server to connect to * @param getMaster - do we check if master is alive * @return proxy for HRegionServer * @throws IOException if a remote or network exception occurs + * @deprecated Use {@link #getHRegionConnection(HServerAddress, boolean)} */ - public HRegionInterface getHRegionConnection( - HServerAddress regionServer, boolean getMaster) + public HRegionInterface getHRegionConnection(HServerAddress regionServer, + boolean getMaster) + throws IOException; + + /** + * Establishes a connection to the region server at the specified address. + * @param hostname RegionServer hostname + * @param port RegionServer port + * @param getMaster - do we check if master is alive + * @return proxy for HRegionServer + * @throws IOException if a remote or network exception occurs + */ + public HRegionInterface getHRegionConnection(final String hostname, + final int port, boolean getMaster) throws IOException; /** @@ -343,6 +369,7 @@ public interface HConnection extends Abortable { * Scan zookeeper to get the number of region servers * @return the number of region servers that are currently running * @throws IOException if a remote or network exception occurs + * @deprecated This method will be changed from public to package protected. */ public int getCurrentNrHRS() throws IOException; -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java b/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java index 70affa01bba..ded51c8ffde 100644 --- a/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java +++ b/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java @@ -22,8 +22,16 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; import java.lang.reflect.Proxy; import java.lang.reflect.UndeclaredThrowableException; -import java.util.*; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArraySet; @@ -46,11 +54,17 @@ import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MasterAddressTracker; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.RemoteExceptionHandler; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.coprocessor.Batch; -import org.apache.hadoop.hbase.ipc.*; +import org.apache.hadoop.hbase.ipc.CoprocessorProtocol; +import org.apache.hadoop.hbase.ipc.ExecRPCInvoker; +import org.apache.hadoop.hbase.ipc.HBaseRPC; +import org.apache.hadoop.hbase.ipc.HMasterInterface; +import org.apache.hadoop.hbase.ipc.HRegionInterface; +import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.SoftValueSortedMap; @@ -243,7 +257,8 @@ public class HConnectionManager { private final Map servers = new ConcurrentHashMap(); - private final ConcurrentHashMap connectionLock = new ConcurrentHashMap(); + private final ConcurrentHashMap connectionLock = + new 
ConcurrentHashMap(); /** * Map of table to table {@link HRegionLocation}s. The table key is made @@ -340,7 +355,7 @@ public class HConnectionManager { } } - HServerAddress masterLocation = null; + ServerName sn = null; synchronized (this.masterLock) { for (int tries = 0; !this.closed && @@ -349,8 +364,8 @@ public class HConnectionManager { tries++) { try { - masterLocation = masterAddressTracker.getMasterAddress(); - if(masterLocation == null) { + sn = masterAddressTracker.getMasterAddress(); + if (sn == null) { LOG.info("ZooKeeper available but no active master location found"); throw new MasterNotRunningException(); } @@ -358,9 +373,11 @@ public class HConnectionManager { if (clusterId.hasId()) { conf.set(HConstants.CLUSTER_ID, clusterId.getId()); } + InetSocketAddress isa = + new InetSocketAddress(sn.getHostname(), sn.getPort()); HMasterInterface tryMaster = (HMasterInterface)HBaseRPC.getProxy( - HMasterInterface.class, HMasterInterface.VERSION, - masterLocation.getInetSocketAddress(), this.conf, this.rpcTimeout); + HMasterInterface.class, HMasterInterface.VERSION, isa, this.conf, + this.rpcTimeout); if (tryMaster.isMasterRunning()) { this.master = tryMaster; @@ -391,10 +408,10 @@ public class HConnectionManager { this.masterChecked = true; } if (this.master == null) { - if (masterLocation == null) { + if (sn == null) { throw new MasterNotRunningException(); } - throw new MasterNotRunningException(masterLocation.toString()); + throw new MasterNotRunningException(sn.toString()); } return this.master; } @@ -577,12 +594,13 @@ public class HConnectionManager { if (Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME)) { try { - HServerAddress hsa = + ServerName servername = this.rootRegionTracker.waitRootRegionLocation(this.rpcTimeout); LOG.debug("Lookedup root region location, connection=" + this + - "; hsa=" + hsa); - if (hsa == null) return null; - return new HRegionLocation(HRegionInfo.ROOT_REGIONINFO, hsa); + "; serverName=" + ((servername == null)? "": servername.toString())); + if (servername == null) return null; + return new HRegionLocation(HRegionInfo.ROOT_REGIONINFO, + servername.getHostname(), servername.getPort()); } catch (InterruptedException e) { Thread.currentThread().interrupt(); return null; @@ -631,11 +649,14 @@ public class HConnectionManager { if (value == null) { return true; // don't cache it } - final String serverAddress = Bytes.toString(value); - + final String hostAndPort = Bytes.toString(value); + String hostname = Addressing.parseHostname(hostAndPort); + int port = Addressing.parsePort(hostAndPort); + value = result.getValue(HConstants.CATALOG_FAMILY, + HConstants.STARTCODE_QUALIFIER); // instantiate the location - HRegionLocation loc = new HRegionLocation(regionInfo, - new HServerAddress(serverAddress)); + HRegionLocation loc = + new HRegionLocation(regionInfo, hostname, port); // cache this meta entry cacheLocation(tableName, loc); } @@ -690,7 +711,7 @@ public class HConnectionManager { // If null still, go around again. 
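The Addressing utility leaned on here is added elsewhere in this patch, so its bodies do not appear above; the sketch below is only an assumed minimal behavior for these call sites, not the committed implementation:

public final class HostAndPortSketch {
  private HostAndPortSketch() {}

  /** e.g. ("example.org", 1234) becomes "example.org:1234" */
  public static String createHostAndPortStr(final String hostname,
      final int port) {
    return hostname + ":" + port;
  }

  public static String parseHostname(final String hostAndPort) {
    int colon = hostAndPort.lastIndexOf(':');
    if (colon < 0) throw new IllegalArgumentException(hostAndPort);
    return hostAndPort.substring(0, colon);
  }

  public static int parsePort(final String hostAndPort) {
    int colon = hostAndPort.lastIndexOf(':');
    if (colon < 0) throw new IllegalArgumentException(hostAndPort);
    return Integer.parseInt(hostAndPort.substring(colon + 1));
  }
}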
if (metaLocation == null) continue; HRegionInterface server = - getHRegionConnection(metaLocation.getServerAddress()); + getHRegionConnection(metaLocation.getHostname(), metaLocation.getPort()); Result regionInfoRow = null; // This block guards against two threads trying to load the meta @@ -725,7 +746,7 @@ public class HConnectionManager { if (regionInfoRow == null) { throw new TableNotFoundException(Bytes.toString(tableName)); } - byte[] value = regionInfoRow.getValue(HConstants.CATALOG_FAMILY, + byte [] value = regionInfoRow.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); if (value == null || value.length == 0) { throw new IOException("HRegionInfo was null or empty in " + @@ -746,19 +767,22 @@ public class HConnectionManager { value = regionInfoRow.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); - String serverAddress = ""; - if(value != null) { - serverAddress = Bytes.toString(value); + String hostAndPort = ""; + if (value != null) { + hostAndPort = Bytes.toString(value); } - if (serverAddress.equals("")) { + if (hostAndPort.equals("")) { throw new NoServerForRegionException("No server address listed " + "in " + Bytes.toString(parentTable) + " for region " + regionInfo.getRegionNameAsString()); } - // instantiate the location - location = new HRegionLocation(regionInfo, - new HServerAddress(serverAddress)); + // Instantiate the location + String hostname = Addressing.parseHostname(hostAndPort); + int port = Addressing.parsePort(hostAndPort); + value = regionInfoRow.getValue(HConstants.CATALOG_FAMILY, + HConstants.SERVER_QUALIFIER); + location = new HRegionLocation(regionInfo, hostname, port); cacheLocation(tableName, location); return location; } catch (TableNotFoundException e) { @@ -936,14 +960,48 @@ public class HConnectionManager { } } - public HRegionInterface getHRegionConnection( - HServerAddress regionServer, boolean getMaster) + public HRegionInterface getHRegionConnection(HServerAddress hsa) throws IOException { - if (getMaster) { - getMaster(); - } + return getHRegionConnection(hsa, false); + } + + @Override + public HRegionInterface getHRegionConnection(final String hostname, + final int port) + throws IOException { + return getHRegionConnection(hostname, port, false); + } + + public HRegionInterface getHRegionConnection(HServerAddress hsa, + boolean master) + throws IOException { + return getHRegionConnection(null, -1, hsa.getInetSocketAddress(), master); + } + + @Override + public HRegionInterface getHRegionConnection(final String hostname, + final int port, final boolean master) + throws IOException { + return getHRegionConnection(hostname, port, null, master); + } + + /** + * Either the passed isa is null or hostname + * can be but not both. + * @param hostname + * @param port + * @param isa + * @param master + * @return Proxy. + * @throws IOException + */ + HRegionInterface getHRegionConnection(final String hostname, final int port, + final InetSocketAddress isa, final boolean master) + throws IOException { + if (master) getMaster(); HRegionInterface server; - String rsName = regionServer.toString(); + String rsName = isa != null? + isa.toString(): Addressing.createHostAndPortStr(hostname, port); // See if we already have a connection (common case) server = this.servers.get(rsName); if (server == null) { @@ -958,12 +1016,15 @@ public class HConnectionManager { if (clusterId.hasId()) { conf.set(HConstants.CLUSTER_ID, clusterId.getId()); } + // Only create isa when we need to. + InetSocketAddress address = isa != null? 
isa: + new InetSocketAddress(hostname, port); // definitely a cache miss. establish an RPC for this RS server = (HRegionInterface) HBaseRPC.waitForProxy( serverInterfaceClass, HRegionInterface.VERSION, - regionServer.getInetSocketAddress(), this.conf, + address, this.conf, this.maxRPCAttempts, this.rpcTimeout, this.rpcTimeout); - this.servers.put(rsName, server); + this.servers.put(address.toString(), server); } catch (RemoteException e) { LOG.warn("RemoteException connecting to RS", e); // Throw what the RemoteException was carrying. @@ -975,12 +1036,6 @@ public class HConnectionManager { return server; } - public HRegionInterface getHRegionConnection( - HServerAddress regionServer) - throws IOException { - return getHRegionConnection(regionServer, false); - } - /** * Get the ZooKeeper instance for this TableServers instance. * @@ -1065,10 +1120,8 @@ public class HConnectionManager { this.closed = true; } - private Callable createCallable( - final HServerAddress address, - final MultiAction multi, - final byte [] tableName) { + private Callable createCallable(final HRegionLocation loc, + final MultiAction multi, final byte [] tableName) { final HConnection connection = this; return new Callable() { public MultiResponse call() throws IOException { @@ -1079,7 +1132,8 @@ public class HConnectionManager { } @Override public void instantiateServer(boolean reload) throws IOException { - server = connection.getHRegionConnection(address); + server = + connection.getHRegionConnection(loc.getHostname(), loc.getPort()); } } ); @@ -1191,8 +1245,10 @@ public class HConnectionManager { } // Keep track of the most recent servers for any given item for better - // exceptional reporting. - HServerAddress [] lastServers = new HServerAddress[results.length]; + // exceptional reporting. We keep HRegionLocation to save on parsing. + // Later below when we use lastServers, we'll pull what we need from + // lastServers. 
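Step 1 below buckets the batch by destination: each row routes to the HRegionLocation of its region, and all actions bound for one server ride a single MultiAction RPC. The shape of that grouping, reduced to a sketch with String host-and-port keys standing in for HRegionLocation and raw row keys standing in for Actions:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupBySketch {
  /** Bucket rows by hosting server so each server gets one batched call. */
  public static Map<String, List<byte []>> groupByServer(
      final Map<byte [], String> rowToServer) {
    Map<String, List<byte []>> actionsByServer =
      new HashMap<String, List<byte []>>();
    for (Map.Entry<byte [], String> e: rowToServer.entrySet()) {
      List<byte []> actions = actionsByServer.get(e.getValue());
      if (actions == null) {
        actions = new ArrayList<byte []>();
        actionsByServer.put(e.getValue(), actions);
      }
      actions.add(e.getKey());
    }
    return actionsByServer;
  }
}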
+ HRegionLocation [] lastServers = new HRegionLocation[results.length]; List workingList = new ArrayList(list); boolean retry = true; // count that helps presize actions array @@ -1208,43 +1264,41 @@ public class HConnectionManager { Thread.sleep(sleepTime); } // step 1: break up into regionserver-sized chunks and build the data structs - Map> actionsByServer = - new HashMap>(); + Map> actionsByServer = + new HashMap>(); for (int i = 0; i < workingList.size(); i++) { Row row = workingList.get(i); if (row != null) { HRegionLocation loc = locateRegion(tableName, row.getRow(), true); - HServerAddress address = loc.getServerAddress(); byte[] regionName = loc.getRegionInfo().getRegionName(); - MultiAction actions = actionsByServer.get(address); + MultiAction actions = actionsByServer.get(loc); if (actions == null) { actions = new MultiAction(); - actionsByServer.put(address, actions); + actionsByServer.put(loc, actions); } Action action = new Action(regionName, row, i); - lastServers[i] = address; + lastServers[i] = loc; actions.add(regionName, action); } } // step 2: make the requests - Map> futures = - new HashMap>( + Map> futures = + new HashMap>( actionsByServer.size()); - for (Entry> e - : actionsByServer.entrySet()) { + for (Entry> e: actionsByServer.entrySet()) { futures.put(e.getKey(), pool.submit(createCallable(e.getKey(), e.getValue(), tableName))); } // step 3: collect the failures and successes and prepare for retry - for (Entry> responsePerServer + for (Entry> responsePerServer : futures.entrySet()) { - HServerAddress address = responsePerServer.getKey(); + HRegionLocation loc = responsePerServer.getKey(); try { Future future = responsePerServer.getValue(); @@ -1252,7 +1306,8 @@ public class HConnectionManager { if (resp == null) { // Entire server failed - LOG.debug("Failed all for server: " + address + ", removing from cache"); + LOG.debug("Failed all for server: " + loc.getHostnamePort() + + ", removing from cache"); continue; } @@ -1277,7 +1332,7 @@ public class HConnectionManager { } } } catch (ExecutionException e) { - LOG.debug("Failed all from " + address, e); + LOG.debug("Failed all from " + loc, e); } } @@ -1320,13 +1375,13 @@ public class HConnectionManager { List exceptions = new ArrayList(actionCount); List actions = new ArrayList(actionCount); - List addresses = new ArrayList(actionCount); + List addresses = new ArrayList(actionCount); for (int i = 0 ; i < results.length; i++) { if (results[i] == null || results[i] instanceof Throwable) { exceptions.add((Throwable)results[i]); actions.add(list.get(i)); - addresses.add(lastServers[i]); + addresses.add(lastServers[i].getHostnamePort()); } } @@ -1418,11 +1473,14 @@ public class HConnectionManager { return !regionCachePrefetchDisabledTables.contains(Bytes.mapKey(tableName)); } - public void prewarmRegionCache(final byte[] tableName, - final Map regions) { + @Override + public void prewarmRegionCache(byte[] tableName, + Map regions) { for (Map.Entry e : regions.entrySet()) { + HServerAddress hsa = e.getValue(); + if (hsa == null || hsa.getInetSocketAddress() == null) continue; cacheLocation(tableName, - new HRegionLocation(e.getKey(), e.getValue())); + new HRegionLocation(e.getKey(), hsa.getHostname(), hsa.getPort())); } } diff --git a/src/main/java/org/apache/hadoop/hbase/client/HTable.java b/src/main/java/org/apache/hadoop/hbase/client/HTable.java index edacf5654f1..46bac9f90dc 100644 --- a/src/main/java/org/apache/hadoop/hbase/client/HTable.java +++ b/src/main/java/org/apache/hadoop/hbase/client/HTable.java @@ -29,6 
+29,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.TreeMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.SynchronousQueue; @@ -49,12 +50,14 @@ import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.coprocessor.Batch; import org.apache.hadoop.hbase.ipc.CoprocessorProtocol; import org.apache.hadoop.hbase.ipc.ExecRPCInvoker; +import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Writables; @@ -283,6 +286,7 @@ public class HTable implements HTableInterface { * INTERNAL Used by unit tests and tools to do low-level * manipulations. * @return An HConnection instance. + * @deprecated This method will be changed from public to package protected. */ // TODO(tsuna): Remove this. Unit tests shouldn't require public helpers. public HConnection getConnection() { @@ -378,10 +382,9 @@ public class HTable implements HTableInterface { /** * Gets all the regions and their address for this table. - *
- * This is mainly useful for the MapReduce integration. * @return A map of HRegionInfo with its server address * @throws IOException if a remote or network exception occurs + * @deprecated Use {@link #getRegionLocations()} or {@link #getStartEndKeys()} */ public Map getRegionsInfo() throws IOException { final Map regionMap = @@ -401,8 +404,8 @@ byte [] value = rowResult.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); if (value != null && value.length > 0) { - String address = Bytes.toString(value); - server = new HServerAddress(address); + String hostAndPort = Bytes.toString(value); + server = new HServerAddress(Addressing.createInetSocketAddressFromHostAndPortStr(hostAndPort)); } if (!(info.isOffline() || info.isSplit())) { @@ -416,6 +419,17 @@ return regionMap; } + /** + * Gets all the regions and their address for this table. + *
+ * This is mainly useful for the MapReduce integration. + * @return A map of HRegionInfo with its server address + * @throws IOException if a remote or network exception occurs + */ + public NavigableMap getRegionLocations() throws IOException { + return MetaScanner.allTableRegions(getConfiguration(), getTableName(), false); + } + /** * Save the passed region information and the table's regions * cache. diff --git a/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java b/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java index 9e3f4d1a897..26d0b314354 100644 --- a/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java +++ b/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java @@ -23,13 +23,20 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.TreeMap; +import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; @@ -225,8 +232,7 @@ public class MetaScanner { public static List listAllRegions(Configuration conf, final boolean offlined) throws IOException { final List regions = new ArrayList(); - MetaScannerVisitor visitor = - new MetaScannerVisitor() { + MetaScannerVisitor visitor = new MetaScannerVisitor() { @Override public boolean processRow(Result result) throws IOException { if (result == null || result.isEmpty()) { @@ -249,6 +255,51 @@ return regions; } + /** + * Lists all of the table regions currently in META. + * @param conf + * @param offlined True if we are to include offlined regions, false and we'll + * leave out offlined regions from returned list.
+ * @return Map of all user-space regions to servers + * @throws IOException + */ + public static NavigableMap allTableRegions(Configuration conf, final byte [] tablename, final boolean offlined) + throws IOException { + final NavigableMap regions = + new TreeMap(); + MetaScannerVisitor visitor = new MetaScannerVisitor() { + @Override + public boolean processRow(Result rowResult) throws IOException { + HRegionInfo info = Writables.getHRegionInfo( + rowResult.getValue(HConstants.CATALOG_FAMILY, + HConstants.REGIONINFO_QUALIFIER)); + if (!(Bytes.equals(info.getTableDesc().getName(), tablename))) { + return false; + } + byte [] value = rowResult.getValue(HConstants.CATALOG_FAMILY, + HConstants.SERVER_QUALIFIER); + String hostAndPort = null; + if (value != null && value.length > 0) { + hostAndPort = Bytes.toString(value); + } + value = rowResult.getValue(HConstants.CATALOG_FAMILY, + HConstants.STARTCODE_QUALIFIER); + long startcode = -1L; + if (value != null && value.length > 0) startcode = Bytes.toLong(value); + if (!(info.isOffline() || info.isSplit())) { + ServerName sn = null; + if (hostAndPort != null && hostAndPort.length() > 0) { + sn = new ServerName(hostAndPort, startcode); + } + regions.put(new UnmodifyableHRegionInfo(info), sn); + } + return true; + } + }; + metaScan(conf, visitor); + return regions; + } + /** * Visitor class called to process each row of the .META. table */ @@ -264,4 +315,4 @@ public class MetaScanner { */ public boolean processRow(Result rowResult) throws IOException; } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java b/src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java index 6c6202424e2..9d188896aaa 100644 --- a/src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java +++ b/src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java @@ -22,6 +22,7 @@ package org.apache.hadoop.hbase.client; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.util.Addressing; import java.util.Collection; import java.util.HashMap; @@ -39,22 +40,23 @@ import java.util.Set; * known server addresses via {@link #getNumExceptions()} and * {@link #getCause(int)}, {@link #getRow(int)} and {@link #getAddress(int)}. 
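A usage sketch for the new getRegionLocations() defined above, which delegates to MetaScanner.allTableRegions(); the table name 'mytable' is illustrative. It prints each region of the table with the ServerName currently hosting it, or 'unassigned' where the map value is null:

import java.io.IOException;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.HTable;

public class ListRegionLocations {
  public static void main(String [] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    HTable table = new HTable(conf, "mytable");
    NavigableMap<HRegionInfo, ServerName> regions = table.getRegionLocations();
    for (Map.Entry<HRegionInfo, ServerName> e: regions.entrySet()) {
      System.out.println(e.getKey().getRegionNameAsString() + " -> " +
        (e.getValue() == null? "unassigned": e.getValue().toString()));
    }
  }
}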
*/ -public class RetriesExhaustedWithDetailsException extends RetriesExhaustedException { - +@SuppressWarnings("serial") +public class RetriesExhaustedWithDetailsException +extends RetriesExhaustedException { List exceptions; List actions; - List addresses; + List hostnameAndPort; public RetriesExhaustedWithDetailsException(List exceptions, List actions, - List addresses) { + List hostnameAndPort) { super("Failed " + exceptions.size() + " action" + pluralize(exceptions) + ": " + - getDesc(exceptions,actions,addresses)); + getDesc(exceptions, actions, hostnameAndPort)); this.exceptions = exceptions; this.actions = actions; - this.addresses = addresses; + this.hostnameAndPort = hostnameAndPort; } public List getCauses() { @@ -73,8 +75,17 @@ public class RetriesExhaustedWithDetailsException extends RetriesExhaustedExcept return actions.get(i); } + /** + * @param i + * @return + * @deprecated + */ public HServerAddress getAddress(int i) { - return addresses.get(i); + return new HServerAddress(Addressing.createInetSocketAddressFromHostAndPortStr(getHostnamePort(i))); + } + + public String getHostnamePort(final int i) { + return this.hostnameAndPort.get(i); } public boolean mayHaveClusterIssues() { @@ -100,12 +111,12 @@ public class RetriesExhaustedWithDetailsException extends RetriesExhaustedExcept public static String getDesc(List exceptions, List actions, - List addresses) { + List hostnamePort) { String s = getDesc(classifyExs(exceptions)); s += "servers with issues: "; - Set uniqAddr = new HashSet(); - uniqAddr.addAll(addresses); - for(HServerAddress addr : uniqAddr) { + Set uniqAddr = new HashSet(); + uniqAddr.addAll(hostnamePort); + for(String addr : uniqAddr) { s += addr + ", "; } return s; diff --git a/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java b/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java index 4097c6f3929..360906d8cc3 100644 --- a/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java +++ b/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java @@ -22,8 +22,8 @@ package org.apache.hadoop.hbase.coprocessor; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.UnknownRegionException; import java.io.IOException; diff --git a/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java b/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java index d64817fec88..7f199b84686 100644 --- a/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java +++ b/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java @@ -138,23 +138,22 @@ public interface MasterObserver extends Coprocessor { * Called prior to moving a given region from one region server to another. */ void preMove(final ObserverContext ctx, - final HRegionInfo region, final HServerInfo srcServer, - final HServerInfo destServer) + final HRegionInfo region, final ServerName srcServer, final ServerName destServer) throws UnknownRegionException; /** * Called after the region move has been requested. 
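With the hooks taking ServerNames, an observer that audits moves reduces to a sketch like the following; the ObserverContext<MasterCoprocessorEnvironment> parameterization is assumed from the surrounding coprocessor package:

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;

public class MoveAuditObserver extends BaseMasterObserver {
  private static final Log LOG = LogFactory.getLog(MoveAuditObserver.class);

  @Override
  public void preMove(final ObserverContext<MasterCoprocessorEnvironment> ctx,
      final HRegionInfo region, final ServerName srcServer,
      final ServerName destServer)
  throws UnknownRegionException {
    LOG.info("Moving " + region.getRegionNameAsString() + " from " +
      srcServer + " to " + destServer);
  }
}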
*/ void postMove(final ObserverContext ctx, - final HRegionInfo region, final HServerInfo srcServer, - final HServerInfo destServer) + final HRegionInfo region, final ServerName srcServer, final ServerName destServer) throws UnknownRegionException; /** * Called prior to assigning a specific region. */ void preAssign(final ObserverContext ctx, - final byte [] regionName, final boolean force) throws IOException; + final byte [] regionName, final boolean force) + throws IOException; /** * Called after the region assignment has been requested. diff --git a/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java b/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java index c22e34246c1..0c9eccece52 100644 --- a/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java +++ b/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java @@ -228,4 +228,4 @@ public abstract class EventHandler implements Runnable, Comparable { public synchronized void setListener(EventHandlerListener listener) { this.listener = listener; } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java b/src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java index a55f9d65714..0f49dc14487 100644 --- a/src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java +++ b/src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java @@ -23,6 +23,7 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.executor.EventHandler.EventType; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; @@ -42,7 +43,7 @@ public class RegionTransitionData implements Writable { private byte [] regionName; /** Server event originated from. Optional. */ - private String serverName; + private ServerName origin; /** Time the event was created. Required but automatically set. */ private long stamp; @@ -89,11 +90,11 @@ public class RegionTransitionData implements Writable { * * @param eventType type of event * @param regionName name of region as per HRegionInfo#getRegionName() - * @param serverName name of server setting data + * @param origin Originating {@link ServerName} */ public RegionTransitionData(EventType eventType, byte [] regionName, - String serverName) { - this(eventType, regionName, serverName, null); + final ServerName origin) { + this(eventType, regionName, origin, null); } /** @@ -107,16 +108,16 @@ public class RegionTransitionData implements Writable { * * @param eventType type of event * @param regionName name of region as per HRegionInfo#getRegionName() - * @param serverName name of server setting data + * @param origin Originating {@link ServerName} * @param payload Payload examples include the daughters involved in a * {@link EventType#RS_ZK_REGION_SPLIT}. 
Can be null */ public RegionTransitionData(EventType eventType, byte [] regionName, - String serverName, final byte [] payload) { + final ServerName serverName, final byte [] payload) { this.eventType = eventType; this.stamp = System.currentTimeMillis(); this.regionName = regionName; - this.serverName = serverName; + this.origin = serverName; this.payload = payload; } @@ -155,8 +156,8 @@ public class RegionTransitionData implements Writable { * * @return server name of originating regionserver, or null if from master */ - public String getServerName() { - return serverName; + public ServerName getOrigin() { + return origin; } /** @@ -185,10 +186,8 @@ public class RegionTransitionData implements Writable { regionName = Bytes.readByteArray(in); // remaining fields are optional so prefixed with boolean // the name of the regionserver sending the data - if(in.readBoolean()) { - serverName = in.readUTF(); - } else { - serverName = null; + if (in.readBoolean()) { + this.origin = new ServerName(in.readUTF()); } if (in.readBoolean()) { this.payload = Bytes.readByteArray(in); @@ -201,9 +200,9 @@ public class RegionTransitionData implements Writable { out.writeLong(System.currentTimeMillis()); Bytes.writeByteArray(out, regionName); // remaining fields are optional so prefixed with boolean - out.writeBoolean(serverName != null); - if(serverName != null) { - out.writeUTF(serverName); + out.writeBoolean(this.origin != null); + if(this.origin != null) { + out.writeUTF(this.origin.toString()); } out.writeBoolean(this.payload != null); if (this.payload != null) { @@ -244,7 +243,7 @@ public class RegionTransitionData implements Writable { @Override public String toString() { - return "region=" + Bytes.toString(regionName) + ", server=" + serverName + + return "region=" + Bytes.toString(regionName) + ", origin=" + this.origin + ", state=" + eventType; } } \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java b/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java index d8f84633c21..d531b8d8b96 100644 --- a/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java +++ b/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java @@ -41,10 +41,10 @@ import org.apache.hadoop.conf.Configured; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HMsg; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.HServerLoad; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.Delete; @@ -148,8 +148,14 @@ public class HbaseObjectWritable implements Writable, WritableWithSize, Configur // Hbase types addToMap(HColumnDescriptor.class, code++); addToMap(HConstants.Modify.class, code++); - addToMap(HMsg.class, code++); - addToMap(HMsg[].class, code++); + + // We used to have a class named HMsg but its been removed. Rather than + // just axe it, use following random Integer class -- we just chose any + // class from java.lang -- instead just so codes that follow stay + // in same relative place. 
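The placeholder trick matters because codes come from an incrementing counter in registration order, so dropping an entry would silently renumber every class registered after it and break wire compatibility. The same idea in miniature:

import java.util.HashMap;
import java.util.Map;

public class CodeMapSketch {
  private final Map<Class<?>, Integer> codes =
    new HashMap<Class<?>, Integer>();
  private int code = 0;

  private void addToMap(final Class<?> clazz) {
    this.codes.put(clazz, code++);
  }

  public CodeMapSketch() {
    addToMap(String.class);   // code 0
    addToMap(Integer.class);  // code 1, placeholder where a class was removed
    addToMap(Long.class);     // code 2, unchanged thanks to the placeholder
  }

  public int codeFor(final Class<?> clazz) {
    return this.codes.get(clazz);
  }
}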
+ addToMap(Integer.class, code++); + addToMap(Integer[].class, code++); + addToMap(HRegion.class, code++); addToMap(HRegion[].class, code++); addToMap(HRegionInfo.class, code++); @@ -225,6 +231,8 @@ public class HbaseObjectWritable implements Writable, WritableWithSize, Configur addToMap(CompareOp.class, code++); addToMap(ColumnRangeFilter.class, code++); + + addToMap(HServerLoad.class, code++); } private Class declaredClass; diff --git a/src/main/java/org/apache/hadoop/hbase/ipc/HBaseServer.java b/src/main/java/org/apache/hadoop/hbase/ipc/HBaseServer.java index ec28de42701..3c845dcc761 100644 --- a/src/main/java/org/apache/hadoop/hbase/ipc/HBaseServer.java +++ b/src/main/java/org/apache/hadoop/hbase/ipc/HBaseServer.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.ipc; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -41,7 +40,13 @@ import java.nio.channels.Selector; import java.nio.channels.ServerSocketChannel; import java.nio.channels.SocketChannel; import java.nio.channels.WritableByteChannel; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Random; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; @@ -52,15 +57,14 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.io.WritableWithSize; +import org.apache.hadoop.hbase.util.ByteBufferOutputStream; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.io.ObjectWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.ipc.VersionedProtocol; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.hbase.util.ByteBufferOutputStream; import com.google.common.base.Function; import com.google.common.util.concurrent.ThreadFactoryBuilder; diff --git a/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java b/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java index 25139b300a1..e48222b5502 100644 --- a/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java +++ b/src/main/java/org/apache/hadoop/hbase/ipc/HMasterRegionInterface.java @@ -19,22 +19,16 @@ */ package org.apache.hadoop.hbase.ipc; -import org.apache.hadoop.hbase.HMsg; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerInfo; +import java.io.IOException; + +import org.apache.hadoop.hbase.HServerLoad; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.ipc.VersionedProtocol; -import java.io.IOException; - /** - * HRegionServers interact with the HMasterRegionInterface to report on local - * goings-on and to obtain data-handling instructions from the HMaster. - *

<p>
Changes here need to be reflected in HbaseObjectWritable HbaseRPC#Invoker. - * - *

<p>
NOTE: if you change the interface, you must change the RPC version - * number in HBaseRPCProtocolVersion - * + * The Master publishes this Interface for RegionServers to register themselves + * on. */ public interface HMasterRegionInterface extends VersionedProtocol { /** @@ -44,32 +38,27 @@ public interface HMasterRegionInterface extends VersionedProtocol { // maintained a single global version number on all HBase Interfaces. This // meant all HBase RPC was broke though only one of the three RPC Interfaces // had changed. This has since been undone. - public static final long VERSION = 28L; + public static final long VERSION = 29L; /** - * Called when a region server first starts - * @param info server info + * Called when a region server first starts. + * @param port Port number this regionserver is up on. + * @param serverStartcode This servers' startcode. * @param serverCurrentTime The current time of the region server in ms * @throws IOException e * @return Configuration for the regionserver to use: e.g. filesystem, - * hbase rootdir, etc. + * hbase rootdir, the hostname to use creating the RegionServer ServerName, + * etc. */ - public MapWritable regionServerStartup(HServerInfo info, - long serverCurrentTime) throws IOException; + public MapWritable regionServerStartup(final int port, + final long serverStartcode, final long serverCurrentTime) + throws IOException; /** - * Called to renew lease, tell master what the region server is doing and to - * receive new instructions from the master - * - * @param info server's address and start code - * @param msgs things the region server wants to tell the master - * @param mostLoadedRegions Array of HRegionInfos that should contain the - * reporting server's most loaded regions. These are candidates for being - * rebalanced. - * @return instructions from the master to the region server - * @throws IOException e + * @param sn {@link ServerName#getBytes()} + * @param hsl Server load. + * @throws IOException */ - public HMsg[] regionServerReport(HServerInfo info, HMsg msgs[], - HRegionInfo mostLoadedRegions[]) + public void regionServerReport(byte [] sn, HServerLoad hsl) throws IOException; } \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java b/src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java index 663cab526df..81440318b5d 100644 --- a/src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java +++ b/src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java @@ -292,8 +292,10 @@ public interface HRegionInterface extends VersionedProtocol, Stoppable, Abortabl /** * Method used when a master is taking the place of another failed one. - * @return The HSI + * @return This servers {@link HServerInfo}; it has RegionServer POV on the + * hostname which may not agree w/ how the Master sees this server. 
* @throws IOException e + * @deprecated */ public HServerInfo getHServerInfo() throws IOException; diff --git a/src/main/java/org/apache/hadoop/hbase/ipc/WritableRpcEngine.java b/src/main/java/org/apache/hadoop/hbase/ipc/WritableRpcEngine.java index 2273e55e15a..9eb2ba9e0d8 100644 --- a/src/main/java/org/apache/hadoop/hbase/ipc/WritableRpcEngine.java +++ b/src/main/java/org/apache/hadoop/hbase/ipc/WritableRpcEngine.java @@ -140,8 +140,9 @@ class WritableRpcEngine implements RpcEngine { client.call(new Invocation(method, args), address, protocol, ticket, rpcTimeout); if (logDebug) { - long callTime = System.currentTimeMillis() - startTime; - LOG.debug("Call: " + method.getName() + " " + callTime); + // FIGURE HOW TO TURN THIS OFF! + // long callTime = System.currentTimeMillis() - startTime; + // LOG.debug("Call: " + method.getName() + " " + callTime); } return value.get(); } diff --git a/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java b/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java index 66a3345c79b..5125a71398f 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java @@ -23,8 +23,9 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; @@ -48,13 +49,17 @@ class ActiveMasterManager extends ZooKeeperListener { final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false); - private final HServerAddress address; + private final ServerName sn; private final Server master; - ActiveMasterManager(ZooKeeperWatcher watcher, HServerAddress address, - Server master) { + /** + * @param watcher + * @param sn ServerName + * @param master In an instance of a Master. + */ + ActiveMasterManager(ZooKeeperWatcher watcher, ServerName sn, Server master) { super(watcher); - this.address = address; + this.sn = sn; this.master = master; } @@ -122,11 +127,11 @@ class ActiveMasterManager extends ZooKeeperListener { boolean cleanSetOfActiveMaster = true; // Try to become the active master, watch if there is another master try { - if (ZKUtil.setAddressAndWatch(this.watcher, - this.watcher.masterAddressZNode, this.address)) { + if (ZKUtil.createEphemeralNodeAndWatch(this.watcher, + this.watcher.masterAddressZNode, Bytes.toBytes(this.sn.toString()))) { // We are the master, return this.clusterHasActiveMaster.set(true); - LOG.info("Master=" + this.address); + LOG.info("Master=" + this.sn); return cleanSetOfActiveMaster; } cleanSetOfActiveMaster = false; @@ -134,9 +139,10 @@ class ActiveMasterManager extends ZooKeeperListener { // There is another active master running elsewhere or this is a restart // and the master ephemeral node has not expired yet. 
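The election in blockUntilBecomingActiveMaster rests on one ZooKeeper primitive: racing to create an ephemeral znode whose data names the winner. Reduced to that primitive, and leaving out the watch and retry logic the patch layers on top, the race looks roughly like this (znode path and API usage are a sketch, not the patch's helpers):

    import java.nio.charset.Charset;
    import org.apache.zookeeper.CreateMode;
    import org.apache.zookeeper.KeeperException;
    import org.apache.zookeeper.ZooDefs;
    import org.apache.zookeeper.ZooKeeper;

    public final class MasterElectionSketch {
      // Try to become the active master. The ephemeral node dies with our
      // ZK session, so a crashed master frees the slot automatically;
      // no master-side heartbeating is required.
      static boolean tryBecomeActiveMaster(ZooKeeper zk, String znode,
          String serverName) throws KeeperException, InterruptedException {
        try {
          zk.create(znode, serverName.getBytes(Charset.forName("UTF-8")),
              ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
          return true;  // we created it: we are the active master
        } catch (KeeperException.NodeExistsException e) {
          return false; // another master holds it; watch the node instead
        }
      }
    }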
this.clusterHasActiveMaster.set(true); - HServerAddress currentMaster = - ZKUtil.getDataAsAddress(this.watcher, this.watcher.masterAddressZNode); - if (currentMaster != null && currentMaster.equals(this.address)) { + byte [] bytes = + ZKUtil.getDataAndWatch(this.watcher, this.watcher.masterAddressZNode); + ServerName currentMaster = new ServerName(Bytes.toString(bytes)); + if (currentMaster != null && currentMaster.equals(this.sn)) { LOG.info("Current master has this master's address, " + currentMaster + "; master was restarted? Waiting on znode to expire..."); // Hurry along the expiration of the znode. @@ -177,11 +183,11 @@ class ActiveMasterManager extends ZooKeeperListener { public void stop() { try { // If our address is in ZK, delete it on our way out - HServerAddress zkAddress = - ZKUtil.getDataAsAddress(watcher, watcher.masterAddressZNode); + byte [] bytes = + ZKUtil.getDataAndWatch(watcher, watcher.masterAddressZNode); // TODO: redo this to make it atomic (only added for tests) - if(zkAddress != null && - zkAddress.equals(address)) { + ServerName master = new ServerName(Bytes.toString(bytes)); + if(master != null && master.equals(this.sn)) { ZKUtil.deleteNode(watcher, watcher.masterAddressZNode); } } catch (KeeperException e) { diff --git a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index eed62ee1f6b..166123f0581 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -24,6 +24,7 @@ import java.io.DataOutput; import java.io.IOException; import java.lang.Thread.UncaughtExceptionHandler; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -43,11 +44,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaReader; @@ -124,8 +124,8 @@ public class AssignmentManager extends ZooKeeperListener { * with the other under a lock on {@link #regions} * @see #regions */ - private final NavigableMap> servers = - new TreeMap>(); + private final NavigableMap> servers = + new TreeMap>(); /** * Region to server assignment map. @@ -134,8 +134,8 @@ public class AssignmentManager extends ZooKeeperListener { * with the other under a lock on {@link #regions} * @see #servers */ - private final SortedMap regions = - new TreeMap(); + private final SortedMap regions = + new TreeMap(); private final ExecutorService executorService; @@ -168,6 +168,26 @@ public class AssignmentManager extends ZooKeeperListener { this.master.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10); } + /** + * Compute the average load across all region servers. + * Currently, this uses a very naive computation - just uses the number of + * regions being served, ignoring stats about number of requests. 
+ * @return the average load + */ + double getAverageLoad() { + int totalLoad = 0; + int numServers = 0; + // Sync on this.regions because access to this.servers always synchronizes + // in this order. + synchronized (this.regions) { + for (Map.Entry<ServerName, List<HRegionInfo>> e: servers.entrySet()) { + numServers++; + totalLoad += e.getValue().size(); + } + } + // Guard against divide-by-zero before any server has reported in. + if (numServers == 0) return 0; + return (double)totalLoad / (double)numServers; + } + /** * @return Instance of ZKTable. */ @@ -191,33 +211,31 @@ } /** - * Handle failover. Restore state from META and ZK. Handle any regions in - * transition. Presumes .META. and -ROOT- deployed. - * @throws KeeperException + * Called on startup. + * Figures out whether this is a fresh cluster start or we are joining an extant running cluster. * @throws IOException - * @throws InterruptedException + * @throws KeeperException + * @throws InterruptedException */ - void processFailover() throws KeeperException, IOException, InterruptedException { + void joinCluster() throws IOException, KeeperException, InterruptedException { // Concurrency note: In the below the accesses on regionsInTransition are // outside of a synchronization block where usually all accesses to RIT are // synchronized. The presumption is that in this case it is safe since this // method is being played by a single thread on startup. - // TODO: Check list of user regions and their assignments against regionservers. // TODO: Regions that have a null location and are not in regionsInTransitions // need to be handled. // Add -ROOT- and .META. on regions map. They must be deployed if we got - // this far. Caller takes care of it. - HServerInfo hsi = - this.serverManager.getHServerInfo(this.catalogTracker.getMetaLocation()); - regionOnline(HRegionInfo.FIRST_META_REGIONINFO, hsi); - hsi = this.serverManager.getHServerInfo(this.catalogTracker.getRootLocation()); - regionOnline(HRegionInfo.ROOT_REGIONINFO, hsi); + // this far. + ServerName sn = this.catalogTracker.getMetaLocation(); + regionOnline(HRegionInfo.FIRST_META_REGIONINFO, sn); + sn = this.catalogTracker.getRootLocation(); + regionOnline(HRegionInfo.ROOT_REGIONINFO, sn); // Scan META to build list of existing regions, servers, and assignment // Returns servers who have not checked in (assumed dead) and their regions Map<ServerName, List<Pair<HRegionInfo, Result>>> deadServers = rebuildUserRegions(); // Process list of dead servers processDeadServers(deadServers); @@ -227,15 +245,36 @@ public void processRegionsInTransition() throws KeeperException, IOException { List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(watcher, - watcher.assignmentZNode); - if (nodes.isEmpty()) { - LOG.info("No regions in transition in ZK to process on failover"); - return; + watcher.assignmentZNode); + // Run through all regions. If they are not assigned and not in RIT, then + // it's a clean cluster startup, else it's a failover.
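The scan this comment introduces is the entire startup-versus-failover test: any user region with a live assignment, or with an unassigned znode in ZK, means we are joining a running cluster (the loop that implements it follows). The decision in isolation, with simplified stand-in types (this class is illustrative, not part of the patch):

    import java.util.Map;
    import java.util.Set;

    public final class StartupMode {
      /** Minimal stand-in for HRegionInfo; only what the decision needs. */
      static class Region {
        final String encodedName;
        final boolean meta;
        Region(String encodedName, boolean meta) {
          this.encodedName = encodedName;
          this.meta = meta;
        }
      }

      /**
       * A cluster start is a failover unless no user region is assigned
       * anywhere (all values null) and none has an unassigned znode in ZK.
       * @param regions  region to server-name map, value null when unassigned
       * @param ritNodes encoded names of regions in transition
       */
      static boolean isFailover(Map<Region, String> regions, Set<String> ritNodes) {
        for (Map.Entry<Region, String> e : regions.entrySet()) {
          if (e.getKey().meta) continue;          // -ROOT-/.META. don't count
          if (e.getValue() != null) return true;  // user region out on a server
          if (ritNodes.contains(e.getKey().encodedName)) return true; // in RIT
        }
        return false; // fresh cluster startup
      }
    }

A fresh start then proceeds to cleanoutUnassigned() and assignAllUserRegions(); anything else takes the failover path.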
+ boolean userRegionsOutOnCluster = false; + for (Map.Entry<HRegionInfo, ServerName> e: this.regions.entrySet()) { + if (!e.getKey().isMetaRegion() && e.getValue() != null) { + LOG.debug("Found " + e + " out on cluster"); + userRegionsOutOnCluster = true; + break; + } + if (nodes.contains(e.getKey().getEncodedName())) { + LOG.debug("Found " + e + " in RITs"); + userRegionsOutOnCluster = true; + break; + } } - LOG.info("Failed-over master needs to process " + nodes.size() + - " regions in transition"); - for (String encodedRegionName: nodes) { - processRegionInTransition(encodedRegionName, null); + + // If we found user regions out on cluster, it's a failover. + if (userRegionsOutOnCluster) { + LOG.info("Found regions out on cluster or in RIT; failover"); + processDeadServers(deadServers); + if (!nodes.isEmpty()) { + for (String encodedRegionName: nodes) { + processRegionInTransition(encodedRegionName, null); + } + } + } else { + // Fresh cluster startup. + cleanoutUnassigned(); + assignAllUserRegions(); } } @@ -264,10 +303,10 @@ } /** - * Process failover of encodedName. Look in + * Process failover of encodedRegionName. Look in RIT. * @param encodedRegionName Region to process failover for. - * @param encodedRegionName RegionInfo. If null we'll go get it from meta table. - * @return + * @param regionInfo If null we'll go get it from meta table. + * @return True if we processed regionInfo as a RIT. * @throws KeeperException * @throws IOException */ @@ -278,7 +317,7 @@ if (data == null) return false; HRegionInfo hri = regionInfo; if (hri == null) { - Pair<HRegionInfo, HServerAddress> p = + Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, data.getRegionName()); if (p == null) return false; hri = p.getFirst(); @@ -327,17 +366,18 @@ // Region is opened, insert into RIT and handle it regionsInTransition.put(encodedRegionName, new RegionState( regionInfo, RegionState.State.OPENING, data.getStamp())); - HServerInfo hsi = serverManager.getServerInfo(data.getServerName()); + ServerName sn = data.getOrigin(); // sn could be null if this server is no longer online. If // that's the case, just let this RIT timeout; it'll be assigned // to a new server then.
- if (hsi == null) { + if (sn == null) { LOG.warn("Region in transition " + regionInfo.getEncodedName() + - " references a server no longer up " + data.getServerName() + - "; letting RIT timeout so will be assigned elsewhere"); + " references a null server; letting RIT timeout so will be " + + "assigned elsewhere"); break; } - new OpenedRegionHandler(master, this, regionInfo, hsi).process(); + new OpenedRegionHandler(master, this, regionInfo, sn).process(); break; } } @@ -354,18 +394,19 @@ public class AssignmentManager extends ZooKeeperListener { */ private void handleRegion(final RegionTransitionData data) { synchronized(regionsInTransition) { - if (data == null || data.getServerName() == null) { + if (data == null || data.getOrigin() == null) { LOG.warn("Unexpected NULL input " + data); return; } + ServerName sn = data.getOrigin(); // Check if this is a special HBCK transition - if (data.getServerName().equals(HConstants.HBCK_CODE_NAME)) { + if (sn.equals(HConstants.HBCK_CODE_SERVERNAME)) { handleHBCK(data); return; } // Verify this is a known server - if (!serverManager.isServerOnline(data.getServerName()) && - !this.master.getServerName().equals(data.getServerName())) { + if (!serverManager.isServerOnline(sn) && + !this.master.getServerName().equals(sn)) { LOG.warn("Attempted to handle region transition for server but " + "server is not online: " + data.getRegionName()); return; @@ -387,7 +428,7 @@ public class AssignmentManager extends ZooKeeperListener { case RS_ZK_REGION_SPLITTING: if (!isInStateForSplitting(regionState)) break; - addSplittingToRIT(data.getServerName(), encodedName); + addSplittingToRIT(sn.toString(), encodedName); break; case RS_ZK_REGION_SPLIT: @@ -396,9 +437,9 @@ public class AssignmentManager extends ZooKeeperListener { // If null, add SPLITTING state before going to SPLIT if (regionState == null) { LOG.info("Received SPLIT for region " + prettyPrintedRegionName + - " from server " + data.getServerName() + + " from server " + sn + " but region was not first in SPLITTING state; continuing"); - addSplittingToRIT(data.getServerName(), encodedName); + addSplittingToRIT(sn.toString(), encodedName); } // Check it has daughters. byte [] payload = data.getPayload(); @@ -412,14 +453,13 @@ public class AssignmentManager extends ZooKeeperListener { } assert daughters.size() == 2; // Assert that we can get a serverinfo for this server. - HServerInfo hsi = getAndCheckHServerInfo(data.getServerName()); - if (hsi == null) { - LOG.error("Dropped split! No HServerInfo for " + data.getServerName()); + if (!this.serverManager.isServerOnline(sn)) { + LOG.error("Dropped split! ServerName=" + sn + " unknown."); break; } // Run handler to do the rest of the SPLIT handling. 
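The CLOSING, CLOSED, OPENING and OPENED cases that follow all open with the same defensive check: ignore the event unless the region is currently in one of the states that event expects, warning instead of acting on a stale znode. The skeleton of that guard (state names abbreviated; illustrative only):

    import java.util.EnumSet;
    import java.util.Set;

    public final class TransitionGuard {
      /** Illustrative subset of region states. */
      enum State { PENDING_CLOSE, CLOSING, PENDING_OPEN, OPENING, OPEN }

      /**
       * An incoming event may be applied only when the region is known
       * (state != null) and sits in one of the states the event expects.
       * Out-of-order znode events, e.g. a late CLOSED after reassignment,
       * get dropped by the caller with a warning, as handleRegion() does.
       */
      static boolean mayApply(State current, Set<State> expected) {
        return current != null && expected.contains(current);
      }

      public static void main(String[] args) {
        // A CLOSED event expects the region in PENDING_CLOSE or CLOSING.
        Set<State> expectClose = EnumSet.of(State.PENDING_CLOSE, State.CLOSING);
        System.out.println(mayApply(State.CLOSING, expectClose)); // true
        System.out.println(mayApply(State.OPENING, expectClose)); // false: stale
      }
    }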
this.executorService.submit(new SplitRegionHandler(master, this, - regionState.getRegion(), hsi, daughters)); + regionState.getRegion(), sn, daughters)); break; case RS_ZK_REGION_CLOSING: @@ -428,7 +468,7 @@ public class AssignmentManager extends ZooKeeperListener { if (regionState == null || (!regionState.isPendingClose() && !regionState.isClosing())) { LOG.warn("Received CLOSING for region " + prettyPrintedRegionName + - " from server " + data.getServerName() + " but region was in " + + " from server " + data.getOrigin() + " but region was in " + " the state " + regionState + " and not " + "in expected PENDING_CLOSE or CLOSING states"); return; @@ -442,7 +482,7 @@ public class AssignmentManager extends ZooKeeperListener { if (regionState == null || (!regionState.isPendingClose() && !regionState.isClosing())) { LOG.warn("Received CLOSED for region " + prettyPrintedRegionName + - " from server " + data.getServerName() + " but region was in " + + " from server " + data.getOrigin() + " but region was in " + " the state " + regionState + " and not " + "in expected PENDING_CLOSE or CLOSING states"); return; @@ -462,7 +502,7 @@ public class AssignmentManager extends ZooKeeperListener { (!regionState.isPendingOpen() && !regionState.isOpening())) { LOG.warn("Received OPENING for region " + prettyPrintedRegionName + - " from server " + data.getServerName() + " but region was in " + + " from server " + data.getOrigin() + " but region was in " + " the state " + regionState + " and not " + "in expected PENDING_OPEN or OPENING states"); return; @@ -477,7 +517,7 @@ public class AssignmentManager extends ZooKeeperListener { (!regionState.isPendingOpen() && !regionState.isOpening())) { LOG.warn("Received OPENED for region " + prettyPrintedRegionName + - " from server " + data.getServerName() + " but region was in " + + " from server " + data.getOrigin() + " but region was in " + " the state " + regionState + " and not " + "in expected PENDING_OPEN or OPENING states"); return; @@ -486,7 +526,7 @@ public class AssignmentManager extends ZooKeeperListener { regionState.update(RegionState.State.OPEN, data.getStamp()); this.executorService.submit( new OpenedRegionHandler(master, this, regionState.getRegion(), - this.serverManager.getServerInfo(data.getServerName()))); + data.getOrigin())); break; } } @@ -524,12 +564,6 @@ public class AssignmentManager extends ZooKeeperListener { return true; } - private HServerInfo getAndCheckHServerInfo(final String serverName) { - HServerInfo hsi = this.serverManager.getServerInfo(serverName); - if (hsi == null) LOG.debug("No serverinfo for " + serverName); - return hsi; - } - /** * @param serverName * @param encodedName @@ -572,9 +606,9 @@ public class AssignmentManager extends ZooKeeperListener { */ private HRegionInfo findHRegionInfo(final String serverName, final String encodedName) { - HServerInfo hsi = getAndCheckHServerInfo(serverName); - if (hsi == null) return null; - List hris = this.servers.get(hsi); + ServerName sn = new ServerName(serverName); + if (!this.serverManager.isServerOnline(sn)) return null; + List hris = this.servers.get(sn); HRegionInfo foundHri = null; for (HRegionInfo hri: hris) { if (hri.getEncodedName().equals(encodedName)) { @@ -594,7 +628,7 @@ public class AssignmentManager extends ZooKeeperListener { private void handleHBCK(RegionTransitionData data) { String encodedName = HRegionInfo.encodeRegionName(data.getRegionName()); LOG.info("Handling HBCK triggered transition=" + data.getEventType() + - ", server=" + data.getServerName() + ", region=" 
+ + ", server=" + data.getOrigin() + ", region=" + HRegionInfo.prettyPrint(encodedName)); RegionState regionState = regionsInTransition.get(encodedName); switch (data.getEventType()) { @@ -741,9 +775,9 @@ public class AssignmentManager extends ZooKeeperListener { *

<p>
* Used when a region has been successfully opened on a region server. * @param regionInfo - * @param serverInfo + * @param sn */ - public void regionOnline(HRegionInfo regionInfo, HServerInfo serverInfo) { + public void regionOnline(HRegionInfo regionInfo, ServerName sn) { synchronized (this.regionsInTransition) { RegionState rs = this.regionsInTransition.remove(regionInfo.getEncodedName()); @@ -753,22 +787,22 @@ public class AssignmentManager extends ZooKeeperListener { } synchronized (this.regions) { // Add check - HServerInfo hsi = this.regions.get(regionInfo); - if (hsi != null) LOG.warn("Overwriting " + regionInfo.getEncodedName() + - " on " + hsi); - this.regions.put(regionInfo, serverInfo); - addToServers(serverInfo, regionInfo); + ServerName oldSn = this.regions.get(regionInfo); + if (oldSn != null) LOG.warn("Overwriting " + regionInfo.getEncodedName() + + " on " + oldSn + " with " + sn); + this.regions.put(regionInfo, sn); + addToServers(sn, regionInfo); this.regions.notifyAll(); } // Remove plan if one. clearRegionPlan(regionInfo); // Update timers for all regions in transition going against this server. - updateTimers(serverInfo); + updateTimers(sn); } /** * Touch timers for all regions in transition that have the passed - * hsi in common. + * sn in common. * Call this method whenever a server checks in. Doing so helps the case where * a new regionserver has joined the cluster and its been given 1k regions to * open. If this method is tickled every time the region reports in a @@ -777,9 +811,9 @@ public class AssignmentManager extends ZooKeeperListener { * as part of bulk assign -- there we have a different mechanism for extending * the regions in transition timer (we turn it off temporarily -- because * there is no regionplan involved when bulk assigning. - * @param hsi + * @param sn */ - private void updateTimers(final HServerInfo hsi) { + private void updateTimers(final ServerName sn) { // This loop could be expensive. // First make a copy of current regionPlan rather than hold sync while // looping because holding sync can cause deadlock. Its ok in this loop @@ -789,7 +823,7 @@ public class AssignmentManager extends ZooKeeperListener { copy.putAll(this.regionPlans); } for (Map.Entry e: copy.entrySet()) { - if (!e.getValue().getDestination().equals(hsi)) continue; + if (!e.getValue().getDestination().equals(sn)) continue; RegionState rs = null; synchronized (this.regionsInTransition) { rs = this.regionsInTransition.get(e.getKey()); @@ -828,11 +862,11 @@ public class AssignmentManager extends ZooKeeperListener { */ public void setOffline(HRegionInfo regionInfo) { synchronized (this.regions) { - HServerInfo serverInfo = this.regions.remove(regionInfo); - if (serverInfo == null) return; - List serverRegions = this.servers.get(serverInfo); + ServerName sn = this.regions.remove(regionInfo); + if (sn == null) return; + List serverRegions = this.servers.get(sn); if (!serverRegions.remove(regionInfo)) { - LOG.warn("No " + regionInfo + " on " + serverInfo); + LOG.warn("No " + regionInfo + " on " + sn); } } } @@ -906,10 +940,10 @@ public class AssignmentManager extends ZooKeeperListener { * @param destination * @param regions Regions to assign. 
*/ - void assign(final HServerInfo destination, + void assign(final ServerName destination, final List regions) { LOG.debug("Bulk assigning " + regions.size() + " region(s) to " + - destination.getServerName()); + destination.toString()); List states = new ArrayList(regions.size()); synchronized (this.regionsInTransition) { @@ -932,14 +966,19 @@ public class AssignmentManager extends ZooKeeperListener { for (int oldCounter = 0; true;) { int count = counter.get(); if (oldCounter != count) { - LOG.info(destination.getServerName() + " unassigned znodes=" + count + + LOG.info(destination.toString() + " unassigned znodes=" + count + " of total=" + total); oldCounter = count; } if (count == total) break; Threads.sleep(1); } + // Move on to open regions. try { + // Send OPEN RPC. This can fail if the server on other end is is not up. + // If we fail, fail the startup by aborting the server. There is one + // exception we will tolerate: ServerNotRunningException. This is thrown + // between report of regionserver being up and long maxWaitTime = System.currentTimeMillis() + this.master.getConfiguration(). getLong("hbase.regionserver.rpc.startup.waittime", 60000); @@ -962,7 +1001,7 @@ public class AssignmentManager extends ZooKeeperListener { } catch (InterruptedException e) { throw new RuntimeException(e); } - LOG.debug("Bulk assigning done for " + destination.getServerName()); + LOG.debug("Bulk assigning done for " + destination.toString()); } /** @@ -971,11 +1010,11 @@ public class AssignmentManager extends ZooKeeperListener { static class CreateUnassignedAsyncCallback implements AsyncCallback.StringCallback { private final Log LOG = LogFactory.getLog(CreateUnassignedAsyncCallback.class); private final ZooKeeperWatcher zkw; - private final HServerInfo destination; + private final ServerName destination; private final AtomicInteger counter; CreateUnassignedAsyncCallback(final ZooKeeperWatcher zkw, - final HServerInfo destination, final AtomicInteger counter) { + final ServerName destination, final AtomicInteger counter) { this.zkw = zkw; this.destination = destination; this.counter = counter; @@ -991,7 +1030,7 @@ public class AssignmentManager extends ZooKeeperListener { ", rc=" + rc, null); return; } - LOG.debug("rs=" + (RegionState)ctx + ", server=" + this.destination.getServerName()); + LOG.debug("rs=" + (RegionState)ctx + ", server=" + this.destination.toString()); // Async exists to set a watcher so we'll get triggered when // unassigned node changes. this.zkw.getZooKeeper().exists(path, this.zkw, @@ -1078,7 +1117,7 @@ public class AssignmentManager extends ZooKeeperListener { if (plan == null) return; // Should get reassigned later when RIT times out. try { LOG.debug("Assigning region " + state.getRegion().getRegionNameAsString() + - " to " + plan.getDestination().getServerName()); + " to " + plan.getDestination().toString()); // Transition RegionState to PENDING_OPEN state.update(RegionState.State.PENDING_OPEN); // Send OPEN RPC. This can fail if the server on other end is is not up. 
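Earlier in this method, assign() fires one asynchronous znode creation per region and then polls a shared counter until every callback has reported, logging progress as the count moves. Stripped of ZooKeeper, the rendezvous looks like the sketch below; a CountDownLatch would also work, but a polled counter lets the caller log intermediate progress the same way the patch does:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.atomic.AtomicInteger;

    public class CounterRendezvous {
      public static void main(String[] args) throws InterruptedException {
        final AtomicInteger counter = new AtomicInteger(0);
        final int total = 10;
        ExecutorService pool = Executors.newFixedThreadPool(4);
        for (int i = 0; i < total; i++) {
          pool.execute(new Runnable() {
            public void run() {
              // Stand-in for the ZK StringCallback: check in when done.
              counter.incrementAndGet();
            }
          });
        }
        // Poll until every async completion has reported, logging on change.
        for (int oldCount = 0; true; Thread.sleep(1)) {
          int count = counter.get();
          if (count != oldCount) {
            System.out.println("completed " + count + " of " + total);
            oldCount = count;
          }
          if (count == total) break;
        }
        pool.shutdown();
      }
    }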
@@ -1118,7 +1157,7 @@ public class AssignmentManager extends ZooKeeperListener { state.update(RegionState.State.OFFLINE); try { if(!ZKAssign.createOrForceNodeOffline(master.getZooKeeper(), - state.getRegion(), master.getServerName())) { + state.getRegion(), this.master.getServerName())) { LOG.warn("Attempted to create/force node into OFFLINE state before " + "completing assignment but failed to do so for " + state); return false; @@ -1147,7 +1186,7 @@ public class AssignmentManager extends ZooKeeperListener { state.update(RegionState.State.OFFLINE); try { ZKAssign.asyncCreateNodeOffline(master.getZooKeeper(), state.getRegion(), - master.getServerName(), cb, ctx); + this.master.getServerName(), cb, ctx); } catch (KeeperException e) { master.abort("Unexpected ZK exception creating/setting node OFFLINE", e); return false; @@ -1175,10 +1214,10 @@ public class AssignmentManager extends ZooKeeperListener { * if no servers to assign, it returns null). */ RegionPlan getRegionPlan(final RegionState state, - final HServerInfo serverToExclude, final boolean forceNewPlan) { + final ServerName serverToExclude, final boolean forceNewPlan) { // Pickup existing plan or make a new one String encodedName = state.getRegion().getEncodedName(); - List servers = this.serverManager.getOnlineServersList(); + List servers = this.serverManager.getOnlineServersList(); // The remove below hinges on the fact that the call to // serverManager.getOnlineServersList() returns a copy if (serverToExclude != null) servers.remove(serverToExclude); @@ -1266,7 +1305,7 @@ public class AssignmentManager extends ZooKeeperListener { } } // Send CLOSE RPC - HServerInfo server = null; + ServerName server = null; synchronized (this.regions) { server = regions.get(region); } @@ -1347,6 +1386,29 @@ public class AssignmentManager extends ZooKeeperListener { * Assigns all user regions, if any. Used during cluster startup. *

<p>
* This is a synchronous call and will return once every region has been + * assigned. If anything fails, an exception is thrown + * @throws InterruptedException + * @throws IOException + */ + public void assignUserRegions(List regions, List servers) + throws IOException, InterruptedException { + if (regions == null) + return; + Map> bulkPlan = null; + // Generate a round-robin bulk assignment plan + bulkPlan = LoadBalancer.roundRobinAssignment(regions, servers); + LOG.info("Bulk assigning " + regions.size() + " region(s) round-robin across " + + servers.size() + " server(s)"); + // Use fixed count thread pool assigning. + BulkAssigner ba = new StartupBulkAssigner(this.master, bulkPlan, this); + ba.bulkAssign(); + LOG.info("Bulk assigning done"); + } + + /** + * Assigns all user regions, if any exist. Used during cluster startup. + *

<p>
+ * This is a synchronous call and will return once every region has been * assigned. If anything fails, an exception is thrown and the cluster * should be shutdown. * @throws InterruptedException @@ -1354,10 +1416,10 @@ public class AssignmentManager extends ZooKeeperListener { */ public void assignAllUserRegions() throws IOException, InterruptedException { // Get all available servers - List servers = serverManager.getOnlineServersList(); + List servers = serverManager.getOnlineServersList(); // Scan META for all user regions, skipping any disabled tables - Map allRegions = + Map allRegions = MetaReader.fullScan(catalogTracker, this.zkTable.getDisabledTables(), true); if (allRegions == null || allRegions.isEmpty()) return; @@ -1365,15 +1427,14 @@ public class AssignmentManager extends ZooKeeperListener { boolean retainAssignment = master.getConfiguration(). getBoolean("hbase.master.startup.retainassign", true); - Map> bulkPlan = null; + Map> bulkPlan = null; if (retainAssignment) { // Reuse existing assignment info bulkPlan = LoadBalancer.retainAssignment(allRegions, servers); } else { // assign regions in round-robin fashion - HRegionInfo [] regions = - allRegions.keySet().toArray(new HRegionInfo[allRegions.size()]); - bulkPlan = LoadBalancer.roundRobinAssignment(regions, servers); + assignUserRegions(new ArrayList(allRegions.keySet()), servers); + return; } LOG.info("Bulk assigning " + allRegions.size() + " region(s) across " + servers.size() + " server(s), retainAssignment=" + retainAssignment); @@ -1391,11 +1452,11 @@ public class AssignmentManager extends ZooKeeperListener { * which will abort the Server if exception. */ static class StartupBulkAssigner extends BulkAssigner { - final Map> bulkPlan; + final Map> bulkPlan; final AssignmentManager assignmentManager; StartupBulkAssigner(final Server server, - final Map> bulkPlan, + final Map> bulkPlan, final AssignmentManager am) { super(server); this.bulkPlan = bulkPlan; @@ -1421,9 +1482,9 @@ public class AssignmentManager extends ZooKeeperListener { @Override protected void populatePool(java.util.concurrent.ExecutorService pool) { - for (Map.Entry> e: this.bulkPlan.entrySet()) { + for (Map.Entry> e: this.bulkPlan.entrySet()) { pool.execute(new SingleServerBulkAssigner(e.getKey(), e.getValue(), - this.assignmentManager, true)); + this.assignmentManager)); } } @@ -1456,7 +1517,7 @@ public class AssignmentManager extends ZooKeeperListener { */ static class GeneralBulkAssigner extends StartupBulkAssigner { GeneralBulkAssigner(final Server server, - final Map> bulkPlan, + final Map> bulkPlan, final AssignmentManager am) { super(server, bulkPlan, am); } @@ -1476,13 +1537,12 @@ public class AssignmentManager extends ZooKeeperListener { * Manage bulk assigning to a server. 
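Both assignment paths above lean on LoadBalancer.roundRobinAssignment to build the bulk plan, which amounts to dealing regions across servers like cards. A minimal version of that dealing, with String standing in for ServerName and HRegionInfo (illustrative, not the patch's LoadBalancer):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;

    public final class RoundRobin {
      /** Deal regions across servers in order; counts differ by at most one. */
      static Map<String, List<String>> roundRobinAssignment(
          List<String> regions, List<String> servers) {
        Map<String, List<String>> plan = new TreeMap<String, List<String>>();
        if (servers.isEmpty()) return plan;
        for (int i = 0; i < regions.size(); i++) {
          String server = servers.get(i % servers.size());
          List<String> assigned = plan.get(server);
          if (assigned == null) {
            assigned = new ArrayList<String>();
            plan.put(server, assigned);
          }
          assigned.add(regions.get(i));
        }
        return plan;
      }
    }

Each server ends up with region counts differing by at most one, which is all a startup plan needs before any real load data exists.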
*/ static class SingleServerBulkAssigner implements Runnable { - private final HServerInfo regionserver; + private final ServerName regionserver; private final List regions; private final AssignmentManager assignmentManager; - SingleServerBulkAssigner(final HServerInfo regionserver, - final List regions, final AssignmentManager am, - final boolean startUp) { + SingleServerBulkAssigner(final ServerName regionserver, + final List regions, final AssignmentManager am) { this.regionserver = regionserver; this.regions = regions; this.assignmentManager = am; @@ -1562,28 +1622,26 @@ public class AssignmentManager extends ZooKeeperListener { * in META * @throws IOException */ - private Map>> rebuildUserRegions() + Map>> rebuildUserRegions() throws IOException { // Region assignment from META - List results = MetaReader.fullScanOfResults(catalogTracker); + List results = MetaReader.fullScanOfResults(this.catalogTracker); // Map of offline servers and their regions to be returned - Map>> offlineServers = - new TreeMap>>(); + Map>> offlineServers = + new TreeMap>>(); // Iterate regions in META for (Result result : results) { - Pair region = - MetaReader.metaRowToRegionPairWithInfo(result); + Pair region = MetaReader.metaRowToRegionPair(result); if (region == null) continue; - HServerInfo regionLocation = region.getSecond(); HRegionInfo regionInfo = region.getFirst(); + ServerName regionLocation = region.getSecond(); if (regionLocation == null) { // Region not being served, add to region map with no assignment // If this needs to be assigned out, it will also be in ZK as RIT this.regions.put(regionInfo, null); - } else if (!serverManager.isServerOnline( - regionLocation.getServerName())) { + } else if (!this.serverManager.isServerOnline(regionLocation)) { // Region is located on a server that isn't online - List> offlineRegions = + List> offlineRegions = offlineServers.get(regionLocation); if (offlineRegions == null) { offlineRegions = new ArrayList>(1); @@ -1592,7 +1650,7 @@ public class AssignmentManager extends ZooKeeperListener { offlineRegions.add(new Pair(regionInfo, result)); } else { // Region is being served and on an active server - regions.put(regionInfo, regionLocation); + this.regions.put(regionInfo, regionLocation); addToServers(regionLocation, regionInfo); } } @@ -1613,9 +1671,9 @@ public class AssignmentManager extends ZooKeeperListener { * @throws KeeperException */ private void processDeadServers( - Map>> deadServers) + Map>> deadServers) throws IOException, KeeperException { - for (Map.Entry>> deadServer : + for (Map.Entry>> deadServer: deadServers.entrySet()) { List> regions = deadServer.getValue(); for (Pair region : regions) { @@ -1624,7 +1682,7 @@ public class AssignmentManager extends ZooKeeperListener { // If region was in transition (was in zk) force it offline for reassign try { ZKAssign.createOrForceNodeOffline(watcher, regionInfo, - master.getServerName()); + this.master.getServerName()); } catch (KeeperException.NoNodeException nne) { // This is fine } @@ -1640,11 +1698,11 @@ public class AssignmentManager extends ZooKeeperListener { * @param hsi * @param hri */ - private void addToServers(final HServerInfo hsi, final HRegionInfo hri) { - List hris = servers.get(hsi); + private void addToServers(final ServerName sn, final HRegionInfo hri) { + List hris = servers.get(sn); if (hris == null) { hris = new ArrayList(); - servers.put(hsi, hris); + servers.put(sn, hris); } hris.add(hri); } @@ -1857,7 +1915,7 @@ public class AssignmentManager extends ZooKeeperListener { try { data 
= new RegionTransitionData( EventType.M_ZK_REGION_OFFLINE, regionInfo.getRegionName(), - master.getServerName()); + master.getServerName()); if (ZKUtil.setData(watcher, node, data.getBytes(), stat.getVersion())) { // Node is now OFFLINE, let's trigger another assignment @@ -1922,16 +1980,16 @@ public class AssignmentManager extends ZooKeeperListener { /** * Process shutdown server removing any assignments. - * @param hsi Server that went down. + * @param sn Server that went down. * @return list of regions in transition on this server */ - public List processServerShutdown(final HServerInfo hsi) { + public List processServerShutdown(final ServerName sn) { // Clean out any existing assignment plans for this server synchronized (this.regionPlans) { for (Iterator > i = this.regionPlans.entrySet().iterator(); i.hasNext();) { Map.Entry e = i.next(); - if (e.getValue().getDestination().equals(hsi)) { + if (e.getValue().getDestination().equals(sn)) { // Use iterator's remove else we'll get CME i.remove(); } @@ -1943,7 +2001,7 @@ public class AssignmentManager extends ZooKeeperListener { Set deadRegions = null; List rits = new ArrayList(); synchronized (this.regions) { - List assignedRegions = this.servers.remove(hsi); + List assignedRegions = this.servers.remove(sn); if (assignedRegions == null || assignedRegions.isEmpty()) { // No regions on this server, we are done, return empty list of RITs return rits; @@ -1968,16 +2026,16 @@ public class AssignmentManager extends ZooKeeperListener { /** * Update inmemory structures. - * @param hsi Server that reported the split + * @param sn Server that reported the split * @param parent Parent region that was split * @param a Daughter region A * @param b Daughter region B */ - public void handleSplitReport(final HServerInfo hsi, final HRegionInfo parent, + public void handleSplitReport(final ServerName sn, final HRegionInfo parent, final HRegionInfo a, final HRegionInfo b) { regionOffline(parent); - regionOnline(a, hsi); - regionOnline(b, hsi); + regionOnline(a, sn); + regionOnline(b, sn); // There's a possibility that the region was splitting while a user asked // the master to disable, we need to make sure we close those regions in @@ -1995,21 +2053,16 @@ public class AssignmentManager extends ZooKeeperListener { * If a new server has come in and it has no regions, it will not be included * in the returned Map. */ - Map> getAssignments() { + Map> getAssignments() { // This is an EXPENSIVE clone. Cloning though is the safest thing to do. // Can't let out original since it can change and at least the loadbalancer // wants to iterate this exported list. We need to synchronize on regions // since all access to this.servers is under a lock on this.regions. - Map> result = null; + Map> result = null; synchronized (this.regions) { - result = new HashMap>(this.servers.size()); - for (Map.Entry> e: this.servers.entrySet()) { - List shallowCopy = new ArrayList(e.getValue()); - HServerInfo clone = new HServerInfo(e.getKey()); - // Set into server load the number of regions this server is carrying - // The load balancer calculation needs it at least and its handy. - clone.getLoad().setNumberOfRegions(e.getValue().size()); - result.put(clone, shallowCopy); + result = new HashMap>(this.servers.size()); + for (Map.Entry> e: this.servers.entrySet()) { + result.put(e.getKey(), new ArrayList(e.getValue())); } } return result; @@ -2018,14 +2071,14 @@ public class AssignmentManager extends ZooKeeperListener { /** * @param encodedRegionName Region encoded name. 
* @return Null or a {@link Pair} instance that holds the full {@link HRegionInfo} - * and the hosting servers {@link HServerInfo}. + * and the hosting servers {@link ServerName}. */ - Pair getAssignment(final byte [] encodedRegionName) { + Pair getAssignment(final byte [] encodedRegionName) { String name = Bytes.toString(encodedRegionName); synchronized(this.regions) { - for (Map.Entry e: this.regions.entrySet()) { + for (Map.Entry e: this.regions.entrySet()) { if (e.getKey().getEncodedName().equals(name)) { - return new Pair(e.getKey(), e.getValue()); + return new Pair(e.getKey(), e.getValue()); } } } @@ -2042,29 +2095,13 @@ public class AssignmentManager extends ZooKeeperListener { unassign(plan.getRegionInfo()); } - /** - * @param hsi - * @return True if this server is carrying a catalog region, a region from - * -ROOT- or .META. table. - */ - boolean isMetaRegionServer(final HServerInfo hsi) { - synchronized (this.regions) { - List regions = this.servers.get(hsi); - if (regions == null || regions.isEmpty()) return false; - for (HRegionInfo hri: regions) { - if (hri.isMetaRegion()) return true; - } - } - return false; - } - /** * Run through remaining regionservers and unassign all catalog regions. */ void unassignCatalogRegions() { this.servers.entrySet(); synchronized (this.regions) { - for (Map.Entry> e: this.servers.entrySet()) { + for (Map.Entry> e: this.servers.entrySet()) { List regions = e.getValue(); if (regions == null || regions.isEmpty()) continue; for (HRegionInfo hri: regions) { @@ -2084,10 +2121,10 @@ public class AssignmentManager extends ZooKeeperListener { * @throws IOException */ void bulkAssignUserRegions(final HRegionInfo [] regions, - final List servers, final boolean sync) + final List servers, final boolean sync) throws IOException { - Map> bulkPlan = - LoadBalancer.roundRobinAssignment(regions, servers); + Map> bulkPlan = + LoadBalancer.roundRobinAssignment(Arrays.asList(regions), servers); LOG.info("Bulk assigning " + regions.length + " region(s) " + "round-robin across " + servers.size() + " server(s)"); // Use fixed count thread pool assigning. diff --git a/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java index 05600c462d2..26e57144554 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java +++ b/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java @@ -22,17 +22,15 @@ package org.apache.hadoop.hbase.master; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; import java.util.Set; import org.apache.commons.lang.NotImplementedException; -import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.ServerName; /** * Class to hold dead servers list and utility querying dead server list. */ -public class DeadServer implements Set { +public class DeadServer implements Set { /** * Set of known dead servers. On znode expiration, servers are added here. * This is needed in case of a network partitioning where the server's lease @@ -40,26 +38,22 @@ public class DeadServer implements Set { * and it's server logs are recovered, it will be told to call server startup * because by then, its regions have probably been reassigned. 
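A restarted regionserver keeps its hostname and port but comes back with a new startcode, so recognizing an old incarnation means comparing only the first two of the three ServerName fields. Roughly what ServerName.findServerWithSameHostnamePort has to do, using a simplified stand-in type (illustrative only):

    import java.util.Set;

    public final class HostPortMatch {
      /** Simplified stand-in: a ServerName is host, port, startcode. */
      static class SN {
        final String host; final int port; final long startcode;
        SN(String host, int port, long startcode) {
          this.host = host; this.port = port; this.startcode = startcode;
        }
      }

      /**
       * Find a known server with the same hostname and port as sn,
       * regardless of startcode. This is how the dead-server list spots a
       * previous incarnation of a regionserver that has come back alive.
       */
      static SN findServerWithSameHostnamePort(Set<SN> servers, SN sn) {
        for (SN candidate : servers) {
          if (candidate.host.equals(sn.host) && candidate.port == sn.port) {
            return candidate;
          }
        }
        return null;
      }
    }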
*/ - private final Set deadServers = new HashSet(); - - /** Maximum number of dead servers to keep track of */ - private final int maxDeadServers; + private final Set deadServers = new HashSet(); /** Number of dead servers currently being processed */ private int numProcessing; - public DeadServer(int maxDeadServers) { + public DeadServer() { super(); - this.maxDeadServers = maxDeadServers; this.numProcessing = 0; } /** - * @param serverName + * @param serverName Server name * @return true if server is dead */ public boolean isDeadServer(final String serverName) { - return isDeadServer(serverName, false); + return isDeadServer(new ServerName(serverName)); } /** @@ -68,31 +62,27 @@ public class DeadServer implements Set { * host,port,startcode. * @return true if this server was dead before and coming back alive again */ - public boolean cleanPreviousInstance(final String newServerName) { - - String serverAddress = - HServerInfo.getServerNameLessStartCode(newServerName); - for (String serverName: deadServers) { - String deadServerAddress = - HServerInfo.getServerNameLessStartCode(serverName); - if (deadServerAddress.equals(serverAddress)) { - remove(serverName); - return true; - } - } - return false; + public boolean cleanPreviousInstance(final ServerName newServerName) { + ServerName sn = + ServerName.findServerWithSameHostnamePort(this.deadServers, newServerName); + if (sn == null) return false; + return this.deadServers.remove(sn); } /** - * @param serverName Servername as either host:port or - * host,port,startcode. - * @param hostAndPortOnly True if serverName is host and - * port only (host:port) and if so, then we do a prefix compare - * (ignoring start codes) looking for dead server. - * @return true if server is dead + * @param serverName + * @return true if this server is on the dead servers list. */ - boolean isDeadServer(final String serverName, final boolean hostAndPortOnly) { - return HServerInfo.isServer(this, serverName, hostAndPortOnly); + boolean isDeadServer(final ServerName serverName) { + return this.deadServers.contains(serverName); + } + + /** + * @return True if we have a server with matching hostname and port. 
+ */ + boolean isDeadServerWithSameHostnamePort(final ServerName serverName) { + return ServerName.findServerWithSameHostnamePort(this.deadServers, + serverName) != null; } /** @@ -105,18 +95,18 @@ public class DeadServer implements Set { return numProcessing != 0; } - public synchronized Set clone() { - Set clone = new HashSet(this.deadServers.size()); + public synchronized Set clone() { + Set clone = new HashSet(this.deadServers.size()); clone.addAll(this.deadServers); return clone; } - public synchronized boolean add(String e) { + public synchronized boolean add(ServerName e) { this.numProcessing++; return deadServers.add(e); } - public synchronized void finish(String e) { + public synchronized void finish(ServerName e) { this.numProcessing--; } @@ -132,7 +122,7 @@ public class DeadServer implements Set { return deadServers.contains(o); } - public Iterator iterator() { + public Iterator iterator() { return this.deadServers.iterator(); } @@ -152,7 +142,7 @@ public class DeadServer implements Set { return deadServers.containsAll(c); } - public synchronized boolean addAll(Collection c) { + public synchronized boolean addAll(Collection c) { return deadServers.addAll(c); } diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 250a8cf2357..6c169b5ebb4 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -23,8 +23,8 @@ import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.net.InetSocketAddress; -import java.net.UnknownHostException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; @@ -36,14 +36,13 @@ import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HMsg; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.HServerLoad; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableExistsException; import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotFoundException; @@ -54,8 +53,8 @@ import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.MetaScanner; -import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; +import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType; import org.apache.hadoop.hbase.ipc.HBaseRPC; @@ -131,8 +130,12 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { // RPC server for the HMaster private final RpcServer rpcServer; - // Address of the HMaster - private final HServerAddress address; + + /** + * This servers address. 
*/ + private final InetSocketAddress isa; + // Metrics for the HMaster private final MasterMetrics metrics; // file system manager for the master FS operations private final MasterFileSystem fileSystemManager; @@ -172,6 +175,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { private LogCleaner logCleaner; private MasterCoprocessorHost cpHost; + private final ServerName serverName; /** * Initializes the HMaster. The steps are as follows: @@ -189,43 +193,48 @@ throws IOException, KeeperException, InterruptedException { this.conf = conf; - /* - * Determine address and initialize RPC server (but do not start). - * The RPC server ports can be ephemeral. Create a ZKW instance. - */ - HServerAddress a = new HServerAddress(getMyAddress(this.conf)); - int numHandlers = conf.getInt("hbase.regionserver.handler.count", 10); + // Server to handle client requests. + String hostname = DNS.getDefaultHost( + conf.get("hbase.master.dns.interface", "default"), + conf.get("hbase.master.dns.nameserver", "default")); + int port = conf.getInt(HConstants.MASTER_PORT, HConstants.DEFAULT_MASTER_PORT); + // Creating the InetSocketAddress here forces a DNS resolve. + InetSocketAddress initialIsa = new InetSocketAddress(hostname, port); + if (initialIsa.getAddress() == null) { + throw new IllegalArgumentException("Failed resolve of " + initialIsa); + } + int numHandlers = conf.getInt("hbase.master.handler.count", + conf.getInt("hbase.regionserver.handler.count", 25)); this.rpcServer = HBaseRPC.getServer(this, new Class<?>[]{HMasterInterface.class, HMasterRegionInterface.class}, - a.getBindAddress(), a.getPort(), - numHandlers, - 0, // we dont use high priority handlers in master - false, conf, - 0); // this is a DNC w/o high priority handlers - this.address = new HServerAddress(rpcServer.getListenerAddress()); + initialIsa.getHostName(), // BindAddress is IP we got for this server. + initialIsa.getPort(), + numHandlers, + 0, // we don't use high priority handlers in master + conf.getBoolean("hbase.rpc.verbose", false), conf, + 0); // this is a DNC w/o high priority handlers + // Set our address. + this.isa = this.rpcServer.getListenerAddress(); + this.serverName = new ServerName(this.isa.getHostName(), + this.isa.getPort(), System.currentTimeMillis()); // initialize server principal (if using secure Hadoop) User.login(conf, "hbase.master.keytab.file", - "hbase.master.kerberos.principal", this.address.getHostname()); + "hbase.master.kerberos.principal", this.isa.getHostName()); // set the thread name now we have an address - setName(MASTER + "-" + this.address); + setName(MASTER + "-" + this.serverName.toString()); Replication.decorateMasterConfiguration(this.conf); - this.rpcServer.startThreads(); // Hack! Maps DFSClient => Master for logs. HDFS made this // config param for task trackers, but we can piggyback off of it.
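The ServerName built a few lines up from the resolved hostname, RPC port and start time travels elsewhere in this patch as a single string (see the writeUTF/toString pairing in RegionTransitionData), and the DeadServer javadoc above spells the layout as host,port,startcode. A sketch of that round-trip under the assumption of a comma delimiter:

    public final class ServerNameString {
      /** Format the assumed "host,port,startcode" wire string. */
      static String toServerNameString(String host, int port, long startcode) {
        return host + "," + port + "," + startcode;
      }

      /** Parse it back; the inverse used when reading a znode or UTF field. */
      static Object[] parse(String serverName) {
        String[] parts = serverName.split(",");
        if (parts.length != 3) {
          throw new IllegalArgumentException("Not host,port,startcode: " + serverName);
        }
        return new Object[] {
          parts[0], Integer.parseInt(parts[1]), Long.parseLong(parts[2])
        };
      }

      public static void main(String[] args) {
        String s = toServerNameString("rs1.example.com", 60020, 1303939604938L);
        System.out.println(s); // rs1.example.com,60020,1303939604938
      }
    }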
if (this.conf.get("mapred.task.id") == null) { - this.conf.set("mapred.task.id", "hb_m_" + this.address.toString() + - "_" + System.currentTimeMillis()); + this.conf.set("mapred.task.id", "hb_m_" + this.serverName.toString()); } - - this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + - address.getPort(), this); - - this.metrics = new MasterMetrics(getServerName()); + this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this); + this.metrics = new MasterMetrics(getServerName().toString()); } /** @@ -397,9 +406,9 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { fileSystemManager.getClusterId()); this.connection = HConnectionManager.getConnection(conf); - this.executorService = new ExecutorService(getServerName()); + this.executorService = new ExecutorService(getServerName().toString()); - this.serverManager = new ServerManager(this, this, metrics); + this.serverManager = new ServerManager(this, this); initializeZKBasedSystemTrackers(); @@ -409,29 +418,25 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { // start up all service threads. startServiceThreads(); - // Wait for region servers to report in. Returns count of regions. - int regionCount = this.serverManager.waitForRegionServers(); + // Wait for region servers to report in. + this.serverManager.waitForRegionServers(); + // Check zk for regionservers that are up but didn't register + for (ServerName sn: this.regionServerTracker.getOnlineServers()) { + if (!this.serverManager.isServerOnline(sn)) { + // Not registered; add it. + LOG.info("Registering server found up in zk: " + sn); + this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD); + } + } // TODO: Should do this in background rather than block master startup this.fileSystemManager. - splitLogAfterStartup(this.serverManager.getOnlineServers()); + splitLogAfterStartup(this.serverManager.getOnlineServers().keySet()); // Make sure root and meta assigned before proceeding. assignRootAndMeta(); - - // Is this fresh start with no regions assigned or are we a master joining - // an already-running cluster? If regionsCount == 0, then for sure a - // fresh start. TOOD: Be fancier. If regionsCount == 2, perhaps the - // 2 are .META. and -ROOT- and we should fall into the fresh startup - // branch below. For now, do processFailover. - if (regionCount == 0) { - LOG.info("Master startup proceeding: cluster startup"); - this.assignmentManager.cleanoutUnassigned(); - this.assignmentManager.assignAllUserRegions(); - } else { - LOG.info("Master startup proceeding: master failover"); - this.assignmentManager.processFailover(); - } + // Fixup assignment manager status + this.assignmentManager.joinCluster(); // Start balancer and meta catalog janitor after meta and regions have // been assigned. @@ -466,7 +471,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } else { // Region already assigned. We didnt' assign it. Add to in-memory state. this.assignmentManager.regionOnline(HRegionInfo.ROOT_REGIONINFO, - this.serverManager.getHServerInfo(this.catalogTracker.getRootLocation())); + this.catalogTracker.getRootLocation()); } LOG.info("-ROOT- assigned=" + assigned + ", rit=" + rit + ", location=" + catalogTracker.getRootLocation()); @@ -484,32 +489,13 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } else { // Region already assigned. We didnt' assign it. Add to in-memory state. 
this.assignmentManager.regionOnline(HRegionInfo.FIRST_META_REGIONINFO, - this.serverManager.getHServerInfo(this.catalogTracker.getMetaLocation())); + this.catalogTracker.getMetaLocation()); } LOG.info(".META. assigned=" + assigned + ", rit=" + rit + ", location=" + catalogTracker.getMetaLocation()); return assigned; } - /* - * @return This masters' address. - * @throws UnknownHostException - */ - private static String getMyAddress(final Configuration c) - throws UnknownHostException { - // Find out our address up in DNS. - String s = DNS.getDefaultHost(c.get("hbase.master.dns.interface","default"), - c.get("hbase.master.dns.nameserver","default")); - s += ":" + c.get(HConstants.MASTER_PORT, - Integer.toString(HConstants.DEFAULT_MASTER_PORT)); - return s; - } - - /** @return HServerAddress of the master server */ - public HServerAddress getMasterAddress() { - return this.address; - } - public long getProtocolVersion(String protocol, long clientVersion) { if (HMasterInterface.class.getName().equals(protocol)) { return HMasterInterface.VERSION; @@ -645,25 +631,16 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } @Override - public MapWritable regionServerStartup(final HServerInfo serverInfo, - final long serverCurrentTime) + public MapWritable regionServerStartup(final int port, + final long serverStartCode, final long serverCurrentTime) throws IOException { - // Set the ip into the passed in serverInfo. Its ip is more than likely - // not the ip that the master sees here. See at end of this method where - // we pass it back to the regionserver by setting "hbase.regionserver.address" - // Everafter, the HSI combination 'server name' is what uniquely identifies - // the incoming RegionServer. - InetSocketAddress address = new InetSocketAddress( - HBaseServer.getRemoteIp().getHostName(), - serverInfo.getServerAddress().getPort()); - serverInfo.setServerAddress(new HServerAddress(address)); - // Register with server manager - this.serverManager.regionServerStartup(serverInfo, serverCurrentTime); + this.serverManager.regionServerStartup(HBaseServer.getRemoteIp(), port, + serverStartCode, serverCurrentTime); // Send back some config info MapWritable mw = createConfigurationSubset(); - mw.put(new Text("hbase.regionserver.address"), - serverInfo.getServerAddress()); + mw.put(new Text(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER), + new Text(this.serverName.getHostname())); return mw; } @@ -682,23 +659,13 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } @Override - public HMsg [] regionServerReport(HServerInfo serverInfo, HMsg msgs[], - HRegionInfo[] mostLoadedRegions) + public void regionServerReport(byte[] sn, HServerLoad hsl) throws IOException { - return adornRegionServerAnswer(serverInfo, - this.serverManager.regionServerReport(serverInfo, msgs, mostLoadedRegions)); - } - - /** - * Override if you'd add messages to return to regionserver hsi - * or to send an exception. - * @param msgs Messages to add to - * @return Messages to return to - * @throws IOException exceptions that were injected for the region servers - */ - protected HMsg [] adornRegionServerAnswer(final HServerInfo hsi, - final HMsg [] msgs) throws IOException { - return msgs; + this.serverManager.regionServerReport(new ServerName(sn), hsl); + if (hsl != null && this.metrics != null) { + // Up our metrics. 
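With heartbeat messages gone, regionServerStartup above no longer returns instructions; its MapWritable reply carries a config subset plus the hostname the master resolved for the caller, so the regionserver can build its ServerName from the master's point of view. Composing such a reply, sketched with a literal key string (the patch itself uses HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER):

    import org.apache.hadoop.io.MapWritable;
    import org.apache.hadoop.io.Text;

    public final class StartupReply {
      /**
       * Compose a startup reply carrying the hostname this side resolved
       * for the caller. The key literal below is illustrative only.
       */
      static MapWritable reply(String hostnameSeenByMaster) {
        MapWritable mw = new MapWritable();
        mw.put(new Text("hbase.regionserver.hostname.seen.by.master"),
            new Text(hostnameSeenByMaster));
        return mw;
      }
    }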
+ this.metrics.incrementRequests(hsl.getNumberOfRequests()); + } } public boolean isMasterRunning() { @@ -758,14 +725,13 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } } - Map> assignments = + Map> assignments = this.assignmentManager.getAssignments(); // Returned Map from AM does not include mention of servers w/o assignments. - for (Map.Entry e: + for (Map.Entry e: this.serverManager.getOnlineServers().entrySet()) { - HServerInfo hsi = e.getValue(); - if (!assignments.containsKey(hsi)) { - assignments.put(hsi, new ArrayList()); + if (!assignments.containsKey(e.getKey())) { + assignments.put(e.getKey(), new ArrayList()); } } List plans = this.balancer.balanceCluster(assignments); @@ -832,12 +798,12 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { @Override public void move(final byte[] encodedRegionName, final byte[] destServerName) throws UnknownRegionException { - Pair p = + Pair p = this.assignmentManager.getAssignment(encodedRegionName); if (p == null) throw new UnknownRegionException(Bytes.toString(encodedRegionName)); HRegionInfo hri = p.getFirst(); - HServerInfo dest = null; + ServerName dest = null; if (destServerName == null || destServerName.length == 0) { LOG.info("Passed destination servername is null/empty so " + "choosing a server at random"); @@ -845,12 +811,12 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { // Unassign will reassign it elsewhere choosing random server. this.assignmentManager.unassign(hri); } else { - dest = this.serverManager.getServerInfo(new String(destServerName)); - + dest = new ServerName(Bytes.toString(destServerName)); if (this.cpHost != null) { this.cpHost.preMove(p.getFirst(), p.getSecond(), dest); } RegionPlan rp = new RegionPlan(p.getFirst(), p.getSecond(), dest); + LOG.info("Added move plan " + rp + ", running balancer"); this.assignmentManager.balance(rp); if (this.cpHost != null) { this.cpHost.postMove(p.getFirst(), p.getSecond(), dest); @@ -928,8 +894,13 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } // 5. Trigger immediate assignment of the regions in round-robin fashion - List servers = serverManager.getOnlineServersList(); - this.assignmentManager.bulkAssignUserRegions(newRegions, servers, sync); + List servers = serverManager.getOnlineServersList(); + try { + this.assignmentManager.assignUserRegions(Arrays.asList(newRegions), servers); + } catch (InterruptedException ie) { + LOG.error("Caught " + ie + " during round-robin assignment"); + throw new IOException(ie); + } // 6. If sync, wait for assignment of regions if (sync) { @@ -1027,11 +998,11 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { * is found, but not currently deployed, the second element of the pair * may be null. 
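The fill-in loop in balance() above exists because the assignment map returned by the AssignmentManager omits servers holding no regions, and a balancer that never sees an idle server can never move regions onto it. The same fix as a standalone fragment, strings standing in for ServerName and HRegionInfo:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

public class FillInIdleServers {
  public static void main(String[] args) {
    Map<String, List<String>> assignments = new HashMap<>();
    assignments.put("host1,60020,1", new ArrayList<>(List.of("region-a", "region-b")));

    Set<String> online = new TreeSet<>(Set.of("host1,60020,1", "host2,60020,2"));
    // Returned map does not mention servers w/o assignments; add them so the
    // balancer sees them as potential takers.
    for (String sn : online) {
      assignments.computeIfAbsent(sn, k -> new ArrayList<>());
    }
    System.out.println(assignments); // host2 now present with an empty list
  }
}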
*/ - Pair getTableRegionForRow( + Pair getTableRegionForRow( final byte [] tableName, final byte [] rowKey) throws IOException { - final AtomicReference> result = - new AtomicReference>(null); + final AtomicReference> result = + new AtomicReference>(null); MetaScannerVisitor visitor = new MetaScannerVisitor() { @@ -1040,13 +1011,11 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { if (data == null || data.size() <= 0) { return true; } - Pair pair = - MetaReader.metaRowToRegionPair(data); + Pair pair = MetaReader.metaRowToRegionPair(data); if (pair == null) { return false; } - if (!Bytes.equals(pair.getFirst().getTableDesc().getName(), - tableName)) { + if (!Bytes.equals(pair.getFirst().getTableDesc().getName(), tableName)) { return false; } result.set(pair); @@ -1095,13 +1064,11 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { * @return cluster status */ public ClusterStatus getClusterStatus() { - ClusterStatus status = new ClusterStatus(); - status.setHBaseVersion(VersionInfo.getVersion()); - status.setServerInfo(serverManager.getOnlineServers().values()); - status.setDeadServers(serverManager.getDeadServers()); - status.setRegionsInTransition(assignmentManager.getRegionsInTransition()); - status.setClusterId(fileSystemManager.getClusterId()); - return status; + return new ClusterStatus(VersionInfo.getVersion(), + this.fileSystemManager.getClusterId(), + this.serverManager.getOnlineServers(), + this.serverManager.getDeadServers(), + this.assignmentManager.getRegionsInTransition()); } public String getClusterId() { @@ -1183,8 +1150,8 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } @Override - public String getServerName() { - return address.toString(); + public ServerName getServerName() { + return this.serverName; } @Override @@ -1274,7 +1241,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { return; } } - Pair pair = + Pair pair = MetaReader.getRegion(this.catalogTracker, regionName); if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName)); assignRegion(pair.getFirst()); @@ -1295,7 +1262,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { return; } } - Pair pair = + Pair pair = MetaReader.getRegion(this.catalogTracker, regionName); if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName)); HRegionInfo hri = pair.getFirst(); @@ -1306,6 +1273,16 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } } + /** + * Compute the average load across all region servers. + * Currently, this uses a very naive computation - just uses the number of + * regions being served, ignoring stats about number of requests. + * @return the average load + */ + public double getAverageLoad() { + return this.assignmentManager.getAverageLoad(); + } + /** * Utility for constructing an instance of the passed HMaster class. 
* @param masterClass @@ -1331,7 +1308,6 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { } } - /** * @see org.apache.hadoop.hbase.master.HMasterCommandLine */ diff --git a/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java b/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java index 6c92cbcab24..92acb352214 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java +++ b/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java @@ -27,6 +27,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.NavigableSet; import java.util.Random; import java.util.TreeMap; @@ -40,8 +41,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.ServerName; import com.google.common.collect.MinMaxPriorityQueue; @@ -103,6 +103,27 @@ public class LoadBalancer { } static RegionPlanComparator rpComparator = new RegionPlanComparator(); + /** + * Data structure that holds servername and 'load'. + */ + static class ServerAndLoad implements Comparable { + private final ServerName sn; + private final int load; + ServerAndLoad(final ServerName sn, final int load) { + this.sn = sn; + this.load = load; + } + + ServerName getServerName() {return this.sn;} + int getLoad() {return this.load;} + + @Override + public int compareTo(ServerAndLoad other) { + int diff = this.load - other.load; + return diff != 0? diff: this.sn.compareTo(other.getServerName()); + } + } + /** * Generate a global load balancing plan according to the specified map of * server information to the most loaded regions of each server. 
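ServerAndLoad above is the key of the sorted serversByLoad map built in balanceCluster below, so its compareTo must order by load first and fall back to the ServerName so that two distinct servers with equal load both survive as TreeMap keys. A self-contained check of that ordering, with simplified stand-in types rather than the HBase classes:

import java.util.TreeMap;

public class ServerAndLoadOrdering {
  static class ServerAndLoad implements Comparable<ServerAndLoad> {
    final String sn;
    final int load;
    ServerAndLoad(String sn, int load) { this.sn = sn; this.load = load; }
    @Override public int compareTo(ServerAndLoad other) {
      int diff = this.load - other.load;                     // primary: load
      return diff != 0 ? diff : this.sn.compareTo(other.sn); // tie-break: name
    }
    @Override public String toString() { return sn + "=" + load; }
  }

  public static void main(String[] args) {
    TreeMap<ServerAndLoad, String> byLoad = new TreeMap<>();
    byLoad.put(new ServerAndLoad("host2,60020,2", 5), "b");
    byLoad.put(new ServerAndLoad("host1,60020,1", 5), "a"); // equal load, distinct key
    byLoad.put(new ServerAndLoad("host3,60020,3", 1), "c");
    // Least loaded first; equal loads ordered by server name.
    System.out.println(byLoad.keySet()); // [host3,...=1, host1,...=5, host2,...=5]
  }
}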
@@ -189,28 +210,25 @@ public class LoadBalancer { * or null if cluster is already balanced */ public List balanceCluster( - Map> clusterState) { + Map> clusterState) { boolean emptyRegionServerPresent = false; long startTime = System.currentTimeMillis(); - // Make a map sorted by load and count regions - TreeMap> serversByLoad = - new TreeMap>( - new HServerInfo.LoadComparator()); int numServers = clusterState.size(); if (numServers == 0) { LOG.debug("numServers=0 so skipping load balancing"); return null; } + NavigableMap> serversByLoad = + new TreeMap>(); int numRegions = 0; // Iterate so we can count regions as we build the map - for(Map.Entry> server: - clusterState.entrySet()) { - int sz = server.getValue().size(); + for (Map.Entry> server: clusterState.entrySet()) { + List regions = server.getValue(); + int sz = regions.size(); if (sz == 0) emptyRegionServerPresent = true; - server.getKey().getLoad().setNumberOfRegions(sz); - numRegions += server.getKey().getLoad().getNumberOfRegions(); - serversByLoad.put(server.getKey(), server.getValue()); + numRegions += sz; + serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions); } // Check if we even need to do any load balancing @@ -218,13 +236,14 @@ public class LoadBalancer { // HBASE-3681 check sloppiness first int floor = (int) Math.floor(average * (1 - slop)); int ceiling = (int) Math.ceil(average * (1 + slop)); - if(serversByLoad.lastKey().getLoad().getNumberOfRegions() <= ceiling && - serversByLoad.firstKey().getLoad().getNumberOfRegions() >= floor) { + if (serversByLoad.lastKey().getLoad() <= ceiling && + serversByLoad.firstKey().getLoad() >= floor) { // Skipped because no server outside (min,max) range - LOG.info("Skipping load balancing. servers=" + numServers + " " + - "regions=" + numRegions + " average=" + average + " " + - "mostloaded=" + serversByLoad.lastKey().getLoad().getNumberOfRegions() + - " leastloaded=" + serversByLoad.firstKey().getLoad().getNumberOfRegions()); + LOG.info("Skipping load balancing because balanced cluster; " + + "servers=" + numServers + " " + + "regions=" + numRegions + " average=" + average + " " + + "mostloaded=" + serversByLoad.lastKey().getLoad() + + " leastloaded=" + serversByLoad.lastKey().getLoad()); return null; } int min = numRegions / numServers; @@ -232,21 +251,22 @@ public class LoadBalancer { // Balance the cluster // TODO: Look at data block locality or a more complex load to do this - MinMaxPriorityQueue regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create(); + MinMaxPriorityQueue regionsToMove = + MinMaxPriorityQueue.orderedBy(rpComparator).create(); List regionsToReturn = new ArrayList(); // Walk down most loaded, pruning each to the max int serversOverloaded = 0; - // flag used to fetch regions from head and tail of list, alternately + // flag used to fetch regions from head and tail of list, alternately boolean fetchFromTail = false; - Map serverBalanceInfo = - new TreeMap(); - for(Map.Entry> server : - serversByLoad.descendingMap().entrySet()) { - HServerInfo serverInfo = server.getKey(); - int regionCount = serverInfo.getLoad().getNumberOfRegions(); - if(regionCount <= max) { - serverBalanceInfo.put(serverInfo, new BalanceInfo(0, 0)); + Map serverBalanceInfo = + new TreeMap(); + for (Map.Entry> server: + serversByLoad.descendingMap().entrySet()) { + ServerAndLoad sal = server.getKey(); + int regionCount = sal.getLoad(); + if (regionCount <= max) { + serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0)); break; } serversOverloaded++; @@ -257,14 
+277,14 @@ public class LoadBalancer { Collections.sort(regions, riComparator); int numTaken = 0; for (int i = 0; i <= numToOffload; ) { - HRegionInfo hri = regions.get(i); // fetch from head + HRegionInfo hri = regions.get(i); // fetch from head if (fetchFromTail) { - hri = regions.get(regions.size() - 1 - i); + hri = regions.get(regions.size() - 1 - i); } i++; // Don't rebalance meta regions. if (hri.isMetaRegion()) continue; - regionsToMove.add(new RegionPlan(hri, serverInfo, null)); + regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null)); numTaken++; if (numTaken >= numToOffload) break; // fetch in alternate order if there is new region server @@ -272,48 +292,44 @@ public class LoadBalancer { fetchFromTail = !fetchFromTail; } } - serverBalanceInfo.put(serverInfo, - new BalanceInfo(numToOffload, (-1)*numTaken)); + serverBalanceInfo.put(sal.getServerName(), + new BalanceInfo(numToOffload, (-1)*numTaken)); } int totalNumMoved = regionsToMove.size(); - + // Walk down least loaded, filling each to the min int neededRegions = 0; // number of regions needed to bring all up to min fetchFromTail = false; - RegionPlan rp = null; - Map underloadedServers = new HashMap(); - for(Map.Entry> server : - serversByLoad.entrySet()) { - int regionCount = server.getKey().getLoad().getNumberOfRegions(); - if(regionCount >= min) { + + Map underloadedServers = new HashMap(); + for (Map.Entry> server: + serversByLoad.entrySet()) { + int regionCount = server.getKey().getLoad(); + if (regionCount >= min) { break; } - underloadedServers.put(server.getKey(), min - regionCount); + underloadedServers.put(server.getKey().getServerName(), min - regionCount); } // number of servers that get new regions int serversUnderloaded = underloadedServers.size(); int incr = 1; - List serverInfos = Arrays.asList(underloadedServers.keySet(). - toArray(new HServerInfo[serversUnderloaded])); - Collections.shuffle(serverInfos, RANDOM); + List sns = + Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded])); + Collections.shuffle(sns, RANDOM); while (regionsToMove.size() > 0) { int cnt = 0; int i = incr > 0 ? 0 : underloadedServers.size()-1; for (; i >= 0 && i < underloadedServers.size(); i += incr) { - if (0 == regionsToMove.size()) break; - HServerInfo si = serverInfos.get(i); + if (regionsToMove.isEmpty()) break; + ServerName si = sns.get(i); int numToTake = underloadedServers.get(si); if (numToTake == 0) continue; - - if (!fetchFromTail) rp = regionsToMove.remove(); - else rp = regionsToMove.removeLast(); - rp.setDestination(si); - regionsToReturn.add(rp); - + + addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn); if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } - + underloadedServers.put(si, numToTake-1); cnt++; BalanceInfo bi = serverBalanceInfo.get(si); @@ -325,17 +341,16 @@ public class LoadBalancer { } if (cnt == 0) break; // iterates underloadedServers in the other direction - LOG.info("First pass inner loop assigned " + cnt + " regions"); incr = -incr; } for (Integer i : underloadedServers.values()) { // If we still want to take some, increment needed - neededRegions += i; + neededRegions += i; } // If none needed to fill all to min and none left to drain all to max, // we are done - if(neededRegions == 0 && 0 == regionsToMove.size()) { + if (neededRegions == 0 && regionsToMove.isEmpty()) { long endTime = System.currentTimeMillis(); LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. 
" + "Moving " + totalNumMoved + " regions off of " + @@ -350,17 +365,18 @@ public class LoadBalancer { // If we need more to fill min, grab one from each most loaded until enough if (neededRegions != 0) { // Walk down most loaded, grabbing one from each until we get enough - for(Map.Entry> server : + for (Map.Entry> server : serversByLoad.descendingMap().entrySet()) { - BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey()); + BalanceInfo balanceInfo = + serverBalanceInfo.get(server.getKey().getServerName()); int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload(); if (idx >= server.getValue().size()) break; HRegionInfo region = server.getValue().get(idx); if (region.isMetaRegion()) continue; // Don't move meta regions. - regionsToMove.add(new RegionPlan(region, server.getKey(), null)); + regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null)); totalNumMoved++; - if(--neededRegions == 0) { + if (--neededRegions == 0) { // No more regions needed, done shedding break; } @@ -371,11 +387,11 @@ public class LoadBalancer { // Assign each underloaded up to the min, then if leftovers, assign to max // Walk down least loaded, assigning to each to fill up to min - for(Map.Entry> server : - serversByLoad.entrySet()) { - int regionCount = server.getKey().getLoad().getNumberOfRegions(); + for (Map.Entry> server : + serversByLoad.entrySet()) { + int regionCount = server.getKey().getLoad(); if (regionCount >= min) break; - BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey()); + BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName()); if(balanceInfo != null) { regionCount += balanceInfo.getNumRegionsAdded(); } @@ -385,11 +401,8 @@ public class LoadBalancer { int numToTake = min - regionCount; int numTaken = 0; while(numTaken < numToTake && 0 < regionsToMove.size()) { - if (!fetchFromTail) rp = regionsToMove.remove(); - else rp = regionsToMove.removeLast(); - rp.setDestination(server.getKey()); - regionsToReturn.add(rp); - + addRegionPlan(regionsToMove, fetchFromTail, + server.getKey().getServerName(), regionsToReturn); numTaken++; if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; @@ -398,21 +411,19 @@ public class LoadBalancer { } // If we still have regions to dish out, assign underloaded to max - if(0 < regionsToMove.size()) { - for(Map.Entry> server : + if (0 < regionsToMove.size()) { + for (Map.Entry> server : serversByLoad.entrySet()) { - int regionCount = server.getKey().getLoad().getNumberOfRegions(); + int regionCount = server.getKey().getLoad(); if(regionCount >= max) { break; } - if (!fetchFromTail) rp = regionsToMove.remove(); - else rp = regionsToMove.removeLast(); - rp.setDestination(server.getKey()); - regionsToReturn.add(rp); + addRegionPlan(regionsToMove, fetchFromTail, + server.getKey().getServerName(), regionsToReturn); if (emptyRegionServerPresent) { fetchFromTail = !fetchFromTail; } - if(0 == regionsToMove.size()) { + if (regionsToMove.isEmpty()) { break; } } @@ -420,15 +431,15 @@ public class LoadBalancer { long endTime = System.currentTimeMillis(); - if (0 != regionsToMove.size() || neededRegions != 0) { + if (!regionsToMove.isEmpty() || neededRegions != 0) { // Emit data so can diagnose how balancer went astray. 
LOG.warn("regionsToMove=" + totalNumMoved + - ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded + - ", serversUnderloaded=" + serversUnderloaded); + ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded + + ", serversUnderloaded=" + serversUnderloaded); StringBuilder sb = new StringBuilder(); - for (Map.Entry> e: clusterState.entrySet()) { + for (Map.Entry> e: clusterState.entrySet()) { if (sb.length() > 0) sb.append(", "); - sb.append(e.getKey().getServerName()); + sb.append(e.getKey().toString()); sb.append(" "); sb.append(e.getValue().size()); } @@ -444,6 +455,18 @@ public class LoadBalancer { return regionsToReturn; } + /** + * Add a region from the head or tail to the List of regions to return. + */ + void addRegionPlan(final MinMaxPriorityQueue regionsToMove, + final boolean fetchFromTail, final ServerName sn, List regionsToReturn) { + RegionPlan rp = null; + if (!fetchFromTail) rp = regionsToMove.remove(); + else rp = regionsToMove.removeLast(); + rp.setDestination(sn); + regionsToReturn.add(rp); + } + /** * @param regions * @return Randomization of passed regions @@ -457,11 +480,6 @@ public class LoadBalancer { * Stores additional per-server information about the regions added/removed * during the run of the balancing algorithm. * - * For servers that receive additional regions, we are not updating the number - * of regions in HServerInfo once we decide to reassign regions to a server, - * but we need this information later in the algorithm. This is stored in - * numRegionsAdded. - * * For servers that shed regions, we need to track which regions we have * already shed. nextRegionForUnload contains the index in the list * of regions on the server that is the next to be shed. @@ -506,14 +524,14 @@ public class LoadBalancer { * @return map of server to the regions it should take, or null if no * assignment is possible (ie. no regions or no servers) */ - public static Map> roundRobinAssignment( - HRegionInfo [] regions, List servers) { - if(regions.length == 0 || servers.size() == 0) { + public static Map> roundRobinAssignment( + List regions, List servers) { + if (regions.isEmpty() || servers.isEmpty()) { return null; } - Map> assignments = - new TreeMap>(); - int numRegions = regions.length; + Map> assignments = + new TreeMap>(); + int numRegions = regions.size(); int numServers = servers.size(); int max = (int)Math.ceil((float)numRegions/numServers); int serverIdx = 0; @@ -522,10 +540,10 @@ public class LoadBalancer { } int regionIdx = 0; for (int j = 0; j < numServers; j++) { - HServerInfo server = servers.get((j+serverIdx) % numServers); + ServerName server = servers.get((j + serverIdx) % numServers); List serverRegions = new ArrayList(max); for (int i=regionIdx; i> retainAssignment( - Map regions, List servers) { - Map> assignments = - new TreeMap>(); - // Build a map of server addresses to server info so we can match things up - Map serverMap = - new TreeMap(); - for (HServerInfo server : servers) { - serverMap.put(server.getServerAddress(), server); + public static Map> retainAssignment( + Map regions, List servers) { + Map> assignments = + new TreeMap>(); + for (ServerName server : servers) { assignments.put(server, new ArrayList()); } - for (Map.Entry region : regions.entrySet()) { - HServerAddress hsa = region.getValue(); - HServerInfo server = hsa == null? 
null: serverMap.get(hsa); - if (server != null) { - assignments.get(server).add(region.getKey()); + for (Map.Entry region : regions.entrySet()) { + ServerName sn = region.getValue(); + if (sn != null && servers.contains(sn)) { + assignments.get(sn).add(region.getKey()); } else { - assignments.get(servers.get(RANDOM.nextInt(assignments.size()))).add( - region.getKey()); + int size = assignments.size(); + assignments.get(servers.get(RANDOM.nextInt(size))).add(region.getKey()); } } return assignments; @@ -692,17 +705,17 @@ public class LoadBalancer { * @param servers * @return map of regions to the server it should be assigned to */ - public static Map immediateAssignment( - List regions, List servers) { - Map assignments = - new TreeMap(); + public static Map immediateAssignment( + List regions, List servers) { + Map assignments = + new TreeMap(); for(HRegionInfo region : regions) { assignments.put(region, servers.get(RANDOM.nextInt(servers.size()))); } return assignments; } - public static HServerInfo randomAssignment(List servers) { + public static ServerName randomAssignment(List servers) { if (servers == null || servers.isEmpty()) { LOG.warn("Wanted to do random assignment but no servers to assign to"); return null; @@ -722,21 +735,21 @@ public class LoadBalancer { */ public static class RegionPlan implements Comparable { private final HRegionInfo hri; - private final HServerInfo source; - private HServerInfo dest; + private final ServerName source; + private ServerName dest; /** * Instantiate a plan for a region move, moving the specified region from * the specified source server to the specified destination server. * * Destination server can be instantiated as null and later set - * with {@link #setDestination(HServerInfo)}. + * with {@link #setDestination(ServerName)}. * * @param hri region to be moved * @param source regionserver region should be moved from * @param dest regionserver region should be moved to */ - public RegionPlan(final HRegionInfo hri, HServerInfo source, HServerInfo dest) { + public RegionPlan(final HRegionInfo hri, ServerName source, ServerName dest) { this.hri = hri; this.source = source; this.dest = dest; @@ -745,7 +758,7 @@ public class LoadBalancer { /** * Set the destination server for the plan for this region. */ - public void setDestination(HServerInfo dest) { + public void setDestination(ServerName dest) { this.dest = dest; } @@ -753,7 +766,7 @@ public class LoadBalancer { * Get the source server for the plan for this region. * @return server info for source */ - public HServerInfo getSource() { + public ServerName getSource() { return source; } @@ -761,7 +774,7 @@ public class LoadBalancer { * Get the destination server for the plan for this region. * @return server info for destination */ - public HServerInfo getDestination() { + public ServerName getDestination() { return dest; } @@ -789,8 +802,8 @@ public class LoadBalancer { @Override public String toString() { return "hri=" + this.hri.getRegionNameAsString() + ", src=" + - (this.source == null? "": this.source.getServerName()) + - ", dest=" + (this.dest == null? "": this.dest.getServerName()); + (this.source == null? "": this.source.toString()) + + ", dest=" + (this.dest == null? 
"": this.dest.toString()); } } } diff --git a/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java b/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java index 4bb072eff69..ec425a60b07 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java +++ b/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java @@ -287,7 +287,7 @@ public class MasterCoprocessorHost } } - void preMove(final HRegionInfo region, final HServerInfo srcServer, final HServerInfo destServer) + void preMove(final HRegionInfo region, final ServerName srcServer, final ServerName destServer) throws UnknownRegionException { ObserverContext ctx = null; for (MasterEnvironment env: coprocessors) { @@ -302,7 +302,7 @@ public class MasterCoprocessorHost } } - void postMove(final HRegionInfo region, final HServerInfo srcServer, final HServerInfo destServer) + void postMove(final HRegionInfo region, final ServerName srcServer, final ServerName destServer) throws UnknownRegionException { ObserverContext ctx = null; for (MasterEnvironment env: coprocessors) { diff --git a/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index 55e0162702e..eab5f298666 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -20,7 +20,7 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; -import java.util.Map; +import java.util.Set; import java.util.UUID; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -34,9 +34,9 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.master.metrics.MasterMetrics; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.Store; @@ -95,7 +95,7 @@ public class MasterFileSystem { conf.getBoolean("hbase.master.distributed.log.splitting", true); if (this.distributedLogSplitting) { this.splitLogManager = new SplitLogManager(master.getZooKeeper(), - master.getConfiguration(), master, master.getServerName()); + master.getConfiguration(), master, master.getServerName().toString()); this.splitLogManager.finishInitialization(); } else { this.splitLogManager = null; @@ -175,9 +175,9 @@ public class MasterFileSystem { * Inspect the log directory to recover any log file without * an active region server. 
* @param onlineServers Map of online servers keyed by - * {@link HServerInfo#getServerName()} + * {@link ServerName} */ - void splitLogAfterStartup(final Map onlineServers) { + void splitLogAfterStartup(final Set onlineServers) { Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME); try { if (!this.fs.exists(logsDirPath)) { @@ -197,8 +197,8 @@ public class MasterFileSystem { return; } for (FileStatus status : logFolders) { - String serverName = status.getPath().getName(); - if (onlineServers.get(serverName) == null) { + ServerName serverName = new ServerName(status.getPath().getName()); + if (!onlineServers.contains(serverName)) { LOG.info("Log folder " + status.getPath() + " doesn't belong " + "to a known region server, splitting"); splitLog(serverName); @@ -209,9 +209,9 @@ public class MasterFileSystem { } } - public void splitLog(final String serverName) { + public void splitLog(final ServerName serverName) { long splitTime = 0, splitLogSize = 0; - Path logDir = new Path(this.rootdir, HLog.getHLogDirectoryName(serverName)); + Path logDir = new Path(this.rootdir, HLog.getHLogDirectoryName(serverName.toString())); if (distributedLogSplitting) { splitTime = EnvironmentEdgeManager.currentTimeMillis(); try { diff --git a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 04befe91093..64c14df829b 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; +import java.net.InetAddress; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -32,13 +33,12 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClockOutOfSyncException; -import org.apache.hadoop.hbase.HMsg; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HServerLoad; import org.apache.hadoop.hbase.PleaseHoldException; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.YouAreDeadException; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.client.HConnection; @@ -47,12 +47,9 @@ import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; -import org.apache.hadoop.hbase.master.metrics.MasterMetrics; -import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; /** - * The ServerManager class manages info about region servers - HServerInfo, - * load numbers, dying servers, etc. + * The ServerManager class manages info about region servers. *
<p>
* Maintains lists of online and dead servers. Processes the startups, * shutdowns, and deaths of region servers. @@ -70,23 +67,20 @@ public class ServerManager { // Set if we are to shutdown the cluster. private volatile boolean clusterShutdown = false; - /** The map of known server names to server info */ - private final Map onlineServers = - new ConcurrentHashMap(); + /** Map of registered servers to their current load */ + private final Map onlineServers = + new ConcurrentHashMap(); // TODO: This is strange to have two maps but HSI above is used on both sides /** * Map from full server-instance name to the RPC connection for this server. */ - private final Map serverConnections = - new HashMap(); + private final Map serverConnections = + new HashMap(); private final Server master; private final MasterServices services; - // Reporting to track master metrics. - private final MasterMetrics metrics; - private final DeadServer deadservers; private final long maxSkew; @@ -95,26 +89,25 @@ public class ServerManager { * Constructor. * @param master * @param services - * @param metrics */ - public ServerManager(final Server master, final MasterServices services, - MasterMetrics metrics) { + public ServerManager(final Server master, final MasterServices services) { this.master = master; this.services = services; - this.metrics = metrics; Configuration c = master.getConfiguration(); maxSkew = c.getLong("hbase.master.maxclockskew", 30000); - this.deadservers = - new DeadServer(c.getInt("hbase.master.maxdeadservers", 100)); + this.deadservers = new DeadServer(); } /** * Let the server manager know a new regionserver has come online - * @param serverInfo + * @param ia The remote address + * @param port The remote port + * @param serverStartcode * @param serverCurrentTime The current time of the region server in ms * @throws IOException */ - void regionServerStartup(final HServerInfo serverInfo, long serverCurrentTime) + void regionServerStartup(final InetAddress ia, final int port, + final long serverStartcode, long serverCurrentTime) throws IOException { // Test for case where we get a region startup message from a regionserver // that has been quickly restarted but whose znode expiration handler has @@ -123,58 +116,66 @@ public class ServerManager { // is, reject the server and trigger its expiration. The next time it comes // in, it should have been removed from serverAddressToServerInfo and queued // for processing by ProcessServerShutdown. - HServerInfo info = new HServerInfo(serverInfo); - checkIsDead(info.getServerName(), "STARTUP"); - checkAlreadySameHostPort(info); - checkClockSkew(info, serverCurrentTime); - recordNewServer(info, false, null); + ServerName sn = new ServerName(ia.getHostName(), port, serverStartcode); + checkClockSkew(sn, serverCurrentTime); + checkIsDead(sn, "STARTUP"); + checkAlreadySameHostPort(sn); + recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD); + } + + void regionServerReport(ServerName sn, HServerLoad hsl) + throws YouAreDeadException, PleaseHoldException { + checkIsDead(sn, "REPORT"); + if (!this.onlineServers.containsKey(sn)) { + // Already have this host+port combo and its just different start code? + checkAlreadySameHostPort(sn); + // Just let the server in. Presume master joining a running cluster. + // recordNewServer is what happens at the end of reportServerStartup. + // The only thing we are skipping is passing back to the regionserver + // the ServerName to use. 
Here we presume a master has already done + // that so we'll press on with whatever it gave us for ServerName. + recordNewServer(sn, hsl); + } else { + this.onlineServers.put(sn, hsl); + } } /** * Test to see if we have a server of same host and port already. - * @param serverInfo + * @param serverName * @throws PleaseHoldException */ - void checkAlreadySameHostPort(final HServerInfo serverInfo) + void checkAlreadySameHostPort(final ServerName serverName) throws PleaseHoldException { - String hostAndPort = serverInfo.getServerAddress().toString(); - HServerInfo existingServer = - haveServerWithSameHostAndPortAlready(serverInfo.getHostnamePort()); + ServerName existingServer = + ServerName.findServerWithSameHostnamePort(getOnlineServersList(), serverName); if (existingServer != null) { - String message = "Server start rejected; we already have " + hostAndPort + - " registered; existingServer=" + existingServer + ", newServer=" + serverInfo; + String message = "Server serverName=" + serverName + + " rejected; we already have " + existingServer.toString() + + " registered with same hostname and port"; LOG.info(message); - if (existingServer.getStartCode() < serverInfo.getStartCode()) { + if (existingServer.getStartcode() < serverName.getStartcode()) { LOG.info("Triggering server recovery; existingServer " + - existingServer.getServerName() + " looks stale"); + existingServer + " looks stale"); expireServer(existingServer); } throw new PleaseHoldException(message); } } - private HServerInfo haveServerWithSameHostAndPortAlready(final String hostnamePort) { - synchronized (this.onlineServers) { - for (Map.Entry e: this.onlineServers.entrySet()) { - if (e.getValue().getHostnamePort().equals(hostnamePort)) { - return e.getValue(); - } - } - } - return null; - } - /** * Checks if the clock skew between the server and the master. If the clock * skew is too much it will throw an Exception. + * @param serverName Incoming servers's name + * @param serverCurrentTime * @throws ClockOutOfSyncException */ - private void checkClockSkew(final HServerInfo serverInfo, + private void checkClockSkew(final ServerName serverName, final long serverCurrentTime) throws ClockOutOfSyncException { long skew = System.currentTimeMillis() - serverCurrentTime; if (skew > maxSkew) { - String message = "Server " + serverInfo.getServerName() + " has been " + + String message = "Server " + serverName + " has been " + "rejected; Reported time is too far out of sync with master. " + "Time difference of " + skew + "ms > max allowed of " + maxSkew + "ms"; LOG.warn(message); @@ -186,11 +187,11 @@ public class ServerManager { * If this server is on the dead list, reject it with a YouAreDeadException. * If it was dead but came back with a new start code, remove the old entry * from the dead list. - * @param serverName Server name formatted as host_port_startcode. + * @param serverName * @param what START or REPORT * @throws YouAreDeadException */ - private void checkIsDead(final String serverName, final String what) + private void checkIsDead(final ServerName serverName, final String what) throws YouAreDeadException { if (this.deadservers.isDeadServer(serverName)) { // host name, port and start code all match with existing one of the @@ -210,157 +211,34 @@ public class ServerManager { } /** - * Adds the HSI to the RS list - * @param info The region server informations - * @param useInfoLoad True if the load from the info should be used; e.g. - * under a master failover - * @param hri Region interface. Can be null. 
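checkClockSkew above compares the master's clock against the reported serverCurrentTime and rejects the server when the lag exceeds hbase.master.maxclockskew (30000ms per the ServerManager constructor above). As written the check is one-sided: a regionserver running ahead of the master produces a negative skew and passes. A standalone illustration with made-up timestamps:

public class ClockSkewCheck {
  public static void main(String[] args) {
    long maxSkew = 30000; // hbase.master.maxclockskew default from above
    long masterNow = 1303948123456L;
    long serverCurrentTime = 1303948083456L; // 40s behind the master
    long skew = masterNow - serverCurrentTime;
    if (skew > maxSkew) {
      System.out.println("Server rejected; time difference of " + skew +
          "ms > max allowed of " + maxSkew + "ms");
    }
  }
}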
+ * Adds the onlineServers list. + * @param hsl + * @param serverName The remote servers name. */ - void recordNewServer(HServerInfo info, boolean useInfoLoad, - HRegionInterface hri) { - HServerLoad load = useInfoLoad? info.getLoad(): new HServerLoad(); - String serverName = info.getServerName(); - LOG.info("Registering server=" + serverName + ", regionCount=" + - load.getLoad() + ", userLoad=" + useInfoLoad); - info.setLoad(load); - // TODO: Why did we update the RS location ourself? Shouldn't RS do this? - // masterStatus.getZooKeeper().updateRSLocationGetWatch(info, watcher); - // -- If I understand the question, the RS does not update the location - // because could be disagreement over locations because of DNS issues; only - // master does DNS now -- St.Ack 20100929. - this.onlineServers.put(serverName, info); - if (hri == null) { - serverConnections.remove(serverName); - } else { - serverConnections.put(serverName, hri); - } - } - - /** - * Called to process the messages sent from the region server to the master - * along with the heart beat. - * - * @param serverInfo - * @param msgs - * @param mostLoadedRegions Array of regions the region server is submitting - * as candidates to be rebalanced, should it be overloaded - * @return messages from master to region server indicating what region - * server should do. - * - * @throws IOException - */ - HMsg [] regionServerReport(final HServerInfo serverInfo, - final HMsg [] msgs, final HRegionInfo[] mostLoadedRegions) - throws IOException { - // Be careful. This method does returns in the middle. - HServerInfo info = new HServerInfo(serverInfo); - - // Check if dead. If it is, it'll get a 'You Are Dead!' exception. - checkIsDead(info.getServerName(), "REPORT"); - - // If we don't know this server, tell it shutdown. - HServerInfo storedInfo = this.onlineServers.get(info.getServerName()); - if (storedInfo == null) { - // Maybe we already have this host+port combo and its just different - // start code? - checkAlreadySameHostPort(info); - // Just let the server in. Presume master joining a running cluster. - // recordNewServer is what happens at the end of reportServerStartup. - // The only thing we are skipping is passing back to the regionserver - // the HServerInfo to use. Here we presume a master has already done - // that so we'll press on with whatever it gave us for HSI. - recordNewServer(info, true, null); - // If msgs, put off their processing but this is not enough because - // its possible that the next time the server reports in, we'll still - // not be up and serving. For example, if a split, we'll need the - // regions and servers setup in the master before the below - // handleSplitReport will work. TODO: FIx!! - if (msgs.length > 0) - throw new PleaseHoldException("FIX! Putting off " + - "message processing because not yet rwady but possible we won't be " + - "ready next on next report"); - } - - for (HMsg msg: msgs) { - LOG.info("Received " + msg + " from " + serverInfo.getServerName()); - switch (msg.getType()) { - default: - LOG.error("Unhandled msg type " + msg); - } - } - - HMsg [] reply = null; - if (this.clusterShutdown) { - if (isOnlyMetaRegionServersOnline()) { - LOG.info("Only catalog regions remaining; running unassign"); - // The only remaining regions are catalog regions. - // Shutdown needs to be staggered; the meta regions need to close last - // in case they need to be updated during the close melee. If only - // catalog reigons remaining, tell them they can go down now too. 
On - // close of region, the regionservers should then shut themselves down. - this.services.getAssignmentManager().unassignCatalogRegions(); - } - } - return processRegionServerAllsWell(info, mostLoadedRegions, reply); - } - - /** - * @return True if all online servers are carrying one or more catalog - * regions, there are no servers online carrying user regions only - */ - private boolean isOnlyMetaRegionServersOnline() { - List onlineServers = getOnlineServersList(); - for (HServerInfo hsi: onlineServers) { - if (!this.services.getAssignmentManager().isMetaRegionServer(hsi)) { - return false; - } - } - return true; - } - - /** - * RegionServer is checking in, no exceptional circumstances - * @param serverInfo - * @param mostLoadedRegions - * @param msgs - * @return - * @throws IOException - */ - private HMsg[] processRegionServerAllsWell(HServerInfo serverInfo, - final HRegionInfo[] mostLoadedRegions, HMsg[] msgs) - throws IOException { - // Refresh the info object and the load information - this.onlineServers.put(serverInfo.getServerName(), serverInfo); - HServerLoad load = serverInfo.getLoad(); - if (load != null && this.metrics != null) { - this.metrics.incrementRequests(load.getNumberOfRequests()); - } - // No more piggyback messages on heartbeats for other stuff - return msgs; - } - - /** - * Make server load accessible to AssignmentManager - * @param serverName - * @return - * @throws HServerLoad if serverName is known - */ - HServerLoad getLoad(String serverName) { - HServerInfo hsi = this.onlineServers.get(serverName); - if (hsi == null) return null; - return hsi.getLoad(); + void recordNewServer(final ServerName serverName, final HServerLoad hsl) { + LOG.info("Registering server=" + serverName); + this.onlineServers.put(serverName, hsl); + this.serverConnections.remove(serverName); } /** * @param serverName - * @return True if we removed server from the list. + * @return HServerLoad if serverName is known else null */ - private boolean removeServerInfo(final String serverName) { - HServerInfo info = this.onlineServers.remove(serverName); - if (info != null) { - return true; - } - return false; + public HServerLoad getLoad(final ServerName serverName) { + return this.onlineServers.get(serverName.toString()); + } + + /** + * @param serverName + * @return HServerLoad if serverName is known else null + * @deprecated Use {@link #getLoad(HServerAddress)} + */ + public HServerLoad getLoad(final HServerAddress address) { + ServerName sn = new ServerName(address.toString(), -1); + ServerName actual = + ServerName.findServerWithSameHostnamePort(this.getOnlineServersList(), sn); + return actual == null? null: getLoad(actual); } /** @@ -373,9 +251,9 @@ public class ServerManager { int totalLoad = 0; int numServers = 0; double averageLoad = 0.0; - for (HServerInfo hsi : onlineServers.values()) { + for (HServerLoad hsl: this.onlineServers.values()) { numServers++; - totalLoad += hsi.getLoad().getNumberOfRegions(); + totalLoad += hsl.getNumberOfRegions(); } averageLoad = (double)totalLoad / (double)numServers; return averageLoad; @@ -387,25 +265,17 @@ public class ServerManager { return this.onlineServers.size(); } - /** - * @param name server name - * @return HServerInfo for the given server address - */ - public HServerInfo getServerInfo(String name) { - return this.onlineServers.get(name); - } - /** * @return Read-only map of servers to serverinfo */ - public Map getOnlineServers() { + public Map getOnlineServers() { // Presumption is that iterating the returned Map is OK. 
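The deprecated address-based getLoad above keeps old callers working by matching only hostname and port against the live server list, ignoring the startcode (hence the -1 placeholder). A hand-rolled sketch of that matching rule; ServerName.findServerWithSameHostnamePort is the real helper and its internals are not shown in this patch:

import java.util.List;

public class HostPortMatch {
  // Match on the host,port prefix of host,port,startcode, ignoring startcode.
  static String findServerWithSameHostnamePort(List<String> online, String candidate) {
    String hostPort = candidate.substring(0, candidate.lastIndexOf(','));
    for (String sn : online) {
      if (sn.startsWith(hostPort + ",")) return sn;
    }
    return null;
  }

  public static void main(String[] args) {
    List<String> online = List.of("host1,60020,1303948123456");
    // -1 startcode, as in new ServerName(address.toString(), -1) above.
    System.out.println(findServerWithSameHostnamePort(online, "host1,60020,-1"));
  }
}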
synchronized (this.onlineServers) { return Collections.unmodifiableMap(this.onlineServers); } } - public Set getDeadServers() { + public Set getDeadServers() { return this.deadservers.clone(); } @@ -417,40 +287,11 @@ public class ServerManager { return this.deadservers.areDeadServersInProgress(); } - /** - * @param hsa - * @return The HServerInfo whose HServerAddress is hsa or null - * if nothing found. - */ - public HServerInfo getHServerInfo(final HServerAddress hsa) { - synchronized(this.onlineServers) { - // TODO: This is primitive. Do a better search. - for (Map.Entry e: this.onlineServers.entrySet()) { - if (e.getValue().getServerAddress().equals(hsa)) { - return e.getValue(); - } - } - } - return null; - } - - private void notifyOnlineServers() { - synchronized (this.onlineServers) { - this.onlineServers.notifyAll(); - } - } - - /* - * Wait on regionservers to report in - * with {@link #regionServerReport(HServerInfo, HMsg[])} so they get notice - * the master is going down. Waits until all region servers come back with - * a MSG_REGIONSERVER_STOP. - */ void letRegionServersShutdown() { synchronized (onlineServers) { - while (onlineServers.size() > 0) { + while (!onlineServers.isEmpty()) { StringBuilder sb = new StringBuilder(); - for (String key: this.onlineServers.keySet()) { + for (ServerName key: this.onlineServers.keySet()) { if (sb.length() > 0) { sb.append(", "); } @@ -470,19 +311,15 @@ public class ServerManager { * Expire the passed server. Add it to list of deadservers and queue a * shutdown processing. */ - public synchronized void expireServer(final HServerInfo hsi) { - // First check a server to expire. ServerName is of the form: - // , , - String serverName = hsi.getServerName(); - HServerInfo info = this.onlineServers.get(serverName); - if (info == null) { - LOG.warn("Received expiration of " + hsi.getServerName() + + public synchronized void expireServer(final ServerName serverName) { + if (!this.onlineServers.containsKey(serverName)) { + LOG.warn("Received expiration of " + serverName + " but server is not currently online"); return; } if (this.deadservers.contains(serverName)) { // TODO: Can this happen? It shouldn't be online in this case? - LOG.warn("Received expiration of " + hsi.getServerName() + + LOG.warn("Received expiration of " + serverName + " but server shutdown is already in progress"); return; } @@ -495,7 +332,7 @@ public class ServerManager { // If cluster is going down, yes, servers are going to be expiring; don't // process as a dead server if (this.clusterShutdown) { - LOG.info("Cluster shutdown set; " + hsi.getServerName() + + LOG.info("Cluster shutdown set; " + serverName + " expired; onlineServers=" + this.onlineServers.size()); if (this.onlineServers.isEmpty()) { master.stop("Cluster shutdown set; onlineServer=0"); @@ -506,9 +343,8 @@ public class ServerManager { // Was this server carrying root? boolean carryingRoot; try { - HServerAddress address = ct.getRootLocation(); - carryingRoot = address != null && - hsi.getServerAddress().equals(address); + ServerName address = ct.getRootLocation(); + carryingRoot = address.equals(serverName); } catch (InterruptedException e) { Thread.currentThread().interrupt(); LOG.info("Interrupted"); @@ -519,15 +355,14 @@ public class ServerManager { // run into fact that meta is dead). I can ask assignment manager. It // has an inmemory list of who has what. This list will be cleared as we // process the dead server but should be find asking it now. 
- HServerAddress address = ct.getMetaLocation(); - boolean carryingMeta = - address != null && hsi.getServerAddress().equals(address); + ServerName address = ct.getMetaLocation(); + boolean carryingMeta = address.equals(serverName); if (carryingRoot || carryingMeta) { this.services.getExecutorService().submit(new MetaServerShutdownHandler(this.master, - this.services, this.deadservers, info, carryingRoot, carryingMeta)); + this.services, this.deadservers, serverName, carryingRoot, carryingMeta)); } else { this.services.getExecutorService().submit(new ServerShutdownHandler(this.master, - this.services, this.deadservers, info)); + this.services, this.deadservers, serverName)); } LOG.debug("Added=" + serverName + " to dead servers, submitted shutdown handler to be executed, root=" + @@ -544,12 +379,12 @@ public class ServerManager { * @param server server to open a region * @param region region to open */ - public void sendRegionOpen(HServerInfo server, HRegionInfo region) + public void sendRegionOpen(final ServerName server, HRegionInfo region) throws IOException { HRegionInterface hri = getServerConnection(server); if (hri == null) { - LOG.warn("Attempting to send OPEN RPC to server " + server.getServerName() - + " failed because no RPC connection found to this server"); + LOG.warn("Attempting to send OPEN RPC to server " + server.toString() + + " failed because no RPC connection found to this server"); return; } hri.openRegion(region); @@ -563,12 +398,12 @@ public class ServerManager { * @param server server to open a region * @param regions regions to open */ - public void sendRegionOpen(HServerInfo server, List regions) + public void sendRegionOpen(ServerName server, List regions) throws IOException { HRegionInterface hri = getServerConnection(server); if (hri == null) { - LOG.warn("Attempting to send OPEN RPC to server " + server.getServerName() - + " failed because no RPC connection found to this server"); + LOG.warn("Attempting to send OPEN RPC to server " + server.toString() + + " failed because no RPC connection found to this server"); return; } hri.openRegions(regions); @@ -584,13 +419,13 @@ public class ServerManager { * @return true if server acknowledged close, false if not * @throws IOException */ - public boolean sendRegionClose(HServerInfo server, HRegionInfo region) + public boolean sendRegionClose(ServerName server, HRegionInfo region) throws IOException { if (server == null) throw new NullPointerException("Passed server is null"); HRegionInterface hri = getServerConnection(server); if (hri == null) { throw new IOException("Attempting to send CLOSE RPC to server " + - server.getServerName() + " for region " + + server.toString() + " for region " + region.getRegionNameAsString() + " failed because no RPC connection found to this server"); } @@ -598,31 +433,30 @@ public class ServerManager { } /** - * @param info + * @param sn * @return * @throws IOException * @throws RetriesExhaustedException wrapping a ConnectException if failed * putting up proxy. 
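expireServer above forks on whether the dead server carried -ROOT- or .META.: catalog carriers go to MetaServerShutdownHandler so catalog regions come back before user-region cleanup needs to read them. A compact sketch of the dispatch with strings for the types; Objects.equals guards the case of a not-yet-known catalog location, which the hunk above compares directly:

import java.util.Objects;

public class ShutdownDispatch {
  static String dispatch(String serverName, String rootLocation, String metaLocation) {
    boolean carryingRoot = Objects.equals(rootLocation, serverName);
    boolean carryingMeta = Objects.equals(metaLocation, serverName);
    return (carryingRoot || carryingMeta)
        ? "MetaServerShutdownHandler(root=" + carryingRoot + ", meta=" + carryingMeta + ")"
        : "ServerShutdownHandler";
  }

  public static void main(String[] args) {
    System.out.println(dispatch("host1,60020,1", "host1,60020,1", "host2,60020,2"));
    System.out.println(dispatch("host3,60020,3", "host1,60020,1", "host2,60020,2"));
  }
}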
*/ - private HRegionInterface getServerConnection(HServerInfo info) + private HRegionInterface getServerConnection(final ServerName sn) throws IOException { HConnection connection = HConnectionManager.getConnection(this.master.getConfiguration()); - HRegionInterface hri = serverConnections.get(info.getServerName()); + HRegionInterface hri = this.serverConnections.get(sn); if (hri == null) { - LOG.debug("New connection to " + info.getServerName()); - hri = connection.getHRegionConnection(info.getServerAddress(), false); - this.serverConnections.put(info.getServerName(), hri); + LOG.debug("New connection to " + sn.toString()); + hri = connection.getHRegionConnection(sn.getHostname(), sn.getPort()); + this.serverConnections.put(sn, hri); } return hri; } /** * Waits for the regionservers to report in. - * @return Count of regions out on cluster * @throws InterruptedException */ - public int waitForRegionServers() + public void waitForRegionServers() throws InterruptedException { long interval = this.master.getConfiguration(). getLong("hbase.master.wait.on.regionservers.interval", 3000); @@ -640,31 +474,18 @@ } oldcount = count; } - // Count how many regions deployed out on cluster. If fresh start, it'll - // be none but if not a fresh start, we'll have registered servers when - // they came in on the {@link #regionServerReport(HServerInfo)} as opposed to - // {@link #regionServerStartup(HServerInfo)} and it'll be carrying an - // actual server load. - int regionCount = 0; - for (Map.Entry<String, HServerInfo> e: this.onlineServers.entrySet()) { - HServerLoad load = e.getValue().getLoad(); - if (load != null) regionCount += load.getLoad(); - } - LOG.info("Exiting wait on regionserver(s) to checkin; count=" + count + - ", stopped=" + this.master.isStopped() + - ", count of regions out on cluster=" + regionCount); - return regionCount; } /** * @return A copy of the internal list of online servers. */ - public List<HServerInfo> getOnlineServersList() { + public List<ServerName> getOnlineServersList() { // TODO: optimize the load balancer call so we don't need to make a new list - return new ArrayList<HServerInfo>(onlineServers.values()); + // TODO: FIX. THIS IS POPULAR CALL. + return new ArrayList<ServerName>(this.onlineServers.keySet()); } - public boolean isServerOnline(String serverName) { + public boolean isServerOnline(ServerName serverName) { return onlineServers.containsKey(serverName); } @@ -681,6 +502,5 @@ * Stop the ServerManager. Currently does nothing. */ public void stop() { - } } diff --git a/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java b/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java index dada818e262..7c3eb2aa93e 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java @@ -156,10 +156,13 @@ public class SplitLogManager extends ZooKeeperListener { } public void finishInitialization() { - Threads.setDaemonThreadRunning(timeoutMonitor, serverName - + ".splitLogManagerTimeoutMonitor"); - this.watcher.registerListener(this); - lookForOrphans(); + Threads.setDaemonThreadRunning(timeoutMonitor, serverName + + ".splitLogManagerTimeoutMonitor"); + // Watcher can be null during tests with Mock'd servers.
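The slimmed-down waitForRegionServers above no longer tallies regions; it only waits until the set of checked-in regionservers stops growing. Since the loop body is partly elided in this hunk, the fragment below is a reconstruction of that stabilization idea under stated assumptions: the interval and the oldcount/count names come from the visible code, the loop condition is guessed:

public class WaitForRegionServersSketch {
  static int online = 0; // stands in for onlineServers.size()

  public static void main(String[] args) throws InterruptedException {
    long interval = 300; // 3000 in the patch; shortened for the demo
    int oldcount = -1;
    int count = online;
    // Assumed condition: wait until at least one server is up and the count
    // held steady across one interval.
    while (count == 0 || count != oldcount) {
      Thread.sleep(interval);
      oldcount = count;
      count = simulateCheckin();
    }
    System.out.println("Exiting wait on regionserver(s) to checkin; count=" + count);
  }

  static int simulateCheckin() {
    // Pretend two servers register, then the count stabilizes.
    return online = Math.min(online + 1, 2);
  }
}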
+ if (this.watcher != null) { + this.watcher.registerListener(this); + lookForOrphans(); + } } /** diff --git a/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java b/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java index eb01a6a7d15..e5385b72171 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java +++ b/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java @@ -19,8 +19,8 @@ */ package org.apache.hadoop.hbase.master.handler; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.master.DeadServer; import org.apache.hadoop.hbase.master.MasterServices; @@ -34,9 +34,9 @@ public class MetaServerShutdownHandler extends ServerShutdownHandler { public MetaServerShutdownHandler(final Server server, final MasterServices services, - final DeadServer deadServers, final HServerInfo hsi, + final DeadServer deadServers, final ServerName serverName, final boolean carryingRoot, final boolean carryingMeta) { - super(server, services, deadServers, hsi, EventType.M_META_SERVER_SHUTDOWN); + super(server, services, deadServers, serverName, EventType.M_META_SERVER_SHUTDOWN); this.carryingRoot = carryingRoot; this.carryingMeta = carryingMeta; } @@ -50,4 +50,4 @@ public class MetaServerShutdownHandler extends ServerShutdownHandler { boolean isCarryingMeta() { return this.carryingMeta; } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/master/handler/OpenedRegionHandler.java b/src/main/java/org/apache/hadoop/hbase/master/handler/OpenedRegionHandler.java index c478ab72dee..3d16e4789de 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/handler/OpenedRegionHandler.java +++ b/src/main/java/org/apache/hadoop/hbase/master/handler/OpenedRegionHandler.java @@ -22,8 +22,8 @@ package org.apache.hadoop.hbase.master.handler; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.zookeeper.ZKAssign; @@ -36,7 +36,7 @@ public class OpenedRegionHandler extends EventHandler implements TotesHRegionInf private static final Log LOG = LogFactory.getLog(OpenedRegionHandler.class); private final AssignmentManager assignmentManager; private final HRegionInfo regionInfo; - private final HServerInfo serverInfo; + private final ServerName sn; private final OpenedPriority priority; private enum OpenedPriority { @@ -55,11 +55,11 @@ public class OpenedRegionHandler extends EventHandler implements TotesHRegionInf public OpenedRegionHandler(Server server, AssignmentManager assignmentManager, HRegionInfo regionInfo, - HServerInfo serverInfo) { + ServerName sn) { super(server, EventType.RS_ZK_REGION_OPENED); this.assignmentManager = assignmentManager; this.regionInfo = regionInfo; - this.serverInfo = serverInfo; + this.sn = sn; if(regionInfo.isRootRegion()) { priority = OpenedPriority.ROOT; } else if(regionInfo.isMetaRegion()) { @@ -94,7 +94,7 @@ public class OpenedRegionHandler extends EventHandler implements TotesHRegionInf // Code to defend against case where we get SPLIT before region open // processing completes; temporary 
till we make SPLITs go via zk -- 0.92. if (this.assignmentManager.isRegionInTransition(regionInfo) != null) { - this.assignmentManager.regionOnline(regionInfo, serverInfo); + this.assignmentManager.regionOnline(regionInfo, this.sn); } else { LOG.warn("Skipping the onlining of " + regionInfo.getRegionNameAsString() + " because regions is NOT in RIT -- presuming this is because it SPLIT"); @@ -106,7 +106,7 @@ public class OpenedRegionHandler extends EventHandler implements TotesHRegionInf assignmentManager.unassign(regionInfo); } else { LOG.debug("Opened region " + regionInfo.getRegionNameAsString() + - " on " + serverInfo.getServerName()); + " on " + this.sn.toString()); } } } diff --git a/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java index 60662c2d42b..6a7f33343fd 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java +++ b/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java @@ -28,8 +28,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaReader; @@ -47,29 +47,29 @@ import org.apache.zookeeper.KeeperException; /** * Process server shutdown. * Server-to-handle must be already in the deadservers lists. See - * {@link ServerManager#expireServer(HServerInfo)}. + * {@link ServerManager#expireServer(ServerName)} */ public class ServerShutdownHandler extends EventHandler { private static final Log LOG = LogFactory.getLog(ServerShutdownHandler.class); - private final HServerInfo hsi; + private final ServerName serverName; private final Server server; private final MasterServices services; private final DeadServer deadServers; public ServerShutdownHandler(final Server server, final MasterServices services, - final DeadServer deadServers, final HServerInfo hsi) { - this(server, services, deadServers, hsi, EventType.M_SERVER_SHUTDOWN); + final DeadServer deadServers, final ServerName serverName) { + this(server, services, deadServers, serverName, EventType.M_SERVER_SHUTDOWN); } ServerShutdownHandler(final Server server, final MasterServices services, - final DeadServer deadServers, final HServerInfo hsi, EventType type) { + final DeadServer deadServers, final ServerName serverName, EventType type) { super(server, type); - this.hsi = hsi; + this.serverName = serverName; this.server = server; this.services = services; this.deadServers = deadServers; - if (!this.deadServers.contains(hsi.getServerName())) { - LOG.warn(hsi.getServerName() + " is NOT in deadservers; it should be!"); + if (!this.deadServers.contains(this.serverName)) { + LOG.warn(this.serverName + " is NOT in deadservers; it should be!"); } } @@ -89,7 +89,7 @@ public class ServerShutdownHandler extends EventHandler { @Override public void process() throws IOException { - final String serverName = this.hsi.getServerName(); + final ServerName serverName = this.serverName; LOG.info("Splitting logs for " + serverName); this.services.getMasterFileSystem().splitLog(serverName); @@ -99,7 +99,7 @@ public class ServerShutdownHandler extends EventHandler { // OFFLINE? 
-- and then others after like CLOSING that depend on log // splitting. List regionsInTransition = - this.services.getAssignmentManager().processServerShutdown(this.hsi); + this.services.getAssignmentManager().processServerShutdown(this.serverName); // Assign root and meta if we were carrying them. if (isCarryingRoot()) { // -ROOT- @@ -134,7 +134,7 @@ public class ServerShutdownHandler extends EventHandler { try { this.server.getCatalogTracker().waitForMeta(); hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(), - this.hsi); + this.serverName); break; } catch (InterruptedException e) { Thread.currentThread().interrupt(); diff --git a/src/main/java/org/apache/hadoop/hbase/master/handler/SplitRegionHandler.java b/src/main/java/org/apache/hadoop/hbase/master/handler/SplitRegionHandler.java index fee0b3d9be5..543409a3758 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/handler/SplitRegionHandler.java +++ b/src/main/java/org/apache/hadoop/hbase/master/handler/SplitRegionHandler.java @@ -24,8 +24,8 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.zookeeper.ZKAssign; @@ -38,7 +38,7 @@ public class SplitRegionHandler extends EventHandler implements TotesHRegionInfo private static final Log LOG = LogFactory.getLog(SplitRegionHandler.class); private final AssignmentManager assignmentManager; private final HRegionInfo parent; - private final HServerInfo serverInfo; + private final ServerName sn; private final List daughters; /** * For testing only! Set to true to skip handling of split. 
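These handlers now identify a region server by ServerName alone. For orientation, a minimal sketch of such an identity type, assuming the comma-delimited host,port,startcode string form this patch uses for znode and log-directory names (the class and method names below are illustrative, not the actual ServerName API added by the patch):

// Illustrative sketch only; the real org.apache.hadoop.hbase.ServerName
// added by this patch may differ in naming, validation, and helpers.
public final class ServerIdentity {
  private final String hostname;
  private final int port;
  private final long startcode;

  public ServerIdentity(final String hostname, final int port, final long startcode) {
    this.hostname = hostname;
    this.port = port;
    this.startcode = startcode;
  }

  /** Parse the "host,port,startcode" form used in znode and WAL directory names. */
  public static ServerIdentity parse(final String serverName) {
    final String[] parts = serverName.split(",");
    if (parts.length != 3) {
      throw new IllegalArgumentException("Expected host,port,startcode: " + serverName);
    }
    return new ServerIdentity(parts[0], Integer.parseInt(parts[1]),
      Long.parseLong(parts[2]));
  }

  /** The string form round-trips, so it can double as a stable map key. */
  @Override
  public String toString() {
    return this.hostname + "," + this.port + "," + this.startcode;
  }
}

Because the string form is stable for a server's lifetime, the same value can serve as map key, znode name, and on-disk directory name, which is what lets the handlers below drop HServerInfo entirely.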
@@ -47,11 +47,11 @@ public class SplitRegionHandler extends EventHandler implements TotesHRegionInfo public SplitRegionHandler(Server server, AssignmentManager assignmentManager, HRegionInfo regionInfo, - HServerInfo serverInfo, final List daughters) { + ServerName sn, final List daughters) { super(server, EventType.RS_ZK_REGION_SPLIT); this.assignmentManager = assignmentManager; this.parent = regionInfo; - this.serverInfo = serverInfo; + this.sn = sn; this.daughters = daughters; } @@ -70,7 +70,7 @@ public class SplitRegionHandler extends EventHandler implements TotesHRegionInfo LOG.warn("Skipping split message, TEST_SKIP is set"); return; } - this.assignmentManager.handleSplitReport(this.serverInfo, this.parent, + this.assignmentManager.handleSplitReport(this.sn, this.parent, this.daughters.get(0), this.daughters.get(1)); // Remove region from ZK try { diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index c065702412d..5e55d30c1e3 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -58,12 +58,12 @@ import org.apache.hadoop.hbase.DroppedSnapshotException; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HConstants.OperationStatusCode; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.UnknownScannerException; -import org.apache.hadoop.hbase.HConstants.OperationStatusCode; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Increment; @@ -235,7 +235,7 @@ public class HRegion implements HeapSize { // , Writable{ final long memstoreFlushSize; private volatile long lastFlushTime; final RegionServerServices rsServices; - private List> recentFlushes = new ArrayList>(); + private List> recentFlushes = new ArrayList>(); private final long blockingMemStoreSize; final long threadWakeFrequency; // Used to guard closes diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 96040506968..d211b534f35 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.regionserver; import java.io.IOException; +import java.io.StringWriter; import java.lang.Thread.UncaughtExceptionHandler; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; @@ -34,7 +35,6 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; -import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -60,7 +60,7 @@ import org.apache.hadoop.hbase.ClockOutOfSyncException; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HMsg; +import org.apache.hadoop.hbase.HConstants.OperationStatusCode; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; 
import org.apache.hadoop.hbase.HServerInfo; @@ -70,11 +70,11 @@ import org.apache.hadoop.hbase.MasterAddressTracker; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.UnknownRowLockException; import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.YouAreDeadException; -import org.apache.hadoop.hbase.HConstants.OperationStatusCode; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.RootLocationEditor; @@ -139,6 +139,7 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.DNS; import org.apache.zookeeper.KeeperException; +import org.codehaus.jackson.map.ObjectMapper; import com.google.common.base.Function; import com.google.common.collect.Lists; @@ -169,7 +170,6 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // If false, the file system has become unavailable protected volatile boolean fsOk; - protected HServerInfo serverInfo; protected final Configuration conf; private final HConnection connection; @@ -189,7 +189,6 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, new ConcurrentHashMap(); protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); - private final LinkedBlockingQueue outboundMsgs = new LinkedBlockingQueue(); final int numRetries; protected final int threadWakeFrequency; @@ -204,13 +203,16 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // Server to handle client requests. Default access so can be accessed by // unit tests. - RpcServer server; + RpcServer rpcServer; + + private final InetSocketAddress isa; // Leases private Leases leases; - // Request counter - private volatile AtomicInteger requestCount = new AtomicInteger(); + // Request counter. + // Do we need this? Can't we just sum region counters? St.Ack 20110412 + private AtomicInteger requestCount = new AtomicInteger(); // Info server. Default access so can be used by unit tests. REGIONSERVER // is name of the webapp and the attribute name used stuffing this instance @@ -248,7 +250,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // flag set after we're done setting up server threads (used for testing) protected volatile boolean isOnline; - final Map scanners = new ConcurrentHashMap(); + final Map scanners = + new ConcurrentHashMap(); // zookeeper connection and watcher private ZooKeeperWatcher zooKeeper; @@ -270,10 +273,6 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, private final int rpcTimeout; - // The main region server thread. - @SuppressWarnings("unused") - private Thread regionServerThread; - // Instance of the hbase executor service. private ExecutorService service; @@ -282,6 +281,22 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, private final RegionServerAccounting regionServerAccounting; + /** + * The server name the Master sees us as. It's made from the hostname the + * master passes us, port, and server startcode. Gets set after registration + * against Master. The hostname can differ from the hostname in {@link #isa} + * but usually doesn't if both servers resolve.
+ */ + private ServerName serverNameFromMasterPOV; + + // Port we put up the webui on. + private int webuiport = -1; + + /** + * This server's startcode. + */ + private final long startcode; + /** * Starts a HRegionServer at the default location * * @throws IOException * @throws InterruptedException */ - public HRegionServer(Configuration conf) throws IOException, InterruptedException { + public HRegionServer(Configuration conf) + throws IOException, InterruptedException { this.fsOk = true; this.conf = conf; this.connection = HConnectionManager.getConnection(conf); this.isOnline = false; - - // check to see if the codec list is available: - String [] codecs = conf.getStrings("hbase.regionserver.codecs", - (String[])null); - if (codecs != null) { - for (String codec : codecs) { - if (!CompressionTest.testCompression(codec)) { - throw new IOException("Compression codec " + codec + - " not supported, aborting RS construction"); - } - } - } + checkCodecs(this.conf); // Config'ed params this.numRetries = conf.getInt("hbase.client.retries.number", 10); this.threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, - 10 * 1000); + 10 * 1000); this.msgInterval = conf.getInt("hbase.regionserver.msginterval", 3 * 1000); - sleeper = new Sleeper(this.msgInterval, this); + this.sleeper = new Sleeper(this.msgInterval, this); this.maxScannerResultSize = conf.getLong( - HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY, - HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE); + HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY, + HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE); this.numRegionsToReport = conf.getInt( - "hbase.regionserver.numregionstoreport", 10); + "hbase.regionserver.numregionstoreport", 10); this.rpcTimeout = conf.getInt( - HConstants.HBASE_RPC_TIMEOUT_KEY, - HConstants.DEFAULT_HBASE_RPC_TIMEOUT); + HConstants.HBASE_RPC_TIMEOUT_KEY, + HConstants.DEFAULT_HBASE_RPC_TIMEOUT); this.abortRequested = false; this.stopped = false; - // Server to handle client requests - String machineName = DNS.getDefaultHost(conf.get( - "hbase.regionserver.dns.interface", "default"), conf.get( - "hbase.regionserver.dns.nameserver", "default")); - String addressStr = machineName + ":" + - conf.get(HConstants.REGIONSERVER_PORT, - Integer.toString(HConstants.DEFAULT_REGIONSERVER_PORT)); - HServerAddress address = new HServerAddress(addressStr); - this.server = HBaseRPC.getServer(this, - new Class[]{HRegionInterface.class, HBaseRPCErrorHandler.class, - OnlineRegions.class}, - address.getBindAddress(), - address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10), - conf.getInt("hbase.regionserver.metahandler.count", 10), - false, conf, QOS_THRESHOLD); - this.server.setErrorHandler(this); - this.server.setQosFunction(new QosFunction()); - - // HServerInfo can be amended by master. See below in reportForDuty. - this.serverInfo = new HServerInfo(new HServerAddress(new InetSocketAddress( - address.getBindAddress(), this.server.getListenerAddress().getPort())), - System.currentTimeMillis(), this.conf.getInt( - "hbase.regionserver.info.port", 60030), machineName); - if (this.serverInfo.getServerAddress() == null) { - throw new NullPointerException("Server address cannot be null; " - + "hbase-958 debugging"); + // Server to handle client requests.
+ String hostname = DNS.getDefaultHost( + conf.get("hbase.regionserver.dns.interface", "default"), + conf.get("hbase.regionserver.dns.nameserver", "default")); + int port = conf.getInt(HConstants.REGIONSERVER_PORT, + HConstants.DEFAULT_REGIONSERVER_PORT); + // Creation of an InetSocketAddress will force a resolve. + InetSocketAddress initialIsa = new InetSocketAddress(hostname, port); + if (initialIsa.getAddress() == null) { + throw new IllegalArgumentException("Failed resolve of " + initialIsa); } + this.rpcServer = HBaseRPC.getServer(this, + new Class[]{HRegionInterface.class, HBaseRPCErrorHandler.class, + OnlineRegions.class}, + initialIsa.getHostName(), // BindAddress is IP we got for this server. + initialIsa.getPort(), + conf.getInt("hbase.regionserver.handler.count", 10), + conf.getInt("hbase.regionserver.metahandler.count", 10), + conf.getBoolean("hbase.rpc.verbose", false), + conf, QOS_THRESHOLD); + // Set our address. + this.isa = this.rpcServer.getListenerAddress(); + + this.rpcServer.setErrorHandler(this); + this.rpcServer.setQosFunction(new QosFunction()); + this.startcode = System.currentTimeMillis(); // login the server principal (if using secure Hadoop) - User.login(conf, "hbase.regionserver.keytab.file", - "hbase.regionserver.kerberos.principal", serverInfo.getHostname()); - + User.login(this.conf, "hbase.regionserver.keytab.file", + "hbase.regionserver.kerberos.principal", this.isa.getHostName()); regionServerAccounting = new RegionServerAccounting(); } + /** + * Run test on configured codecs to make sure supporting libs are in place. + * @param c + * @throws IOException + */ + private static void checkCodecs(final Configuration c) throws IOException { + // check to see if the codec list is available: + String [] codecs = c.getStrings("hbase.regionserver.codecs", (String[])null); + if (codecs == null) return; + for (String codec : codecs) { + if (!CompressionTest.testCompression(codec)) { + throw new IOException("Compression codec " + codec + + " not supported, aborting RS construction"); + } + } + } + private static final int NORMAL_QOS = 0; private static final int QOS_THRESHOLD = 10; // the line between low and high qos private static final int HIGH_QOS = 100; @@ -373,6 +393,10 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, int priority() default 0; } + /** + * Utility used to ensure higher quality of service for priority rpcs; e.g. + * rpcs to .META. and -ROOT-, etc. + */ class QosFunction implements Function { private final Map annotatedQos; @@ -441,7 +465,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, return HIGH_QOS; } } else if (inv.getParameterClasses()[0] == MultiAction.class) { - MultiAction ma = (MultiAction) inv.getParameters()[0]; + MultiAction ma = (MultiAction) inv.getParameters()[0]; Set regions = ma.getRegions(); // ok this sucks, but if any single of the actions touches a meta, the // whole @@ -464,14 +488,13 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } } /** - * Creates all of the state that needs to be reconstructed in case we are - * doing a restart. This is shared between the constructor and restart(). Both - * call it. + * All initialization needed before we go register with Master.
* * @throws IOException * @throws InterruptedException */ - private void initialize() throws IOException, InterruptedException { + private void preRegistrationInitialization() + throws IOException, InterruptedException { try { initializeZooKeeper(); initializeThreads(); @@ -483,7 +506,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // Call stop if error or process will stick around for ever since server // puts up non-daemon threads. LOG.error("Stopping HRS because failed initialize", t); - this.server.stop(); + this.rpcServer.stop(); } } @@ -497,8 +520,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, */ private void initializeZooKeeper() throws IOException, InterruptedException { // Open connection to zookeeper and set primary watcher - zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + ":" + - serverInfo.getServerAddress().getPort(), this); + this.zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + ":" + + this.isa.getPort(), this); // Create the master address manager, register with zk, and start it. Then // block until a master is available. No point in starting up if no master @@ -520,7 +543,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // Create the log splitting worker and start it this.splitLogWorker = new SplitLogWorker(this.zooKeeper, - this.getConfiguration(), this.getServerName()); + this.getConfiguration(), this.getServerName().toString()); splitLogWorker.start(); } @@ -548,7 +571,6 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } private void initializeThreads() throws IOException { - // Cache flushing thread. this.cacheFlusher = new MemStoreFlusher(conf, this); @@ -557,10 +579,10 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // Background thread to check for major compactions; needed if region // has not gotten updates in a while. Make it run at a lesser frequency. - int multiplier = this.conf.getInt(HConstants.THREAD_WAKE_FREQUENCY - + ".multiplier", 1000); + int multiplier = this.conf.getInt(HConstants.THREAD_WAKE_FREQUENCY + + ".multiplier", 1000); this.majorCompactionChecker = new MajorCompactionChecker(this, - this.threadWakeFrequency * multiplier, this); + this.threadWakeFrequency * multiplier, this); this.leases = new Leases((int) conf.getLong( HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY, @@ -569,28 +591,30 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } /** - * The HRegionServer sticks in this loop until closed. It repeatedly checks in - * with the HMaster, sending heartbeats & reports, and receiving HRegion - * load/unload instructions. + * The HRegionServer sticks in this loop until closed. */ public void run() { - try { - // Initialize threads and wait for a master - initialize(); + // Do pre-registration initializations; zookeeper, lease threads, etc. + preRegistrationInitialization(); } catch (Exception e) { abort("Fatal exception during initialization", e); } - this.regionServerThread = Thread.currentThread(); try { + // Try and register with the Master; tell it we are here. while (!this.stopped) { if (tryReportForDuty()) break; + LOG.warn("No response on reportForDuty. Sleeping and then retrying."); + this.sleeper.sleep(); } + + // We registered with the Master. Go into run mode. long lastMsg = 0; - List outboundMessages = new ArrayList(); + boolean onlyMetaRegionsRemaining = false; + long oldRequestCount = -1; // The main run loop. 
- for (int tries = 0; !this.stopped && isHealthy();) { + while (!this.stopped && isHealthy()) { if (!isClusterUp()) { if (isOnlineRegionsEmpty()) { stop("Exiting; cluster shutdown set and not carrying any regions"); @@ -598,56 +622,37 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, this.stopping = true; closeUserRegions(this.abortRequested); } else if (this.stopping && LOG.isDebugEnabled()) { + if (!onlyMetaRegionsRemaining) { + onlyMetaRegionsRemaining = isOnlyMetaRegionsRemaining(); + } + if (onlyMetaRegionsRemaining) { + // Set stopped if no requests since last time we went around the loop. + // The remaining meta regions will be closed on our way out. + if (oldRequestCount == this.requestCount.get()) { + stop("Stopped; only catalog regions remaining online"); + break; + } + oldRequestCount = this.requestCount.get(); + } LOG.debug("Waiting on " + getOnlineRegionsAsPrintableString()); } } long now = System.currentTimeMillis(); - // Drop into the send loop if msgInterval has elapsed or if something - // to send. If we fail talking to the master, then we'll sleep below - // on poll of the outboundMsgs blockingqueue. - if ((now - lastMsg) >= msgInterval || !outboundMessages.isEmpty()) { - try { - doMetrics(); - tryRegionServerReport(outboundMessages); - lastMsg = System.currentTimeMillis(); - // Reset tries count if we had a successful transaction. - tries = 0; - if (this.stopped) continue; - } catch (Exception e) { // FindBugs REC_CATCH_EXCEPTION - // Two special exceptions could be printed out here, - // PleaseHoldException and YouAreDeadException - if (e instanceof IOException) { - e = RemoteExceptionHandler.checkIOException((IOException) e); - } - if (e instanceof YouAreDeadException) { - // This will be caught and handled as a fatal error below - throw e; - } - tries++; - if (tries > 0 && (tries % this.numRetries) == 0) { - // Check filesystem every so often. - checkFileSystem(); - } - if (this.stopped) { - continue; - } - LOG.warn("Attempt=" + tries, e); - // No point retrying immediately; this is probably connection to - // master issue. Doing below will cause us to sleep. - lastMsg = System.currentTimeMillis(); - } + if ((now - lastMsg) >= msgInterval) { + doMetrics(); + tryRegionServerReport(); + lastMsg = System.currentTimeMillis(); } - now = System.currentTimeMillis(); - HMsg msg = this.outboundMsgs.poll((msgInterval - (now - lastMsg)), TimeUnit.MILLISECONDS); - if (msg != null) outboundMessages.add(msg); + if (!this.stopped) this.sleeper.sleep(); } // for } catch (Throwable t) { if (!checkOOME(t)) { abort("Unhandled exception: " + t.getMessage(), t); } } + // Run shutdown. this.leases.closeAfterLeasesExpire(); - this.server.stop(); + this.rpcServer.stop(); if (this.splitLogWorker != null) { splitLogWorker.stop(); } @@ -673,18 +678,18 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, if (this.majorCompactionChecker != null) this.majorCompactionChecker.interrupt(); if (this.killed) { - // Just skip out w/o closing regions. + // Just skip out w/o closing regions. Used when testing. 
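With HMsg and the outbound message queue gone, the main loop above reduces to a timed report plus a sleep. A compressed sketch of that cadence, assuming the 3-second hbase.regionserver.msginterval default set in the constructor (names here are illustrative, not the actual HRegionServer members):

// Sketch of the heartbeat-free reporting cadence; a simplification of the
// loop above, not the actual HRegionServer code.
public class ReportLoopSketch {
  private volatile boolean stopped = false;
  private final long msgInterval = 3 * 1000L; // hbase.regionserver.msginterval

  public void run() throws InterruptedException {
    long lastMsg = 0;
    while (!this.stopped) {
      final long now = System.currentTimeMillis();
      if (now - lastMsg >= this.msgInterval) {
        report(); // stands in for doMetrics() + tryRegionServerReport()
        lastMsg = System.currentTimeMillis();
      }
      Thread.sleep(100); // the real loop uses Sleeper, which wakes on stop
    }
  }

  private void report() {
    // Send an HServerLoad to the master. Liveness itself is now conveyed by
    // the regionserver's ephemeral znode, not by this periodic call.
  }

  public void stop() {
    this.stopped = true;
  }
}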
} else if (abortRequested) { if (this.fsOk) { closeAllRegions(abortRequested); // Don't leave any open file handles closeWAL(false); } - LOG.info("aborting server at: " + this.serverInfo.getServerName()); + LOG.info("aborting server " + this.serverNameFromMasterPOV); } else { closeAllRegions(abortRequested); closeWAL(true); closeAllScanners(); - LOG.info("stopping server at: " + this.serverInfo.getServerName()); + LOG.info("stopping server " + this.serverNameFromMasterPOV); } // Interrupt catalog tracker here in case any regions being opened out in // handlers are stuck waiting on meta or root. @@ -697,6 +702,11 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, this.hbaseMaster = null; } this.leases.close(); + try { + deleteMyEphemeralNode(); + } catch (KeeperException e) { + LOG.warn("Failed deleting my ephemeral node", e); + } HConnectionManager.deleteConnection(conf, true); this.zooKeeper.close(); if (!killed) { @@ -705,6 +715,57 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, LOG.info(Thread.currentThread().getName() + " exiting"); } + private boolean isOnlyMetaRegionsRemaining() { + if (getNumberOfOnlineRegions() > 2) return false; + boolean onlyMetaRegionsRemaining = false; + for (Map.Entry e: this.onlineRegions.entrySet()) { + if (!e.getValue().getRegionInfo().isMetaRegion()) { + onlyMetaRegionsRemaining = false; + break; + } + onlyMetaRegionsRemaining = true; + } + return onlyMetaRegionsRemaining; + } + + void tryRegionServerReport() + throws IOException { + HServerLoad hsl = buildServerLoad(); + // Why do we do this? + this.requestCount.set(0); + while (!this.stopped) { + try { + this.hbaseMaster.regionServerReport(this.serverNameFromMasterPOV.getBytes(), hsl); + break; + } catch (IOException ioe) { + if (ioe instanceof RemoteException) { + ioe = ((RemoteException)ioe).unwrapRemoteException(); + } + if (ioe instanceof YouAreDeadException) { + // This will be caught and handled as a fatal error in run() + throw
ioe; - } - // Couldn't connect to the master, get location from zk and reconnect - // Method blocks until new master is found or we are stopped - getMaster(); - } - } - updateOutboundMsgs(outboundMessages); - outboundMessages.clear(); - return outboundMessages; - } - - private HServerLoad buildServerLoad() { - MemoryUsage memory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage(); - HServerLoad hsl = new HServerLoad(requestCount.get(), - (int)(memory.getUsed() / 1024 / 1024), - (int) (memory.getMax() / 1024 / 1024)); - for (HRegion r : this.onlineRegions.values()) { - hsl.addRegionInfo(createRegionLoad(r)); - } - return hsl; - } - private void closeWAL(final boolean delete) { try { if (this.hlog != null) { @@ -803,62 +823,24 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } } - /* - * Add to the passed msgs messages to pass to the master. - * - * @param msgs Current outboundMsgs array; we'll add messages to this List. - */ - private void addOutboundMsgs(final List msgs) { - if (msgs.isEmpty()) { - this.outboundMsgs.drainTo(msgs); - return; - } - OUTER: for (HMsg m : this.outboundMsgs) { - for (HMsg mm : msgs) { - // Be careful don't add duplicates. - if (mm.equals(m)) { - continue OUTER; - } - } - msgs.add(m); - } - } - - /* - * Remove from this.outboundMsgs those messsages we sent the master. - * - * @param msgs Messages we sent the master. - */ - private void updateOutboundMsgs(final List msgs) { - if (msgs.isEmpty()) { - return; - } - for (HMsg m : this.outboundMsgs) { - for (HMsg mm : msgs) { - if (mm.equals(m)) { - this.outboundMsgs.remove(m); - break; - } - } - } - } - /* * Run init. Sets up hlog and starts up all server threads. * * @param c Extra configuration. */ - protected void handleReportForDutyResponse(final MapWritable c) throws IOException { + protected void handleReportForDutyResponse(final MapWritable c) + throws IOException { try { - for (Map.Entry e : c.entrySet()) { - + for (Map.Entry e :c.entrySet()) { String key = e.getKey().toString(); - // Use the address the master passed us - if (key.equals("hbase.regionserver.address")) { - HServerAddress hsa = (HServerAddress) e.getValue(); - LOG.info("Master passed us address to use. Was=" - + this.serverInfo.getServerAddress() + ", Now=" + hsa.toString()); - this.serverInfo.setServerAddress(hsa); + // The hostname the master sees us as. + if (key.equals(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER)) { + String hostnameFromMasterPOV = e.getValue().toString(); + this.serverNameFromMasterPOV = new ServerName(hostnameFromMasterPOV, + this.isa.getPort(), this.startcode); + LOG.info("Master passed us hostname to use. Was=" + + this.isa.getHostName() + ", Now=" + + this.serverNameFromMasterPOV.getHostname()); continue; } String value = e.getValue().toString(); @@ -867,14 +849,15 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } this.conf.set(key, value); } - + // hack! Maps DFSClient => RegionServer for logs. HDFS made this // config param for task trackers, but we can piggyback off of it. if (this.conf.get("mapred.task.id") == null) { - this.conf.set("mapred.task.id", - "hb_rs_" + this.serverInfo.getServerName() + "_" + - System.currentTimeMillis()); + this.conf.set("mapred.task.id", "hb_rs_" + + this.serverNameFromMasterPOV.toString()); } + // Set our ephemeral znode up in zookeeper now we have a name. + createMyEphemeralNode(); // Master sent us hbase.rootdir to use. Should be fully qualified // path with file system specification included. 
Set 'fs.defaultFS' @@ -889,8 +872,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // Init in here rather than in constructor after thread name has been set this.metrics = new RegionServerMetrics(); startServiceThreads(); - LOG.info("Serving as " + this.serverInfo.getServerName() + - ", RPC listening on " + this.server.getListenerAddress() + + LOG.info("Serving as " + this.serverNameFromMasterPOV + + ", RPC listening on " + this.isa + ", sessionid=0x" + Long.toHexString(this.zooKeeper.getZooKeeper().getSessionId())); isOnline = true; @@ -902,10 +885,23 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } } + private String getMyEphemeralNodePath() { + return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString()); + } + + private void createMyEphemeralNode() throws KeeperException { + ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(), + HConstants.EMPTY_BYTE_ARRAY); + } + + private void deleteMyEphemeralNode() throws KeeperException { + ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath()); + } + public RegionServerAccounting getRegionServerAccounting() { return regionServerAccounting; } - + /* * @param r Region to get RegionLoad for. * @@ -1090,14 +1086,12 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, */ private HLog setupWALAndReplication() throws IOException { final Path oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME); - Path logdir = new Path(rootDir, HLog.getHLogDirectoryName(this.serverInfo)); - if (LOG.isDebugEnabled()) { - LOG.debug("logdir=" + logdir); - } + Path logdir = new Path(rootDir, + HLog.getHLogDirectoryName(this.serverNameFromMasterPOV.toString())); + if (LOG.isDebugEnabled()) LOG.debug("logdir=" + logdir); if (this.fs.exists(logdir)) { - throw new RegionServerRunningException("Region server already " - + "running at " + this.serverInfo.getServerName() - + " because logdir " + logdir.toString() + " exists"); + throw new RegionServerRunningException("Region server has already " + + "created directory at " + this.serverNameFromMasterPOV.toString()); } // Instantiate replication manager if replication enabled. 
Pass it the @@ -1120,7 +1114,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, */ protected HLog instantiateHLog(Path logdir, Path oldLogDir) throws IOException { return new HLog(this.fs, logdir, oldLogDir, this.conf, - getWALActionListeners(), this.serverInfo.getServerAddress().toString()); + getWALActionListeners(), this.serverNameFromMasterPOV.toString()); } /** @@ -1240,7 +1234,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, }; // Start executor services - this.service = new ExecutorService(getServerName()); + this.service = new ExecutorService(getServerName().toString()); this.service.startExecutorService(ExecutorType.RS_OPEN_REGION, conf.getInt("hbase.regionserver.executor.openregion.threads", 3)); this.service.startExecutorService(ExecutorType.RS_OPEN_ROOT, @@ -1256,46 +1250,20 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, Threads.setDaemonThreadRunning(this.hlogRoller, n + ".logRoller", handler); Threads.setDaemonThreadRunning(this.cacheFlusher, n + ".cacheFlusher", - handler); + handler); Threads.setDaemonThreadRunning(this.compactSplitThread, n + ".compactor", - handler); - Threads.setDaemonThreadRunning(this.majorCompactionChecker, n - + ".majorCompactionChecker", handler); + handler); + Threads.setDaemonThreadRunning(this.majorCompactionChecker, n + + ".majorCompactionChecker", handler); // Leases is not a Thread. Internally it runs a daemon thread. If it gets // an unhandled exception, it will just exit. this.leases.setName(n + ".leaseChecker"); this.leases.start(); - // Put up info server. - int port = this.conf.getInt("hbase.regionserver.info.port", 60030); - // -1 is for disabling info server - if (port >= 0) { - String addr = this.conf.get("hbase.regionserver.info.bindAddress", - "0.0.0.0"); - // check if auto port bind enabled - boolean auto = this.conf.getBoolean("hbase.regionserver.info.port.auto", - false); - while (true) { - try { - this.infoServer = new InfoServer("regionserver", addr, port, false); - this.infoServer.setAttribute("regionserver", this); - this.infoServer.start(); - break; - } catch (BindException e) { - if (!auto) { - // auto bind disabled throw BindException - throw e; - } - // auto bind enabled, try to use another port - LOG.info("Failed binding http info server to port: " + port); - port++; - // update HRS server info port. - this.serverInfo = new HServerInfo(this.serverInfo.getServerAddress(), - this.serverInfo.getStartCode(), port, - this.serverInfo.getHostname()); - } - } - } + + // Put up the webui. Webui may come up on port other than configured if + // that port is occupied. Adjust the recorded webui port if this is the case. + this.webuiport = putUpWebUI(); if (this.replicationHandler != null) { this.replicationHandler.startReplicationServices(); @@ -1303,7 +1271,38 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // Start Server. This service is like leases in that it internally runs // a thread. - this.server.start(); + this.rpcServer.start(); + } + + /** + * Puts up the webui. + * @return Returns final port -- maybe different from what we started with.
+ * @throws IOException + */ + private int putUpWebUI() throws IOException { + int port = this.conf.getInt("hbase.regionserver.info.port", 60030); + // -1 is for disabling info server + if (port < 0) return port; + String addr = this.conf.get("hbase.regionserver.info.bindAddress", "0.0.0.0"); + // check if auto port bind enabled + boolean auto = this.conf.getBoolean("hbase.regionserver.info.port.auto", false); + while (true) { + try { + this.infoServer = new InfoServer("regionserver", addr, port, false); + this.infoServer.setAttribute("regionserver", this); + this.infoServer.start(); + break; + } catch (BindException e) { + if (!auto) { + // auto bind disabled throw BindException + throw e; + } + // auto bind enabled, try to use another port + LOG.info("Failed binding http info server to port: " + port); + port++; + } + } + return port; } /* @@ -1361,15 +1360,18 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // Update ZK, ROOT or META if (r.getRegionInfo().isRootRegion()) { RootLocationEditor.setRootLocation(getZooKeeper(), - getServerInfo().getServerAddress()); + this.serverNameFromMasterPOV); } else if (r.getRegionInfo().isMetaRegion()) { - MetaEditor.updateMetaLocation(ct, r.getRegionInfo(), getServerInfo()); + MetaEditor.updateMetaLocation(ct, r.getRegionInfo(), + this.serverNameFromMasterPOV); } else { if (daughter) { // If daughter of a split, update whole row, not just location. - MetaEditor.addDaughter(ct, r.getRegionInfo(), getServerInfo()); + MetaEditor.addDaughter(ct, r.getRegionInfo(), + this.serverNameFromMasterPOV); } else { - MetaEditor.updateRegionLocation(ct, r.getRegionInfo(), getServerInfo()); + MetaEditor.updateRegionLocation(ct, r.getRegionInfo(), + this.serverNameFromMasterPOV); } } } @@ -1379,7 +1381,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, * @return Metrics instance. */ public HBaseRpcMetrics getRpcMetrics() { - return server.getRpcMetrics(); + return rpcServer.getRpcMetrics(); } /** @@ -1444,22 +1446,29 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, * Method will block until a master is available. You can break from this * block by requesting the server stop. * - * @return master address, or null if server has been stopped + * @return master + port, or null if server has been stopped */ - private HServerAddress getMaster() { - HServerAddress masterAddress = null; + private ServerName getMaster() { + ServerName masterServerName = null; + while ((masterServerName = this.masterAddressManager.getMasterAddress()) == null) { + if (stopped) { + return null; + } + LOG.debug("No master found, will retry"); + sleeper.sleep(); + } + InetSocketAddress isa = + new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort()); HMasterRegionInterface master = null; - while (!stopped && master == null) { - - masterAddress = getMasterAddress(); - LOG.info("Attempting connect to Master server at " + masterAddress); + LOG.info("Attempting connect to Master server at " + + this.masterAddressManager.getMasterAddress()); try { // Do initial RPC setup. The final argument indicates that the RPC // should retry indefinitely. master = (HMasterRegionInterface) HBaseRPC.waitForProxy( HMasterRegionInterface.class, HMasterRegionInterface.VERSION, - masterAddress.getInetSocketAddress(), this.conf, -1, + isa, this.conf, -1, this.rpcTimeout, this.rpcTimeout); } catch (IOException e) { e = e instanceof RemoteException ? 
@@ -1472,23 +1481,11 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, sleeper.sleep(); } } - LOG.info("Connected to master at " + masterAddress); + LOG.info("Connected to master at " + isa); this.hbaseMaster = master; - return masterAddress; + return masterServerName; } - private HServerAddress getMasterAddress() { - HServerAddress masterAddress = null; - while ((masterAddress = masterAddressManager.getMasterAddress()) == null) { - if (stopped) { - return null; - } - LOG.debug("No master found, will retry"); - sleeper.sleep(); - } - return masterAddress; - } - /** * @return True if successfully invoked {@link #reportForDuty()} * @throws IOException @@ -1499,35 +1496,32 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, handleReportForDutyResponse(w); return true; } - sleeper.sleep(); - LOG.warn("No response on reportForDuty. Sleeping and then retrying."); return false; } /* * Let the master know we're here Run initialization using parameters passed * us by the master. + * @return A Map of key/value configurations we got from the Master else + * null if we failed to register. + * @throws IOException */ private MapWritable reportForDuty() throws IOException { - HServerAddress masterAddress = null; - while (!stopped && (masterAddress = getMaster()) == null) { + ServerName masterServerName = null; + while (!stopped && (masterServerName = getMaster()) == null) { + LOG.warn("Unable to get master for initialization -- sleeping"); sleeper.sleep(); - LOG.warn("Unable to get master for initialization"); } - MapWritable result = null; long lastMsg = 0; while (!stopped) { try { this.requestCount.set(0); - lastMsg = System.currentTimeMillis(); - ZKUtil.setAddressAndWatch(zooKeeper, - ZKUtil.joinZNode(zooKeeper.rsZNode, ZKUtil.getNodeName(serverInfo)), - this.serverInfo.getServerAddress()); - this.serverInfo.setLoad(buildServerLoad()); - LOG.info("Telling master at " + masterAddress + " that we are up"); - result = this.hbaseMaster.regionServerStartup(this.serverInfo, - EnvironmentEdgeManager.currentTimeMillis()); + LOG.info("Telling master at " + masterServerName + " that we are up " + + "with port=" + this.isa.getPort() + ", startcode=" + this.startcode); + lastMsg = EnvironmentEdgeManager.currentTimeMillis(); + int port = this.isa.getPort(); + result = this.hbaseMaster.regionServerStartup(port, this.startcode, lastMsg); break; } catch (RemoteException e) { IOException ioe = e.unwrapRemoteException(); @@ -1541,8 +1535,6 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } } catch (IOException e) { LOG.warn("error telling master we are up", e); - } catch (KeeperException e) { - LOG.warn("error putting up ephemeral node in zookeeper", e); } sleeper.sleep(lastMsg); } @@ -2419,15 +2411,50 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } public int getNumberOfOnlineRegions() { - int size = -1; - size = this.onlineRegions.size(); - return size; + return this.onlineRegions.size(); } boolean isOnlineRegionsEmpty() { return this.onlineRegions.isEmpty(); } + /** + * @param encodedRegionName + * @return JSON Map of labels to values for passed in encodedRegionName + * @throws IOException + */ + public byte [] getRegionStats(final String encodedRegionName) + throws IOException { + HRegion r = null; + synchronized (this.onlineRegions) { + r = this.onlineRegions.get(encodedRegionName); + } + if (r == null) return null; + ObjectMapper mapper = new ObjectMapper(); + int stores = 0; + int 
storefiles = 0; + int storefileSizeMB = 0; + int memstoreSizeMB = (int) (r.memstoreSize.get() / 1024 / 1024); + int storefileIndexSizeMB = 0; + synchronized (r.stores) { + stores += r.stores.size(); + for (Store store : r.stores.values()) { + storefiles += store.getStorefilesCount(); + storefileSizeMB += (int) (store.getStorefilesSize() / 1024 / 1024); + storefileIndexSizeMB += (int) (store.getStorefilesIndexSize() / 1024 / 1024); + } + } + Map map = new TreeMap(); + map.put("stores", stores); + map.put("storefiles", storefiles); + map.put("storefileSizeMB", storefileSizeMB); + map.put("storefileIndexSizeMB", storefileIndexSizeMB); + map.put("memstoreSizeMB", memstoreSizeMB); + StringWriter w = new StringWriter(); + mapper.writeValue(w, map); + w.close(); + return Bytes.toBytes(w.toString()); + } + /** * For tests and web ui. * This method will only work if HRegionServer is in the same JVM as client; @@ -2554,18 +2581,11 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, @Override @QosPriority(priority=HIGH_QOS) public long getProtocolVersion(final String protocol, final long clientVersion) - throws IOException { + throws IOException { if (protocol.equals(HRegionInterface.class.getName())) { return HRegionInterface.VERSION; } - throw new IOException("Unknown protocol to name node: " + protocol); - } - - /** - * @return Queue to which you can add outbound messages. - */ - protected LinkedBlockingQueue getOutboundMsgs() { - return this.outboundMsgs; + throw new IOException("Unknown protocol: " + protocol); } /** @@ -2590,13 +2610,18 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } /** - * @return Info on port this server has bound to, etc. + * @return This server's {@link HServerInfo} */ + // TODO: Deprecate and do getServerName instead. public HServerInfo getServerInfo() { - return this.serverInfo; + try { + return getHServerInfo(); + } catch (IOException e) { + e.printStackTrace(); + } + return null; } - @Override public Result increment(byte[] regionName, Increment increment) throws IOException { @@ -2661,11 +2686,14 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } } - /** {@inheritDoc} */ + /** {@inheritDoc} + * @deprecated Use {@link #getServerName()} instead. + */ @Override @QosPriority(priority=HIGH_QOS) public HServerInfo getHServerInfo() throws IOException { - return serverInfo; + return new HServerInfo(new HServerAddress(this.isa), + this.startcode, this.webuiport); } @SuppressWarnings("unchecked") @@ -2680,8 +2708,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, // actions in the list.
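getRegionStats() above writes its stats map out with Jackson. As a standalone illustration of the JSON shape it returns (the keys are taken from the code above; the values here are made up):

import java.io.StringWriter;
import java.util.Map;
import java.util.TreeMap;
import org.codehaus.jackson.map.ObjectMapper;

// Standalone sketch of the JSON emitted by getRegionStats(); values are fake.
public class RegionStatsJsonDemo {
  public static void main(final String[] args) throws Exception {
    final Map<String, Integer> map = new TreeMap<String, Integer>();
    map.put("stores", 1);
    map.put("storefiles", 3);
    map.put("storefileSizeMB", 128);
    map.put("storefileIndexSizeMB", 2);
    map.put("memstoreSizeMB", 64);
    final StringWriter w = new StringWriter();
    new ObjectMapper().writeValue(w, map); // Jackson 1.x, as imported above
    w.close();
    // Prints: {"memstoreSizeMB":64,"storefileIndexSizeMB":2,
    //          "storefileSizeMB":128,"storefiles":3,"stores":1}
    System.out.println(w.toString());
  }
}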
Collections.sort(actionsForRegion); Row action; - List puts = new ArrayList(); - for (Action a : actionsForRegion) { + List> puts = new ArrayList>(); + for (Action a : actionsForRegion) { action = a.getAction(); int originalIndex = a.getOriginalIndex(); @@ -2722,7 +2750,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, List> putsWithLocks = Lists.newArrayListWithCapacity(puts.size()); - for (Action a : puts) { + for (Action a : puts) { Put p = (Put) a.getAction(); Integer lock; @@ -2743,7 +2771,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, for( int i = 0 ; i < codes.length ; i++) { OperationStatusCode code = codes[i]; - Action theAction = puts.get(i); + Action theAction = puts.get(i); Object result = null; if (code == OperationStatusCode.SUCCESS) { @@ -2757,7 +2785,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } } catch (IOException ioe) { // fail all the puts with the ioe in question. - for (Action a: puts) { + for (Action a: puts) { response.add(regionName, a.getOriginalIndex(), ioe); } } @@ -2815,7 +2843,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } public String toString() { - return this.serverInfo.toString(); + return getServerName().toString(); } /** @@ -2833,8 +2861,11 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, } @Override - public String getServerName() { - return serverInfo.getServerName(); + public ServerName getServerName() { + // Our servername could change after we talk to the master. + return this.serverNameFromMasterPOV == null? + new ServerName(this.isa.getHostName(), this.isa.getPort(), this.startcode): + this.serverNameFromMasterPOV; } @Override @@ -2862,8 +2893,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, */ public static Thread startRegionServer(final HRegionServer hrs) throws IOException { - return startRegionServer(hrs, "regionserver" - + hrs.getServerInfo().getServerAddress().getPort()); + return startRegionServer(hrs, "regionserver" + hrs.isa.getPort()); } /** diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java index 7e1090c358b..c402b87dac5 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.regionserver; import java.io.IOException; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.ipc.HBaseRpcMetrics; import org.apache.hadoop.hbase.regionserver.wal.HLog; @@ -51,12 +50,6 @@ public interface RegionServerServices extends OnlineRegions { */ public FlushRequester getFlushRequester(); - /** - * Return data structure that has Server address and startcode. - * @return The HServerInfo for this RegionServer. 
- */ - public HServerInfo getServerInfo(); - /** * @return the RegionServerAccounting for this Region Server */ diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java b/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java index e9d0f0a5304..489254efd03 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java @@ -198,7 +198,7 @@ public class SplitLogWorker extends ZooKeeperListener implements Runnable { try { taskReadyLock.wait(); } catch (InterruptedException e) { - LOG.warn("SplitLogWorker inteurrpted while waiting for task," + + LOG.warn("SplitLogWorker interrupted while waiting for task," + " exiting", e); assert exitWorker == true; return; diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java b/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java index 1b521f66581..aad14da12cc 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java @@ -39,6 +39,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.executor.RegionTransitionData; import org.apache.hadoop.hbase.executor.EventHandler.EventType; @@ -476,7 +477,7 @@ public class SplitTransaction { } // Look for any exception - for (Future future : futures) { + for (Future future: futures) { try { future.get(); } catch (InterruptedException e) { @@ -690,7 +691,7 @@ public class SplitTransaction { * @throws IOException */ private static int createNodeSplitting(final ZooKeeperWatcher zkw, - final HRegionInfo region, final String serverName) + final HRegionInfo region, final ServerName serverName) throws KeeperException, IOException { LOG.debug(zkw.prefix("Creating ephemeral node for " + region.getEncodedName() + " in SPLITTING state")); @@ -744,7 +745,7 @@ public class SplitTransaction { * @throws IOException */ private static int transitionNodeSplit(ZooKeeperWatcher zkw, - HRegionInfo parent, HRegionInfo a, HRegionInfo b, String serverName, + HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName, final int znodeVersion) throws KeeperException, IOException { byte [] payload = Writables.getBytes(a, b); @@ -755,7 +756,7 @@ public class SplitTransaction { private static int transitionNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo parent, - final String serverName, final int version) + final ServerName serverName, final int version) throws KeeperException, IOException { return ZKAssign.transitionNode(zkw, parent, serverName, EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version); diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java index 7569992535f..378ac3a718d 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java @@ -40,7 +40,6 @@ import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.regex.Matcher; @@ -57,8 +56,8 @@ import org.apache.hadoop.fs.Syncable; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.hbase.util.FSUtils; @@ -146,9 +145,6 @@ public class HLog implements Syncable { private Method getNumCurrentReplicas; // refers to DFSOutputStream.getNumCurrentReplicas final static Object [] NO_ARGS = new Object []{}; - // used to indirectly tell syncFs to force the sync - private boolean forceSync = false; - public interface Reader { void init(FileSystem fs, Path path, Configuration c) throws IOException; void close() throws IOException; @@ -1279,36 +1275,10 @@ public class HLog implements Syncable { /** * Construct the HLog directory name * - * @param info HServerInfo for server + * @param serverName Server name formatted as described in {@link ServerName} * @return the HLog directory name */ - public static String getHLogDirectoryName(HServerInfo info) { - return getHLogDirectoryName(info.getServerName()); - } - - /** - * Construct the HLog directory name - * - * @param serverAddress - * @param startCode - * @return the HLog directory name - */ - public static String getHLogDirectoryName(String serverAddress, - long startCode) { - if (serverAddress == null || serverAddress.length() == 0) { - return null; - } - return getHLogDirectoryName( - HServerInfo.getServerName(serverAddress, startCode)); - } - - /** - * Construct the HLog directory name - * - * @param serverName - * @return the HLog directory name - */ - public static String getHLogDirectoryName(String serverName) { + public static String getHLogDirectoryName(final String serverName) { StringBuilder dirName = new StringBuilder(HConstants.HREGION_LOGDIR_NAME); dirName.append("/"); dirName.append(serverName); diff --git a/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeer.java b/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeer.java index 548c8eb84fc..6de47e63243 100644 --- a/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeer.java +++ b/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeer.java @@ -24,7 +24,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; /** @@ -37,8 +37,7 @@ public class ReplicationPeer { private final String clusterKey; private final String id; - private List regionServers = - new ArrayList(0); + private List regionServers = new ArrayList(0); private final AtomicBoolean peerEnabled = new AtomicBoolean(); // Cannot be final since a new object needs to be recreated when session fails private ZooKeeperWatcher zkw; @@ -82,7 +81,7 @@ public class ReplicationPeer { * for this peer cluster * @return list of addresses */ - public List getRegionServers() { + public List getRegionServers() { return regionServers; } @@ -90,7 +89,7 @@ public class ReplicationPeer { * Set the list of region servers for that peer * @param regionServers list of addresses for the region servers */ - public void 
setRegionServers(List regionServers) { + public void setRegionServers(List regionServers) { this.regionServers = regionServers; } diff --git a/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java b/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java index f282c6d6c06..4923bfbcf42 100644 --- a/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java +++ b/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java @@ -35,14 +35,13 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener; import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; -import org.apache.hadoop.util.StringUtils; import org.apache.zookeeper.KeeperException; /** @@ -137,7 +136,7 @@ public class ReplicationZookeeper { this.peerClusters = new HashMap(); ZKUtil.createWithParents(this.zookeeper, ZKUtil.joinZNode(this.replicationZNode, this.replicationStateNodeName)); - this.rsServerNameZnode = ZKUtil.joinZNode(rsZNode, server.getServerName()); + this.rsServerNameZnode = ZKUtil.joinZNode(rsZNode, server.getServerName().toString()); ZKUtil.createWithParents(this.zookeeper, this.rsServerNameZnode); connectExistingPeers(); } @@ -204,14 +203,14 @@ public class ReplicationZookeeper { * @param peerClusterId (byte) the cluster to interrogate * @return addresses of all region servers */ - public List getSlavesAddresses(String peerClusterId) + public List getSlavesAddresses(String peerClusterId) throws KeeperException { if (this.peerClusters.size() == 0) { - return new ArrayList(0); + return new ArrayList(0); } ReplicationPeer peer = this.peerClusters.get(peerClusterId); if (peer == null) { - return new ArrayList(0); + return new ArrayList(0); } peer.setRegionServers(fetchSlavesAddresses(peer.getZkw())); return peer.getRegionServers(); @@ -222,16 +221,47 @@ public class ReplicationZookeeper { * @param zkw zk connection to use * @return list of region server addresses */ - private List fetchSlavesAddresses(ZooKeeperWatcher zkw) { - List rss = null; + private List fetchSlavesAddresses(ZooKeeperWatcher zkw) { + List rss = null; try { - rss = ZKUtil.listChildrenAndGetAsAddresses(zkw, zkw.rsZNode); + rss = listChildrenAndGetAsServerNames(zkw, zkw.rsZNode); } catch (KeeperException e) { LOG.warn("Cannot get peer's region server addresses", e); } return rss; } + /** + * Lists the children of the specified znode, retrieving the data of each + * child as a server address. + * + * Used to list the currently online regionservers and their addresses. + * + * Sets no watches at all, this method is best effort. + * + * Returns an empty list if the node has no children. Returns null if the + * parent node itself does not exist. 
+ * + * @param zkw zookeeper reference + * @param znode node to get children of as addresses + * @return list of data of children of specified znode, empty if no children, + * null if parent does not exist + * @throws KeeperException if unexpected zookeeper exception + */ + public static List listChildrenAndGetAsServerNames( + ZooKeeperWatcher zkw, String znode) + throws KeeperException { + List children = ZKUtil.listChildrenNoWatch(zkw, znode); + if(children == null) { + return null; + } + List addresses = new ArrayList(children.size()); + for (String child : children) { + addresses.add(new ServerName(child)); + } + return addresses; + } + /** * This method connects this cluster to another one and registers it * in this region server's replication znode diff --git a/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java index 78c3b420696..fc71f0302fb 100644 --- a/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java +++ b/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; @@ -202,7 +203,7 @@ public class ReplicationSource extends Thread */ private void chooseSinks() throws KeeperException { this.currentPeers.clear(); - List addresses = + List addresses = this.zkHelper.getSlavesAddresses(peerClusterId); Set setOfAddr = new HashSet(); int nbPeers = (int) (Math.ceil(addresses.size() * ratio)); @@ -212,7 +213,8 @@ public class ReplicationSource extends Thread HServerAddress address; // Make sure we get one address that we don't already have do { - address = addresses.get(this.random.nextInt(addresses.size())); + ServerName sn = addresses.get(this.random.nextInt(addresses.size())); + address = new HServerAddress(sn.getHostname(), sn.getPort()); } while (setOfAddr.contains(address)); LOG.info("Choosing peer " + address); setOfAddr.add(address); diff --git a/src/main/java/org/apache/hadoop/hbase/rest/StorageClusterStatusResource.java b/src/main/java/org/apache/hadoop/hbase/rest/StorageClusterStatusResource.java index 578ee145292..cddade0c76f 100644 --- a/src/main/java/org/apache/hadoop/hbase/rest/StorageClusterStatusResource.java +++ b/src/main/java/org/apache/hadoop/hbase/rest/StorageClusterStatusResource.java @@ -35,8 +35,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.ClusterStatus; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HServerLoad; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.rest.model.StorageClusterStatusModel; @@ -73,13 +73,13 @@ public class StorageClusterStatusResource extends ResourceBase { model.setRegions(status.getRegionsCount()); model.setRequests(status.getRequestsCount()); model.setAverageLoad(status.getAverageLoad()); - for (HServerInfo info: status.getServerInfo()) { - HServerLoad load = info.getLoad(); - StorageClusterStatusModel.Node node = + for (ServerName info: status.getServers()) { + HServerLoad load = status.getLoad(info); + 
StorageClusterStatusModel.Node node = model.addLiveNode( - info.getServerAddress().getHostname() + ":" + - Integer.toString(info.getServerAddress().getPort()), - info.getStartCode(), load.getUsedHeapMB(), + info.getHostname() + ":" + + Integer.toString(info.getPort()), + info.getStartcode(), load.getUsedHeapMB(), load.getMaxHeapMB()); node.setRequests(load.getNumberOfRequests()); for (HServerLoad.RegionLoad region: load.getRegionsLoad().values()) { @@ -88,8 +88,8 @@ region.getMemStoreSizeMB(), region.getStorefileIndexSizeMB()); } } - for (String name: status.getDeadServerNames()) { - model.addDeadNode(name); + for (ServerName name: status.getDeadServerNames()) { + model.addDeadNode(name.toString()); } ResponseBuilder response = Response.ok(model); response.cacheControl(cacheControl); diff --git a/src/main/java/org/apache/hadoop/hbase/util/Addressing.java b/src/main/java/org/apache/hadoop/hbase/util/Addressing.java new file mode 100644 index 00000000000..714e2d9752c --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/util/Addressing.java @@ -0,0 +1,75 @@ +/** + * Copyright 2011 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.net.InetSocketAddress; + +/** + * Utility for network addresses, resolving and naming. + */ +public class Addressing { + public static final String HOSTNAME_PORT_SEPARATOR = ":"; + + /** + * @param hostAndPort Formatted as <hostname> ':' <port> + * @return An InetSocketAddress instance + */ + public static InetSocketAddress createInetSocketAddressFromHostAndPortStr( + final String hostAndPort) { + return new InetSocketAddress(parseHostname(hostAndPort), parsePort(hostAndPort)); + } + + /** + * @param hostname Server hostname + * @param port Server port + * @return Returns a concatenation of hostname and + * port in the following + * form: <hostname> ':' <port>.
For example, if hostname + * is example.org and port is 1234, this method will return + * example.org:1234 + */ + public static String createHostAndPortStr(final String hostname, final int port) { + return hostname + HOSTNAME_PORT_SEPARATOR + port; + } + + /** + * @param hostAndPort Formatted as <hostname> ':' <port> + * @return The hostname portion of hostAndPort + */ + public static String parseHostname(final String hostAndPort) { + int colonIndex = hostAndPort.lastIndexOf(HOSTNAME_PORT_SEPARATOR); + if (colonIndex < 0) { + throw new IllegalArgumentException("Not a host:port pair: " + hostAndPort); + } + return hostAndPort.substring(0, colonIndex); + } + + /** + * @param hostAndPort Formatted as <hostname> ':' <port> + * @return The port portion of hostAndPort + */ + public static int parsePort(final String hostAndPort) { + int colonIndex = hostAndPort.lastIndexOf(HOSTNAME_PORT_SEPARATOR); + if (colonIndex < 0) { + throw new IllegalArgumentException("Not a host:port pair: " + hostAndPort); + } + return Integer.parseInt(hostAndPort.substring(colonIndex + 1)); + } +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 5da5e34f446..1c1d94b7de8 100644 --- a/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -39,25 +39,28 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ZooKeeperConnectionException; +import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.MetaScanner; -import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; +import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.zookeeper.RootRegionTracker; import org.apache.hadoop.hbase.zookeeper.ZKTable; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.zookeeper.KeeperException; @@ -172,21 +175,21 @@ public class HBaseFsck { } // From the master, get a list of all known live region servers - Collection regionServers = status.getServerInfo(); + Collection regionServers = status.getServers(); errors.print("Number of live region servers: " + regionServers.size()); if (details) { - for (HServerInfo rsinfo: regionServers) { - errors.print(" " + rsinfo.getServerName()); + for (ServerName rsinfo: regionServers) { + errors.print(" " + rsinfo); } } // From the master, get a list of all dead region servers - Collection deadRegionServers = status.getDeadServerNames(); + 
Collection deadRegionServers = status.getDeadServerNames(); errors.print("Number of dead region servers: " + deadRegionServers.size()); if (details) { - for (String name: deadRegionServers) { + for (ServerName name: deadRegionServers) { errors.print(" " + name); } } @@ -302,31 +305,55 @@ public class HBaseFsck { // Check if Root region is valid and existing if (rootLocation == null || rootLocation.getRegionInfo() == null || - rootLocation.getServerAddress() == null) { + rootLocation.getHostname() == null) { errors.reportError("Root Region or some of its attributes is null."); return false; } - - MetaEntry m = new MetaEntry(rootLocation.getRegionInfo(), - rootLocation.getServerAddress(), null, System.currentTimeMillis()); + ServerName sn; + try { + sn = getRootRegionServerName(); + } catch (InterruptedException e) { + throw new IOException("Interrupted", e); + } + MetaEntry m = + new MetaEntry(rootLocation.getRegionInfo(), sn, System.currentTimeMillis()); HbckInfo hbInfo = new HbckInfo(m); regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo); return true; } + private ServerName getRootRegionServerName() + throws IOException, InterruptedException { + RootRegionTracker rootRegionTracker = + new RootRegionTracker(this.connection.getZooKeeperWatcher(), new Abortable() { + @Override + public void abort(String why, Throwable e) { + LOG.error(why, e); + System.exit(1); + } + }); + rootRegionTracker.start(); + ServerName sn = null; + try { + sn = rootRegionTracker.getRootRegionLocation(); + } finally { + rootRegionTracker.stop(); + } + return sn; + } + /** * Contacts each regionserver and fetches metadata about regions. * @param regionServerList - the list of region servers to connect to * @throws IOException if a remote or network exception occurs */ - void processRegionServers(Collection regionServerList) - throws IOException, InterruptedException { - + void processRegionServers(Collection regionServerList) + throws IOException, InterruptedException { WorkItemRegion[] work = new WorkItemRegion[regionServerList.size()]; int num = 0; // loop to contact each region server in parallel - for (HServerInfo rsinfo:regionServerList) { + for (ServerName rsinfo: regionServerList) { work[num] = new WorkItemRegion(this, rsinfo, errors, connection); executor.execute(work[num]); num++; @@ -478,7 +505,7 @@ public class HBaseFsck { if (modTInfo == null) { modTInfo = new TInfo(tableName); } - for (HServerAddress server : hbi.deployedOn) { + for (ServerName server : hbi.deployedOn) { modTInfo.addServer(server); } modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey()); @@ -498,19 +525,19 @@ public class HBaseFsck { private class TInfo { String tableName; TreeMap edges; - TreeSet deployedOn; + TreeSet deployedOn; TInfo(String name) { this.tableName = name; edges = new TreeMap (Bytes.BYTES_COMPARATOR); - deployedOn = new TreeSet (); + deployedOn = new TreeSet (); } public void addEdge(byte[] fromNode, byte[] toNode) { this.edges.put(fromNode, toNode); } - public void addServer(HServerAddress server) { + public void addServer(ServerName server) { this.deployedOn.add(server); } @@ -647,7 +674,7 @@ public class HBaseFsck { errors.print("Trying to fix a problem with .META..."); setShouldRerun(); // try fix it (treat is a dupe assignment) - List deployedOn = Lists.newArrayList(); + List deployedOn = Lists.newArrayList(); for (HbckInfo mRegion : metaRegions) { deployedOn.add(mRegion.metaEntry.regionServer); } @@ -681,35 +708,19 @@ public class HBaseFsck { // record the latest modification 
of this META record long ts = Collections.max(result.list(), comp).getTimestamp(); - - // record region details - byte [] value = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.REGIONINFO_QUALIFIER); - if (value == null || value.length == 0) { + Pair pair = + MetaReader.metaRowToRegionPair(result); + if (pair == null || pair.getFirst() == null) { emptyRegionInfoQualifiers.add(result); return true; } - HRegionInfo info = Writables.getHRegionInfo(value); - HServerAddress server = null; - byte[] startCode = null; - - // record assigned region server - value = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.SERVER_QUALIFIER); - if (value != null && value.length > 0) { - String address = Bytes.toString(value); - server = new HServerAddress(address); + ServerName sn = null; + if (pair.getSecond() != null) { + sn = pair.getSecond(); } - - // record region's start key - value = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.STARTCODE_QUALIFIER); - if (value != null) { - startCode = value; - } - MetaEntry m = new MetaEntry(info, server, startCode, ts); + MetaEntry m = new MetaEntry(pair.getFirst(), sn, ts); HbckInfo hbInfo = new HbckInfo(m); - HbckInfo previous = regionInfo.put(info.getEncodedName(), hbInfo); + HbckInfo previous = regionInfo.put(pair.getFirst().getEncodedName(), hbInfo); if (previous != null) { throw new IOException("Two entries in META are same " + previous); } @@ -740,11 +751,10 @@ public class HBaseFsck { * Stores the entries scanned from META */ private static class MetaEntry extends HRegionInfo { - HServerAddress regionServer; // server hosting this region + ServerName regionServer; // server hosting this region long modTime; // timestamp of most recent modification metadata - public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer, - byte[] startCode, long modTime) { + public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) { super(rinfo); this.regionServer = regionServer; this.modTime = modTime; @@ -758,13 +768,13 @@ public class HBaseFsck { boolean onlyEdits = false; MetaEntry metaEntry = null; FileStatus foundRegionDir = null; - List deployedOn = Lists.newArrayList(); + List deployedOn = Lists.newArrayList(); HbckInfo(MetaEntry metaEntry) { this.metaEntry = metaEntry; } - public synchronized void addServer(HServerAddress server) { + public synchronized void addServer(ServerName server) { this.deployedOn.add(server); } @@ -792,7 +802,7 @@ public class HBaseFsck { } System.out.println(" Number of regions: " + tInfo.getNumRegions()); System.out.print(" Deployed on: "); - for (HServerAddress server : tInfo.deployedOn) { + for (ServerName server : tInfo.deployedOn) { System.out.print(" " + server.toString()); } System.out.println(); @@ -865,12 +875,12 @@ public class HBaseFsck { */ static class WorkItemRegion implements Runnable { private HBaseFsck hbck; - private HServerInfo rsinfo; + private ServerName rsinfo; private ErrorReporter errors; private HConnection connection; private boolean done; - WorkItemRegion(HBaseFsck hbck, HServerInfo info, + WorkItemRegion(HBaseFsck hbck, ServerName info, ErrorReporter errors, HConnection connection) { this.hbck = hbck; this.rsinfo = info; @@ -888,8 +898,7 @@ public class HBaseFsck { public synchronized void run() { errors.progress(); try { - HRegionInterface server = connection.getHRegionConnection( - rsinfo.getServerAddress()); + HRegionInterface server = connection.getHRegionConnection(new HServerAddress(rsinfo.getHostname(), rsinfo.getPort())); // list all online regions from this 
region server List regions = server.getOnlineRegions(); @@ -908,7 +917,7 @@ public class HBaseFsck { // check to see if the existance of this region matches the region in META for (HRegionInfo r:regions) { HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName()); - hbi.addServer(rsinfo.getServerAddress()); + hbi.addServer(rsinfo); } } catch (IOException e) { // unable to connect to the region server. errors.reportError("RegionServer: " + rsinfo.getServerName() + diff --git a/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java b/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java index b624d281e15..39f3af26af7 100644 --- a/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java +++ b/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.ipc.HRegionInterface; @@ -47,13 +48,13 @@ public class HBaseFsckRepair { * @throws InterruptedException */ public static void fixDupeAssignment(Configuration conf, HRegionInfo region, - List servers) + List servers) throws IOException, KeeperException, InterruptedException { HRegionInfo actualRegion = new HRegionInfo(region); // Close region on the servers silently - for(HServerAddress server : servers) { + for(ServerName server : servers) { closeRegionSilentlyAndWait(conf, server, actualRegion); } @@ -82,14 +83,14 @@ public class HBaseFsckRepair { throws ZooKeeperConnectionException, KeeperException, IOException { ZKAssign.createOrForceNodeOffline( HConnectionManager.getConnection(conf).getZooKeeperWatcher(), - region, HConstants.HBCK_CODE_NAME); + region, HConstants.HBCK_CODE_SERVERNAME); } private static void closeRegionSilentlyAndWait(Configuration conf, - HServerAddress server, HRegionInfo region) + ServerName server, HRegionInfo region) throws IOException, InterruptedException { HRegionInterface rs = - HConnectionManager.getConnection(conf).getHRegionConnection(server); + HConnectionManager.getConnection(conf).getHRegionConnection(new HServerAddress(server.getHostname(), server.getPort())); rs.closeRegion(region, false); long timeout = conf.getLong("hbase.hbck.close.timeout", 120000); long expiration = timeout + System.currentTimeMillis(); diff --git a/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java b/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java index baf0c27499a..5bd94fb4bc1 100644 --- a/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java +++ b/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java @@ -189,7 +189,7 @@ public class JVMClusterUtil { while (true) { for (JVMClusterUtil.MasterThread t : masters) { if (t.master.isActiveMaster()) { - return t.master.getMasterAddress().toString(); + return t.master.getServerName().toString(); } } try { diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java index 0437484100e..5a3b91d96c4 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java @@ -19,14 +19,18 @@ */ package org.apache.hadoop.hbase.zookeeper; 
+import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import java.util.NavigableSet; +import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.Abortable; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.master.ServerManager; +import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData; import org.apache.zookeeper.KeeperException; /** @@ -41,7 +45,7 @@ import org.apache.zookeeper.KeeperException; */ public class RegionServerTracker extends ZooKeeperListener { private static final Log LOG = LogFactory.getLog(RegionServerTracker.class); - + private NavigableSet regionServers = new TreeSet(); private ServerManager serverManager; private Abortable abortable; @@ -58,32 +62,56 @@ public class RegionServerTracker extends ZooKeeperListener { *

All RSs will be tracked after this method is called. * * @throws KeeperException + * @throws IOException */ - public void start() throws KeeperException { + public void start() throws KeeperException, IOException { watcher.registerListener(this); - ZKUtil.watchAndGetNewChildren(watcher, watcher.rsZNode); + List servers = + ZKUtil.watchAndGetNewChildren(watcher, watcher.rsZNode); + add(servers); + } + + private void add(final List servers) throws IOException { + synchronized(this.regionServers) { + this.regionServers.clear(); + for (NodeAndData n: servers) { + ServerName sn = new ServerName(ZKUtil.getNodeName(n.getNode())); + this.regionServers.add(sn); + } + } + } + + private void remove(final ServerName sn) { + synchronized(this.regionServers) { + this.regionServers.remove(sn); + } } @Override public void nodeDeleted(String path) { - if(path.startsWith(watcher.rsZNode)) { + if (path.startsWith(watcher.rsZNode)) { String serverName = ZKUtil.getNodeName(path); LOG.info("RegionServer ephemeral node deleted, processing expiration [" + - serverName + "]"); - HServerInfo hsi = serverManager.getServerInfo(serverName); - if(hsi == null) { - LOG.info("No HServerInfo found for " + serverName); + serverName + "]"); + ServerName sn = new ServerName(serverName); + if (!serverManager.isServerOnline(sn)) { + LOG.info(serverName.toString() + " is not online"); return; } - serverManager.expireServer(hsi); + remove(sn); + this.serverManager.expireServer(sn); } } @Override public void nodeChildrenChanged(String path) { - if(path.equals(watcher.rsZNode)) { + if (path.equals(watcher.rsZNode)) { try { - ZKUtil.watchAndGetNewChildren(watcher, watcher.rsZNode); + List servers = + ZKUtil.watchAndGetNewChildren(watcher, watcher.rsZNode); + add(servers); + } catch (IOException e) { + abortable.abort("Unexpected zk exception getting RS nodes", e); } catch (KeeperException e) { abortable.abort("Unexpected zk exception getting RS nodes", e); } @@ -92,10 +120,12 @@ public class RegionServerTracker extends ZooKeeperListener { /** * Gets the online servers. - * @return list of online servers from zk + * @return list of online servers * @throws KeeperException */ - public List getOnlineServers() throws KeeperException { - return ZKUtil.listChildrenAndGetAsAddresses(watcher, watcher.rsZNode); + public List getOnlineServers() { + synchronized (this.regionServers) { + return new ArrayList(this.regionServers); + } } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java index 692b608f43d..de2b7801846 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java @@ -20,9 +20,8 @@ package org.apache.hadoop.hbase.zookeeper; import org.apache.hadoop.hbase.Abortable; -import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.catalog.RootLocationEditor; -import org.apache.hadoop.hbase.regionserver.RegionServerServices; import org.apache.hadoop.hbase.util.Bytes; /** @@ -54,31 +53,34 @@ public class RootRegionTracker extends ZooKeeperNodeTracker { /** * Gets the root region location, if available. Null if not. Does not block. 
- * @return server address for server hosting root region, null if none available + * @return server name * @throws InterruptedException */ - public HServerAddress getRootRegionLocation() throws InterruptedException { - return dataToHServerAddress(super.getData()); + public ServerName getRootRegionLocation() throws InterruptedException { + byte [] data = super.getData(); + return data == null? null: new ServerName(dataToString(data)); } /** * Gets the root region location, if available, and waits for up to the * specified timeout if not immediately available. * @param timeout maximum time to wait, in millis - * @return server address for server hosting root region, null if timed out + * @return server name for server hosting root region formatted as per + * {@link ServerName}, or null if none available * @throws InterruptedException if interrupted while waiting */ - public HServerAddress waitRootRegionLocation(long timeout) + public ServerName waitRootRegionLocation(long timeout) throws InterruptedException { - return dataToHServerAddress(super.blockUntilAvailable(timeout)); + String str = dataToString(super.blockUntilAvailable(timeout)); + return str == null? null: new ServerName(str); } /* * @param data * @return Returns null if data is null else converts passed data - * to an HServerAddress instance. + * to a String instance. */ - private static HServerAddress dataToHServerAddress(final byte [] data) { - return data == null ? null: new HServerAddress(Bytes.toString(data)); + private static String dataToString(final byte [] data) { + return data == null ? null: Bytes.toString(data); } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java index 34e17b60638..1092ebf5a92 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java @@ -24,6 +24,7 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.executor.RegionTransitionData; import org.apache.hadoop.hbase.executor.EventHandler.EventType; import org.apache.zookeeper.AsyncCallback; @@ -130,13 +131,13 @@ public class ZKAssign { * @throws KeeperException.NodeExistsException if node already exists */ public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, - String serverName) + ServerName serverName) throws KeeperException, KeeperException.NodeExistsException { createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE); } public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, - String serverName, final EventType event) + ServerName serverName, final EventType event) throws KeeperException, KeeperException.NodeExistsException { LOG.debug(zkw.prefix("Creating unassigned node for " + region.getEncodedName() + " in OFFLINE state")); @@ -165,7 +166,7 @@ public class ZKAssign { * @throws KeeperException.NodeExistsException if node already exists */ public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw, - HRegionInfo region, String serverName, + HRegionInfo region, ServerName serverName, final AsyncCallback.StringCallback cb, final Object ctx) throws KeeperException { LOG.debug(zkw.prefix("Async create of unassigned node for " + @@ -198,7 +199,7 @@ public class ZKAssign { * @throws 
KeeperException.NoNodeException if node does not exist */ public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region, - String serverName) + ServerName serverName) throws KeeperException, KeeperException.NoNodeException { LOG.debug(zkw.prefix("Forcing existing unassigned node for " + region.getEncodedName() + " to OFFLINE state")); @@ -231,7 +232,7 @@ public class ZKAssign { * @throws KeeperException.NodeExistsException if node already exists */ public static boolean createOrForceNodeOffline(ZooKeeperWatcher zkw, - HRegionInfo region, String serverName) + HRegionInfo region, ServerName serverName) throws KeeperException { LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " + region.getEncodedName() + " with OFFLINE state")); @@ -464,7 +465,7 @@ public class ZKAssign { * @throws KeeperException.NodeExistsException if node already exists */ public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region, - String serverName) + ServerName serverName) throws KeeperException, KeeperException.NodeExistsException { LOG.debug(zkw.prefix("Creating unassigned node for " + region.getEncodedName() + " in a CLOSING state")); @@ -506,7 +507,7 @@ public class ZKAssign { * @throws KeeperException if unexpected zookeeper exception */ public static int transitionNodeClosed(ZooKeeperWatcher zkw, - HRegionInfo region, String serverName, int expectedVersion) + HRegionInfo region, ServerName serverName, int expectedVersion) throws KeeperException { return transitionNode(zkw, region, serverName, EventType.RS_ZK_REGION_CLOSING, @@ -540,14 +541,14 @@ public class ZKAssign { * @throws KeeperException if unexpected zookeeper exception */ public static int transitionNodeOpening(ZooKeeperWatcher zkw, - HRegionInfo region, String serverName) + HRegionInfo region, ServerName serverName) throws KeeperException { return transitionNodeOpening(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE); } public static int transitionNodeOpening(ZooKeeperWatcher zkw, - HRegionInfo region, String serverName, final EventType beginState) + HRegionInfo region, ServerName serverName, final EventType beginState) throws KeeperException { return transitionNode(zkw, region, serverName, beginState, EventType.RS_ZK_REGION_OPENING, -1); @@ -580,7 +581,7 @@ public class ZKAssign { * @throws KeeperException if unexpected zookeeper exception */ public static int retransitionNodeOpening(ZooKeeperWatcher zkw, - HRegionInfo region, String serverName, int expectedVersion) + HRegionInfo region, ServerName serverName, int expectedVersion) throws KeeperException { return transitionNode(zkw, region, serverName, EventType.RS_ZK_REGION_OPENING, @@ -616,7 +617,7 @@ public class ZKAssign { * @throws KeeperException if unexpected zookeeper exception */ public static int transitionNodeOpened(ZooKeeperWatcher zkw, - HRegionInfo region, String serverName, int expectedVersion) + HRegionInfo region, ServerName serverName, int expectedVersion) throws KeeperException { return transitionNode(zkw, region, serverName, EventType.RS_ZK_REGION_OPENING, @@ -652,7 +653,7 @@ public class ZKAssign { * @throws KeeperException if unexpected zookeeper exception */ public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, - String serverName, EventType beginState, EventType endState, + ServerName serverName, EventType beginState, EventType endState, int expectedVersion) throws KeeperException { return transitionNode(zkw, region, serverName, beginState, endState, @@ -660,7 +661,7 @@ public class ZKAssign { } public 
static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region, - String serverName, EventType beginState, EventType endState, + ServerName serverName, EventType beginState, EventType endState, int expectedVersion, final byte [] payload) throws KeeperException { String encoded = region.getEncodedName(); @@ -699,7 +700,7 @@ public class ZKAssign { "unassigned node for " + encoded + " from " + beginState + " to " + endState + " failed, " + "the node existed but was in the state " + existingData.getEventType() + - " set by the server " + existingData.getServerName())); + " set by the server " + serverName)); return -1; } diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java index 7f5b3770089..919fdadf80a 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java @@ -33,8 +33,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.executor.RegionTransitionData; import org.apache.hadoop.hbase.util.Bytes; import org.apache.zookeeper.AsyncCallback; @@ -125,19 +123,6 @@ public class ZKUtil { return idx <= 0 ? null : node.substring(0, idx); } - /** - * Get the unique node-name for the specified regionserver. - * - * Used when a server puts up an ephemeral node for itself and needs to use - * a unique name. - * - * @param serverInfo server information - * @return unique, zookeeper-safe znode path for the server instance - */ - public static String getNodeName(HServerInfo serverInfo) { - return serverInfo.getServerName(); - } - /** * Get the name of the current node from the specified fully-qualified path. * @param path fully-qualified path @@ -332,38 +317,6 @@ public class ZKUtil { return children; } - /** - * Lists the children of the specified znode, retrieving the data of each - * child as a server address. - * - * Used to list the currently online regionservers and their addresses. - * - * Sets no watches at all, this method is best effort. - * - * Returns an empty list if the node has no children. Returns null if the - * parent node itself does not exist. - * - * @param zkw zookeeper reference - * @param znode node to get children of as addresses - * @return list of data of children of specified znode, empty if no children, - * null if parent does not exist - * @throws KeeperException if unexpected zookeeper exception - */ - public static List listChildrenAndGetAsAddresses( - ZooKeeperWatcher zkw, String znode) - throws KeeperException { - List children = listChildrenNoWatch(zkw, znode); - if(children == null) { - return null; - } - List addresses = - new ArrayList(children.size()); - for(String child : children) { - addresses.add(getDataAsAddress(zkw, joinZNode(znode, child))); - } - return addresses; - } - /** * Lists the children of the specified znode without setting any watches. * @@ -601,32 +554,6 @@ public class ZKUtil { } } - /** - * Get the data at the specified znode, deserialize it as an HServerAddress, - * and set a watch. - * - * Returns the data as a server address and sets a watch if the node exists. - * Returns null and no watch is set if the node does not exist or there is an - * exception. 
- * - * @param zkw zk reference - * @param znode path of node - * @return data of the specified node as a server address, or null - * @throws KeeperException if unexpected zookeeper exception - */ - public static HServerAddress getDataAsAddress(ZooKeeperWatcher zkw, - String znode) - throws KeeperException { - byte [] data = getDataAndWatch(zkw, znode); - if(data == null) { - return null; - } - String addrString = Bytes.toString(data); - LOG.debug(zkw.prefix("Read server address from znode " + znode + ": " + - addrString)); - return new HServerAddress(addrString); - } - /** * Update the data of an existing node with the expected version to have the * specified data. @@ -656,31 +583,6 @@ public class ZKUtil { // Data setting // - /** - * Set the specified znode to be an ephemeral node carrying the specified - * server address. Used by masters for their ephemeral node and regionservers - * for their ephemeral node. - * - * If the node is created successfully, a watcher is also set on the node. - * - * If the node is not created successfully because it already exists, this - * method will also set a watcher on the node. - * - * If there is another problem, a KeeperException will be thrown. - * - * @param zkw zk reference - * @param znode path of node - * @param address server address - * @return true if address set, false if not, watch set in both cases - * @throws KeeperException if unexpected zookeeper exception - */ - public static boolean setAddressAndWatch(ZooKeeperWatcher zkw, - String znode, HServerAddress address) - throws KeeperException { - return createEphemeralNodeAndWatch(zkw, znode, - Bytes.toBytes(address.toString())); - } - /** * Sets the data of the existing znode to be the specified data. Ensures that * the current data has the specified expected version. 
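With ZKUtil.getDataAsAddress() and setAddressAndWatch() gone, callers that used to pull an HServerAddress out of a znode now read the raw bytes and parse a ServerName themselves, as RootRegionTracker and the replication code above do. A minimal sketch of that pattern, assuming a znode whose payload is the ServerName string form this patch writes; the class and method names here are illustrative, not part of the patch:

import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;

public class ServerNameZNodeExample {
  /** Read a znode payload and parse it as a ServerName; null if the znode is absent. */
  public static ServerName readServerName(final ZooKeeperWatcher zkw, final String znode)
  throws KeeperException {
    byte [] data = ZKUtil.getDataAndWatch(zkw, znode);
    return data == null? null: new ServerName(Bytes.toString(data));
  }
}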
@@ -745,8 +647,7 @@ public class ZKUtil { * @param data data to set for node * @throws KeeperException if unexpected zookeeper exception */ - public static void setData(ZooKeeperWatcher zkw, String znode, - byte [] data) + public static void setData(ZooKeeperWatcher zkw, String znode, byte [] data) throws KeeperException, KeeperException.NoNodeException { setData(zkw, znode, data, -1); } @@ -1024,10 +925,9 @@ public class ZKUtil { public static void deleteChildrenRecursively(ZooKeeperWatcher zkw, String node) throws KeeperException { List children = ZKUtil.listChildrenNoWatch(zkw, node); - if(children != null || !children.isEmpty()) { - for(String child : children) { - deleteNodeRecursively(zkw, joinZNode(node, child)); - } + if (children == null || children.isEmpty()) return; + for(String child : children) { + deleteNodeRecursively(zkw, joinZNode(node, child)); } } @@ -1041,13 +941,12 @@ public class ZKUtil { try { sb.append("HBase is rooted at ").append(zkw.baseZNode); sb.append("\nMaster address: ").append( - getDataAsAddress(zkw, zkw.masterAddressZNode)); + Bytes.toStringBinary(getData(zkw, zkw.masterAddressZNode))); sb.append("\nRegion server holding ROOT: ").append( - getDataAsAddress(zkw, zkw.rootServerZNode)); + Bytes.toStringBinary(getData(zkw, zkw.rootServerZNode))); sb.append("\nRegion servers:"); - for (HServerAddress address : listChildrenAndGetAsAddresses(zkw, - zkw.rsZNode)) { - sb.append("\n ").append(address); + for (String child: listChildrenNoWatch(zkw, zkw.rsZNode)) { + sb.append("\n ").append(child); } sb.append("\nQuorum Server Statistics:"); String[] servers = zkw.getQuorum().split(","); diff --git a/src/main/resources/hbase-webapps/master/master.jsp b/src/main/resources/hbase-webapps/master/master.jsp index eeabc0ab028..2ed58d05ecb 100644 --- a/src/main/resources/hbase-webapps/master/master.jsp +++ b/src/main/resources/hbase-webapps/master/master.jsp @@ -7,17 +7,15 @@ import="org.apache.hadoop.hbase.util.FSUtils" import="org.apache.hadoop.hbase.master.HMaster" import="org.apache.hadoop.hbase.HConstants" + import="org.apache.hadoop.hbase.ServerName" import="org.apache.hadoop.hbase.client.HBaseAdmin" import="org.apache.hadoop.hbase.client.HConnectionManager" - import="org.apache.hadoop.hbase.HServerInfo" - import="org.apache.hadoop.hbase.HServerAddress" import="org.apache.hadoop.hbase.HTableDescriptor" %><% HMaster master = (HMaster)getServletContext().getAttribute(HMaster.MASTER); Configuration conf = master.getConfiguration(); - HServerAddress rootLocation = master.getCatalogTracker().getRootLocation(); + ServerName rootLocation = master.getCatalogTracker().getRootLocation(); boolean metaOnline = master.getCatalogTracker().getMetaLocation() != null; - Map serverToServerInfos = - master.getServerManager().getOnlineServers(); + List servers = master.getServerManager().getOnlineServersList(); int interval = conf.getInt("hbase.regionserver.msginterval", 1000)/1000; if (interval == 0) { interval = 1; @@ -32,12 +30,12 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -HBase Master: <%= master.getMasterAddress().getHostname()%>:<%= master.getMasterAddress().getPort() %> +HBase Master: <%= master.getServerName().getHostAndPort() %> -

Master: <%=master.getMasterAddress().getHostname()%>:<%=master.getMasterAddress().getPort()%>

+

Master: <%=master.getServerName().getHostname()%>:<%=master.getServerName().getPort()%>

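The next hunk replaces the jsp's map of HServerInfo with the List of ServerName that ServerManager.getOnlineServersList() now returns. A short sketch of rendering such a list outside a jsp, assuming ServerName's natural ordering (which the Arrays.sort() call in the hunk below also relies on); the class and method names are illustrative:

import java.util.Collections;
import java.util.List;

import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.Addressing;

public class OnlineServersExample {
  /** Print one host:port line per online region server, with its startcode. */
  public static void print(final List<ServerName> servers) {
    Collections.sort(servers);
    for (ServerName sn: servers) {
      System.out.println(Addressing.createHostAndPortStr(sn.getHostname(), sn.getPort()) +
        " startcode=" + sn.getStartcode());
    }
  }
}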
@@ -137,26 +135,27 @@ <% } %>

Region Servers

-<% if (serverToServerInfos != null && serverToServerInfos.size() > 0) { %> +<% if (servers != null && servers.size() > 0) { %> <% int totalRegions = 0; int totalRequests = 0; %> - -<% String[] serverNames = serverToServerInfos.keySet().toArray(new String[serverToServerInfos.size()]); + +<% ServerName [] serverNames = servers.toArray(new ServerName[servers.size()]); Arrays.sort(serverNames); - for (String serverName: serverNames) { - HServerInfo hsi = serverToServerInfos.get(serverName); - String hostname = hsi.getServerAddress().getHostname() + ":" + hsi.getInfoPort(); + for (ServerName serverName: serverNames) { + // HARDCODED FOR NOW; FIX -- READ FROM ZK + String hostname = serverName.getHostname() + ":60020"; String url = "http://" + hostname + "/"; - totalRegions += hsi.getLoad().getNumberOfRegions(); - totalRequests += hsi.getLoad().getNumberOfRequests() / interval; - long startCode = hsi.getStartCode(); + // TODO: FIX + totalRegions += 0; + totalRequests += 0; + long startCode = serverName.getStartcode(); %> - + <% } %> - +
AddressStart CodeLoad
AddressStart CodeLoad
<%= hostname %><%= startCode %><%= hsi.getLoad().toString(interval) %>
<%= hostname %><%= startCode %><%= 0 %>
Total: servers: <%= serverToServerInfos.size() %> requests=<%= totalRequests %>, regions=<%= totalRegions %>
Total: servers: <%= servers.size() %> requests=<%= totalRequests %>, regions=<%= totalRegions %>

Load is requests per second and count of regions loaded

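Several call sites in this patch (ReplicationSource.chooseSinks(), HBaseFsck's WorkItemRegion, HBaseFsckRepair) bridge the new ServerName back to the legacy HServerAddress that HConnection.getHRegionConnection() still expects. A minimal helper capturing that recurring conversion; only the wrapper class name is invented here:

import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.ServerName;

public class ServerNameBridgeExample {
  /** Hostname and port carry over; the startcode is dropped in the conversion. */
  public static HServerAddress toHServerAddress(final ServerName sn) {
    return new HServerAddress(sn.getHostname(), sn.getPort());
  }
}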
diff --git a/src/main/resources/hbase-webapps/master/table.jsp b/src/main/resources/hbase-webapps/master/table.jsp index 6fcf16feafe..95696fdadff 100644 --- a/src/main/resources/hbase-webapps/master/table.jsp +++ b/src/main/resources/hbase-webapps/master/table.jsp @@ -6,6 +6,7 @@ import="org.apache.hadoop.hbase.client.HBaseAdmin" import="org.apache.hadoop.hbase.client.HConnectionManager" import="org.apache.hadoop.hbase.HRegionInfo" + import="org.apache.hadoop.hbase.ServerName" import="org.apache.hadoop.hbase.HServerAddress" import="org.apache.hadoop.hbase.HServerInfo" import="org.apache.hadoop.hbase.HServerLoad" @@ -22,7 +23,7 @@ String tableName = request.getParameter("name"); HTable table = new HTable(conf, tableName); String tableHeader = "

Table Regions

"; - HServerAddress rl = master.getCatalogTracker().getRootLocation(); + ServerName rl = master.getCatalogTracker().getRootLocation(); boolean showFragmentation = conf.getBoolean("hbase.master.ui.fragmentation.enabled", false); Map frags = null; if (showFragmentation) { @@ -83,8 +84,9 @@ if(tableName.equals(Bytes.toString(HConstants.ROOT_TABLE_NAME))) { %> <%= tableHeader %> +// HARDCODED FOR NOW TODO: FIX GET FROM ZK <% - int infoPort = master.getServerManager().getHServerInfo(rl).getInfoPort(); + int infoPort = 60020; // HARDCODED FOR NOW -- TODO FIX String url = "http://" + rl.getHostname() + ":" + infoPort + "/"; %> @@ -102,9 +104,9 @@ <% // NOTE: Presumes one meta region only. HRegionInfo meta = HRegionInfo.FIRST_META_REGIONINFO; - HServerAddress metaLocation = master.getCatalogTracker().getMetaLocation(); + ServerName metaLocation = master.getCatalogTracker().getMetaLocation(); for (int i = 0; i < 1; i++) { - int infoPort = master.getServerManager().getHServerInfo(metaLocation).getInfoPort(); + int infoPort = 60020; // HARDCODED FOR NOW -- TODO FIX String url = "http://" + metaLocation.getHostname() + ":" + infoPort + "/"; %> @@ -141,7 +143,7 @@ if(regions != null && regions.size() > 0) { %> <%= tableHeader %> <% - for(Map.Entry hriEntry : regions.entrySet()) { + for (Map.Entry hriEntry : regions.entrySet()) { HRegionInfo regionInfo = hriEntry.getKey(); HServerAddress addr = hriEntry.getValue(); long req = 0; @@ -150,16 +152,16 @@ String urlRegionServer = null; if (addr != null) { - HServerInfo info = master.getServerManager().getHServerInfo(addr); - if (info != null) { - HServerLoad sl = info.getLoad(); + HServerLoad sl = master.getServerManager().getLoad(addr); + if (sl != null) { Map map = sl.getRegionsLoad(); if (map.containsKey(regionInfo.getRegionName())) { req = map.get(regionInfo.getRegionName()).getRequestsCount(); } - infoPort = info.getInfoPort(); + // This port might be wrong if RS actually ended up using something else. + int port = conf.getInt("hbase.regionserver.info.port", 60030); urlRegionServer = - "http://" + addr.getHostname().toString() + ":" + infoPort + "/"; + "http://" + addr.getHostname().toString() + ":" + port + "/"; Integer i = regDistribution.get(urlRegionServer); if (null == i) i = new Integer(0); regDistribution.put(urlRegionServer, i+1); diff --git a/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java index 617ae89003c..6460c83c915 100644 --- a/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java +++ b/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java @@ -23,8 +23,6 @@ import java.io.IOException; import java.security.PrivilegedAction; import java.util.ArrayList; import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -39,7 +37,6 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.JVMClusterUtil; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.io.MapWritable; -import org.apache.zookeeper.KeeperException; /** * This class creates a single process HBase cluster. @@ -83,75 +80,6 @@ public class MiniHBaseCluster { return this.conf; } - /** - * Override Master so can add inject behaviors testing. 
- */ - public static class MiniHBaseClusterMaster extends HMaster { - private final Map> messages = - new ConcurrentHashMap>(); - - private final Map exceptions = - new ConcurrentHashMap(); - - public MiniHBaseClusterMaster(final Configuration conf) - throws IOException, KeeperException, InterruptedException { - super(conf); - } - - /** - * Add a message to send to a regionserver next time it checks in. - * @param hsi RegionServer's HServerInfo. - * @param msg Message to add. - */ - void addMessage(final HServerInfo hsi, HMsg msg) { - synchronized(this.messages) { - List hmsgs = this.messages.get(hsi); - if (hmsgs == null) { - hmsgs = new ArrayList(); - this.messages.put(hsi, hmsgs); - } - hmsgs.add(msg); - } - } - - void addException(final HServerInfo hsi, final IOException ex) { - this.exceptions.put(hsi, ex); - } - - /** - * This implementation is special, exceptions will be treated first and - * message won't be sent back to the region servers even if some are - * specified. - * @param hsi the rs - * @param msgs Messages to add to - * @return - * @throws IOException will be throw if any added for this region server - */ - @Override - protected HMsg[] adornRegionServerAnswer(final HServerInfo hsi, - final HMsg[] msgs) throws IOException { - IOException ex = this.exceptions.remove(hsi); - if (ex != null) { - throw ex; - } - HMsg [] answerMsgs = msgs; - synchronized (this.messages) { - List hmsgs = this.messages.get(hsi); - if (hmsgs != null && !hmsgs.isEmpty()) { - int size = answerMsgs.length; - HMsg [] newAnswerMsgs = new HMsg[size + hmsgs.size()]; - System.arraycopy(answerMsgs, 0, newAnswerMsgs, 0, answerMsgs.length); - for (int i = 0; i < hmsgs.size(); i++) { - newAnswerMsgs[answerMsgs.length + i] = hmsgs.get(i); - } - answerMsgs = newAnswerMsgs; - hmsgs.clear(); - } - } - return super.adornRegionServerAnswer(hsi, answerMsgs); - } - } - /** * Subclass so can get at protected methods (none at moment). Also, creates * a FileSystem instance per instantiation. Adds a shutdown own FileSystem @@ -176,10 +104,6 @@ public class MiniHBaseCluster { return super.closeRegion(region); } - public void setHServerInfo(final HServerInfo hsi) { - this.serverInfo = hsi; - } - /* * @param c * @param currentfs We return this if we did not make a new one. @@ -266,8 +190,7 @@ public class MiniHBaseCluster { try { // start up a LocalHBaseCluster hbaseCluster = new LocalHBaseCluster(conf, nMasterNodes, 0, - MiniHBaseCluster.MiniHBaseClusterMaster.class, - MiniHBaseCluster.MiniHBaseClusterRegionServer.class); + HMaster.class, MiniHBaseCluster.MiniHBaseClusterRegionServer.class); // manually add the regionservers as other users for (int i=0; i 0)? compare2 < 0: compare2 > 0); + } +} \ No newline at end of file diff --git a/src/test/java/org/apache/hadoop/hbase/TestHServerAddress.java b/src/test/java/org/apache/hadoop/hbase/TestHServerAddress.java new file mode 100644 index 00000000000..4108ba59606 --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/TestHServerAddress.java @@ -0,0 +1,83 @@ +/** + * Copyright 2011 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.net.InetSocketAddress; + +import org.apache.hadoop.hbase.util.Writables; +import org.junit.Test; + +/** + * Tests for {@link HServerAddress} + */ +public class TestHServerAddress { + @Test + public void testHashCode() { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerAddress hsa2 = new HServerAddress("localhost", 1234); + assertEquals(hsa1.hashCode(), hsa2.hashCode()); + HServerAddress hsa3 = new HServerAddress("localhost", 1235); + assertNotSame(hsa1.hashCode(), hsa3.hashCode()); + } + + @Test + public void testHServerAddress() { + new HServerAddress(); + } + + @Test + public void testHServerAddressInetSocketAddress() { + HServerAddress hsa1 = + new HServerAddress(new InetSocketAddress("localhost", 1234)); + System.out.println(hsa1.toString()); + } + + @Test + public void testHServerAddressString() { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerAddress hsa2 = + new HServerAddress(new InetSocketAddress("localhost", 1234)); + assertTrue(hsa1.equals(hsa2)); + } + + @Test + public void testHServerAddressHServerAddress() { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerAddress hsa2 = new HServerAddress(hsa1); + assertEquals(hsa1, hsa2); + } + + @Test + public void testReadFields() throws IOException { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerAddress hsa2 = new HServerAddress("localhost", 1235); + byte [] bytes = Writables.getBytes(hsa1); + HServerAddress deserialized = + (HServerAddress)Writables.getWritable(bytes, new HServerAddress()); + assertEquals(hsa1, deserialized); + bytes = Writables.getBytes(hsa2); + deserialized = + (HServerAddress)Writables.getWritable(bytes, new HServerAddress()); + assertNotSame(hsa1, deserialized); + } +} \ No newline at end of file diff --git a/src/test/java/org/apache/hadoop/hbase/TestHServerInfo.java b/src/test/java/org/apache/hadoop/hbase/TestHServerInfo.java new file mode 100644 index 00000000000..50e7d1047fe --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/TestHServerInfo.java @@ -0,0 +1,80 @@ +package org.apache.hadoop.hbase; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.hadoop.hbase.util.Writables; +import org.junit.Test; + +public class TestHServerInfo { + + @Test + public void testHashCodeAndEquals() { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerInfo hsi1 = new HServerInfo(hsa1, 1L, 5678); + HServerInfo hsi2 = new HServerInfo(hsa1, 1L, 5678); + HServerInfo hsi3 = new HServerInfo(hsa1, 2L, 5678); + HServerInfo hsi4 = new HServerInfo(hsa1, 1L, 5677); + HServerAddress hsa2 = new HServerAddress("localhost", 1235); + HServerInfo hsi5 = new HServerInfo(hsa2, 1L, 5678); + assertEquals(hsi1.hashCode(), hsi2.hashCode()); + assertTrue(hsi1.equals(hsi2)); + assertNotSame(hsi1.hashCode(), hsi3.hashCode()); + assertFalse(hsi1.equals(hsi3)); + assertNotSame(hsi1.hashCode(), hsi4.hashCode()); + assertFalse(hsi1.equals(hsi4)); + 
assertNotSame(hsi1.hashCode(), hsi5.hashCode()); + assertFalse(hsi1.equals(hsi5)); + } + + @Test + public void testHServerInfoHServerInfo() { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerInfo hsi1 = new HServerInfo(hsa1, 1L, 5678); + HServerInfo hsi2 = new HServerInfo(hsi1); + assertEquals(hsi1, hsi2); + } + + @Test + public void testGetServerAddress() { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerInfo hsi1 = new HServerInfo(hsa1, 1L, 5678); + assertEquals(hsi1.getServerAddress(), hsa1); + } + + @Test + public void testToString() { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerInfo hsi1 = new HServerInfo(hsa1, 1L, 5678); + System.out.println(hsi1.toString()); + } + + @Test + public void testReadFields() throws IOException { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerInfo hsi1 = new HServerInfo(hsa1, 1L, 5678); + HServerAddress hsa2 = new HServerAddress("localhost", 1235); + HServerInfo hsi2 = new HServerInfo(hsa2, 1L, 5678); + byte [] bytes = Writables.getBytes(hsi1); + HServerInfo deserialized = + (HServerInfo)Writables.getWritable(bytes, new HServerInfo()); + assertEquals(hsi1, deserialized); + bytes = Writables.getBytes(hsi2); + deserialized = (HServerInfo)Writables.getWritable(bytes, new HServerInfo()); + assertNotSame(hsi1, deserialized); + } + + @Test + public void testCompareTo() { + HServerAddress hsa1 = new HServerAddress("localhost", 1234); + HServerInfo hsi1 = new HServerInfo(hsa1, 1L, 5678); + HServerAddress hsa2 = new HServerAddress("localhost", 1235); + HServerInfo hsi2 = new HServerInfo(hsa2, 1L, 5678); + assertTrue(hsi1.compareTo(hsi1) == 0); + assertTrue(hsi2.compareTo(hsi2) == 0); + int compare1 = hsi1.compareTo(hsi2); + int compare2 = hsi2.compareTo(hsi1); + assertTrue((compare1 > 0)? compare2 < 0: compare2 > 0); + } +} \ No newline at end of file diff --git a/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java b/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java index 11b94fed5c8..75613b83ebb 100644 --- a/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java +++ b/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java @@ -22,10 +22,15 @@ package org.apache.hadoop.hbase; import java.io.IOException; import java.util.List; import java.util.ArrayList; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; @@ -33,7 +38,6 @@ import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.JVMClusterUtil; -import org.apache.hadoop.hbase.util.Threads; /** * Test whether region rebalancing works. (HBASE-71) @@ -93,8 +97,16 @@ * For HBASE-71. Try a few different configurations of starting and stopping * region servers to see if the assignment or regions is pretty balanced.
* @throws IOException + * @throws InterruptedException */ - public void testRebalancing() throws IOException { + public void testRebalancing() throws IOException, InterruptedException { + HConnection connection = HConnectionManager.getConnection(conf); + CatalogTracker ct = new CatalogTracker(connection); + ct.start(); + Map regions = MetaReader.fullScan(ct); + for (Map.Entry e: regions.entrySet()) { + LOG.info(e); + } table = new HTable(conf, "test"); assertEquals("Test table should have 20 regions", 20, table.getStartKeys().length); @@ -102,39 +114,34 @@ public class TestRegionRebalancing extends HBaseClusterTestCase { // verify that the region assignments are balanced to start out assertRegionsAreBalanced(); - LOG.debug("Adding 2nd region server."); // add a region server - total of 2 - LOG.info("Started=" + + LOG.info("Started second server=" + cluster.startRegionServer().getRegionServer().getServerName()); cluster.getMaster().balance(); assertRegionsAreBalanced(); // add a region server - total of 3 - LOG.debug("Adding 3rd region server."); - LOG.info("Started=" + + LOG.info("Started third server=" + cluster.startRegionServer().getRegionServer().getServerName()); cluster.getMaster().balance(); assertRegionsAreBalanced(); // kill a region server - total of 2 - LOG.debug("Killing the 3rd region server."); - LOG.info("Stopped=" + cluster.stopRegionServer(2, false)); + LOG.info("Stopped third server=" + cluster.stopRegionServer(2, false)); cluster.waitOnRegionServer(2); cluster.getMaster().balance(); assertRegionsAreBalanced(); // start two more region servers - total of 4 - LOG.debug("Adding 3rd region server"); - LOG.info("Started=" + + LOG.info("Readding third server=" + cluster.startRegionServer().getRegionServer().getServerName()); - LOG.debug("Adding 4th region server"); - LOG.info("Started=" + + LOG.info("Added fourth server=" + cluster.startRegionServer().getRegionServer().getServerName()); cluster.getMaster().balance(); assertRegionsAreBalanced(); for (int i = 0; i < 6; i++){ - LOG.debug("Adding " + (i + 5) + "th region server"); + LOG.info("Adding " + (i + 5) + "th region server"); cluster.startRegionServer(); } cluster.getMaster().balance(); @@ -169,7 +176,7 @@ public class TestRegionRebalancing extends HBaseClusterTestCase { int regionCount = getRegionCount(); List servers = getOnlineRegionServers(); - double avg = cluster.getMaster().getServerManager().getAverageLoad(); + double avg = cluster.getMaster().getAverageLoad(); int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop)); int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1; LOG.debug("There are " + servers.size() + " servers and " + regionCount diff --git a/src/test/java/org/apache/hadoop/hbase/TestSerialization.java b/src/test/java/org/apache/hadoop/hbase/TestSerialization.java index 40874fc5862..05f0efc7d7f 100644 --- a/src/test/java/org/apache/hadoop/hbase/TestSerialization.java +++ b/src/test/java/org/apache/hadoop/hbase/TestSerialization.java @@ -130,19 +130,6 @@ public class TestSerialization { HConstants.EMPTY_END_ROW); } - /** - * Test ServerInfo serialization - * @throws Exception - */ - @Test public void testServerInfo() throws Exception { - HServerInfo hsi = new HServerInfo(new HServerAddress("0.0.0.0:123"), -1, - 1245, "default name"); - byte [] b = Writables.getBytes(hsi); - HServerInfo deserializedHsi = - (HServerInfo)Writables.getWritable(b, new HServerInfo()); - assertTrue(hsi.equals(deserializedHsi)); - } - @Test public void testPut() throws Exception{ byte[] row = "row".getBytes(); byte[] fam = 
"fam".getBytes(); @@ -584,4 +571,4 @@ public class TestSerialization { HConstants.REPLICATION_SCOPE_LOCAL)); return htd; } -} \ No newline at end of file +} diff --git a/src/test/java/org/apache/hadoop/hbase/TestServerName.java b/src/test/java/org/apache/hadoop/hbase/TestServerName.java new file mode 100644 index 00000000000..298fbe683e0 --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/TestServerName.java @@ -0,0 +1,56 @@ +/** + * Copyright 2011 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import static org.junit.Assert.*; + +import java.util.HashSet; +import java.util.Set; + +import org.junit.Test; + +public class TestServerName { + @Test + public void testServerName() { + ServerName sn = new ServerName("www.example.org", 1234, 5678); + ServerName sn2 = new ServerName("www.example.org", 1234, 5678); + ServerName sn3 = new ServerName("www.example.org", 1234, 56789); + assertTrue(sn.equals(sn2)); + assertFalse(sn.equals(sn3)); + assertEquals(sn.hashCode(), sn2.hashCode()); + assertNotSame(sn.hashCode(), sn3.hashCode()); + assertEquals(sn.toString(), + ServerName.getServerName("www.example.org", 1234, 5678)); + assertEquals(sn.toString(), + ServerName.getServerName("www.example.org:1234", 5678)); + assertEquals(sn.toString(), + "www.example.org" + ServerName.SERVERNAME_SEPARATOR + + "1234" + ServerName.SERVERNAME_SEPARATOR + "5678"); + } + + @Test + public void getServerStartcodeFromServerName() { + ServerName sn = new ServerName("www.example.org", 1234, 5678); + assertEquals(5678, + ServerName.getServerStartcodeFromServerName(sn.toString())); + assertNotSame(5677, + ServerName.getServerStartcodeFromServerName(sn.toString())); + } +} \ No newline at end of file diff --git a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java index e25184ec630..0a3c53442ce 100644 --- a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java +++ b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java @@ -36,9 +36,9 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.Result; @@ -63,8 +63,8 @@ import org.mockito.Mockito; public class TestCatalogTracker { private static final Log LOG = 
LogFactory.getLog(TestCatalogTracker.class); private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); - private static final HServerAddress HSA = - new HServerAddress("example.org:1234"); + private static final ServerName HSA = + new ServerName("example.org", 1234, System.currentTimeMillis()); private ZooKeeperWatcher watcher; private Abortable abortable; @@ -115,7 +115,7 @@ public class TestCatalogTracker { final CatalogTracker ct = constructAndStartCatalogTracker(connection); try { RootLocationEditor.setRootLocation(this.watcher, - new HServerAddress("example.com:1234")); + new ServerName("example.com", 1234, System.currentTimeMillis())); } finally { // Clean out root location or later tests will be confused... they presume // start fresh in zk. @@ -131,9 +131,9 @@ public class TestCatalogTracker { @Test public void testInterruptWaitOnMetaAndRoot() throws IOException, InterruptedException { final CatalogTracker ct = constructAndStartCatalogTracker(); - HServerAddress hsa = ct.getRootLocation(); + ServerName hsa = ct.getRootLocation(); Assert.assertNull(hsa); - HServerAddress meta = ct.getMetaLocation(); + ServerName meta = ct.getMetaLocation(); Assert.assertNull(meta); Thread t = new Thread() { @Override @@ -169,7 +169,7 @@ public class TestCatalogTracker { final CatalogTracker ct = constructAndStartCatalogTracker(connection); try { RootLocationEditor.setRootLocation(this.watcher, - new HServerAddress("example.com:1234")); + new ServerName("example.com", 1234, System.currentTimeMillis())); Assert.assertFalse(ct.verifyMetaRegionLocation(100)); } finally { // Clean out root location or later tests will be confused... they presume @@ -200,7 +200,7 @@ public class TestCatalogTracker { final CatalogTracker ct = constructAndStartCatalogTracker(connection); try { RootLocationEditor.setRootLocation(this.watcher, - new HServerAddress("example.com:1234")); + new ServerName("example.com", 1234, System.currentTimeMillis())); Assert.assertFalse(ct.verifyRootRegionLocation(100)); } finally { // Clean out root location or later tests will be confused... they presume @@ -232,7 +232,7 @@ public class TestCatalogTracker { @Test public void testNoTimeoutWaitForRoot() throws IOException, InterruptedException, KeeperException { final CatalogTracker ct = constructAndStartCatalogTracker(); - HServerAddress hsa = ct.getRootLocation(); + ServerName hsa = ct.getRootLocation(); Assert.assertNull(hsa); // Now test waiting on root location getting set. @@ -246,7 +246,7 @@ public class TestCatalogTracker { Assert.assertTrue(ct.getRootLocation().equals(hsa)); } - private HServerAddress setRootLocation() throws KeeperException { + private ServerName setRootLocation() throws KeeperException { RootLocationEditor.setRootLocation(this.watcher, HSA); return HSA; } @@ -270,7 +270,7 @@ public class TestCatalogTracker { thenReturn(mockHRI); final CatalogTracker ct = constructAndStartCatalogTracker(connection); - HServerAddress hsa = ct.getMetaLocation(); + ServerName hsa = ct.getMetaLocation(); Assert.assertNull(hsa); // Now test waiting on meta location getting set. @@ -300,8 +300,7 @@ public class TestCatalogTracker { // been assigned. String node = ct.getMetaNodeTracker().getNode(); ZKUtil.createAndFailSilent(this.watcher, node); - MetaEditor.updateMetaLocation(ct, HRegionInfo.FIRST_META_REGIONINFO, - new HServerInfo(HSA, -1, "example.com")); + MetaEditor.updateMetaLocation(ct, HRegionInfo.FIRST_META_REGIONINFO, HSA); ZKUtil.deleteNode(this.watcher, node); // Join the thread... should exit shortly. 
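Every location in these catalog tests is now expressed as a ServerName built from a hostname, a port and a startcode (typically System.currentTimeMillis()); TestServerName above pins the string form down as the three fields joined by ServerName.SERVERNAME_SEPARATOR, which its assertions show to be a comma. A dependency-free sketch of that encoding; the class, method names and the comma constant are assumptions drawn from those assertions:

```java
public final class ServerNameFormat {
  // Assumed from the TestServerName assertions: a comma separator.
  static final String SEPARATOR = ",";

  static String toServerName(final String host, final int port,
      final long startcode) {
    return host + SEPARATOR + port + SEPARATOR + startcode;
  }

  static long startcodeOf(final String serverName) {
    // The startcode is the last separator-delimited field.
    return Long.parseLong(
      serverName.substring(serverName.lastIndexOf(SEPARATOR) + 1));
  }

  public static void main(String [] args) {
    String sn = toServerName("www.example.org", 1234, 5678L);
    System.out.println(sn);              // www.example.org,1234,5678
    System.out.println(startcodeOf(sn)); // 5678
  }
}
```

Using the current time as the startcode, as the tests above do, is what lets two processes on the same host and port be told apart across restarts, which matters once heartbeats no longer identify servers.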
t.join(); diff --git a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTrackerOnCluster.java b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTrackerOnCluster.java index 66c69593110..614c9063394 100644 --- a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTrackerOnCluster.java +++ b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTrackerOnCluster.java @@ -23,7 +23,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.junit.Test; @@ -53,7 +53,8 @@ public class TestCatalogTrackerOnCluster { LOG.error("Abort was called on 'bad root location writer'", e); } }); - HServerAddress nonsense = new HServerAddress("example.org:1234"); + ServerName nonsense = + new ServerName("example.org", 1234, System.currentTimeMillis()); RootLocationEditor.setRootLocation(zookeeper, nonsense); // Bring back up the hbase cluster. See if it can deal with nonsense root // location. diff --git a/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditor.java b/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditor.java index 60320a39077..18e647eb640 100644 --- a/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditor.java +++ b/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditor.java @@ -32,7 +32,7 @@ import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; @@ -116,7 +116,7 @@ public class TestMetaReaderEditor { // Test it works getting a region from user table. List regions = MetaReader.getTableRegions(ct, nameBytes); assertEquals(regionCount, regions.size()); - Pair pair = + Pair pair = MetaReader.getRegion(ct, regions.get(0).getRegionName()); assertEquals(regions.get(0).getEncodedName(), pair.getFirst().getEncodedName()); diff --git a/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java b/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java index 7177b05c13c..c3b23fef973 100644 --- a/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java +++ b/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java @@ -120,6 +120,154 @@ public class TestFromClientSide { // Nothing to do. } + /** + * HBASE-2468 use case 1 and 2: region info de/serialization + */ + @Test + public void testRegionCacheDeSerialization() throws Exception { + // 1. test serialization. 
+ LOG.info("Starting testRegionCacheDeSerialization"); + final byte[] TABLENAME = Bytes.toBytes("testCachePrewarm2"); + final byte[] FAMILY = Bytes.toBytes("family"); + Configuration conf = TEST_UTIL.getConfiguration(); + TEST_UTIL.createTable(TABLENAME, FAMILY); + + // Set up test table: + // Create table: + HTable table = new HTable(conf, TABLENAME); + + // Create multiple regions for this table + TEST_UTIL.createMultiRegions(table, FAMILY); + Scan s = new Scan(); + ResultScanner scanner = table.getScanner(s); + while (scanner.next() != null) continue; + + Path tempPath = new Path(HBaseTestingUtility.getTestDir(), "regions.dat"); + + final String tempFileName = tempPath.toString(); + + FileOutputStream fos = new FileOutputStream(tempFileName); + DataOutputStream dos = new DataOutputStream(fos); + + // serialize the region info and output to a local file. + table.serializeRegionInfo(dos); + dos.flush(); + dos.close(); + + // read a local file and deserialize the region info from it. + FileInputStream fis = new FileInputStream(tempFileName); + DataInputStream dis = new DataInputStream(fis); + + Map deserRegions = + table.deserializeRegionInfo(dis); + dis.close(); + + // regions obtained from meta scanner. + Map loadedRegions = + table.getRegionsInfo(); + + // set the deserialized regions to the global cache. + table.getConnection().clearRegionCache(); + + table.getConnection().prewarmRegionCache(table.getTableName(), + deserRegions); + + // verify whether the 2 maps are identical or not. + assertEquals("Number of cached region is incorrect", + HConnectionManager.getCachedRegionCount(conf, TABLENAME), + loadedRegions.size()); + + // verify each region is prefetched or not. + for (Map.Entry e: loadedRegions.entrySet()) { + HRegionInfo hri = e.getKey(); + assertTrue(HConnectionManager.isRegionCached(conf, + hri.getTableDesc().getName(), hri.getStartKey())); + } + + // delete the temp file + File f = new java.io.File(tempFileName); + f.delete(); + LOG.info("Finishing testRegionCacheDeSerialization"); + } + + /** + * HBASE-2468 use case 3: + */ + @Test + public void testRegionCachePreWarm() throws Exception { + LOG.info("Starting testRegionCachePreWarm"); + final byte [] TABLENAME = Bytes.toBytes("testCachePrewarm"); + Configuration conf = TEST_UTIL.getConfiguration(); + + // Set up test table: + // Create table: + TEST_UTIL.createTable(TABLENAME, FAMILY); + + // disable region cache for the table. + HTable.setRegionCachePrefetch(conf, TABLENAME, false); + assertFalse("The table is disabled for region cache prefetch", + HTable.getRegionCachePrefetch(conf, TABLENAME)); + + HTable table = new HTable(conf, TABLENAME); + + // create many regions for the table. + TEST_UTIL.createMultiRegions(table, FAMILY); + // This count effectively waits until the regions have been + // fully assigned + TEST_UTIL.countRows(table); + table.getConnection().clearRegionCache(); + assertEquals("Clearing cache should have 0 cached ", 0, + HConnectionManager.getCachedRegionCount(conf, TABLENAME)); + + // A Get is suppose to do a region lookup request + Get g = new Get(Bytes.toBytes("aaa")); + table.get(g); + + // only one region should be cached if the cache prefetch is disabled. + assertEquals("Number of cached region is incorrect ", 1, + HConnectionManager.getCachedRegionCount(conf, TABLENAME)); + + // now we enable cached prefetch. 
+ HTable.setRegionCachePrefetch(conf, TABLENAME, true); + assertTrue("The table is enabled for region cache prefetch", + HTable.getRegionCachePrefetch(conf, TABLENAME)); + + HTable.setRegionCachePrefetch(conf, TABLENAME, false); + assertFalse("The table is disabled for region cache prefetch", + HTable.getRegionCachePrefetch(conf, TABLENAME)); + + HTable.setRegionCachePrefetch(conf, TABLENAME, true); + assertTrue("The table is enabled for region cache prefetch", + HTable.getRegionCachePrefetch(conf, TABLENAME)); + + table.getConnection().clearRegionCache(); + + assertEquals("Number of cached region is incorrect ", 0, + HConnectionManager.getCachedRegionCount(conf, TABLENAME)); + + // if there is a cache miss, some additional regions should be prefetched. + Get g2 = new Get(Bytes.toBytes("bbb")); + table.get(g2); + + // Get the configured number of cache read-ahead regions. + int prefetchRegionNumber = conf.getInt("hbase.client.prefetch.limit", 10); + + // the total number of cached regions == region('aaa") + prefeched regions. + LOG.info("Testing how many regions cached"); + assertEquals("Number of cached region is incorrect ", prefetchRegionNumber, + HConnectionManager.getCachedRegionCount(conf, TABLENAME)); + + table.getConnection().clearRegionCache(); + + Get g3 = new Get(Bytes.toBytes("abc")); + table.get(g3); + assertEquals("Number of cached region is incorrect ", prefetchRegionNumber, + HConnectionManager.getCachedRegionCount(conf, TABLENAME)); + + LOG.info("Finishing testRegionCachePreWarm"); + } + + /** * Verifies that getConfiguration returns the same Configuration object used * to create the HTable instance. @@ -3762,150 +3910,7 @@ public class TestFromClientSide { assertTrue(scan.getFamilyMap().containsKey(FAMILY)); } - /** - * HBASE-2468 use case 1 and 2: region info de/serialization - */ - @Test - public void testRegionCacheDeSerialization() throws Exception { - // 1. test serialization. - LOG.info("Starting testRegionCacheDeSerialization"); - final byte[] TABLENAME = Bytes.toBytes("testCachePrewarm2"); - final byte[] FAMILY = Bytes.toBytes("family"); - Configuration conf = TEST_UTIL.getConfiguration(); - TEST_UTIL.createTable(TABLENAME, FAMILY); - - // Set up test table: - // Create table: - HTable table = new HTable(conf, TABLENAME); - - // Create multiple regions for this table - TEST_UTIL.createMultiRegions(table, FAMILY); - - Path tempPath = new Path(HBaseTestingUtility.getTestDir(), "regions.dat"); - - final String tempFileName = tempPath.toString(); - - FileOutputStream fos = new FileOutputStream(tempFileName); - DataOutputStream dos = new DataOutputStream(fos); - - // serialize the region info and output to a local file. - table.serializeRegionInfo(dos); - dos.flush(); - dos.close(); - - // read a local file and deserialize the region info from it. - FileInputStream fis = new FileInputStream(tempFileName); - DataInputStream dis = new DataInputStream(fis); - - Map deserRegions = - table.deserializeRegionInfo(dis); - dis.close(); - - // regions obtained from meta scanner. - Map loadedRegions = - table.getRegionsInfo(); - - // set the deserialized regions to the global cache. - table.getConnection().clearRegionCache(); - - table.getConnection().prewarmRegionCache(table.getTableName(), - deserRegions); - - // verify whether the 2 maps are identical or not. - assertEquals("Number of cached region is incorrect", - HConnectionManager.getCachedRegionCount(conf, TABLENAME), - loadedRegions.size()); - - // verify each region is prefetched or not. 
- for (Map.Entry e: loadedRegions.entrySet()) { - HRegionInfo hri = e.getKey(); - assertTrue(HConnectionManager.isRegionCached(conf, - hri.getTableDesc().getName(), hri.getStartKey())); - } - - // delete the temp file - File f = new java.io.File(tempFileName); - f.delete(); - LOG.info("Finishing testRegionCacheDeSerialization"); - } - - /** - * HBASE-2468 use case 3: - */ - @Test - public void testRegionCachePreWarm() throws Exception { - LOG.info("Starting testRegionCachePreWarm"); - final byte [] TABLENAME = Bytes.toBytes("testCachePrewarm"); - Configuration conf = TEST_UTIL.getConfiguration(); - - // Set up test table: - // Create table: - TEST_UTIL.createTable(TABLENAME, FAMILY); - - // disable region cache for the table. - HTable.setRegionCachePrefetch(conf, TABLENAME, false); - assertFalse("The table is disabled for region cache prefetch", - HTable.getRegionCachePrefetch(conf, TABLENAME)); - - HTable table = new HTable(conf, TABLENAME); - - // create many regions for the table. - TEST_UTIL.createMultiRegions(table, FAMILY); - // This count effectively waits until the regions have been - // fully assigned - TEST_UTIL.countRows(table); - table.getConnection().clearRegionCache(); - assertEquals("Clearing cache should have 0 cached ", 0, - HConnectionManager.getCachedRegionCount(conf, TABLENAME)); - - // A Get is suppose to do a region lookup request - Get g = new Get(Bytes.toBytes("aaa")); - table.get(g); - - // only one region should be cached if the cache prefetch is disabled. - assertEquals("Number of cached region is incorrect ", 1, - HConnectionManager.getCachedRegionCount(conf, TABLENAME)); - - // now we enable cached prefetch. - HTable.setRegionCachePrefetch(conf, TABLENAME, true); - assertTrue("The table is enabled for region cache prefetch", - HTable.getRegionCachePrefetch(conf, TABLENAME)); - - HTable.setRegionCachePrefetch(conf, TABLENAME, false); - assertFalse("The table is disabled for region cache prefetch", - HTable.getRegionCachePrefetch(conf, TABLENAME)); - - HTable.setRegionCachePrefetch(conf, TABLENAME, true); - assertTrue("The table is enabled for region cache prefetch", - HTable.getRegionCachePrefetch(conf, TABLENAME)); - - table.getConnection().clearRegionCache(); - - assertEquals("Number of cached region is incorrect ", 0, - HConnectionManager.getCachedRegionCount(conf, TABLENAME)); - - // if there is a cache miss, some additional regions should be prefetched. - Get g2 = new Get(Bytes.toBytes("bbb")); - table.get(g2); - - // Get the configured number of cache read-ahead regions. - int prefetchRegionNumber = conf.getInt("hbase.client.prefetch.limit", 10); - - // the total number of cached regions == region('aaa") + prefeched regions. 
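The cached-region count asserted around this point comes straight from configuration: on a cache miss the client reads ahead up to hbase.client.prefetch.limit regions (default 10), so the cache ends up holding the looked-up region plus its prefetched neighbours. The lookup, exactly as the test performs it, wrapped in a runnable sketch (the class name is ours):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class PrefetchLimit {
  public static void main(String [] args) {
    Configuration conf = HBaseConfiguration.create();
    // Same lookup the test makes before asserting the cached-region count.
    int prefetchRegionNumber = conf.getInt("hbase.client.prefetch.limit", 10);
    System.out.println("prefetch limit = " + prefetchRegionNumber);
  }
}
```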
- LOG.info("Testing how many regions cached"); - assertEquals("Number of cached region is incorrect ", prefetchRegionNumber, - HConnectionManager.getCachedRegionCount(conf, TABLENAME)); - - table.getConnection().clearRegionCache(); - - Get g3 = new Get(Bytes.toBytes("abc")); - table.get(g3); - assertEquals("Number of cached region is incorrect ", prefetchRegionNumber, - HConnectionManager.getCachedRegionCount(conf, TABLENAME)); - - LOG.info("Finishing testRegionCachePreWarm"); - } - + @Test public void testIncrement() throws Exception { LOG.info("Starting testIncrement"); diff --git a/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterObserver.java b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterObserver.java index 0829c8efe03..4abbdfe42d4 100644 --- a/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterObserver.java +++ b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterObserver.java @@ -20,15 +20,24 @@ package org.apache.hadoop.hbase.coprocessor; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; @@ -41,13 +50,6 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.Map; - -import static org.junit.Assert.*; - /** * Tests invocation of the {@link org.apache.hadoop.hbase.coprocessor.MasterObserver} * interface hooks at all appropriate times during normal HMaster operations. 
@@ -217,14 +219,14 @@ public class TestMasterObserver {
   @Override
   public void preMove(ObserverContext env,
-      HRegionInfo region, HServerInfo srcServer, HServerInfo destServer)
+      HRegionInfo region, ServerName srcServer, ServerName destServer)
   throws UnknownRegionException {
     preMoveCalled = true;
   }
 
   @Override
   public void postMove(ObserverContext env, HRegionInfo region,
-      HServerInfo srcServer, HServerInfo destServer)
+      ServerName srcServer, ServerName destServer)
   throws UnknownRegionException {
     postMoveCalled = true;
   }
@@ -445,15 +447,17 @@ public class TestMasterObserver {
     Map regions = table.getRegionsInfo();
     assertFalse(regions.isEmpty());
-    Map.Entry firstRegion =
+    Map.Entry firstRegion =
       regions.entrySet().iterator().next();
     // try to force a move
-    Collection servers = master.getClusterStatus().getServerInfo();
+    Collection servers = master.getClusterStatus().getServers();
     String destName = null;
-    for (HServerInfo info : servers) {
-      if (!info.getServerAddress().equals(firstRegion.getValue())) {
-        destName = info.getServerName();
+    for (ServerName info : servers) {
+      HServerAddress hsa =
+        new HServerAddress(info.getHostname(), info.getPort());
+      if (!hsa.equals(firstRegion.getValue())) {
+        destName = info.toString();
         break;
       }
     }
@@ -471,7 +475,7 @@ public class TestMasterObserver {
     master.balanceSwitch(false);
     // move half the open regions from RS 0 to RS 1
     HRegionServer rs = cluster.getRegionServer(0);
-    byte[] destRS = Bytes.toBytes(cluster.getRegionServer(1).getServerName());
+    byte[] destRS = Bytes.toBytes(cluster.getRegionServer(1).getServerName().toString());
    List openRegions = rs.getOnlineRegions();
     int moveCnt = openRegions.size()/2;
     for (int i=0; i<moveCnt; i++) {
diff --git a/src/test/java/org/apache/hadoop/hbase/master/OOMEHMaster.java b/src/test/java/org/apache/hadoop/hbase/master/OOMEHMaster.java
deleted file mode 100644
- * <code>${HBASE_HOME}/bin/hbase ./bin/hbase org.apache.hadoop.hbase.OOMEHMaster start</code>.
- */
-public class OOMEHMaster extends HMaster {
-  private List<byte []> retainer = new ArrayList<byte []>();
-
-  public OOMEHMaster(HBaseConfiguration conf)
-  throws IOException, KeeperException, InterruptedException {
-    super(conf);
-  }
-
-  @Override
-  public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg[] msgs,
-      HRegionInfo[] mostLoadedRegions)
-  throws IOException {
-    // Retain 1M.
- this.retainer.add(new byte [1024 * 1024]); - return super.regionServerReport(serverInfo, msgs, mostLoadedRegions); - } - - public static void main(String[] args) throws Exception { - new HMasterCommandLine(OOMEHMaster.class).doMain(args); - } -} diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java b/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java index 1a199411e6b..75397f7ab13 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java @@ -30,9 +30,10 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; @@ -67,7 +68,7 @@ public class TestActiveMasterManager { } catch(KeeperException.NoNodeException nne) {} // Create the master node with a dummy address - HServerAddress master = new HServerAddress("localhost", 1); + ServerName master = new ServerName("localhost", 1, System.currentTimeMillis()); // Should not have a master yet DummyMaster dummyMaster = new DummyMaster(); ActiveMasterManager activeMasterManager = new ActiveMasterManager(zk, @@ -106,8 +107,10 @@ public class TestActiveMasterManager { } catch(KeeperException.NoNodeException nne) {} // Create the master node with a dummy address - HServerAddress firstMasterAddress = new HServerAddress("localhost", 1); - HServerAddress secondMasterAddress = new HServerAddress("localhost", 2); + ServerName firstMasterAddress = + new ServerName("localhost", 1, System.currentTimeMillis()); + ServerName secondMasterAddress = + new ServerName("localhost", 2, System.currentTimeMillis()); // Should not have a master yet DummyMaster ms1 = new DummyMaster(); @@ -177,8 +180,10 @@ public class TestActiveMasterManager { * @throws KeeperException */ private void assertMaster(ZooKeeperWatcher zk, - HServerAddress expectedAddress) throws KeeperException { - HServerAddress readAddress = ZKUtil.getDataAsAddress(zk, zk.masterAddressZNode); + ServerName expectedAddress) + throws KeeperException { + ServerName readAddress = + new ServerName(Bytes.toString(ZKUtil.getData(zk, zk.masterAddressZNode))); assertNotNull(readAddress); assertTrue(expectedAddress.equals(readAddress)); } @@ -188,8 +193,7 @@ public class TestActiveMasterManager { ActiveMasterManager manager; boolean isActiveMaster; - public WaitToBeMasterThread(ZooKeeperWatcher zk, - HServerAddress address) { + public WaitToBeMasterThread(ZooKeeperWatcher zk, ServerName address) { this.manager = new ActiveMasterManager(zk, address, new DummyMaster()); isActiveMaster = false; @@ -248,7 +252,7 @@ public class TestActiveMasterManager { } @Override - public String getServerName() { + public ServerName getServerName() { return null; } diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java b/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java index c8e8f7566f9..ada2af6fce9 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java +++ 
b/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.executor.ExecutorService; @@ -84,8 +85,8 @@ public class TestCatalogJanitor { } @Override - public String getServerName() { - return "mockserver.example.org,1234,-1L"; + public ServerName getServerName() { + return new ServerName("mockserver.example.org", 1234, -1L); } @Override @@ -161,7 +162,7 @@ public class TestCatalogJanitor { } @Override - public String getServerName() { + public ServerName getServerName() { return null; } diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java b/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java index 915cdf6c923..752e12b15b8 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java @@ -19,6 +19,8 @@ */ package org.apache.hadoop.hbase.master; +import java.net.InetAddress; + import junit.framework.Assert; import org.apache.commons.logging.Log; @@ -26,9 +28,8 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClockOutOfSyncException; import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.junit.Test; @@ -53,7 +54,7 @@ public class TestClockSkewDetection { } @Override - public String getServerName() { + public ServerName getServerName() { return null; } @@ -72,22 +73,20 @@ public class TestClockSkewDetection { @Override public void stop(String why) { - }}, null, null); + }}, null); LOG.debug("regionServerStartup 1"); - HServerInfo hsi1 = new HServerInfo(new HServerAddress("example.org:1234"), - System.currentTimeMillis(), -1, "example.com"); - sm.regionServerStartup(hsi1, System.currentTimeMillis()); + InetAddress ia1 = InetAddress.getLocalHost(); + sm.regionServerStartup(ia1, 1234, -1, System.currentTimeMillis()); long maxSkew = 30000; try { LOG.debug("regionServerStartup 2"); - HServerInfo hsi2 = new HServerInfo(new HServerAddress("example.org:1235"), - System.currentTimeMillis(), -1, "example.com"); - sm.regionServerStartup(hsi2, System.currentTimeMillis() - maxSkew * 2); + InetAddress ia2 = InetAddress.getLocalHost(); + sm.regionServerStartup(ia2, 1235, -1, System.currentTimeMillis() - maxSkew * 2); Assert.assertTrue("HMaster should have thrown an ClockOutOfSyncException " - + "but didn't.", false); + + "but didn't.", false); } catch(ClockOutOfSyncException e) { //we want an exception LOG.info("Recieved expected exception: "+e); diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java index 61baf7dd894..76a8a85c53c 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java @@ -20,34 +20,25 @@ package 
org.apache.hadoop.hbase.master; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import org.apache.hadoop.hbase.ServerName; import org.junit.Test; public class TestDeadServer { @Test public void testIsDead() { - DeadServer ds = new DeadServer(2); - final String hostname123 = "127.0.0.1,123,3"; - assertFalse(ds.isDeadServer(hostname123, false)); - assertFalse(ds.isDeadServer(hostname123, true)); + DeadServer ds = new DeadServer(); + final ServerName hostname123 = new ServerName("127.0.0.1", 123, 3L); ds.add(hostname123); - assertTrue(ds.isDeadServer(hostname123, false)); - assertFalse(ds.isDeadServer("127.0.0.1:1", true)); - assertFalse(ds.isDeadServer("127.0.0.1:1234", true)); - assertTrue(ds.isDeadServer("127.0.0.1:123", true)); assertTrue(ds.areDeadServersInProgress()); ds.finish(hostname123); assertFalse(ds.areDeadServersInProgress()); - final String hostname1234 = "127.0.0.2,1234,4"; + final ServerName hostname1234 = new ServerName("127.0.0.2", 1234, 4L); ds.add(hostname1234); - assertTrue(ds.isDeadServer(hostname123, false)); - assertTrue(ds.isDeadServer(hostname1234, false)); assertTrue(ds.areDeadServersInProgress()); ds.finish(hostname1234); assertFalse(ds.areDeadServersInProgress()); - final String hostname12345 = "127.0.0.2,12345,4"; + final ServerName hostname12345 = new ServerName("127.0.0.2", 12345, 4L); ds.add(hostname12345); - assertTrue(ds.isDeadServer(hostname1234, false)); - assertTrue(ds.isDeadServer(hostname12345, false)); assertTrue(ds.areDeadServersInProgress()); ds.finish(hostname12345); assertFalse(ds.areDeadServersInProgress()); @@ -55,14 +46,14 @@ public class TestDeadServer { // Already dead = 127.0.0.1,9090,112321 // Coming back alive = 127.0.0.1,9090,223341 - final String deadServer = "127.0.0.1,9090,112321"; + final ServerName deadServer = new ServerName("127.0.0.1", 9090, 112321L); assertFalse(ds.cleanPreviousInstance(deadServer)); ds.add(deadServer); assertTrue(ds.isDeadServer(deadServer)); - final String deadServerHostComingAlive = "127.0.0.1,9090,112321"; + final ServerName deadServerHostComingAlive = + new ServerName("127.0.0.1", 9090, 112321L); assertTrue(ds.cleanPreviousInstance(deadServerHostComingAlive)); assertFalse(ds.isDeadServer(deadServer)); assertFalse(ds.cleanPreviousInstance(deadServerHostComingAlive)); - } } \ No newline at end of file diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java b/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java index 6f718f15bfa..ba87bc0e1ce 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java @@ -19,11 +19,12 @@ */ package org.apache.hadoop.hbase.master; -import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.*; - +import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_final_transistion_failed; +import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_task_acquired; +import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_task_err; +import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_task_resigned; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import java.io.IOException; import java.util.Iterator; @@ -62,8 +63,8 @@ import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.log4j.Level; import 
org.apache.log4j.Logger; import org.apache.zookeeper.KeeperException; -import org.junit.Before; import org.junit.After; +import org.junit.Before; import org.junit.Test; public class TestDistributedLogSplitting { @@ -156,7 +157,7 @@ public class TestDistributedLogSplitting { HRegionServer hrs = rsts.get(0).getRegionServer(); Path rootdir = FSUtils.getRootDir(conf); final Path logDir = new Path(rootdir, - HLog.getHLogDirectoryName(hrs.getServerName())); + HLog.getHLogDirectoryName(hrs.getServerName().toString())); installTable(new ZooKeeperWatcher(conf, "table-creation", null), "table", "family", 40); @@ -205,7 +206,7 @@ public class TestDistributedLogSplitting { HRegionServer hrs = rsts.get(0).getRegionServer(); Path rootdir = FSUtils.getRootDir(conf); final Path logDir = new Path(rootdir, - HLog.getHLogDirectoryName(hrs.getServerName())); + HLog.getHLogDirectoryName(hrs.getServerName().toString())); installTable(new ZooKeeperWatcher(conf, "table-creation", null), "table", "family", 40); @@ -253,11 +254,10 @@ public class TestDistributedLogSplitting { HRegionServer hrs = rsts.get(0).getRegionServer(); Path rootdir = FSUtils.getRootDir(conf); final Path logDir = new Path(rootdir, - HLog.getHLogDirectoryName(hrs.getServerName())); + HLog.getHLogDirectoryName(hrs.getServerName().toString())); installTable(new ZooKeeperWatcher(conf, "table-creation", null), "table", "family", 40); - byte[] table = Bytes.toBytes("table"); makeHLog(hrs.getWAL(), hrs.getOnlineRegions(), "table", NUM_LOG_LINES, 100); @@ -400,11 +400,6 @@ public class TestDistributedLogSplitting { master.assignmentManager.waitUntilNoRegionsInTransition(60000); } - private void blockUntilRIT(ZooKeeperWatcher zkw) - throws KeeperException, InterruptedException { - ZKAssign.blockUntilRIT(zkw); - } - private void putData(HRegion region, byte[] startRow, int numRows, byte [] qf, byte [] ...families) throws IOException { diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestHMasterRPCException.java b/src/test/java/org/apache/hadoop/hbase/master/TestHMasterRPCException.java index a1bb6af1943..5cb7c251cad 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestHMasterRPCException.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestHMasterRPCException.java @@ -23,10 +23,12 @@ package org.apache.hadoop.hbase.master; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import java.net.InetSocketAddress; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ipc.HBaseRPC; import org.apache.hadoop.hbase.ipc.HMasterInterface; import org.apache.hadoop.ipc.RemoteException; @@ -43,12 +45,11 @@ public class TestHMasterRPCException { HMaster hm = new HMaster(conf); - HServerAddress hma = hm.getMasterAddress(); + ServerName sm = hm.getServerName(); + InetSocketAddress isa = new InetSocketAddress(sm.getHostname(), sm.getPort()); try { - HMasterInterface inf = - (HMasterInterface) HBaseRPC.getProxy( - HMasterInterface.class, HMasterInterface.VERSION, - hma.getInetSocketAddress(), conf, 100); + HMasterInterface inf = (HMasterInterface) HBaseRPC.getProxy( + HMasterInterface.class, HMasterInterface.VERSION, isa, conf, 100); inf.isMasterRunning(); fail(); } catch (RemoteException ex) { diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestLoadBalancer.java 
b/src/test/java/org/apache/hadoop/hbase/master/TestLoadBalancer.java index 6b2dc538c03..d9099978e55 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestLoadBalancer.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestLoadBalancer.java @@ -20,10 +20,11 @@ package org.apache.hadoop.hbase.master; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.util.ArrayList; -import java.util.Arrays; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -39,9 +40,8 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan; import org.apache.hadoop.hbase.util.Bytes; import org.junit.BeforeClass; @@ -138,6 +138,38 @@ public class TestLoadBalancer { new int [] { 12, 100 }, }; + @Test + public void testRandomizer() { + for(int [] mockCluster : clusterStateMocks) { + if (mockCluster.length < 5) continue; + Map> servers = + mockClusterServers(mockCluster); + for (Map.Entry> e: servers.entrySet()) { + List original = e.getValue(); + if (original.size() < 5) continue; + // Try ten times in case random chances upon original order more than + // one or two times in a row. + boolean same = true; + for (int i = 0; i < 10 && same; i++) { + List copy = new ArrayList(original); + System.out.println("Randomizing before " + copy.size()); + for (HRegionInfo hri: copy) { + System.out.println(hri.getEncodedName()); + } + List randomized = LoadBalancer.randomize(copy); + System.out.println("Randomizing after " + randomized.size()); + for (HRegionInfo hri: randomized) { + System.out.println(hri.getEncodedName()); + } + if (original.equals(randomized)) continue; + same = false; + break; + } + assertFalse(same); + } + } + } + /** * Test the load balancing algorithm. * @@ -150,13 +182,14 @@ public class TestLoadBalancer { public void testBalanceCluster() throws Exception { for(int [] mockCluster : clusterStateMocks) { - Map> servers = mockClusterServers(mockCluster); - LOG.info("Mock Cluster : " + printMock(servers) + " " + printStats(servers)); + Map> servers = mockClusterServers(mockCluster); + List list = convertToList(servers); + LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list)); List plans = loadBalancer.balanceCluster(servers); - List balancedCluster = reconcile(servers, plans); + List balancedCluster = reconcile(list, plans); LOG.info("Mock Balance : " + printMock(balancedCluster)); assertClusterAsBalanced(balancedCluster); - for(Map.Entry> entry : servers.entrySet()) { + for(Map.Entry> entry : servers.entrySet()) { returnRegions(entry.getValue()); returnServer(entry.getKey()); } @@ -168,13 +201,13 @@ public class TestLoadBalancer { * Invariant is that all servers have between floor(avg) and ceiling(avg) * number of regions. 
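Stated as integer arithmetic, the invariant above fixes exactly the two bounds the assertion body computes below; a standalone restatement of that math (class and method names are ours):

```java
public final class BalanceBounds {
  /** floor(numRegions / numServers): fewest regions a balanced server holds. */
  static int min(final int numRegions, final int numServers) {
    return numRegions / numServers;
  }

  /** ceiling of the average: most regions a balanced server holds. */
  static int max(final int numRegions, final int numServers) {
    int min = numRegions / numServers;
    return numRegions % numServers == 0 ? min : min + 1;
  }

  public static void main(String [] args) {
    // 23 regions across 5 servers: every server should hold 4 or 5.
    System.out.println(min(23, 5) + ".." + max(23, 5)); // prints 4..5
  }
}
```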
*/ - public void assertClusterAsBalanced(List servers) { + public void assertClusterAsBalanced(List servers) { int numServers = servers.size(); int numRegions = 0; int maxRegions = 0; int minRegions = Integer.MAX_VALUE; - for(HServerInfo server : servers) { - int nr = server.getLoad().getNumberOfRegions(); + for(LoadBalancer.ServerAndLoad server : servers) { + int nr = server.getLoad(); if(nr > maxRegions) { maxRegions = nr; } @@ -190,9 +223,9 @@ public class TestLoadBalancer { int min = numRegions / numServers; int max = numRegions % numServers == 0 ? min : min + 1; - for(HServerInfo server : servers) { - assertTrue(server.getLoad().getNumberOfRegions() <= max); - assertTrue(server.getLoad().getNumberOfRegions() >= min); + for(LoadBalancer.ServerAndLoad server : servers) { + assertTrue(server.getLoad() <= max); + assertTrue(server.getLoad() >= min); } } @@ -208,12 +241,13 @@ public class TestLoadBalancer { for(int [] mock : regionsAndServersMocks) { LOG.debug("testImmediateAssignment with " + mock[0] + " regions and " + mock[1] + " servers"); List regions = randomRegions(mock[0]); - List servers = randomServers(mock[1], 0); - Map assignments = - LoadBalancer.immediateAssignment(regions, servers); - assertImmediateAssignment(regions, servers, assignments); + List servers = randomServers(mock[1], 0); + List list = getListOfServerNames(servers); + Map assignments = + LoadBalancer.immediateAssignment(regions, list); + assertImmediateAssignment(regions, list, assignments); returnRegions(regions); - returnServers(servers); + returnServers(list); } } @@ -224,7 +258,7 @@ public class TestLoadBalancer { * @param assignments */ private void assertImmediateAssignment(List regions, - List servers, Map assignments) { + List servers, Map assignments) { for(HRegionInfo region : regions) { assertTrue(assignments.containsKey(region)); } @@ -243,9 +277,10 @@ public class TestLoadBalancer { for(int [] mock : regionsAndServersMocks) { LOG.debug("testBulkAssignment with " + mock[0] + " regions and " + mock[1] + " servers"); List regions = randomRegions(mock[0]); - List servers = randomServers(mock[1], 0); - Map> assignments = - LoadBalancer.roundRobinAssignment(regions.toArray(new HRegionInfo[regions.size()]), servers); + List servers = randomServers(mock[1], 0); + List list = getListOfServerNames(servers); + Map> assignments = + LoadBalancer.roundRobinAssignment(regions, list); float average = (float)regions.size()/servers.size(); int min = (int)Math.floor(average); int max = (int)Math.ceil(average); @@ -255,7 +290,7 @@ public class TestLoadBalancer { } } returnRegions(regions); - returnServers(servers); + returnServers(list); } } @@ -267,31 +302,43 @@ public class TestLoadBalancer { @Test public void testRetainAssignment() throws Exception { // Test simple case where all same servers are there - List servers = randomServers(10, 10); + List servers = randomServers(10, 10); List regions = randomRegions(100); - Map existing = - new TreeMap(); - for (int i=0;i existing = + new TreeMap(); + for (int i = 0; i < regions.size(); i++) { + existing.put(regions.get(i), servers.get(i % servers.size()).getServerName()); } - Map> assignment = - LoadBalancer.retainAssignment(existing, servers); - assertRetainedAssignment(existing, servers, assignment); + List listOfServerNames = getListOfServerNames(servers); + Map> assignment = + LoadBalancer.retainAssignment(existing, listOfServerNames); + assertRetainedAssignment(existing, listOfServerNames, assignment); // Include two new servers that were not there before - List 
servers2 = new ArrayList(servers);
+    List servers2 =
+      new ArrayList(servers);
     servers2.add(randomServer(10));
     servers2.add(randomServer(10));
-    assignment = LoadBalancer.retainAssignment(existing, servers2);
-    assertRetainedAssignment(existing, servers2, assignment);
+    listOfServerNames = getListOfServerNames(servers2);
+    assignment = LoadBalancer.retainAssignment(existing, listOfServerNames);
+    assertRetainedAssignment(existing, listOfServerNames, assignment);
 
     // Remove two of the servers that were previously there
-    List servers3 = new ArrayList(servers);
+    List servers3 =
+      new ArrayList(servers);
     servers3.remove(servers3.size()-1);
     servers3.remove(servers3.size()-2);
-    assignment = LoadBalancer.retainAssignment(existing, servers3);
-    assertRetainedAssignment(existing, servers3, assignment);
+    listOfServerNames = getListOfServerNames(servers3);
+    assignment = LoadBalancer.retainAssignment(existing, listOfServerNames);
+    assertRetainedAssignment(existing, listOfServerNames, assignment);
+  }
+
+  private List getListOfServerNames(final List sals) {
+    List list = new ArrayList();
+    for (LoadBalancer.ServerAndLoad e: sals) {
+      list.add(e.getServerName());
+    }
+    return list;
   }
 
   /**
@@ -308,12 +355,12 @@
   * @param assignment
   */
  private void assertRetainedAssignment(
-      Map existing, List servers,
-      Map> assignment) {
+      Map existing, List servers,
+      Map> assignment) {
     // Verify condition 1, every region assigned, and to online server
-    Set onlineServerSet = new TreeSet(servers);
+    Set onlineServerSet = new TreeSet(servers);
     Set assignedRegions = new TreeSet();
-    for (Map.Entry> a : assignment.entrySet()) {
+    for (Map.Entry> a : assignment.entrySet()) {
       assertTrue("Region assigned to server that was not listed as online", onlineServerSet.contains(a.getKey()));
       for (HRegionInfo r : a.getValue()) assignedRegions.add(r);
@@ -321,23 +368,23 @@
     assertEquals(existing.size(), assignedRegions.size());
     // Verify condition 2, if server had existing assignment, must have same
-    Set onlineAddresses = new TreeSet();
-    for (HServerInfo s : servers) onlineAddresses.add(s.getServerAddress());
-    for (Map.Entry> a : assignment.entrySet()) {
+    Set onlineAddresses = new TreeSet();
+    for (ServerName s : servers) onlineAddresses.add(s);
+    for (Map.Entry> a : assignment.entrySet()) {
       for (HRegionInfo r : a.getValue()) {
-        HServerAddress address = existing.get(r);
+        ServerName address = existing.get(r);
         if (address != null && onlineAddresses.contains(address)) {
-          assertTrue(a.getKey().getServerAddress().equals(address));
+          assertTrue(a.getKey().equals(address));
         }
       }
     }
   }
 
-  private String printStats(Map> servers) {
+  private String printStats(List servers) {
     int numServers = servers.size();
     int totalRegions = 0;
-    for(HServerInfo server : servers.keySet()) {
-      totalRegions += server.getLoad().getNumberOfRegions();
+    for(LoadBalancer.ServerAndLoad server : servers) {
+      totalRegions += server.getLoad();
     }
     float average = (float)totalRegions / numServers;
     int max = (int)Math.ceil(average);
@@ -345,20 +392,31 @@
     return "[srvr=" + numServers + " rgns=" + totalRegions + " avg=" + average + " max=" + max + " min=" + min + "]";
   }
 
-  private String printMock(Map> servers) {
-    return printMock(Arrays.asList(servers.keySet().toArray(new HServerInfo[servers.size()])));
+  private List convertToList(final Map> servers) {
+    List list =
+      new ArrayList(servers.size());
+    for (Map.Entry> e: servers.entrySet()) {
+      list.add(new
LoadBalancer.ServerAndLoad(e.getKey(), e.getValue().size())); + } + return list; } - private String printMock(List balancedCluster) { - SortedSet sorted = new TreeSet(balancedCluster); - HServerInfo [] arr = sorted.toArray(new HServerInfo[sorted.size()]); + private String printMock(Map> servers) { + return printMock(convertToList(servers)); + } + + private String printMock(List balancedCluster) { + SortedSet sorted = + new TreeSet(balancedCluster); + LoadBalancer.ServerAndLoad [] arr = + sorted.toArray(new LoadBalancer.ServerAndLoad[sorted.size()]); StringBuilder sb = new StringBuilder(sorted.size() * 4 + 4); sb.append("{ "); - for(int i=0;i reconcile( - Map> servers, List plans) { - if(plans != null) { - for(RegionPlan plan : plans) { - plan.getSource().getLoad().setNumberOfRegions( - plan.getSource().getLoad().getNumberOfRegions() - 1); - plan.getDestination().getLoad().setNumberOfRegions( - plan.getDestination().getLoad().getNumberOfRegions() + 1); - } + private List reconcile(List list, + List plans) { + List result = + new ArrayList(list.size()); + if (plans == null) return result; + Map map = + new HashMap(list.size()); + for (RegionPlan plan : plans) { + ServerName source = plan.getSource(); + updateLoad(map, source, -1); + ServerName destination = plan.getDestination(); + updateLoad(map, destination, +1); } - return Arrays.asList(servers.keySet().toArray(new HServerInfo[servers.size()])); + result.clear(); + result.addAll(map.values()); + return result; } - private Map> mockClusterServers( + private void updateLoad(Map map, + final ServerName sn, final int diff) { + LoadBalancer.ServerAndLoad sal = map.get(sn); + if (sal == null) return; + sal = new LoadBalancer.ServerAndLoad(sn, sal.getLoad() + diff); + map.put(sn, sal); + } + + private Map> mockClusterServers( int [] mockCluster) { int numServers = mockCluster.length; - Map> servers = - new TreeMap>(); - for(int i=0;i> servers = + new TreeMap>(); + for(int i = 0; i < numServers; i++) { int numRegions = mockCluster[i]; - HServerInfo server = randomServer(numRegions); + LoadBalancer.ServerAndLoad sal = randomServer(0); List regions = randomRegions(numRegions); - servers.put(server, regions); + servers.put(sal.getServerName(), regions); } return servers; } @@ -426,36 +497,34 @@ public class TestLoadBalancer { regionQueue.addAll(regions); } - private Queue serverQueue = new LinkedList(); + private Queue serverQueue = new LinkedList(); - private HServerInfo randomServer(int numRegions) { - if(!serverQueue.isEmpty()) { - HServerInfo server = this.serverQueue.poll(); - server.getLoad().setNumberOfRegions(numRegions); - return server; + private LoadBalancer.ServerAndLoad randomServer(final int numRegionsPerServer) { + if (!this.serverQueue.isEmpty()) { + ServerName sn = this.serverQueue.poll(); + return new LoadBalancer.ServerAndLoad(sn, numRegionsPerServer); } String host = "127.0.0.1"; int port = rand.nextInt(60000); long startCode = rand.nextLong(); - HServerInfo hsi = - new HServerInfo(new HServerAddress(host, port), startCode, port, host); - hsi.getLoad().setNumberOfRegions(numRegions); - return hsi; + ServerName sn = new ServerName(host, port, startCode); + return new LoadBalancer.ServerAndLoad(sn, numRegionsPerServer); } - private List randomServers(int numServers, int numRegionsPerServer) { - List servers = new ArrayList(numServers); - for(int i=0;i randomServers(int numServers, int numRegionsPerServer) { + List servers = + new ArrayList(numServers); + for (int i = 0; i < numServers; i++) { 
      servers.add(randomServer(numRegionsPerServer));
     }
     return servers;
   }
 
-  private void returnServer(HServerInfo server) {
+  private void returnServer(ServerName server) {
     serverQueue.add(server);
   }
 
-  private void returnServers(List<HServerInfo> servers) {
-    serverQueue.addAll(servers);
+  private void returnServers(List<ServerName> servers) {
+    this.serverQueue.addAll(servers);
   }
 }
diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java b/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java
index 19220fbc0f8..fc05e474313 100644
--- a/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java
+++ b/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
 import org.apache.hadoop.hbase.replication.ReplicationZookeeper;
 import org.apache.hadoop.hbase.replication.regionserver.Replication;
@@ -71,7 +72,8 @@ public class TestLogsCleaner {
     Path oldLogDir = new Path(HBaseTestingUtility.getTestDir(),
         HConstants.HREGION_OLDLOGDIR_NAME);
-    String fakeMachineName = URLEncoder.encode(server.getServerName(), "UTF8");
+    String fakeMachineName =
+      URLEncoder.encode(server.getServerName().toString(), "UTF8");
 
     FileSystem fs = FileSystem.get(conf);
     LogCleaner cleaner = new LogCleaner(1000, server, conf, fs, oldLogDir);
@@ -146,8 +148,8 @@ public class TestLogsCleaner {
     }
 
     @Override
-    public String getServerName() {
-      return "regionserver,60020,000000";
+    public ServerName getServerName() {
+      return new ServerName("regionserver,60020,000000");
     }
 
     @Override
diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java b/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
index c4ea83ff996..f473c807fe0 100644
--- a/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
+++ b/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
@@ -25,7 +25,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.catalog.MetaReader;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
@@ -75,7 +75,7 @@ public class TestMaster {
     TEST_UTIL.loadTable(new HTable(TEST_UTIL.getConfiguration(), TABLENAME),
       FAMILYNAME);
 
-    List<Pair<HRegionInfo, HServerAddress>> tableRegions =
+    List<Pair<HRegionInfo, ServerName>> tableRegions =
       MetaReader.getTableRegionsAndLocations(m.getCatalogTracker(),
         Bytes.toString(TABLENAME));
     LOG.info("Regions after load: " + Joiner.on(',').join(tableRegions));
@@ -106,10 +106,10 @@ public class TestMaster {
     // We have three regions because one is split-in-progress
     assertEquals(3, tableRegions.size());
     LOG.info("Making sure we can call getTableRegionClosest while opening");
-    Pair<HRegionInfo, HServerAddress> pair =
+    Pair<HRegionInfo, ServerName> pair =
       m.getTableRegionForRow(TABLENAME, Bytes.toBytes("cde"));
     LOG.info("Result is: " + pair);
-    Pair<HRegionInfo, HServerAddress> tableRegionFromName =
+    Pair<HRegionInfo, ServerName> tableRegionFromName =
       MetaReader.getRegion(m.getCatalogTracker(),
         pair.getFirst().getRegionName());
     assertEquals(tableRegionFromName.getFirst(), pair.getFirst());
diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index 5a334e0fae1..6b95eee2989 100644
--- a/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -36,9 +36,9 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.executor.RegionTransitionData;
 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
 import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
@@ -85,7 +85,7 @@ public class TestMasterFailover {
     // verify only one is the active master and we have right number
     int numActive = 0;
     int activeIndex = -1;
-    String activeName = null;
+    ServerName activeName = null;
     for (int i = 0; i < masterThreads.size(); i++) {
       if (masterThreads.get(i).getMaster().isActiveMaster()) {
         numActive++;
@@ -278,8 +278,7 @@ public class TestMasterFailover {
 
     // Let's just assign everything to first RS
     HRegionServer hrs = cluster.getRegionServer(0);
-    String serverName = hrs.getServerName();
-    HServerInfo hsiAlive = hrs.getServerInfo();
+    ServerName serverName = hrs.getServerName();
 
     // we'll need some regions to already be assigned out properly on live RS
     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
@@ -292,12 +291,12 @@ public class TestMasterFailover {
     // now actually assign them
     for (HRegionInfo hri : enabledAndAssignedRegions) {
       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
-          new RegionPlan(hri, null, hsiAlive));
+          new RegionPlan(hri, null, serverName));
       master.assignRegion(hri);
     }
     for (HRegionInfo hri : disabledAndAssignedRegions) {
       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
-          new RegionPlan(hri, null, hsiAlive));
+          new RegionPlan(hri, null, serverName));
       master.assignRegion(hri);
     }
 
@@ -583,12 +582,10 @@ public class TestMasterFailover {
 
     // The first RS will stay online
     HRegionServer hrs = cluster.getRegionServer(0);
-    HServerInfo hsiAlive = hrs.getServerInfo();
 
     // The second RS is going to be hard-killed
     HRegionServer hrsDead = cluster.getRegionServer(1);
-    String deadServerName = hrsDead.getServerName();
-    HServerInfo hsiDead = hrsDead.getServerInfo();
+    ServerName deadServerName = hrsDead.getServerName();
 
     // we'll need some regions to already be assigned out properly on live RS
     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
@@ -601,12 +598,12 @@ public class TestMasterFailover {
     // now actually assign them
     for (HRegionInfo hri : enabledAndAssignedRegions) {
       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
-          new RegionPlan(hri, null, hsiAlive));
+          new RegionPlan(hri, null, hrs.getServerName()));
       master.assignRegion(hri);
     }
     for (HRegionInfo hri : disabledAndAssignedRegions) {
       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
-          new RegionPlan(hri, null, hsiAlive));
+          new RegionPlan(hri, null, hrs.getServerName()));
       master.assignRegion(hri);
     }
 
@@ -621,12 +618,12 @@ public class TestMasterFailover {
     // set region plan to server to be killed and trigger assign
     for (HRegionInfo hri : enabledAndOnDeadRegions) {
       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
-          new RegionPlan(hri, null, hsiDead));
+          new RegionPlan(hri, null, deadServerName));
       master.assignRegion(hri);
     }
     for (HRegionInfo hri : disabledAndOnDeadRegions) {
       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
-          new RegionPlan(hri, null, hsiDead));
+          new RegionPlan(hri, null, deadServerName));
       master.assignRegion(hri);
     }
diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java b/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
index dff6c1b7994..c0ea6495378 100644
--- a/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
+++ b/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
@@ -30,6 +30,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableExistsException;
 import org.apache.hadoop.hbase.client.MetaScanner;
 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
@@ -72,11 +73,11 @@ public class TestRestartCluster {
     String unassignedZNode = zooKeeper.assignmentZNode;
     ZKUtil.createAndFailSilent(zooKeeper, unassignedZNode);
 
-    ZKAssign.createNodeOffline(zooKeeper, HRegionInfo.ROOT_REGIONINFO,
-        HMaster.MASTER);
+    ServerName sn = new ServerName(HMaster.MASTER, -1, System.currentTimeMillis());
 
-    ZKAssign.createNodeOffline(zooKeeper, HRegionInfo.FIRST_META_REGIONINFO,
-        HMaster.MASTER);
+    ZKAssign.createNodeOffline(zooKeeper, HRegionInfo.ROOT_REGIONINFO, sn);
+
+    ZKAssign.createNodeOffline(zooKeeper, HRegionInfo.FIRST_META_REGIONINFO, sn);
 
     LOG.debug("Created UNASSIGNED zNode for ROOT and META regions in state " +
         EventType.M_ZK_REGION_OFFLINE);
@@ -132,4 +133,4 @@ public class TestRestartCluster {
       UTIL.waitTableAvailable(TABLE, 30000);
     }
   }
-}
+}
\ No newline at end of file
diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java b/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java
index 6089ae63b49..566652da67b 100644
--- a/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java
+++ b/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
@@ -155,7 +156,7 @@ public class TestRollingRestart {
     int num = 1;
     int total = regionServers.size();
     for (RegionServerThread rst : regionServers) {
-      String serverName = rst.getRegionServer().getServerName();
+      ServerName serverName = rst.getRegionServer().getServerName();
       log("Stopping region server " + num + " of " + total + " [ " +
           serverName + "]");
       rst.getRegionServer().stop("Stopping RS during rolling restart");
@@ -302,7 +303,7 @@ public class TestRollingRestart {
   }
 
   private void waitForRSShutdownToStartAndFinish(MasterThread activeMaster,
-      String serverName) throws InterruptedException {
+      ServerName serverName) throws InterruptedException {
     ServerManager sm = activeMaster.getMaster().getServerManager();
     // First wait for it to be in dead list
     while (!sm.getDeadServers().contains(serverName)) {
diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java
index 319a74e6bca..47f52d6e49d 100644
--- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java
+++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java
@@ -27,8 +27,8 @@ import java.util.concurrent.Semaphore;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.MasterAddressTracker;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
@@ -75,17 +75,17 @@ public class TestMasterAddressManager {
 
     // Create the master node with a dummy address
     String host = "localhost";
     int port = 1234;
-    HServerAddress dummyAddress = new HServerAddress(host, port);
+    ServerName sn = new ServerName(host, port, System.currentTimeMillis());
     LOG.info("Creating master node");
-    ZKUtil.setAddressAndWatch(zk, zk.masterAddressZNode, dummyAddress);
+    ZKUtil.createEphemeralNodeAndWatch(zk, zk.masterAddressZNode, sn.getBytes());
 
     // Wait for the node to be created
     LOG.info("Waiting for master address manager to be notified");
     listener.waitForCreation();
     LOG.info("Master node created");
     assertTrue(addressManager.hasMaster());
-    HServerAddress pulledAddress = addressManager.getMasterAddress();
-    assertTrue(pulledAddress.equals(dummyAddress));
+    ServerName pulledAddress = addressManager.getMasterAddress();
+    assertTrue(pulledAddress.equals(sn));
   }
diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanner.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanner.java
index 7ff6a2e6c6c..ef8a4b2fd46 100644
--- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanner.java
+++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanner.java
@@ -31,7 +31,6 @@ import org.apache.hadoop.hbase.HBaseTestCase;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.UnknownScannerException;
@@ -263,11 +262,11 @@ public class TestScanner extends HBaseTestCase {
 
       // Store some new information
 
-      HServerAddress address = new HServerAddress("foo.bar.com:1234");
+      String address = "foo.bar.com:1234";
 
       put = new Put(ROW_KEY, System.currentTimeMillis(), null);
       put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
-        Bytes.toBytes(address.toString()));
+        Bytes.toBytes(address));
 
 //      put.add(HConstants.COL_STARTCODE, Bytes.toBytes(START_CODE));
 
@@ -301,12 +300,12 @@ public class TestScanner extends HBaseTestCase {
 
       // Now update the information again
 
-      address = new HServerAddress("bar.foo.com:4321");
+      address = "bar.foo.com:4321";
 
       put = new Put(ROW_KEY, System.currentTimeMillis(), null);
       put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
-        Bytes.toBytes(address.toString()));
+        Bytes.toBytes(address));
 
       region.put(put);
 
       // Validate again
diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 7fc44e90772..cf97f7ff0df 100644
--- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.MasterNotRunningException;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.UnknownRegionException;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.Delete;
@@ -258,7 +259,7 @@ public class TestSplitTransactionOnCluster {
       // Insert into zk a blocking znode, a znode of same name as region
       // so it gets in way of our splitting.
       ZKAssign.createNodeClosing(t.getConnection().getZooKeeperWatcher(),
-        hri, "anyOldServer");
+        hri, new ServerName("any.old.server", 1234, -1));
       // Now try splitting.... should fail.  And each should successfully
       // rollback.
       this.admin.split(hri.getRegionNameAsString());
@@ -455,7 +456,7 @@ public class TestSplitTransactionOnCluster {
       HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
       LOG.info("Moving " + hri.getRegionNameAsString() + " to " +
         hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
-      admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName()));
+      admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName().getBytes());
     }
     // Wait till table region is up on the server that is NOT carrying .META..
     while (true) {
diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java b/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java
index 34defbc7766..bcf90241fe5 100644
--- a/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java
+++ b/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java
@@ -31,9 +31,9 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
 import org.apache.hadoop.hbase.ipc.HBaseRpcMetrics;
@@ -72,11 +72,11 @@ public class TestOpenRegionHandler {
    */
   static class MockServer implements Server {
     boolean stopped = false;
-    final static String NAME = "MockServer";
+    final static ServerName NAME = new ServerName("MockServer", 123, -1);
     final ZooKeeperWatcher zk;
 
     MockServer() throws ZooKeeperConnectionException, IOException {
-      this.zk = new ZooKeeperWatcher(HTU.getConfiguration(), NAME, this);
+      this.zk = new ZooKeeperWatcher(HTU.getConfiguration(), NAME.toString(), this);
     }
 
     @Override
@@ -113,7 +113,7 @@ public class TestOpenRegionHandler {
     }
 
     @Override
-    public String getServerName() {
+    public ServerName getServerName() {
       return NAME;
     }
   }
@@ -155,12 +155,7 @@ public class TestOpenRegionHandler {
     public HLog getWAL() {
       return null;
     }
-
-    @Override
-    public HServerInfo getServerInfo() {
-      return null;
-    }
-
+
     @Override
     public HBaseRpcMetrics getRpcMetrics() {
       return null;
@@ -196,7 +191,7 @@ public class TestOpenRegionHandler {
     }
 
     @Override
-    public String getServerName() {
+    public ServerName getServerName() {
       return null;
     }
diff --git a/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java b/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
index 20a1ff8cb07..c147a14e2c3 100644
--- a/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
+++ b/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
@@ -19,6 +19,12 @@
  */
 package org.apache.hadoop.hbase.replication.regionserver;
 
+import static org.junit.Assert.assertEquals;
+
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -32,6 +38,7 @@ import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
@@ -45,16 +52,8 @@ import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
-import org.junit.Ignore;
 import org.junit.Test;
 
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import static org.junit.Assert.assertEquals;
-
 public class TestReplicationSourceManager {
 
   private static final Log LOG =
@@ -225,7 +224,7 @@ public class TestReplicationSourceManager {
     }
 
     @Override
-    public String getServerName() {
+    public ServerName getServerName() {
       return null;  //To change body of implemented methods use File | Settings | File Templates.
     }
diff --git a/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index a0550820d64..ac90a926b58 100644
--- a/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -26,8 +26,8 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
@@ -82,15 +82,15 @@ public class TestHBaseFsck {
 
       for (JVMClusterUtil.RegionServerThread rs :
           TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
-        HServerInfo hsi = rs.getRegionServer().getServerInfo();
+        ServerName sn = rs.getRegionServer().getServerName();
 
         // When we find a diff RS, change the assignment and break
-        if (startCode != hsi.getStartCode()) {
+        if (startCode != sn.getStartcode()) {
           Put put = new Put(res.getRow());
           put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
-            Bytes.toBytes(hsi.getHostnamePort()));
+            Bytes.toBytes(sn.getHostAndPort()));
           put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
-            Bytes.toBytes(hsi.getStartCode()));
+            Bytes.toBytes(sn.getStartcode()));
           meta.put(put);
           break resforloop;
         }