HBASE-27 hregioninfo cell empty in meta table

Summary of changes:

HMaster:

- When a row has an empty HRegionInfo (info:regioninfo), log it with the row name and the other keys still in the row.

- Log the number of rows with empty HRegionInfo.

- Delete the offending rows.

- Make RowMap inner class static; change methods to have package scope to avoid synthetic accessors.

- Provide row name to getHRegionInfo so it can issue better log messages.

- Add method deleteEmptyMetaRows to remove rows with empty HRegionInfo (see the sketch after this list).
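
In outline, the scan-side cleanup works as follows. This is a minimal sketch distilled from the BaseScanner hunks below; scanner setup, error handling, and the surrounding fields (regionServer, scannerId, region) are elided:

    List<Text> emptyRows = new ArrayList<Text>();
    while (true) {
      RowResult values = regionServer.next(scannerId);
      if (values == null || values.size() == 0) {
        break;
      }
      // getHRegionInfo now takes the row name so it can log which row is bad.
      HRegionInfo info = master.getHRegionInfo(values.getRow(), values);
      if (info == null) {
        // info:regioninfo is missing: remember the row and skip it.
        emptyRows.add(values.getRow());
        continue;
      }
      // ... normal processing of the row ...
    }
    // After the scan: log the count once, then delete the offending rows.
    if (emptyRows.size() > 0) {
      LOG.warn("Found " + emptyRows.size() +
          " rows with empty HRegionInfo while scanning meta region " +
          region.getRegionName());
      master.deleteEmptyMetaRows(regionServer, region.getRegionName(), emptyRows);
    }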

HRegion:

- Change removeRegionFromMETA to use a single deleteAll call rather than a BatchUpdate containing a delete for each cell (sketched below).
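
The new method body is not shown in the hunks below, so the exact call is an assumption based on the summary above. A sketch, assuming HRegionInterface exposes deleteAll(metaRegionName, row, timestamp) to drop every cell in a row:

    import java.io.IOException;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.ipc.HRegionInterface;

    // Before: a BatchUpdate carrying an explicit delete for each cell.
    // After (sketch): one row-wide delete issued to the meta region server.
    static void removeRegionFromMETA(final HRegionInterface srvr,
        final Text metaRegionName, final Text regionName) throws IOException {
      // LATEST_TIMESTAMP is assumed as the "delete everything" upper bound.
      srvr.deleteAll(metaRegionName, regionName, HConstants.LATEST_TIMESTAMP);
    }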

TestEmptyMetaInfo:

- New test case: inserts rows carrying only info:server (no info:regioninfo) into the meta table and verifies the master cleans them up.



git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@636589 13f79535-47bb-0310-9956-ffa450edef68
Jim Kellerman 2008-03-13 00:33:13 +00:00
parent 6fb7767fc6
commit 9057e559a1
8 changed files with 213 additions and 96 deletions


@@ -37,6 +37,7 @@ Hbase Change Log
HBASE-495 No server address listed in .META.
HBASE-433 HBASE-251 Region server should delete restore log after successful
restore, Stuck replaying the edits of crashed machine.
HBASE-27 hregioninfo cell empty in meta table
IMPROVEMENTS
HBASE-415 Rewrite leases to use DelayedBlockingQueue instead of polling


@@ -21,7 +21,8 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.SortedMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
@@ -36,7 +37,6 @@ import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.UnknownScannerException;
@@ -124,6 +124,7 @@ abstract class BaseScanner extends Chore implements HConstants {
this.initialScanComplete = false;
}
/** @return true if initial scan completed successfully */
public boolean isInitialScanComplete() {
return initialScanComplete;
}
@@ -152,6 +153,7 @@ abstract class BaseScanner extends Chore implements HConstants {
// scan we go check if parents can be removed.
Map<HRegionInfo, RowResult> splitParents =
new HashMap<HRegionInfo, RowResult>();
List<Text> emptyRows = new ArrayList<Text>();
try {
regionServer = master.connection.getHRegionConnection(region.getServer());
scannerId =
@@ -165,8 +167,9 @@ abstract class BaseScanner extends Chore implements HConstants {
break;
}
- HRegionInfo info = master.getHRegionInfo(values);
+ HRegionInfo info = master.getHRegionInfo(values.getRow(), values);
if (info == null) {
emptyRows.add(values.getRow());
continue;
}
@@ -206,12 +209,24 @@ abstract class BaseScanner extends Chore implements HConstants {
}
} catch (IOException e) {
LOG.error("Closing scanner",
RemoteExceptionHandler.checkIOException(e));
}
}
- // Scan is finished. Take a look at split parents to see if any we can
- // clean up.
+ // Scan is finished.
+ // First clean up any meta region rows which had null HRegionInfos
if (emptyRows.size() > 0) {
LOG.warn("Found " + emptyRows.size() +
" rows with empty HRegionInfo while scanning meta region " +
region.getRegionName());
master.deleteEmptyMetaRows(regionServer, region.getRegionName(),
emptyRows);
}
// Take a look at split parents to see if any we can clean up.
if (splitParents.size() > 0) {
for (Map.Entry<HRegionInfo, RowResult> e : splitParents.entrySet()) {
HRegionInfo hri = e.getKey();
@@ -289,7 +304,7 @@ abstract class BaseScanner extends Chore implements HConstants {
* @return True if still has references to parent.
* @throws IOException
*/
- protected boolean hasReferences(final Text metaRegionName,
+ private boolean hasReferences(final Text metaRegionName,
final HRegionInterface srvr, final Text parent,
RowResult rowContent, final Text splitColumn)
throws IOException {


@@ -21,49 +21,38 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.ipc.HbaseRPC;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.InfoServer;
import org.apache.hadoop.hbase.util.Sleeper;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.Leases;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
@@ -75,7 +64,6 @@ import org.apache.hadoop.hbase.LocalHBaseCluster;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.LeaseListener;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
@@ -320,6 +308,9 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
return rootServer;
}
/**
* Wait until root region is available
*/
public void waitForRootRegionLocation() {
regionManager.waitForRootRegionLocation();
}
@@ -471,7 +462,6 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
* need to install an unexpected exception handler.
*/
private void startServiceThreads() {
- String threadName = Thread.currentThread().getName();
try {
regionManager.start();
serverManager.start();
@@ -693,21 +683,45 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
* Get HRegionInfo from passed META map of row values.
* Returns null if none found (and logs fact that expected COL_REGIONINFO
* was missing). Utility method used by scanners of META tables.
* @param row name of the row
* @param map Map to do lookup in.
* @return Null or found HRegionInfo.
* @throws IOException
*/
- HRegionInfo getHRegionInfo(final Map<Text, Cell> map)
+ HRegionInfo getHRegionInfo(final Text row, final Map<Text, Cell> map)
throws IOException {
Cell regioninfo = map.get(COL_REGIONINFO);
if (regioninfo == null) {
- LOG.warn(COL_REGIONINFO.toString() + " is empty; has keys: " +
- map.keySet().toString());
+ LOG.warn(COL_REGIONINFO.toString() + " is empty for row: " + row +
+ "; has keys: " + map.keySet().toString());
return null;
}
return (HRegionInfo)Writables.getWritable(regioninfo.getValue(), new HRegionInfo());
}
/*
* When we find rows in a meta region that has an empty HRegionInfo, we
* clean them up here.
*
* @param server connection to server serving meta region
* @param metaRegionName name of the meta region we scanned
* @param emptyRows the row keys that had empty HRegionInfos
*/
protected void deleteEmptyMetaRows(HRegionInterface server,
Text metaRegionName,
List<Text> emptyRows) {
for (Text regionName: emptyRows) {
try {
HRegion.removeRegionFromMETA(server, metaRegionName, regionName);
LOG.warn("Removed region: " + regionName + " from meta region: " +
metaRegionName + " because HRegionInfo was empty");
} catch (IOException e) {
LOG.error("deleting region: " + regionName + " from meta region: " +
metaRegionName, e);
}
}
}
/*
* Main program
*/


@@ -24,14 +24,13 @@ import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.SortedMap;
import java.util.Set;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.regionserver.HRegion;
@@ -66,6 +65,7 @@ class ProcessServerShutdown extends RegionServerOperation {
}
/**
* @param master
* @param serverInfo
*/
public ProcessServerShutdown(HMaster master, HServerInfo serverInfo) {
@@ -93,9 +93,9 @@ class ProcessServerShutdown extends RegionServerOperation {
private void scanMetaRegion(HRegionInterface server, long scannerId,
Text regionName) throws IOException {
- ArrayList<ToDoEntry> toDoList = new ArrayList<ToDoEntry>();
- HashSet<HRegionInfo> regions = new HashSet<HRegionInfo>();
+ List<ToDoEntry> toDoList = new ArrayList<ToDoEntry>();
+ Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
List<Text> emptyRows = new ArrayList<Text>();
try {
while (true) {
RowResult values = null;
@@ -133,8 +133,9 @@ class ProcessServerShutdown extends RegionServerOperation {
}
// Bingo! Found it.
- HRegionInfo info = master.getHRegionInfo(values);
+ HRegionInfo info = master.getHRegionInfo(row, values);
if (info == null) {
emptyRows.add(row);
continue;
}
@@ -180,6 +181,14 @@ class ProcessServerShutdown extends RegionServerOperation {
}
}
// Scan complete. Remove any rows which had empty HRegionInfos
if (emptyRows.size() > 0) {
LOG.warn("Found " + emptyRows.size() +
" rows with empty HRegionInfo while scanning meta region " +
regionName);
master.deleteEmptyMetaRows(server, regionName, emptyRows);
}
// Update server in root/meta entries
for (ToDoEntry e: toDoList) {
if (e.deleteRegion) {


@@ -36,10 +36,10 @@ import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
* Data structure used to return results out of the toRowMap method.
*/
class RowMap {
- static final Log LOG = LogFactory.getLog(RowMap.class.getName());
+ private static final Log LOG = LogFactory.getLog(RowMap.class.getName());
- final Text row;
- final SortedMap<Text, byte[]> map;
+ private final Text row;
+ private final SortedMap<Text, byte[]> map;
RowMap(final Text r, final SortedMap<Text, byte[]> m) {
this.row = r;
@@ -61,7 +61,7 @@ class RowMap {
* @return Returns a SortedMap currently. TODO: This looks like it could
* be a plain Map.
*/
- public static RowMap fromHbaseMapWritable(HbaseMapWritable mw) {
+ static RowMap fromHbaseMapWritable(HbaseMapWritable mw) {
if (mw == null) {
throw new IllegalArgumentException("Passed MapWritable cannot be null");
}


@@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.Map;
import java.util.HashMap;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Set;
@@ -51,11 +50,11 @@ class ServerManager implements HConstants {
static final Log LOG = LogFactory.getLog(ServerManager.class.getName());
/** The map of known server names to server info */
- private final Map<String, HServerInfo> serversToServerInfo =
+ final Map<String, HServerInfo> serversToServerInfo =
new ConcurrentHashMap<String, HServerInfo>();
/** Set of known dead servers */
- private final Set<String> deadServers =
+ final Set<String> deadServers =
Collections.synchronizedSet(new HashSet<String>());
/** SortedMap server load -> Set of server names */
@@ -63,20 +62,27 @@ class ServerManager implements HConstants {
Collections.synchronizedSortedMap(new TreeMap<HServerLoad, Set<String>>());
/** Map of server names -> server load */
- private final Map<String, HServerLoad> serversToLoad =
+ final Map<String, HServerLoad> serversToLoad =
new ConcurrentHashMap<String, HServerLoad>();
- private HMaster master;
+ HMaster master;
private final Leases serverLeases;
/**
* @param master
*/
public ServerManager(HMaster master) {
this.master = master;
serverLeases = new Leases(master.leaseTimeout,
master.conf.getInt("hbase.master.lease.thread.wakefrequency", 15 * 1000));
}
- /** Let the server manager know a new regionserver has come online */
+ /**
+ * Let the server manager know a new regionserver has come online
+ *
+ * @param serverInfo
+ */
public void regionServerStartup(HServerInfo serverInfo) {
String s = serverInfo.getServerAddress().toString().trim();
LOG.info("received start message from: " + s);
@@ -121,7 +127,14 @@ class ServerManager implements HConstants {
loadToServers.put(load, servers);
}
- /** {@inheritDoc} */
+ /**
+ * @param serverInfo
+ * @param msgs
+ * @return messages from master to region server indicating what region
+ * server should do.
+ *
+ * @throws IOException
+ */
public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[])
throws IOException {
String serverName = serverInfo.getServerAddress().toString().trim();
@@ -377,7 +390,16 @@ class ServerManager implements HConstants {
return returnMsgs.toArray(new HMsg[returnMsgs.size()]);
}
- /** A region has split. **/
+ /**
+ * A region has split.
+ *
+ * @param serverName
+ * @param serverInfo
+ * @param region
+ * @param splitA
+ * @param splitB
+ * @param returnMsgs
+ */
private void processSplitRegion(String serverName, HServerInfo serverInfo,
HRegionInfo region, HMsg splitA, HMsg splitB, ArrayList<HMsg> returnMsgs) {
@@ -467,54 +489,6 @@ class ServerManager implements HConstants {
}
}
- /** Region server reporting that it has closed a region */
- private void processRegionClose(String serverName, HServerInfo info,
- HRegionInfo region) {
- LOG.info(info.getServerAddress().toString() + " no longer serving " +
- region.getRegionName());
- if (region.isRootRegion()) {
- if (region.isOffline()) {
- // Can't proceed without root region. Shutdown.
- LOG.fatal("root region is marked offline");
- master.shutdown();
- }
- master.regionManager.unassignRootRegion();
- } else {
- boolean reassignRegion = !region.isOffline();
- boolean deleteRegion = false;
- if (master.regionManager.isClosing(region.getRegionName())) {
- master.regionManager.noLongerClosing(region.getRegionName());
- reassignRegion = false;
- }
- if (master.regionManager.isMarkedForDeletion(region.getRegionName())) {
- master.regionManager.regionDeleted(region.getRegionName());
- reassignRegion = false;
- deleteRegion = true;
- }
- if (region.isMetaTable()) {
- // Region is part of the meta table. Remove it from onlineMetaRegions
- master.regionManager.offlineMetaRegion(region.getStartKey());
- }
- // NOTE: we cannot put the region into unassignedRegions as that
- // could create a race with the pending close if it gets
- // reassigned before the close is processed.
- master.regionManager.noLongerUnassigned(region);
- try {
- master.toDoQueue.put(new ProcessRegionClose(master, region, reassignRegion,
- deleteRegion));
- } catch (InterruptedException e) {
- throw new RuntimeException(
- "Putting into toDoQueue was interrupted.", e);
- }
- }
- }
/** Cancel a server's lease and update its load information */
private boolean cancelLease(final String serverName) {
boolean leaseCancelled = false;
@@ -544,16 +518,20 @@ class ServerManager implements HConstants {
}
- /** compute the average load across all region servers */
+ /** @return the average load across all region servers */
public int averageLoad() {
return 0;
}
/** @return the number of active servers */
public int numServers() {
return serversToServerInfo.size();
}
- /** get HServerInfo from a server address */
+ /**
+ * @param address server address
+ * @return HServerInfo for the given server address
+ */
public HServerInfo getServerInfo(String address) {
return serversToServerInfo.get(address);
}
@@ -579,6 +557,9 @@ class ServerManager implements HConstants {
return Collections.unmodifiableMap(loadToServers);
}
/**
* Wakes up threads waiting on serversToServerInfo
*/
public void notifyServers() {
synchronized (serversToServerInfo) {
serversToServerInfo.notifyAll();
@@ -666,10 +647,17 @@ class ServerManager implements HConstants {
serverLeases.close();
}
/**
* @param serverName
*/
public void removeDeadServer(String serverName) {
deadServers.remove(serverName);
}
/**
* @param serverName
* @return true if server is dead
*/
public boolean isDead(String serverName) {
return deadServers.contains(serverName);
}


@@ -20,9 +20,10 @@
package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -32,10 +33,8 @@ import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
/**
@@ -95,14 +94,17 @@ abstract class TableOperation implements HConstants {
server.openScanner(m.getRegionName(), COLUMN_FAMILY_ARRAY,
tableName, System.currentTimeMillis(), null);
List<Text> emptyRows = new ArrayList<Text>();
try {
while (true) {
RowResult values = server.next(scannerId);
if(values == null || values.size() == 0) {
break;
}
- HRegionInfo info = this.master.getHRegionInfo(values);
+ HRegionInfo info =
+ this.master.getHRegionInfo(values.getRow(), values);
if (info == null) {
emptyRows.add(values.getRow());
throw new IOException(COL_REGIONINFO + " not found on " +
values.getRow());
}
@@ -132,6 +134,15 @@ abstract class TableOperation implements HConstants {
scannerId = -1L;
}
// Get rid of any rows that have a null HRegionInfo
if (emptyRows.size() > 0) {
LOG.warn("Found " + emptyRows.size() +
" rows with empty HRegionInfo while scanning meta region " +
m.getRegionName());
master.deleteEmptyMetaRows(server, m.getRegionName(), emptyRows);
}
if (!tableExists) {
throw new IOException(tableName + " does not exist");
}


@@ -0,0 +1,79 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.io.IOException;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
/**
* Tests master cleanup of rows in meta table where there is no HRegionInfo
*/
public class TestEmptyMetaInfo extends HBaseClusterTestCase {
/**
* Insert some bogus rows in meta. Master should clean them up.
* @throws IOException
*/
public void testEmptyMetaInfo() throws IOException {
HTable t = new HTable(conf, HConstants.META_TABLE_NAME);
for (int i = 0; i < 5; i++) {
Text regionName = new Text("tablename," + (i == 0 ? "" : (i +",")) +
System.currentTimeMillis());
BatchUpdate b = new BatchUpdate(regionName);
b.put(HConstants.COL_SERVER,
"localhost:1234".getBytes(HConstants.UTF8_ENCODING));
t.commit(b);
}
long sleepTime =
conf.getLong("hbase.master.meta.thread.rescanfrequency", 10000);
int tries = conf.getInt("hbase.client.retries.number", 5);
int count = 0;
do {
tries -= 1;
try {
Thread.sleep(sleepTime);
} catch (InterruptedException e) {
// ignore
}
HScannerInterface scanner =
t.obtainScanner(HConstants.ALL_META_COLUMNS, new Text("tablename"));
try {
count = 0;
HStoreKey key = new HStoreKey();
SortedMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
while (scanner.next(key, results)) {
count += 1;
}
} finally {
scanner.close();
}
} while (count != 0 && tries >= 0);
assertTrue(tries >= 0);
assertEquals(0, count);
}
}