HBASE-1784 Missing rows after medium intensity insert

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@810200 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2009-09-01 19:54:42 +00:00
parent 61fa816157
commit dea3c1207c
3 changed files with 95 additions and 24 deletions

View File

@ -361,6 +361,7 @@ Release 0.20.0 - Unreleased
HBASE-1767 test zookeeper broken in trunk and 0.20 branch; broken on
hudson too
HBASE-1780 HTable.flushCommits clears write buffer in finally clause
HBASE-1784 Missing rows after medium intensity insert
IMPROVEMENTS
HBASE-1089 Add count of regions on filesystem to master UI; add percentage

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
@ -153,8 +154,10 @@ abstract class BaseScanner extends Chore implements HConstants {
int rows = 0;
try {
regionServer = master.connection.getHRegionConnection(region.getServer());
scannerId = regionServer.openScanner(region.getRegionName(),
new Scan().addFamily(HConstants.CATALOG_FAMILY));
Scan s = new Scan().addFamily(HConstants.CATALOG_FAMILY);
// Make this scan fetch one row at a time; otherwise, data can be stale.
s.setCaching(1);
scannerId = regionServer.openScanner(region.getRegionName(), s);
while (true) {
Result values = regionServer.next(scannerId);
if (values == null || values.size() == 0) {
@ -165,19 +168,11 @@ abstract class BaseScanner extends Chore implements HConstants {
emptyRows.add(values.getRow());
continue;
}
String serverName = "";
byte [] val = values.getValue(CATALOG_FAMILY, SERVER_QUALIFIER);
if( val != null) {
serverName = Bytes.toString(val);
}
long startCode = 0L;
val = values.getValue(CATALOG_FAMILY, STARTCODE_QUALIFIER);
if(val != null) {
startCode = Bytes.toLong(val);
}
String serverAddress = getServerAddress(values);
long startCode = getStartCode(values);
// Note Region has been assigned.
checkAssigned(info, serverName, startCode);
checkAssigned(regionServer, region, info, serverAddress, startCode);
if (isSplitParent(info)) {
splitParents.put(info, values);
}
@ -230,6 +225,24 @@ abstract class BaseScanner extends Chore implements HConstants {
" row(s) of meta region " + region.toString() + " complete");
}
/*
 * Pull the server address out of a catalog row.
 * @param r Row to read the CATALOG_FAMILY:SERVER_QUALIFIER value from.
 * @return Server address found in <code>r</code>, or the empty String when
 * the value is missing or zero-length.
 */
private String getServerAddress(final Result r) {
  final byte [] address = r.getValue(CATALOG_FAMILY, SERVER_QUALIFIER);
  if (address != null && address.length > 0) {
    return Bytes.toString(address);
  }
  // Nothing stored under the server qualifier.
  return "";
}
/*
 * Pull the server startcode out of a catalog row.
 * @param r Row to read the CATALOG_FAMILY:STARTCODE_QUALIFIER value from.
 * @return Startcode found in <code>r</code>, or 0L when the value is missing
 * or zero-length.
 */
private long getStartCode(final Result r) {
  final byte [] code = r.getValue(CATALOG_FAMILY, STARTCODE_QUALIFIER);
  if (code != null && code.length > 0) {
    return Bytes.toLong(code);
  }
  // Nothing stored under the startcode qualifier.
  return 0L;
}
/*
* @param info Region to check.
* @return True if this is a split parent.
@ -326,8 +339,7 @@ abstract class BaseScanner extends Chore implements HConstants {
if (LOG.isDebugEnabled()) {
LOG.debug(split.getRegionNameAsString() + "/" + split.getEncodedName()
+ " no longer has references to " + Bytes.toStringBinary(parent)
);
+ " no longer has references to " + Bytes.toStringBinary(parent));
}
Delete delete = new Delete(parent);
@ -337,12 +349,43 @@ abstract class BaseScanner extends Chore implements HConstants {
return result;
}
protected void checkAssigned(final HRegionInfo info,
/*
* Check the passed region is assigned. If not, add to unassigned.
* @param regionServer
* @param meta
* @param info
* @param serverAddress
* @param startCode
* @throws IOException
*/
protected void checkAssigned(final HRegionInterface regionServer,
final MetaRegion meta, final HRegionInfo info,
final String serverAddress, final long startCode)
throws IOException {
String serverName = null;
if (serverAddress != null && serverAddress.length() > 0) {
serverName = HServerInfo.getServerName(serverAddress, startCode);
String sa = serverAddress;
long sc = startCode;
if (sa == null || sa.length() <= 0) {
// Scans are sloppy. They don't respect row locks, and they fetch and cache
// a row internally, so they may return data that is a little stale.
// Double-check with a Get that this serverAddress really is unset. We are
// trying to avoid double-assignments. See HBASE-1784. We will have to wait
// until HBase 0.21, where we use zk to mediate state transitions, to do better.
Get g = new Get(info.getRegionName());
g.addFamily(HConstants.CATALOG_FAMILY);
Result r = regionServer.get(meta.getRegionName(), g);
if (r != null && !r.isEmpty()) {
sa = getServerAddress(r);
if (sa != null && sa.length() > 0) {
// Re-get the startcode in case it has changed in the meantime too.
sc = getStartCode(r);
LOG.debug("GET got values when meta found none: serverAddress=" + sa
+ ", startCode=" + sc);
}
}
}
if (sa != null && sa.length() > 0) {
serverName = HServerInfo.getServerName(sa, sc);
}
HServerInfo storedInfo = null;
synchronized (this.master.regionManager) {
@ -365,8 +408,8 @@ abstract class BaseScanner extends Chore implements HConstants {
// The current assignment is invalid
if (LOG.isDebugEnabled()) {
LOG.debug("Current assignment of " + info.getRegionNameAsString() +
" is not valid; " + " serverAddress=" + serverAddress +
", startCode=" + startCode + " unknown.");
" is not valid; " + " serverAddress=" + sa +
", startCode=" + sc + " unknown.");
}
// Now get the region assigned
this.master.regionManager.setUnassigned(info, true);

View File

@ -44,6 +44,9 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.Leases;
import org.apache.hadoop.hbase.HMsg.Type;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
import org.apache.zookeeper.WatchedEvent;
@ -499,10 +502,8 @@ class ServerManager implements HConstants {
// This prevents the master from sending a SPLIT message if the table
// has already split by the region server.
master.regionManager.endActions(region.getRegionName());
HRegionInfo newRegionA = splitA.getRegionInfo();
master.regionManager.setUnassigned(newRegionA, false);
HRegionInfo newRegionB = splitB.getRegionInfo();
master.regionManager.setUnassigned(newRegionB, false);
assignSplitDaughter(splitA.getRegionInfo());
assignSplitDaughter(splitB.getRegionInfo());
if (region.isMetaTable()) {
// A meta region has split.
master.regionManager.offlineMetaRegion(region.getStartKey());
@ -511,6 +512,32 @@ class ServerManager implements HConstants {
}
}
/*
 * Assign new daughter-of-a-split UNLESS it's already been assigned.
 * It could have been assigned already in the rare case where there was a
 * large gap between insertion of the daughter region into .META. by the
 * splitting regionserver and receipt of the split message in master (See
 * HBASE-1784).
 *
 * The daughter's catalog row is fetched with a Get; if it already holds at
 * least three values the daughter is presumed assigned and nothing is done.
 * If the Get fails with an IOException the failure is logged and the
 * daughter is set unassigned anyway.
 * @param hri Region to assign.
 */
private void assignSplitDaughter(final HRegionInfo hri) {
  MetaRegion mr = this.master.regionManager.getFirstMetaRegionForRegion(hri);
  Get g = new Get(hri.getRegionName());
  g.addFamily(HConstants.CATALOG_FAMILY);
  try {
    HRegionInterface server =
      master.connection.getHRegionConnection(mr.getServer());
    Result r = server.get(mr.getRegionName(), g);
    // If size >= 3 -- presume regioninfo, startcode and server -- then
    // presume that this daughter is already assigned and return. A null
    // Result is treated as "no row found" so we fall through and assign
    // rather than fail with a NullPointerException.
    if (r != null && r.size() >= 3) {
      return;
    }
  } catch (IOException e) {
    // Best effort: we could not confirm the current state, so go ahead and
    // assign, accepting the risk flagged in the message.
    LOG.warn("Failed get on " + HConstants.CATALOG_FAMILY_STR +
      "; possible double-assignment?", e);
  }
  this.master.regionManager.setUnassigned(hri, false);
}
/*
* Region server is reporting that a region is now opened
* @param serverInfo