HBASE-505 Region assignments should never time out so long as the region

server reports that it is processing the open request

This is patch reviewed with Jim but with the number of edits between
reports made into a configurable.

Have the HRegionServer pass down a Progressable implementation down into
Region and then down int Store where edits are replayed. Call progress
after every couple of thousand edits.

M src/java/org/apache/hadoop/hbase/HStore.java
  Take a Progessable in the constructor. Call it when applying edits.
M src/java/org/apache/hadoop/hbase/HMaster.java
  Update commment around MSG_REPORT_PROCESS_OPEN so its expected
  that we can get more than one of these messages during a region open.
M src/java/org/apache/hadoop/hbase/HRegion.java
  New constructor that takes a Progressable. Pass it to Stores on construction.
M src/java/org/apache/hadoop/hbase/HRegionServer.java
  On open of a region, pass in a Progressable that adds a
MSG_REPORT_PROCESS_OPEN every time its called.


git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@643223 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2008-04-01 00:09:25 +00:00
parent 198a156334
commit f15048d4d0
5 changed files with 72 additions and 15 deletions

View File

@ -4,6 +4,8 @@ Hbase Change Log
HBASE-550 EOF trying to read reconstruction log stops region deployment
HBASE-551 Master stuck splitting server logs in shutdown loop; on each
iteration, edits are aggregated up into the millions
HBASE-505 Region assignments should never time out so long as the region
server reports that it is processing the open request
Release 0.1.0

View File

@ -494,10 +494,12 @@ class RegionManager implements HConstants {
/** Update the deadline for a region assignment to be completed */
public void updateAssignmentDeadline(HRegionInfo info) {
synchronized (unassignedRegions) {
// Region server has acknowledged request to open region.
// Region server is reporting in that its working on region open
// (We can get more than one of these messages if region is replaying
// a bunch of edits and taking a while to open).
// Extend region open time by max region open time.
unassignedRegions.put(info,
System.currentTimeMillis() + master.maxRegionOpenTime);
this.unassignedRegions.put(info,
Long.valueOf(System.currentTimeMillis() + this.master.maxRegionOpenTime));
}
}

View File

@ -49,6 +49,7 @@ import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hbase.HConstants;
@ -373,12 +374,41 @@ public class HRegion implements HConstants {
* @param initialFiles If there are initial files (implying that the HRegion
* is new), then read them from the supplied path.
* @param listener an object that implements CacheFlushListener or null
*
* @throws IOException
*/
public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration conf,
HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener)
throws IOException {
this(basedir, log, fs, conf, regionInfo, initialFiles, listener, null);
}
/**
* HRegion constructor.
*
* @param log The HLog is the outbound log for any updates to the HRegion
* (There's a single HLog for all the HRegions on a single HRegionServer.)
* The log file is a logfile from the previous execution that's
* custom-computed for this HRegion. The HRegionServer computes and sorts the
* appropriate log info for this HRegion. If there is a previous log file
* (implying that the HRegion has been written-to before), then read it from
* the supplied path.
* @param basedir qualified path of directory where region should be located,
* usually the table directory.
* @param fs is the filesystem.
* @param conf is global configuration settings.
* @param regionInfo - HRegionInfo that describes the region
* @param initialFiles If there are initial files (implying that the HRegion
* is new), then read them from the supplied path.
* @param listener an object that implements CacheFlushListener or null
* @param reporter Call on a period so hosting server can report we're
* making progress to master -- otherwise master might think region deploy
* failed. Can be null.
* @throws IOException
*/
public HRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration conf,
HRegionInfo regionInfo, Path initialFiles, CacheFlushListener listener,
final Progressable reporter)
throws IOException {
this.basedir = basedir;
this.log = log;
@ -402,12 +432,9 @@ public class HRegion implements HConstants {
long maxSeqId = -1;
for(HColumnDescriptor c :
this.regionInfo.getTableDesc().families().values()) {
HStore store = new HStore(this.basedir, this.regionInfo, c, this.fs,
oldLogFile, this.conf);
oldLogFile, this.conf, reporter);
stores.put(c.getFamilyName(), store);
long storeSeqId = store.getMaxSequenceId();
if (storeSeqId > maxSeqId) {
maxSeqId = storeSeqId;
@ -423,7 +450,7 @@ public class HRegion implements HConstants {
this.minSequenceId = maxSeqId;
if (LOG.isDebugEnabled()) {
LOG.debug("Next sequence id for region " + regionInfo.getRegionName() +
" is " + this.minSequenceId);
" is " + this.minSequenceId);
}
// Get rid of any splits or merges that were lost in-progress

View File

@ -82,6 +82,7 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
/**
@ -785,7 +786,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
HTableDescriptor.getTableDir(rootDir,
regionInfo.getTableDesc().getName()
),
this.log, this.fs, conf, regionInfo, null, this.cacheFlusher
this.log, this.fs, conf, regionInfo, null, this.cacheFlusher,
new Progressable() {
public void progress() {
getOutboundMsgs().add(new HMsg(HMsg.MSG_REPORT_PROCESS_OPEN,
regionInfo));
}
}
);
// Startup a compaction early if one is needed.
this.compactSplitThread.compactionRequested(region);
@ -1336,11 +1343,18 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
}
throw new IOException("Unknown protocol to name node: " + protocol);
}
/**
* @return Queue to which you can add outbound messages.
*/
protected List<HMsg> getOutboundMsgs() {
return this.outboundMsgs;
}
//
// Main program and support routines
//
private static void printUsageAndExit() {
printUsageAndExit(null);
}

View File

@ -54,6 +54,7 @@ import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hbase.BloomFilterDescriptor;
import org.onelab.filter.BloomFilter;
@ -159,10 +160,14 @@ public class HStore implements HConstants {
* @param fs file system object
* @param reconstructionLog existing log file to apply if any
* @param conf configuration object
* @param reporter Call on a period so hosting server can report we're
* making progress to master -- otherwise master might think region deploy
* failed. Can be null.
* @throws IOException
*/
HStore(Path basedir, HRegionInfo info, HColumnDescriptor family,
FileSystem fs, Path reconstructionLog, HBaseConfiguration conf)
FileSystem fs, Path reconstructionLog, HBaseConfiguration conf,
final Progressable reporter)
throws IOException {
this.basedir = basedir;
this.info = info;
@ -235,7 +240,7 @@ public class HStore implements HConstants {
}
try {
doReconstructionLog(reconstructionLog, maxSeqId);
doReconstructionLog(reconstructionLog, maxSeqId, reporter);
} catch (IOException e) {
// Presume we got here because of some HDFS issue or because of a lack of
// HADOOP-1700; for now keep going but this is probably not what we want
@ -308,7 +313,7 @@ public class HStore implements HConstants {
* reflected in the MapFiles.)
*/
private void doReconstructionLog(final Path reconstructionLog,
final long maxSeqID)
final long maxSeqID, final Progressable reporter)
throws UnsupportedEncodingException, IOException {
if (reconstructionLog == null || !fs.exists(reconstructionLog)) {
// Nothing to do.
@ -332,6 +337,8 @@ public class HStore implements HConstants {
HLogEdit val = new HLogEdit();
long skippedEdits = 0;
long editsCount = 0;
// How many edits to apply before we send a progress report.
int reportInterval = this.conf.getInt("hbase.hstore.report.interval.edits", 2000);
while (logReader.next(key, val)) {
maxSeqIdInLog = Math.max(maxSeqIdInLog, key.getLogSeqNum());
if (key.getLogSeqNum() <= maxSeqID) {
@ -349,6 +356,11 @@ public class HStore implements HConstants {
HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp());
reconstructedCache.put(k, val.getVal());
editsCount++;
// Every 2k edits, tell the reporter we're making progress.
// Have seen 60k edits taking 3minutes to complete.
if (reporter != null && (editsCount % reportInterval) == 0) {
reporter.progress();
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +