HBASE-4645 Edits Log recovery losing data across column families
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1189285 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9570f3c907
commit
770cc7d5d2
|
@ -403,6 +403,7 @@ Release 0.92.0 - Unreleased
|
|||
HBASE-4670 Fix javadoc warnings
|
||||
HBASE-4367 Deadlock in MemStore flusher due to JDK internally synchronizing
|
||||
on current thread
|
||||
HBASE-4645 Edits Log recovery losing data across column families
|
||||
|
||||
TESTS
|
||||
HBASE-4450 test for number of blocks read: to serve as baseline for expected
|
||||
|
|
|
@ -447,20 +447,39 @@ public class HRegion implements HeapSize { // , Writable{
|
|||
status.setStatus("Cleaning up temporary data from old regions");
|
||||
cleanupTmpDir();
|
||||
|
||||
// Load in all the HStores. Get maximum seqid.
|
||||
// Load in all the HStores.
|
||||
// Get minimum of the maxSeqId across all the store.
|
||||
//
|
||||
// Context: During replay we want to ensure that we do not lose any data. So, we
|
||||
// have to be conservative in how we replay logs. For each store, we calculate
|
||||
// the maxSeqId up to which the store was flushed. But, since different stores
|
||||
// could have a different maxSeqId, we choose the
|
||||
// minimum across all the stores.
|
||||
// This could potentially result in duplication of data for stores that are ahead
|
||||
// of others. ColumnTrackers in the ScanQueryMatchers do the de-duplication, so we
|
||||
// do not have to worry.
|
||||
// TODO: If there is a store that was never flushed in a long time, we could replay
|
||||
// a lot of data. Currently, this is not a problem because we flush all the stores at
|
||||
// the same time. If we move to per-cf flushing, we might want to revisit this and send
|
||||
// in a vector of maxSeqIds instead of sending in a single number, which has to be the
|
||||
// min across all the max.
|
||||
long minSeqId = -1;
|
||||
long maxSeqId = -1;
|
||||
for (HColumnDescriptor c : this.htableDescriptor.getFamilies()) {
|
||||
status.setStatus("Instantiating store for column family " + c);
|
||||
Store store = instantiateHStore(this.tableDir, c);
|
||||
this.stores.put(c.getName(), store);
|
||||
long storeSeqId = store.getMaxSequenceId();
|
||||
if (storeSeqId > maxSeqId) {
|
||||
if (minSeqId == -1 || storeSeqId < minSeqId) {
|
||||
minSeqId = storeSeqId;
|
||||
}
|
||||
if (maxSeqId == -1 || storeSeqId > maxSeqId) {
|
||||
maxSeqId = storeSeqId;
|
||||
}
|
||||
}
|
||||
// Recover any edits if available.
|
||||
maxSeqId = replayRecoveredEditsIfAny(
|
||||
this.regiondir, maxSeqId, reporter, status);
|
||||
maxSeqId = Math.max(maxSeqId, replayRecoveredEditsIfAny(
|
||||
this.regiondir, minSeqId, reporter, status));
|
||||
|
||||
status.setStatus("Cleaning up detritus from prior splits");
|
||||
// Get rid of any splits or merges that were lost in-progress. Clean out
|
||||
|
|
|
@ -276,24 +276,20 @@ public class TestWALReplay {
|
|||
Result result = region.get(g, null);
|
||||
assertEquals(countPerFamily * htd.getFamilies().size(),
|
||||
result.size());
|
||||
// Now close the region, split the log, reopen the region and assert that
|
||||
// replay of log has no effect, that our seqids are calculated correctly so
|
||||
// Now close the region (without flush), split the log, reopen the region and assert that
|
||||
// replay of log has the correct effect, that our seqids are calculated correctly so
|
||||
// all edits in logs are seen as 'stale'/old.
|
||||
region.close();
|
||||
region.close(true);
|
||||
wal.close();
|
||||
runWALSplit(this.conf);
|
||||
HLog wal2 = createWAL(this.conf);
|
||||
HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd, null) {
|
||||
@Override
|
||||
protected boolean restoreEdit(Store s, KeyValue kv) {
|
||||
super.restoreEdit(s, kv);
|
||||
throw new RuntimeException("Called when it should not have been!");
|
||||
}
|
||||
};
|
||||
HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd, null);
|
||||
long seqid2 = region2.initialize();
|
||||
// HRegionServer usually does this. It knows the largest seqid across all regions.
|
||||
wal2.setSequenceNumber(seqid2);
|
||||
assertTrue(seqid + result.size() < seqid2);
|
||||
final Result result1b = region2.get(g, null);
|
||||
assertEquals(result.size(), result1b.size());
|
||||
|
||||
// Next test. Add more edits, then 'crash' this region by stealing its wal
|
||||
// out from under it and assert that replay of the log adds the edits back
|
||||
|
@ -343,6 +339,88 @@ public class TestWALReplay {
|
|||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we recover correctly when there is a failure in between the
|
||||
* flushes. i.e. Some stores got flushed but others did not.
|
||||
*
|
||||
* Unfortunately, there is no easy hook to flush at a store level. The way
|
||||
* we get around this is by flushing at the region level, and then deleting
|
||||
* the recently flushed store file for one of the Stores. This would put us
|
||||
* back in the situation where all but that store got flushed and the region
|
||||
* died.
|
||||
*
|
||||
* We restart Region again, and verify that the edits were replayed.
|
||||
*
|
||||
* @throws IOException
|
||||
* @throws IllegalAccessException
|
||||
* @throws NoSuchFieldException
|
||||
* @throws IllegalArgumentException
|
||||
* @throws SecurityException
|
||||
*/
|
||||
@Test
|
||||
public void testReplayEditsAfterPartialFlush()
|
||||
throws IOException, SecurityException, IllegalArgumentException,
|
||||
NoSuchFieldException, IllegalAccessException, InterruptedException {
|
||||
final String tableNameStr = "testReplayEditsWrittenViaHRegion";
|
||||
final HRegionInfo hri = createBasic3FamilyHRegionInfo(tableNameStr);
|
||||
final Path basedir = new Path(this.hbaseRootDir, tableNameStr);
|
||||
deleteDir(basedir);
|
||||
final byte[] rowName = Bytes.toBytes(tableNameStr);
|
||||
final int countPerFamily = 10;
|
||||
final HTableDescriptor htd = createBasic3FamilyHTD(tableNameStr);
|
||||
HRegion region3 = HRegion.createHRegion(hri,
|
||||
hbaseRootDir, this.conf, htd);
|
||||
|
||||
// Write countPerFamily edits into the three families. Do a flush on one
|
||||
// of the families during the load of edits so its seqid is not same as
|
||||
// others to test we do right thing when different seqids.
|
||||
HLog wal = createWAL(this.conf);
|
||||
HRegion region = new HRegion(basedir, wal, this.fs, this.conf, hri, htd, null);
|
||||
long seqid = region.initialize();
|
||||
// HRegionServer usually does this. It knows the largest seqid across all regions.
|
||||
wal.setSequenceNumber(seqid);
|
||||
for (HColumnDescriptor hcd: htd.getFamilies()) {
|
||||
addRegionEdits(rowName, hcd.getName(), countPerFamily, this.ee, region, "x");
|
||||
}
|
||||
|
||||
// Now assert edits made it in.
|
||||
final Get g = new Get(rowName);
|
||||
Result result = region.get(g, null);
|
||||
assertEquals(countPerFamily * htd.getFamilies().size(),
|
||||
result.size());
|
||||
|
||||
// Let us flush the region
|
||||
region.flushcache();
|
||||
region.close(true);
|
||||
wal.close();
|
||||
|
||||
// delete the store files in the second column family to simulate a failure
|
||||
// in between the flushcache();
|
||||
// we have 3 families. killing the middle one ensures that taking the maximum
|
||||
// will make us fail.
|
||||
int cf_count = 0;
|
||||
for (HColumnDescriptor hcd: htd.getFamilies()) {
|
||||
cf_count++;
|
||||
if (cf_count == 2) {
|
||||
this.fs.delete(new Path(region.getRegionDir(), Bytes.toString(hcd.getName()))
|
||||
, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Let us try to split and recover
|
||||
runWALSplit(this.conf);
|
||||
HLog wal2 = createWAL(this.conf);
|
||||
HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd, null);
|
||||
long seqid2 = region2.initialize();
|
||||
// HRegionServer usually does this. It knows the largest seqid across all regions.
|
||||
wal2.setSequenceNumber(seqid2);
|
||||
assertTrue(seqid + result.size() < seqid2);
|
||||
|
||||
final Result result1b = region2.get(g, null);
|
||||
assertEquals(result.size(), result1b.size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HRegion with the result of a HLog split and test we only see the
|
||||
* good edits
|
||||
|
|
Loading…
Reference in New Issue