HBASE-11511 Write flush events to WAL

parent fe50c6d366
commit bbe29eb93c

@@ -87,6 +87,7 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.SplitRegionRequest
 import org.apache.hadoop.hbase.protobuf.generated.AuthenticationProtos;
 import org.apache.hadoop.hbase.protobuf.generated.CellProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileResponse;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ClientService;
@@ -117,6 +118,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.MasterService;
 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest;
 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest;
 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.FlushAction;
 import org.apache.hadoop.hbase.security.access.Permission;
 import org.apache.hadoop.hbase.security.access.TablePermission;
 import org.apache.hadoop.hbase.security.access.UserPermission;
@@ -2499,6 +2502,29 @@ public final class ProtobufUtil {
     return builder.build();
   }
 
+  public static FlushDescriptor toFlushDescriptor(FlushAction action, HRegionInfo hri,
+      long flushSeqId, Map<byte[], List<Path>> committedFiles) {
+    FlushDescriptor.Builder desc = FlushDescriptor.newBuilder()
+        .setAction(action)
+        .setEncodedRegionName(ByteStringer.wrap(hri.getEncodedNameAsBytes()))
+        .setFlushSequenceNumber(flushSeqId)
+        .setTableName(ByteStringer.wrap(hri.getTable().getName()));
+
+    for (Map.Entry<byte[], List<Path>> entry : committedFiles.entrySet()) {
+      WALProtos.FlushDescriptor.StoreFlushDescriptor.Builder builder =
+          WALProtos.FlushDescriptor.StoreFlushDescriptor.newBuilder()
+          .setFamilyName(ByteStringer.wrap(entry.getKey()))
+          .setStoreHomeDir(Bytes.toString(entry.getKey())); //relative to region
+      if (entry.getValue() != null) {
+        for (Path path : entry.getValue()) {
+          builder.addFlushOutput(path.getName());
+        }
+      }
+      desc.addStoreFlushes(builder);
+    }
+    return desc.build();
+  }
+
   /**
    * Return short version of Message toString'd, shorter than TextFormat#shortDebugString.
    * Tries to NOT print out data both because it can be big but also so we do not have data in our

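The helper above is what HRegion uses later in this commit to describe a flush in the WAL. As a rough, illustrative sketch (the class and method names below are invented for the example, and the region, sequence id, and family inputs are assumed to come from the caller), building a START_FLUSH descriptor for a single column family could look like this:

import java.util.List;
import java.util.TreeMap;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.FlushAction;
import org.apache.hadoop.hbase.util.Bytes;

public class FlushDescriptorSketch {
  // Builds the marker written before the snapshot is taken; at this point no new
  // store files exist yet, so the family maps to null (mirroring what HRegion does).
  static FlushDescriptor startFlushMarker(HRegionInfo hri, long flushSeqId, byte[] family) {
    TreeMap<byte[], List<Path>> committedFiles =
        new TreeMap<byte[], List<Path>>(Bytes.BYTES_COMPARATOR);
    committedFiles.put(family, null); // output files are only known at COMMIT_FLUSH time
    return ProtobufUtil.toFlushDescriptor(FlushAction.START_FLUSH, hri, flushSeqId,
        committedFiles);
  }
}
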
File diff suppressed because it is too large

@@ -89,12 +89,35 @@ message CompactionDescriptor {
   required bytes table_name = 1; // TODO: WALKey already stores these, might remove
   required bytes encoded_region_name = 2;
   required bytes family_name = 3;
-  repeated string compaction_input = 4;
+  repeated string compaction_input = 4; // relative to store dir
   repeated string compaction_output = 5;
-  required string store_home_dir = 6;
+  required string store_home_dir = 6; // relative to region dir
   optional bytes region_name = 7; // full region name
 }
 
+/**
+ * Special WAL entry to hold all related to a flush.
+ */
+message FlushDescriptor {
+  enum FlushAction {
+    START_FLUSH = 0;
+    COMMIT_FLUSH = 1;
+    ABORT_FLUSH = 2;
+  }
+
+  message StoreFlushDescriptor {
+    required bytes family_name = 1;
+    required string store_home_dir = 2; //relative to region dir
+    repeated string flush_output = 3; // relative to store dir (if this is a COMMIT_FLUSH)
+  }
+
+  required FlushAction action = 1;
+  required bytes table_name = 2;
+  required bytes encoded_region_name = 3;
+  optional uint64 flush_sequence_number = 4;
+  repeated StoreFlushDescriptor store_flushes = 5;
+}
+
 /**
  * A trailer that is appended to the end of a properly closed HLog WAL file.
  * If missing, this is either a legacy or a corrupted WAL file.

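To make the new message concrete, here is a small hypothetical helper (not part of the commit; the class name is invented) that summarizes a parsed FlushDescriptor using the accessors protoc generates for the fields declared above. The COMMIT_FLUSH case is where flush_output carries the newly written file names.

import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor;

public class FlushMarkerSummary {
  // Produces e.g. "COMMIT_FLUSH seq=42 family:1" for a single-store flush with one output file.
  static String summarize(FlushDescriptor flush) {
    StringBuilder sb = new StringBuilder()
        .append(flush.getAction())
        .append(" seq=").append(flush.getFlushSequenceNumber());
    for (StoreFlushDescriptor store : flush.getStoreFlushesList()) {
      // flush_output names are relative to store_home_dir, which is itself relative to the region dir
      sb.append(' ').append(store.getStoreHomeDir())
        .append(':').append(store.getFlushOutputCount());
    }
    return sb.toString();
  }
}
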
@@ -30,6 +30,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.NavigableMap;
@@ -113,10 +114,13 @@ import org.apache.hadoop.hbase.ipc.RpcCallContext;
 import org.apache.hadoop.hbase.ipc.RpcServer;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.FlushAction;
 import org.apache.hadoop.hbase.regionserver.MultiVersionConsistencyControl.WriteEntry;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
@@ -1729,8 +1733,11 @@ public class HRegion implements HeapSize { // , Writable{
     status.setStatus("Preparing to flush by snapshotting stores in " +
       getRegionInfo().getEncodedName());
     List<StoreFlushContext> storeFlushCtxs = new ArrayList<StoreFlushContext>(stores.size());
+    TreeMap<byte[], List<Path>> committedFiles = new TreeMap<byte[], List<Path>>(
+        Bytes.BYTES_COMPARATOR);
     long flushSeqId = -1L;
 
+    long trxId = 0;
     try {
       try {
         w = mvcc.beginMemstoreInsert();
@@ -1754,12 +1761,39 @@ public class HRegion implements HeapSize { // , Writable{
         for (Store s : stores.values()) {
           totalFlushableSize += s.getFlushableSize();
           storeFlushCtxs.add(s.createFlushContext(flushSeqId));
+          committedFiles.put(s.getFamily().getName(), null); // for writing stores to WAL
         }
 
+        // write the snapshot start to WAL
+        if (wal != null) {
+          FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.START_FLUSH,
+            getRegionInfo(), flushSeqId, committedFiles);
+          trxId = HLogUtil.writeFlushMarker(wal, this.htableDescriptor, getRegionInfo(),
+            desc, sequenceId, false); // no sync. Sync is below where we do not hold the updates lock
+        }
+
         // Prepare flush (take a snapshot)
         for (StoreFlushContext flush : storeFlushCtxs) {
           flush.prepare();
         }
+      } catch (IOException ex) {
+        if (wal != null) {
+          if (trxId > 0) { // check whether we have already written START_FLUSH to WAL
+            try {
+              FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.ABORT_FLUSH,
+                getRegionInfo(), flushSeqId, committedFiles);
+              HLogUtil.writeFlushMarker(wal, this.htableDescriptor, getRegionInfo(),
+                desc, sequenceId, false);
+            } catch (Throwable t) {
+              LOG.warn("Received unexpected exception trying to write ABORT_FLUSH marker to WAL:" +
+                StringUtils.stringifyException(t));
+              // ignore this since we will be aborting the RS with DSE.
+            }
+          }
+          // we have called wal.startCacheFlush(), now we have to abort it
+          wal.abortCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
+          throw ex; // let upper layers deal with it.
+        }
       } finally {
         this.updatesLock.writeLock().unlock();
       }
@@ -1767,9 +1801,16 @@ public class HRegion implements HeapSize { // , Writable{
       ", syncing WAL and waiting on mvcc, flushsize=" + totalFlushableSize;
     status.setStatus(s);
     if (LOG.isTraceEnabled()) LOG.trace(s);
-    // sync unflushed WAL changes when deferred log sync is enabled
+    // sync unflushed WAL changes
     // see HBASE-8208 for details
-    if (wal != null && !shouldSyncLog()) wal.sync();
+    if (wal != null) {
+      try {
+        wal.sync(); // ensure that flush marker is sync'ed
+      } catch (IOException ioe) {
+        LOG.warn("Unexpected exception while log.sync(), ignoring. Exception: "
+            + StringUtils.stringifyException(ioe));
+      }
+    }
 
     // wait for all in-progress transactions to commit to HLog before
     // we can start the flush. This prevents
@@ -1806,16 +1847,27 @@ public class HRegion implements HeapSize { // , Writable{
 
       // Switch snapshot (in memstore) -> new hfile (thus causing
       // all the store scanners to reset/reseek).
+      Iterator<Store> it = stores.values().iterator(); // stores.values() and storeFlushCtxs have
+                                                        // same order
       for (StoreFlushContext flush : storeFlushCtxs) {
        boolean needsCompaction = flush.commit(status);
        if (needsCompaction) {
          compactionRequested = true;
        }
+       committedFiles.put(it.next().getFamily().getName(), flush.getCommittedFiles());
      }
      storeFlushCtxs.clear();
 
      // Set down the memstore size by amount of flush.
      this.addAndGetGlobalMemstoreSize(-totalFlushableSize);
+
+     if (wal != null) {
+       // write flush marker to WAL. If fail, we should throw DroppedSnapshotException
+       FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.COMMIT_FLUSH,
+         getRegionInfo(), flushSeqId, committedFiles);
+       HLogUtil.writeFlushMarker(wal, this.htableDescriptor, getRegionInfo(),
+         desc, sequenceId, true);
+     }
    } catch (Throwable t) {
      // An exception here means that the snapshot was not persisted.
      // The hlog needs to be replayed so its content is restored to memstore.
@@ -1824,6 +1876,16 @@ public class HRegion implements HeapSize { // , Writable{
      // exceptions -- e.g. HBASE-659 was about an NPE -- so now we catch
      // all and sundry.
      if (wal != null) {
+       try {
+         FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.ABORT_FLUSH,
+           getRegionInfo(), flushSeqId, committedFiles);
+         HLogUtil.writeFlushMarker(wal, this.htableDescriptor, getRegionInfo(),
+           desc, sequenceId, false);
+       } catch (Throwable ex) {
+         LOG.warn("Received unexpected exception trying to write ABORT_FLUSH marker to WAL:" +
+           StringUtils.stringifyException(ex));
+         // ignore this since we will be aborting the RS with DSE.
+       }
        wal.abortCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
      }
      DroppedSnapshotException dse = new DroppedSnapshotException("region: " +

@@ -2035,6 +2035,7 @@ public class HStore implements Store {
     private long cacheFlushSeqNum;
     private MemStoreSnapshot snapshot;
     private List<Path> tempFiles;
+    private List<Path> committedFiles;
 
     private StoreFlusherImpl(long cacheFlushSeqNum) {
       this.cacheFlushSeqNum = cacheFlushSeqNum;
@@ -2047,6 +2048,7 @@ public class HStore implements Store {
     @Override
     public void prepare() {
       this.snapshot = memstore.snapshot();
+      committedFiles = new ArrayList<Path>(1);
     }
 
     @Override
@@ -2079,14 +2081,20 @@ public class HStore implements Store {
         }
       }
 
-      if (HStore.this.getCoprocessorHost() != null) {
-        for (StoreFile sf : storeFiles) {
+      for (StoreFile sf : storeFiles) {
+        if (HStore.this.getCoprocessorHost() != null) {
           HStore.this.getCoprocessorHost().postFlush(HStore.this, sf);
         }
+        committedFiles.add(sf.getPath());
       }
       // Add new file to store files. Clear snapshot too while we have the Store write lock.
       return HStore.this.updateStorefiles(storeFiles, snapshot.getId());
     }
+
+    @Override
+    public List<Path> getCommittedFiles() {
+      return committedFiles;
+    }
   }
 
   @Override

@@ -19,8 +19,10 @@
 package org.apache.hadoop.hbase.regionserver;
 
 import java.io.IOException;
+import java.util.List;
 
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 
 /**
@@ -61,4 +63,10 @@ interface StoreFlushContext {
    * @throws IOException
    */
   boolean commit(MonitoredTask status) throws IOException;
+
+  /**
+   * Returns the newly committed files from the flush. Called only if commit returns true
+   * @return a list of Paths for new files
+   */
+  List<Path> getCommittedFiles();
 }

@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
 import org.apache.hadoop.hbase.util.FSUtils;
 
 import com.google.protobuf.TextFormat;
@@ -268,4 +269,19 @@ public class HLogUtil {
       LOG.trace("Appended compaction marker " + TextFormat.shortDebugString(c));
     }
   }
+
+  /**
+   * Write a flush marker indicating a start / abort or a complete of a region flush
+   */
+  public static long writeFlushMarker(HLog log, HTableDescriptor htd, HRegionInfo info,
+      final FlushDescriptor f, AtomicLong sequenceId, boolean sync) throws IOException {
+    TableName tn = TableName.valueOf(f.getTableName().toByteArray());
+    HLogKey key = new HLogKey(info.getEncodedNameAsBytes(), tn);
+    long trx = log.appendNoSync(htd, info, key, WALEdit.createFlushWALEdit(info, f), sequenceId, false, null);
+    if (sync) log.sync(trx);
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Appended flush marker " + TextFormat.shortDebugString(f));
+    }
+    return trx;
+  }
 }

@@ -36,8 +36,10 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.codec.Codec;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.io.Writable;
 
 
@@ -83,6 +85,8 @@ public class WALEdit implements Writable, HeapSize {
   public static final byte [] METAFAMILY = Bytes.toBytes("METAFAMILY");
   static final byte [] METAROW = Bytes.toBytes("METAROW");
   static final byte[] COMPACTION = Bytes.toBytes("HBASE::COMPACTION");
+  static final byte [] FLUSH = Bytes.toBytes("HBASE::FLUSH");
+
   private final int VERSION_2 = -1;
   private final boolean isReplay;
 
@@ -112,6 +116,10 @@ public class WALEdit implements Writable, HeapSize {
     return Bytes.equals(METAFAMILY, f);
   }
 
+  public static boolean isMetaEditFamily(Cell cell) {
+    return CellUtil.matchingFamily(cell, METAFAMILY);
+  }
+
   /**
    * @return True when current WALEdit is created by log replay. Replication skips WALEdits from
    * replay.
@@ -256,6 +264,19 @@ public class WALEdit implements Writable, HeapSize {
     return sb.toString();
   }
 
+  public static WALEdit createFlushWALEdit(HRegionInfo hri, FlushDescriptor f) {
+    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, FLUSH,
+      EnvironmentEdgeManager.currentTimeMillis(), f.toByteArray());
+    return new WALEdit().add(kv);
+  }
+
+  public static FlushDescriptor getFlushDescriptor(Cell cell) throws IOException {
+    if (CellUtil.matchingColumn(cell, METAFAMILY, FLUSH)) {
+      return FlushDescriptor.parseFrom(cell.getValue());
+    }
+    return null;
+  }
+
   /**
    * Create a compacion WALEdit
    * @param c
@@ -264,7 +285,7 @@ public class WALEdit implements Writable, HeapSize {
   public static WALEdit createCompaction(final HRegionInfo hri, final CompactionDescriptor c) {
     byte [] pbbytes = c.toByteArray();
     KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, COMPACTION,
-      System.currentTimeMillis(), pbbytes);
+      EnvironmentEdgeManager.currentTimeMillis(), pbbytes);
     return new WALEdit().add(kv); //replication scope null so that this won't be replicated
   }
 

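Putting the two new WALEdit helpers together, a consumer that finds a METAFAMILY cell in the log can recover the flush descriptor as sketched below. This is an illustrative round-trip only (the class and method names are invented), and the descriptor is assumed to have been built elsewhere, for example with ProtobufUtil.toFlushDescriptor:

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;

public class FlushMarkerRoundTrip {
  // Wraps a descriptor into the single METAFAMILY:HBASE::FLUSH cell and reads it back.
  static FlushDescriptor roundTrip(HRegionInfo hri, FlushDescriptor desc) throws IOException {
    WALEdit edit = WALEdit.createFlushWALEdit(hri, desc);
    Cell cell = edit.getKeyValues().get(0);
    // getFlushDescriptor returns null for meta cells that are not flush markers,
    // e.g. the compaction markers produced by createCompaction.
    return WALEdit.isMetaEditFamily(cell) ? WALEdit.getFlushDescriptor(cell) : null;
  }
}
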
@@ -35,10 +35,12 @@ import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyLong;
+import static org.mockito.Matchers.argThat;
 import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
 
 import java.io.IOException;
 import java.io.InterruptedIOException;
@@ -111,6 +113,9 @@ import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.FlushAction;
+import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor;
 import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl;
 import org.apache.hadoop.hbase.regionserver.HRegion.RowLock;
 import org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem;
@@ -136,6 +141,7 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.mockito.ArgumentMatcher;
 import org.mockito.Mockito;
 
 import com.google.common.collect.Lists;
@@ -786,6 +792,228 @@ public class TestHRegion {
     }
   }
 
+  @Test
+  public void testFlushMarkers() throws Exception {
+    // tests that flush markers are written to WAL and handled at recovered edits
+    String method = name.getMethodName();
+    TableName tableName = TableName.valueOf(method);
+    byte[] family = Bytes.toBytes("family");
+    Path logDir = TEST_UTIL.getDataTestDirOnTestFS("testRecoveredEditsIgnoreFlushMarkers.log");
+    HLog hlog = HLogFactory.createHLog(FILESYSTEM, logDir, UUID.randomUUID().toString(),
+      TEST_UTIL.getConfiguration());
+
+    this.region = initHRegion(tableName.getName(), HConstants.EMPTY_START_ROW,
+      HConstants.EMPTY_END_ROW, method, CONF, false, Durability.USE_DEFAULT, hlog, family);
+    try {
+      Path regiondir = region.getRegionFileSystem().getRegionDir();
+      FileSystem fs = region.getRegionFileSystem().getFileSystem();
+      byte[] regionName = region.getRegionInfo().getEncodedNameAsBytes();
+
+      long maxSeqId = 3;
+      long minSeqId = 0;
+
+      for (long i = minSeqId; i < maxSeqId; i++) {
+        Put put = new Put(Bytes.toBytes(i));
+        put.add(family, Bytes.toBytes(i), Bytes.toBytes(i));
+        region.put(put);
+        region.flushcache();
+      }
+
+      // this will create a region with 3 files from flush
+      assertEquals(3, region.getStore(family).getStorefilesCount());
+      List<String> storeFiles = new ArrayList<String>(3);
+      for (StoreFile sf : region.getStore(family).getStorefiles()) {
+        storeFiles.add(sf.getPath().getName());
+      }
+
+      // now verify that the flush markers are written
+      hlog.close();
+      HLog.Reader reader = HLogFactory.createReader(fs,
+        fs.listStatus(fs.listStatus(logDir)[0].getPath())[0].getPath(),
+        TEST_UTIL.getConfiguration());
+
+      List<HLog.Entry> flushDescriptors = new ArrayList<HLog.Entry>();
+      long lastFlushSeqId = -1;
+      while (true) {
+        HLog.Entry entry = reader.next();
+        if (entry == null) {
+          break;
+        }
+        Cell cell = entry.getEdit().getKeyValues().get(0);
+        if (WALEdit.isMetaEditFamily(cell)) {
+          FlushDescriptor flushDesc = WALEdit.getFlushDescriptor(cell);
+          assertNotNull(flushDesc);
+          assertArrayEquals(tableName.getName(), flushDesc.getTableName().toByteArray());
+          if (flushDesc.getAction() == FlushAction.START_FLUSH) {
+            assertTrue(flushDesc.getFlushSequenceNumber() > lastFlushSeqId);
+          } else if (flushDesc.getAction() == FlushAction.COMMIT_FLUSH) {
+            assertTrue(flushDesc.getFlushSequenceNumber() == lastFlushSeqId);
+          }
+          lastFlushSeqId = flushDesc.getFlushSequenceNumber();
+          assertArrayEquals(regionName, flushDesc.getEncodedRegionName().toByteArray());
+          assertEquals(1, flushDesc.getStoreFlushesCount()); //only one store
+          StoreFlushDescriptor storeFlushDesc = flushDesc.getStoreFlushes(0);
+          assertArrayEquals(family, storeFlushDesc.getFamilyName().toByteArray());
+          assertEquals("family", storeFlushDesc.getStoreHomeDir());
+          if (flushDesc.getAction() == FlushAction.START_FLUSH) {
+            assertEquals(0, storeFlushDesc.getFlushOutputCount());
+          } else {
+            assertEquals(1, storeFlushDesc.getFlushOutputCount()); //only one file from flush
+            assertTrue(storeFiles.contains(storeFlushDesc.getFlushOutput(0)));
+          }
+
+          flushDescriptors.add(entry);
+        }
+      }
+
+      assertEquals(3 * 2, flushDescriptors.size()); // START_FLUSH and COMMIT_FLUSH per flush
+
+      // now write those markers to the recovered edits again.
+
+      Path recoveredEditsDir = HLogUtil.getRegionDirRecoveredEditsDir(regiondir);
+
+      Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", 1000));
+      fs.create(recoveredEdits);
+      HLog.Writer writer = HLogFactory.createRecoveredEditsWriter(fs, recoveredEdits, CONF);
+
+      for (HLog.Entry entry : flushDescriptors) {
+        writer.append(entry);
+      }
+      writer.close();
+
+      // close the region now, and reopen again
+      region.close();
+      region = HRegion.openHRegion(region, null);
+
+      // now check whether we have can read back the data from region
+      for (long i = minSeqId; i < maxSeqId; i++) {
+        Get get = new Get(Bytes.toBytes(i));
+        Result result = region.get(get);
+        byte[] value = result.getValue(family, Bytes.toBytes(i));
+        assertArrayEquals(Bytes.toBytes(i), value);
+      }
+    } finally {
+      HRegion.closeHRegion(this.region);
+      this.region = null;
+    }
+  }
+
+  class IsFlushWALMarker extends ArgumentMatcher<WALEdit> {
+    volatile FlushAction[] actions;
+    public IsFlushWALMarker(FlushAction... actions) {
+      this.actions = actions;
+    }
+    @Override
+    public boolean matches(Object edit) {
+      List<KeyValue> kvs = ((WALEdit)edit).getKeyValues();
+      if (kvs.isEmpty()) {
+        return false;
+      }
+      if (WALEdit.isMetaEditFamily(kvs.get(0))) {
+        FlushDescriptor desc = null;
+        try {
+          desc = WALEdit.getFlushDescriptor(kvs.get(0));
+        } catch (IOException e) {
+          LOG.warn(e);
+          return false;
+        }
+        if (desc != null) {
+          for (FlushAction action : actions) {
+            if (desc.getAction() == action) {
+              return true;
+            }
+          }
+        }
+      }
+      return false;
+    }
+    public IsFlushWALMarker set(FlushAction... actions) {
+      this.actions = actions;
+      return this;
+    }
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testFlushMarkersWALFail() throws Exception {
+    // test the cases where the WAL append for flush markers fail.
+    String method = name.getMethodName();
+    TableName tableName = TableName.valueOf(method);
+    byte[] family = Bytes.toBytes("family");
+
+    // spy an actual WAL implementation to throw exception (was not able to mock)
+    Path logDir = TEST_UTIL.getDataTestDirOnTestFS("testRecoveredEditsIgnoreFlushMarkers.log");
+    HLog hlog = spy(HLogFactory.createHLog(FILESYSTEM, logDir, UUID.randomUUID().toString(),
+      TEST_UTIL.getConfiguration()));
+
+    this.region = initHRegion(tableName.getName(), HConstants.EMPTY_START_ROW,
+      HConstants.EMPTY_END_ROW, method, CONF, false, Durability.USE_DEFAULT, hlog, family);
+    try {
+      int i = 0;
+      Put put = new Put(Bytes.toBytes(i));
+      put.setDurability(Durability.SKIP_WAL); // have to skip mocked wal
+      put.add(family, Bytes.toBytes(i), Bytes.toBytes(i));
+      region.put(put);
+
+      // 1. Test case where START_FLUSH throws exception
+      IsFlushWALMarker isFlushWALMarker = new IsFlushWALMarker(FlushAction.START_FLUSH);
+
+      // throw exceptions if the WalEdit is a start flush action
+      when(hlog.appendNoSync((HTableDescriptor)any(), (HRegionInfo)any(), (HLogKey)any(),
+        (WALEdit)argThat(isFlushWALMarker), (AtomicLong)any(), Mockito.anyBoolean(),
+        (List<KeyValue>)any()))
+        .thenThrow(new IOException("Fail to append flush marker"));
+
+      // start cache flush will throw exception
+      try {
+        region.flushcache();
+        fail("This should have thrown exception");
+      } catch (DroppedSnapshotException unexpected) {
+        // this should not be a dropped snapshot exception. Meaning that RS will not abort
+        throw unexpected;
+      } catch (IOException expected) {
+        // expected
+      }
+
+      // 2. Test case where START_FLUSH succeeds but COMMIT_FLUSH will throw exception
+      isFlushWALMarker.set(FlushAction.COMMIT_FLUSH);
+
+      try {
+        region.flushcache();
+        fail("This should have thrown exception");
+      } catch (DroppedSnapshotException expected) {
+        // we expect this exception, since we were able to write the snapshot, but failed to
+        // write the flush marker to WAL
+      } catch (IOException unexpected) {
+        throw unexpected;
+      }
+
+      region.close();
+      this.region = initHRegion(tableName.getName(), HConstants.EMPTY_START_ROW,
+        HConstants.EMPTY_END_ROW, method, CONF, false, Durability.USE_DEFAULT, hlog, family);
+      region.put(put);
+
+      // 3. Test case where ABORT_FLUSH will throw exception.
+      // Even if ABORT_FLUSH throws exception, we should not fail with IOE, but continue with
+      // DroppedSnapshotException. Below COMMMIT_FLUSH will cause flush to abort
+      isFlushWALMarker.set(FlushAction.COMMIT_FLUSH, FlushAction.ABORT_FLUSH);
+
+      try {
+        region.flushcache();
+        fail("This should have thrown exception");
+      } catch (DroppedSnapshotException expected) {
+        // we expect this exception, since we were able to write the snapshot, but failed to
+        // write the flush marker to WAL
+      } catch (IOException unexpected) {
+        throw unexpected;
+      }
+
+    } finally {
+      HRegion.closeHRegion(this.region);
+      this.region = null;
+    }
+  }
+
   @Test
   public void testGetWhileRegionClose() throws IOException {
     TableName tableName = TableName.valueOf(name.getMethodName());