HBASE-5826 Improve sync of HLog edits (Todd)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1339671 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c18eebd8a6
commit
022ef75950
|
@ -48,6 +48,7 @@ import java.util.concurrent.locks.ReentrantLock;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.google.common.base.Charsets;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
@ -139,8 +140,10 @@ public class HLog implements Syncable {
|
|||
private final long optionalFlushInterval;
|
||||
private final long blocksize;
|
||||
private final String prefix;
|
||||
private final AtomicLong unflushedEntries = new AtomicLong(0);
|
||||
private volatile long syncedTillHere = 0;
|
||||
|
||||
/** tracking information about what has been synced */
|
||||
private SyncInfo syncInfo = new SyncInfo();
|
||||
|
||||
private long lastDeferredTxid;
|
||||
private final Path oldLogDir;
|
||||
private volatile boolean logRollRunning;
|
||||
|
@ -230,7 +233,6 @@ public class HLog implements Syncable {
|
|||
// during an update
|
||||
// locked during appends
|
||||
private final Object updateLock = new Object();
|
||||
private final Object flushLock = new Object();
|
||||
|
||||
private final boolean enabled;
|
||||
|
||||
|
@ -298,6 +300,7 @@ public class HLog implements Syncable {
|
|||
private static Metric writeSize = new Metric();
|
||||
// For measuring latency of syncs
|
||||
private static Metric syncTime = new Metric();
|
||||
private static AtomicLong syncBatchSize = new AtomicLong();
|
||||
//For measuring slow HLog appends
|
||||
private static AtomicLong slowHLogAppendCount = new AtomicLong();
|
||||
private static Metric slowHLogAppendTime = new Metric();
|
||||
|
@ -314,6 +317,10 @@ public class HLog implements Syncable {
|
|||
return syncTime.get();
|
||||
}
|
||||
|
||||
public static long getSyncBatchSize() {
|
||||
return syncBatchSize.getAndSet(0);
|
||||
}
|
||||
|
||||
public static long getSlowAppendCount() {
|
||||
return slowHLogAppendCount.get();
|
||||
}
|
||||
|
@ -833,17 +840,11 @@ public class HLog implements Syncable {
|
|||
try {
|
||||
// Wait till all current transactions are written to the hlog.
|
||||
// No new transactions can occur because we have the updatelock.
|
||||
if (this.unflushedEntries.get() != this.syncedTillHere) {
|
||||
LOG.debug("cleanupCurrentWriter " +
|
||||
" waiting for transactions to get synced " +
|
||||
" total " + this.unflushedEntries.get() +
|
||||
" synced till here " + syncedTillHere);
|
||||
sync();
|
||||
}
|
||||
sync();
|
||||
this.writer.close();
|
||||
this.writer = null;
|
||||
closeErrorCount.set(0);
|
||||
} catch (IOException e) {
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed close of HLog writer", e);
|
||||
int errors = closeErrorCount.incrementAndGet();
|
||||
if (errors <= closeErrorsTolerated && !hasDeferredEntries()) {
|
||||
|
@ -1006,7 +1007,7 @@ public class HLog implements Syncable {
|
|||
* @param logEdit
|
||||
* @param logKey
|
||||
* @param doSync shall we sync after writing the transaction
|
||||
* @return The txid of this transaction
|
||||
* @return The seqnum of this transaction
|
||||
* @throws IOException
|
||||
*/
|
||||
public long append(HRegionInfo regionInfo, HLogKey logKey, WALEdit logEdit,
|
||||
|
@ -1015,9 +1016,9 @@ public class HLog implements Syncable {
|
|||
if (this.closed) {
|
||||
throw new IOException("Cannot append; log is closed");
|
||||
}
|
||||
long txid = 0;
|
||||
long seqNum;
|
||||
synchronized (updateLock) {
|
||||
long seqNum = obtainSeqNum();
|
||||
seqNum = obtainSeqNum();
|
||||
logKey.setLogSeqNum(seqNum);
|
||||
// The 'lastSeqWritten' map holds the sequence number of the oldest
|
||||
// write for each region (i.e. the first edit added to the particular
|
||||
|
@ -1027,10 +1028,9 @@ public class HLog implements Syncable {
|
|||
this.lastSeqWritten.putIfAbsent(regionInfo.getEncodedNameAsBytes(),
|
||||
Long.valueOf(seqNum));
|
||||
doWrite(regionInfo, logKey, logEdit, htd);
|
||||
txid = this.unflushedEntries.incrementAndGet();
|
||||
this.numEntries.incrementAndGet();
|
||||
if (htd.isDeferredLogFlush()) {
|
||||
lastDeferredTxid = txid;
|
||||
lastDeferredTxid = seqNum;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1040,9 +1040,9 @@ public class HLog implements Syncable {
|
|||
(regionInfo.isMetaRegion() ||
|
||||
!htd.isDeferredLogFlush())) {
|
||||
// sync txn to file system
|
||||
this.sync(txid);
|
||||
this.sync(seqNum);
|
||||
}
|
||||
return txid;
|
||||
return seqNum;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1090,13 +1090,13 @@ public class HLog implements Syncable {
|
|||
private long append(HRegionInfo info, byte [] tableName, WALEdit edits, UUID clusterId,
|
||||
final long now, HTableDescriptor htd, boolean doSync)
|
||||
throws IOException {
|
||||
if (edits.isEmpty()) return this.unflushedEntries.get();;
|
||||
if (edits.isEmpty()) return this.logSeqNum.get();
|
||||
if (this.closed) {
|
||||
throw new IOException("Cannot append; log is closed");
|
||||
}
|
||||
long txid = 0;
|
||||
long seqNum;
|
||||
synchronized (this.updateLock) {
|
||||
long seqNum = obtainSeqNum();
|
||||
seqNum = obtainSeqNum();
|
||||
// The 'lastSeqWritten' map holds the sequence number of the oldest
|
||||
// write for each region (i.e. the first edit added to the particular
|
||||
// memstore). . When the cache is flushed, the entry for the
|
||||
|
@ -1109,9 +1109,8 @@ public class HLog implements Syncable {
|
|||
HLogKey logKey = makeKey(encodedRegionName, tableName, seqNum, now, clusterId);
|
||||
doWrite(info, logKey, edits, htd);
|
||||
this.numEntries.incrementAndGet();
|
||||
txid = this.unflushedEntries.incrementAndGet();
|
||||
if (htd.isDeferredLogFlush()) {
|
||||
lastDeferredTxid = txid;
|
||||
lastDeferredTxid = seqNum;
|
||||
}
|
||||
}
|
||||
// Sync if catalog region, and if not then check if that table supports
|
||||
|
@ -1120,9 +1119,9 @@ public class HLog implements Syncable {
|
|||
(info.isMetaRegion() ||
|
||||
!htd.isDeferredLogFlush())) {
|
||||
// sync txn to file system
|
||||
this.sync(txid);
|
||||
this.sync(seqNum);
|
||||
}
|
||||
return txid;
|
||||
return seqNum;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1180,6 +1179,9 @@ public class HLog implements Syncable {
|
|||
// goal is to increase the batchsize for writing-to-hdfs as well as
|
||||
// sync-to-hdfs, so that we can get better system throughput.
|
||||
private List<Entry> pendingWrites = new LinkedList<Entry>();
|
||||
long lastSeqAppended = -1;
|
||||
long lastSeqFlushed = -1;
|
||||
private Object flushLock = new Object();
|
||||
|
||||
LogSyncer(long optionalFlushInterval) {
|
||||
this.optionalFlushInterval = optionalFlushInterval;
|
||||
|
@ -1193,7 +1195,7 @@ public class HLog implements Syncable {
|
|||
while(!this.isInterrupted() && !closeLogSyncer) {
|
||||
|
||||
try {
|
||||
if (unflushedEntries.get() <= syncedTillHere) {
|
||||
if (syncInfo.getLastSyncedTxId() >= logSeqNum.get()) {
|
||||
Thread.sleep(this.optionalFlushInterval);
|
||||
}
|
||||
sync();
|
||||
|
@ -1213,24 +1215,45 @@ public class HLog implements Syncable {
|
|||
// our own queue rather than writing it to the HDFS output stream because
|
||||
// HDFSOutputStream.writeChunk is not lightweight at all.
|
||||
synchronized void append(Entry e) throws IOException {
|
||||
long seq = e.getKey().getLogSeqNum();
|
||||
assert seq > lastSeqAppended;
|
||||
lastSeqAppended = seq;
|
||||
pendingWrites.add(e);
|
||||
}
|
||||
|
||||
// Returns all currently pending writes. New writes
|
||||
// will accumulate in a new list.
|
||||
synchronized List<Entry> getPendingWrites() {
|
||||
List<Entry> save = this.pendingWrites;
|
||||
this.pendingWrites = new LinkedList<Entry>();
|
||||
return save;
|
||||
}
|
||||
long flushWritesTo(Writer writer) throws IOException {
|
||||
synchronized (flushLock) {
|
||||
List<Entry> pending;
|
||||
|
||||
// writes out pending entries to the HLog
|
||||
void hlogFlush(Writer writer, List<Entry> pending) throws IOException {
|
||||
if (pending == null) return;
|
||||
synchronized (this) {
|
||||
pending = pendingWrites;
|
||||
pendingWrites = new LinkedList<Entry>();
|
||||
}
|
||||
|
||||
// write out all accumulated Entries to hdfs.
|
||||
for (Entry e : pending) {
|
||||
writer.append(e);
|
||||
boolean success = false;
|
||||
try {
|
||||
int numFlushed = 0;
|
||||
for (Entry e : pending) {
|
||||
writer.append(e);
|
||||
long seq = e.getKey().getLogSeqNum();
|
||||
assert seq > lastSeqFlushed;
|
||||
lastSeqFlushed = seq;
|
||||
numFlushed++;
|
||||
}
|
||||
syncBatchSize.addAndGet(numFlushed);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
// push back our batch into the pending list
|
||||
synchronized (this) {
|
||||
pending.addAll(pendingWrites);
|
||||
pendingWrites = pending;
|
||||
}
|
||||
}
|
||||
}
|
||||
return lastSeqFlushed;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1239,9 +1262,31 @@ public class HLog implements Syncable {
|
|||
}
|
||||
}
|
||||
|
||||
private static class SyncInfo {
|
||||
private long syncedTillHere = 0;
|
||||
|
||||
synchronized long getLastSyncedTxId() {
|
||||
return syncedTillHere;
|
||||
}
|
||||
|
||||
synchronized void notifySynced(long txid) {
|
||||
if (txid > syncedTillHere) {
|
||||
syncedTillHere = txid;
|
||||
}
|
||||
notifyAll();
|
||||
}
|
||||
|
||||
synchronized void waitForSync(long txid) throws InterruptedException {
|
||||
while (syncedTillHere < txid) {
|
||||
wait();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// sync all known transactions
|
||||
private void syncer() throws IOException {
|
||||
syncer(this.unflushedEntries.get()); // sync all pending items
|
||||
syncer(logSeqNum.get()); // sync all pending
|
||||
}
|
||||
|
||||
// sync all transactions upto the specified txid
|
||||
|
@ -1253,47 +1298,26 @@ public class HLog implements Syncable {
|
|||
}
|
||||
// if the transaction that we are interested in is already
|
||||
// synced, then return immediately.
|
||||
if (txid <= this.syncedTillHere) {
|
||||
if (syncInfo.getLastSyncedTxId() >= txid) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
long doneUpto;
|
||||
long now = System.currentTimeMillis();
|
||||
// First flush all the pending writes to HDFS. Then
|
||||
// issue the sync to HDFS. If sync is successful, then update
|
||||
// syncedTillHere to indicate that transactions till this
|
||||
// number has been successfully synced.
|
||||
synchronized (flushLock) {
|
||||
if (txid <= this.syncedTillHere) {
|
||||
return;
|
||||
}
|
||||
doneUpto = this.unflushedEntries.get();
|
||||
List<Entry> pending = logSyncerThread.getPendingWrites();
|
||||
try {
|
||||
logSyncerThread.hlogFlush(tempWriter, pending);
|
||||
} catch(IOException io) {
|
||||
synchronized (this.updateLock) {
|
||||
// HBASE-4387, HBASE-5623, retry with updateLock held
|
||||
tempWriter = this.writer;
|
||||
logSyncerThread.hlogFlush(tempWriter, pending);
|
||||
}
|
||||
}
|
||||
}
|
||||
// another thread might have sync'ed avoid double-sync'ing
|
||||
if (txid <= this.syncedTillHere) {
|
||||
return;
|
||||
}
|
||||
long flushedSeqId;
|
||||
try {
|
||||
flushedSeqId = logSyncerThread.flushWritesTo(tempWriter);
|
||||
tempWriter.sync();
|
||||
} catch(IOException io) {
|
||||
synchronized (this.updateLock) {
|
||||
// HBASE-4387, HBASE-5623, retry with updateLock held
|
||||
tempWriter = this.writer;
|
||||
flushedSeqId = logSyncerThread.flushWritesTo(tempWriter);
|
||||
tempWriter.sync();
|
||||
}
|
||||
}
|
||||
this.syncedTillHere = Math.max(this.syncedTillHere, doneUpto);
|
||||
|
||||
syncInfo.notifySynced(flushedSeqId);
|
||||
// We try to not acquire the updateLock just to update statistics.
|
||||
// Make these statistics as AtomicLong.
|
||||
syncTime.inc(System.currentTimeMillis() - now);
|
||||
if (!this.logRollRunning) {
|
||||
checkLowReplication();
|
||||
|
@ -1548,14 +1572,15 @@ public class HLog implements Syncable {
|
|||
if (this.closed) {
|
||||
return;
|
||||
}
|
||||
long txid = 0;
|
||||
long seqNumOfCompletionEdit;
|
||||
synchronized (updateLock) {
|
||||
seqNumOfCompletionEdit = obtainSeqNum();
|
||||
long now = System.currentTimeMillis();
|
||||
WALEdit edit = completeCacheFlushLogEdit();
|
||||
HLogKey key = makeKey(encodedRegionName, tableName, logSeqId,
|
||||
|
||||
WALEdit edit = completeCacheFlushLogEdit(logSeqId);
|
||||
HLogKey key = makeKey(encodedRegionName, tableName, seqNumOfCompletionEdit,
|
||||
System.currentTimeMillis(), HConstants.DEFAULT_CLUSTER_ID);
|
||||
logSyncerThread.append(new Entry(key, edit));
|
||||
txid = this.unflushedEntries.incrementAndGet();
|
||||
writeTime.inc(System.currentTimeMillis() - now);
|
||||
long len = 0;
|
||||
for (KeyValue kv : edit.getKeyValues()) {
|
||||
|
@ -1565,7 +1590,7 @@ public class HLog implements Syncable {
|
|||
this.numEntries.incrementAndGet();
|
||||
}
|
||||
// sync txn to file system
|
||||
this.sync(txid);
|
||||
this.sync(seqNumOfCompletionEdit);
|
||||
|
||||
} finally {
|
||||
// updateLock not needed for removing snapshot's entry
|
||||
|
@ -1576,9 +1601,17 @@ public class HLog implements Syncable {
|
|||
}
|
||||
}
|
||||
|
||||
private WALEdit completeCacheFlushLogEdit() {
|
||||
private WALEdit completeCacheFlushLogEdit(long seqIdOfFlush) {
|
||||
// The data is not actually used here - we just need to write
|
||||
// something to the log to make sure we're still the owner of the
|
||||
// pipeline.
|
||||
byte[] data = Bytes.add(
|
||||
COMPLETE_CACHE_FLUSH,
|
||||
":".getBytes(Charsets.UTF_8),
|
||||
Bytes.toBytes(String.valueOf(seqIdOfFlush)));
|
||||
|
||||
KeyValue kv = new KeyValue(METAROW, METAFAMILY, null,
|
||||
System.currentTimeMillis(), COMPLETE_CACHE_FLUSH);
|
||||
System.currentTimeMillis(), data);
|
||||
WALEdit e = new WALEdit();
|
||||
e.add(kv);
|
||||
return e;
|
||||
|
@ -1855,7 +1888,7 @@ public class HLog implements Syncable {
|
|||
|
||||
/** Provide access to currently deferred sequence num for tests */
|
||||
boolean hasDeferredEntries() {
|
||||
return lastDeferredTxid > syncedTillHere;
|
||||
return lastDeferredTxid > syncInfo.getLastSyncedTxId();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -245,7 +245,7 @@ public class SequenceFileLogWriter implements HLog.Writer {
|
|||
|
||||
@Override
|
||||
public void sync() throws IOException {
|
||||
this.writer.syncFs();
|
||||
if (this.writer != null) this.writer.syncFs();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -548,8 +548,9 @@ public class TestHLog {
|
|||
KeyValue kv = val.getKeyValues().get(0);
|
||||
assertTrue(Bytes.equals(HLog.METAROW, kv.getRow()));
|
||||
assertTrue(Bytes.equals(HLog.METAFAMILY, kv.getFamily()));
|
||||
assertEquals(0, Bytes.compareTo(HLog.COMPLETE_CACHE_FLUSH,
|
||||
val.getKeyValues().get(0).getValue()));
|
||||
assertTrue(Bytes.startsWith(
|
||||
val.getKeyValues().get(0).getValue(),
|
||||
HLog.COMPLETE_CACHE_FLUSH));
|
||||
System.out.println(key + " " + val);
|
||||
}
|
||||
} finally {
|
||||
|
@ -616,8 +617,9 @@ public class TestHLog {
|
|||
assertTrue(Bytes.equals(tableName, entry.getKey().getTablename()));
|
||||
assertTrue(Bytes.equals(HLog.METAROW, val.getRow()));
|
||||
assertTrue(Bytes.equals(HLog.METAFAMILY, val.getFamily()));
|
||||
assertEquals(0, Bytes.compareTo(HLog.COMPLETE_CACHE_FLUSH,
|
||||
val.getValue()));
|
||||
assertTrue(Bytes.startsWith(
|
||||
val.getValue(),
|
||||
HLog.COMPLETE_CACHE_FLUSH));
|
||||
System.out.println(entry.getKey() + " " + val);
|
||||
}
|
||||
} finally {
|
||||
|
|
Loading…
Reference in New Issue