HBASE-2544 Forward port branch 0.20 WAL to TRUNK
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@944063 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eb11dd6c82
commit
635768d28a
|
@ -308,6 +308,7 @@ Release 0.21.0 - Unreleased
|
||||||
HBASE-2431 Master does not respect generation stamps, may result in meta
|
HBASE-2431 Master does not respect generation stamps, may result in meta
|
||||||
getting permanently offlined
|
getting permanently offlined
|
||||||
HBASE-2515 ChangeTableState considers split&&offline regions as being served
|
HBASE-2515 ChangeTableState considers split&&offline regions as being served
|
||||||
|
HBASE-2544 Forward port branch 0.20 WAL to TRUNK
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HBASE-1760 Cleanup TODOs in HTable
|
HBASE-1760 Cleanup TODOs in HTable
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/**
|
/**
|
||||||
* Copyright 2007 The Apache Software Foundation
|
* Copyright 2010 The Apache Software Foundation
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
@ -19,34 +19,14 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.regionserver.wal;
|
package org.apache.hadoop.hbase.regionserver.wal;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.fs.Syncable;
|
|
||||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
|
||||||
import org.apache.hadoop.hbase.HRegionInfo;
|
|
||||||
import org.apache.hadoop.hbase.HServerInfo;
|
|
||||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
|
||||||
import org.apache.hadoop.hbase.KeyValue;
|
|
||||||
import org.apache.hadoop.hbase.RemoteExceptionHandler;
|
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
|
||||||
import org.apache.hadoop.hbase.util.ClassSize;
|
|
||||||
import org.apache.hadoop.hbase.util.FSUtils;
|
|
||||||
import org.apache.hadoop.hbase.util.Threads;
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
import org.apache.hadoop.io.Writable;
|
|
||||||
|
|
||||||
import java.io.DataInput;
|
import java.io.DataInput;
|
||||||
import java.io.DataOutput;
|
import java.io.DataOutput;
|
||||||
import java.io.EOFException;
|
import java.io.EOFException;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
@ -66,6 +46,32 @@ import java.util.concurrent.locks.Condition;
|
||||||
import java.util.concurrent.locks.Lock;
|
import java.util.concurrent.locks.Lock;
|
||||||
import java.util.concurrent.locks.ReentrantLock;
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.Syncable;
|
||||||
|
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.HServerInfo;
|
||||||
|
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||||
|
import org.apache.hadoop.hbase.KeyValue;
|
||||||
|
import org.apache.hadoop.hbase.RemoteExceptionHandler;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.hbase.util.ClassSize;
|
||||||
|
import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
|
import org.apache.hadoop.hbase.util.Threads;
|
||||||
|
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.FSConstants;
|
||||||
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
|
import org.apache.hadoop.io.Writable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HLog stores all the edits to the HStore. Its the hbase write-ahead-log
|
* HLog stores all the edits to the HStore. Its the hbase write-ahead-log
|
||||||
* implementation.
|
* implementation.
|
||||||
|
@ -120,20 +126,21 @@ public class HLog implements HConstants, Syncable {
|
||||||
private final AtomicInteger unflushedEntries = new AtomicInteger(0);
|
private final AtomicInteger unflushedEntries = new AtomicInteger(0);
|
||||||
private final Path oldLogDir;
|
private final Path oldLogDir;
|
||||||
|
|
||||||
|
private OutputStream hdfs_out; // OutputStream associated with the current SequenceFile.writer
|
||||||
|
private int initialReplication; // initial replication factor of SequenceFile.writer
|
||||||
|
private Method getNumCurrentReplicas; // refers to DFSOutputStream.getNumCurrentReplicas
|
||||||
|
final static Object [] NO_ARGS = new Object []{};
|
||||||
|
|
||||||
|
// used to indirectly tell syncFs to force the sync
|
||||||
|
private boolean forceSync = false;
|
||||||
|
|
||||||
public interface Reader {
|
public interface Reader {
|
||||||
|
|
||||||
void init(FileSystem fs, Path path, Configuration c) throws IOException;
|
void init(FileSystem fs, Path path, Configuration c) throws IOException;
|
||||||
|
|
||||||
void close() throws IOException;
|
void close() throws IOException;
|
||||||
|
|
||||||
Entry next() throws IOException;
|
Entry next() throws IOException;
|
||||||
|
|
||||||
Entry next(Entry reuse) throws IOException;
|
Entry next(Entry reuse) throws IOException;
|
||||||
|
|
||||||
void seek(long pos) throws IOException;
|
void seek(long pos) throws IOException;
|
||||||
|
|
||||||
long getPosition() throws IOException;
|
long getPosition() throws IOException;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public interface Writer {
|
public interface Writer {
|
||||||
|
@ -144,9 +151,6 @@ public class HLog implements HConstants, Syncable {
|
||||||
long getLength() throws IOException;
|
long getLength() throws IOException;
|
||||||
}
|
}
|
||||||
|
|
||||||
// used to indirectly tell syncFs to force the sync
|
|
||||||
private boolean forceSync = false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Current log file.
|
* Current log file.
|
||||||
*/
|
*/
|
||||||
|
@ -168,11 +172,14 @@ public class HLog implements HConstants, Syncable {
|
||||||
|
|
||||||
private final AtomicLong logSeqNum = new AtomicLong(0);
|
private final AtomicLong logSeqNum = new AtomicLong(0);
|
||||||
|
|
||||||
|
// The timestamp (in ms) when the log file was created.
|
||||||
private volatile long filenum = -1;
|
private volatile long filenum = -1;
|
||||||
|
|
||||||
|
//number of transactions in the current Hlog.
|
||||||
private final AtomicInteger numEntries = new AtomicInteger(0);
|
private final AtomicInteger numEntries = new AtomicInteger(0);
|
||||||
|
|
||||||
// If > than this size, roll the log.
|
// If > than this size, roll the log. This is typically 0.95 times the size
|
||||||
|
// of the default Hdfs block size.
|
||||||
private final long logrollsize;
|
private final long logrollsize;
|
||||||
|
|
||||||
// This lock prevents starting a log roll during a cache flush.
|
// This lock prevents starting a log roll during a cache flush.
|
||||||
|
@ -272,7 +279,7 @@ public class HLog implements HConstants, Syncable {
|
||||||
}
|
}
|
||||||
fs.mkdirs(dir);
|
fs.mkdirs(dir);
|
||||||
this.oldLogDir = oldLogDir;
|
this.oldLogDir = oldLogDir;
|
||||||
if(!fs.exists(oldLogDir)) {
|
if (!fs.exists(oldLogDir)) {
|
||||||
fs.mkdirs(this.oldLogDir);
|
fs.mkdirs(this.oldLogDir);
|
||||||
}
|
}
|
||||||
this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32);
|
this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32);
|
||||||
|
@ -282,7 +289,29 @@ public class HLog implements HConstants, Syncable {
|
||||||
", enabled=" + this.enabled +
|
", enabled=" + this.enabled +
|
||||||
", flushlogentries=" + this.flushlogentries +
|
", flushlogentries=" + this.flushlogentries +
|
||||||
", optionallogflushinternal=" + this.optionalFlushInterval + "ms");
|
", optionallogflushinternal=" + this.optionalFlushInterval + "ms");
|
||||||
|
// rollWriter sets this.hdfs_out if it can.
|
||||||
rollWriter();
|
rollWriter();
|
||||||
|
|
||||||
|
// handle the reflection necessary to call getNumCurrentReplicas()
|
||||||
|
this.getNumCurrentReplicas = null;
|
||||||
|
if(this.hdfs_out != null) {
|
||||||
|
try {
|
||||||
|
this.getNumCurrentReplicas = this.hdfs_out.getClass().
|
||||||
|
getMethod("getNumCurrentReplicas", new Class<?> []{});
|
||||||
|
this.getNumCurrentReplicas.setAccessible(true);
|
||||||
|
} catch (NoSuchMethodException e) {
|
||||||
|
// Thrown if getNumCurrentReplicas() function isn't available
|
||||||
|
} catch (SecurityException e) {
|
||||||
|
// Thrown if we can't get access to getNumCurrentReplicas()
|
||||||
|
this.getNumCurrentReplicas = null; // could happen on setAccessible()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(this.getNumCurrentReplicas != null) {
|
||||||
|
LOG.info("Using getNumCurrentReplicas--HDFS-826");
|
||||||
|
} else {
|
||||||
|
LOG.info("getNumCurrentReplicas--HDFS-826 not available" );
|
||||||
|
}
|
||||||
|
|
||||||
logSyncerThread = new LogSyncer(this.optionalFlushInterval);
|
logSyncerThread = new LogSyncer(this.optionalFlushInterval);
|
||||||
Threads.setDaemonThreadRunning(logSyncerThread,
|
Threads.setDaemonThreadRunning(logSyncerThread,
|
||||||
Thread.currentThread().getName() + ".logSyncer");
|
Thread.currentThread().getName() + ".logSyncer");
|
||||||
|
@ -355,6 +384,17 @@ public class HLog implements HConstants, Syncable {
|
||||||
this.filenum = System.currentTimeMillis();
|
this.filenum = System.currentTimeMillis();
|
||||||
Path newPath = computeFilename(this.filenum);
|
Path newPath = computeFilename(this.filenum);
|
||||||
this.writer = createWriter(fs, newPath, HBaseConfiguration.create(conf));
|
this.writer = createWriter(fs, newPath, HBaseConfiguration.create(conf));
|
||||||
|
this.initialReplication = fs.getFileStatus(newPath).getReplication();
|
||||||
|
|
||||||
|
// Can we get at the dfsclient outputstream? If an instance of
|
||||||
|
// SFLW, it'll have done the necessary reflection to get at the
|
||||||
|
// protected field name.
|
||||||
|
this.hdfs_out = null;
|
||||||
|
if (this.writer instanceof SequenceFileLogWriter) {
|
||||||
|
this.hdfs_out =
|
||||||
|
((SequenceFileLogWriter)this.writer).getDFSCOutputStream();
|
||||||
|
}
|
||||||
|
|
||||||
LOG.info((oldFile != null?
|
LOG.info((oldFile != null?
|
||||||
"Roll " + FSUtils.getPath(oldFile) + ", entries=" +
|
"Roll " + FSUtils.getPath(oldFile) + ", entries=" +
|
||||||
this.numEntries.get() +
|
this.numEntries.get() +
|
||||||
|
@ -628,7 +668,7 @@ public class HLog implements HConstants, Syncable {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
byte [] regionName = regionInfo.getRegionName();
|
byte [] regionName = regionInfo.getRegionName();
|
||||||
byte [] tableName = regionInfo.getTableDesc().getName();
|
byte [] tableName = regionInfo.getTableDesc().getName();
|
||||||
this.append(regionInfo, makeKey(regionName, tableName, -1, now), logEdit, isMetaRegion);
|
this.append(regionInfo, makeKey(regionName, tableName, -1, now), logEdit);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -650,8 +690,7 @@ public class HLog implements HConstants, Syncable {
|
||||||
* @param logKey
|
* @param logKey
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public void append(HRegionInfo regionInfo, HLogKey logKey, WALEdit logEdit,
|
public void append(HRegionInfo regionInfo, HLogKey logKey, WALEdit logEdit)
|
||||||
final boolean isMetaRegion)
|
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (this.closed) {
|
if (this.closed) {
|
||||||
throw new IOException("Cannot append; log is closed");
|
throw new IOException("Cannot append; log is closed");
|
||||||
|
@ -672,7 +711,7 @@ public class HLog implements HConstants, Syncable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// sync txn to file system
|
// sync txn to file system
|
||||||
this.sync(isMetaRegion);
|
this.sync(regionInfo.isMetaRegion());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -705,15 +744,14 @@ public class HLog implements HConstants, Syncable {
|
||||||
if (this.closed) {
|
if (this.closed) {
|
||||||
throw new IOException("Cannot append; log is closed");
|
throw new IOException("Cannot append; log is closed");
|
||||||
}
|
}
|
||||||
long seqNum = obtainSeqNum();
|
|
||||||
synchronized (this.updateLock) {
|
synchronized (this.updateLock) {
|
||||||
|
long seqNum = obtainSeqNum();
|
||||||
// The 'lastSeqWritten' map holds the sequence number of the oldest
|
// The 'lastSeqWritten' map holds the sequence number of the oldest
|
||||||
// write for each region (i.e. the first edit added to the particular
|
// write for each region (i.e. the first edit added to the particular
|
||||||
// memstore). . When the cache is flushed, the entry for the
|
// memstore). . When the cache is flushed, the entry for the
|
||||||
// region being flushed is removed if the sequence number of the flush
|
// region being flushed is removed if the sequence number of the flush
|
||||||
// is greater than or equal to the value in lastSeqWritten.
|
// is greater than or equal to the value in lastSeqWritten.
|
||||||
this.lastSeqWritten.putIfAbsent(regionName, seqNum);
|
this.lastSeqWritten.putIfAbsent(regionName, seqNum);
|
||||||
int counter = 0;
|
|
||||||
HLogKey logKey = makeKey(regionName, tableName, seqNum, now);
|
HLogKey logKey = makeKey(regionName, tableName, seqNum, now);
|
||||||
doWrite(info, logKey, edits);
|
doWrite(info, logKey, edits);
|
||||||
this.numEntries.incrementAndGet();
|
this.numEntries.incrementAndGet();
|
||||||
|
@ -799,10 +837,6 @@ public class HLog implements HConstants, Syncable {
|
||||||
}
|
}
|
||||||
lock.lock();
|
lock.lock();
|
||||||
try {
|
try {
|
||||||
if (syncerShuttingDown) {
|
|
||||||
LOG.warn(getName() + " was shut down while waiting for sync");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (syncerShuttingDown) {
|
if (syncerShuttingDown) {
|
||||||
LOG.warn(getName() + " was shut down while waiting for sync");
|
LOG.warn(getName() + " was shut down while waiting for sync");
|
||||||
return;
|
return;
|
||||||
|
@ -852,7 +886,24 @@ public class HLog implements HConstants, Syncable {
|
||||||
syncOps++;
|
syncOps++;
|
||||||
this.forceSync = false;
|
this.forceSync = false;
|
||||||
this.unflushedEntries.set(0);
|
this.unflushedEntries.set(0);
|
||||||
// TODO: HBASE-2401
|
|
||||||
|
// if the number of replicas in HDFS has fallen below the initial
|
||||||
|
// value, then roll logs.
|
||||||
|
try {
|
||||||
|
int numCurrentReplicas = getLogReplication();
|
||||||
|
if (numCurrentReplicas != 0 &&
|
||||||
|
numCurrentReplicas < this.initialReplication) {
|
||||||
|
LOG.warn("HDFS pipeline error detected. " +
|
||||||
|
"Found " + numCurrentReplicas + " replicas but expecting " +
|
||||||
|
this.initialReplication + " replicas. " +
|
||||||
|
" Requesting close of hlog.");
|
||||||
|
requestLogRoll();
|
||||||
|
logRollRequested = true;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.warn("Unable to invoke DFSOutputStream.getNumCurrentReplicas" + e +
|
||||||
|
" still proceeding ahead...");
|
||||||
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.fatal("Could not append. Requesting close of hlog", e);
|
LOG.fatal("Could not append. Requesting close of hlog", e);
|
||||||
requestLogRoll();
|
requestLogRoll();
|
||||||
|
@ -866,6 +917,29 @@ public class HLog implements HConstants, Syncable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method gets the datanode replication count for the current HLog.
|
||||||
|
*
|
||||||
|
* If the pipeline isn't started yet or is empty, you will get the default
|
||||||
|
* replication factor. Therefore, if this function returns 0, it means you
|
||||||
|
* are not properly running with the HDFS-826 patch.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
int getLogReplication() throws Exception {
|
||||||
|
if(this.getNumCurrentReplicas != null && this.hdfs_out != null) {
|
||||||
|
Object repl = this.getNumCurrentReplicas.invoke(this.hdfs_out, NO_ARGS);
|
||||||
|
if (repl instanceof Integer) {
|
||||||
|
return ((Integer)repl).intValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean canGetCurReplicas() {
|
||||||
|
return this.getNumCurrentReplicas != null;
|
||||||
|
}
|
||||||
|
|
||||||
public void hsync() throws IOException {
|
public void hsync() throws IOException {
|
||||||
// Not yet implemented up in hdfs so just call hflush.
|
// Not yet implemented up in hdfs so just call hflush.
|
||||||
hflush();
|
hflush();
|
||||||
|
@ -916,20 +990,6 @@ public class HLog implements HConstants, Syncable {
|
||||||
return outputfiles.size();
|
return outputfiles.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Obtain a specified number of sequence numbers
|
|
||||||
*
|
|
||||||
* @param num number of sequence numbers to obtain
|
|
||||||
* @return array of sequence numbers
|
|
||||||
*/
|
|
||||||
private long [] obtainSeqNum(int num) {
|
|
||||||
long [] results = new long[num];
|
|
||||||
for (int i = 0; i < num; i++) {
|
|
||||||
results[i] = this.logSeqNum.incrementAndGet();
|
|
||||||
}
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* By acquiring a log sequence ID, we can allow log messages to continue while
|
* By acquiring a log sequence ID, we can allow log messages to continue while
|
||||||
* we flush the cache.
|
* we flush the cache.
|
||||||
|
@ -968,10 +1028,10 @@ public class HLog implements HConstants, Syncable {
|
||||||
}
|
}
|
||||||
synchronized (updateLock) {
|
synchronized (updateLock) {
|
||||||
long now = System.currentTimeMillis();
|
long now = System.currentTimeMillis();
|
||||||
WALEdit edits = completeCacheFlushLogEdit();
|
WALEdit edit = completeCacheFlushLogEdit();
|
||||||
this.writer.append(new HLog.Entry(
|
HLogKey key = makeKey(regionName, tableName, logSeqId,
|
||||||
makeKey(regionName, tableName, logSeqId, System.currentTimeMillis()),
|
System.currentTimeMillis());
|
||||||
edits));
|
this.writer.append(new Entry(key, edit));
|
||||||
writeTime += System.currentTimeMillis() - now;
|
writeTime += System.currentTimeMillis() - now;
|
||||||
writeOps++;
|
writeOps++;
|
||||||
this.numEntries.incrementAndGet();
|
this.numEntries.incrementAndGet();
|
||||||
|
@ -1024,11 +1084,12 @@ public class HLog implements HConstants, Syncable {
|
||||||
* <code>${ROOTDIR}/log_HOST_PORT</code>
|
* <code>${ROOTDIR}/log_HOST_PORT</code>
|
||||||
* @param oldLogDir
|
* @param oldLogDir
|
||||||
* @param fs FileSystem
|
* @param fs FileSystem
|
||||||
* @param conf HBaseConfiguration
|
* @param conf Configuration
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public static List<Path> splitLog(final Path rootDir, final Path srcDir,
|
public static List<Path> splitLog(final Path rootDir, final Path srcDir,
|
||||||
Path oldLogDir, final FileSystem fs, final Configuration conf) throws IOException {
|
Path oldLogDir, final FileSystem fs, final Configuration conf)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
long millis = System.currentTimeMillis();
|
long millis = System.currentTimeMillis();
|
||||||
List<Path> splits = null;
|
List<Path> splits = null;
|
||||||
|
@ -1104,7 +1165,8 @@ public class HLog implements HConstants, Syncable {
|
||||||
*/
|
*/
|
||||||
private static List<Path> splitLog(final Path rootDir,
|
private static List<Path> splitLog(final Path rootDir,
|
||||||
Path oldLogDir, final FileStatus[] logfiles, final FileSystem fs,
|
Path oldLogDir, final FileStatus[] logfiles, final FileSystem fs,
|
||||||
final Configuration conf) throws IOException {
|
final Configuration conf)
|
||||||
|
throws IOException {
|
||||||
final Map<byte [], WriterAndPath> logWriters =
|
final Map<byte [], WriterAndPath> logWriters =
|
||||||
Collections.synchronizedMap(
|
Collections.synchronizedMap(
|
||||||
new TreeMap<byte [], WriterAndPath>(Bytes.BYTES_COMPARATOR));
|
new TreeMap<byte [], WriterAndPath>(Bytes.BYTES_COMPARATOR));
|
||||||
|
@ -1115,35 +1177,51 @@ public class HLog implements HConstants, Syncable {
|
||||||
int logWriterThreads =
|
int logWriterThreads =
|
||||||
conf.getInt("hbase.regionserver.hlog.splitlog.writer.threads", 3);
|
conf.getInt("hbase.regionserver.hlog.splitlog.writer.threads", 3);
|
||||||
|
|
||||||
// Number of logs to read concurrently when log splitting.
|
// Number of logs to read into memory before writing to their appropriate
|
||||||
// More means faster but bigger mem consumption */
|
// regions when log splitting. More means faster but bigger mem consumption
|
||||||
int concurrentLogReads =
|
int logFilesPerStep =
|
||||||
conf.getInt("hbase.regionserver.hlog.splitlog.reader.threads", 3);
|
conf.getInt("hbase.regionserver.hlog.splitlog.reader.threads", 3);
|
||||||
// Is append supported?
|
|
||||||
|
// append support = we can avoid data loss (yay)
|
||||||
|
// we open for append, then close to recover the correct file length
|
||||||
|
final boolean appendSupport = isAppend(conf);
|
||||||
|
|
||||||
|
// store corrupt logs for post-mortem analysis (empty string = discard)
|
||||||
|
final String corruptDir =
|
||||||
|
conf.get("hbase.regionserver.hlog.splitlog.corrupt.dir", ".corrupt");
|
||||||
|
|
||||||
|
List<Path> finishedFiles = new LinkedList<Path>();
|
||||||
|
List<Path> corruptFiles = new LinkedList<Path>();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
int maxSteps = Double.valueOf(Math.ceil((logfiles.length * 1.0) /
|
int maxSteps = Double.valueOf(Math.ceil((logfiles.length * 1.0) /
|
||||||
concurrentLogReads)).intValue();
|
logFilesPerStep)).intValue();
|
||||||
for (int step = 0; step < maxSteps; step++) {
|
for (int step = 0; step < maxSteps; step++) {
|
||||||
|
|
||||||
|
// Step 1: read N log files into memory
|
||||||
final Map<byte[], LinkedList<HLog.Entry>> logEntries =
|
final Map<byte[], LinkedList<HLog.Entry>> logEntries =
|
||||||
new TreeMap<byte[], LinkedList<HLog.Entry>>(Bytes.BYTES_COMPARATOR);
|
new TreeMap<byte[], LinkedList<HLog.Entry>>(Bytes.BYTES_COMPARATOR);
|
||||||
// Stop at logfiles.length when it's the last step
|
|
||||||
int endIndex = step == maxSteps - 1? logfiles.length:
|
int endIndex = step == maxSteps - 1? logfiles.length:
|
||||||
step * concurrentLogReads + concurrentLogReads;
|
step * logFilesPerStep + logFilesPerStep;
|
||||||
for (int i = (step * concurrentLogReads); i < endIndex; i++) {
|
for (int i = (step * logFilesPerStep); i < endIndex; i++) {
|
||||||
// Check for possibly empty file. With appends, currently Hadoop
|
Path curLogFile = logfiles[i].getPath();
|
||||||
// reports a zero length even if the file has been sync'd. Revisit if
|
|
||||||
// HADOOP-4751 is committed.
|
// make sure we get the right file length before opening for read
|
||||||
long length = logfiles[i].getLen();
|
recoverLog(fs, curLogFile, appendSupport);
|
||||||
|
|
||||||
|
long length = fs.getFileStatus(curLogFile).getLen();
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Splitting hlog " + (i + 1) + " of " + logfiles.length +
|
LOG.debug("Splitting hlog " + (i + 1) + " of " + logfiles.length +
|
||||||
": " + logfiles[i].getPath() + ", length=" + logfiles[i].getLen());
|
": " + curLogFile + ", length=" + length);
|
||||||
}
|
}
|
||||||
|
|
||||||
Reader in = null;
|
Reader in = null;
|
||||||
|
boolean cleanRead = false;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
try {
|
try {
|
||||||
in = HLog.getReader(fs, logfiles[i].getPath(), conf);
|
in = HLog.getReader(fs, curLogFile, conf);
|
||||||
try {
|
try {
|
||||||
HLog.Entry entry;
|
Entry entry;
|
||||||
while ((entry = in.next()) != null) {
|
while ((entry = in.next()) != null) {
|
||||||
byte [] regionName = entry.getKey().getRegionName();
|
byte [] regionName = entry.getKey().getRegionName();
|
||||||
LinkedList<HLog.Entry> queue = logEntries.get(regionName);
|
LinkedList<HLog.Entry> queue = logEntries.get(regionName);
|
||||||
|
@ -1155,20 +1233,24 @@ public class HLog implements HConstants, Syncable {
|
||||||
queue.push(entry);
|
queue.push(entry);
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
LOG.debug("Pushed=" + count + " entries from " +
|
LOG.debug("Pushed=" + count + " entries from " + curLogFile);
|
||||||
logfiles[i].getPath());
|
cleanRead = true;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.debug("IOE Pushed=" + count + " entries from " +
|
LOG.debug("IOE Pushed=" + count + " entries from " + curLogFile);
|
||||||
logfiles[i].getPath());
|
|
||||||
e = RemoteExceptionHandler.checkIOException(e);
|
e = RemoteExceptionHandler.checkIOException(e);
|
||||||
if (!(e instanceof EOFException)) {
|
if (!(e instanceof EOFException)) {
|
||||||
LOG.warn("Exception processing " + logfiles[i].getPath() +
|
String msg = "Exception processing " + curLogFile +
|
||||||
" -- continuing. Possible DATA LOSS!", e);
|
" -- continuing. Possible DATA LOSS!";
|
||||||
|
if (corruptDir.length() > 0) {
|
||||||
|
msg += " Storing in hlog corruption directory.";
|
||||||
|
}
|
||||||
|
LOG.warn(msg, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
if (length <= 0) {
|
if (length <= 0) {
|
||||||
LOG.warn("Empty hlog, continuing: " + logfiles[i] + " count=" + count, e);
|
LOG.warn("Empty hlog, continuing: " + logfiles[i]);
|
||||||
|
cleanRead = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
throw e;
|
throw e;
|
||||||
|
@ -1178,8 +1260,16 @@ public class HLog implements HConstants, Syncable {
|
||||||
in.close();
|
in.close();
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.warn("Close in finally threw exception -- continuing", e);
|
LOG.warn("File.close() threw exception -- continuing, "
|
||||||
|
+ "but marking file as corrupt.", e);
|
||||||
|
cleanRead = false;
|
||||||
}
|
}
|
||||||
|
if (cleanRead) {
|
||||||
|
finishedFiles.add(curLogFile);
|
||||||
|
} else {
|
||||||
|
corruptFiles.add(curLogFile);
|
||||||
|
}
|
||||||
|
/* TODO FOR J-D REVIEW
|
||||||
// Archive the input file now so we do not replay edits. We could
|
// Archive the input file now so we do not replay edits. We could
|
||||||
// have gotten here because of an exception. If so, probably
|
// have gotten here because of an exception. If so, probably
|
||||||
// nothing we can do about it. Replaying it, it could work but we
|
// nothing we can do about it. Replaying it, it could work but we
|
||||||
|
@ -1187,85 +1277,89 @@ public class HLog implements HConstants, Syncable {
|
||||||
// could have lost some edits.
|
// could have lost some edits.
|
||||||
fs.rename(logfiles[i].getPath(),
|
fs.rename(logfiles[i].getPath(),
|
||||||
getHLogArchivePath(oldLogDir, logfiles[i].getPath()));
|
getHLogArchivePath(oldLogDir, logfiles[i].getPath()));
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step 2: Some regionserver log files have been read into memory.
|
||||||
|
// Assign them to the appropriate region directory.
|
||||||
|
class ThreadWithException extends Thread {
|
||||||
|
ThreadWithException(String name) { super(name); }
|
||||||
|
public IOException exception = null;
|
||||||
|
}
|
||||||
|
List<ThreadWithException> threadList =
|
||||||
|
new ArrayList<ThreadWithException>(logEntries.size());
|
||||||
ExecutorService threadPool =
|
ExecutorService threadPool =
|
||||||
Executors.newFixedThreadPool(logWriterThreads);
|
Executors.newFixedThreadPool(logWriterThreads);
|
||||||
for (final byte[] key : logEntries.keySet()) {
|
for (final byte [] region: logEntries.keySet()) {
|
||||||
Thread thread = new Thread(Bytes.toStringBinary(key)) {
|
ThreadWithException thread =
|
||||||
|
new ThreadWithException(Bytes.toStringBinary(region)) {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
LinkedList<HLog.Entry> entries = logEntries.get(key);
|
LinkedList<HLog.Entry> entries = logEntries.get(region);
|
||||||
LOG.debug("Thread got " + entries.size() + " to process");
|
LOG.debug("Thread got " + entries.size() + " to process");
|
||||||
|
if(entries.size() <= 0) {
|
||||||
|
LOG.warn("Got a region with no entries to process.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
long threadTime = System.currentTimeMillis();
|
long threadTime = System.currentTimeMillis();
|
||||||
try {
|
try {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
// Items were added to the linkedlist oldest first. Pull them
|
// get the logfile associated with this region. 2 logs often
|
||||||
// out in that order.
|
// write to the same region, so persist this info across logs
|
||||||
for (ListIterator<HLog.Entry> i =
|
WriterAndPath wap = logWriters.get(region);
|
||||||
entries.listIterator(entries.size());
|
|
||||||
i.hasPrevious();) {
|
|
||||||
HLog.Entry logEntry = i.previous();
|
|
||||||
WriterAndPath wap = logWriters.get(key);
|
|
||||||
if (wap == null) {
|
if (wap == null) {
|
||||||
|
// first write to this region, make new logfile
|
||||||
|
assert entries.size() > 0;
|
||||||
Path logfile = new Path(HRegion.getRegionDir(HTableDescriptor
|
Path logfile = new Path(HRegion.getRegionDir(HTableDescriptor
|
||||||
.getTableDir(rootDir, logEntry.getKey().getTablename()),
|
.getTableDir(rootDir,
|
||||||
HRegionInfo.encodeRegionName(key)),
|
entries.getFirst().getKey().getTablename()),
|
||||||
|
HRegionInfo.encodeRegionName(region)),
|
||||||
HREGION_OLDLOGFILE_NAME);
|
HREGION_OLDLOGFILE_NAME);
|
||||||
Path oldlogfile = null;
|
|
||||||
Reader old = null;
|
// If splitLog() was running when the user restarted his
|
||||||
|
// cluster, then we could already have a 'logfile'.
|
||||||
|
// Since we don't delete logs until everything is written to
|
||||||
|
// their respective regions, we can safely remove this tmp.
|
||||||
if (fs.exists(logfile)) {
|
if (fs.exists(logfile)) {
|
||||||
FileStatus stat = fs.getFileStatus(logfile);
|
LOG.warn("Deleting old hlog file: " + logfile);
|
||||||
if (stat.getLen() <= 0) {
|
// TODO: Archive?
|
||||||
LOG.warn("Old hlog file " + logfile + " is zero " +
|
fs.delete(logfile, true);
|
||||||
"length. Deleting existing file");
|
|
||||||
fs.delete(logfile, false);
|
|
||||||
} else {
|
|
||||||
LOG.warn("Old hlog file " + logfile + " already " +
|
|
||||||
"exists. Copying existing file to new file");
|
|
||||||
oldlogfile = new Path(logfile.toString() + ".old");
|
|
||||||
fs.rename(logfile, oldlogfile);
|
|
||||||
old = getReader(fs, oldlogfile, conf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Writer w = createWriter(fs, logfile, conf);
|
|
||||||
wap = new WriterAndPath(logfile, w);
|
|
||||||
logWriters.put(key, wap);
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("Creating new hlog file writer for path "
|
|
||||||
+ logfile + " and region " + Bytes.toStringBinary(key));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (old != null) {
|
// associate an OutputStream with this logfile
|
||||||
// Copy from existing log file
|
Writer w = createWriter(fs, logfile, conf);
|
||||||
HLog.Entry entry;
|
wap = new WriterAndPath(logfile, w);
|
||||||
for (; (entry = old.next()) != null; count++) {
|
logWriters.put(region, wap);
|
||||||
if (LOG.isDebugEnabled() && count > 0
|
if (LOG.isDebugEnabled()) {
|
||||||
&& count % 10000 == 0) {
|
LOG.debug("Creating new hlog file writer for path "
|
||||||
LOG.debug("Copied " + count + " edits");
|
+ logfile + " and region " + Bytes.toStringBinary(region));
|
||||||
}
|
|
||||||
w.append(entry);
|
|
||||||
}
|
|
||||||
old.close();
|
|
||||||
fs.delete(oldlogfile, true);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wap.w.append(logEntry);
|
|
||||||
|
// Items were added to the linkedlist oldest first. Pull them
|
||||||
|
// out in that order.
|
||||||
|
for (ListIterator<HLog.Entry> i = entries.listIterator(entries.size());
|
||||||
|
i.hasPrevious();) {
|
||||||
|
wap.w.append(i.previous());
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Applied " + count + " total edits to "
|
LOG.debug("Applied " + count + " total edits to "
|
||||||
+ Bytes.toStringBinary(key) + " in "
|
+ Bytes.toStringBinary(region) + " in "
|
||||||
+ (System.currentTimeMillis() - threadTime) + "ms");
|
+ (System.currentTimeMillis() - threadTime) + "ms");
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e = RemoteExceptionHandler.checkIOException(e);
|
e = RemoteExceptionHandler.checkIOException(e);
|
||||||
LOG.warn("Got while writing region " + Bytes.toStringBinary(key)
|
LOG.warn("Got while writing region "
|
||||||
+ " log " + e);
|
+ Bytes.toStringBinary(region) + " log " + e);
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
|
exception = e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
threadList.add(thread);
|
||||||
threadPool.execute(thread);
|
threadPool.execute(thread);
|
||||||
}
|
}
|
||||||
threadPool.shutdown();
|
threadPool.shutdown();
|
||||||
|
@ -1274,9 +1368,19 @@ public class HLog implements HConstants, Syncable {
|
||||||
for(int i = 0; !threadPool.awaitTermination(5, TimeUnit.SECONDS); i++) {
|
for(int i = 0; !threadPool.awaitTermination(5, TimeUnit.SECONDS); i++) {
|
||||||
LOG.debug("Waiting for hlog writers to terminate, iteration #" + i);
|
LOG.debug("Waiting for hlog writers to terminate, iteration #" + i);
|
||||||
}
|
}
|
||||||
}catch(InterruptedException ex) {
|
} catch (InterruptedException ex) {
|
||||||
LOG.warn("Hlog writers were interrupted, possible data loss!");
|
LOG.warn("Hlog writers were interrupted during splitLog(). "
|
||||||
|
+"Retaining log files to avoid data loss.");
|
||||||
|
throw new IOException(ex.getMessage(), ex.getCause());
|
||||||
}
|
}
|
||||||
|
// throw an exception if one of the threads reported one
|
||||||
|
for (ThreadWithException t : threadList) {
|
||||||
|
if (t.exception != null) {
|
||||||
|
throw t.exception;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// End of for loop. Rinse and repeat
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
splits = new ArrayList<Path>(logWriters.size());
|
splits = new ArrayList<Path>(logWriters.size());
|
||||||
|
@ -1286,9 +1390,79 @@ public class HLog implements HConstants, Syncable {
|
||||||
splits.add(wap.p);
|
splits.add(wap.p);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step 3: All writes succeeded! Get rid of the now-unnecessary logs
|
||||||
|
for(Path p : finishedFiles) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Successfully split Hlog file. Deleting " + p);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
if (!fs.delete(p, true) && LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Delete of split Hlog (" + p + ") failed.");
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
// don't throw error here. worst case = double-read
|
||||||
|
LOG.warn("Error deleting successfully split Hlog (" + p + ") -- " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (Path p : corruptFiles) {
|
||||||
|
if (corruptDir.length() > 0) {
|
||||||
|
// store any corrupt logs for later analysis
|
||||||
|
Path cp = new Path(conf.get(HBASE_DIR), corruptDir);
|
||||||
|
if(!fs.exists(cp)) {
|
||||||
|
fs.mkdirs(cp);
|
||||||
|
}
|
||||||
|
Path newp = new Path(cp, p.getName());
|
||||||
|
if (!fs.exists(newp)) {
|
||||||
|
if (!fs.rename(p, newp)) {
|
||||||
|
LOG.warn("Rename of " + p + " to " + newp + " failed.");
|
||||||
|
} else {
|
||||||
|
LOG.warn("Corrupt Hlog (" + p + ") moved to " + newp);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG.warn("Corrupt Hlog (" + p + ") already moved to " + newp +
|
||||||
|
". Ignoring");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// data loss is less important than disk space, delete
|
||||||
|
try {
|
||||||
|
if (!fs.delete(p, true) ) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Delete of split Hlog " + p + " failed.");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG.warn("Corrupt Hlog (" + p + ") deleted!");
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.warn("Error deleting corrupt Hlog (" + p + ") -- " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return splits;
|
return splits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @param conf
|
||||||
|
* @return True if append enabled and we have the syncFs in our path.
|
||||||
|
*/
|
||||||
|
static boolean isAppend(final Configuration conf) {
|
||||||
|
boolean append = conf.getBoolean("dfs.support.append", false);
|
||||||
|
if (append) {
|
||||||
|
try {
|
||||||
|
// TODO: The implementation that comes back when we do a createWriter
|
||||||
|
// may not be using SequenceFile so the below is not a definitive test.
|
||||||
|
// Will do for now (hdfs-200).
|
||||||
|
SequenceFile.Writer.class.getMethod("syncFs", new Class<?> []{});
|
||||||
|
append = true;
|
||||||
|
} catch (SecurityException e) {
|
||||||
|
} catch (NoSuchMethodException e) {
|
||||||
|
append = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return append;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility class that lets us keep track of the edit with it's key
|
* Utility class that lets us keep track of the edit with it's key
|
||||||
* Only used when splitting logs
|
* Only used when splitting logs
|
||||||
|
@ -1345,6 +1519,64 @@ public class HLog implements HConstants, Syncable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Recover log.
|
||||||
|
* Try and open log in append mode.
|
||||||
|
* Doing this, we get a hold of the file that crashed writer
|
||||||
|
* was writing to. Once we have it, close it. This will
|
||||||
|
* allow subsequent reader to see up to last sync.
|
||||||
|
* @param fs
|
||||||
|
* @param p
|
||||||
|
* @param append
|
||||||
|
*/
|
||||||
|
public static void recoverLog(final FileSystem fs, final Path p,
|
||||||
|
final boolean append) throws IOException {
|
||||||
|
if (!append) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// lease recovery not needed for local file system case.
|
||||||
|
// currently, local file system doesn't implement append either.
|
||||||
|
if (!(fs instanceof DistributedFileSystem)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.debug("Recovering DFS lease for path " + p);
|
||||||
|
long startWaiting = System.currentTimeMillis();
|
||||||
|
|
||||||
|
// Trying recovery
|
||||||
|
boolean recovered = false;
|
||||||
|
while (!recovered) {
|
||||||
|
try {
|
||||||
|
FSDataOutputStream out = fs.append(p);
|
||||||
|
out.close();
|
||||||
|
recovered = true;
|
||||||
|
} catch (IOException e) {
|
||||||
|
e = RemoteExceptionHandler.checkIOException(e);
|
||||||
|
if (e instanceof AlreadyBeingCreatedException) {
|
||||||
|
// We expect that we'll get this message while the lease is still
|
||||||
|
// within its soft limit, but if we get it past that, it means
|
||||||
|
// that the RS is holding onto the file even though it lost its
|
||||||
|
// znode. We could potentially abort after some time here.
|
||||||
|
long waitedFor = System.currentTimeMillis() - startWaiting;
|
||||||
|
|
||||||
|
if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) {
|
||||||
|
LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p
|
||||||
|
+ ":" + e.getMessage());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException ex) {
|
||||||
|
// ignore it and try again
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new IOException("Failed to open " + p + " for append", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG.info("Past out lease recovery");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct the HLog directory name
|
* Construct the HLog directory name
|
||||||
*
|
*
|
||||||
|
|
|
@ -21,29 +21,42 @@
|
||||||
package org.apache.hadoop.hbase.regionserver.wal;
|
package org.apache.hadoop.hbase.regionserver.wal;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
import java.lang.reflect.Field;
|
import java.lang.reflect.Field;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hbase.KeyValue;
|
|
||||||
import org.apache.hadoop.hbase.regionserver.wal.HLog;
|
|
||||||
import org.apache.hadoop.io.SequenceFile;
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
import org.apache.hadoop.io.SequenceFile.Metadata;
|
import org.apache.hadoop.io.SequenceFile.Metadata;
|
||||||
import org.apache.hadoop.io.compress.DefaultCodec;
|
import org.apache.hadoop.io.compress.DefaultCodec;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of {@link HLog.Writer} that delegates to
|
||||||
|
* {@link SequenceFile.Writer}.
|
||||||
|
*/
|
||||||
public class SequenceFileLogWriter implements HLog.Writer {
|
public class SequenceFileLogWriter implements HLog.Writer {
|
||||||
|
private final Log LOG = LogFactory.getLog(this.getClass());
|
||||||
|
// The sequence file we delegate to.
|
||||||
|
private SequenceFile.Writer writer;
|
||||||
|
// The dfsclient out stream gotten made accessible or null if not available.
|
||||||
|
private OutputStream dfsClient_out;
|
||||||
|
// The syncFs method from hdfs-200 or null if not available.
|
||||||
|
private Method syncFs;
|
||||||
|
|
||||||
SequenceFile.Writer writer;
|
public SequenceFileLogWriter() {
|
||||||
FSDataOutputStream writer_out;
|
super();
|
||||||
|
}
|
||||||
public SequenceFileLogWriter() { }
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init(FileSystem fs, Path path, Configuration conf)
|
public void init(FileSystem fs, Path path, Configuration conf)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
writer = SequenceFile.createWriter(fs, conf, path,
|
// Create a SF.Writer instance.
|
||||||
|
this.writer = SequenceFile.createWriter(fs, conf, path,
|
||||||
HLog.getKeyClass(conf), WALEdit.class,
|
HLog.getKeyClass(conf), WALEdit.class,
|
||||||
fs.getConf().getInt("io.file.buffer.size", 4096),
|
fs.getConf().getInt("io.file.buffer.size", 4096),
|
||||||
(short) conf.getInt("hbase.regionserver.hlog.replication",
|
(short) conf.getInt("hbase.regionserver.hlog.replication",
|
||||||
|
@ -58,19 +71,39 @@ public class SequenceFileLogWriter implements HLog.Writer {
|
||||||
// Get at the private FSDataOutputStream inside in SequenceFile so we can
|
// Get at the private FSDataOutputStream inside in SequenceFile so we can
|
||||||
// call sync on it. Make it accessible. Stash it aside for call up in
|
// call sync on it. Make it accessible. Stash it aside for call up in
|
||||||
// the sync method.
|
// the sync method.
|
||||||
final Field fields[] = writer.getClass().getDeclaredFields();
|
final Field fields [] = this.writer.getClass().getDeclaredFields();
|
||||||
final String fieldName = "out";
|
final String fieldName = "out";
|
||||||
for (int i = 0; i < fields.length; ++i) {
|
for (int i = 0; i < fields.length; ++i) {
|
||||||
if (fieldName.equals(fields[i].getName())) {
|
if (fieldName.equals(fields[i].getName())) {
|
||||||
try {
|
try {
|
||||||
|
// Make the 'out' field up in SF.Writer accessible.
|
||||||
fields[i].setAccessible(true);
|
fields[i].setAccessible(true);
|
||||||
this.writer_out = (FSDataOutputStream)fields[i].get(writer);
|
FSDataOutputStream out =
|
||||||
|
(FSDataOutputStream)fields[i].get(this.writer);
|
||||||
|
this.dfsClient_out = out.getWrappedStream();
|
||||||
break;
|
break;
|
||||||
} catch (IllegalAccessException ex) {
|
} catch (IllegalAccessException ex) {
|
||||||
throw new IOException("Accessing " + fieldName, ex);
|
throw new IOException("Accessing " + fieldName, ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Now do dirty work to see if syncFs is available.
|
||||||
|
// Test if syncfs is available.
|
||||||
|
Method m = null;
|
||||||
|
if (conf.getBoolean("dfs.support.append", false)) {
|
||||||
|
try {
|
||||||
|
// function pointer to writer.syncFs()
|
||||||
|
m = this.writer.getClass().getMethod("syncFs", new Class<?> []{});
|
||||||
|
} catch (SecurityException e) {
|
||||||
|
throw new IOException("Failed test for syncfs", e);
|
||||||
|
} catch (NoSuchMethodException e) {
|
||||||
|
// Not available
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.syncFs = m;
|
||||||
|
LOG.info((this.syncFs != null)?
|
||||||
|
"Using syncFs -- HDFS-200": "syncFs -- HDFS-200 -- not available");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -86,12 +119,25 @@ public class SequenceFileLogWriter implements HLog.Writer {
|
||||||
@Override
|
@Override
|
||||||
public void sync() throws IOException {
|
public void sync() throws IOException {
|
||||||
this.writer.sync();
|
this.writer.sync();
|
||||||
|
if (this.syncFs != null) {
|
||||||
this.writer.syncFs();
|
try {
|
||||||
|
this.syncFs.invoke(this.writer, HLog.NO_ARGS);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IOException("Reflection", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getLength() throws IOException {
|
public long getLength() throws IOException {
|
||||||
return this.writer.getLength();
|
return this.writer.getLength();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The dfsclient out stream up inside SF.Writer made accessible, or
|
||||||
|
* null if not available.
|
||||||
|
*/
|
||||||
|
public OutputStream getDFSCOutputStream() {
|
||||||
|
return this.dfsClient_out;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -28,15 +28,14 @@ import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.hbase.client.HConnectionManager;
|
import org.apache.hadoop.hbase.client.HConnectionManager;
|
||||||
import org.apache.hadoop.hbase.master.HMaster;
|
import org.apache.hadoop.hbase.master.HMaster;
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
|
||||||
import org.apache.hadoop.hbase.util.JVMClusterUtil;
|
import org.apache.hadoop.hbase.util.JVMClusterUtil;
|
||||||
|
import org.apache.hadoop.hbase.util.Threads;
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||||
import org.apache.hadoop.io.MapWritable;
|
import org.apache.hadoop.io.MapWritable;
|
||||||
import org.apache.hadoop.security.UnixUserGroupInformation;
|
import org.apache.hadoop.security.UnixUserGroupInformation;
|
||||||
|
@ -129,6 +128,7 @@ public class MiniHBaseCluster implements HConstants {
|
||||||
*/
|
*/
|
||||||
public static class MiniHBaseClusterRegionServer extends HRegionServer {
|
public static class MiniHBaseClusterRegionServer extends HRegionServer {
|
||||||
private static int index = 0;
|
private static int index = 0;
|
||||||
|
private Thread shutdownThread = null;
|
||||||
|
|
||||||
public MiniHBaseClusterRegionServer(Configuration conf)
|
public MiniHBaseClusterRegionServer(Configuration conf)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -159,11 +159,20 @@ public class MiniHBaseCluster implements HConstants {
|
||||||
@Override
|
@Override
|
||||||
protected void init(MapWritable c) throws IOException {
|
protected void init(MapWritable c) throws IOException {
|
||||||
super.init(c);
|
super.init(c);
|
||||||
// Change shutdown hook to only shutdown the FileSystem added above by
|
// Run this thread to shutdown our filesystem on way out.
|
||||||
// {@link #getFileSystem(HBaseConfiguration)
|
this.shutdownThread = new SingleFileSystemShutdownThread(getFileSystem());
|
||||||
if (getFileSystem() instanceof DistributedFileSystem) {
|
}
|
||||||
Thread t = new SingleFileSystemShutdownThread(getFileSystem());
|
|
||||||
Runtime.getRuntime().addShutdownHook(t);
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
super.run();
|
||||||
|
} finally {
|
||||||
|
// Run this on the way out.
|
||||||
|
if (this.shutdownThread != null) {
|
||||||
|
this.shutdownThread.start();
|
||||||
|
Threads.shutdown(this.shutdownThread, 30000);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,11 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.regionserver.wal;
|
package org.apache.hadoop.hbase.regionserver.wal;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -27,15 +32,10 @@ import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.HRegionInfo;
|
import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||||
import org.apache.hadoop.hbase.KeyValue;
|
import org.apache.hadoop.hbase.KeyValue;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/** JUnit test case for HLog */
|
/** JUnit test case for HLog */
|
||||||
public class TestHLog extends HBaseTestCase implements HConstants {
|
public class TestHLog extends HBaseTestCase implements HConstants {
|
||||||
private Path dir;
|
private Path dir;
|
||||||
|
@ -322,4 +322,70 @@ public class TestHLog extends HBaseTestCase implements HConstants {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void testAppend() throws IOException {
|
||||||
|
final int COL_COUNT = 10;
|
||||||
|
final byte [] tableName = Bytes.toBytes("tablename");
|
||||||
|
final byte [] row = Bytes.toBytes("row");
|
||||||
|
this.conf.setBoolean("dfs.support.append", true);
|
||||||
|
Reader reader = null;
|
||||||
|
HLog log = new HLog(this.fs, dir, this.oldLogDir, this.conf, null);
|
||||||
|
try {
|
||||||
|
// Write columns named 1, 2, 3, etc. and then values of single byte
|
||||||
|
// 1, 2, 3...
|
||||||
|
long timestamp = System.currentTimeMillis();
|
||||||
|
WALEdit cols = new WALEdit();
|
||||||
|
for (int i = 0; i < COL_COUNT; i++) {
|
||||||
|
cols.add(new KeyValue(row, Bytes.toBytes("column"),
|
||||||
|
Bytes.toBytes(Integer.toString(i)),
|
||||||
|
timestamp, new byte[] { (byte)(i + '0') }));
|
||||||
|
}
|
||||||
|
HRegionInfo hri = new HRegionInfo(new HTableDescriptor(tableName),
|
||||||
|
HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
|
||||||
|
log.append(hri, tableName, cols, System.currentTimeMillis());
|
||||||
|
long logSeqId = log.startCacheFlush();
|
||||||
|
log.completeCacheFlush(hri.getRegionName(), tableName, logSeqId, false);
|
||||||
|
log.close();
|
||||||
|
Path filename = log.computeFilename(log.getFilenum());
|
||||||
|
log = null;
|
||||||
|
// Now open a reader on the log and assert append worked.
|
||||||
|
reader = HLog.getReader(fs, filename, conf);
|
||||||
|
HLog.Entry entry = reader.next();
|
||||||
|
assertEquals(COL_COUNT, entry.getEdit().size());
|
||||||
|
int idx = 0;
|
||||||
|
for (KeyValue val : entry.getEdit().getKeyValues()) {
|
||||||
|
assertTrue(Bytes.equals(hri.getRegionName(),
|
||||||
|
entry.getKey().getRegionName()));
|
||||||
|
assertTrue(Bytes.equals(tableName, entry.getKey().getTablename()));
|
||||||
|
assertTrue(Bytes.equals(row, val.getRow()));
|
||||||
|
assertEquals((byte)(idx + '0'), val.getValue()[0]);
|
||||||
|
System.out.println(entry.getKey() + " " + val);
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get next row... the meta flushed row.
|
||||||
|
entry = reader.next();
|
||||||
|
assertEquals(1, entry.getEdit().size());
|
||||||
|
for (KeyValue val : entry.getEdit().getKeyValues()) {
|
||||||
|
assertTrue(Bytes.equals(hri.getRegionName(),
|
||||||
|
entry.getKey().getRegionName()));
|
||||||
|
assertTrue(Bytes.equals(tableName, entry.getKey().getTablename()));
|
||||||
|
assertTrue(Bytes.equals(HLog.METAROW, val.getRow()));
|
||||||
|
assertTrue(Bytes.equals(HLog.METAFAMILY, val.getFamily()));
|
||||||
|
assertEquals(0, Bytes.compareTo(HLog.COMPLETE_CACHE_FLUSH,
|
||||||
|
val.getValue()));
|
||||||
|
System.out.println(entry.getKey() + " " + val);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if (log != null) {
|
||||||
|
log.closeAndDelete();
|
||||||
|
}
|
||||||
|
if (reader != null) {
|
||||||
|
reader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue