HDFS-1580. Add interface for generic Write Ahead Logging mechanisms. Contributed by Ivan Kelly.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1210602 13f79535-47bb-0310-9956-ffa450edef68
Jitendra Nath Pandey 2011-12-05 20:10:27 +00:00
parent 6a358ee140
commit d18e5b3844
15 changed files with 126 additions and 40 deletions

View File: CHANGES.txt

@@ -72,6 +72,9 @@ Trunk (unreleased changes)
     Move the support for multiple protocols to lower layer so that Writable,
     PB and Avro can all use it (Sanjay)
 
+    HDFS-1580. Add interface for generic Write Ahead Logging mechanisms.
+    (Ivan Kelly via jitendra)
+
   OPTIMIZATIONS
     HDFS-2477. Optimize computing the diff between a block report and the
     namenode state. (Tomasz Nykiel via hairong)

View File: DFSConfigKeys.java

@@ -161,6 +161,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_NAMENODE_HTTPS_ADDRESS_DEFAULT = "0.0.0.0:" + DFS_NAMENODE_HTTPS_PORT_DEFAULT;
   public static final String DFS_NAMENODE_NAME_DIR_KEY = "dfs.namenode.name.dir";
   public static final String DFS_NAMENODE_EDITS_DIR_KEY = "dfs.namenode.edits.dir";
+  public static final String DFS_NAMENODE_EDITS_PLUGIN_PREFIX = "dfs.namenode.edits.journal-plugin";
   public static final String DFS_CLIENT_READ_PREFETCH_SIZE_KEY = "dfs.client.read.prefetch.size";
   public static final String DFS_CLIENT_RETRY_WINDOW_BASE= "dfs.client.retry.window.base";
   public static final String DFS_METRICS_SESSION_ID_KEY = "dfs.metrics.session-id";
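
Note that the new key is a prefix rather than a complete configuration key: FSEditLog (further down in this commit) appends "." plus the URI scheme of each edits directory to it. A minimal sketch of wiring up a plugin for a hypothetical foojournal:// scheme; the scheme and the FooJournalManager class are illustrative placeholders (a sketch of such a class follows the JournalManager hunk below), not part of this commit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.server.namenode.JournalManager;

public class JournalPluginConfigSketch {
  // Builds a Configuration that routes a hypothetical "foojournal://" edits
  // directory to FooJournalManager, alongside a normal local directory.
  static Configuration pluginConf() {
    Configuration conf = new Configuration();
    // Derived key: dfs.namenode.edits.journal-plugin.foojournal
    // (a real plugin class must be concrete, unlike the sketch below)
    conf.setClass(DFSConfigKeys.DFS_NAMENODE_EDITS_PLUGIN_PREFIX + ".foojournal",
        FooJournalManager.class, JournalManager.class);
    conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
        "file:///data/dfs/name,foojournal://remote/edits");
    return conf;
  }
}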

View File: EditLogBackupInputStream.java

@@ -103,7 +103,7 @@ public void close() throws IOException {
   }
 
   @Override
-  long length() throws IOException {
+  public long length() throws IOException {
     // file size + size of both buffers
     return inner.length();
   }

View File: EditLogBackupOutputStream.java

@@ -67,12 +67,12 @@ class EditLogBackupOutputStream extends EditLogOutputStream {
   }
 
   @Override // EditLogOutputStream
-  void write(FSEditLogOp op) throws IOException {
+  public void write(FSEditLogOp op) throws IOException {
     doubleBuf.writeOp(op);
   }
 
   @Override
-  void writeRaw(byte[] bytes, int offset, int length) throws IOException {
+  public void writeRaw(byte[] bytes, int offset, int length) throws IOException {
     throw new IOException("Not supported");
   }
@@ -80,7 +80,7 @@ void writeRaw(byte[] bytes, int offset, int length) throws IOException {
    * There is no persistent storage. Just clear the buffers.
    */
   @Override // EditLogOutputStream
-  void create() throws IOException {
+  public void create() throws IOException {
     assert doubleBuf.isFlushed() : "previous data is not flushed yet";
     this.doubleBuf = new EditsDoubleBuffer(DEFAULT_BUFFER_SIZE);
   }
@@ -106,7 +106,7 @@ public void abort() throws IOException {
   }
 
   @Override // EditLogOutputStream
-  void setReadyToFlush() throws IOException {
+  public void setReadyToFlush() throws IOException {
     doubleBuf.setReadyToFlush();
   }

View File: EditLogFileInputStream.java

@@ -127,7 +127,7 @@ public void close() throws IOException {
   }
 
   @Override
-  long length() throws IOException {
+  public long length() throws IOException {
     // file size + size of both buffers
     return file.length();
   }

View File: EditLogFileOutputStream.java

@@ -73,7 +73,7 @@ class EditLogFileOutputStream extends EditLogOutputStream {
 
   /** {@inheritDoc} */
   @Override
-  void write(FSEditLogOp op) throws IOException {
+  public void write(FSEditLogOp op) throws IOException {
     doubleBuf.writeOp(op);
   }
@@ -86,7 +86,7 @@ void write(FSEditLogOp op) throws IOException {
    * </ul>
    * */
   @Override
-  void writeRaw(byte[] bytes, int offset, int length) throws IOException {
+  public void writeRaw(byte[] bytes, int offset, int length) throws IOException {
     doubleBuf.writeRaw(bytes, offset, length);
   }
@@ -94,7 +94,7 @@ void writeRaw(byte[] bytes, int offset, int length) throws IOException {
    * Create empty edits logs file.
    */
   @Override
-  void create() throws IOException {
+  public void create() throws IOException {
     fc.truncate(0);
     fc.position(0);
     doubleBuf.getCurrentBuf().writeInt(HdfsConstants.LAYOUT_VERSION);
@@ -150,7 +150,7 @@ public void abort() throws IOException {
    * data can be still written to the stream while flushing is performed.
    */
   @Override
-  void setReadyToFlush() throws IOException {
+  public void setReadyToFlush() throws IOException {
     doubleBuf.getCurrentBuf().write(FSEditLogOpCodes.OP_INVALID.getOpCode()); // insert eof marker
     doubleBuf.setReadyToFlush();
   }

View File: EditLogInputStream.java

@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 
 import java.io.Closeable;
 import java.io.IOException;
@@ -27,7 +29,9 @@
  * It should stream bytes from the storage exactly as they were written
  * into the #{@link EditLogOutputStream}.
  */
-abstract class EditLogInputStream implements JournalStream, Closeable {
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public abstract class EditLogInputStream implements JournalStream, Closeable {
   /**
    * @return the first transaction which will be found in this stream
    */
@@ -74,5 +78,5 @@ abstract class EditLogInputStream implements JournalStream, Closeable {
   /**
    * Return the size of the current edits log.
    */
-  abstract long length() throws IOException;
+  public abstract long length() throws IOException;
 }

View File: EditLogOutputStream.java

@@ -21,17 +21,21 @@
 import static org.apache.hadoop.hdfs.server.common.Util.now;
 
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 
 /**
  * A generic abstract class to support journaling of edits logs into
  * a persistent storage.
  */
-abstract class EditLogOutputStream {
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public abstract class EditLogOutputStream {
   // these are statistics counters
   private long numSync;        // number of sync(s) to disk
   private long totalTimeSync;  // total time to sync
 
-  EditLogOutputStream() {
+  public EditLogOutputStream() throws IOException {
     numSync = totalTimeSync = 0;
   }
@@ -41,7 +45,7 @@ abstract class EditLogOutputStream {
    * @param op operation
    * @throws IOException
    */
-  abstract void write(FSEditLogOp op) throws IOException;
+  abstract public void write(FSEditLogOp op) throws IOException;
 
   /**
    * Write raw data to an edit log. This data should already have
@@ -54,7 +58,7 @@ abstract class EditLogOutputStream {
    * @param length number of bytes to write
    * @throws IOException
    */
-  abstract void writeRaw(byte[] bytes, int offset, int length)
+  abstract public void writeRaw(byte[] bytes, int offset, int length)
       throws IOException;
 
   /**
@@ -62,7 +66,7 @@ abstract void writeRaw(byte[] bytes, int offset, int length)
    *
    * @throws IOException
    */
-  abstract void create() throws IOException;
+  abstract public void create() throws IOException;
 
   /**
    * Close the journal.
@@ -81,7 +85,7 @@ abstract void writeRaw(byte[] bytes, int offset, int length)
    * All data that has been written to the stream so far will be flushed.
    * New data can be still written to the stream while flushing is performed.
    */
-  abstract void setReadyToFlush() throws IOException;
+  abstract public void setReadyToFlush() throws IOException;
 
   /**
    * Flush and sync all data that is ready to be flush
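
With the widened visibility, a journal plugin outside this package can supply its own output stream. A hedged sketch of the writer side, kept abstract so that only the methods this commit touches need appear (a real subclass must also implement the remaining abstract members, e.g. close and flush-related methods); SketchEditLogOutputStream is an illustrative name, and FSEditLogOp is assumed visible to the plugin:

import java.io.IOException;
import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;

public abstract class SketchEditLogOutputStream extends EditLogOutputStream {

  public SketchEditLogOutputStream() throws IOException {
    super();  // the superclass constructor now declares IOException
  }

  @Override
  public void write(FSEditLogOp op) throws IOException {
    // Serialize the operation into the plugin's own buffer or transport.
  }

  @Override
  public void writeRaw(byte[] bytes, int offset, int length)
      throws IOException {
    // Accept pre-serialized edit records unchanged.
  }

  @Override
  public void create() throws IOException {
    // Initialize an empty log segment, e.g. write a version header.
  }

  @Override
  public void setReadyToFlush() throws IOException {
    // Mark buffered data for the next flush while new writes continue.
  }
}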

View File: FSEditLog.java

@@ -24,6 +24,7 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
+import java.lang.reflect.Constructor;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -31,6 +32,7 @@
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
@@ -108,6 +110,7 @@ private enum State {
   private NameNodeMetrics metrics;
   private NNStorage storage;
+  private Configuration conf;
 
   private static class TransactionId {
     public long txid;
@@ -144,6 +147,7 @@ protected synchronized TransactionId initialValue() {
    * @param editsDirs List of journals to use
    */
   FSEditLog(Configuration conf, NNStorage storage, Collection<URI> editsDirs) {
+    this.conf = conf;
     isSyncRunning = false;
     this.storage = storage;
     metrics = NameNode.getNameNodeMetrics();
@@ -166,9 +170,13 @@ protected synchronized TransactionId initialValue() {
     this.journalSet = new JournalSet();
     for (URI u : this.editsDirs) {
-      StorageDirectory sd = storage.getStorageDirectory(u);
-      if (sd != null) {
-        journalSet.add(new FileJournalManager(sd));
+      if (u.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) {
+        StorageDirectory sd = storage.getStorageDirectory(u);
+        if (sd != null) {
+          journalSet.add(new FileJournalManager(sd));
+        }
+      } else {
+        journalSet.add(createJournal(u));
       }
     }
@@ -994,4 +1002,53 @@ static void closeAllStreams(Iterable<EditLogInputStream> streams) {
       IOUtils.closeStream(s);
     }
   }
+
+  /**
+   * Retrieve the implementation class for a Journal scheme.
+   * @param conf The configuration to retrieve the information from
+   * @param uriScheme The uri scheme to look up.
+   * @return the class of the journal implementation
+   * @throws IllegalArgumentException if no class is configured for uri
+   */
+  static Class<? extends JournalManager> getJournalClass(Configuration conf,
+      String uriScheme) {
+    String key
+      = DFSConfigKeys.DFS_NAMENODE_EDITS_PLUGIN_PREFIX + "." + uriScheme;
+    Class <? extends JournalManager> clazz = null;
+    try {
+      clazz = conf.getClass(key, null, JournalManager.class);
+    } catch (RuntimeException re) {
+      throw new IllegalArgumentException(
+          "Invalid class specified for " + uriScheme, re);
+    }
+
+    if (clazz == null) {
+      LOG.warn("No class configured for " +uriScheme
+               + ", " + key + " is empty");
+      throw new IllegalArgumentException(
+          "No class configured for " + uriScheme);
+    }
+    return clazz;
+  }
+
+  /**
+   * Construct a custom journal manager.
+   * The class to construct is taken from the configuration.
+   * @param uri Uri to construct
+   * @return The constructed journal manager
+   * @throws IllegalArgumentException if no class is configured for uri
+   */
+  private JournalManager createJournal(URI uri) {
+    Class<? extends JournalManager> clazz
+      = getJournalClass(conf, uri.getScheme());
+
+    try {
+      Constructor<? extends JournalManager> cons
+        = clazz.getConstructor(Configuration.class, URI.class);
+      return cons.newInstance(conf, uri);
+    } catch (Exception e) {
+      throw new IllegalArgumentException("Unable to construct journal, "
+                                         + uri, e);
+    }
+  }
 }
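
Taken together, getJournalClass() and createJournal() define the whole plugin contract: the scheme-derived key must name a class, and that class must expose a (Configuration, URI) constructor that is invoked reflectively. A hedged sketch of the lookup from inside this package (getJournalClass is package-private); the class name and schemes are illustrative:

package org.apache.hadoop.hdfs.server.namenode;

import java.net.URI;
import org.apache.hadoop.conf.Configuration;

class JournalResolutionSketch {
  static void demo(Configuration conf) throws Exception {
    URI uri = new URI("foojournal://remote/edits");
    // Succeeds only if dfs.namenode.edits.journal-plugin.foojournal
    // names a JournalManager implementation.
    Class<? extends JournalManager> clazz =
        FSEditLog.getJournalClass(conf, uri.getScheme());

    try {
      // An unconfigured scheme fails fast at lookup time.
      FSEditLog.getJournalClass(conf, "unknownscheme");
    } catch (IllegalArgumentException expected) {
      // "No class configured for unknownscheme"
    }
  }
}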

View File: FSEditLogLoader.java

@@ -25,6 +25,8 @@
 import java.util.Arrays;
 import java.util.EnumMap;
 
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
@@ -57,6 +59,8 @@
 import org.apache.hadoop.hdfs.util.Holder;
 
 import com.google.common.base.Joiner;
 
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
 public class FSEditLogLoader {
   private final FSNamesystem fsNamesys;
@@ -514,7 +518,7 @@ long getNumTransactions() {
   /**
    * Stream wrapper that keeps track of the current stream position.
    */
-  static class PositionTrackingInputStream extends FilterInputStream {
+  public static class PositionTrackingInputStream extends FilterInputStream {
     private long curPos = 0;
     private long markPos = -1;
View File: FSEditLogOp.java

@@ -113,6 +113,10 @@ private FSEditLogOp(FSEditLogOpCodes opCode) {
     this.txid = 0;
   }
 
+  public long getTransactionId() {
+    return txid;
+  }
+
   public void setTransactionId(long txid) {
     this.txid = txid;
   }

View File: JournalManager.java

@@ -20,6 +20,8 @@
 import java.io.Closeable;
 import java.io.IOException;
 
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 
 /**
  * A JournalManager is responsible for managing a single place of storing
@@ -28,7 +30,9 @@
  * each conceptual place of storage corresponds to exactly one instance of
  * this class, which is created when the EditLog is first opened.
  */
-interface JournalManager extends Closeable {
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public interface JournalManager extends Closeable {
   /**
    * Begin writing to a new segment of the log stream, which starts at
    * the given transaction ID.
@@ -71,7 +75,6 @@ long getNumberOfTransactions(long fromTxnId)
    *
    * @param minTxIdToKeep the earliest txid that must be retained after purging
    *                      old logs
-   * @param purger the purging implementation to use
    * @throws IOException if purging fails
    */
   void purgeLogsOlderThan(long minTxIdToKeep)
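
Because JournalManager is now public, a plugin can implement it from any package. A minimal sketch under the assumptions above: the class is kept abstract so only the members visible in this diff are spelled out (a real plugin implements the full interface), and the (Configuration, URI) constructor is the one FSEditLog.createJournal() discovers reflectively. FooJournalManager is the same illustrative name used in the configuration sketch earlier:

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.namenode.JournalManager;

public abstract class FooJournalManager implements JournalManager {
  private final Configuration conf;
  private final URI uri;

  // Required by FSEditLog.createJournal(): it calls
  // clazz.getConstructor(Configuration.class, URI.class).
  public FooJournalManager(Configuration conf, URI uri) {
    this.conf = conf;
    this.uri = uri;
  }

  @Override
  public void purgeLogsOlderThan(long minTxIdToKeep) throws IOException {
    // Discard finalized segments that lie entirely below minTxIdToKeep.
  }

  @Override
  public void close() throws IOException {
    // Release resources held against the journal location (from Closeable).
  }
}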

View File: JournalSet.java

@@ -309,7 +309,7 @@ private class JournalSetOutputStream extends EditLogOutputStream {
     }
 
     @Override
-    void write(final FSEditLogOp op)
+    public void write(final FSEditLogOp op)
         throws IOException {
       mapJournalsAndReportErrors(new JournalClosure() {
         @Override
@@ -322,7 +322,7 @@ public void apply(JournalAndStream jas) throws IOException {
     }
 
     @Override
-    void writeRaw(final byte[] data, final int offset, final int length)
+    public void writeRaw(final byte[] data, final int offset, final int length)
         throws IOException {
       mapJournalsAndReportErrors(new JournalClosure() {
         @Override
@@ -335,7 +335,7 @@ public void apply(JournalAndStream jas) throws IOException {
     }
 
     @Override
-    void create() throws IOException {
+    public void create() throws IOException {
       mapJournalsAndReportErrors(new JournalClosure() {
         @Override
         public void apply(JournalAndStream jas) throws IOException {
@@ -367,7 +367,7 @@ public void apply(JournalAndStream jas) throws IOException {
     }
 
     @Override
-    void setReadyToFlush() throws IOException {
+    public void setReadyToFlush() throws IOException {
       mapJournalsAndReportErrors(new JournalClosure() {
         @Override
         public void apply(JournalAndStream jas) throws IOException {

View File: NNStorage.java

@@ -70,7 +70,8 @@ public class NNStorage extends Storage implements Closeable {
   private static final Log LOG = LogFactory.getLog(NNStorage.class.getName());
 
   static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
+  static final String LOCAL_URI_SCHEME = "file";
 
   //
   // The filenames used for storing the images
   //
@@ -324,22 +325,14 @@ StorageDirectory getStorageDirectory(URI uri) {
   /**
    * Checks the consistency of a URI, in particular if the scheme
-   * is specified and is supported by a concrete implementation
+   * is specified
    * @param u URI whose consistency is being checked.
    */
   private static void checkSchemeConsistency(URI u) throws IOException {
     String scheme = u.getScheme();
     // the URI should have a proper scheme
-    if(scheme == null)
+    if(scheme == null) {
       throw new IOException("Undefined scheme for " + u);
-    else {
-      try {
-        // the scheme should be enumerated as JournalType
-        JournalType.valueOf(scheme.toUpperCase());
-      } catch (IllegalArgumentException iae){
-        throw new IOException("Unknown scheme " + scheme +
-            ". It should correspond to a JournalType enumeration value");
-      }
     }
   }

View File: NameNodeResourceChecker.java

@@ -33,6 +33,8 @@
 import org.apache.hadoop.hdfs.server.common.Util;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Collections2;
+import com.google.common.base.Predicate;
 
 /**
  *
@@ -69,7 +71,18 @@ public NameNodeResourceChecker(Configuration conf) throws IOException {
         .getTrimmedStringCollection(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_KEY));
 
     addDirsToCheck(FSNamesystem.getNamespaceDirs(conf));
-    addDirsToCheck(FSNamesystem.getNamespaceEditsDirs(conf));
+
+    Collection<URI> localEditDirs = Collections2.filter(
+        FSNamesystem.getNamespaceEditsDirs(conf),
+        new Predicate<URI>() {
+          public boolean apply(URI input) {
+            if (input.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) {
+              return true;
+            }
+            return false;
+          }
+        });
+    addDirsToCheck(localEditDirs);
 
     addDirsToCheck(extraCheckedVolumes);
   }