Merge MAPREDUCE-3289 from trunk. Make use of fadvise in the NM's shuffle handler. (Contributed by Todd Lipcon and Siddharth Seth)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1368722 13f79535-47bb-0310-9956-ffa450edef68
Siddharth Seth 2012-08-02 21:57:42 +00:00
parent f5dfa69617
commit f7545f076e
4 changed files with 202 additions and 15 deletions
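
The change in a nutshell: shuffle output is written once and read once, so there is no point letting it linger in the OS page cache. Below is a minimal sketch, not part of this commit, of the two-sided fadvise pattern the patch applies: asynchronous readahead in front of the reader (ReadaheadPool issues the fadvise WILLNEED hints on background threads) and drop-behind once the bytes have been sent. The class and method names here are hypothetical; the Hadoop APIs are the same ones the new classes below rely on.

// Hypothetical sketch of the fadvise pattern this commit applies.
import java.io.FileDescriptor;
import java.io.IOException;
import java.io.RandomAccessFile;

import org.apache.hadoop.io.ReadaheadPool;
import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
import org.apache.hadoop.io.nativeio.NativeIO;

public class FadvisePatternSketch {

  public static void serveOnce(RandomAccessFile file, long start, long len)
      throws IOException {
    FileDescriptor fd = file.getFD();
    ReadaheadPool pool = ReadaheadPool.getInstance();
    ReadaheadRequest pending = null;
    byte[] buf = new byte[64 * 1024];
    long pos = start;
    long end = start + len;
    file.seek(start);
    while (pos < end) {
      // Keep an asynchronous readahead window (here 4 MB) ahead of the
      // reader, so pages are already in cache when read() reaches them.
      pending = pool.readaheadStream("sketch", fd, pos, 4 * 1024 * 1024,
          end, pending);
      int n = file.read(buf, 0, (int) Math.min(buf.length, end - pos));
      if (n < 0) {
        break;
      }
      pos += n;
      // ... send buf[0..n) to the remote side ...
    }
    if (pending != null) {
      pending.cancel();
    }
    // The data will not be read again: advise the kernel to drop it from
    // the page cache rather than letting it evict hotter pages.
    NativeIO.posixFadviseIfPossible(fd, start, len,
        NativeIO.POSIX_FADV_DONTNEED);
  }
}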

hadoop-mapreduce-project/CHANGES.txt

@@ -68,6 +68,9 @@ Release 2.1.0-alpha - Unreleased
     MAPREDUCE-4427. Added an 'unmanaged' mode for AMs so as to ease
     development of new applications. (Bikas Saha via acmurthy)
 
+    MAPREDUCE-3289. Make use of fadvise in the NM's shuffle handler.
+    (Todd Lipcon and Siddharth Seth via sseth)
+
   OPTIMIZATIONS
 
   BUG FIXES

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/FadvisedChunkedFile.java (new file)

@@ -0,0 +1,80 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.mapred;

import java.io.FileDescriptor;
import java.io.IOException;
import java.io.RandomAccessFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.ReadaheadPool;
import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.jboss.netty.handler.stream.ChunkedFile;

public class FadvisedChunkedFile extends ChunkedFile {

  private static final Log LOG = LogFactory.getLog(FadvisedChunkedFile.class);

  private final boolean manageOsCache;
  private final int readaheadLength;
  private final ReadaheadPool readaheadPool;
  private final FileDescriptor fd;
  private final String identifier;

  private ReadaheadRequest readaheadRequest;

  public FadvisedChunkedFile(RandomAccessFile file, long position, long count,
      int chunkSize, boolean manageOsCache, int readaheadLength,
      ReadaheadPool readaheadPool, String identifier) throws IOException {
    super(file, position, count, chunkSize);
    this.manageOsCache = manageOsCache;
    this.readaheadLength = readaheadLength;
    this.readaheadPool = readaheadPool;
    this.fd = file.getFD();
    this.identifier = identifier;
  }

  @Override
  public Object nextChunk() throws Exception {
    // Schedule asynchronous readahead for the region about to be read,
    // so the kernel has the pages cached before the copy happens.
    if (manageOsCache && readaheadPool != null) {
      readaheadRequest = readaheadPool
          .readaheadStream(identifier, fd, getCurrentOffset(), readaheadLength,
              getEndOffset(), readaheadRequest);
    }
    return super.nextChunk();
  }

  @Override
  public void close() throws Exception {
    if (readaheadRequest != null) {
      readaheadRequest.cancel();
    }
    // Shuffle data is read once; drop the transferred range from the OS
    // page cache instead of letting it evict more useful pages.
    if (manageOsCache && getEndOffset() - getStartOffset() > 0) {
      try {
        NativeIO.posixFadviseIfPossible(fd, getStartOffset(), getEndOffset()
            - getStartOffset(), NativeIO.POSIX_FADV_DONTNEED);
      } catch (Throwable t) {
        LOG.warn("Failed to manage OS cache for " + identifier, t);
      }
    }
    super.close();
  }
}
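
For reference: Netty 3's ChunkedWriteHandler drains a ChunkedFile by calling nextChunk() repeatedly, which is why nextChunk() is the hook for scheduling readahead and close() is where the drop-behind advice belongs. A hedged usage sketch follows; the helper class, channel setup, and parameter values are assumptions, only FadvisedChunkedFile and ReadaheadPool come from the patch.

package org.apache.hadoop.mapred;

import java.io.RandomAccessFile;

import org.apache.hadoop.io.ReadaheadPool;
import org.jboss.netty.channel.Channel;

public class ChunkedUsageSketch {

  // Hypothetical helper: stream a whole spill file over a channel whose
  // pipeline already contains a ChunkedWriteHandler (the SSL path below).
  static void writeSpill(Channel channel, String path) throws Exception {
    RandomAccessFile spill = new RandomAccessFile(path, "r");
    FadvisedChunkedFile chunk = new FadvisedChunkedFile(
        spill, 0, spill.length(),
        8 * 1024,                        // chunkSize
        true,                            // manageOsCache
        4 * 1024 * 1024,                 // readaheadLength
        ReadaheadPool.getInstance(),
        path);                           // identifier, used in warnings
    // close() runs when the handler finishes the file: it cancels any
    // outstanding readahead and fadvises DONTNEED over the sent range.
    channel.write(chunk);
  }
}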

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/FadvisedFileRegion.java (new file)

@@ -0,0 +1,82 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.mapred;

import java.io.FileDescriptor;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.WritableByteChannel;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.ReadaheadPool;
import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.jboss.netty.channel.DefaultFileRegion;

public class FadvisedFileRegion extends DefaultFileRegion {

  private static final Log LOG = LogFactory.getLog(FadvisedFileRegion.class);

  private final boolean manageOsCache;
  private final int readaheadLength;
  private final ReadaheadPool readaheadPool;
  private final FileDescriptor fd;
  private final String identifier;

  private ReadaheadRequest readaheadRequest;

  public FadvisedFileRegion(RandomAccessFile file, long position, long count,
      boolean manageOsCache, int readaheadLength, ReadaheadPool readaheadPool,
      String identifier) throws IOException {
    super(file.getChannel(), position, count);
    this.manageOsCache = manageOsCache;
    this.readaheadLength = readaheadLength;
    this.readaheadPool = readaheadPool;
    this.fd = file.getFD();
    this.identifier = identifier;
  }

  @Override
  public long transferTo(WritableByteChannel target, long position)
      throws IOException {
    // Keep a readahead window ahead of the zero-copy transfer position.
    if (manageOsCache && readaheadPool != null) {
      readaheadRequest = readaheadPool.readaheadStream(identifier, fd,
          getPosition() + position, readaheadLength,
          getPosition() + getCount(), readaheadRequest);
    }
    return super.transferTo(target, position);
  }

  @Override
  public void releaseExternalResources() {
    if (readaheadRequest != null) {
      readaheadRequest.cancel();
    }
    // Once the region has been sent, evict it from the page cache.
    if (manageOsCache && getCount() > 0) {
      try {
        NativeIO.posixFadviseIfPossible(fd, getPosition(), getCount(),
            NativeIO.POSIX_FADV_DONTNEED);
      } catch (Throwable t) {
        LOG.warn("Failed to manage OS cache for " + identifier, t);
      }
    }
    super.releaseExternalResources();
  }
}
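
FadvisedFileRegion covers the plain-HTTP path, where the file is sent with zero-copy transferTo() (sendfile); Netty calls transferTo() repeatedly until the region drains, and releaseExternalResources() is the cleanup hook since there is no close(). A hedged usage sketch mirroring the ShuffleHandler change below; the helper and its parameter values are assumptions.

package org.apache.hadoop.mapred;

import java.io.RandomAccessFile;

import org.apache.hadoop.io.ReadaheadPool;
import org.jboss.netty.channel.Channel;
import org.jboss.netty.channel.ChannelFuture;
import org.jboss.netty.channel.ChannelFutureListener;

public class FileRegionUsageSketch {

  // Hypothetical helper: zero-copy transfer of one byte range to a non-SSL
  // channel, with the fadvise cleanup tied to write completion.
  static void writeRange(Channel channel, String path, long off, long len)
      throws Exception {
    RandomAccessFile spill = new RandomAccessFile(path, "r");
    final FadvisedFileRegion region = new FadvisedFileRegion(
        spill, off, len,
        true,                 // manageOsCache
        4 * 1024 * 1024,      // readaheadLength
        ReadaheadPool.getInstance(),
        path);                // identifier
    ChannelFuture f = channel.write(region);
    f.addListener(new ChannelFutureListener() {
      @Override
      public void operationComplete(ChannelFuture future) {
        // Cancels pending readahead and fadvises DONTNEED over the range.
        region.releaseExternalResources();
      }
    });
  }
}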

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/main/java/org/apache/hadoop/mapred/ShuffleHandler.java

@@ -55,6 +55,7 @@ import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DataInputByteBuffer;
 import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.ReadaheadPool;
 import org.apache.hadoop.mapreduce.MRConfig;
 import org.apache.hadoop.mapreduce.security.SecureShuffleUtils;
 import org.apache.hadoop.security.ssl.SSLFactory;
@@ -86,9 +87,7 @@ import org.jboss.netty.channel.ChannelHandlerContext;
 import org.jboss.netty.channel.ChannelPipeline;
 import org.jboss.netty.channel.ChannelPipelineFactory;
 import org.jboss.netty.channel.Channels;
-import org.jboss.netty.channel.DefaultFileRegion;
 import org.jboss.netty.channel.ExceptionEvent;
-import org.jboss.netty.channel.FileRegion;
 import org.jboss.netty.channel.MessageEvent;
 import org.jboss.netty.channel.SimpleChannelUpstreamHandler;
 import org.jboss.netty.channel.group.ChannelGroup;
@@ -104,7 +103,6 @@ import org.jboss.netty.handler.codec.http.HttpResponseEncoder;
 import org.jboss.netty.handler.codec.http.HttpResponseStatus;
 import org.jboss.netty.handler.codec.http.QueryStringDecoder;
 import org.jboss.netty.handler.ssl.SslHandler;
-import org.jboss.netty.handler.stream.ChunkedFile;
 import org.jboss.netty.handler.stream.ChunkedWriteHandler;
 import org.jboss.netty.util.CharsetUtil;
@@ -115,12 +113,27 @@ public class ShuffleHandler extends AbstractService
   private static final Log LOG = LogFactory.getLog(ShuffleHandler.class);
 
+  public static final String SHUFFLE_MANAGE_OS_CACHE =
+      "mapreduce.shuffle.manage.os.cache";
+  public static final boolean DEFAULT_SHUFFLE_MANAGE_OS_CACHE = true;
+
+  public static final String SHUFFLE_READAHEAD_BYTES =
+      "mapreduce.shuffle.readahead.bytes";
+  public static final int DEFAULT_SHUFFLE_READAHEAD_BYTES = 4 * 1024 * 1024;
+
   private int port;
   private ChannelFactory selector;
   private final ChannelGroup accepted = new DefaultChannelGroup();
   private HttpPipelineFactory pipelineFact;
   private int sslFileBufferSize;
+
+  /**
+   * Should the shuffle use posix_fadvise calls to manage the OS cache during
+   * sendfile
+   */
+  private boolean manageOsCache;
+  private int readaheadLength;
+
+  private ReadaheadPool readaheadPool = ReadaheadPool.getInstance();
 
   public static final String MAPREDUCE_SHUFFLE_SERVICEID =
       "mapreduce.shuffle";
@@ -242,6 +255,12 @@ public class ShuffleHandler extends AbstractService
 
   @Override
   public synchronized void init(Configuration conf) {
+    manageOsCache = conf.getBoolean(SHUFFLE_MANAGE_OS_CACHE,
+        DEFAULT_SHUFFLE_MANAGE_OS_CACHE);
+
+    readaheadLength = conf.getInt(SHUFFLE_READAHEAD_BYTES,
+        DEFAULT_SHUFFLE_READAHEAD_BYTES);
+
     ThreadFactory bossFactory = new ThreadFactoryBuilder()
       .setNameFormat("ShuffleHandler Netty Boss #%d")
       .build();
@@ -503,14 +522,14 @@ public class ShuffleHandler extends AbstractService
           base + "/file.out", conf);
       LOG.debug("DEBUG1 " + base + " : " + mapOutputFileName + " : " +
           indexFileName);
-      IndexRecord info =
+      final IndexRecord info =
         indexCache.getIndexInformation(mapId, reduce, indexFileName, user);
       final ShuffleHeader header =
         new ShuffleHeader(mapId, info.partLength, info.rawLength, reduce);
       final DataOutputBuffer dob = new DataOutputBuffer();
       header.write(dob);
       ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
-      File spillfile = new File(mapOutputFileName.toString());
+      final File spillfile = new File(mapOutputFileName.toString());
       RandomAccessFile spill;
       try {
         spill = new RandomAccessFile(spillfile, "r");
@@ -520,22 +539,25 @@ public class ShuffleHandler extends AbstractService
       }
       ChannelFuture writeFuture;
       if (ch.getPipeline().get(SslHandler.class) == null) {
-        final FileRegion partition = new DefaultFileRegion(
-            spill.getChannel(), info.startOffset, info.partLength);
+        final FadvisedFileRegion partition = new FadvisedFileRegion(spill,
+            info.startOffset, info.partLength, manageOsCache, readaheadLength,
+            readaheadPool, spillfile.getAbsolutePath());
         writeFuture = ch.write(partition);
         writeFuture.addListener(new ChannelFutureListener() {
             // TODO error handling; distinguish IO/connection failures,
             //      attribute to appropriate spill output
           @Override
           public void operationComplete(ChannelFuture future) {
             partition.releaseExternalResources();
           }
         });
       } else {
         // HTTPS cannot be done with zero copy.
-        writeFuture = ch.write(new ChunkedFile(spill, info.startOffset,
-                                               info.partLength,
-                                               sslFileBufferSize));
+        final FadvisedChunkedFile chunk = new FadvisedChunkedFile(spill,
+            info.startOffset, info.partLength, sslFileBufferSize,
+            manageOsCache, readaheadLength, readaheadPool,
+            spillfile.getAbsolutePath());
+        writeFuture = ch.write(chunk);
       }
       metrics.shuffleConnections.incr();
       metrics.shuffleOutputBytes.incr(info.partLength); // optimistic
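
The patch adds two NodeManager-side knobs, read once in init(): mapreduce.shuffle.manage.os.cache (default true) and mapreduce.shuffle.readahead.bytes (default 4 MB). A hedged sketch of overriding them follows; in a real deployment they would be set in the NodeManager's configuration files (e.g. mapred-site.xml) rather than in code.

package org.apache.hadoop.mapred;

import org.apache.hadoop.conf.Configuration;

public class ShuffleTuningSketch {

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Opt out of fadvise-based cache management entirely...
    conf.setBoolean(ShuffleHandler.SHUFFLE_MANAGE_OS_CACHE, false);
    // ...or keep it on and shrink the readahead window from 4 MB to 1 MB.
    conf.setInt(ShuffleHandler.SHUFFLE_READAHEAD_BYTES, 1024 * 1024);
    // ShuffleHandler.init(conf) reads both values before starting Netty.
  }
}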