MAPREDUCE-7237. Supports config the shuffle's path cache related parameters (#1397)

This commit is contained in:
Wanqiang Ji 2020-03-16 10:28:36 +08:00 committed by GitHub
parent 1d772dc542
commit ea688631b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 68 additions and 52 deletions

View File

@ -1266,6 +1266,29 @@
</description>
</property>
<property>
<name>mapreduce.shuffle.pathcache.max-weight</name>
<value>10485760</value>
<description>The maximum total weight of entries the cache may contain.
</description>
</property>
<property>
<name>mapreduce.shuffle.pathcache.expire-after-access-minutes</name>
<value>5</value>
<description>The length of time after an entry is last accessed that it
should be automatically removed.
</description>
</property>
<property>
<name>mapreduce.shuffle.pathcache.concurrency-level</name>
<value>16</value>
<description>Uses the concurrency level to create a fixed number of hashtable
segments, each governed by its own write lock.
</description>
</property>
<property>
<name>mapreduce.job.reduce.shuffle.consumer.plugin.class</name>
<value>org.apache.hadoop.mapreduce.task.reduce.Shuffle</value>

View File

@ -139,8 +139,6 @@ import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
import com.google.common.cache.Weigher;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.hadoop.thirdparty.protobuf.ByteString;
@ -836,63 +834,58 @@ public class ShuffleHandler extends AuxiliaryService {
// TODO factor out encode/decode to permit binary shuffle
// TODO factor out decode of index to permit alt. models
}
}
class Shuffle extends SimpleChannelUpstreamHandler {
private static final int MAX_WEIGHT = 10 * 1024 * 1024;
private static final int EXPIRE_AFTER_ACCESS_MINUTES = 5;
private static final int ALLOWED_CONCURRENCY = 16;
private final Configuration conf;
private static final String MAX_WEIGHT =
"mapreduce.shuffle.pathcache.max-weight";
private static final int DEFAULT_MAX_WEIGHT = 10 * 1024 * 1024;
private static final String EXPIRE_AFTER_ACCESS_MINUTES =
"mapreduce.shuffle.pathcache.expire-after-access-minutes";
private static final int DEFAULT_EXPIRE_AFTER_ACCESS_MINUTES = 5;
private static final String CONCURRENCY_LEVEL =
"mapreduce.shuffle.pathcache.concurrency-level";
private static final int DEFAULT_CONCURRENCY_LEVEL = 16;
private final IndexCache indexCache;
private final
LoadingCache<AttemptPathIdentifier, AttemptPathInfo> pathCache;
private int port;
private final LoadingCache<AttemptPathIdentifier, AttemptPathInfo> pathCache =
CacheBuilder.newBuilder().expireAfterAccess(EXPIRE_AFTER_ACCESS_MINUTES,
TimeUnit.MINUTES).softValues().concurrencyLevel(ALLOWED_CONCURRENCY).
removalListener(
new RemovalListener<AttemptPathIdentifier, AttemptPathInfo>() {
@Override
public void onRemoval(RemovalNotification<AttemptPathIdentifier,
AttemptPathInfo> notification) {
if (LOG.isDebugEnabled()) {
LOG.debug("PathCache Eviction: " + notification.getKey() +
", Reason=" + notification.getCause());
}
}
}
).maximumWeight(MAX_WEIGHT).weigher(
new Weigher<AttemptPathIdentifier, AttemptPathInfo>() {
@Override
public int weigh(AttemptPathIdentifier key,
AttemptPathInfo value) {
return key.jobId.length() + key.user.length() +
key.attemptId.length()+
value.indexPath.toString().length() +
value.dataPath.toString().length();
}
}
).build(new CacheLoader<AttemptPathIdentifier, AttemptPathInfo>() {
@Override
public AttemptPathInfo load(AttemptPathIdentifier key) throws
Exception {
String base = getBaseLocation(key.jobId, key.user);
String attemptBase = base + key.attemptId;
Path indexFileName = getAuxiliaryLocalPathHandler()
.getLocalPathForRead(attemptBase + "/" + INDEX_FILE_NAME);
Path mapOutputFileName = getAuxiliaryLocalPathHandler()
.getLocalPathForRead(attemptBase + "/" + DATA_FILE_NAME);
if (LOG.isDebugEnabled()) {
LOG.debug("Loaded : " + key + " via loader");
}
return new AttemptPathInfo(indexFileName, mapOutputFileName);
}
});
public Shuffle(Configuration conf) {
this.conf = conf;
indexCache = new IndexCache(new JobConf(conf));
Shuffle(Configuration conf) {
this.port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT);
this.indexCache = new IndexCache(new JobConf(conf));
this.pathCache = CacheBuilder.newBuilder()
.expireAfterAccess(conf.getInt(EXPIRE_AFTER_ACCESS_MINUTES,
DEFAULT_EXPIRE_AFTER_ACCESS_MINUTES), TimeUnit.MINUTES)
.softValues()
.concurrencyLevel(conf.getInt(CONCURRENCY_LEVEL,
DEFAULT_CONCURRENCY_LEVEL))
.removalListener((RemovalListener<AttemptPathIdentifier,
AttemptPathInfo>) notification ->
LOG.debug("PathCache Eviction: {}, Reason={}",
notification.getKey(), notification.getCause()))
.maximumWeight(conf.getInt(MAX_WEIGHT, DEFAULT_MAX_WEIGHT))
.weigher((key, value) -> key.jobId.length() + key.user.length() +
key.attemptId.length()+ value.indexPath.toString().length() +
value.dataPath.toString().length())
.build(new CacheLoader<AttemptPathIdentifier, AttemptPathInfo>() {
@Override
public AttemptPathInfo load(AttemptPathIdentifier key) throws
Exception {
String base = getBaseLocation(key.jobId, key.user);
String attemptBase = base + key.attemptId;
Path indexFileName = getAuxiliaryLocalPathHandler()
.getLocalPathForRead(attemptBase + "/" + INDEX_FILE_NAME);
Path mapOutputFileName = getAuxiliaryLocalPathHandler()
.getLocalPathForRead(attemptBase + "/" + DATA_FILE_NAME);
LOG.debug("Loaded : {} via loader", key);
return new AttemptPathInfo(indexFileName, mapOutputFileName);
}
});
}
public void setPort(int port) {