Merge trunk to HDFS-4685.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-4685@1556097 13f79535-47bb-0310-9956-ffa450edef68
Author: Chris Nauroth, 2014-01-07 01:07:33 +00:00
Commit: 2fbb3d694e
159 changed files with 5282 additions and 2295 deletions

View File: hadoop-common-project/hadoop-auth/pom.xml

@@ -92,6 +92,11 @@
<artifactId>hadoop-minikdc</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>
<build>

View File: PseudoAuthenticationHandler.java

@@ -16,10 +16,15 @@ package org.apache.hadoop.security.authentication.server;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.security.authentication.client.PseudoAuthenticator;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.NameValuePair;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Properties;
/**
@@ -48,6 +53,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
*/
public static final String ANONYMOUS_ALLOWED = TYPE + ".anonymous.allowed";
private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
private boolean acceptAnonymous;
/**
@@ -114,6 +120,18 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
return true;
}
private String getUserName(HttpServletRequest request) {
List<NameValuePair> list = URLEncodedUtils.parse(request.getQueryString(), UTF8_CHARSET);
if (list != null) {
for (NameValuePair nv : list) {
if (PseudoAuthenticator.USER_NAME.equals(nv.getName())) {
return nv.getValue();
}
}
}
return null;
}
/**
* Authenticates an HTTP client request.
* <p/>
@@ -139,7 +157,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
public AuthenticationToken authenticate(HttpServletRequest request, HttpServletResponse response)
throws IOException, AuthenticationException {
AuthenticationToken token;
String userName = request.getParameter(PseudoAuthenticator.USER_NAME);
String userName = getUserName(request);
if (userName == null) {
if (getAcceptAnonymous()) {
token = AuthenticationToken.ANONYMOUS;

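The point of the new getUserName() above: request.getParameter() can force the servlet container to parse a form-encoded POST body, consuming the request InputStream that the application still needs (HADOOP-10193, listed in CHANGES.txt below). Parsing only the query string leaves the body untouched. A hypothetical servlet fragment illustrating the hazard the old code had:

import java.io.IOException;
import java.io.InputStream;
import javax.servlet.http.HttpServletRequest;

public class GetParameterHazard {
  // For an application/x-www-form-urlencoded POST, getParameter() may be
  // satisfied by reading the request body, so the later getInputStream()
  // can return an already-exhausted stream.
  void handle(HttpServletRequest request) throws IOException {
    String user = request.getParameter("user.name"); // may consume the body
    InputStream body = request.getInputStream();     // possibly empty now
  }
}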
View File: TestPseudoAuthenticationHandler.java

@@ -94,7 +94,7 @@ public class TestPseudoAuthenticationHandler {
HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
Mockito.when(request.getParameter(PseudoAuthenticator.USER_NAME)).thenReturn("user");
Mockito.when(request.getQueryString()).thenReturn(PseudoAuthenticator.USER_NAME + "=" + "user");
AuthenticationToken token = handler.authenticate(request, response);

View File: hadoop-common-project/hadoop-common/CHANGES.txt

@@ -108,6 +108,8 @@ Trunk (Unreleased)
HADOOP-10141. Create KeyProvider API to separate encryption key storage
from the applications. (omalley)
HADOOP-10201. Add listing to KeyProvider API. (Larry McCay via omalley)
BUG FIXES
HADOOP-9451. Fault single-layer config if node group topology is enabled.
@@ -407,6 +409,9 @@ Release 2.4.0 - UNRELEASED
HADOOP-10169. Remove the unnecessary synchronized in JvmMetrics class.
(Liang Xie via jing9)
HADOOP-10198. DomainSocket: add support for socketpair.
(Colin Patrick McCabe via wang)
OPTIMIZATIONS
HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn)
@@ -416,6 +421,9 @@ Release 2.4.0 - UNRELEASED
HADOOP-10172. Cache SASL server factories (daryn)
HADOOP-10173. Remove UGI from DIGEST-MD5 SASL server creation (daryn via
kihwal)
BUG FIXES
HADOOP-9964. Fix deadlocks in TestHttpServer by synchronize
@@ -489,6 +497,9 @@ Release 2.4.0 - UNRELEASED
HADOOP-10171. TestRPC fails intermittently on jkd7 (Mit Desai via jeagles)
HADOOP-10147 HDFS-5678 Upgrade to commons-logging 1.1.3 to avoid potential
deadlock in MiniDFSCluster (stevel)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES
@@ -568,6 +579,12 @@ Release 2.3.0 - UNRELEASED
HADOOP-10175. Har files system authority should preserve userinfo.
(Chuan Liu via cnauroth)
HADOOP-10090. Jobtracker metrics not updated properly after execution
of a mapreduce job. (ivanmi)
HADOOP-10193. hadoop-auth's PseudoAuthenticationHandler can consume getInputStream.
(gchanan via tucu)
Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES

View File: JavaKeyStoreProvider.java

@@ -36,8 +36,11 @@ import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.UnrecoverableKeyException;
import java.security.cert.CertificateException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
@@ -56,6 +59,7 @@ import java.util.Map;
*/
@InterfaceAudience.Private
public class JavaKeyStoreProvider extends KeyProvider {
private static final String KEY_METADATA = "KeyMetadata";
public static final String SCHEME_NAME = "jceks";
public static final String KEYSTORE_PASSWORD_NAME =
"HADOOP_KEYSTORE_PASSWORD";
@@ -117,6 +121,44 @@ public class JavaKeyStoreProvider extends KeyProvider {
return new KeyVersion(versionName, key.getEncoded());
}
@Override
public List<String> getKeys() throws IOException {
ArrayList<String> list = new ArrayList<String>();
String alias = null;
try {
Enumeration<String> e = keyStore.aliases();
while (e.hasMoreElements()) {
alias = e.nextElement();
// only include the metadata key names in the list of names
if (!alias.contains("@")) {
list.add(alias);
}
}
} catch (KeyStoreException e) {
throw new IOException("Can't get key " + alias + " from " + path, e);
}
return list;
}
@Override
public List<KeyVersion> getKeyVersions(String name) throws IOException {
List<KeyVersion> list = new ArrayList<KeyVersion>();
Metadata km = getMetadata(name);
if (km != null) {
int latestVersion = km.getVersions();
KeyVersion v = null;
String versionName = null;
for (int i = 0; i < latestVersion; i++) {
versionName = buildVersionName(name, i);
v = getKeyVersion(versionName);
if (v != null) {
list.add(v);
}
}
}
return list;
}
@Override
public Metadata getMetadata(String name) throws IOException {
if (cache.containsKey(name)) {
@@ -288,7 +330,7 @@ public class JavaKeyStoreProvider extends KeyProvider {
@Override
public String getFormat() {
return "KeyMetadata";
return KEY_METADATA;
}
@Override

View File: KeyProvider.java

@@ -254,6 +254,20 @@ public abstract class KeyProvider {
public abstract KeyVersion getKeyVersion(String versionName
) throws IOException;
/**
* Get the key names for all keys.
* @return the list of key names
* @throws IOException
*/
public abstract List<String> getKeys() throws IOException;
/**
* Get the key material for all versions of a specific key name.
* @return the list of key material
* @throws IOException
*/
public abstract List<KeyVersion> getKeyVersions(String name) throws IOException;
/**
* Get the current version of the key, which should be used for encrypting new
* data.

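A minimal sketch of how a caller might exercise the two new abstract methods; the helper name is hypothetical:

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.crypto.key.KeyProvider;

public class KeyEnumeration {
  // Hypothetical helper: print every version of every key in a provider.
  static void printAllKeyVersions(KeyProvider provider) throws IOException {
    for (String name : provider.getKeys()) {
      List<KeyProvider.KeyVersion> versions = provider.getKeyVersions(name);
      for (KeyProvider.KeyVersion kv : versions) {
        System.out.println(name + " -> " + kv.getVersionName());
      }
    }
  }
}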
View File: UserProvider.java

@@ -20,8 +20,10 @@ package org.apache.hadoop.crypto.key;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -142,4 +144,32 @@ public class UserProvider extends KeyProvider {
return null;
}
}
@Override
public List<String> getKeys() throws IOException {
List<String> list = new ArrayList<String>();
List<Text> keys = credentials.getAllSecretKeys();
for (Text key : keys) {
if (key.find("@") == -1) {
list.add(key.toString());
}
}
return list;
}
@Override
public List<KeyVersion> getKeyVersions(String name) throws IOException {
List<KeyVersion> list = new ArrayList<KeyVersion>();
Metadata km = getMetadata(name);
if (km != null) {
int latestVersion = km.getVersions();
for (int i = 0; i < latestVersion; i++) {
KeyVersion v = getKeyVersion(buildVersionName(name, i));
if (v != null) {
list.add(v);
}
}
}
return list;
}
}

View File: CommonConfigurationKeys.java

@@ -21,7 +21,6 @@ package org.apache.hadoop.fs;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.http.lib.StaticUserWebFilter;
import org.apache.hadoop.security.authorize.Service;
/**
* This class contains constants for configuration keys used
@@ -240,4 +239,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
/** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */
public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT =
10000;
public static final String HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS =
"hadoop.user.group.metrics.percentiles.intervals";
}

View File: DomainSocket.java

@@ -276,6 +276,24 @@ public class DomainSocket implements Closeable {
return new DomainSocket(path, fd);
}
/**
* Create a pair of UNIX domain sockets which are connected to each other
* by calling socketpair(2).
*
* @return An array of two UNIX domain sockets connected to
* each other.
* @throws IOException on error.
*/
public static DomainSocket[] socketpair() throws IOException {
int fds[] = socketpair0();
return new DomainSocket[] {
new DomainSocket("(anonymous0)", fds[0]),
new DomainSocket("(anonymous1)", fds[1])
};
}
private static native int[] socketpair0() throws IOException;
private static native int accept0(int fd) throws IOException;
/**

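A minimal usage sketch for the new API (assuming DomainSocket's stream accessors, which the tests below exercise via their read/write strategies):

import java.io.IOException;
import org.apache.hadoop.net.unix.DomainSocket;

public class SocketpairSketch {
  static void demo() throws IOException {
    // Connected in-process pair; no filesystem path or accept loop required.
    DomainSocket[] pair = DomainSocket.socketpair();
    try {
      pair[0].getOutputStream().write(42);
      int b = pair[1].getInputStream().read(); // reads 42
    } finally {
      pair[0].close();
      pair[1].close();
    }
  }
}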
View File: Credentials.java

@@ -29,7 +29,9 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -73,15 +75,6 @@ public class Credentials implements Writable {
this.addAll(credentials);
}
/**
* Returns the key bytes for the alias
* @param alias the alias for the key
* @return key for this alias
*/
public byte[] getSecretKey(Text alias) {
return secretKeysMap.get(alias);
}
/**
* Returns the Token object for the alias
* @param alias the alias for the Token
@@ -118,6 +111,15 @@
return tokenMap.size();
}
/**
* Returns the key bytes for the alias
* @param alias the alias for the key
* @return key for this alias
*/
public byte[] getSecretKey(Text alias) {
return secretKeysMap.get(alias);
}
/**
* @return number of keys in the in-memory map
*/
@@ -142,6 +144,16 @@
secretKeysMap.remove(alias);
}
/**
* Return all the secret key entries in the in-memory map
*/
public List<Text> getAllSecretKeys() {
List<Text> list = new java.util.ArrayList<Text>();
list.addAll(secretKeysMap.keySet());
return list;
}
/**
* Convenience method for reading a token storage file, and loading the Tokens
* therein in the passed UGI

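A short sketch of the new getAllSecretKeys() accessor in use (the alias and key bytes are illustrative; UserProvider.getKeys() earlier in this commit is the real in-tree caller):

import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;

public class SecretKeyListing {
  static void demo() {
    Credentials creds = new Credentials();
    creds.addSecretKey(new Text("my-alias"), "secret".getBytes());
    // Enumerate aliases through the new accessor, then look each one up.
    for (Text alias : creds.getAllSecretKeys()) {
      byte[] material = creds.getSecretKey(alias);
      System.out.println(alias + ": " + material.length + " bytes");
    }
  }
}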
View File: Groups.java

@@ -138,6 +138,7 @@ public class Groups {
List<String> groupList = impl.getGroups(user);
long endMs = Time.monotonicNow();
long deltaMs = endMs - startMs;
UserGroupInformation.metrics.addGetGroups(deltaMs);
if (deltaMs > warningDeltaMs) {
LOG.warn("Potential performance problem: getGroups(user=" + user +") " +
"took " + deltaMs + " milliseconds.");

View File: SaslRpcServer.java

@@ -131,7 +131,7 @@ public class SaslRpcServer {
public SaslServer create(Connection connection,
SecretManager<TokenIdentifier> secretManager
) throws IOException, InterruptedException {
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
UserGroupInformation ugi = null;
final CallbackHandler callback;
switch (authMethod) {
case TOKEN: {
@@ -139,6 +139,7 @@
break;
}
case KERBEROS: {
ugi = UserGroupInformation.getCurrentUser();
if (serverId.isEmpty()) {
throw new AccessControlException(
"Kerberos principal name does NOT have the expected "
@@ -153,7 +154,9 @@
"Server does not support SASL " + authMethod);
}
SaslServer saslServer = ugi.doAs(
final SaslServer saslServer;
if (ugi != null) {
saslServer = ugi.doAs(
new PrivilegedExceptionAction<SaslServer>() {
@Override
public SaslServer run() throws SaslException {
@@ -161,6 +164,10 @@
SaslRpcServer.SASL_PROPS, callback);
}
});
} else {
saslServer = saslFactory.createSaslServer(mechanism, protocol, serverId,
SaslRpcServer.SASL_PROPS, callback);
}
if (saslServer == null) {
throw new AccessControlException(
"Unable to find SASL server implementation for " + mechanism);

View File: UserGroupInformation.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.security;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
import java.io.File;
import java.io.IOException;
@@ -58,6 +59,8 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
import org.apache.hadoop.security.authentication.util.KerberosUtil;
@@ -92,14 +95,27 @@ public class UserGroupInformation {
*/
@Metrics(about="User and group related metrics", context="ugi")
static class UgiMetrics {
final MetricsRegistry registry = new MetricsRegistry("UgiMetrics");
@Metric("Rate of successful kerberos logins and latency (milliseconds)")
MutableRate loginSuccess;
@Metric("Rate of failed kerberos logins and latency (milliseconds)")
MutableRate loginFailure;
@Metric("GetGroups") MutableRate getGroups;
MutableQuantiles[] getGroupsQuantiles;
static UgiMetrics create() {
return DefaultMetricsSystem.instance().register(new UgiMetrics());
}
void addGetGroups(long latency) {
getGroups.add(latency);
if (getGroupsQuantiles != null) {
for (MutableQuantiles q : getGroupsQuantiles) {
q.add(latency);
}
}
}
}
/**
@@ -250,6 +266,20 @@ public class UserGroupInformation {
groups = Groups.getUserToGroupsMappingService(conf);
}
UserGroupInformation.conf = conf;
if (metrics.getGroupsQuantiles == null) {
int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS);
if (intervals != null && intervals.length > 0) {
final int length = intervals.length;
MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length];
for (int i = 0; i < length; i++) {
getGroupsQuantiles[i] = metrics.registry.newQuantiles(
"getGroups" + intervals[i] + "s",
"Get groups", "ops", "latency", intervals[i]);
}
metrics.getGroupsQuantiles = getGroupsQuantiles;
}
}
}
/**

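To turn the new quantiles on, a deployment sets the comma-separated interval list before UGI is configured; a sketch with illustrative 60s/300s rollover windows:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;

public class UgiQuantilesSetup {
  static void demo() {
    Configuration conf = new Configuration();
    conf.set("hadoop.user.group.metrics.percentiles.intervals", "60,300");
    UserGroupInformation.setConfiguration(conf);
    // Subsequent group lookups feed MutableQuantiles gauges registered as
    // "getGroups60s" and "getGroups300s" under UgiMetrics.
  }
}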
View File: StringUtils.java

@@ -928,8 +928,10 @@ public class StringUtils {
* @param args List of arguments.
* @return null if the option was not found; the value of the
* option otherwise.
* @throws IllegalArgumentException if the option's argument is not present
*/
public static String popOptionWithArgument(String name, List<String> args) {
public static String popOptionWithArgument(String name, List<String> args)
throws IllegalArgumentException {
String val = null;
for (Iterator<String> iter = args.iterator(); iter.hasNext(); ) {
String cur = iter.next();
@@ -939,7 +941,7 @@ public class StringUtils {
} else if (cur.equals(name)) {
iter.remove();
if (!iter.hasNext()) {
throw new RuntimeException("option " + name + " requires 1 " +
throw new IllegalArgumentException("option " + name + " requires 1 " +
"argument.");
}
val = iter.next();

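Usage sketch for the retyped exception (argument values are illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.util.StringUtils;

public class PopOptionSketch {
  static void demo() {
    List<String> args = new ArrayList<String>(
        Arrays.asList("-maxTtl", "365d", "/pool1"));
    String ttl = StringUtils.popOptionWithArgument("-maxTtl", args); // "365d"
    // args is now ["/pool1"]. A trailing "-maxTtl" with no value now raises
    // IllegalArgumentException, which CLI code can catch and turn into a
    // usage message, instead of a bare RuntimeException.
  }
}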
View File: DomainSocket.c

@@ -364,6 +364,50 @@ JNIEnv *env, jclass clazz, jstring path)
return fd;
}
#define SOCKETPAIR_ARRAY_LEN 2
JNIEXPORT jarray JNICALL
Java_org_apache_hadoop_net_unix_DomainSocket_socketpair0(
JNIEnv *env, jclass clazz)
{
jarray arr = NULL;
int idx, err, fds[SOCKETPAIR_ARRAY_LEN] = { -1, -1 };
jthrowable jthr = NULL;
arr = (*env)->NewIntArray(env, SOCKETPAIR_ARRAY_LEN);
jthr = (*env)->ExceptionOccurred(env);
if (jthr) {
(*env)->ExceptionClear(env);
goto done;
}
if (socketpair(PF_UNIX, SOCK_STREAM, 0, fds) < 0) {
err = errno;
jthr = newSocketException(env, err,
"socketpair(2) error: %s", terror(err));
goto done;
}
(*env)->SetIntArrayRegion(env, arr, 0, SOCKETPAIR_ARRAY_LEN, fds);
jthr = (*env)->ExceptionOccurred(env);
if (jthr) {
(*env)->ExceptionClear(env);
goto done;
}
done:
if (jthr) {
(*env)->DeleteLocalRef(env, arr);
arr = NULL;
for (idx = 0; idx < SOCKETPAIR_ARRAY_LEN; idx++) {
if (fds[idx] >= 0) {
close(fds[idx]);
fds[idx] = -1;
}
}
(*env)->Throw(env, jthr);
}
return arr;
}
JNIEXPORT jint JNICALL
Java_org_apache_hadoop_net_unix_DomainSocket_accept0(
JNIEnv *env, jclass clazz, jint fd)

View File: TestKeyProviderFactory.java

@@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
@@ -160,6 +161,16 @@ public class TestKeyProviderFactory {
provider.getCurrentKey("key4").getMaterial());
assertArrayEquals(key3, provider.getCurrentKey("key3").getMaterial());
assertEquals("key3@0", provider.getCurrentKey("key3").getVersionName());
List<String> keys = provider.getKeys();
assertTrue("Keys should have been returned.", keys.size() == 2);
assertTrue("Returned Keys should have included key3.", keys.contains("key3"));
assertTrue("Returned Keys should have included key4.", keys.contains("key4"));
List<KeyVersion> kvl = provider.getKeyVersions("key3");
assertTrue("KeyVersions should have been returned for key3.", kvl.size() == 1);
assertTrue("KeyVersions should have included key3@0.", kvl.get(0).getVersionName().equals("key3@0"));
assertArrayEquals(key3, kvl.get(0).getMaterial());
}
@Test

View File: TestMetricsSourceAdapter.java (new file)

@@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metrics2.impl;
import static org.junit.Assert.*;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.MetricsTag;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.MetricsAnnotations;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MetricsSourceBuilder;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.junit.Test;
public class TestMetricsSourceAdapter {
@Test
public void testGetMetricsAndJmx() throws Exception {
// create test source with a single metric counter of value 0
TestSource source = new TestSource("test");
MetricsSourceBuilder sb = MetricsAnnotations.newSourceBuilder(source);
final MetricsSource s = sb.build();
List<MetricsTag> injectedTags = new ArrayList<MetricsTag>();
MetricsSourceAdapter sa = new MetricsSourceAdapter(
"test", "test", "test desc", s, injectedTags, null, null, 1, false);
// all metrics are initially assumed to have changed
MetricsCollectorImpl builder = new MetricsCollectorImpl();
Iterable<MetricsRecordImpl> metricsRecords = sa.getMetrics(builder, true);
// Validate getMetrics and JMX initial values
MetricsRecordImpl metricsRecord = metricsRecords.iterator().next();
assertEquals(0L,
metricsRecord.metrics().iterator().next().value().longValue());
Thread.sleep(100); // skip JMX cache TTL
assertEquals(0L, (Number)sa.getAttribute("C1"));
// change metric value
source.incrementCnt();
// validate getMetrics and JMX
builder = new MetricsCollectorImpl();
metricsRecords = sa.getMetrics(builder, true);
metricsRecord = metricsRecords.iterator().next();
assertTrue(metricsRecord.metrics().iterator().hasNext());
Thread.sleep(100); // skip JMX cache TTL
assertEquals(1L, (Number)sa.getAttribute("C1"));
}
@SuppressWarnings("unused")
@Metrics(context="test")
private static class TestSource {
@Metric("C1 desc") MutableCounterLong c1;
final MetricsRegistry registry;
TestSource(String recName) {
registry = new MetricsRegistry(recName);
}
public void incrementCnt() {
c1.incr();
}
}
}

View File: TestDomainSocket.java

@@ -420,7 +420,8 @@ public class TestDomainSocket {
* @throws IOException
*/
void testClientServer1(final Class<? extends WriteStrategy> writeStrategyClass,
final Class<? extends ReadStrategy> readStrategyClass) throws Exception {
final Class<? extends ReadStrategy> readStrategyClass,
final DomainSocket preConnectedSockets[]) throws Exception {
final String TEST_PATH = new File(sockDir.getDir(),
"test_sock_client_server1").getAbsolutePath();
final byte clientMsg1[] = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6 };
@@ -428,13 +429,15 @@
final byte clientMsg2 = 0x45;
final ArrayBlockingQueue<Throwable> threadResults =
new ArrayBlockingQueue<Throwable>(2);
final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
final DomainSocket serv = (preConnectedSockets != null) ?
null : DomainSocket.bindAndListen(TEST_PATH);
Thread serverThread = new Thread() {
public void run(){
// Run server
DomainSocket conn = null;
try {
conn = serv.accept();
conn = preConnectedSockets != null ?
preConnectedSockets[0] : serv.accept();
byte in1[] = new byte[clientMsg1.length];
ReadStrategy reader = readStrategyClass.newInstance();
reader.init(conn);
@@ -459,7 +462,8 @@
Thread clientThread = new Thread() {
public void run(){
try {
DomainSocket client = DomainSocket.connect(TEST_PATH);
DomainSocket client = preConnectedSockets != null ?
preConnectedSockets[1] : DomainSocket.connect(TEST_PATH);
WriteStrategy writer = writeStrategyClass.newInstance();
writer.init(client);
writer.write(clientMsg1);
@@ -487,25 +491,45 @@
}
serverThread.join(120000);
clientThread.join(120000);
if (serv != null) {
serv.close();
}
}
@Test(timeout=180000)
public void testClientServerOutStreamInStream() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
InputStreamReadStrategy.class);
InputStreamReadStrategy.class, null);
}
@Test(timeout=180000)
public void testClientServerOutStreamInStreamWithSocketpair() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
InputStreamReadStrategy.class, DomainSocket.socketpair());
}
@Test(timeout=180000)
public void testClientServerOutStreamInDbb() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
DirectByteBufferReadStrategy.class);
DirectByteBufferReadStrategy.class, null);
}
@Test(timeout=180000)
public void testClientServerOutStreamInDbbWithSocketpair() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
DirectByteBufferReadStrategy.class, DomainSocket.socketpair());
}
@Test(timeout=180000)
public void testClientServerOutStreamInAbb() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
ArrayBackedByteBufferReadStrategy.class);
ArrayBackedByteBufferReadStrategy.class, null);
}
@Test(timeout=180000)
public void testClientServerOutStreamInAbbWithSocketpair() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
ArrayBackedByteBufferReadStrategy.class, DomainSocket.socketpair());
}
static private class PassedFile {

View File: TestUserGroupInformation.java

@@ -19,7 +19,6 @@ package org.apache.hadoop.security;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.TestSaslRPC;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.authentication.util.KerberosName;
@@ -40,9 +39,9 @@ import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Set;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
import static org.apache.hadoop.ipc.TestSaslRPC.*;
import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier;
import static org.apache.hadoop.test.MetricsAsserts.*;
import static org.junit.Assert.*;
import static org.mockito.Mockito.mock;
@@ -55,6 +54,8 @@ public class TestUserGroupInformation {
final private static String GROUP3_NAME = "group3";
final private static String[] GROUP_NAMES =
new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME};
// Rollover interval of percentile metrics (in seconds)
private static final int PERCENTILES_INTERVAL = 1;
private static Configuration conf;
/**
@@ -80,7 +81,8 @@ public class TestUserGroupInformation {
// doesn't matter what it is, but getGroups needs it set...
// use HADOOP_HOME environment variable to prevent interfering with logic
// that finds winutils.exe
System.setProperty("hadoop.home.dir", System.getenv("HADOOP_HOME"));
String home = System.getenv("HADOOP_HOME");
System.setProperty("hadoop.home.dir", (home != null ? home : "."));
// fake the realm if kerberos is enabled
System.setProperty("java.security.krb5.kdc", "");
System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
@@ -150,11 +152,15 @@ public class TestUserGroupInformation {
/** Test login method */
@Test (timeout = 30000)
public void testLogin() throws Exception {
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
String.valueOf(PERCENTILES_INTERVAL));
UserGroupInformation.setConfiguration(conf);
// login from unix
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
assertEquals(UserGroupInformation.getCurrentUser(),
UserGroupInformation.getLoginUser());
assertTrue(ugi.getGroupNames().length >= 1);
verifyGroupMetrics(1);
// ensure that doAs works correctly
UserGroupInformation userGroupInfo =
@@ -728,6 +734,21 @@ public class TestUserGroupInformation {
}
}
private static void verifyGroupMetrics(
long groups) throws InterruptedException {
MetricsRecordBuilder rb = getMetrics("UgiMetrics");
if (groups > 0) {
assertCounter("GetGroupsNumOps", groups, rb);
double avg = getDoubleGauge("GetGroupsAvgTime", rb);
assertTrue(avg >= 0.0);
// Sleep for an interval+slop to let the percentiles rollover
Thread.sleep((PERCENTILES_INTERVAL+1)*1000);
// Check that the percentiles were updated
assertQuantileGauges("GetGroups1s", rb);
}
}
/**
* Test for the case that UserGroupInformation.getCurrentUser()
* is called when the AccessControlContext has a Subject associated

View File: hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -13,10 +13,6 @@ Trunk (Unreleased)
HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
as a collection of storages (see breakdown of tasks below for features and
contributors).
IMPROVEMENTS
HDFS-4665. Move TestNetworkTopologyWithNodeGroup to common.
@@ -243,13 +239,18 @@ Trunk (Unreleased)
HDFS-5636. Enforce a max TTL per cache pool. (awang via cmccabe)
HDFS-5651. Remove dfs.namenode.caching.enabled and improve CRM locking.
(cmccabe via wang)
OPTIMIZATIONS
HDFS-5349. DNA_CACHE and DNA_UNCACHE should be by blockId only. (cmccabe)
HDFS-5665. Remove the unnecessary writeLock while initializing CacheManager
in FsNameSystem Ctor. (Uma Maheswara Rao G via Andrew Wang)
BUG FIXES
HADOOP-9635 Fix potential Stack Overflow in DomainSocket.c (V. Karthik Kumar
via cmccabe)
@@ -444,139 +445,27 @@ Trunk (Unreleased)
HDFS-5626. dfsadmin -report shows incorrect cache values. (cmccabe)
HDFS-5406. Send incremental block reports for all storages in a
single call. (Arpit Agarwal)
HDFS-5454. DataNode UUID should be assigned prior to FsDataset
initialization. (Arpit Agarwal)
HDFS-5679. TestCacheDirectives should handle the case where native code
is not available. (wang)
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
HDFS-5701. Fix the CacheAdmin -addPool -maxTtl option name.
(Stephen Chu via wang)
HDFS-4985. Add storage type to the protocol and expose it in block report
and block locations. (Arpit Agarwal)
HDFS-5708. The CacheManager throws a NPE in the DataNode logs when
processing cache reports that refer to a block not known to the
BlockManager. (cmccabe via wang)
HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
HDFS-5659. dfsadmin -report doesn't output cache information properly.
(wang)
HDFS-5000. DataNode configuration should allow specifying storage type.
(Arpit Agarwal)
HDFS-5705. TestSecondaryNameNodeUpgrade#testChangeNsIDFails may fail due
to ConcurrentModificationException. (Ted Yu via brandonli)
HDFS-4987. Namenode changes to track multiple storages per datanode.
(szetszwo)
HDFS-5719. FSImage#doRollback() should close prevState before return
(Ted Yu via brandonli)
HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
(Junping Du via szetszwo)
HDFS-5009. Include storage information in the LocatedBlock. (szetszwo)
HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
fix a synchronization problem in DatanodeStorageInfo. (szetszwo)
HDFS-5157. Add StorageType to FsVolume. (Junping Du via szetszwo)
HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
datanodes. (szetszwo)
HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
HDFS-5222. Move block schedule information from DatanodeDescriptor to
DatanodeStorageInfo. (szetszwo)
HDFS-4988. Datanode must support all the volumes as individual storages.
(Arpit Agarwal)
HDFS-5377. Heartbeats from Datanode should include one storage report
per storage directory. (Arpit Agarwal)
HDFS-5398. NameNode changes to process storage reports per storage
directory. (Arpit Agarwal)
HDFS-5390. Send one incremental block report per storage directory.
(Arpit Agarwal)
HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities. (szetszwo)
HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
Arpit Agarwal)
HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
Agarwal)
HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
HDFS-5448. Datanode should generate its ID on first registration. (Arpit
Agarwal)
HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
Agarwal)
HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
(Contributed by szetszwo)
HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
by szetszwo)
HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
Agarwal)
HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
(Contributed by szetszwo)
HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
TestNNThroughputBenchmark (Contributed by szetszwo)
HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
Agarwal)
HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
by Junping Du)
HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
Agarwal)
HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
(Junping Du via Arpit Agarwal)
HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
Agarwal)
HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
Agarwal)
HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
szetszwo)
HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
not accurate. (Eric Sirianni via Arpit Agarwal)
HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
HDFS-5589. Namenode loops caching and uncaching when data should be
uncached (awang via cmccabe)
Release 2.4.0 - UNRELEASED
@@ -608,6 +497,10 @@ Release 2.4.0 - UNRELEASED
HDFS-5514. FSNamesystem's fsLock should allow custom implementation (daryn)
HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
as a collection of storages (see breakdown of tasks below for features and
contributors).
IMPROVEMENTS
HDFS-5267. Remove volatile from LightWeightHashSet. (Junping Du via llu)
@@ -618,9 +511,6 @@ Release 2.4.0 - UNRELEASED
HDFS-5004. Add additional JMX bean for NameNode status data
(Trevor Lorimer via cos)
HDFS-5068. Convert NNThroughputBenchmark to a Tool to allow generic options.
(shv)
HDFS-4994. Audit log getContentSummary() calls. (Robert Parker via kihwal)
HDFS-5144. Document time unit to NameNodeMetrics. (Akira Ajisaka via
@@ -768,6 +658,11 @@ Release 2.4.0 - UNRELEASED
HDFS-2933. Improve DataNode Web UI Index Page. (Vivek Ganesan via
Arpit Agarwal)
HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper.
(Haohui Mai via jing9)
HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe)
OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
@@ -833,6 +728,139 @@ Release 2.4.0 - UNRELEASED
HDFS-5690. DataNode fails to start in secure mode when dfs.http.policy equals to
HTTP_ONLY. (Haohui Mai via jing9)
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
HDFS-4985. Add storage type to the protocol and expose it in block report
and block locations. (Arpit Agarwal)
HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
HDFS-5000. DataNode configuration should allow specifying storage type.
(Arpit Agarwal)
HDFS-4987. Namenode changes to track multiple storages per datanode.
(szetszwo)
HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
(Junping Du via szetszwo)
HDFS-5009. Include storage information in the LocatedBlock. (szetszwo)
HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
fix a synchronization problem in DatanodeStorageInfo. (szetszwo)
HDFS-5157. Add StorageType to FsVolume. (Junping Du via szetszwo)
HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
datanodes. (szetszwo)
HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
HDFS-5222. Move block schedule information from DatanodeDescriptor to
DatanodeStorageInfo. (szetszwo)
HDFS-4988. Datanode must support all the volumes as individual storages.
(Arpit Agarwal)
HDFS-5377. Heartbeats from Datanode should include one storage report
per storage directory. (Arpit Agarwal)
HDFS-5398. NameNode changes to process storage reports per storage
directory. (Arpit Agarwal)
HDFS-5390. Send one incremental block report per storage directory.
(Arpit Agarwal)
HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities. (szetszwo)
HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
Arpit Agarwal)
HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
Agarwal)
HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
HDFS-5448. Datanode should generate its ID on first registration. (Arpit
Agarwal)
HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
Agarwal)
HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
(Contributed by szetszwo)
HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
by szetszwo)
HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
Agarwal)
HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
(Contributed by szetszwo)
HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
TestNNThroughputBenchmark (Contributed by szetszwo)
HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
Agarwal)
HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
by Junping Du)
HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
Agarwal)
HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
(Junping Du via Arpit Agarwal)
HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
Agarwal)
HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
Agarwal)
HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
szetszwo)
HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
not accurate. (Eric Sirianni via Arpit Agarwal)
HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
HDFS-5406. Send incremental block reports for all storages in a
single call. (Arpit Agarwal)
HDFS-5454. DataNode UUID should be assigned prior to FsDataset
initialization. (Arpit Agarwal)
HDFS-5667. Include DatanodeStorage in StorageReport. (Arpit Agarwal)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES
@@ -866,6 +894,12 @@ Release 2.3.0 - UNRELEASED
HDFS-5662. Can't decommission a DataNode due to file's replication factor
larger than the rest of the cluster size. (brandonli)
HDFS-5068. Convert NNThroughputBenchmark to a Tool to allow generic options.
(shv)
HDFS-5675. Add Mkdirs operation to NNThroughputBenchmark.
(Plamen Jeliazkov via shv)
OPTIMIZATIONS
BUG FIXES
@@ -1016,6 +1050,11 @@ Release 2.3.0 - UNRELEASED
HDFS-5661. Browsing FileSystem via web ui, should use datanode's fqdn instead of ip
address. (Benoy Antony via jing9)
HDFS-5582. hdfs getconf -excludeFile or -includeFile always failed (sathish
via cmccabe)
HDFS-5671. Fix socket leak in DFSInputStream#getBlockReader. (JamesLi via umamahesh)
Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES

View File: DFSConfigKeys.java

@@ -108,8 +108,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0;
public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume";
public static final int DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_DEFAULT = 4;
public static final String DFS_NAMENODE_CACHING_ENABLED_KEY = "dfs.namenode.caching.enabled";
public static final boolean DFS_NAMENODE_CACHING_ENABLED_DEFAULT = false;
public static final String DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT =
"dfs.namenode.path.based.cache.block.map.allocation.percent";
public static final float DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT = 0.25f;
public static final String DFS_NAMENODE_HTTP_PORT_KEY = "dfs.http.port";
public static final int DFS_NAMENODE_HTTP_PORT_DEFAULT = 50070;

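With dfs.namenode.caching.enabled gone (HDFS-5651, noted in CHANGES.txt above), caching is always on and the only knob in this hunk is the block map sizing; a sketch (0.25f mirrors the default shown above):

import org.apache.hadoop.conf.Configuration;

public class CacheBlockMapTuning {
  static void demo() {
    Configuration conf = new Configuration();
    // Fraction of NameNode heap reserved for the cached-block map.
    conf.setFloat(
        "dfs.namenode.path.based.cache.block.map.allocation.percent", 0.25f);
  }
}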
View File: DFSInputStream.java

@@ -1188,11 +1188,21 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
}
// Try to create a new remote peer.
Peer peer = newTcpPeer(dnAddr);
return BlockReaderFactory.newBlockReader(
dfsClient.getConf(), file, block, blockToken, startOffset,
len, verifyChecksum, clientName, peer, chosenNode,
dsFactory, peerCache, fileInputStreamCache, false,
try {
reader = BlockReaderFactory.newBlockReader(dfsClient.getConf(), file,
block, blockToken, startOffset, len, verifyChecksum, clientName,
peer, chosenNode, dsFactory, peerCache, fileInputStreamCache, false,
curCachingStrategy);
return reader;
} catch (IOException ex) {
DFSClient.LOG.debug(
"Exception while getting block reader, closing stale " + peer, ex);
throw ex;
} finally {
if (reader == null) {
IOUtils.closeQuietly(peer);
}
}
}

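The hunk follows a close-on-failure ownership pattern (HDFS-5671): if newBlockReader throws, the peer is still owned by this method and must be closed; on success the reader takes ownership. A hypothetical generic helper showing the same shape:

import java.io.Closeable;
import java.util.concurrent.Callable;
import org.apache.commons.io.IOUtils;

public class BuildOrClose {
  // Hypothetical helper: either the builder returns an object that now owns
  // the socket, or the socket is closed before the exception propagates.
  static <R> R buildOrClose(Closeable socket, Callable<R> builder)
      throws Exception {
    R result = null;
    try {
      result = builder.call();
      return result;
    } finally {
      if (result == null) {
        IOUtils.closeQuietly(socket);
      }
    }
  }
}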
View File: PBHelper.java

@@ -533,21 +533,7 @@ public class PBHelper {
static public DatanodeInfoProto convertDatanodeInfo(DatanodeInfo di) {
if (di == null) return null;
DatanodeInfoProto.Builder builder = DatanodeInfoProto.newBuilder();
if (di.getNetworkLocation() != null) {
builder.setLocation(di.getNetworkLocation());
}
return builder.
setId(PBHelper.convert((DatanodeID) di)).
setCapacity(di.getCapacity()).
setDfsUsed(di.getDfsUsed()).
setRemaining(di.getRemaining()).
setBlockPoolUsed(di.getBlockPoolUsed()).
setLastUpdate(di.getLastUpdate()).
setXceiverCount(di.getXceiverCount()).
setAdminState(PBHelper.convert(di.getAdminState())).
build();
return convert(di);
}
@@ -591,15 +577,20 @@ public class PBHelper {
public static DatanodeInfoProto convert(DatanodeInfo info) {
DatanodeInfoProto.Builder builder = DatanodeInfoProto.newBuilder();
builder.setBlockPoolUsed(info.getBlockPoolUsed());
builder.setAdminState(PBHelper.convert(info.getAdminState()));
builder.setCapacity(info.getCapacity())
.setDfsUsed(info.getDfsUsed())
if (info.getNetworkLocation() != null) {
builder.setLocation(info.getNetworkLocation());
}
builder
.setId(PBHelper.convert((DatanodeID)info))
.setLastUpdate(info.getLastUpdate())
.setLocation(info.getNetworkLocation())
.setCapacity(info.getCapacity())
.setDfsUsed(info.getDfsUsed())
.setRemaining(info.getRemaining())
.setBlockPoolUsed(info.getBlockPoolUsed())
.setCacheCapacity(info.getCacheCapacity())
.setCacheUsed(info.getCacheUsed())
.setLastUpdate(info.getLastUpdate())
.setXceiverCount(info.getXceiverCount())
.setAdminState(PBHelper.convert(info.getAdminState()))
.build();
return builder.build();
}
@@ -1591,13 +1582,17 @@ public class PBHelper {
StorageReportProto.Builder builder = StorageReportProto.newBuilder()
.setBlockPoolUsed(r.getBlockPoolUsed()).setCapacity(r.getCapacity())
.setDfsUsed(r.getDfsUsed()).setRemaining(r.getRemaining())
.setStorageUuid(r.getStorageID());
.setStorageUuid(r.getStorage().getStorageID())
.setStorage(convert(r.getStorage()));
return builder.build();
}
public static StorageReport convert(StorageReportProto p) {
return new StorageReport(p.getStorageUuid(), p.getFailed(),
p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
return new StorageReport(
p.hasStorage() ?
convert(p.getStorage()) :
new DatanodeStorage(p.getStorageUuid()),
p.getFailed(), p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
p.getBlockPoolUsed());
}

View File: CacheReplicationMonitor.java

@@ -21,12 +21,14 @@ import static org.apache.hadoop.util.ExitUtil.terminate;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
@@ -76,7 +78,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
/**
* Pseudorandom number source
*/
private final Random random = new Random();
private static final Random random = new Random();
/**
* The interval at which we scan the namesystem for caching changes.
@@ -87,17 +89,17 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
* The CacheReplicationMonitor (CRM) lock. Used to synchronize starting and
* waiting for rescan operations.
*/
private final ReentrantLock lock = new ReentrantLock();
private final ReentrantLock lock;
/**
* Notifies the scan thread that an immediate rescan is needed.
*/
private final Condition doRescan = lock.newCondition();
private final Condition doRescan;
/**
* Notifies waiting threads that a rescan has finished.
*/
private final Condition scanFinished = lock.newCondition();
private final Condition scanFinished;
/**
* Whether there are pending CacheManager operations that necessitate a
@@ -121,11 +123,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
*/
private boolean shutdown = false;
/**
* The monotonic time at which the current scan started.
*/
private long startTimeMs;
/**
* Mark status of the current scan.
*/
@@ -142,24 +139,27 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
private long scannedBlocks;
public CacheReplicationMonitor(FSNamesystem namesystem,
CacheManager cacheManager, long intervalMs) {
CacheManager cacheManager, long intervalMs, ReentrantLock lock) {
this.namesystem = namesystem;
this.blockManager = namesystem.getBlockManager();
this.cacheManager = cacheManager;
this.cachedBlocks = cacheManager.getCachedBlocks();
this.intervalMs = intervalMs;
this.lock = lock;
this.doRescan = this.lock.newCondition();
this.scanFinished = this.lock.newCondition();
}
@Override
public void run() {
startTimeMs = 0;
long startTimeMs = 0;
Thread.currentThread().setName("CacheReplicationMonitor(" +
System.identityHashCode(this) + ")");
LOG.info("Starting CacheReplicationMonitor with interval " +
intervalMs + " milliseconds");
try {
long curTimeMs = Time.monotonicNow();
while (true) {
// Not all of the variables accessed here need the CRM lock, but take
// it anyway for simplicity
lock.lock();
try {
while (true) {
@@ -180,12 +180,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
doRescan.await(delta, TimeUnit.MILLISECONDS);
curTimeMs = Time.monotonicNow();
}
} finally {
lock.unlock();
}
// Mark scan as started, clear needsRescan
lock.lock();
try {
isScanning = true;
needsRescan = false;
} finally {
@@ -195,7 +189,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
mark = !mark;
rescan();
curTimeMs = Time.monotonicNow();
// Retake the CRM lock to update synchronization-related variables
// Update synchronization-related variables.
lock.lock();
try {
isScanning = false;
@@ -208,32 +202,15 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
scannedBlocks + " block(s) in " + (curTimeMs - startTimeMs) + " " +
"millisecond(s).");
}
} catch (InterruptedException e) {
LOG.info("Shutting down CacheReplicationMonitor.");
return;
} catch (Throwable t) {
LOG.fatal("Thread exiting", t);
terminate(1, t);
}
}
/**
* Similar to {@link CacheReplicationMonitor#waitForRescan()}, except it only
* waits if there are pending operations that necessitate a rescan as
* indicated by {@link #setNeedsRescan()}.
* <p>
* Note that this call may release the FSN lock, so operations before and
* after are not necessarily atomic.
*/
public void waitForRescanIfNeeded() {
lock.lock();
try {
if (!needsRescan) {
return;
}
} finally {
lock.unlock();
}
waitForRescan();
}
/**
* Waits for a rescan to complete. This doesn't guarantee consistency with
* pending operations, only relative recency, since it will not force a new
@@ -242,33 +219,21 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
* Note that this call will release the FSN lock, so operations before and
* after are not atomic.
*/
public void waitForRescan() {
// Drop the FSN lock temporarily and retake it after we finish waiting
// Need to handle both the read lock and the write lock
boolean retakeWriteLock = false;
if (namesystem.hasWriteLock()) {
namesystem.writeUnlock();
retakeWriteLock = true;
} else if (namesystem.hasReadLock()) {
namesystem.readUnlock();
} else {
// Expected to have at least one of the locks
Preconditions.checkState(false,
"Need to be holding either the read or write lock");
public void waitForRescanIfNeeded() {
Preconditions.checkArgument(!namesystem.hasWriteLock(),
"Must not hold the FSN write lock when waiting for a rescan.");
Preconditions.checkArgument(lock.isHeldByCurrentThread(),
"Must hold the CRM lock when waiting for a rescan.");
if (!needsRescan) {
return;
}
// try/finally for retaking FSN lock
try {
lock.lock();
// try/finally for releasing CRM lock
try {
// If no scan is already ongoing, mark the CRM as dirty and kick
if (!isScanning) {
needsRescan = true;
doRescan.signal();
}
// Wait until the scan finishes and the count advances
final long startCount = scanCount;
while (startCount >= scanCount) {
while ((!shutdown) && (startCount >= scanCount)) {
try {
scanFinished.await();
} catch (InterruptedException e) {
@@ -277,16 +242,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
break;
}
}
} finally {
lock.unlock();
}
} finally {
if (retakeWriteLock) {
namesystem.writeLock();
} else {
namesystem.readLock();
}
}
}
/**
@@ -294,42 +249,43 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
* changes that require a rescan.
*/
public void setNeedsRescan() {
lock.lock();
try {
Preconditions.checkArgument(lock.isHeldByCurrentThread(),
"Must hold the CRM lock when setting the needsRescan bit.");
this.needsRescan = true;
} finally {
lock.unlock();
}
}
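// Illustrative caller-side pattern implied by the new preconditions: the
// CacheManager (which now owns the shared "crmLock"; these names are
// assumptions, not shown in this hunk) brackets mutations and the optional
// wait with the lock:
//
//   crmLock.lock();
//   try {
//     monitor.setNeedsRescan();
//     monitor.waitForRescanIfNeeded();
//   } finally {
//     crmLock.unlock();
//   }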
/**
* Shut down and join the monitor thread.
* Shut down the monitor thread.
*/
@Override
public void close() throws IOException {
Preconditions.checkArgument(namesystem.hasWriteLock());
lock.lock();
try {
if (shutdown) return;
// Since we hold both the FSN write lock and the CRM lock here,
// we know that the CRM thread cannot be currently modifying
// the cache manager state while we're closing it.
// Since the CRM thread checks the value of 'shutdown' after waiting
// for a lock, we know that the thread will not modify the cache
// manager state after this point.
shutdown = true;
doRescan.signalAll();
scanFinished.signalAll();
} finally {
lock.unlock();
}
try {
if (this.isAlive()) {
this.join(60000);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
private void rescan() {
private void rescan() throws InterruptedException {
scannedDirectives = 0;
scannedBlocks = 0;
namesystem.writeLock();
try {
if (shutdown) {
throw new InterruptedException("CacheReplicationMonitor was " +
"shut down.");
}
resetStatistics();
rescanCacheDirectives();
rescanCachedBlockMap();
@@ -356,8 +312,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
FSDirectory fsDir = namesystem.getFSDirectory();
final long now = new Date().getTime();
for (CacheDirective directive : cacheManager.getCacheDirectives()) {
// Reset the directive's statistics
directive.resetStatistics();
// Skip processing this entry if it has expired
if (LOG.isTraceEnabled()) {
LOG.trace("Directive expiry is at " + directive.getExpiryTime());
@@ -460,14 +414,21 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
directive.getReplication()) * blockInfo.getNumBytes();
cachedTotal += cachedByBlock;
if (mark != ocblock.getMark()) {
// Mark hasn't been set in this scan, so update replication and mark.
if ((mark != ocblock.getMark()) ||
(ocblock.getReplication() < directive.getReplication())) {
//
// Overwrite the block's replication and mark in two cases:
//
// 1. If the mark on the CachedBlock is different from the mark for
// this scan, that means the block hasn't been updated during this
// scan, and we should overwrite whatever is there, since it is no
// longer valid.
//
// 2. If the replication in the CachedBlock is less than what the
// directive asks for, we want to increase the block's replication
// field to what the directive asks for.
//
ocblock.setReplicationAndMark(directive.getReplication(), mark);
} else {
// Mark already set in this scan. Set replication to highest value in
// any CacheDirective that covers this file.
ocblock.setReplicationAndMark((short)Math.max(
directive.getReplication(), ocblock.getReplication()), mark);
}
}
}
@@ -483,6 +444,39 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
}
}
private String findReasonForNotCaching(CachedBlock cblock,
BlockInfo blockInfo) {
if (blockInfo == null) {
// Somehow, a cache report with the block arrived, but the block
// reports from the DataNode haven't (yet?) described such a block.
// Alternately, the NameNode might have invalidated the block, but the
// DataNode hasn't caught up. In any case, we want to tell the DN
// to uncache this.
return "not tracked by the BlockManager";
} else if (!blockInfo.isComplete()) {
// When a cached block changes state from complete to some other state
// on the DataNode (perhaps because of append), it will begin the
// uncaching process. However, the uncaching process is not
// instantaneous, especially if clients have pinned the block. So
// there may be a period of time when incomplete blocks remain cached
// on the DataNodes.
return "not complete";
} else if (cblock.getReplication() == 0) {
// Since 0 is not a valid value for a cache directive's replication
field, seeing a replication of 0 on a CachedBlock means that it
// has never been reached by any sweep.
return "not needed by any directives";
} else if (cblock.getMark() != mark) {
// Although the block was needed in the past, we didn't reach it during
// the current sweep. Therefore, it doesn't need to be cached any more.
// Need to set the replication to 0 so it doesn't flip back to cached
// when the mark flips on the next scan
cblock.setReplicationAndMark((short)0, mark);
return "no longer needed by any directives";
}
return null;
}
/**
* Scan through the cached block map.
* Any blocks which are under-replicated should be assigned new Datanodes.
@@ -508,11 +502,17 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
iter.remove();
}
}
// If the block's mark doesn't match with the mark of this scan, that
// means that this block couldn't be reached during this scan. That means
// it doesn't need to be cached any more.
int neededCached = (cblock.getMark() != mark) ?
0 : cblock.getReplication();
BlockInfo blockInfo = blockManager.
getStoredBlock(new Block(cblock.getBlockId()));
String reason = findReasonForNotCaching(cblock, blockInfo);
int neededCached = 0;
if (reason != null) {
if (LOG.isDebugEnabled()) {
LOG.debug("not caching " + cblock + " because it is " + reason);
}
} else {
neededCached = cblock.getReplication();
}
int numCached = cached.size();
if (numCached >= neededCached) {
// If we have enough replicas, drop all pending cached.
@@ -566,9 +566,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
private void addNewPendingUncached(int neededUncached,
CachedBlock cachedBlock, List<DatanodeDescriptor> cached,
List<DatanodeDescriptor> pendingUncached) {
if (!cacheManager.isActive()) {
return;
}
// Figure out which replicas can be uncached.
LinkedList<DatanodeDescriptor> possibilities =
new LinkedList<DatanodeDescriptor>();
@@ -601,19 +598,18 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
* @param pendingCached A list of DataNodes that will soon cache the
* block.
*/
private void addNewPendingCached(int neededCached,
private void addNewPendingCached(final int neededCached,
CachedBlock cachedBlock, List<DatanodeDescriptor> cached,
List<DatanodeDescriptor> pendingCached) {
if (!cacheManager.isActive()) {
return;
}
// To figure out which replicas can be cached, we consult the
// blocksMap. We don't want to try to cache a corrupt replica, though.
BlockInfo blockInfo = blockManager.
getStoredBlock(new Block(cachedBlock.getBlockId()));
if (blockInfo == null) {
LOG.debug("Not caching block " + cachedBlock + " because it " +
"was deleted from all DataNodes.");
if (LOG.isDebugEnabled()) {
LOG.debug("Not caching block " + cachedBlock + " because there " +
"is no record of it on the NameNode.");
}
return;
}
if (!blockInfo.isComplete()) {
@@ -623,35 +619,156 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
}
return;
}
List<DatanodeDescriptor> possibilities = new LinkedList<DatanodeDescriptor>();
// Filter the list of replicas to only the valid targets
List<DatanodeDescriptor> possibilities =
new LinkedList<DatanodeDescriptor>();
int numReplicas = blockInfo.getCapacity();
Collection<DatanodeDescriptor> corrupt =
blockManager.getCorruptReplicas(blockInfo);
int outOfCapacity = 0;
for (int i = 0; i < numReplicas; i++) {
DatanodeDescriptor datanode = blockInfo.getDatanode(i);
if ((datanode != null) &&
((!pendingCached.contains(datanode)) &&
((corrupt == null) || (!corrupt.contains(datanode))))) {
if (datanode == null) {
continue;
}
if (datanode.isDecommissioned() || datanode.isDecommissionInProgress()) {
continue;
}
if (corrupt != null && corrupt.contains(datanode)) {
continue;
}
if (pendingCached.contains(datanode) || cached.contains(datanode)) {
continue;
}
long pendingCapacity = datanode.getCacheRemaining();
// Subtract pending cached blocks from effective capacity
Iterator<CachedBlock> it = datanode.getPendingCached().iterator();
while (it.hasNext()) {
CachedBlock cBlock = it.next();
BlockInfo info =
blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
if (info != null) {
pendingCapacity -= info.getNumBytes();
}
}
it = datanode.getPendingUncached().iterator();
// Add pending uncached blocks from effective capacity
while (it.hasNext()) {
CachedBlock cBlock = it.next();
BlockInfo info =
blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
if (info != null) {
pendingCapacity += info.getNumBytes();
}
}
if (pendingCapacity < blockInfo.getNumBytes()) {
if (LOG.isTraceEnabled()) {
LOG.trace("Datanode " + datanode + " is not a valid possibility for"
+ " block " + blockInfo.getBlockId() + " of size "
+ blockInfo.getNumBytes() + " bytes, only has "
+ datanode.getCacheRemaining() + " bytes of cache remaining.");
}
outOfCapacity++;
continue;
}
possibilities.add(datanode);
}
}
while (neededCached > 0) {
if (possibilities.isEmpty()) {
LOG.warn("We need " + neededCached + " more replica(s) than " +
"actually exist to provide a cache replication of " +
cachedBlock.getReplication() + " for " + cachedBlock);
return;
}
DatanodeDescriptor datanode =
possibilities.remove(random.nextInt(possibilities.size()));
if (LOG.isDebugEnabled()) {
LOG.debug("AddNewPendingCached: datanode " + datanode +
" will now cache block " + cachedBlock);
}
List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities,
neededCached, blockManager.getDatanodeManager().getStaleInterval());
for (DatanodeDescriptor datanode : chosen) {
pendingCached.add(datanode);
boolean added = datanode.getPendingCached().add(cachedBlock);
assert added;
neededCached--;
}
// We were unable to satisfy the requested replication factor
if (neededCached > chosen.size()) {
if (LOG.isDebugEnabled()) {
LOG.debug(
"Only have " +
(cachedBlock.getReplication() - neededCached + chosen.size()) +
" of " + cachedBlock.getReplication() + " cached replicas for " +
cachedBlock + " (" + outOfCapacity + " nodes have insufficient " +
"capacity).");
}
}
}
/**
* Chooses datanode locations for caching from a list of valid possibilities.
* Non-stale nodes are chosen before stale nodes.
*
* @param possibilities List of candidate datanodes
* @param neededCached Number of replicas needed
* @param staleInterval Age of a stale datanode
* @return A list of chosen datanodes
*/
private static List<DatanodeDescriptor> chooseDatanodesForCaching(
final List<DatanodeDescriptor> possibilities, final int neededCached,
final long staleInterval) {
// Make a copy that we can modify
List<DatanodeDescriptor> targets =
new ArrayList<DatanodeDescriptor>(possibilities);
// Selected targets
List<DatanodeDescriptor> chosen = new LinkedList<DatanodeDescriptor>();
// Filter out stale datanodes
List<DatanodeDescriptor> stale = new LinkedList<DatanodeDescriptor>();
Iterator<DatanodeDescriptor> it = targets.iterator();
while (it.hasNext()) {
DatanodeDescriptor d = it.next();
if (d.isStale(staleInterval)) {
it.remove();
stale.add(d);
}
}
// Select targets
while (chosen.size() < neededCached) {
// Try to use stale nodes if we're out of non-stale nodes, else we're done
if (targets.isEmpty()) {
if (!stale.isEmpty()) {
targets = stale;
} else {
break;
}
}
// Select a random target
DatanodeDescriptor target =
chooseRandomDatanodeByRemainingCapacity(targets);
chosen.add(target);
targets.remove(target);
}
return chosen;
}
/**
* Choose a single datanode from the provided list of possible
* targets, weighted by the percentage of free space remaining on the node.
*
* @return The chosen datanode
*/
private static DatanodeDescriptor chooseRandomDatanodeByRemainingCapacity(
final List<DatanodeDescriptor> targets) {
// Use a weighted probability to choose the target datanode
float total = 0;
for (DatanodeDescriptor d : targets) {
total += d.getCacheRemainingPercent();
}
// Give each datanode a portion of keyspace equal to its relative weight
// [0, w1) selects d1, [w1, w2) selects d2, etc.
TreeMap<Integer, DatanodeDescriptor> lottery =
new TreeMap<Integer, DatanodeDescriptor>();
int offset = 0;
for (DatanodeDescriptor d : targets) {
// Since we're using floats, be paranoid about negative values
int weight =
Math.max(1, (int)((d.getCacheRemainingPercent() / total) * 1000000));
offset += weight;
lottery.put(offset, d);
}
// Choose a number from [0, offset), which is the total amount of weight,
// to select the winner
DatanodeDescriptor winner =
lottery.higherEntry(random.nextInt(offset)).getValue();
return winner;
}
}
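
The lottery in chooseRandomDatanodeByRemainingCapacity gives each node a half-open key range proportional to its weight and lets TreeMap.higherEntry find the owner of a uniform random draw. A self-contained sketch of the same technique, with made-up weights and node names:

    import java.util.Random;
    import java.util.TreeMap;

    class LotteryDemo {
      public static void main(String[] args) {
        String[] nodes = {"dn1", "dn2", "dn3"};
        int[] weights = {50, 30, 20};        // e.g. cache-remaining percents
        TreeMap<Integer, String> lottery = new TreeMap<Integer, String>();
        int offset = 0;
        for (int i = 0; i < nodes.length; i++) {
          offset += weights[i];
          lottery.put(offset, nodes[i]);     // dn1 owns [0,50), dn2 [50,80)...
        }
        // higherEntry(k) returns the first entry whose key is strictly
        // greater than k, so a draw of 63 falls in dn2's range [50,80).
        String winner =
            lottery.higherEntry(new Random().nextInt(offset)).getValue();
        System.out.println(winner);
      }
    }
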
View File
@ -355,11 +355,11 @@ public class DatanodeDescriptor extends DatanodeInfo {
setLastUpdate(Time.now());
this.volumeFailures = volFailures;
for (StorageReport report : reports) {
DatanodeStorageInfo storage = storageMap.get(report.getStorageID());
DatanodeStorageInfo storage = storageMap.get(report.getStorage().getStorageID());
if (storage == null) {
// This is seen during cluster initialization when the heartbeat
// is received before the initial block reports from each storage.
storage = updateStorage(new DatanodeStorage(report.getStorageID()));
storage = updateStorage(report.getStorage());
}
storage.receivedHeartbeat(report);
totalCapacity += report.getCapacity();
View File
@ -1443,6 +1443,13 @@ public class DatanodeManager {
return getClass().getSimpleName() + ": " + host2DatanodeMap;
}
public void clearPendingCachingCommands() {
for (DatanodeDescriptor dn : datanodeMap.values()) {
dn.getPendingCached().clear();
dn.getPendingUncached().clear();
}
}
public void setShouldSendCachingCommands(boolean shouldSendCachingCommands) {
this.shouldSendCachingCommands = shouldSendCachingCommands;
}
View File
@ -121,7 +121,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
reports = new StorageReport[volumes.volumes.size()];
int i = 0;
for (FsVolumeImpl volume : volumes.volumes) {
reports[i++] = new StorageReport(volume.getStorageID(),
reports[i++] = new StorageReport(volume.toDatanodeStorage(),
false,
volume.getCapacity(),
volume.getDfsUsed(),
@ -237,12 +237,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
final List<FsVolumeImpl> volArray = new ArrayList<FsVolumeImpl>(
storage.getNumStorageDirs());
for (int idx = 0; idx < storage.getNumStorageDirs(); idx++) {
// TODO: getStorageTypeFromLocations() is only a temporary workaround and
// should be replaced with getting storage type from DataStorage (missing
// storage type now) directly.
Storage.StorageDirectory sd = storage.getStorageDir(idx);
final File dir = sd.getCurrentDir();
final StorageType storageType = getStorageTypeFromLocations(dataLocations, dir);
final StorageType storageType = getStorageTypeFromLocations(dataLocations, sd.getRoot());
volArray.add(new FsVolumeImpl(this, sd.getStorageUuid(), dir, conf,
storageType));
LOG.info("Added volume - " + dir + ", StorageType: " + storageType);
View File
@ -19,10 +19,10 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executor;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
@ -54,7 +54,7 @@ class FsVolumeImpl implements FsVolumeSpi {
private final String storageID;
private final StorageType storageType;
private final Map<String, BlockPoolSlice> bpSlices
= new HashMap<String, BlockPoolSlice>();
= new ConcurrentHashMap<String, BlockPoolSlice>();
private final File currentDir; // <StorageDirectory>/current
private final DF usage;
private final long reserved;
View File
@ -17,8 +17,8 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES;
@ -40,6 +40,7 @@ import java.util.List;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
@ -62,7 +63,6 @@ import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
@ -85,7 +85,7 @@ import com.google.common.annotations.VisibleForTesting;
/**
* The Cache Manager handles caching on DataNodes.
*
* This class is instantiated by the FSNamesystem when caching is enabled.
* This class is instantiated by the FSNamesystem.
* It maintains the mapping of cached blocks to datanodes via processing
* datanode cache reports. Based on these reports and addition and removal of
* caching directives, we will schedule caching and uncaching work.
@ -94,6 +94,8 @@ import com.google.common.annotations.VisibleForTesting;
public final class CacheManager {
public static final Log LOG = LogFactory.getLog(CacheManager.class);
private static final float MIN_CACHED_BLOCKS_PERCENT = 0.001f;
// TODO: add pending / underCached / schedule cached blocks stats.
/**
@ -148,34 +150,16 @@ public final class CacheManager {
*/
private final long scanIntervalMs;
/**
* Whether caching is enabled.
*
* If caching is disabled, we will not process cache reports or store
* information about what is cached where. We also do not start the
* CacheReplicationMonitor thread. This will save resources, but provide
* less functionality.
*
* Even when caching is disabled, we still store path-based cache
* information. This information is stored in the edit log and fsimage. We
* don't want to lose it just because a configuration setting was turned off.
* However, we will not act on this information if caching is disabled.
*/
private final boolean enabled;
/**
* Whether the CacheManager is active.
*
* When the CacheManager is active, it tells the DataNodes what to cache
* and uncache. The CacheManager cannot become active if enabled = false.
*/
private boolean active = false;
/**
* All cached blocks.
*/
private final GSet<CachedBlock, CachedBlock> cachedBlocks;
/**
* Lock which protects the CacheReplicationMonitor.
*/
private final ReentrantLock crmLock = new ReentrantLock();
/**
* The CacheReplicationMonitor.
*/
@ -195,56 +179,53 @@ public final class CacheManager {
scanIntervalMs = conf.getLong(
DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
this.enabled = conf.getBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY,
DFS_NAMENODE_CACHING_ENABLED_DEFAULT);
this.cachedBlocks = !enabled ? null :
new LightWeightGSet<CachedBlock, CachedBlock>(
LightWeightGSet.computeCapacity(0.25, "cachedBlocks"));
float cachedBlocksPercent = conf.getFloat(
DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT);
if (cachedBlocksPercent < MIN_CACHED_BLOCKS_PERCENT) {
LOG.info("Using minimum value " + MIN_CACHED_BLOCKS_PERCENT +
" for " + DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT);
cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT;
}
this.cachedBlocks = new LightWeightGSet<CachedBlock, CachedBlock>(
LightWeightGSet.computeCapacity(cachedBlocksPercent,
"cachedBlocks"));
}
/**
* Activate the cache manager.
*
* When the cache manager is active, tell the datanodes where to cache files.
*/
public void activate() {
public void startMonitorThread() {
crmLock.lock();
try {
if (this.monitor == null) {
this.monitor = new CacheReplicationMonitor(namesystem, this,
scanIntervalMs, crmLock);
this.monitor.start();
}
} finally {
crmLock.unlock();
}
}
public void stopMonitorThread() {
crmLock.lock();
try {
if (this.monitor != null) {
CacheReplicationMonitor prevMonitor = this.monitor;
this.monitor = null;
IOUtils.closeQuietly(prevMonitor);
}
} finally {
crmLock.unlock();
}
}
public void clearDirectiveStats() {
assert namesystem.hasWriteLock();
if (enabled && (!active)) {
LOG.info("Activating CacheManager. " +
"Starting replication monitor thread...");
active = true;
monitor = new CacheReplicationMonitor(namesystem, this,
scanIntervalMs);
monitor.start();
for (CacheDirective directive : directivesById.values()) {
directive.resetStatistics();
}
}
/**
* Deactivate the cache manager.
*
* When the cache manager is inactive, it does not tell the datanodes where to
* cache files.
*/
public void deactivate() {
assert namesystem.hasWriteLock();
if (active) {
LOG.info("Deactivating CacheManager. " +
"stopping CacheReplicationMonitor thread...");
active = false;
IOUtils.closeQuietly(monitor);
monitor = null;
LOG.info("CacheReplicationMonitor thread stopped and deactivated.");
}
}
/**
* Return true only if the cache manager is active.
* Must be called under the FSN read or write lock.
*/
public boolean isActive() {
return active;
}
/**
* @return Unmodifiable view of the collection of CachePools.
*/
@ -481,9 +462,7 @@ public final class CacheManager {
directive.addBytesNeeded(stats.getBytesNeeded());
directive.addFilesNeeded(directive.getFilesNeeded());
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
}
/**
@ -515,10 +494,6 @@ public final class CacheManager {
long expiryTime = validateExpiryTime(info, pool.getMaxRelativeExpiryMs());
// Do quota validation if required
if (!flags.contains(CacheFlag.FORCE)) {
// Can't kick and wait if caching is disabled
if (monitor != null) {
monitor.waitForRescan();
}
checkLimit(pool, path, replication);
}
// All validation passed
@ -623,9 +598,7 @@ public final class CacheManager {
validateExpiryTime(infoWithDefaults, destPool.getMaxRelativeExpiryMs());
// Indicate changes to the CRM
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
// Validation passed
removeInternal(prevEntry);
@ -660,9 +633,7 @@ public final class CacheManager {
pool.getDirectiveList().remove(directive);
assert directive.getPool() == null;
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
}
public void removeDirective(long id, FSPermissionChecker pc)
@ -695,9 +666,6 @@ public final class CacheManager {
if (filter.getReplication() != null) {
throw new IOException("Filtering by replication is unsupported.");
}
if (monitor != null) {
monitor.waitForRescanIfNeeded();
}
ArrayList<CacheDirectiveEntry> replies =
new ArrayList<CacheDirectiveEntry>(NUM_PRE_ALLOCATED_ENTRIES);
int numReplies = 0;
@ -806,9 +774,7 @@ public final class CacheManager {
bld.append(prefix).append("set limit to " + info.getLimit());
prefix = "; ";
// New limit changes stats, need to set needs refresh
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
}
if (info.getMaxRelativeExpiryMs() != null) {
final Long maxRelativeExpiry = info.getMaxRelativeExpiryMs();
@ -854,9 +820,7 @@ public final class CacheManager {
directivesById.remove(directive.getId());
iter.remove();
}
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
} catch (IOException e) {
LOG.info("removeCachePool of " + poolName + " failed: ", e);
throw e;
@ -867,9 +831,6 @@ public final class CacheManager {
public BatchedListEntries<CachePoolEntry>
listCachePools(FSPermissionChecker pc, String prevKey) {
assert namesystem.hasReadLock();
if (monitor != null) {
monitor.waitForRescanIfNeeded();
}
final int NUM_PRE_ALLOCATED_ENTRIES = 16;
ArrayList<CachePoolEntry> results =
new ArrayList<CachePoolEntry>(NUM_PRE_ALLOCATED_ENTRIES);
@ -885,9 +846,6 @@ public final class CacheManager {
}
public void setCachedLocations(LocatedBlock block) {
if (!enabled) {
return;
}
CachedBlock cachedBlock =
new CachedBlock(block.getBlock().getBlockId(),
(short)0, false);
@ -903,12 +861,6 @@ public final class CacheManager {
public final void processCacheReport(final DatanodeID datanodeID,
final List<Long> blockIds) throws IOException {
if (!enabled) {
LOG.info("Ignoring cache report from " + datanodeID +
" because " + DFS_NAMENODE_CACHING_ENABLED_KEY + " = false. " +
"number of blocks: " + blockIds.size());
return;
}
namesystem.writeLock();
final long startTime = Time.monotonicNow();
final long endTime;
@ -940,39 +892,28 @@ public final class CacheManager {
final List<Long> blockIds) {
CachedBlocksList cached = datanode.getCached();
cached.clear();
CachedBlocksList cachedList = datanode.getCached();
CachedBlocksList pendingCachedList = datanode.getPendingCached();
for (Iterator<Long> iter = blockIds.iterator(); iter.hasNext(); ) {
Block block = new Block(iter.next());
BlockInfo blockInfo = blockManager.getStoredBlock(block);
if (!blockInfo.isComplete()) {
LOG.warn("Ignoring block id " + block.getBlockId() + ", because " +
"it is in not complete yet. It is in state " +
blockInfo.getBlockUCState());
continue;
}
Collection<DatanodeDescriptor> corruptReplicas =
blockManager.getCorruptReplicas(blockInfo);
if ((corruptReplicas != null) && corruptReplicas.contains(datanode)) {
// The NameNode will eventually remove or update the corrupt block.
// Until then, we pretend that it isn't cached.
LOG.warn("Ignoring cached replica on " + datanode + " of " + block +
" because it is corrupt.");
continue;
}
long blockId = iter.next();
CachedBlock cachedBlock =
new CachedBlock(block.getBlockId(), (short)0, false);
new CachedBlock(blockId, (short)0, false);
CachedBlock prevCachedBlock = cachedBlocks.get(cachedBlock);
// Use the existing CachedBlock if it's present; otherwise,
// insert a new one.
// Add the block ID from the cache report to the cachedBlocks map
// if it's not already there.
if (prevCachedBlock != null) {
cachedBlock = prevCachedBlock;
} else {
cachedBlocks.put(cachedBlock);
}
if (!cachedBlock.isPresent(datanode.getCached())) {
datanode.getCached().add(cachedBlock);
// Add the block to the datanode's implicit cached block list
// if it's not already there. Similarly, remove it from the pending
// cached block list if it exists there.
if (!cachedBlock.isPresent(cachedList)) {
cachedList.add(cachedBlock);
}
if (cachedBlock.isPresent(datanode.getPendingCached())) {
datanode.getPendingCached().remove(cachedBlock);
if (cachedBlock.isPresent(pendingCachedList)) {
pendingCachedList.remove(cachedBlock);
}
}
}
@ -1097,4 +1038,36 @@ public final class CacheManager {
}
prog.endStep(Phase.LOADING_FSIMAGE, step);
}
public void waitForRescanIfNeeded() {
crmLock.lock();
try {
if (monitor != null) {
monitor.waitForRescanIfNeeded();
}
} finally {
crmLock.unlock();
}
}
private void setNeedsRescan() {
crmLock.lock();
try {
if (monitor != null) {
monitor.setNeedsRescan();
}
} finally {
crmLock.unlock();
}
}
@VisibleForTesting
public Thread getCacheReplicationMonitor() {
crmLock.lock();
try {
return monitor;
} finally {
crmLock.unlock();
}
}
}
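
startMonitorThread, stopMonitorThread, setNeedsRescan and waitForRescanIfNeeded above all follow one pattern: every read or write of the nullable monitor field happens under crmLock, so lifecycle calls cannot race. A generic sketch of that guard, assuming hypothetical names:

    import java.util.concurrent.locks.ReentrantLock;

    class MonitorGuard {
      private final ReentrantLock lock = new ReentrantLock();
      private Thread monitor;                // null when stopped

      void start(Runnable task) {
        lock.lock();
        try {
          if (monitor == null) {             // idempotent start
            monitor = new Thread(task, "monitor");
            monitor.start();
          }
        } finally {
          lock.unlock();
        }
      }

      void stop() {
        lock.lock();
        try {
          if (monitor != null) {             // idempotent stop
            Thread prev = monitor;
            monitor = null;                  // clear before shutting down
            prev.interrupt();
          }
        } finally {
          lock.unlock();
        }
      }
    }
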
View File
@ -405,6 +405,7 @@ public class FSImage implements Closeable {
// Directories that don't have previous state do not rollback
boolean canRollback = false;
FSImage prevState = new FSImage(conf);
try {
prevState.getStorage().layoutVersion = HdfsConstants.LAYOUT_VERSION;
for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
StorageDirectory sd = it.next();
@ -459,6 +460,9 @@ public class FSImage implements Closeable {
LOG.info("Rollback of " + sd.getRoot()+ " is complete.");
}
isUpgradeFinalized = true;
} finally {
prevState.close();
}
}
private void doFinalize(StorageDirectory sd) throws IOException {
View File
@ -931,7 +931,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
writeLock();
try {
if (blockManager != null) blockManager.close();
cacheManager.deactivate();
} finally {
writeUnlock();
}
@ -1001,7 +1000,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
editLogRollerThreshold, editLogRollerInterval));
nnEditLogRoller.start();
cacheManager.activate();
cacheManager.startMonitorThread();
blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
} finally {
writeUnlock();
@ -1052,7 +1051,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// so that the tailer starts from the right spot.
dir.fsImage.updateLastAppliedTxIdFromWritten();
}
cacheManager.deactivate();
cacheManager.stopMonitorThread();
cacheManager.clearDirectiveStats();
blockManager.getDatanodeManager().clearPendingCachingCommands();
blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
} finally {
writeUnlock();
@ -7066,6 +7067,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return (Long) cacheEntry.getPayload();
}
boolean success = false;
if (!flags.contains(CacheFlag.FORCE)) {
cacheManager.waitForRescanIfNeeded();
}
writeLock();
Long result = null;
try {
@ -7107,6 +7111,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
if (cacheEntry != null && cacheEntry.isSuccess()) {
return;
}
if (!flags.contains(CacheFlag.FORCE)) {
cacheManager.waitForRescanIfNeeded();
}
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -7166,6 +7173,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
final FSPermissionChecker pc = isPermissionEnabled ?
getPermissionChecker() : null;
BatchedListEntries<CacheDirectiveEntry> results;
cacheManager.waitForRescanIfNeeded();
readLock();
boolean success = false;
try {
@ -7289,6 +7297,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
BatchedListEntries<CachePoolEntry> results;
checkOperation(OperationCategory.READ);
boolean success = false;
cacheManager.waitForRescanIfNeeded();
readLock();
try {
checkOperation(OperationCategory.READ);
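
Note the ordering these FSNamesystem hunks establish: cacheManager.waitForRescanIfNeeded() is always invoked before writeLock() or readLock() is taken, never inside it. A plausible reading (not stated in the diff) is that a rescan itself needs the namesystem lock, so waiting while holding it could stall the monitor. The resulting outline, using the names from the hunks above:

    // Sketch of the add/modify path; validation details elided.
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();  // may block; FSN lock not held
    }
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // validate and apply the cache directive change
    } finally {
      writeUnlock();
    }
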
View File
@ -480,6 +480,14 @@ public class NameNode implements NameNodeStatusMXBean {
* @param conf the configuration
*/
protected void initialize(Configuration conf) throws IOException {
if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
if (intervals != null) {
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
intervals);
}
}
UserGroupInformation.setConfiguration(conf);
loginAsNameNodeUser(conf);
View File
@ -21,7 +21,7 @@ package org.apache.hadoop.hdfs.server.protocol;
* Utilization report for a Datanode storage
*/
public class StorageReport {
private final String storageID;
private final DatanodeStorage storage;
private final boolean failed;
private final long capacity;
private final long dfsUsed;
@ -30,9 +30,9 @@ public class StorageReport {
public static final StorageReport[] EMPTY_ARRAY = {};
public StorageReport(String sid, boolean failed, long capacity, long dfsUsed,
long remaining, long bpUsed) {
this.storageID = sid;
public StorageReport(DatanodeStorage storage, boolean failed,
long capacity, long dfsUsed, long remaining, long bpUsed) {
this.storage = storage;
this.failed = failed;
this.capacity = capacity;
this.dfsUsed = dfsUsed;
@ -40,8 +40,8 @@ public class StorageReport {
this.blockPoolUsed = bpUsed;
}
public String getStorageID() {
return storageID;
public DatanodeStorage getStorage() {
return storage;
}
public boolean isFailed() {
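
The constructor swap above touches every call site; a hedged example of building and reading a report under the new shape (the uuid string is made up):

    DatanodeStorage storage = new DatanodeStorage("DS-example-uuid");
    StorageReport report = new StorageReport(
        storage,       // was: a bare String storageID
        false,         // failed
        1024, 100,     // capacity, dfsUsed
        924, 100);     // remaining, blockPoolUsed
    // Callers that used report.getStorageID() now go through the storage:
    String id = report.getStorage().getStorageID();
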
View File
@ -84,7 +84,12 @@ public class CacheAdmin extends Configured implements Tool {
for (int j = 1; j < args.length; j++) {
argsList.add(args[j]);
}
try {
return command.run(getConf(), argsList);
} catch (IllegalArgumentException e) {
System.err.println(prettifyException(e));
return -1;
}
}
public static void main(String[] argsArray) throws IOException {
@ -135,6 +140,20 @@ public class CacheAdmin extends Configured implements Tool {
return maxTtl;
}
private static Expiration parseExpirationString(String ttlString)
throws IOException {
Expiration ex = null;
if (ttlString != null) {
if (ttlString.equalsIgnoreCase("never")) {
ex = CacheDirectiveInfo.Expiration.NEVER;
} else {
long ttl = DFSUtil.parseRelativeTime(ttlString);
ex = CacheDirectiveInfo.Expiration.newRelative(ttl);
}
}
return ex;
}
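
A usage sketch for the helper above, covering the three input shapes it distinguishes:

    Expiration e1 = parseExpirationString("30m");    // relative TTL, 30 minutes
    Expiration e2 = parseExpirationString("never");  // Expiration.NEVER
    Expiration e3 = parseExpirationString(null);     // null: caller skips
                                                     // setExpiration entirely
    // Any other string that DFSUtil.parseRelativeTime rejects raises
    // IOException, which the add/modify commands report as a ttl error.
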
interface Command {
String getName();
String getShortUsage();
@ -171,6 +190,7 @@ public class CacheAdmin extends Configured implements Tool {
listing.addRow("<time-to-live>", "How long the directive is " +
"valid. Can be specified in minutes, hours, and days, e.g. " +
"30m, 4h, 2d. Valid units are [smhd]." +
" \"never\" indicates a directive that never expires." +
" If unspecified, the directive never expires.");
return getShortUsage() + "\n" +
"Add a new cache directive.\n\n" +
@ -203,16 +223,16 @@ public class CacheAdmin extends Configured implements Tool {
}
String ttlString = StringUtils.popOptionWithArgument("-ttl", args);
if (ttlString != null) {
try {
long ttl = DFSUtil.parseRelativeTime(ttlString);
builder.setExpiration(CacheDirectiveInfo.Expiration.newRelative(ttl));
Expiration ex = parseExpirationString(ttlString);
if (ex != null) {
builder.setExpiration(ex);
}
} catch (IOException e) {
System.err.println(
"Error while parsing ttl value: " + e.getMessage());
return 1;
}
}
if (!args.isEmpty()) {
System.err.println("Can't understand argument: " + args.get(0));
@ -326,7 +346,7 @@ public class CacheAdmin extends Configured implements Tool {
listing.addRow("<time-to-live>", "How long the directive is " +
"valid. Can be specified in minutes, hours, and days, e.g. " +
"30m, 4h, 2d. Valid units are [smhd]." +
" If unspecified, the directive never expires.");
" \"never\" indicates a directive that never expires.");
return getShortUsage() + "\n" +
"Modify a cache directive.\n\n" +
listing.toString();
@ -362,18 +382,17 @@ public class CacheAdmin extends Configured implements Tool {
modified = true;
}
String ttlString = StringUtils.popOptionWithArgument("-ttl", args);
if (ttlString != null) {
long ttl;
try {
ttl = DFSUtil.parseRelativeTime(ttlString);
Expiration ex = parseExpirationString(ttlString);
if (ex != null) {
builder.setExpiration(ex);
modified = true;
}
} catch (IOException e) {
System.err.println(
"Error while parsing ttl value: " + e.getMessage());
return 1;
}
builder.setExpiration(CacheDirectiveInfo.Expiration.newRelative(ttl));
modified = true;
}
if (!args.isEmpty()) {
System.err.println("Can't understand argument: " + args.get(0));
System.err.println("Usage is " + getShortUsage());
@ -578,7 +597,7 @@ public class CacheAdmin extends Configured implements Tool {
public String getShortUsage() {
return "[" + NAME + " <name> [-owner <owner>] " +
"[-group <group>] [-mode <mode>] [-limit <limit>] " +
"[-maxttl <maxTtl>]\n";
"[-maxTtl <maxTtl>]\n";
}
@Override
View File
@ -29,6 +29,7 @@ import java.util.Map;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
@ -85,9 +86,9 @@ public class GetConf extends Configured implements Tool {
map.put(BACKUP.getName().toLowerCase(),
new BackupNodesCommandHandler());
map.put(INCLUDE_FILE.getName().toLowerCase(),
new CommandHandler("DFSConfigKeys.DFS_HOSTS"));
new CommandHandler(DFSConfigKeys.DFS_HOSTS));
map.put(EXCLUDE_FILE.getName().toLowerCase(),
new CommandHandler("DFSConfigKeys.DFS_HOSTS_EXCLUDE"));
new CommandHandler(DFSConfigKeys.DFS_HOSTS_EXCLUDE));
map.put(NNRPCADDRESSES.getName().toLowerCase(),
new NNRpcAddressesCommandHandler());
map.put(CONFKEY.getName().toLowerCase(),
View File
@ -196,12 +196,13 @@ message HeartbeatRequestProto {
}
message StorageReportProto {
required string storageUuid = 1;
required string storageUuid = 1 [ deprecated = true ];
optional bool failed = 2 [ default = false ];
optional uint64 capacity = 3 [ default = 0 ];
optional uint64 dfsUsed = 4 [ default = 0 ];
optional uint64 remaining = 5 [ default = 0 ];
optional uint64 blockPoolUsed = 6 [ default = 0 ];
optional DatanodeStorageProto storage = 7; // supersedes StorageUuid
}
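
Deprecating storageUuid while adding the optional storage field is the standard protobuf compatibility move: new readers prefer the structured field and fall back to the legacy uuid from old senders. A hedged Java sketch (the PBHelper.convert call is assumed here, not shown in this diff):

    DatanodeStorage storageFrom(StorageReportProto p) {
      if (p.hasStorage()) {
        return PBHelper.convert(p.getStorage());      // new-style sender
      }
      return new DatanodeStorage(p.getStorageUuid()); // legacy sender
    }
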
/**
View File
@ -1476,13 +1476,13 @@
</property>
<property>
<name>dfs.namenode.caching.enabled</name>
<value>false</value>
<name>dfs.namenode.path.based.cache.block.map.allocation.percent</name>
<value>0.25</value>
<description>
Set to true to enable block caching. This flag enables the NameNode to
maintain a mapping of cached blocks to DataNodes via processing DataNode
cache reports. Based on these reports and addition and removal of caching
directives, the NameNode will schedule caching and uncaching work.
The percentage of the Java heap which we will allocate to the cached blocks
map. The cached blocks map is a hash map which uses chained hashing.
Smaller maps may be accessed more slowly if the number of cached blocks is
large; larger maps will consume more memory.
</description>
</property>
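
For a test or tool that wants to override the new key in code rather than in hdfs-site.xml, a minimal hedged example (0.5f is arbitrary; values below 0.001 are clamped by the CacheManager hunk above):

    Configuration conf = new HdfsConfiguration();
    conf.setFloat(
        "dfs.namenode.path.based.cache.block.map.allocation.percent", 0.5f);
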
View File
@ -242,12 +242,6 @@ Centralized Cache Management in HDFS
Be sure to configure the following:
* dfs.namenode.caching.enabled
This must be set to true to enable caching. If this is false, the NameNode
will ignore cache reports, and will not ask DataNodes to cache
blocks.
* dfs.datanode.max.locked.memory
The DataNode will treat this as the maximum amount of memory it can use for
@ -281,6 +275,13 @@ Centralized Cache Management in HDFS
By default, this parameter is set to 10000, which is 10 seconds.
* dfs.namenode.path.based.cache.block.map.allocation.percent
The percentage of the Java heap which we will allocate to the cached blocks
map. The cached blocks map is a hash map which uses chained hashing.
Smaller maps may be accessed more slowly if the number of cached blocks is
large; larger maps will consume more memory. The default is 0.25 percent.
** {OS Limits}
If you get the error "Cannot start datanode because the configured max
View File
@ -140,6 +140,7 @@ public class MiniDFSCluster {
private int nameNodeHttpPort = 0;
private final Configuration conf;
private int numDataNodes = 1;
private StorageType storageType = StorageType.DEFAULT;
private boolean format = true;
private boolean manageNameDfsDirs = true;
private boolean manageNameDfsSharedDirs = true;
@ -185,6 +186,14 @@ public class MiniDFSCluster {
return this;
}
/**
* Default: StorageType.DEFAULT
*/
public Builder storageType(StorageType type) {
this.storageType = type;
return this;
}
/**
* Default: true
*/
@ -341,6 +350,7 @@ public class MiniDFSCluster {
initMiniDFSCluster(builder.conf,
builder.numDataNodes,
builder.storageType,
builder.format,
builder.manageNameDfsDirs,
builder.manageNameDfsSharedDirs,
@ -592,7 +602,7 @@ public class MiniDFSCluster {
String[] racks, String hosts[],
long[] simulatedCapacities) throws IOException {
this.nameNodes = new NameNodeInfo[1]; // Single namenode in the cluster
initMiniDFSCluster(conf, numDataNodes, format,
initMiniDFSCluster(conf, numDataNodes, StorageType.DEFAULT, format,
manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs,
operation, racks, hosts,
simulatedCapacities, null, true, false,
@ -601,7 +611,7 @@ public class MiniDFSCluster {
private void initMiniDFSCluster(
Configuration conf,
int numDataNodes, boolean format, boolean manageNameDfsDirs,
int numDataNodes, StorageType storageType, boolean format, boolean manageNameDfsDirs,
boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy,
boolean manageDataDfsDirs, StartupOption operation, String[] racks,
String[] hosts, long[] simulatedCapacities, String clusterId,
@ -670,7 +680,7 @@ public class MiniDFSCluster {
}
// Start the DataNodes
startDataNodes(conf, numDataNodes, manageDataDfsDirs, operation, racks,
startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs, operation, racks,
hosts, simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, checkDataNodeHostConfig);
waitClusterUp();
//make sure ProxyUsers uses the latest conf
@ -990,6 +1000,19 @@ public class MiniDFSCluster {
}
}
String makeDataNodeDirs(int dnIndex, StorageType storageType) throws IOException {
StringBuilder sb = new StringBuilder();
for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
File dir = getInstanceStorageDir(dnIndex, j);
dir.mkdirs();
if (!dir.isDirectory()) {
throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
}
sb.append((j > 0 ? "," : "") + "[" + storageType + "]" + fileAsURI(dir));
}
return sb.toString();
}
/**
* Modify the config and start up additional DataNodes. The info port for
* DataNodes is guaranteed to use a free port.
@ -1052,7 +1075,7 @@ public class MiniDFSCluster {
String[] racks, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
simulatedCapacities, setupHostsFile, false, false);
}
@ -1066,7 +1089,7 @@ public class MiniDFSCluster {
long[] simulatedCapacities,
boolean setupHostsFile,
boolean checkDataNodeAddrConfig) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, false);
}
@ -1098,7 +1121,7 @@ public class MiniDFSCluster {
* @throws IllegalStateException if NameNode has been shutdown
*/
public synchronized void startDataNodes(Configuration conf, int numDataNodes,
boolean manageDfsDirs, StartupOption operation,
StorageType storageType, boolean manageDfsDirs, StartupOption operation,
String[] racks, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile,
@ -1154,16 +1177,7 @@ public class MiniDFSCluster {
// Set up datanode address
setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
if (manageDfsDirs) {
StringBuilder sb = new StringBuilder();
for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
File dir = getInstanceStorageDir(i, j);
dir.mkdirs();
if (!dir.isDirectory()) {
throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
}
sb.append((j > 0 ? "," : "") + fileAsURI(dir));
}
String dirs = sb.toString();
String dirs = makeDataNodeDirs(i, storageType);
dnConf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
conf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
}
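
A hedged sketch of the new builder knob; with it, makeDataNodeDirs prefixes every generated data dir with the storage type, producing entries like "[SSD]file:/...". This mirrors its use in TestStorageReport later in this patch:

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1)
        .storageType(StorageType.SSD)    // default is StorageType.DEFAULT
        .build();
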
View File
@ -50,7 +50,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
}
public synchronized void startDataNodes(Configuration conf, int numDataNodes,
boolean manageDfsDirs, StartupOption operation,
StorageType storageType, boolean manageDfsDirs, StartupOption operation,
String[] racks, String[] nodeGroups, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile,
@ -112,15 +112,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
// Set up datanode address
setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
if (manageDfsDirs) {
File dir1 = getInstanceStorageDir(i, 0);
File dir2 = getInstanceStorageDir(i, 1);
dir1.mkdirs();
dir2.mkdirs();
if (!dir1.isDirectory() || !dir2.isDirectory()) {
throw new IOException("Mkdirs failed to create directory for DataNode "
+ i + ": " + dir1 + " or " + dir2);
}
String dirs = fileAsURI(dir1) + "," + fileAsURI(dir2);
String dirs = makeDataNodeDirs(i, storageType);
dnConf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
}
@ -198,7 +190,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
String[] racks, String[] nodeGroups, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, nodeGroups,
startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, nodeGroups,
hosts, simulatedCapacities, setupHostsFile, false, false);
}
@ -213,13 +205,13 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
// This is for initialize from parent class.
@Override
public synchronized void startDataNodes(Configuration conf, int numDataNodes,
boolean manageDfsDirs, StartupOption operation,
StorageType storageType, boolean manageDfsDirs, StartupOption operation,
String[] racks, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile,
boolean checkDataNodeAddrConfig,
boolean checkDataNodeHostConfig) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks,
startDataNodes(conf, numDataNodes, storageType, manageDfsDirs, operation, racks,
NODE_GROUPS, hosts, simulatedCapacities, setupHostsFile,
checkDataNodeAddrConfig, checkDataNodeHostConfig);
}
View File
@ -257,8 +257,10 @@ public class BlockManagerTestUtil {
DatanodeDescriptor dnd) {
ArrayList<StorageReport> reports = new ArrayList<StorageReport>();
for (DatanodeStorageInfo storage : dnd.getStorageInfos()) {
DatanodeStorage dns = new DatanodeStorage(
storage.getStorageID(), storage.getState(), storage.getStorageType());
StorageReport report = new StorageReport(
storage.getStorageID(), false, storage.getCapacity(),
dns, false, storage.getCapacity(),
storage.getDfsUsed(), storage.getRemaining(),
storage.getBlockPoolUsed());
reports.add(report);
View File
@ -470,11 +470,14 @@ public class TestJspHelper {
BlockManagerTestUtil.updateStorage(dnDesc1, new DatanodeStorage("dnStorage1"));
BlockManagerTestUtil.updateStorage(dnDesc2, new DatanodeStorage("dnStorage2"));
DatanodeStorage dns1 = new DatanodeStorage("dnStorage1");
DatanodeStorage dns2 = new DatanodeStorage("dnStorage2");
StorageReport[] report1 = new StorageReport[] {
new StorageReport("dnStorage1", false, 1024, 100, 924, 100)
new StorageReport(dns1, false, 1024, 100, 924, 100)
};
StorageReport[] report2 = new StorageReport[] {
new StorageReport("dnStorage2", false, 2500, 200, 1848, 200)
new StorageReport(dns2, false, 2500, 200, 1848, 200)
};
dnDesc1.updateHeartbeat(report1, 5l, 3l, 10, 2);
dnDesc2.updateHeartbeat(report2, 10l, 2l, 20, 1);
View File
@ -394,8 +394,9 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
}
synchronized StorageReport getStorageReport(String bpid) {
return new StorageReport(getStorageUuid(), false, getCapacity(),
getUsed(), getFree(), map.get(bpid).getUsed());
return new StorageReport(new DatanodeStorage(getStorageUuid()),
false, getCapacity(), getUsed(), getFree(),
map.get(bpid).getUsed());
}
}
View File
@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage;
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.util.DataChecksum;
import org.junit.After;
@ -186,9 +187,8 @@ public class TestDiskError {
// Check permissions on directories in 'dfs.datanode.data.dir'
FileSystem localFS = FileSystem.getLocal(conf);
for (DataNode dn : cluster.getDataNodes()) {
String[] dataDirs =
dn.getConf().getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
for (String dir : dataDirs) {
for (FsVolumeSpi v : dn.getFSDataset().getVolumes()) {
String dir = v.getBasePath();
Path dataDir = new Path(dir);
FsPermission actual = localFS.getFileStatus(dataDir).getPermission();
assertEquals("Permission for dir: " + dataDir + ", is " + actual +
View File
@ -36,16 +36,20 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HdfsBlockLocation;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.LogVerificationAppender;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
@ -82,7 +86,11 @@ public class TestFsDatasetCache {
// Most Linux installs allow a default of 64KB locked memory
private static final long CACHE_CAPACITY = 64 * 1024;
private static final long BLOCK_SIZE = 4096;
// mlock always locks the entire page. So we don't need to deal with this
// rounding, use the OS page size for the block size.
private static final long PAGE_SIZE =
NativeIO.POSIX.getCacheManipulator().getOperatingSystemPageSize();
private static final long BLOCK_SIZE = PAGE_SIZE;
private static Configuration conf;
private static MiniDFSCluster cluster = null;
@ -104,14 +112,13 @@ public class TestFsDatasetCache {
public void setUp() throws Exception {
assumeTrue(!Path.WINDOWS);
conf = new HdfsConfiguration();
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY, true);
conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS,
500);
conf.setLong(
DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 100);
conf.setLong(DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 500);
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
conf.setLong(DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY,
CACHE_CAPACITY);
conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY, true);
prevCacheManipulator = NativeIO.POSIX.getCacheManipulator();
NativeIO.POSIX.setCacheManipulator(new NoMlockCacheManipulator());
@ -325,7 +332,7 @@ public class TestFsDatasetCache {
// Create some test files that will exceed total cache capacity
final int numFiles = 5;
final long fileSize = 15000;
final long fileSize = CACHE_CAPACITY / (numFiles-1);
final Path[] testFiles = new Path[numFiles];
final HdfsBlockLocation[][] fileLocs = new HdfsBlockLocation[numFiles][];
@ -451,4 +458,65 @@ public class TestFsDatasetCache {
}
}, 100, 10000);
}
@Test(timeout=60000)
public void testPageRounder() throws Exception {
// Write a small file
Path fileName = new Path("/testPageRounder");
final int smallBlocks = 512; // This should be smaller than the page size
assertTrue("Page size should be greater than smallBlocks!",
PAGE_SIZE > smallBlocks);
final int numBlocks = 5;
final int fileLen = smallBlocks * numBlocks;
FSDataOutputStream out =
fs.create(fileName, false, 4096, (short)1, smallBlocks);
out.write(new byte[fileLen]);
out.close();
HdfsBlockLocation[] locs = (HdfsBlockLocation[])fs.getFileBlockLocations(
fileName, 0, fileLen);
// Cache the file and check the sizes match the page size
setHeartbeatResponse(cacheBlocks(locs));
verifyExpectedCacheUsage(PAGE_SIZE * numBlocks, numBlocks);
// Uncache and check that it decrements by the page size too
setHeartbeatResponse(uncacheBlocks(locs));
verifyExpectedCacheUsage(0, 0);
}
@Test(timeout=60000)
public void testUncacheQuiesces() throws Exception {
// Create a file
Path fileName = new Path("/testUncacheQuiesces");
int fileLen = 4096;
DFSTestUtil.createFile(fs, fileName, fileLen, (short)1, 0xFDFD);
// Cache it
DistributedFileSystem dfs = cluster.getFileSystem();
dfs.addCachePool(new CachePoolInfo("pool"));
dfs.addCacheDirective(new CacheDirectiveInfo.Builder()
.setPool("pool").setPath(fileName).setReplication((short)3).build());
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
long blocksCached =
MetricsAsserts.getLongCounter("BlocksCached", dnMetrics);
return blocksCached > 0;
}
}, 1000, 30000);
// Uncache it
dfs.removeCacheDirective(1);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
long blocksUncached =
MetricsAsserts.getLongCounter("BlocksUncached", dnMetrics);
return blocksUncached > 0;
}
}, 1000, 30000);
// Make sure that no additional messages were sent
Thread.sleep(10000);
MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
MetricsAsserts.assertCounter("BlocksCached", 1l, dnMetrics);
MetricsAsserts.assertCounter("BlocksUncached", 1l, dnMetrics);
}
}
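
The page rounding that testPageRounder exercises reduces to rounding each replica length up to a page boundary; a small sketch (roundUpToPageSize is illustrative, not the datanode's actual helper):

    static long roundUpToPageSize(long len, long pageSize) {
      return ((len + pageSize - 1) / pageSize) * pageSize;
    }
    // roundUpToPageSize(512, 4096) == 4096, so five 512-byte blocks pin
    // 5 * PAGE_SIZE bytes of cache, which is exactly what
    // verifyExpectedCacheUsage(PAGE_SIZE * numBlocks, numBlocks) asserts.
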
View File
@ -0,0 +1,113 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.*;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mockito;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertThat;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyLong;
public class TestStorageReport {
public static final Log LOG = LogFactory.getLog(TestStorageReport.class);
private static short REPL_FACTOR = 1;
private static final StorageType storageType = StorageType.SSD; // pick non-default.
private static Configuration conf;
private MiniDFSCluster cluster;
private DistributedFileSystem fs;
static String bpid;
@Before
public void startUpCluster() throws IOException {
conf = new HdfsConfiguration();
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(REPL_FACTOR)
.storageType(storageType)
.build();
fs = cluster.getFileSystem();
bpid = cluster.getNamesystem().getBlockPoolId();
}
@After
public void shutDownCluster() throws IOException {
if (cluster != null) {
fs.close();
cluster.shutdown();
cluster = null;
}
}
/**
* Ensure that storage type and storage state are propagated
* in Storage Reports.
*/
@Test
public void testStorageReportHasStorageTypeAndState() throws IOException {
// Make sure we are not testing with the default type, that would not
// be a very good test.
assertNotSame(storageType, StorageType.DEFAULT);
NameNode nn = cluster.getNameNode();
DataNode dn = cluster.getDataNodes().get(0);
// Insert a spy object for the NN RPC.
DatanodeProtocolClientSideTranslatorPB nnSpy =
DataNodeTestUtils.spyOnBposToNN(dn, nn);
// Trigger a heartbeat so there is an interaction with the spy
// object.
DataNodeTestUtils.triggerHeartbeat(dn);
// Verify that the callback passed in the expected parameters.
ArgumentCaptor<StorageReport[]> captor =
ArgumentCaptor.forClass(StorageReport[].class);
Mockito.verify(nnSpy).sendHeartbeat(
any(DatanodeRegistration.class),
captor.capture(),
anyLong(), anyLong(), anyInt(), anyInt(), anyInt());
StorageReport[] reports = captor.getValue();
for (StorageReport report: reports) {
assertThat(report.getStorage().getStorageType(), is(storageType));
assertThat(report.getStorage().getState(), is(DatanodeStorage.State.NORMAL));
}
}
}
View File
@ -605,6 +605,98 @@ public class NNThroughputBenchmark implements Tool {
}
}
/**
* Directory creation statistics.
*
* Each thread creates the same (+ or -1) number of directories.
* Directory names are pre-generated during initialization.
*/
class MkdirsStats extends OperationStatsBase {
// Operation types
static final String OP_MKDIRS_NAME = "mkdirs";
static final String OP_MKDIRS_USAGE = "-op mkdirs [-threads T] [-dirs N] " +
"[-dirsPerDir P]";
protected FileNameGenerator nameGenerator;
protected String[][] dirPaths;
MkdirsStats(List<String> args) {
super();
parseArguments(args);
}
@Override
String getOpName() {
return OP_MKDIRS_NAME;
}
@Override
void parseArguments(List<String> args) {
boolean ignoreUnrelatedOptions = verifyOpArgument(args);
int nrDirsPerDir = 2;
for (int i = 2; i < args.size(); i++) { // parse command line
if(args.get(i).equals("-dirs")) {
if(i+1 == args.size()) printUsage();
numOpsRequired = Integer.parseInt(args.get(++i));
} else if(args.get(i).equals("-threads")) {
if(i+1 == args.size()) printUsage();
numThreads = Integer.parseInt(args.get(++i));
} else if(args.get(i).equals("-dirsPerDir")) {
if(i+1 == args.size()) printUsage();
nrDirsPerDir = Integer.parseInt(args.get(++i));
} else if(!ignoreUnrelatedOptions)
printUsage();
}
nameGenerator = new FileNameGenerator(getBaseDir(), nrDirsPerDir);
}
@Override
void generateInputs(int[] opsPerThread) throws IOException {
assert opsPerThread.length == numThreads : "Error opsPerThread.length";
nameNodeProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE,
false);
LOG.info("Generate " + numOpsRequired + " inputs for " + getOpName());
dirPaths = new String[numThreads][];
for(int idx=0; idx < numThreads; idx++) {
int threadOps = opsPerThread[idx];
dirPaths[idx] = new String[threadOps];
for(int jdx=0; jdx < threadOps; jdx++)
dirPaths[idx][jdx] = nameGenerator.
getNextFileName("ThroughputBench");
}
}
/**
* returns client name
*/
@Override
String getExecutionArgument(int daemonId) {
return getClientName(daemonId);
}
/**
* Do mkdirs operation.
*/
@Override
long executeOp(int daemonId, int inputIdx, String clientName)
throws IOException {
long start = Time.now();
nameNodeProto.mkdirs(dirPaths[daemonId][inputIdx],
FsPermission.getDefault(), true);
long end = Time.now();
return end-start;
}
@Override
void printResults() {
LOG.info("--- " + getOpName() + " inputs ---");
LOG.info("nrDirs = " + numOpsRequired);
LOG.info("nrThreads = " + numThreads);
LOG.info("nrDirsPerDir = " + nameGenerator.getFilesPerDirectory());
printStats();
}
}
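
Given OP_MKDIRS_USAGE above, a plausible invocation of the new op (flag names come from the usage string; the exact launcher wrapper may vary by install):

    hadoop org.apache.hadoop.hdfs.server.namenode.NNThroughputBenchmark \
        -op mkdirs -threads 4 -dirs 1024 -dirsPerDir 32
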
/**
* Open file statistics.
*
@ -846,7 +938,7 @@ public class NNThroughputBenchmark implements Tool {
// register datanode
dnRegistration = nameNodeProto.registerDatanode(dnRegistration);
//first block reports
storage = new DatanodeStorage(dnRegistration.getDatanodeUuid());
storage = new DatanodeStorage(DatanodeStorage.generateUuid());
final StorageBlockReport[] reports = {
new StorageBlockReport(storage,
new BlockListAsLongs(null, null).getBlockListAsLongs())
@ -862,8 +954,8 @@ public class NNThroughputBenchmark implements Tool {
void sendHeartbeat() throws IOException {
// register datanode
// TODO:FEDERATION currently a single block pool is supported
StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
StorageReport[] rep = { new StorageReport(storage, false,
DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration, rep,
0L, 0L, 0, 0, 0).getCommands();
if(cmds != null) {
@ -909,7 +1001,7 @@ public class NNThroughputBenchmark implements Tool {
@SuppressWarnings("unused") // keep it for future blockReceived benchmark
int replicateBlocks() throws IOException {
// register datanode
StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
StorageReport[] rep = { new StorageReport(storage,
false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration,
rep, 0L, 0L, 0, 0, 0).getCommands();
@ -918,7 +1010,8 @@ public class NNThroughputBenchmark implements Tool {
if (cmd.getAction() == DatanodeProtocol.DNA_TRANSFER) {
// Send a copy of a block to another datanode
BlockCommand bcmd = (BlockCommand)cmd;
return transferBlocks(bcmd.getBlocks(), bcmd.getTargets());
return transferBlocks(bcmd.getBlocks(), bcmd.getTargets(),
bcmd.getTargetStorageIDs());
}
}
}
@ -931,12 +1024,14 @@ public class NNThroughputBenchmark implements Tool {
* that the blocks have been received.
*/
private int transferBlocks( Block blocks[],
DatanodeInfo xferTargets[][]
DatanodeInfo xferTargets[][],
String targetStorageIDs[][]
) throws IOException {
for(int i = 0; i < blocks.length; i++) {
DatanodeInfo blockTargets[] = xferTargets[i];
for(int t = 0; t < blockTargets.length; t++) {
DatanodeInfo dnInfo = blockTargets[t];
String targetStorageID = targetStorageIDs[i][t];
DatanodeRegistration receivedDNReg;
receivedDNReg = new DatanodeRegistration(dnInfo,
new DataStorage(nsInfo),
@ -946,7 +1041,7 @@ public class NNThroughputBenchmark implements Tool {
blocks[i], ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK,
null) };
StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
receivedDNReg.getDatanodeUuid(), rdBlocks) };
targetStorageID, rdBlocks) };
nameNodeProto.blockReceivedAndDeleted(receivedDNReg, nameNode
.getNamesystem().getBlockPoolId(), report);
}
@ -1035,7 +1130,7 @@ public class NNThroughputBenchmark implements Tool {
}
// create files
LOG.info("Creating " + nrFiles + " with " + blocksPerFile + " blocks each.");
LOG.info("Creating " + nrFiles + " files with " + blocksPerFile + " blocks each.");
FileNameGenerator nameGenerator;
nameGenerator = new FileNameGenerator(getBaseDir(), 100);
String clientName = getClientName(007);
@ -1069,7 +1164,7 @@ public class NNThroughputBenchmark implements Tool {
loc.getBlock().getLocalBlock(),
ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, null) };
StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
datanodes[dnIdx].dnRegistration.getDatanodeUuid(), rdBlocks) };
datanodes[dnIdx].storage.getStorageID(), rdBlocks) };
nameNodeProto.blockReceivedAndDeleted(datanodes[dnIdx].dnRegistration, loc
.getBlock().getBlockPoolId(), report);
}
@ -1279,6 +1374,7 @@ public class NNThroughputBenchmark implements Tool {
System.err.println("Usage: NNThroughputBenchmark"
+ "\n\t" + OperationStatsBase.OP_ALL_USAGE
+ " | \n\t" + CreateFileStats.OP_CREATE_USAGE
+ " | \n\t" + MkdirsStats.OP_MKDIRS_USAGE
+ " | \n\t" + OpenFileStats.OP_OPEN_USAGE
+ " | \n\t" + DeleteFileStats.OP_DELETE_USAGE
+ " | \n\t" + FileStatusStats.OP_FILE_STATUS_USAGE
@ -1328,6 +1424,10 @@ public class NNThroughputBenchmark implements Tool {
opStat = new CreateFileStats(args);
ops.add(opStat);
}
if(runAll || MkdirsStats.OP_MKDIRS_NAME.equals(type)) {
opStat = new MkdirsStats(args);
ops.add(opStat);
}
if(runAll || OpenFileStats.OP_OPEN_NAME.equals(type)) {
opStat = new OpenFileStats(args);
ops.add(opStat);
View File
@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.namenode;
import java.io.File;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.Iterator;
import org.apache.commons.logging.Log;
@ -29,25 +28,13 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSClientAdapter;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
/**
* OfflineEditsViewerHelper is a helper class for TestOfflineEditsViewer,
@ -135,151 +122,11 @@ public class OfflineEditsViewerHelper {
* OP_CLEAR_NS_QUOTA (12)
*/
private CheckpointSignature runOperations() throws IOException {
LOG.info("Creating edits by performing fs operations");
// no check, if it's not it throws an exception which is what we want
DistributedFileSystem dfs =
(DistributedFileSystem)cluster.getFileSystem();
FileContext fc = FileContext.getFileContext(cluster.getURI(0), config);
// OP_ADD 0
Path pathFileCreate = new Path("/file_create_u\1F431");
FSDataOutputStream s = dfs.create(pathFileCreate);
// OP_CLOSE 9
s.close();
// OP_RENAME_OLD 1
Path pathFileMoved = new Path("/file_moved");
dfs.rename(pathFileCreate, pathFileMoved);
// OP_DELETE 2
dfs.delete(pathFileMoved, false);
// OP_MKDIR 3
Path pathDirectoryMkdir = new Path("/directory_mkdir");
dfs.mkdirs(pathDirectoryMkdir);
// OP_ALLOW_SNAPSHOT 29
dfs.allowSnapshot(pathDirectoryMkdir);
// OP_DISALLOW_SNAPSHOT 30
dfs.disallowSnapshot(pathDirectoryMkdir);
// OP_CREATE_SNAPSHOT 26
String ssName = "snapshot1";
dfs.allowSnapshot(pathDirectoryMkdir);
dfs.createSnapshot(pathDirectoryMkdir, ssName);
// OP_RENAME_SNAPSHOT 28
String ssNewName = "snapshot2";
dfs.renameSnapshot(pathDirectoryMkdir, ssName, ssNewName);
// OP_DELETE_SNAPSHOT 27
dfs.deleteSnapshot(pathDirectoryMkdir, ssNewName);
// OP_SET_REPLICATION 4
s = dfs.create(pathFileCreate);
s.close();
dfs.setReplication(pathFileCreate, (short)1);
// OP_SET_PERMISSIONS 7
Short permission = 0777;
dfs.setPermission(pathFileCreate, new FsPermission(permission));
// OP_SET_OWNER 8
dfs.setOwner(pathFileCreate, new String("newOwner"), null);
// OP_CLOSE 9 see above
// OP_SET_GENSTAMP 10 see above
// OP_SET_NS_QUOTA 11 obsolete
// OP_CLEAR_NS_QUOTA 12 obsolete
// OP_TIMES 13
long mtime = 1285195527000L; // Wed, 22 Sep 2010 22:45:27 GMT
long atime = mtime;
dfs.setTimes(pathFileCreate, mtime, atime);
// OP_SET_QUOTA 14
dfs.setQuota(pathDirectoryMkdir, 1000L, HdfsConstants.QUOTA_DONT_SET);
// OP_RENAME 15
fc.rename(pathFileCreate, pathFileMoved, Rename.NONE);
// OP_CONCAT_DELETE 16
Path pathConcatTarget = new Path("/file_concat_target");
Path[] pathConcatFiles = new Path[2];
pathConcatFiles[0] = new Path("/file_concat_0");
pathConcatFiles[1] = new Path("/file_concat_1");
long length = blockSize * 3; // multiple of blocksize for concat
short replication = 1;
long seed = 1;
DFSTestUtil.createFile(dfs, pathConcatTarget, length, replication, seed);
DFSTestUtil.createFile(dfs, pathConcatFiles[0], length, replication, seed);
DFSTestUtil.createFile(dfs, pathConcatFiles[1], length, replication, seed);
dfs.concat(pathConcatTarget, pathConcatFiles);
// OP_SYMLINK 17
Path pathSymlink = new Path("/file_symlink");
fc.createSymlink(pathConcatTarget, pathSymlink, false);
// OP_GET_DELEGATION_TOKEN 18
// OP_RENEW_DELEGATION_TOKEN 19
// OP_CANCEL_DELEGATION_TOKEN 20
// see TestDelegationToken.java
// fake the user to renew token for
final Token<?>[] tokens = dfs.addDelegationTokens("JobTracker", null);
UserGroupInformation longUgi = UserGroupInformation.createRemoteUser(
"JobTracker/foo.com@FOO.COM");
try {
longUgi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws IOException, InterruptedException {
for (Token<?> token : tokens) {
token.renew(config);
token.cancel(config);
}
return null;
}
});
} catch(InterruptedException e) {
throw new IOException(
"renewDelegationToken threw InterruptedException", e);
}
// OP_UPDATE_MASTER_KEY 21
// done by getDelegationTokenSecretManager().startThreads();
// OP_ADD_CACHE_POOL 35
final String pool = "poolparty";
dfs.addCachePool(new CachePoolInfo(pool));
// OP_MODIFY_CACHE_POOL 36
dfs.modifyCachePool(new CachePoolInfo(pool)
.setOwnerName("carlton")
.setGroupName("party")
.setMode(new FsPermission((short)0700))
.setLimit(1989l));
// OP_ADD_PATH_BASED_CACHE_DIRECTIVE 33
long id = dfs.addCacheDirective(
new CacheDirectiveInfo.Builder().
setPath(new Path("/bar")).
setReplication((short)1).
setPool(pool).
build());
// OP_MODIFY_PATH_BASED_CACHE_DIRECTIVE 38
dfs.modifyCacheDirective(
new CacheDirectiveInfo.Builder().
setId(id).
setPath(new Path("/bar2")).
build());
// OP_REMOVE_PATH_BASED_CACHE_DIRECTIVE 34
dfs.removeCacheDirective(id);
// OP_REMOVE_CACHE_POOL 37
dfs.removeCachePool(pool);
// sync to disk, otherwise we parse partial edits
cluster.getNameNode().getFSImage().getEditLog().logSync();
// OP_REASSIGN_LEASE 22
String filePath = "/hard-lease-recovery-test";
byte[] bytes = "foo-bar-baz".getBytes();
DFSClientAdapter.stopLeaseRenewer(dfs);
FSDataOutputStream leaseRecoveryPath = dfs.create(new Path(filePath));
leaseRecoveryPath.write(bytes);
leaseRecoveryPath.hflush();
// Set the hard lease timeout to 1 second.
cluster.setLeasePeriod(60 * 1000, 1000);
// wait for lease recovery to complete
LocatedBlocks locatedBlocks;
do {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
LOG.info("Innocuous exception", e);
}
locatedBlocks = DFSClientAdapter.callGetBlockLocations(
cluster.getNameNodeRpc(), filePath, 0L, bytes.length);
} while (locatedBlocks.isUnderConstruction());
DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
DFSTestUtil.runOperations(cluster, dfs, cluster.getConfiguration(0),
dfs.getDefaultBlockSize(), 0);
// Force a roll so we get an OP_END_LOG_SEGMENT txn
return cluster.getNameNodeRpc().rollEditLog();

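The hand-rolled sequence of filesystem operations that used to live in runOperations is replaced by the shared DFSTestUtil.runOperations helper, so the opcode-generating logic is maintained in one place. Roughly, a caller now needs only the following (assuming a running MiniDFSCluster named cluster, as above):

    // Generate edits covering the full set of opcodes against the live cluster.
    DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
    DFSTestUtil.runOperations(cluster, dfs, cluster.getConfiguration(0),
        dfs.getDefaultBlockSize(), 0);
    // Roll the log so the segment ends with an OP_END_LOG_SEGMENT transaction.
    CheckpointSignature sig = cluster.getNameNodeRpc().rollEditLog();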

@ -21,7 +21,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
import static org.apache.hadoop.hdfs.protocol.CachePoolInfo.RELATIVE_EXPIRY_NEVER;
import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
@ -58,17 +57,21 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.LogVerificationAppender;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
import org.apache.hadoop.hdfs.protocol.CachePoolStats;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.io.nativeio.NativeIO.POSIX.CacheManipulator;
@ -79,6 +82,7 @@ import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.GSet;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@ -104,7 +108,7 @@ public class TestCacheDirectives {
EditLogFileOutputStream.setShouldSkipFsyncForTesting(false);
}
private static final long BLOCK_SIZE = 512;
private static final long BLOCK_SIZE = 4096;
private static final int NUM_DATANODES = 4;
// Most Linux installs will allow non-root users to lock 64KB.
// In this test though, we stub out mlock so this doesn't matter.
@ -115,7 +119,6 @@ public class TestCacheDirectives {
conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
conf.setLong(DFS_DATANODE_MAX_LOCKED_MEMORY_KEY, CACHE_CAPACITY);
conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1);
conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, true);
conf.setLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 1000);
conf.setLong(DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1000);
// set low limits here for testing purposes
@ -602,8 +605,8 @@ public class TestCacheDirectives {
* Wait for the NameNode to have an expected number of cached blocks
* and replicas.
* @param nn NameNode
* @param expectedCachedBlocks
* @param expectedCachedReplicas
* @param expectedCachedBlocks if -1, treat as wildcard
* @param expectedCachedReplicas if -1, treat as wildcard
* @throws Exception
*/
private static void waitForCachedBlocks(NameNode nn,
@ -632,17 +635,19 @@ public class TestCacheDirectives {
} finally {
namesystem.readUnlock();
}
if ((numCachedBlocks == expectedCachedBlocks) &&
(numCachedReplicas == expectedCachedReplicas)) {
if (expectedCachedBlocks == -1 ||
numCachedBlocks == expectedCachedBlocks) {
if (expectedCachedReplicas == -1 ||
numCachedReplicas == expectedCachedReplicas) {
return true;
} else {
}
}
LOG.info(logString + " cached blocks: have " + numCachedBlocks +
" / " + expectedCachedBlocks + ". " +
"cached replicas: have " + numCachedReplicas +
" / " + expectedCachedReplicas);
return false;
}
}
}, 500, 60000);
}
@ -796,7 +801,15 @@ public class TestCacheDirectives {
}
}, 500, 60000);
// Send a cache report referring to a bogus block. It is important that
// the NameNode be robust against this.
NamenodeProtocols nnRpc = namenode.getRpcServer();
DataNode dn0 = cluster.getDataNodes().get(0);
String bpid = cluster.getNamesystem().getBlockPoolId();
LinkedList<Long> bogusBlockIds = new LinkedList<Long>();
bogusBlockIds.add(999999L);
nnRpc.cacheReport(dn0.getDNRegistrationForBP(bpid), bpid, bogusBlockIds);
Path rootDir = helper.getDefaultWorkingDirectory(dfs);
// Create the pool
final String pool = "friendlyPool";
@ -826,6 +839,24 @@ public class TestCacheDirectives {
waitForCachedBlocks(namenode, expected, expected,
"testWaitForCachedReplicas:1");
}
// Check that the datanodes have the right cache values
DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
assertEquals("Unexpected number of live nodes", NUM_DATANODES, live.length);
long totalUsed = 0;
for (DatanodeInfo dn : live) {
final long cacheCapacity = dn.getCacheCapacity();
final long cacheUsed = dn.getCacheUsed();
final long cacheRemaining = dn.getCacheRemaining();
assertEquals("Unexpected cache capacity", CACHE_CAPACITY, cacheCapacity);
assertEquals("Capacity not equal to used + remaining",
cacheCapacity, cacheUsed + cacheRemaining);
assertEquals("Remaining not equal to capacity - used",
cacheCapacity - cacheUsed, cacheRemaining);
totalUsed += cacheUsed;
}
assertEquals(expected*BLOCK_SIZE, totalUsed);
// Uncache and check each path in sequence
RemoteIterator<CacheDirectiveEntry> entries =
new CacheDirectiveIterator(nnRpc, null);
@ -838,55 +869,6 @@ public class TestCacheDirectives {
}
}
@Test(timeout=120000)
public void testAddingCacheDirectiveInfosWhenCachingIsDisabled()
throws Exception {
cluster.shutdown();
HdfsConfiguration conf = createCachingConf();
conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, false);
MiniDFSCluster cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
try {
cluster.waitActive();
DistributedFileSystem dfs = cluster.getFileSystem();
NameNode namenode = cluster.getNameNode();
// Create the pool
String pool = "pool1";
namenode.getRpcServer().addCachePool(new CachePoolInfo(pool));
// Create some test files
final int numFiles = 2;
final int numBlocksPerFile = 2;
final List<String> paths = new ArrayList<String>(numFiles);
for (int i=0; i<numFiles; i++) {
Path p = new Path("/testCachePaths-" + i);
FileSystemTestHelper.createFile(dfs, p, numBlocksPerFile,
(int)BLOCK_SIZE);
paths.add(p.toUri().getPath());
}
// Check the initial statistics at the namenode
waitForCachedBlocks(namenode, 0, 0,
"testAddingCacheDirectiveInfosWhenCachingIsDisabled:0");
// Cache and check each path in sequence
int expected = 0;
for (int i=0; i<numFiles; i++) {
CacheDirectiveInfo directive =
new CacheDirectiveInfo.Builder().
setPath(new Path(paths.get(i))).
setPool(pool).
build();
dfs.addCacheDirective(directive);
waitForCachedBlocks(namenode, expected, 0,
"testAddingCacheDirectiveInfosWhenCachingIsDisabled:1");
}
Thread.sleep(20000);
waitForCachedBlocks(namenode, expected, 0,
"testAddingCacheDirectiveInfosWhenCachingIsDisabled:2");
} finally {
cluster.shutdown();
}
}
@Test(timeout=120000)
public void testWaitForCachedReplicasInDirectory() throws Exception {
// Create the pool
@ -965,7 +947,6 @@ public class TestCacheDirectives {
(4+3) * numBlocksPerFile * BLOCK_SIZE,
3, 2,
poolInfo, "testWaitForCachedReplicasInDirectory:2:pool");
// remove and watch numCached go to 0
dfs.removeCacheDirective(id);
dfs.removeCacheDirective(id2);
@ -1374,4 +1355,39 @@ public class TestCacheDirectives {
.setExpiration(Expiration.newRelative(RELATIVE_EXPIRY_NEVER - 1))
.build());
}
@Test(timeout=60000)
public void testExceedsCapacity() throws Exception {
// Create a giant file
final Path fileName = new Path("/exceeds");
final long fileLen = CACHE_CAPACITY * (NUM_DATANODES*2);
int numCachedReplicas = (int) ((CACHE_CAPACITY*NUM_DATANODES)/BLOCK_SIZE);
DFSTestUtil.createFile(dfs, fileName, fileLen, (short) NUM_DATANODES,
0xFADED);
// Set up a log appender watcher
final LogVerificationAppender appender = new LogVerificationAppender();
final Logger logger = Logger.getRootLogger();
logger.addAppender(appender);
dfs.addCachePool(new CachePoolInfo("pool"));
dfs.addCacheDirective(new CacheDirectiveInfo.Builder().setPool("pool")
.setPath(fileName).setReplication((short) 1).build());
waitForCachedBlocks(namenode, -1, numCachedReplicas,
"testExceeds:1");
// Check that no DNs saw an excess CACHE message
int lines = appender.countLinesWithMessage(
"more bytes in the cache: " +
DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
assertEquals("Namenode should not send extra CACHE commands", 0, lines);
// Try creating a file with giant-sized blocks that exceed cache capacity
dfs.delete(fileName, false);
DFSTestUtil.createFile(dfs, fileName, 4096, fileLen, CACHE_CAPACITY * 2,
(short) 1, 0xFADED);
// Nothing will get cached, so just force sleep for a bit
Thread.sleep(4000);
// Still should not see any excess commands
lines = appender.countLinesWithMessage(
"more bytes in the cache: " +
DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
assertEquals("Namenode should not send extra CACHE commands", 0, lines);
}
}

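Two conventions introduced above are easy to miss: waitForCachedBlocks now treats -1 as a wildcard for either expectation, and the live-node loop pins down the DataNode cache accounting invariant. A hedged recap, with namenode, numCachedReplicas and dn standing in for the values used in the tests:

    // -1 skips the cached-block expectation; only replicas are checked.
    waitForCachedBlocks(namenode, -1, numCachedReplicas, "testExceeds:1");
    // Invariant asserted for every live DataNode:
    assertEquals(dn.getCacheCapacity(), dn.getCacheUsed() + dn.getCacheRemaining());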

@ -140,8 +140,9 @@ public class TestDeadDatanode {
// Ensure heartbeat from dead datanode is rejected with a command
// that asks datanode to register again
StorageReport[] rep = { new StorageReport(reg.getDatanodeUuid(), false, 0, 0,
0, 0) };
StorageReport[] rep = { new StorageReport(
new DatanodeStorage(reg.getDatanodeUuid()),
false, 0, 0, 0, 0) };
DatanodeCommand[] cmd = dnp.sendHeartbeat(reg, rep, 0L, 0L, 0, 0, 0)
.getCommands();
assertEquals(1, cmd.length);


@ -27,6 +27,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.util.LinkedList;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.commons.logging.Log;
@ -59,6 +60,8 @@ import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;
import com.google.common.util.concurrent.Uninterruptibles;
/**
* Tests state transition from active->standby, and manual failover
* and failback between two namenodes.
@ -124,6 +127,17 @@ public class TestHAStateTransitions {
}
}
private void addCrmThreads(MiniDFSCluster cluster,
LinkedList<Thread> crmThreads) {
for (int nn = 0; nn <= 1; nn++) {
Thread thread = cluster.getNameNode(nn).getNamesystem().
getCacheManager().getCacheReplicationMonitor();
if (thread != null) {
crmThreads.add(thread);
}
}
}
/**
* Test that transitioning a service to the state that it is already
* in is a nop, specifically, an exception is not thrown.
@ -131,19 +145,30 @@ public class TestHAStateTransitions {
@Test
public void testTransitionToCurrentStateIsANop() throws Exception {
Configuration conf = new Configuration();
conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1L);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(MiniDFSNNTopology.simpleHATopology())
.numDataNodes(1)
.build();
LinkedList<Thread> crmThreads = new LinkedList<Thread>();
try {
cluster.waitActive();
addCrmThreads(cluster, crmThreads);
cluster.transitionToActive(0);
addCrmThreads(cluster, crmThreads);
cluster.transitionToActive(0);
addCrmThreads(cluster, crmThreads);
cluster.transitionToStandby(0);
addCrmThreads(cluster, crmThreads);
cluster.transitionToStandby(0);
addCrmThreads(cluster, crmThreads);
} finally {
cluster.shutdown();
}
// Verify that all cacheReplicationMonitor threads shut down
for (Thread thread : crmThreads) {
Uninterruptibles.joinUninterruptibly(thread);
}
}
/**

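The new bookkeeping collects the CacheReplicationMonitor thread of each NameNode after every transition, then joins them all once the cluster is down; Guava's joinUninterruptibly is used so a stray interrupt cannot skip the verification. The core of the pattern:

    for (Thread thread : crmThreads) {
      // Blocks until the thread dies; any InterruptedException is swallowed
      // and the interrupt flag is restored before returning.
      Uninterruptibles.joinUninterruptibly(thread);
    }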

@ -47,6 +47,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.test.MetricsAsserts;
import org.apache.hadoop.util.Time;
import org.apache.log4j.Level;
@ -108,6 +110,12 @@ public class TestNameNodeMetrics {
@After
public void tearDown() throws Exception {
MetricsSource source = DefaultMetricsSystem.instance().getSource("UgiMetrics");
if (source != null) {
// Run only once since the UGI metrics is cleaned up during teardown
MetricsRecordBuilder rb = getMetrics(source);
assertQuantileGauges("GetGroups1s", rb);
}
cluster.shutdown();
}


@ -33,10 +33,15 @@ import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
import org.apache.hadoop.hdfs.HdfsConfiguration;
@ -55,7 +60,7 @@ public class TestGetConf {
enum TestType {
NAMENODE, BACKUP, SECONDARY, NNRPCADDRESSES
}
FileSystem localFileSys;
/** Setup federation nameServiceIds in the configuration */
private void setupNameServices(HdfsConfiguration conf, int nameServiceIdCount) {
StringBuilder nsList = new StringBuilder();
@ -379,4 +384,70 @@ public class TestGetConf {
}
}
}
@Test
public void testGetConfExcludeCommand() throws Exception {
HdfsConfiguration conf = new HdfsConfiguration();
// Set up the hosts/exclude files.
localFileSys = FileSystem.getLocal(conf);
Path workingDir = localFileSys.getWorkingDirectory();
Path dir = new Path(workingDir, System.getProperty("test.build.data", "target/test/data") + "/Getconf/");
Path hostsFile = new Path(dir, "hosts");
Path excludeFile = new Path(dir, "exclude");
// Setup conf
conf.set(DFSConfigKeys.DFS_HOSTS, hostsFile.toUri().getPath());
conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
writeConfigFile(hostsFile, null);
writeConfigFile(excludeFile, null);
String[] args = {"-excludeFile"};
String ret = runTool(conf, args, true);
assertEquals(excludeFile.toUri().getPath(), ret.trim());
cleanupFile(localFileSys, excludeFile.getParent());
}
@Test
public void testGetConfIncludeCommand() throws Exception {
HdfsConfiguration conf = new HdfsConfiguration();
// Set up the hosts/exclude files.
localFileSys = FileSystem.getLocal(conf);
Path workingDir = localFileSys.getWorkingDirectory();
Path dir = new Path(workingDir, System.getProperty("test.build.data", "target/test/data") + "/Getconf/");
Path hostsFile = new Path(dir, "hosts");
Path excludeFile = new Path(dir, "exclude");
// Setup conf
conf.set(DFSConfigKeys.DFS_HOSTS, hostsFile.toUri().getPath());
conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
writeConfigFile(hostsFile, null);
writeConfigFile(excludeFile, null);
String[] args = {"-includeFile"};
String ret = runTool(conf, args, true);
assertEquals(hostsFile.toUri().getPath(), ret.trim());
cleanupFile(localFileSys, excludeFile.getParent());
}
private void writeConfigFile(Path name, ArrayList<String> nodes)
throws IOException {
// delete if it already exists
if (localFileSys.exists(name)) {
localFileSys.delete(name, true);
}
FSDataOutputStream stm = localFileSys.create(name);
if (nodes != null) {
for (Iterator<String> it = nodes.iterator(); it.hasNext();) {
String node = it.next();
stm.writeBytes(node);
stm.writeBytes("\n");
}
}
stm.close();
}
private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
assertTrue(fileSys.exists(name));
fileSys.delete(name, true);
assertTrue(!fileSys.exists(name));
}
}


@ -26,8 +26,6 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -36,64 +34,58 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
import org.apache.hadoop.hdfs.server.namenode.OfflineEditsViewerHelper;
import org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer.Flags;
import org.apache.hadoop.test.PathUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import com.google.common.collect.ImmutableSet;
public class TestOfflineEditsViewer {
private static final Log LOG = LogFactory.getLog(TestOfflineEditsViewer.class);
private static final Log LOG = LogFactory
.getLog(TestOfflineEditsViewer.class);
private static final Map<FSEditLogOpCodes, Boolean> obsoleteOpCodes =
new HashMap<FSEditLogOpCodes, Boolean>();
private static final Map<FSEditLogOpCodes, Boolean> missingOpCodes =
new HashMap<FSEditLogOpCodes, Boolean>();
static {
initializeObsoleteOpCodes();
initializeMissingOpCodes();
}
private static String buildDir =
PathUtils.getTestDirName(TestOfflineEditsViewer.class);
private static String cacheDir =
System.getProperty("test.cache.data", "build/test/cache");
private static String buildDir = PathUtils
.getTestDirName(TestOfflineEditsViewer.class);
// to create edits and get edits filename
private static final OfflineEditsViewerHelper nnHelper
= new OfflineEditsViewerHelper();
private static final OfflineEditsViewerHelper nnHelper = new OfflineEditsViewerHelper();
private static final ImmutableSet<FSEditLogOpCodes> skippedOps = skippedOps();
/**
* Initialize obsoleteOpCodes
*
* Reason for suppressing "deprecation" warnings:
*
* These are the opcodes that are not used anymore, some
* are marked deprecated, we need to include them here to make
* sure we exclude them when checking for completeness of testing,
* that's why the "deprecation" warnings are suppressed.
*/
@SuppressWarnings("deprecation")
private static void initializeObsoleteOpCodes() {
obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_ADD, true);
obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_REMOVE, true);
obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_NS_QUOTA, true);
obsoleteOpCodes.put(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA, true);
private static ImmutableSet<FSEditLogOpCodes> skippedOps() {
ImmutableSet.Builder<FSEditLogOpCodes> b = ImmutableSet
.<FSEditLogOpCodes> builder();
// Deprecated opcodes
b.add(FSEditLogOpCodes.OP_DATANODE_ADD)
.add(FSEditLogOpCodes.OP_DATANODE_REMOVE)
.add(FSEditLogOpCodes.OP_SET_NS_QUOTA)
.add(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA)
.add(FSEditLogOpCodes.OP_SET_GENSTAMP_V1);
// Cannot test delegation token related code in insecure set up
b.add(FSEditLogOpCodes.OP_GET_DELEGATION_TOKEN)
.add(FSEditLogOpCodes.OP_RENEW_DELEGATION_TOKEN)
.add(FSEditLogOpCodes.OP_CANCEL_DELEGATION_TOKEN);
// Skip invalid opcode
b.add(FSEditLogOpCodes.OP_INVALID);
return b.build();
}
/**
* Initialize missingOpcodes
*
* Opcodes that are not available except after upgrade from
* an older version. We don't test these here.
*/
private static void initializeMissingOpCodes() {
obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_GENSTAMP_V1, true);
}
@Rule
public final TemporaryFolder folder = new TemporaryFolder();
@Before
public void setup() {
new File(cacheDir).mkdirs();
public void setUp() throws IOException {
nnHelper.startCluster(buildDir + "/dfs/");
}
@After
public void tearDown() throws IOException {
nnHelper.shutdownCluster();
}
/**
@ -101,54 +93,42 @@ public class TestOfflineEditsViewer {
*/
@Test
public void testGenerated() throws IOException {
LOG.info("START - testing with generated edits");
nnHelper.startCluster(buildDir + "/dfs/");
// edits generated by nnHelper (MiniDFSCluster), should have all op codes
// binary, XML, reparsed binary
String edits = nnHelper.generateEdits();
String editsParsedXml = cacheDir + "/editsParsed.xml";
String editsReparsed = cacheDir + "/editsReparsed";
String editsParsedXml = folder.newFile("editsParsed.xml").getAbsolutePath();
String editsReparsed = folder.newFile("editsParsed").getAbsolutePath();
// parse to XML then back to binary
assertEquals(0, runOev(edits, editsParsedXml, "xml", false));
assertEquals(0, runOev(editsParsedXml, editsReparsed, "binary", false));
// judgment time
assertTrue(
"Edits " + edits + " should have all op codes",
assertTrue("Edits " + edits + " should have all op codes",
hasAllOpCodes(edits));
LOG.info("Comparing generated file " + editsReparsed +
" with reference file " + edits);
LOG.info("Comparing generated file " + editsReparsed
+ " with reference file " + edits);
assertTrue(
"Generated edits and reparsed (bin to XML to bin) should be same",
filesEqualIgnoreTrailingZeros(edits, editsReparsed));
// removes edits so do this at the end
nnHelper.shutdownCluster();
LOG.info("END");
}
@Test
public void testRecoveryMode() throws IOException {
LOG.info("START - testing with generated edits");
nnHelper.startCluster(buildDir + "/dfs/");
// edits generated by nnHelper (MiniDFSCluster), should have all op codes
// binary, XML, reparsed binary
String edits = nnHelper.generateEdits();
FileOutputStream os = new FileOutputStream(edits, true);
// Corrupt the file by truncating the end
FileChannel editsFile = new FileOutputStream(edits, true).getChannel();
FileChannel editsFile = os.getChannel();
editsFile.truncate(editsFile.size() - 5);
String editsParsedXml = cacheDir + "/editsRecoveredParsed.xml";
String editsReparsed = cacheDir + "/editsRecoveredReparsed";
String editsParsedXml2 = cacheDir + "/editsRecoveredParsed2.xml";
String editsParsedXml = folder.newFile("editsRecoveredParsed.xml")
.getAbsolutePath();
String editsReparsed = folder.newFile("editsRecoveredReparsed")
.getAbsolutePath();
String editsParsedXml2 = folder.newFile("editsRecoveredParsed2.xml")
.getAbsolutePath();
// Can't read the corrupted file without recovery mode
assertEquals(-1, runOev(edits, editsParsedXml, "xml", false));
@ -162,18 +142,14 @@ public class TestOfflineEditsViewer {
assertTrue("Test round trip",
filesEqualIgnoreTrailingZeros(editsParsedXml, editsParsedXml2));
// removes edits so do this at the end
nnHelper.shutdownCluster();
LOG.info("END");
os.close();
}
@Test
public void testStored() throws IOException {
LOG.info("START - testing with stored reference edits");
// reference edits stored with source code (see build.xml)
final String cacheDir = System.getProperty("test.cache.data",
"build/test/cache");
// binary, XML, reparsed binary
String editsStored = cacheDir + "/editsStored";
String editsStoredParsedXml = cacheDir + "/editsStoredParsed.xml";
@ -183,21 +159,17 @@ public class TestOfflineEditsViewer {
// parse to XML then back to binary
assertEquals(0, runOev(editsStored, editsStoredParsedXml, "xml", false));
assertEquals(0, runOev(editsStoredParsedXml, editsStoredReparsed,
"binary", false));
assertEquals(0,
runOev(editsStoredParsedXml, editsStoredReparsed, "binary", false));
// judgment time
assertTrue(
"Edits " + editsStored + " should have all op codes",
assertTrue("Edits " + editsStored + " should have all op codes",
hasAllOpCodes(editsStored));
assertTrue(
"Reference XML edits and parsed to XML should be same",
assertTrue("Reference XML edits and parsed to XML should be same",
filesEqual(editsStoredXml, editsStoredParsedXml));
assertTrue(
"Reference edits and reparsed (bin to XML to bin) should be same",
filesEqualIgnoreTrailingZeros(editsStored, editsStoredReparsed));
LOG.info("END");
}
/**
@ -233,19 +205,14 @@ public class TestOfflineEditsViewer {
OfflineEditsViewer oev = new OfflineEditsViewer();
if (oev.go(inFilename, outFilename, "stats", new Flags(), visitor) != 0)
return false;
LOG.info("Statistics for " + inFilename + "\n" +
visitor.getStatisticsString());
LOG.info("Statistics for " + inFilename + "\n"
+ visitor.getStatisticsString());
boolean hasAllOpCodes = true;
for (FSEditLogOpCodes opCode : FSEditLogOpCodes.values()) {
// don't need to test obsolete opCodes
if(obsoleteOpCodes.containsKey(opCode)) {
if (skippedOps.contains(opCode))
continue;
} else if (missingOpCodes.containsKey(opCode)) {
continue;
} else if (opCode == FSEditLogOpCodes.OP_INVALID) {
continue;
}
Long count = visitor.getStatistics().get(opCode);
if ((count == null) || (count == 0)) {
@ -257,9 +224,9 @@ public class TestOfflineEditsViewer {
}
/**
* Compare two files, ignore trailing zeros at the end,
* for edits log the trailing zeros do not make any difference,
* throw exception is the files are not same
* Compare two files, ignoring trailing zeros at the end; for an edits log the
* trailing zeros do not make any difference. Throws an exception if the files
* are not the same.
*
* @param filenameSmall first file to compare (doesn't have to be smaller)
* @param filenameLarge second file to compare (doesn't have to be larger)
@ -288,7 +255,9 @@ public class TestOfflineEditsViewer {
large.limit(small.capacity());
// compares position to limit
if(!small.equals(large)) { return false; }
if (!small.equals(large)) {
return false;
}
// everything after limit should be 0xFF
int i = large.limit();

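Scratch files now come from JUnit's TemporaryFolder rule instead of the shared test.cache.data directory, which isolates each test run and cleans up automatically. A minimal sketch of the rule as adopted here:

    @Rule
    public final TemporaryFolder folder = new TemporaryFolder();

    @Test
    public void example() throws IOException {
      // Created under a fresh per-test directory; removed when the test ends.
      String editsParsedXml = folder.newFile("editsParsed.xml").getAbsolutePath();
    }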

@ -13,8 +13,8 @@
<TXID>2</TXID>
<DELEGATION_KEY>
<KEY_ID>1</KEY_ID>
<EXPIRY_DATE>1388171826188</EXPIRY_DATE>
<KEY>c7d869c22c8afce1</KEY>
<EXPIRY_DATE>1389121087930</EXPIRY_DATE>
<KEY>d48b4b3e6a43707b</KEY>
</DELEGATION_KEY>
</DATA>
</RECORD>
@ -24,8 +24,8 @@
<TXID>3</TXID>
<DELEGATION_KEY>
<KEY_ID>2</KEY_ID>
<EXPIRY_DATE>1388171826191</EXPIRY_DATE>
<KEY>a3c41446507dfca9</KEY>
<EXPIRY_DATE>1389121087937</EXPIRY_DATE>
<KEY>62b6fae6bff918a9</KEY>
</DELEGATION_KEY>
</DATA>
</RECORD>
@ -37,17 +37,17 @@
<INODEID>16386</INODEID>
<PATH>/file_create_u\0001;F431</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480626844</MTIME>
<ATIME>1387480626844</ATIME>
<MTIME>1388429889312</MTIME>
<ATIME>1388429889312</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>7</RPC_CALLID>
</DATA>
</RECORD>
@ -59,8 +59,8 @@
<INODEID>0</INODEID>
<PATH>/file_create_u\0001;F431</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480626885</MTIME>
<ATIME>1387480626844</ATIME>
<MTIME>1388429889328</MTIME>
<ATIME>1388429889312</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -78,8 +78,8 @@
<LENGTH>0</LENGTH>
<SRC>/file_create_u\0001;F431</SRC>
<DST>/file_moved</DST>
<TIMESTAMP>1387480626894</TIMESTAMP>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<TIMESTAMP>1388429889336</TIMESTAMP>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>9</RPC_CALLID>
</DATA>
</RECORD>
@ -89,8 +89,8 @@
<TXID>7</TXID>
<LENGTH>0</LENGTH>
<PATH>/file_moved</PATH>
<TIMESTAMP>1387480626905</TIMESTAMP>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<TIMESTAMP>1388429889346</TIMESTAMP>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>10</RPC_CALLID>
</DATA>
</RECORD>
@ -101,7 +101,7 @@
<LENGTH>0</LENGTH>
<INODEID>16387</INODEID>
<PATH>/directory_mkdir</PATH>
<TIMESTAMP>1387480626917</TIMESTAMP>
<TIMESTAMP>1388429889357</TIMESTAMP>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
@ -136,7 +136,7 @@
<TXID>12</TXID>
<SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
<SNAPSHOTNAME>snapshot1</SNAPSHOTNAME>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>15</RPC_CALLID>
</DATA>
</RECORD>
@ -147,7 +147,7 @@
<SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
<SNAPSHOTOLDNAME>snapshot1</SNAPSHOTOLDNAME>
<SNAPSHOTNEWNAME>snapshot2</SNAPSHOTNEWNAME>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>16</RPC_CALLID>
</DATA>
</RECORD>
@ -157,7 +157,7 @@
<TXID>14</TXID>
<SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
<SNAPSHOTNAME>snapshot2</SNAPSHOTNAME>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>17</RPC_CALLID>
</DATA>
</RECORD>
@ -169,17 +169,17 @@
<INODEID>16388</INODEID>
<PATH>/file_create_u\0001;F431</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480626978</MTIME>
<ATIME>1387480626978</ATIME>
<MTIME>1388429889412</MTIME>
<ATIME>1388429889412</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>18</RPC_CALLID>
</DATA>
</RECORD>
@ -191,8 +191,8 @@
<INODEID>0</INODEID>
<PATH>/file_create_u\0001;F431</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480626985</MTIME>
<ATIME>1387480626978</ATIME>
<MTIME>1388429889420</MTIME>
<ATIME>1388429889412</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -253,9 +253,9 @@
<LENGTH>0</LENGTH>
<SRC>/file_create_u\0001;F431</SRC>
<DST>/file_moved</DST>
<TIMESTAMP>1387480627035</TIMESTAMP>
<TIMESTAMP>1388429889495</TIMESTAMP>
<OPTIONS>NONE</OPTIONS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>25</RPC_CALLID>
</DATA>
</RECORD>
@ -267,17 +267,17 @@
<INODEID>16389</INODEID>
<PATH>/file_concat_target</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627043</MTIME>
<ATIME>1387480627043</ATIME>
<MTIME>1388429889511</MTIME>
<ATIME>1388429889511</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>27</RPC_CALLID>
</DATA>
</RECORD>
@ -388,8 +388,8 @@
<INODEID>0</INODEID>
<PATH>/file_concat_target</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627148</MTIME>
<ATIME>1387480627043</ATIME>
<MTIME>1388429889812</MTIME>
<ATIME>1388429889511</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -423,17 +423,17 @@
<INODEID>16390</INODEID>
<PATH>/file_concat_0</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627155</MTIME>
<ATIME>1387480627155</ATIME>
<MTIME>1388429889825</MTIME>
<ATIME>1388429889825</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>40</RPC_CALLID>
</DATA>
</RECORD>
@ -544,8 +544,8 @@
<INODEID>0</INODEID>
<PATH>/file_concat_0</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627193</MTIME>
<ATIME>1387480627155</ATIME>
<MTIME>1388429889909</MTIME>
<ATIME>1388429889825</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -579,17 +579,17 @@
<INODEID>16391</INODEID>
<PATH>/file_concat_1</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627200</MTIME>
<ATIME>1387480627200</ATIME>
<MTIME>1388429889920</MTIME>
<ATIME>1388429889920</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>52</RPC_CALLID>
</DATA>
</RECORD>
@ -700,8 +700,8 @@
<INODEID>0</INODEID>
<PATH>/file_concat_1</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627238</MTIME>
<ATIME>1387480627200</ATIME>
<MTIME>1388429890016</MTIME>
<ATIME>1388429889920</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -733,12 +733,12 @@
<TXID>56</TXID>
<LENGTH>0</LENGTH>
<TRG>/file_concat_target</TRG>
<TIMESTAMP>1387480627246</TIMESTAMP>
<TIMESTAMP>1388429890031</TIMESTAMP>
<SOURCES>
<SOURCE1>/file_concat_0</SOURCE1>
<SOURCE2>/file_concat_1</SOURCE2>
</SOURCES>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>63</RPC_CALLID>
</DATA>
</RECORD>
@ -750,14 +750,14 @@
<INODEID>16392</INODEID>
<PATH>/file_symlink</PATH>
<VALUE>/file_concat_target</VALUE>
<MTIME>1387480627255</MTIME>
<ATIME>1387480627255</ATIME>
<MTIME>1388429890046</MTIME>
<ATIME>1388429890046</ATIME>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>511</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>64</RPC_CALLID>
</DATA>
</RECORD>
@ -771,11 +771,11 @@
<OWNER>andrew</OWNER>
<RENEWER>JobTracker</RENEWER>
<REALUSER></REALUSER>
<ISSUE_DATE>1387480627262</ISSUE_DATE>
<MAX_DATE>1388085427262</MAX_DATE>
<ISSUE_DATE>1388429890059</ISSUE_DATE>
<MAX_DATE>1389034690059</MAX_DATE>
<MASTER_KEY_ID>2</MASTER_KEY_ID>
</DELEGATION_TOKEN_IDENTIFIER>
<EXPIRY_TIME>1387567027262</EXPIRY_TIME>
<EXPIRY_TIME>1388516290059</EXPIRY_TIME>
</DATA>
</RECORD>
<RECORD>
@ -788,11 +788,11 @@
<OWNER>andrew</OWNER>
<RENEWER>JobTracker</RENEWER>
<REALUSER></REALUSER>
<ISSUE_DATE>1387480627262</ISSUE_DATE>
<MAX_DATE>1388085427262</MAX_DATE>
<ISSUE_DATE>1388429890059</ISSUE_DATE>
<MAX_DATE>1389034690059</MAX_DATE>
<MASTER_KEY_ID>2</MASTER_KEY_ID>
</DELEGATION_TOKEN_IDENTIFIER>
<EXPIRY_TIME>1387567027281</EXPIRY_TIME>
<EXPIRY_TIME>1388516290109</EXPIRY_TIME>
</DATA>
</RECORD>
<RECORD>
@ -805,8 +805,8 @@
<OWNER>andrew</OWNER>
<RENEWER>JobTracker</RENEWER>
<REALUSER></REALUSER>
<ISSUE_DATE>1387480627262</ISSUE_DATE>
<MAX_DATE>1388085427262</MAX_DATE>
<ISSUE_DATE>1388429890059</ISSUE_DATE>
<MAX_DATE>1389034690059</MAX_DATE>
<MASTER_KEY_ID>2</MASTER_KEY_ID>
</DELEGATION_TOKEN_IDENTIFIER>
</DATA>
@ -821,7 +821,7 @@
<MODE>493</MODE>
<LIMIT>9223372036854775807</LIMIT>
<MAXRELATIVEEXPIRY>2305843009213693951</MAXRELATIVEEXPIRY>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>68</RPC_CALLID>
</DATA>
</RECORD>
@ -834,7 +834,7 @@
<GROUPNAME>party</GROUPNAME>
<MODE>448</MODE>
<LIMIT>1989</LIMIT>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>69</RPC_CALLID>
</DATA>
</RECORD>
@ -846,8 +846,8 @@
<PATH>/bar</PATH>
<REPLICATION>1</REPLICATION>
<POOL>poolparty</POOL>
<EXPIRATION>2305844396694321272</EXPIRATION>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<EXPIRATION>2305844397643584141</EXPIRATION>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>70</RPC_CALLID>
</DATA>
</RECORD>
@ -857,7 +857,7 @@
<TXID>64</TXID>
<ID>1</ID>
<PATH>/bar2</PATH>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>71</RPC_CALLID>
</DATA>
</RECORD>
@ -866,7 +866,7 @@
<DATA>
<TXID>65</TXID>
<ID>1</ID>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>72</RPC_CALLID>
</DATA>
</RECORD>
@ -875,7 +875,7 @@
<DATA>
<TXID>66</TXID>
<POOLNAME>poolparty</POOLNAME>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>73</RPC_CALLID>
</DATA>
</RECORD>
@ -887,17 +887,17 @@
<INODEID>16393</INODEID>
<PATH>/hard-lease-recovery-test</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627356</MTIME>
<ATIME>1387480627356</ATIME>
<MTIME>1388429890261</MTIME>
<ATIME>1388429890261</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>74</RPC_CALLID>
</DATA>
</RECORD>
@ -954,7 +954,23 @@
<OPCODE>OP_REASSIGN_LEASE</OPCODE>
<DATA>
<TXID>73</TXID>
<LEASEHOLDER>DFSClient_NONMAPREDUCE_1147796111_1</LEASEHOLDER>
<LEASEHOLDER>DFSClient_NONMAPREDUCE_-1396063717_1</LEASEHOLDER>
<PATH>/hard-lease-recovery-test</PATH>
<NEWHOLDER>HDFS_NameNode</NEWHOLDER>
</DATA>
</RECORD>
<RECORD>
<OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
<DATA>
<TXID>74</TXID>
<GENSTAMPV2>1012</GENSTAMPV2>
</DATA>
</RECORD>
<RECORD>
<OPCODE>OP_REASSIGN_LEASE</OPCODE>
<DATA>
<TXID>75</TXID>
<LEASEHOLDER>HDFS_NameNode</LEASEHOLDER>
<PATH>/hard-lease-recovery-test</PATH>
<NEWHOLDER>HDFS_NameNode</NEWHOLDER>
</DATA>
@ -962,20 +978,20 @@
<RECORD>
<OPCODE>OP_CLOSE</OPCODE>
<DATA>
<TXID>74</TXID>
<TXID>76</TXID>
<LENGTH>0</LENGTH>
<INODEID>0</INODEID>
<PATH>/hard-lease-recovery-test</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480629729</MTIME>
<ATIME>1387480627356</ATIME>
<MTIME>1388429895216</MTIME>
<ATIME>1388429890261</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
<BLOCK>
<BLOCK_ID>1073741834</BLOCK_ID>
<NUM_BYTES>11</NUM_BYTES>
<GENSTAMP>1011</GENSTAMP>
<GENSTAMP>1012</GENSTAMP>
</BLOCK>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
@ -987,7 +1003,7 @@
<RECORD>
<OPCODE>OP_END_LOG_SEGMENT</OPCODE>
<DATA>
<TXID>75</TXID>
<TXID>77</TXID>
</DATA>
</RECORD>
</EDITS>


@ -77,6 +77,9 @@ Trunk (Unreleased)
MAPREDUCE-5189. Add policies and wiring to respond to preemption requests
from YARN. (Carlo Curino via cdouglas)
MAPREDUCE-5196. Add bookkeeping for managing checkpoints of task state.
(Carlo Curino via cdouglas)
BUG FIXES
MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant.
@ -193,6 +196,8 @@ Release 2.4.0 - UNRELEASED
MAPREDUCE-5550. Task Status message (reporter.setStatus) not shown in UI
with Hadoop 2.0 (Gera Shegalov via Sandy Ryza)
MAPREDUCE-3310. Custom grouping comparator cannot be set for Combiners (tucu)
OPTIMIZATIONS
MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza)
@ -258,6 +263,15 @@ Release 2.4.0 - UNRELEASED
MAPREDUCE-5687. Fixed failure in TestYARNRunner caused by YARN-1446. (Jian He
via vinodkv)
MAPREDUCE-5694. Fixed MR AppMaster to shutdown the LogManager so as to avoid
losing syslog in some conditions. (Mohammad Kamrul Islam via vinodkv)
MAPREDUCE-5685. Fixed a bug with JobContext getCacheFiles API inside the
WrappedReducer class. (Yi Song via vinodkv)
MAPREDUCE-5689. MRAppMaster does not preempt reducers when scheduled maps
cannot be fulfilled. (lohit via kasha)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES


@ -36,7 +36,9 @@ import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.mapred.SortedRanges.Range;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener;
import org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler;
@ -45,8 +47,8 @@ import org.apache.hadoop.mapreduce.v2.app.job.Task;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.app.security.authorize.MRAMPolicyProvider;
@ -228,6 +230,22 @@ public class TaskAttemptListenerImpl extends CompositeService
TaskAttemptEventType.TA_COMMIT_PENDING));
}
@Override
public void preempted(TaskAttemptID taskAttemptID, TaskStatus taskStatus)
throws IOException, InterruptedException {
LOG.info("Preempted state update from " + taskAttemptID.toString());
// An attempt is telling us that it got preempted.
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
TypeConverter.toYarn(taskAttemptID);
preemptionPolicy.reportSuccessfulPreemption(attemptID);
taskHeartbeatHandler.progressing(attemptID);
context.getEventHandler().handle(
new TaskAttemptEvent(attemptID,
TaskAttemptEventType.TA_PREEMPTED));
}
@Override
public void done(TaskAttemptID taskAttemptID) throws IOException {
LOG.info("Done acknowledgement from " + taskAttemptID.toString());
@ -250,6 +268,10 @@ public class TaskAttemptListenerImpl extends CompositeService
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
TypeConverter.toYarn(taskAttemptID);
// handling checkpoints
preemptionPolicy.handleFailedContainer(attemptID);
context.getEventHandler().handle(
new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
}
@ -264,6 +286,10 @@ public class TaskAttemptListenerImpl extends CompositeService
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
TypeConverter.toYarn(taskAttemptID);
// handling checkpoints
preemptionPolicy.handleFailedContainer(attemptID);
context.getEventHandler().handle(
new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
}
@ -293,12 +319,6 @@ public class TaskAttemptListenerImpl extends CompositeService
return new MapTaskCompletionEventsUpdate(events, shouldReset);
}
@Override
public boolean ping(TaskAttemptID taskAttemptID) throws IOException {
LOG.info("Ping from " + taskAttemptID.toString());
return true;
}
@Override
public void reportDiagnosticInfo(TaskAttemptID taskAttemptID, String diagnosticInfo)
throws IOException {
@ -321,11 +341,33 @@ public class TaskAttemptListenerImpl extends CompositeService
}
@Override
public boolean statusUpdate(TaskAttemptID taskAttemptID,
public AMFeedback statusUpdate(TaskAttemptID taskAttemptID,
TaskStatus taskStatus) throws IOException, InterruptedException {
LOG.info("Status update from " + taskAttemptID.toString());
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptID =
TypeConverter.toYarn(taskAttemptID);
AMFeedback feedback = new AMFeedback();
feedback.setTaskFound(true);
// Propagating preemption to the task if TASK_PREEMPTION is enabled
if (getConfig().getBoolean(MRJobConfig.TASK_PREEMPTION, false)
&& preemptionPolicy.isPreempted(yarnAttemptID)) {
feedback.setPreemption(true);
LOG.info("Setting preemption bit for task: "+ yarnAttemptID
+ " of type " + yarnAttemptID.getTaskId().getTaskType());
}
if (taskStatus == null) {
//We are using statusUpdate only as a simple ping
LOG.info("Ping from " + taskAttemptID.toString());
taskHeartbeatHandler.progressing(yarnAttemptID);
return feedback;
}
// if we are here there is an actual status update to be processed
LOG.info("Status update from " + taskAttemptID.toString());
taskHeartbeatHandler.progressing(yarnAttemptID);
TaskAttemptStatus taskAttemptStatus =
new TaskAttemptStatus();
@ -386,7 +428,7 @@ public class TaskAttemptListenerImpl extends CompositeService
context.getEventHandler().handle(
new TaskAttemptStatusUpdateEvent(taskAttemptStatus.id,
taskAttemptStatus));
return true;
return feedback;
}
@Override
@ -494,4 +536,18 @@ public class TaskAttemptListenerImpl extends CompositeService
return ProtocolSignature.getProtocolSignature(this,
protocol, clientVersion, clientMethodsHash);
}
// task checkpoint bookkeeping
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
TaskId tid = TypeConverter.toYarn(taskId);
return preemptionPolicy.getCheckpointID(tid);
}
@Override
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
TaskId tid = TypeConverter.toYarn(taskId);
preemptionPolicy.setCheckpointID(tid, cid);
}
}

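The standalone ping() umbilical call is gone: statusUpdate() now doubles as the ping (a null TaskStatus) and returns an AMFeedback carrying liveness plus a preemption bit. A rough task-side view of the new contract; umbilical, attemptId and the getter names are assumptions inferred from the setters used above:

    // A heartbeat with no status payload acts as a ping.
    AMFeedback feedback = umbilical.statusUpdate(attemptId, null);
    if (!feedback.getTaskFound()) {
      // The AM no longer knows this attempt; the task should exit.
    }
    if (feedback.getPreemption()) {
      // Checkpoint state, then acknowledge via umbilical.preempted(...).
    }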

@ -139,6 +139,7 @@ import org.apache.hadoop.yarn.security.client.ClientToAMTokenSecretManager;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.log4j.LogManager;
import com.google.common.annotations.VisibleForTesting;
@ -1395,6 +1396,8 @@ public class MRAppMaster extends CompositeService {
} catch (Throwable t) {
LOG.fatal("Error starting MRAppMaster", t);
System.exit(1);
} finally {
LogManager.shutdown();
}
}


@ -47,6 +47,7 @@ public enum TaskAttemptEventType {
TA_FAILMSG,
TA_UPDATE,
TA_TIMED_OUT,
TA_PREEMPTED,
//Producer:TaskCleaner
TA_CLEANUP_DONE,


@ -304,6 +304,9 @@ public abstract class TaskAttemptImpl implements
.addTransition(TaskAttemptStateInternal.RUNNING,
TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP, TaskAttemptEventType.TA_KILL,
CLEANUP_CONTAINER_TRANSITION)
.addTransition(TaskAttemptStateInternal.RUNNING,
TaskAttemptStateInternal.KILLED,
TaskAttemptEventType.TA_PREEMPTED, new PreemptedTransition())
// Transitions from COMMIT_PENDING state
.addTransition(TaskAttemptStateInternal.COMMIT_PENDING,
@ -437,6 +440,7 @@ public abstract class TaskAttemptImpl implements
TaskAttemptEventType.TA_DONE,
TaskAttemptEventType.TA_FAILMSG,
TaskAttemptEventType.TA_CONTAINER_CLEANED,
TaskAttemptEventType.TA_PREEMPTED,
// Container launch events can arrive late
TaskAttemptEventType.TA_CONTAINER_LAUNCHED,
TaskAttemptEventType.TA_CONTAINER_LAUNCH_FAILED))
@ -1874,6 +1878,27 @@ public abstract class TaskAttemptImpl implements
}
}
private static class PreemptedTransition implements
SingleArcTransition<TaskAttemptImpl,TaskAttemptEvent> {
@SuppressWarnings("unchecked")
@Override
public void transition(TaskAttemptImpl taskAttempt,
TaskAttemptEvent event) {
taskAttempt.setFinishTime();
taskAttempt.taskAttemptListener.unregister(
taskAttempt.attemptId, taskAttempt.jvmID);
taskAttempt.eventHandler.handle(new ContainerLauncherEvent(
taskAttempt.attemptId,
taskAttempt.getAssignedContainerID(), taskAttempt.getAssignedContainerMgrAddress(),
taskAttempt.container.getContainerToken(),
ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP));
taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
taskAttempt.attemptId,
TaskEventType.T_ATTEMPT_KILLED));
}
}
private static class CleanupContainerTransition implements
SingleArcTransition<TaskAttemptImpl, TaskAttemptEvent> {
@SuppressWarnings("unchecked")


@ -229,7 +229,8 @@ public class RMContainerAllocator extends RMContainerRequestor
int completedMaps = getJob().getCompletedMaps();
int completedTasks = completedMaps + getJob().getCompletedReduces();
if (lastCompletedTasks != completedTasks) {
if ((lastCompletedTasks != completedTasks) ||
(scheduledRequests.maps.size() > 0)) {
lastCompletedTasks = completedTasks;
recalculateReduceSchedule = true;
}


@ -19,10 +19,9 @@ package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
import java.util.List;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.yarn.api.records.Container;
@ -81,7 +80,7 @@ public interface AMPreemptionPolicy {
* successfully preempted (for bookkeeping, counters, etc.)
* @param attemptID Task attempt that preempted
*/
public void reportSuccessfulPreemption(TaskAttemptID attemptID);
public void reportSuccessfulPreemption(TaskAttemptId attemptID);
/**
* Callback informing the policy of containers exiting with a failure. This
@ -98,20 +97,20 @@ public interface AMPreemptionPolicy {
public void handleCompletedContainer(TaskAttemptId attemptID);
/**
* Method to retrieve the latest checkpoint for a given {@link TaskID}
* Method to retrieve the latest checkpoint for a given {@link TaskId}
* @param taskId TaskID
* @return CheckpointID associated with this task or null
*/
public TaskCheckpointID getCheckpointID(TaskID taskId);
public TaskCheckpointID getCheckpointID(TaskId taskId);
/**
* Method to store the latest {@link
* org.apache.hadoop.mapreduce.checkpoint.CheckpointID} for a given {@link
* TaskID}. Assigning a null is akin to remove all previous checkpoints for
* TaskId}. Assigning null is akin to removing all previous checkpoints for
* this task.
* @param taskId TaskID
* @param cid Checkpoint to assign or <tt>null</tt> to remove it.
*/
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid);
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid);
}

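With the checkpoint methods moved from the mapred TaskID to the v2 TaskId record, callers convert once and then use the policy for bookkeeping, mirroring TaskAttemptListenerImpl above (policy and taskId are placeholders):

    TaskId tid = TypeConverter.toYarn(taskId);
    TaskCheckpointID cid = policy.getCheckpointID(tid); // null if none recorded
    policy.setCheckpointID(tid, cid); // assigning null clears prior checkpoints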

@ -0,0 +1,290 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapreduce.JobCounter;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.PreemptionContainer;
import org.apache.hadoop.yarn.api.records.PreemptionContract;
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
import org.apache.hadoop.yarn.event.EventHandler;
/**
* This policy works in combination with an implementation of task
* checkpointing. It computes the tasks to be preempted in response to the RM
* request for preemption. For strict requests, it maps containers to
* corresponding tasks; for fungible requests, it attempts to pick the best
* containers to preempt (reducers in reverse allocation order). The
* TaskAttemptListener will interrogate this policy when handling a task
* heartbeat to check whether the task should be preempted or not. When
* handling fungible requests, the policy discounts the RM ask by the amount
* of currently in-flight preemptions (i.e., tasks that are checkpointing).
*
* This class is also used to maintain the list of checkpoints for existing
* tasks. Centralizing this functionality here gives us visibility on
* preemption and checkpoints in a single location, thus coordinating
* preemption and checkpoint management decisions in a single policy.
*/
public class CheckpointAMPreemptionPolicy implements AMPreemptionPolicy {
// task attempts flagged for preemption
private final Set<TaskAttemptId> toBePreempted;
private final Set<TaskAttemptId> countedPreemptions;
private final Map<TaskId,TaskCheckpointID> checkpoints;
private final Map<TaskAttemptId,Resource> pendingFlexiblePreemptions;
@SuppressWarnings("rawtypes")
private EventHandler eventHandler;
static final Log LOG = LogFactory
.getLog(CheckpointAMPreemptionPolicy.class);
public CheckpointAMPreemptionPolicy() {
this(Collections.synchronizedSet(new HashSet<TaskAttemptId>()),
Collections.synchronizedSet(new HashSet<TaskAttemptId>()),
Collections.synchronizedMap(new HashMap<TaskId,TaskCheckpointID>()),
Collections.synchronizedMap(new HashMap<TaskAttemptId,Resource>()));
}
CheckpointAMPreemptionPolicy(Set<TaskAttemptId> toBePreempted,
Set<TaskAttemptId> countedPreemptions,
Map<TaskId,TaskCheckpointID> checkpoints,
Map<TaskAttemptId,Resource> pendingFlexiblePreemptions) {
this.toBePreempted = toBePreempted;
this.countedPreemptions = countedPreemptions;
this.checkpoints = checkpoints;
this.pendingFlexiblePreemptions = pendingFlexiblePreemptions;
}
@Override
public void init(AppContext context) {
this.eventHandler = context.getEventHandler();
}
@Override
public void preempt(Context ctxt, PreemptionMessage preemptionRequests) {
if (preemptionRequests != null) {
// handling non-negotiable preemption
StrictPreemptionContract cStrict = preemptionRequests.getStrictContract();
if (cStrict != null
&& cStrict.getContainers() != null
&& cStrict.getContainers().size() > 0) {
LOG.info("strict preemption :" +
preemptionRequests.getStrictContract().getContainers().size() +
" containers to kill");
// handle strict preemptions. These containers are non-negotiable
for (PreemptionContainer c :
preemptionRequests.getStrictContract().getContainers()) {
ContainerId reqCont = c.getId();
TaskAttemptId reqTask = ctxt.getTaskAttempt(reqCont);
if (reqTask != null) {
// ignore requests for preempting containers running maps
if (org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE
.equals(reqTask.getTaskId().getTaskType())) {
toBePreempted.add(reqTask);
LOG.info("preempting " + reqCont + " running task:" + reqTask);
} else {
LOG.info("NOT preempting " + reqCont + " running task:" + reqTask);
}
}
}
}
// handling negotiable preemption
PreemptionContract cNegot = preemptionRequests.getContract();
if (cNegot != null
&& cNegot.getResourceRequest() != null
&& cNegot.getResourceRequest().size() > 0
&& cNegot.getContainers() != null
&& cNegot.getContainers().size() > 0) {
LOG.info("negotiable preemption :" +
preemptionRequests.getContract().getResourceRequest().size() +
" resourceReq, " +
preemptionRequests.getContract().getContainers().size() +
" containers");
// handle fungible preemption. Here we only look at the total amount of
// resources to be preempted and pick enough of our containers to
// satisfy that. We only support checkpointing for reducers for now.
List<PreemptionResourceRequest> reqResources =
preemptionRequests.getContract().getResourceRequest();
// compute the total amount of pending preemptions (to be discounted
// from current request)
int pendingPreemptionRam = 0;
int pendingPreemptionCores = 0;
for (Resource r : pendingFlexiblePreemptions.values()) {
pendingPreemptionRam += r.getMemory();
pendingPreemptionCores += r.getVirtualCores();
}
// discount preemption request based on currently pending preemption
for (PreemptionResourceRequest rr : reqResources) {
ResourceRequest reqRsrc = rr.getResourceRequest();
if (!ResourceRequest.ANY.equals(reqRsrc.getResourceName())) {
// For now, only respond to aggregate requests and ignore locality
continue;
}
LOG.info("ResourceRequest:" + reqRsrc);
int reqCont = reqRsrc.getNumContainers();
int reqMem = reqRsrc.getCapability().getMemory();
int totalMemoryToRelease = reqCont * reqMem;
int reqCores = reqRsrc.getCapability().getVirtualCores();
int totalCoresToRelease = reqCont * reqCores;
// discount the ask by preemptions already in flight
if (pendingPreemptionRam > 0) {
// if this goes negative, the pending preemptions already cover the ask
totalMemoryToRelease -= pendingPreemptionRam;
// decrement pending resources; if zero or negative we will
// ignore it while processing the next PreemptionResourceRequest
pendingPreemptionRam -= totalMemoryToRelease;
}
if (pendingPreemptionCores > 0) {
totalCoresToRelease -= pendingPreemptionCores;
pendingPreemptionCores -= totalCoresToRelease;
}
// reverse order of allocation (for now)
List<Container> listOfCont = ctxt.getContainers(TaskType.REDUCE);
Collections.sort(listOfCont, new Comparator<Container>() {
@Override
public int compare(final Container o1, final Container o2) {
return o2.getId().getId() - o1.getId().getId();
}
});
// preempt reducers first
for (Container cont : listOfCont) {
if (totalMemoryToRelease <= 0 && totalCoresToRelease<=0) {
break;
}
TaskAttemptId reduceId = ctxt.getTaskAttempt(cont.getId());
int cMem = cont.getResource().getMemory();
int cCores = cont.getResource().getVirtualCores();
if (!toBePreempted.contains(reduceId)) {
totalMemoryToRelease -= cMem;
totalCoresToRelease -= cCores;
toBePreempted.add(reduceId);
pendingFlexiblePreemptions.put(reduceId, cont.getResource());
}
LOG.info("ResourceRequest:" + reqRsrc + " satisfied preempting "
+ reduceId);
}
// if maps were preemptable we would add them to toBePreempted here
}
}
}
}
@Override
public void handleFailedContainer(TaskAttemptId attemptID) {
toBePreempted.remove(attemptID);
checkpoints.remove(attemptID.getTaskId());
}
@Override
public void handleCompletedContainer(TaskAttemptId attemptID){
LOG.info(" task completed:" + attemptID);
toBePreempted.remove(attemptID);
pendingFlexiblePreemptions.remove(attemptID);
}
@Override
public boolean isPreempted(TaskAttemptId yarnAttemptID) {
if (toBePreempted.contains(yarnAttemptID)) {
updatePreemptionCounters(yarnAttemptID);
return true;
}
return false;
}
@Override
public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
// ignore
}
@Override
public TaskCheckpointID getCheckpointID(TaskId taskId) {
return checkpoints.get(taskId);
}
@Override
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
checkpoints.put(taskId, cid);
if (cid != null) {
updateCheckpointCounters(taskId, cid);
}
}
@SuppressWarnings({ "unchecked" })
private void updateCheckpointCounters(TaskId taskId, TaskCheckpointID cid) {
JobCounterUpdateEvent jce = new JobCounterUpdateEvent(taskId.getJobId());
jce.addCounterUpdate(JobCounter.CHECKPOINTS, 1);
eventHandler.handle(jce);
jce = new JobCounterUpdateEvent(taskId.getJobId());
jce.addCounterUpdate(JobCounter.CHECKPOINT_BYTES, cid.getCheckpointBytes());
eventHandler.handle(jce);
jce = new JobCounterUpdateEvent(taskId.getJobId());
jce.addCounterUpdate(JobCounter.CHECKPOINT_TIME, cid.getCheckpointTime());
eventHandler.handle(jce);
}
@SuppressWarnings({ "unchecked" })
private void updatePreemptionCounters(TaskAttemptId yarnAttemptID) {
if (!countedPreemptions.contains(yarnAttemptID)) {
countedPreemptions.add(yarnAttemptID);
JobCounterUpdateEvent jce = new JobCounterUpdateEvent(yarnAttemptID
.getTaskId().getJobId());
jce.addCounterUpdate(JobCounter.TASKS_REQ_PREEMPT, 1);
eventHandler.handle(jce);
}
}
}
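
A minimal standalone sketch of the discounting step above (not part of the patch; all values hypothetical): a pending 2GB preemption halves a 4GB fungible ask and is itself consumed.

// Illustrative only: how an RM ask is reduced by preemptions already
// in flight, mirroring the arithmetic in preempt() above.
public class DiscountSketch {
  public static void main(String[] args) {
    int pendingPreemptionRam = 2048;              // 2GB already checkpointing
    int reqCont = 4, reqMem = 1024;               // RM asks for 4 x 1GB
    int totalMemoryToRelease = reqCont * reqMem;  // 4096 MB
    if (pendingPreemptionRam > 0) {
      totalMemoryToRelease -= pendingPreemptionRam; // 2048 MB left to find
      pendingPreemptionRam -= totalMemoryToRelease; // 0: fully consumed
    }
    System.out.println("still to preempt: " + totalMemoryToRelease + "MB");
  }
}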

View File

@ -19,11 +19,10 @@ package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.JobCounter;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
@ -89,17 +88,17 @@ public class KillAMPreemptionPolicy implements AMPreemptionPolicy {
}
@Override
public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) {
public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
// ignore
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
public TaskCheckpointID getCheckpointID(TaskId taskId) {
return null;
}
@Override
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
// ignore
}

View File

@ -17,10 +17,9 @@
*/
package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
@ -50,17 +49,17 @@ public class NoopAMPreemptionPolicy implements AMPreemptionPolicy {
}
@Override
public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) {
public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
// ignore
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
public TaskCheckpointID getCheckpointID(TaskId taskId) {
return null;
}
@Override
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
// ignore
}

View File

@ -17,26 +17,23 @@
*/
package org.apache.hadoop.mapred;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapreduce.checkpoint.EnumCounter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import junit.framework.Assert;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent;
@ -46,21 +43,31 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.CheckpointAMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.util.SystemClock;
import org.junit.Test;
import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
public class TestTaskAttemptListenerImpl {
public static class MockTaskAttemptListenerImpl extends TaskAttemptListenerImpl {
public static class MockTaskAttemptListenerImpl
extends TaskAttemptListenerImpl {
public MockTaskAttemptListenerImpl(AppContext context,
JobTokenSecretManager jobTokenSecretManager,
RMHeartbeatHandler rmHeartbeatHandler,
TaskHeartbeatHandler hbHandler) {
super(context, jobTokenSecretManager, rmHeartbeatHandler, null);
TaskHeartbeatHandler hbHandler,
AMPreemptionPolicy policy) {
super(context, jobTokenSecretManager, rmHeartbeatHandler, policy);
this.taskHeartbeatHandler = hbHandler;
}
@ -87,9 +94,16 @@ public class TestTaskAttemptListenerImpl {
RMHeartbeatHandler rmHeartbeatHandler =
mock(RMHeartbeatHandler.class);
TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
Dispatcher dispatcher = mock(Dispatcher.class);
EventHandler ea = mock(EventHandler.class);
when(dispatcher.getEventHandler()).thenReturn(ea);
when(appCtx.getEventHandler()).thenReturn(ea);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appCtx);
MockTaskAttemptListenerImpl listener =
new MockTaskAttemptListenerImpl(appCtx, secret,
rmHeartbeatHandler, hbHandler);
rmHeartbeatHandler, hbHandler, policy);
Configuration conf = new Configuration();
listener.init(conf);
listener.start();
@ -144,7 +158,7 @@ public class TestTaskAttemptListenerImpl {
assertNotNull(jvmid);
try {
JVMId.forName("jvm_001_002_m_004_006");
Assert.fail();
fail();
} catch (IllegalArgumentException e) {
assertEquals(e.getMessage(),
"TaskId string : jvm_001_002_m_004_006 is not properly formed");
@ -190,8 +204,14 @@ public class TestTaskAttemptListenerImpl {
RMHeartbeatHandler rmHeartbeatHandler =
mock(RMHeartbeatHandler.class);
final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
TaskAttemptListenerImpl listener =
new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) {
Dispatcher dispatcher = mock(Dispatcher.class);
EventHandler ea = mock(EventHandler.class);
when(dispatcher.getEventHandler()).thenReturn(ea);
when(appCtx.getEventHandler()).thenReturn(ea);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appCtx);
TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
appCtx, secret, rmHeartbeatHandler, policy) {
@Override
protected void registerHeartbeatHandler(Configuration conf) {
taskHeartbeatHandler = hbHandler;
@ -219,7 +239,8 @@ public class TestTaskAttemptListenerImpl {
isMap ? org.apache.hadoop.mapreduce.v2.api.records.TaskType.MAP
: org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE);
TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(tid, 0);
RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
TaskAttemptCompletionEvent tce = recordFactory
.newRecordInstance(TaskAttemptCompletionEvent.class);
tce.setEventId(eventId);
@ -244,8 +265,14 @@ public class TestTaskAttemptListenerImpl {
RMHeartbeatHandler rmHeartbeatHandler =
mock(RMHeartbeatHandler.class);
final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
TaskAttemptListenerImpl listener =
new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) {
Dispatcher dispatcher = mock(Dispatcher.class);
EventHandler ea = mock(EventHandler.class);
when(dispatcher.getEventHandler()).thenReturn(ea);
when(appCtx.getEventHandler()).thenReturn(ea);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appCtx);
TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
appCtx, secret, rmHeartbeatHandler, policy) {
@Override
protected void registerHeartbeatHandler(Configuration conf) {
taskHeartbeatHandler = hbHandler;
@ -270,4 +297,88 @@ public class TestTaskAttemptListenerImpl {
listener.stop();
}
@Test
public void testCheckpointIDTracking()
throws IOException, InterruptedException{
SystemClock clock = new SystemClock();
org.apache.hadoop.mapreduce.v2.app.job.Task mockTask =
mock(org.apache.hadoop.mapreduce.v2.app.job.Task.class);
when(mockTask.canCommit(any(TaskAttemptId.class))).thenReturn(true);
Job mockJob = mock(Job.class);
when(mockJob.getTask(any(TaskId.class))).thenReturn(mockTask);
Dispatcher dispatcher = mock(Dispatcher.class);
EventHandler ea = mock(EventHandler.class);
when(dispatcher.getEventHandler()).thenReturn(ea);
RMHeartbeatHandler rmHeartbeatHandler =
mock(RMHeartbeatHandler.class);
AppContext appCtx = mock(AppContext.class);
when(appCtx.getJob(any(JobId.class))).thenReturn(mockJob);
when(appCtx.getClock()).thenReturn(clock);
when(appCtx.getEventHandler()).thenReturn(ea);
JobTokenSecretManager secret = mock(JobTokenSecretManager.class);
final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
when(appCtx.getEventHandler()).thenReturn(ea);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appCtx);
TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
appCtx, secret, rmHeartbeatHandler, policy) {
@Override
protected void registerHeartbeatHandler(Configuration conf) {
taskHeartbeatHandler = hbHandler;
}
};
Configuration conf = new Configuration();
conf.setBoolean(MRJobConfig.TASK_PREEMPTION, true);
//conf.setBoolean("preemption.reduce", true);
listener.init(conf);
listener.start();
TaskAttemptID tid = new TaskAttemptID("12345", 1, TaskType.REDUCE, 1, 0);
List<Path> partialOut = new ArrayList<Path>();
partialOut.add(new Path("/prev1"));
partialOut.add(new Path("/prev2"));
Counters counters = mock(Counters.class);
final long CBYTES = 64L * 1024 * 1024;
final long CTIME = 4344L;
final Path CLOC = new Path("/test/1");
Counter cbytes = mock(Counter.class);
when(cbytes.getValue()).thenReturn(CBYTES);
Counter ctime = mock(Counter.class);
when(ctime.getValue()).thenReturn(CTIME);
when(counters.findCounter(eq(EnumCounter.CHECKPOINT_BYTES)))
.thenReturn(cbytes);
when(counters.findCounter(eq(EnumCounter.CHECKPOINT_MS)))
.thenReturn(ctime);
// propagating a TaskStatus that contains a checkpoint id
TaskCheckpointID incid = new TaskCheckpointID(new FSCheckpointID(
CLOC), partialOut, counters);
listener.setCheckpointID(
org.apache.hadoop.mapred.TaskID.downgrade(tid.getTaskID()), incid);
// and try to get it back
CheckpointID outcid = listener.getCheckpointID(tid.getTaskID());
TaskCheckpointID tcid = (TaskCheckpointID) outcid;
assertEquals(CBYTES, tcid.getCheckpointBytes());
assertEquals(CTIME, tcid.getCheckpointTime());
assertTrue(partialOut.containsAll(tcid.getPartialCommittedOutput()));
assertTrue(tcid.getPartialCommittedOutput().containsAll(partialOut));
//assert it worked
assert outcid == incid;
listener.stop();
}
}

View File

@ -0,0 +1,329 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.app;
import org.apache.hadoop.yarn.api.records.PreemptionContract;
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.util.resource.Resources;
import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.mapred.TaskAttemptListenerImpl;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext;
import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.CheckpointAMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.api.records.PreemptionContainer;
import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest;
import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.junit.Before;
import org.junit.Test;
public class TestCheckpointPreemptionPolicy {
TaskAttemptListenerImpl pel = null;
RMContainerAllocator r;
JobId jid;
RunningAppContext mActxt;
Set<ContainerId> preemptedContainers = new HashSet<ContainerId>();
Map<ContainerId,TaskAttemptId> assignedContainers =
new HashMap<ContainerId, TaskAttemptId>();
private final RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
HashMap<ContainerId,Resource> contToResourceMap =
new HashMap<ContainerId, Resource>();
private int minAlloc = 1024;
@Before
@SuppressWarnings("rawtypes") // mocked generics
public void setup() {
ApplicationId appId = ApplicationId.newInstance(200, 1);
ApplicationAttemptId appAttemptId =
ApplicationAttemptId.newInstance(appId, 1);
jid = MRBuilderUtils.newJobId(appId, 1);
mActxt = mock(RunningAppContext.class);
EventHandler ea = mock(EventHandler.class);
when(mActxt.getEventHandler()).thenReturn(ea);
for (int i = 0; i < 40; ++i) {
ContainerId cId = ContainerId.newInstance(appAttemptId, i);
if (0 == i % 7) {
preemptedContainers.add(cId);
}
TaskId tId = 0 == i % 2
? MRBuilderUtils.newTaskId(jid, i / 2, TaskType.MAP)
: MRBuilderUtils.newTaskId(jid, i / 2 + 1, TaskType.REDUCE);
assignedContainers.put(cId, MRBuilderUtils.newTaskAttemptId(tId, 0));
contToResourceMap.put(cId, Resource.newInstance(2 * minAlloc, 2));
}
for (Map.Entry<ContainerId,TaskAttemptId> ent :
assignedContainers.entrySet()) {
System.out.println("cont:" + ent.getKey().getId() +
" type:" + ent.getValue().getTaskId().getTaskType() +
" res:" + contToResourceMap.get(ent.getKey()).getMemory() + "MB" );
}
}
@Test
public void testStrictPreemptionContract() {
final Map<ContainerId,TaskAttemptId> containers = assignedContainers;
AMPreemptionPolicy.Context mPctxt = new AMPreemptionPolicy.Context() {
@Override
public TaskAttemptId getTaskAttempt(ContainerId cId) {
return containers.get(cId);
}
@Override
public List<Container> getContainers(TaskType t) {
List<Container> p = new ArrayList<Container>();
for (Map.Entry<ContainerId,TaskAttemptId> ent :
assignedContainers.entrySet()) {
if (ent.getValue().getTaskId().getTaskType().equals(t)) {
p.add(Container.newInstance(ent.getKey(), null, null,
contToResourceMap.get(ent.getKey()),
Priority.newInstance(0), null));
}
}
return p;
}
};
PreemptionMessage pM = generatePreemptionMessage(preemptedContainers,
contToResourceMap, Resource.newInstance(1024, 1), true);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(mActxt);
policy.preempt(mPctxt, pM);
for (ContainerId c : preemptedContainers) {
TaskAttemptId t = assignedContainers.get(c);
if (TaskType.MAP.equals(t.getTaskId().getTaskType())) {
assert policy.isPreempted(t) == false;
} else {
assert policy.isPreempted(t);
}
}
}
@Test
public void testPreemptionContract() {
final Map<ContainerId,TaskAttemptId> containers = assignedContainers;
AMPreemptionPolicy.Context mPctxt = new AMPreemptionPolicy.Context() {
@Override
public TaskAttemptId getTaskAttempt(ContainerId cId) {
return containers.get(cId);
}
@Override
public List<Container> getContainers(TaskType t) {
List<Container> p = new ArrayList<Container>();
for (Map.Entry<ContainerId,TaskAttemptId> ent :
assignedContainers.entrySet()) {
if (ent.getValue().getTaskId().getTaskType().equals(t)) {
p.add(Container.newInstance(ent.getKey(), null, null,
contToResourceMap.get(ent.getKey()),
Priority.newInstance(0), null));
}
}
return p;
}
};
PreemptionMessage pM = generatePreemptionMessage(preemptedContainers,
contToResourceMap, Resource.newInstance(minAlloc, 1), false);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(mActxt);
int supposedMemPreemption = pM.getContract().getResourceRequest()
.get(0).getResourceRequest().getCapability().getMemory()
* pM.getContract().getResourceRequest().get(0).getResourceRequest()
.getNumContainers();
// first round of preemption
policy.preempt(mPctxt, pM);
List<TaskAttemptId> preempting =
validatePreemption(pM, policy, supposedMemPreemption);
// redundant message
policy.preempt(mPctxt, pM);
List<TaskAttemptId> preempting2 =
validatePreemption(pM, policy, supposedMemPreemption);
// check that nothing got added
assert preempting2.equals(preempting);
// simulate 2 task completions/successful preemption
policy.handleCompletedContainer(preempting.get(0));
policy.handleCompletedContainer(preempting.get(1));
// remove from assignedContainers
Iterator<Map.Entry<ContainerId,TaskAttemptId>> it =
assignedContainers.entrySet().iterator();
while (it.hasNext()) {
Map.Entry<ContainerId,TaskAttemptId> ent = it.next();
if (ent.getValue().equals(preempting.get(0)) ||
ent.getValue().equals(preempting.get(1)))
it.remove();
}
// one more message asking for preemption
policy.preempt(mPctxt, pM);
// triggers preemption of 2 more containers (i.e., the preemption set changes)
List<TaskAttemptId> preempting3 =
validatePreemption(pM, policy, supposedMemPreemption);
assert preempting3.equals(preempting2) == false;
}
private List<TaskAttemptId> validatePreemption(PreemptionMessage pM,
CheckpointAMPreemptionPolicy policy, int supposedMemPreemption) {
Resource effectivelyPreempted = Resource.newInstance(0, 0);
List<TaskAttemptId> preempting = new ArrayList<TaskAttemptId>();
for (Map.Entry<ContainerId, TaskAttemptId> ent :
assignedContainers.entrySet()) {
if (policy.isPreempted(ent.getValue())) {
Resources.addTo(effectivelyPreempted, contToResourceMap.get(ent.getKey()));
// preempt only reducers
if (policy.isPreempted(ent.getValue())){
assertEquals(TaskType.REDUCE, ent.getValue().getTaskId().getTaskType());
preempting.add(ent.getValue());
}
}
}
// check that we preempted enough
assert (effectivelyPreempted.getMemory() >= supposedMemPreemption)
: " preempted: " + effectivelyPreempted.getMemory();
// but not too much (at most one minimum allocation over the ask)
assert effectivelyPreempted.getMemory() <= supposedMemPreemption + minAlloc;
return preempting;
}
private PreemptionMessage generatePreemptionMessage(
Set<ContainerId> containerToPreempt,
HashMap<ContainerId, Resource> resPerCont,
Resource minimumAllocation, boolean strict) {
Set<ContainerId> currentContPreemption = Collections.unmodifiableSet(
new HashSet<ContainerId>(containerToPreempt));
containerToPreempt.clear();
Resource tot = Resource.newInstance(0, 0);
for (ContainerId c : currentContPreemption) {
Resources.addTo(tot, resPerCont.get(c));
}
int numCont = (int) Math.ceil(tot.getMemory() /
(double) minimumAllocation.getMemory());
ResourceRequest rr = ResourceRequest.newInstance(
Priority.newInstance(0), ResourceRequest.ANY,
minimumAllocation, numCont);
if (strict) {
return generatePreemptionMessage(new Allocation(null, null,
currentContPreemption, null, null));
}
return generatePreemptionMessage(new Allocation(null, null,
null, currentContPreemption,
Collections.singletonList(rr)));
}
private PreemptionMessage generatePreemptionMessage(Allocation allocation) {
PreemptionMessage pMsg = null;
// assemble strict preemption request
if (allocation.getStrictContainerPreemptions() != null) {
pMsg = recordFactory.newRecordInstance(PreemptionMessage.class);
StrictPreemptionContract pStrict =
recordFactory.newRecordInstance(StrictPreemptionContract.class);
Set<PreemptionContainer> pCont = new HashSet<PreemptionContainer>();
for (ContainerId cId : allocation.getStrictContainerPreemptions()) {
PreemptionContainer pc =
recordFactory.newRecordInstance(PreemptionContainer.class);
pc.setId(cId);
pCont.add(pc);
}
pStrict.setContainers(pCont);
pMsg.setStrictContract(pStrict);
}
// assemble negotiable preemption request
if (allocation.getResourcePreemptions() != null &&
allocation.getResourcePreemptions().size() > 0 &&
allocation.getContainerPreemptions() != null &&
allocation.getContainerPreemptions().size() > 0) {
if (pMsg == null) {
pMsg = recordFactory.newRecordInstance(PreemptionMessage.class);
}
PreemptionContract contract =
recordFactory.newRecordInstance(PreemptionContract.class);
Set<PreemptionContainer> pCont = new HashSet<PreemptionContainer>();
for (ContainerId cId : allocation.getContainerPreemptions()) {
PreemptionContainer pc =
recordFactory.newRecordInstance(PreemptionContainer.class);
pc.setId(cId);
pCont.add(pc);
}
List<PreemptionResourceRequest> pRes =
new ArrayList<PreemptionResourceRequest>();
for (ResourceRequest crr : allocation.getResourcePreemptions()) {
PreemptionResourceRequest prr =
recordFactory.newRecordInstance(PreemptionResourceRequest.class);
prr.setResourceRequest(crr);
pRes.add(prr);
}
contract.setContainers(pCont);
contract.setResourceRequest(pRes);
pMsg.setContract(contract);
}
return pMsg;
}
}

View File

@ -1604,6 +1604,21 @@ public class TestRMContainerAllocator {
numPendingReduces,
maxReduceRampupLimit, reduceSlowStart);
verify(allocator).rampDownReduces(anyInt());
// Test reduce ramp-down for when there are still scheduled maps.
// After this second call to scheduleReduces, rampDownReduces
// should have been invoked twice in total.
scheduledMaps = 2;
assignedReduces = 2;
doReturn(10 * 1024).when(allocator).getMemLimit();
allocator.scheduleReduces(
totalMaps, succeededMaps,
scheduledMaps, scheduledReduces,
assignedMaps, assignedReduces,
mapResourceReqt, reduceResourceReqt,
numPendingReduces,
maxReduceRampupLimit, reduceSlowStart);
verify(allocator, times(2)).rampDownReduces(anyInt());
}
private static class RecalculateContainerAllocator extends MyContainerAllocator {

View File

@ -53,6 +53,7 @@ import org.apache.hadoop.mapreduce.QueueInfo;
import org.apache.hadoop.mapreduce.TaskCompletionEvent;
import org.apache.hadoop.mapreduce.TaskTrackerInfo;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.protocol.ClientProtocol;
import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
@ -575,10 +576,17 @@ public class LocalJobRunner implements ClientProtocol {
// TaskUmbilicalProtocol methods
@Override
public JvmTask getTask(JvmContext context) { return null; }
public synchronized boolean statusUpdate(TaskAttemptID taskId,
@Override
public synchronized AMFeedback statusUpdate(TaskAttemptID taskId,
TaskStatus taskStatus) throws IOException, InterruptedException {
AMFeedback feedback = new AMFeedback();
feedback.setTaskFound(true);
if (null == taskStatus) {
return feedback;
}
// Serialize as we would if distributed in order to make deep copy
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
@ -618,7 +626,7 @@ public class LocalJobRunner implements ClientProtocol {
}
// ignore phase
return true;
return feedback;
}
/** Return the current values of the counters for this job,
@ -654,24 +662,24 @@ public class LocalJobRunner implements ClientProtocol {
statusUpdate(taskid, taskStatus);
}
@Override
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) {
// Ignore for now
}
@Override
public void reportNextRecordRange(TaskAttemptID taskid,
SortedRanges.Range range) throws IOException {
LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
}
public boolean ping(TaskAttemptID taskid) throws IOException {
return true;
}
@Override
public boolean canCommit(TaskAttemptID taskid)
throws IOException {
return true;
}
@Override
public void done(TaskAttemptID taskId) throws IOException {
int taskIndex = mapIds.indexOf(taskId);
if (taskIndex >= 0) { // mapping
@ -681,11 +689,13 @@ public class LocalJobRunner implements ClientProtocol {
}
}
@Override
public synchronized void fsError(TaskAttemptID taskId, String message)
throws IOException {
LOG.fatal("FSError: " + message + " from task: " + taskId);
}
@Override
public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
LOG.fatal("shuffleError: " + message + " from task: " + taskId);
}
@ -695,12 +705,30 @@ public class LocalJobRunner implements ClientProtocol {
LOG.fatal("Fatal: " + msg + " from task: " + taskId);
}
@Override
public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId,
int fromEventId, int maxLocs, TaskAttemptID id) throws IOException {
return new MapTaskCompletionEventsUpdate(
org.apache.hadoop.mapred.TaskCompletionEvent.EMPTY_ARRAY, false);
}
@Override
public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
// ignore
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
// ignore
return null;
}
@Override
public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
// ignore
}
}
public LocalJobRunner(Configuration conf) throws IOException {

View File

@ -44,6 +44,8 @@ import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
@ -83,11 +85,10 @@ public class TestMRWithDistributedCache extends TestCase {
private static final Log LOG =
LogFactory.getLog(TestMRWithDistributedCache.class);
public static class DistributedCacheChecker extends
Mapper<LongWritable, Text, NullWritable, NullWritable> {
private static class DistributedCacheChecker {
@Override
public void setup(Context context) throws IOException {
public void setup(TaskInputOutputContext<?, ?, ?, ?> context)
throws IOException {
Configuration conf = context.getConfiguration();
Path[] localFiles = context.getLocalCacheFiles();
URI[] files = context.getCacheFiles();
@ -101,6 +102,10 @@ public class TestMRWithDistributedCache extends TestCase {
TestCase.assertEquals(2, files.length);
TestCase.assertEquals(2, archives.length);
// Check the file name
TestCase.assertTrue(files[0].getPath().endsWith("distributed.first"));
TestCase.assertTrue(files[1].getPath().endsWith("distributed.second.jar"));
// Check lengths of the files
TestCase.assertEquals(1, fs.getFileStatus(localFiles[0]).getLen());
TestCase.assertTrue(fs.getFileStatus(localFiles[1]).getLen() > 1);
@ -130,6 +135,26 @@ public class TestMRWithDistributedCache extends TestCase {
TestCase.assertTrue("second file should be symlinked too",
expectedAbsentSymlinkFile.exists());
}
}
public static class DistributedCacheCheckerMapper extends
Mapper<LongWritable, Text, NullWritable, NullWritable> {
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
new DistributedCacheChecker().setup(context);
}
}
public static class DistributedCacheCheckerReducer extends
Reducer<LongWritable, Text, NullWritable, NullWritable> {
@Override
public void setup(Context context) throws IOException {
new DistributedCacheChecker().setup(context);
}
}
private void testWithConf(Configuration conf) throws IOException,
@ -146,7 +171,8 @@ public class TestMRWithDistributedCache extends TestCase {
Job job = Job.getInstance(conf);
job.setMapperClass(DistributedCacheChecker.class);
job.setMapperClass(DistributedCacheCheckerMapper.class);
job.setReducerClass(DistributedCacheCheckerReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
FileInputFormat.setInputPaths(job, first);
// Creates the Job Configuration

View File

@ -0,0 +1,63 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
* This class is a simple struct to include both the taskFound information and
* a possible preemption request coming from the AM.
*/
public class AMFeedback implements Writable {
boolean taskFound;
boolean preemption;
public void setTaskFound(boolean t){
taskFound=t;
}
public boolean getTaskFound(){
return taskFound;
}
public void setPreemption(boolean preemption) {
this.preemption=preemption;
}
public boolean getPreemption() {
return preemption;
}
@Override
public void write(DataOutput out) throws IOException {
out.writeBoolean(taskFound);
out.writeBoolean(preemption);
}
@Override
public void readFields(DataInput in) throws IOException {
taskFound = in.readBoolean();
preemption = in.readBoolean();
}
}
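
Since AMFeedback travels over the umbilical RPC as a Writable, here is a round-trip sketch of the two flags (illustrative, not part of the patch):

import java.io.*;
import org.apache.hadoop.mapred.AMFeedback;

// Write the two flags out and read them back, as the RPC layer would.
public class AMFeedbackRoundTrip {
  public static void main(String[] args) throws IOException {
    AMFeedback in = new AMFeedback();
    in.setTaskFound(true);
    in.setPreemption(true);

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    in.write(new DataOutputStream(bytes));

    AMFeedback out = new AMFeedback();
    out.readFields(new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray())));
    // out.getTaskFound() and out.getPreemption() are both true again
  }
}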

View File

@ -949,6 +949,23 @@ public class JobConf extends Configuration {
return get(KeyFieldBasedPartitioner.PARTITIONER_OPTIONS);
}
/**
* Get the user defined {@link WritableComparable} comparator for
* grouping keys of inputs to the combiner.
*
* @return comparator set by the user for grouping values.
* @see #setCombinerKeyGroupingComparator(Class) for details.
*/
public RawComparator getCombinerKeyGroupingComparator() {
Class<? extends RawComparator> theClass = getClass(
JobContext.COMBINER_GROUP_COMPARATOR_CLASS, null, RawComparator.class);
if (theClass == null) {
return getOutputKeyComparator();
}
return ReflectionUtils.newInstance(theClass, this);
}
/**
* Get the user defined {@link WritableComparable} comparator for
* grouping keys of inputs to the reduce.
@ -966,6 +983,37 @@ public class JobConf extends Configuration {
return ReflectionUtils.newInstance(theClass, this);
}
/**
* Set the user defined {@link RawComparator} comparator for
* grouping keys in the input to the combiner.
* <p/>
* <p>This comparator should be provided if the equivalence rules for keys
* for sorting the intermediates are different from those for grouping keys
* before each call to
* {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
* <p/>
* <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
* in a single call to the reduce function if K1 and K2 compare as equal.</p>
* <p/>
* <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
* how keys are sorted, this can be used in conjunction to simulate
* <i>secondary sort on values</i>.</p>
* <p/>
* <p><i>Note</i>: This is not a guarantee of the combiner sort being
* <i>stable</i> in any sense. (In any case, with the order of available
* map-outputs to the combiner being non-deterministic, it wouldn't make
* that much sense.)</p>
*
* @param theClass the comparator class to be used for grouping keys for the
* combiner. It should implement <code>RawComparator</code>.
* @see #setOutputKeyComparatorClass(Class)
*/
public void setCombinerKeyGroupingComparator(
Class<? extends RawComparator> theClass) {
setClass(JobContext.COMBINER_GROUP_COMPARATOR_CLASS,
theClass, RawComparator.class);
}
/**
* Set the user defined {@link RawComparator} comparator for
* grouping keys in the input to the reduce.
@ -990,6 +1038,7 @@ public class JobConf extends Configuration {
* @param theClass the comparator class to be used for grouping keys.
* It should implement <code>RawComparator</code>.
* @see #setOutputKeyComparatorClass(Class)
* @see #setCombinerKeyGroupingComparator(Class)
*/
public void setOutputValueGroupingComparator(
Class<? extends RawComparator> theClass) {

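A hypothetical usage sketch of the new combiner grouping knob (the comparator below is illustrative, not part of the patch): map output keys of the form primary#secondary stay fully sorted, while the combiner and the reduce group values by the primary part only.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.JobConf;

public class CombinerGroupingExample {

  // Hypothetical comparator: groups "primary#secondary" Text keys by the
  // primary part only, while the sort still sees the full key.
  public static class PrimaryKeyGroupingComparator extends WritableComparator {
    public PrimaryKeyGroupingComparator() {
      super(Text.class, true);
    }
    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
      String pa = a.toString().split("#", 2)[0];
      String pb = b.toString().split("#", 2)[0];
      return pa.compareTo(pb);
    }
  }

  public static JobConf configure(JobConf conf) {
    // default Text ordering sorts on the full "primary#secondary" key;
    // the combiner and the reduce both group on the primary part only
    conf.setCombinerKeyGroupingComparator(PrimaryKeyGroupingComparator.class);
    conf.setOutputValueGroupingComparator(PrimaryKeyGroupingComparator.class);
    return conf;
  }
}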
View File

@ -187,6 +187,7 @@ abstract public class Task implements Writable, Configurable {
protected SecretKey tokenSecret;
protected SecretKey shuffleSecret;
protected GcTimeUpdater gcUpdater;
final AtomicBoolean mustPreempt = new AtomicBoolean(false);
////////////////////////////////////////////
// Constructors
@ -711,6 +712,7 @@ abstract public class Task implements Writable, Configurable {
}
try {
boolean taskFound = true; // whether TT knows about this task
AMFeedback amFeedback = null;
// sleep for a bit
synchronized(lock) {
if (taskDone.get()) {
@ -728,12 +730,14 @@ abstract public class Task implements Writable, Configurable {
taskStatus.statusUpdate(taskProgress.get(),
taskProgress.toString(),
counters);
taskFound = umbilical.statusUpdate(taskId, taskStatus);
amFeedback = umbilical.statusUpdate(taskId, taskStatus);
taskFound = amFeedback.getTaskFound();
taskStatus.clearStatus();
}
else {
// send ping
taskFound = umbilical.ping(taskId);
amFeedback = umbilical.statusUpdate(taskId, null);
taskFound = amFeedback.getTaskFound();
}
// if Task Tracker is not aware of our task ID (probably because it died and
@ -744,6 +748,17 @@ abstract public class Task implements Writable, Configurable {
System.exit(66);
}
// Set a flag that says we should preempt; this is read by
// ReduceTasks at points in the execution where it is
// safe/easy to preempt
boolean lastPreempt = mustPreempt.get();
mustPreempt.set(mustPreempt.get() || amFeedback.getPreemption());
if (lastPreempt ^ mustPreempt.get()) {
LOG.info("PREEMPTION TASK: setting mustPreempt to " +
mustPreempt.get() + " given " + amFeedback.getPreemption() +
" for " + taskId + " task status: " + taskStatus.getPhase());
}
sendProgress = resetProgressFlag();
remainingRetries = MAX_RETRIES;
}
@ -992,10 +1007,17 @@ abstract public class Task implements Writable, Configurable {
public void done(TaskUmbilicalProtocol umbilical,
TaskReporter reporter
) throws IOException, InterruptedException {
updateCounters();
if (taskStatus.getRunState() == TaskStatus.State.PREEMPTED ) {
// If we are preempted, do no output promotion; signal done and exit
committer.commitTask(taskContext);
umbilical.preempted(taskId, taskStatus);
taskDone.set(true);
reporter.stopCommunicationThread();
return;
}
LOG.info("Task:" + taskId + " is done."
+ " And is in the process of committing");
updateCounters();
boolean commitRequired = isCommitRequired();
if (commitRequired) {
int retries = MAX_RETRIES;
@ -1054,7 +1076,7 @@ abstract public class Task implements Writable, Configurable {
int retries = MAX_RETRIES;
while (true) {
try {
if (!umbilical.statusUpdate(getTaskID(), taskStatus)) {
if (!umbilical.statusUpdate(getTaskID(), taskStatus).getTaskFound()) {
LOG.warn("Parent died. Exiting "+taskId);
System.exit(66);
}
@ -1098,8 +1120,8 @@ abstract public class Task implements Writable, Configurable {
if (isMapTask() && conf.getNumReduceTasks() > 0) {
try {
Path mapOutput = mapOutputFile.getOutputFile();
FileSystem localFS = FileSystem.getLocal(conf);
return localFS.getFileStatus(mapOutput).getLen();
FileSystem fs = mapOutput.getFileSystem(conf);
return fs.getFileStatus(mapOutput).getLen();
} catch (IOException e) {
LOG.warn ("Could not find output size " , e);
}
@ -1553,7 +1575,8 @@ abstract public class Task implements Writable, Configurable {
combinerClass = cls;
keyClass = (Class<K>) job.getMapOutputKeyClass();
valueClass = (Class<V>) job.getMapOutputValueClass();
comparator = (RawComparator<K>) job.getOutputKeyComparator();
comparator = (RawComparator<K>)
job.getCombinerKeyGroupingComparator();
}
@SuppressWarnings("unchecked")
@ -1602,7 +1625,7 @@ abstract public class Task implements Writable, Configurable {
this.taskId = taskId;
keyClass = (Class<K>) context.getMapOutputKeyClass();
valueClass = (Class<V>) context.getMapOutputValueClass();
comparator = (RawComparator<K>) context.getSortComparator();
comparator = (RawComparator<K>) context.getCombinerKeyGroupingComparator();
this.committer = committer;
}
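
The mustPreempt flag set in the reporter loop above is only a signal; acting on it is left to code at a point where stopping is safe. An illustrative shape of such a consumer inside Task (a sketch, not from this patch):

// Illustrative only: a preemption-safe point consuming the flag that
// the reporter thread sets from AMFeedback.
if (mustPreempt.get()) {
  taskStatus.setRunState(TaskStatus.State.PREEMPTED);
  // done() then takes the preemption branch shown above: commit the
  // partial output, call umbilical.preempted(), and return early
  done(umbilical, reporter);
  return;
}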

View File

@ -51,7 +51,7 @@ public abstract class TaskStatus implements Writable, Cloneable {
@InterfaceAudience.Private
@InterfaceStability.Unstable
public static enum State {RUNNING, SUCCEEDED, FAILED, UNASSIGNED, KILLED,
COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN}
COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN, PREEMPTED}
private final TaskAttemptID taskid;
private float progress;

View File

@ -24,6 +24,9 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.ipc.VersionedProtocol;
import org.apache.hadoop.mapred.JvmTask;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.security.token.JobTokenSelector;
import org.apache.hadoop.security.token.TokenInfo;
@ -64,9 +67,10 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
* Version 17 Modified TaskID to be aware of the new TaskTypes
* Version 18 Added numRequiredSlots to TaskStatus for MAPREDUCE-516
* Version 19 Added fatalError for child to communicate fatal errors to TT
* Version 20 Added methods to manage checkpoints
* */
public static final long versionID = 19L;
public static final long versionID = 20L;
/**
* Called when a child task process starts, to get its task.
@ -78,7 +82,8 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
JvmTask getTask(JvmContext context) throws IOException;
/**
* Report child's progress to parent.
* Report child's progress to parent. Also invoked to report that the child
* is still alive (this used to be done via ping). Returns an AMFeedback used
* to propagate preemption requests.
*
* @param taskId task-id of the child
* @param taskStatus status of the child
@ -86,7 +91,7 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
* @throws InterruptedException
* @return an AMFeedback, which includes whether the task is known
*/
boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException;
/** Report error messages back to parent. Calls should be sparing, since all
@ -105,11 +110,6 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
void reportNextRecordRange(TaskAttemptID taskid, SortedRanges.Range range)
throws IOException;
/** Periodically called by child to check if parent is still alive.
* @return True if the task is known
*/
boolean ping(TaskAttemptID taskid) throws IOException;
/** Report that the task is successfully completed. Failure is assumed if
* the task process exits without calling this.
* @param taskid task's id
@ -161,4 +161,33 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
TaskAttemptID id)
throws IOException;
/**
* Report to the AM that the task has been successfully preempted.
*
* @param taskId task's id
* @param taskStatus status of the child
* @throws IOException
*/
void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException;
/**
* Return the latest CheckpointID for the given TaskID. This provides
* the task with a way to locate the checkpointed data and restart from
* that point in the computation.
*
* @param taskID task's id
* @return the most recent checkpoint (if any) for this task
* @throws IOException
*/
TaskCheckpointID getCheckpointID(TaskID taskID);
/**
* Send a CheckpointID for a given TaskID to be stored in the AM,
* to later restart a task from this checkpoint.
* @param tid task's id
* @param cid the checkpoint to store (null clears previous checkpoints)
*/
void setCheckpointID(TaskID tid, TaskCheckpointID cid);
}
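
Taken together, the two new checkpoint methods support a save/restore handshake. An illustrative restart flow (resumeFrom and newCid are hypothetical names; error handling elided):

// On (re)start: ask the AM for the latest checkpoint, if any.
TaskCheckpointID cid = umbilical.getCheckpointID(taskId.getTaskID());
if (cid != null) {
  resumeFrom(cid);  // hypothetical helper: reopen partial output, skip work
}
// ... later, once the task materializes a new checkpoint:
umbilical.setCheckpointID(taskId.getTaskID(), newCid);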

View File

@ -948,11 +948,27 @@ public class Job extends JobContextImpl implements JobContext {
conf.setOutputValueClass(theClass);
}
/**
* Define the comparator that controls which keys are grouped together
* for a single call to the combiner,
* {@link Reducer#reduce(Object, Iterable,
* org.apache.hadoop.mapreduce.Reducer.Context)}
*
* @param cls the raw comparator to use
* @throws IllegalStateException if the job is submitted
*/
public void setCombinerKeyGroupingComparatorClass(
Class<? extends RawComparator> cls) throws IllegalStateException {
ensureState(JobState.DEFINE);
conf.setCombinerKeyGroupingComparator(cls);
}
/**
* Define the comparator that controls how the keys are sorted before they
* are passed to the {@link Reducer}.
* @param cls the raw comparator
* @throws IllegalStateException if the job is submitted
* @see #setCombinerKeyGroupingComparatorClass(Class)
*/
public void setSortComparatorClass(Class<? extends RawComparator> cls
) throws IllegalStateException {
@ -967,6 +983,7 @@ public class Job extends JobContextImpl implements JobContext {
* org.apache.hadoop.mapreduce.Reducer.Context)}
* @param cls the raw comparator to use
* @throws IllegalStateException if the job is submitted
* @see #setCombinerKeyGroupingComparatorClass(Class)
*/
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
) throws IllegalStateException {

View File

@ -167,12 +167,22 @@ public interface JobContext extends MRJobConfig {
*/
public String getJar();
/**
* Get the user defined {@link RawComparator} comparator for
* grouping keys of inputs to the combiner.
*
* @return comparator set by the user for grouping values.
* @see Job#setCombinerKeyGroupingComparatorClass(Class)
*/
public RawComparator<?> getCombinerKeyGroupingComparator();
/**
* Get the user defined {@link RawComparator} comparator for
* grouping keys of inputs to the reduce.
*
* @return comparator set by the user for grouping values.
* @see Job#setGroupingComparatorClass(Class) for details.
* @see Job#setGroupingComparatorClass(Class)
* @see #getCombinerKeyGroupingComparator()
*/
public RawComparator<?> getGroupingComparator();

View File

@ -93,6 +93,8 @@ public interface MRJobConfig {
public static final String KEY_COMPARATOR = "mapreduce.job.output.key.comparator.class";
public static final String COMBINER_GROUP_COMPARATOR_CLASS = "mapreduce.job.combiner.group.comparator.class";
public static final String GROUP_COMPARATOR_CLASS = "mapreduce.job.output.group.comparator.class";
public static final String WORKING_DIR = "mapreduce.job.working.dir";

View File

@ -36,36 +36,30 @@ import org.apache.hadoop.mapred.Counters;
*/
public class TaskCheckpointID implements CheckpointID {
FSCheckpointID rawId;
private List<Path> partialOutput;
private Counters counters;
final FSCheckpointID rawId;
private final List<Path> partialOutput;
private final Counters counters;
public TaskCheckpointID() {
this.rawId = new FSCheckpointID();
this.partialOutput = new ArrayList<Path>();
this(new FSCheckpointID(), new ArrayList<Path>(), new Counters());
}
public TaskCheckpointID(FSCheckpointID rawId, List<Path> partialOutput,
Counters counters) {
this.rawId = rawId;
this.counters = counters;
if(partialOutput == null)
this.partialOutput = new ArrayList<Path>();
else
this.partialOutput = partialOutput;
this.partialOutput = null == partialOutput
? new ArrayList<Path>()
: partialOutput;
}
@Override
public void write(DataOutput out) throws IOException {
counters.write(out);
if (partialOutput == null) {
WritableUtils.writeVLong(out, 0L);
} else {
WritableUtils.writeVLong(out, partialOutput.size());
for (Path p : partialOutput) {
Text.writeString(out, p.toString());
}
}
rawId.write(out);
}
@ -74,21 +68,22 @@ public class TaskCheckpointID implements CheckpointID{
partialOutput.clear();
counters.readFields(in);
long numPout = WritableUtils.readVLong(in);
for(int i=0;i<numPout;i++)
for (int i = 0; i < numPout; i++) {
partialOutput.add(new Path(Text.readString(in)));
}
rawId.readFields(in);
}
@Override
public boolean equals(Object other) {
if (other instanceof TaskCheckpointID){
return this.rawId.equals(((TaskCheckpointID)other).rawId) &&
this.counters.equals(((TaskCheckpointID) other).counters) &&
this.partialOutput.containsAll(((TaskCheckpointID) other).partialOutput) &&
((TaskCheckpointID) other).partialOutput.containsAll(this.partialOutput);
} else {
return false;
TaskCheckpointID o = (TaskCheckpointID) other;
return rawId.equals(o.rawId) &&
counters.equals(o.counters) &&
partialOutput.containsAll(o.partialOutput) &&
o.partialOutput.containsAll(partialOutput);
}
return false;
}
@Override

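The equals() above compares partialOutput with containsAll in both directions, i.e. as an order-insensitive set. A small illustration (hypothetical paths):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;

public class CheckpointEqualityExample {
  public static void main(String[] args) {
    List<Path> ab = Arrays.asList(new Path("/part1"), new Path("/part2"));
    List<Path> ba = Arrays.asList(new Path("/part2"), new Path("/part1"));
    TaskCheckpointID x = new TaskCheckpointID(
        new FSCheckpointID(new Path("/ckpt")), ab, new Counters());
    TaskCheckpointID y = new TaskCheckpointID(
        new FSCheckpointID(new Path("/ckpt")), ba, new Counters());
    // ordering of the partial-output paths does not matter:
    System.out.println(x.equals(y));   // true
  }
}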
View File

@ -166,6 +166,11 @@ class ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
return base.getFileTimestamps();
}
@Override
public RawComparator<?> getCombinerKeyGroupingComparator() {
return base.getCombinerKeyGroupingComparator();
}
@Override
public RawComparator<?> getGroupingComparator() {
return base.getGroupingComparator();

View File

@ -159,6 +159,11 @@ class ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
return base.getFileTimestamps();
}
@Override
public RawComparator<?> getCombinerKeyGroupingComparator() {
return base.getCombinerKeyGroupingComparator();
}
@Override
public RawComparator<?> getGroupingComparator() {
return base.getGroupingComparator();

View File

@ -168,6 +168,11 @@ public class WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
return mapContext.getFileTimestamps();
}
@Override
public RawComparator<?> getCombinerKeyGroupingComparator() {
return mapContext.getCombinerKeyGroupingComparator();
}
@Override
public RawComparator<?> getGroupingComparator() {
return mapContext.getGroupingComparator();

View File

@ -137,7 +137,7 @@ public class WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
@Override
public URI[] getCacheFiles() throws IOException {
return reduceContext.getCacheArchives();
return reduceContext.getCacheFiles();
}
@Override
@ -161,6 +161,11 @@ public class WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
return reduceContext.getFileTimestamps();
}
@Override
public RawComparator<?> getCombinerKeyGroupingComparator() {
return reduceContext.getCombinerKeyGroupingComparator();
}
@Override
public RawComparator<?> getGroupingComparator() {
return reduceContext.getGroupingComparator();

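The one-line change above fixes a copy-paste bug where getCacheFiles() delegated to getCacheArchives(). A Mockito-based sketch of the corrected delegation (raw types for brevity):

import java.net.URI;
import org.apache.hadoop.mapreduce.ReduceContext;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer;
import org.mockito.Mockito;

public class WrappedReducerDelegation {
  @SuppressWarnings({ "rawtypes", "unchecked" })
  public static void main(String[] args) throws Exception {
    ReduceContext ctx = Mockito.mock(ReduceContext.class);
    Mockito.when(ctx.getCacheFiles())
        .thenReturn(new URI[] { new URI("file:///cache/one") });
    Reducer.Context wrapped = new WrappedReducer().getReducerContext(ctx);
    // Before this fix the wrapper returned getCacheArchives() here.
    System.out.println(wrapped.getCacheFiles()[0]); // expect: file:///cache/one
  }
}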
View File

@ -252,6 +252,17 @@ public class JobContextImpl implements JobContext {
return conf.getJar();
}
/**
* Get the user defined {@link RawComparator} comparator for
* grouping keys of inputs to the combiner.
*
* @return comparator set by the user for grouping values.
* @see Job#setCombinerKeyGroupingComparatorClass(Class) for details.
*/
public RawComparator<?> getCombinerKeyGroupingComparator() {
return conf.getCombinerKeyGroupingComparator();
}
/**
* Get the user defined {@link RawComparator} comparator for
* grouping keys of inputs to the reduce.

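Because the wrapped mapper and reducer contexts above all delegate these getters, tasks can inspect both comparators at runtime; a small sketch:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class InspectComparators extends Mapper<LongWritable, Text, Text, LongWritable> {
  @Override
  protected void setup(Context context)
      throws IOException, InterruptedException {
    // Context extends JobContext, so both grouping comparators are visible.
    RawComparator<?> combinerGrouping = context.getCombinerKeyGroupingComparator();
    RawComparator<?> reduceGrouping = context.getGroupingComparator();
  }
}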
View File

@ -582,7 +582,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
RawComparator<K> comparator =
(RawComparator<K>)job.getOutputKeyComparator();
(RawComparator<K>)job.getCombinerKeyGroupingComparator();
try {
CombineValuesIterator values = new CombineValuesIterator(
kvIter, comparator, keyClass, valClass, job, Reporter.NULL,

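With this change, the combiner run during merges groups values with the combiner key grouping comparator rather than the raw output key comparator. A sketch of the matching old-API configuration (when the setter is never called, behavior presumably falls back to the previous default shown in the removed line above):

import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class CombinerGroupingConf {
  static RawComparator<?> combinerComparator() {
    JobConf job = new JobConf();
    // Old-API setter introduced alongside this change
    // (see TestOldCombinerGrouping below); Text.Comparator is a stand-in.
    job.setCombinerKeyGroupingComparator(Text.Comparator.class);
    return job.getCombinerKeyGroupingComparator();
  }
}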
View File

@ -88,6 +88,8 @@ import org.apache.hadoop.yarn.util.Records;
import org.apache.hadoop.yarn.webapp.WebApp;
import org.apache.hadoop.yarn.webapp.WebApps;
import com.google.common.annotations.VisibleForTesting;
/**
* This module is responsible for talking to the
* JobClient (user facing).
@ -142,7 +144,8 @@ public class HistoryClientService extends AbstractService {
super.serviceStart();
}
private void initializeWebApp(Configuration conf) {
@VisibleForTesting
protected void initializeWebApp(Configuration conf) {
webApp = new HsWebApp(history);
InetSocketAddress bindAddress = MRWebAppUtil.getJHSWebBindAddress(conf);
// NOTE: there should be a .at(InetSocketAddress)

View File

@ -45,6 +45,8 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogDeletionService;
import com.google.common.annotations.VisibleForTesting;
/******************************************************************
* {@link JobHistoryServer} is responsible for servicing all job history
* related requests from client.
@ -60,10 +62,10 @@ public class JobHistoryServer extends CompositeService {
public static final long historyServerTimeStamp = System.currentTimeMillis();
private static final Log LOG = LogFactory.getLog(JobHistoryServer.class);
private HistoryContext historyContext;
protected HistoryContext historyContext;
private HistoryClientService clientService;
private JobHistory jobHistoryService;
private JHSDelegationTokenSecretManager jhsDTSecretManager;
protected JHSDelegationTokenSecretManager jhsDTSecretManager;
private AggregatedLogDeletionService aggLogDelService;
private HSAdminServer hsAdminServer;
private HistoryServerStateStoreService stateStore;
@ -129,8 +131,7 @@ public class JobHistoryServer extends CompositeService {
historyContext = (HistoryContext)jobHistoryService;
stateStore = createStateStore(conf);
this.jhsDTSecretManager = createJHSSecretManager(conf, stateStore);
clientService = new HistoryClientService(historyContext,
this.jhsDTSecretManager);
clientService = createHistoryClientService();
aggLogDelService = new AggregatedLogDeletionService();
hsAdminServer = new HSAdminServer(aggLogDelService, jobHistoryService);
addService(stateStore);
@ -142,6 +143,12 @@ public class JobHistoryServer extends CompositeService {
super.serviceInit(config);
}
@VisibleForTesting
protected HistoryClientService createHistoryClientService() {
return new HistoryClientService(historyContext,
this.jhsDTSecretManager);
}
protected JHSDelegationTokenSecretManager createJHSSecretManager(
Configuration conf, HistoryServerStateStoreService store) {
long secretKeyInterval =

View File

@ -20,7 +20,6 @@ package org.apache.hadoop.mapred;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import junit.framework.TestCase;
@ -29,20 +28,17 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.mapreduce.split.JobSplit.SplitMetaInfo;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.util.ReflectionUtils;
/**
@ -110,11 +106,16 @@ public class TestMapProgress extends TestCase {
statusUpdate(taskId, taskStatus);
}
public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
statusUpdate(taskId, taskStatus);
}
public boolean canCommit(TaskAttemptID taskid) throws IOException {
return true;
}
public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
public AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
StringBuffer buf = new StringBuffer("Task ");
buf.append(taskId);
@ -128,7 +129,9 @@ public class TestMapProgress extends TestCase {
LOG.info(buf.toString());
// ignore phase
// ignore counters
return true;
AMFeedback a = new AMFeedback();
a.setTaskFound(true);
return a;
}
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) throws IOException {
@ -145,6 +148,17 @@ public class TestMapProgress extends TestCase {
SortedRanges.Range range) throws IOException {
LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
// do nothing
return null;
}
@Override
public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
// do nothing
}
}
private FileSystem fs = null;

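statusUpdate now returns an AMFeedback record instead of a boolean. A sketch of the task-side reading, assuming AMFeedback exposes getTaskFound() to match the setter used above:

import java.io.IOException;
import org.apache.hadoop.mapred.AMFeedback;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskStatus;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;

public class HeartbeatSketch {
  // Returns false when the AM no longer knows this attempt,
  // in which case the task should exit.
  static boolean heartbeat(TaskUmbilicalProtocol umbilical, TaskAttemptID id,
      TaskStatus status) throws IOException, InterruptedException {
    AMFeedback feedback = umbilical.statusUpdate(id, status);
    return feedback.getTaskFound(); // assumed getter paired with setTaskFound
  }
}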
View File

@ -0,0 +1,191 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import junit.framework.Assert;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.UUID;
public class TestOldCombinerGrouping {
private static String TEST_ROOT_DIR =
new File("build", UUID.randomUUID().toString()).getAbsolutePath();
public static class Map implements
Mapper<LongWritable, Text, Text, LongWritable> {
@Override
public void map(LongWritable key, Text value,
OutputCollector<Text, LongWritable> output, Reporter reporter)
throws IOException {
String v = value.toString();
String k = v.substring(0, v.indexOf(","));
v = v.substring(v.indexOf(",") + 1);
output.collect(new Text(k), new LongWritable(Long.parseLong(v)));
}
@Override
public void close() throws IOException {
}
@Override
public void configure(JobConf job) {
}
}
public static class Reduce implements
Reducer<Text, LongWritable, Text, LongWritable> {
@Override
public void reduce(Text key, Iterator<LongWritable> values,
OutputCollector<Text, LongWritable> output, Reporter reporter)
throws IOException {
LongWritable maxValue = null;
while (values.hasNext()) {
LongWritable value = values.next();
if (maxValue == null) {
maxValue = value;
} else if (value.compareTo(maxValue) > 0) {
maxValue = value;
}
}
output.collect(key, maxValue);
}
@Override
public void close() throws IOException {
}
@Override
public void configure(JobConf job) {
}
}
public static class Combiner extends Reduce {
}
public static class GroupComparator implements RawComparator<Text> {
@Override
public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
int i4) {
byte[] b1 = new byte[i2];
System.arraycopy(bytes, i, b1, 0, i2);
byte[] b2 = new byte[i4];
System.arraycopy(bytes2, i3, b2, 0, i4);
return compare(new Text(new String(b1)), new Text(new String(b2)));
}
@Override
public int compare(Text o1, Text o2) {
String s1 = o1.toString();
String s2 = o2.toString();
s1 = s1.substring(0, s1.indexOf("|"));
s2 = s2.substring(0, s2.indexOf("|"));
return s1.compareTo(s2);
}
}
@Test
public void testCombiner() throws Exception {
if (!new File(TEST_ROOT_DIR).mkdirs()) {
throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
}
File in = new File(TEST_ROOT_DIR, "input");
if (!in.mkdirs()) {
throw new RuntimeException("Could not create test dir: " + in);
}
File out = new File(TEST_ROOT_DIR, "output");
PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
pw.println("A|a,1");
pw.println("A|b,2");
pw.println("B|a,3");
pw.println("B|b,4");
pw.println("B|c,5");
pw.close();
JobConf job = new JobConf();
job.set("mapreduce.framework.name", "local");
TextInputFormat.setInputPaths(job, new Path(in.getPath()));
TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormat(TextInputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setOutputFormat(TextOutputFormat.class);
job.setOutputValueGroupingComparator(GroupComparator.class);
job.setCombinerClass(Combiner.class);
job.setCombinerKeyGroupingComparator(GroupComparator.class);
job.setInt("min.num.spills.for.combine", 0);
JobClient client = new JobClient(job);
RunningJob runningJob = client.submitJob(job);
runningJob.waitForCompletion();
if (runningJob.isSuccessful()) {
Counters counters = runningJob.getCounters();
long combinerInputRecords = counters.getGroup(
"org.apache.hadoop.mapreduce.TaskCounter").
getCounter("COMBINE_INPUT_RECORDS");
long combinerOutputRecords = counters.getGroup(
"org.apache.hadoop.mapreduce.TaskCounter").
getCounter("COMBINE_OUTPUT_RECORDS");
Assert.assertTrue(combinerInputRecords > 0);
Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
BufferedReader br = new BufferedReader(new FileReader(
new File(out, "part-00000")));
Set<String> output = new HashSet<String>();
String line = br.readLine();
Assert.assertNotNull(line);
output.add(line.substring(0, 1) + line.substring(4, 5));
line = br.readLine();
Assert.assertNotNull(line);
output.add(line.substring(0, 1) + line.substring(4, 5));
line = br.readLine();
Assert.assertNull(line);
br.close();
Set<String> expected = new HashSet<String>();
expected.add("A2");
expected.add("B5");
Assert.assertEquals(expected, output);
} else {
Assert.fail("Job failed");
}
}
}

View File

@ -27,6 +27,10 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.mapred.SortedRanges.Range;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
public class TestTaskCommit extends HadoopTestCase {
Path rootDir =
@ -131,11 +135,6 @@ public class TestTaskCommit extends HadoopTestCase {
return null;
}
@Override
public boolean ping(TaskAttemptID taskid) throws IOException {
return true;
}
@Override
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace)
throws IOException {
@ -152,9 +151,11 @@ public class TestTaskCommit extends HadoopTestCase {
}
@Override
public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
public AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
return true;
AMFeedback a = new AMFeedback();
a.setTaskFound(true);
return a;
}
@Override
@ -168,6 +169,22 @@ public class TestTaskCommit extends HadoopTestCase {
long clientVersion, int clientMethodsHash) throws IOException {
return null;
}
@Override
public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
fail("Task should not go to commit-pending");
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
return null;
}
@Override
public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
// ignore
}
}
/**

View File

@ -0,0 +1,178 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import junit.framework.Assert;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;
public class TestNewCombinerGrouping {
private static String TEST_ROOT_DIR =
new File("build", UUID.randomUUID().toString()).getAbsolutePath();
public static class Map extends
Mapper<LongWritable, Text, Text, LongWritable> {
@Override
protected void map(LongWritable key, Text value,
Context context)
throws IOException, InterruptedException {
String v = value.toString();
String k = v.substring(0, v.indexOf(","));
v = v.substring(v.indexOf(",") + 1);
context.write(new Text(k), new LongWritable(Long.parseLong(v)));
}
}
public static class Reduce extends
Reducer<Text, LongWritable, Text, LongWritable> {
@Override
protected void reduce(Text key, Iterable<LongWritable> values,
Context context)
throws IOException, InterruptedException {
LongWritable maxValue = null;
for (LongWritable value : values) {
if (maxValue == null) {
maxValue = value;
} else if (value.compareTo(maxValue) > 0) {
maxValue = value;
}
}
context.write(key, maxValue);
}
}
public static class Combiner extends Reduce {
}
public static class GroupComparator implements RawComparator<Text> {
@Override
public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
int i4) {
byte[] b1 = new byte[i2];
System.arraycopy(bytes, i, b1, 0, i2);
byte[] b2 = new byte[i4];
System.arraycopy(bytes2, i3, b2, 0, i4);
return compare(new Text(new String(b1)), new Text(new String(b2)));
}
@Override
public int compare(Text o1, Text o2) {
String s1 = o1.toString();
String s2 = o2.toString();
s1 = s1.substring(0, s1.indexOf("|"));
s2 = s2.substring(0, s2.indexOf("|"));
return s1.compareTo(s2);
}
}
@Test
public void testCombiner() throws Exception {
if (!new File(TEST_ROOT_DIR).mkdirs()) {
throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
}
File in = new File(TEST_ROOT_DIR, "input");
if (!in.mkdirs()) {
throw new RuntimeException("Could not create test dir: " + in);
}
File out = new File(TEST_ROOT_DIR, "output");
PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
pw.println("A|a,1");
pw.println("A|b,2");
pw.println("B|a,3");
pw.println("B|b,4");
pw.println("B|c,5");
pw.close();
JobConf conf = new JobConf();
conf.set("mapreduce.framework.name", "local");
Job job = new Job(conf);
TextInputFormat.setInputPaths(job, new Path(in.getPath()));
TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setGroupingComparatorClass(GroupComparator.class);
job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
job.setCombinerClass(Combiner.class);
job.getConfiguration().setInt("min.num.spills.for.combine", 0);
job.submit();
job.waitForCompletion(false);
if (job.isSuccessful()) {
Counters counters = job.getCounters();
long combinerInputRecords = counters.findCounter(
"org.apache.hadoop.mapreduce.TaskCounter",
"COMBINE_INPUT_RECORDS").getValue();
long combinerOutputRecords = counters.findCounter(
"org.apache.hadoop.mapreduce.TaskCounter",
"COMBINE_OUTPUT_RECORDS").getValue();
Assert.assertTrue(combinerInputRecords > 0);
Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
BufferedReader br = new BufferedReader(new FileReader(
new File(out, "part-r-00000")));
Set<String> output = new HashSet<String>();
String line = br.readLine();
Assert.assertNotNull(line);
output.add(line.substring(0, 1) + line.substring(4, 5));
line = br.readLine();
Assert.assertNotNull(line);
output.add(line.substring(0, 1) + line.substring(4, 5));
line = br.readLine();
Assert.assertNull(line);
br.close();
Set<String> expected = new HashSet<String>();
expected.add("A2");
expected.add("B5");
Assert.assertEquals(expected, output);
} else {
Assert.fail("Job failed");
}
}
}

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.mapreduce.v2.api.protocolrecords.CancelDelegationTokenR
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDelegationTokenRequest;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.RenewDelegationTokenRequest;
import org.apache.hadoop.mapreduce.v2.hs.HistoryClientService;
import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService;
import org.apache.hadoop.mapreduce.v2.hs.JHSDelegationTokenSecretManager;
import org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer;
@ -94,6 +95,17 @@ public class TestJHSSecurity {
return new JHSDelegationTokenSecretManager(initialInterval,
maxLifetime, renewInterval, 3600000, store);
}
@Override
protected HistoryClientService createHistoryClientService() {
return new HistoryClientService(historyContext,
this.jhsDTSecretManager) {
@Override
protected void initializeWebApp(Configuration conf) {
// Don't need it, skip.
}
};
}
};
// final JobHistoryServer jobHistoryServer = jhServer;
jobHistoryServer.init(conf);

View File

@ -115,7 +115,7 @@ public class TestUmbilicalProtocolWithJobToken {
proxy = (TaskUmbilicalProtocol) RPC.getProxy(
TaskUmbilicalProtocol.class, TaskUmbilicalProtocol.versionID,
addr, conf);
proxy.ping(null);
proxy.statusUpdate(null, null);
} finally {
server.stop();
if (proxy != null) {

View File

@ -519,7 +519,7 @@
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
<version>1.1.3</version>
<exclusions>
<exclusion>
<groupId>avalon-framework</groupId>

View File

@ -64,8 +64,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@ -105,8 +106,8 @@ public class ResourceSchedulerWrapper implements
private Configuration conf;
private ResourceScheduler scheduler;
private Map<ApplicationAttemptId, String> appQueueMap =
new ConcurrentHashMap<ApplicationAttemptId, String>();
private Map<ApplicationId, String> appQueueMap =
new ConcurrentHashMap<ApplicationId, String>();
private BufferedWriter jobRuntimeLogBW;
// Priority of the ResourceSchedulerWrapper shutdown hook.
@ -240,7 +241,7 @@ public class ResourceSchedulerWrapper implements
(AppAttemptRemovedSchedulerEvent) schedulerEvent;
ApplicationAttemptId appAttemptId =
appRemoveEvent.getApplicationAttemptID();
String queue = appQueueMap.get(appAttemptId);
String queue = appQueueMap.get(appAttemptId.getApplicationId());
SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
if (! app.getLiveContainers().isEmpty()) { // have 0 or 1
// should have one container which is AM container
@ -262,20 +263,18 @@ public class ResourceSchedulerWrapper implements
schedulerHandleCounter.inc();
schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();
if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED
&& schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
&& schedulerEvent instanceof AppRemovedSchedulerEvent) {
SLSRunner.decreaseRemainingApps();
AppAttemptRemovedSchedulerEvent appRemoveEvent =
(AppAttemptRemovedSchedulerEvent) schedulerEvent;
ApplicationAttemptId appAttemptId =
appRemoveEvent.getApplicationAttemptID();
appQueueMap.remove(appRemoveEvent.getApplicationAttemptID());
} else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_ADDED
&& schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
AppAttemptAddedSchedulerEvent appAddEvent =
(AppAttemptAddedSchedulerEvent) schedulerEvent;
AppRemovedSchedulerEvent appRemoveEvent =
(AppRemovedSchedulerEvent) schedulerEvent;
appQueueMap.remove(appRemoveEvent.getApplicationID());
} else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED
&& schedulerEvent instanceof AppAddedSchedulerEvent) {
AppAddedSchedulerEvent appAddEvent =
(AppAddedSchedulerEvent) schedulerEvent;
String queueName = appAddEvent.getQueue();
appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
appQueueMap.put(appAddEvent.getApplicationId(), queueName);
}
}
}
@ -297,7 +296,9 @@ public class ResourceSchedulerWrapper implements
continue;
}
String queue = appQueueMap.get(containerId.getApplicationAttemptId());
String queue =
appQueueMap.get(containerId.getApplicationAttemptId()
.getApplicationId());
int releasedMemory = 0, releasedVCores = 0;
if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
for (RMContainer rmc : app.getLiveContainers()) {
@ -329,7 +330,7 @@ public class ResourceSchedulerWrapper implements
// update queue information
Resource pendingResource = Resources.createResource(0, 0);
Resource allocatedResource = Resources.createResource(0, 0);
String queueName = appQueueMap.get(attemptId);
String queueName = appQueueMap.get(attemptId.getApplicationId());
// container requested
for (ResourceRequest request : resourceRequests) {
if (request.getResourceName().equals(ResourceRequest.ANY)) {

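The wrapper now keys its queue map by ApplicationId, so all attempts of an app share one entry and lookups survive AM restarts. A condensed sketch of the lookup path:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;

public class QueueLookup {
  private final Map<ApplicationId, String> appQueueMap =
      new ConcurrentHashMap<ApplicationId, String>();

  // Every attempt and container of an app resolves to the same queue entry.
  String queueOf(ContainerId containerId) {
    return appQueueMap.get(
        containerId.getApplicationAttemptId().getApplicationId());
  }
}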
View File

@ -285,8 +285,11 @@ public class SLSCapacityScheduler extends CapacityScheduler implements
&& schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
AppAttemptAddedSchedulerEvent appAddEvent =
(AppAttemptAddedSchedulerEvent) schedulerEvent;
String queueName = appAddEvent.getQueue();
appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
SchedulerApplication app =
applications.get(appAddEvent.getApplicationAttemptId()
.getApplicationId());
appQueueMap.put(appAddEvent.getApplicationAttemptId(), app.getQueue()
.getQueueName());
}
}
}

View File

@ -187,6 +187,16 @@ Release 2.4.0 - UNRELEASED
YARN-1307. Redesign znode structure for Zookeeper based RM state-store for
better organization and scalability. (Tsuyoshi OZAWA via vinodkv)
YARN-1172. Convert SecretManagers in RM to services (Tsuyoshi OZAWA via kasha)
YARN-1523. Use StandbyException instead of RMNotYetActiveException (kasha)
YARN-1541. Changed ResourceManager to invalidate ApplicationMaster host/port
information once an AM crashes. (Jian He via vinodkv)
YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize
app-attempts separately from apps. (Jian He via vinodkv)
OPTIMIZATIONS
BUG FIXES
@ -267,6 +277,21 @@ Release 2.4.0 - UNRELEASED
YARN-1451. TestResourceManager relies on the scheduler assigning multiple
containers in a single node update. (Sandy Ryza via kasha)
YARN-1527. Fix yarn rmadmin command to print the correct usage info.
(Akira AJISAKA via jianhe)
YARN-1522. Fixed a race condition in the test TestApplicationCleanup that was
causing it to randomly fail. (Liyin Liang via vinodkv)
YARN-1549. Fixed a bug in ResourceManager's ApplicationMasterService that
was causing unmanaged AMs to not finish correctly. (haosdent via vinodkv)
YARN-1559. Race between ServerRMProxy and ClientRMProxy setting
RMProxy#INSTANCE. (kasha and vinodkv via kasha)
YARN-1560. Fixed TestYarnClient#testAMMRTokens failure with null AMRM token.
(Ted Yu via jianhe)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES
@ -473,6 +498,9 @@ Release 2.2.0 - 2013-10-13
YARN-1278. Fixed NodeManager to not delete local resources for apps on resync
command from RM - a bug caused by YARN-1149. (Hitesh Shah via vinodkv)
YARN-1463. Tests should avoid starting http-server where possible or creating
spnego keytab/principals (vinodkv via kasha)
Release 2.1.1-beta - 2013-09-23
INCOMPATIBLE CHANGES

View File

@ -309,13 +309,4 @@
<Class name="org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore" />
<Bug pattern="IS2_INCONSISTENT_SYNC" />
</Match>
<!-- Ignore INSTANCE not being final as it is created in sub-classes -->
<Match>
<Class name="org.apache.hadoop.yarn.client.RMProxy" />
<Field name="INSTANCE" />
<Bug pattern="MS_SHOULD_BE_FINAL"/>
</Match>
</FindBugsFilter>

View File

@ -24,10 +24,10 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.classification.InterfaceStability.Stable;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.ResourceOption;
import org.apache.hadoop.yarn.exceptions.RMNotYetActiveException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse;
@ -51,25 +51,25 @@ public interface ResourceManagerAdministrationProtocol extends GetUserMappingsPr
@Public
@Stable
public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request)
throws RMNotYetActiveException, YarnException, IOException;
throws StandbyException, YarnException, IOException;
@Public
@Stable
public RefreshNodesResponse refreshNodes(RefreshNodesRequest request)
throws RMNotYetActiveException, YarnException, IOException;
throws StandbyException, YarnException, IOException;
@Public
@Stable
public RefreshSuperUserGroupsConfigurationResponse
refreshSuperUserGroupsConfiguration(
RefreshSuperUserGroupsConfigurationRequest request)
throws RMNotYetActiveException, YarnException, IOException;
throws StandbyException, YarnException, IOException;
@Public
@Stable
public RefreshUserToGroupsMappingsResponse refreshUserToGroupsMappings(
RefreshUserToGroupsMappingsRequest request)
throws RMNotYetActiveException, YarnException, IOException;
throws StandbyException, YarnException, IOException;
@Public
@Stable

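Replacing RMNotYetActiveException with the generic org.apache.hadoop.ipc.StandbyException lets RM admin clients reuse Hadoop's standard HA handling instead of a YARN-specific type. A hedged caller-side sketch:

import java.io.IOException;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshQueuesRequest;
import org.apache.hadoop.yarn.util.Records;

public class RefreshQueuesSketch {
  static void refresh(ResourceManagerAdministrationProtocol admin)
      throws YarnException, IOException {
    try {
      admin.refreshQueues(Records.newRecord(RefreshQueuesRequest.class));
    } catch (StandbyException e) {
      // Generic HA signal: this RM is standby, so the caller (or the RPC
      // retry policy) can fail over to the active RM.
    }
  }
}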
Some files were not shown because too many files have changed in this diff.