Merge trunk to HDFS-4685.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-4685@1556097 13f79535-47bb-0310-9956-ffa450edef68
commit 2fbb3d694e

@@ -92,6 +92,11 @@
       <artifactId>hadoop-minikdc</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient</artifactId>
+      <scope>compile</scope>
+    </dependency>
   </dependencies>

   <build>

@@ -16,10 +16,15 @@ package org.apache.hadoop.security.authentication.server;
 import org.apache.hadoop.security.authentication.client.AuthenticationException;
 import org.apache.hadoop.security.authentication.client.PseudoAuthenticator;

+import org.apache.http.client.utils.URLEncodedUtils;
+import org.apache.http.NameValuePair;
+
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
 import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.List;
 import java.util.Properties;

 /**
@@ -48,6 +53,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
    */
   public static final String ANONYMOUS_ALLOWED = TYPE + ".anonymous.allowed";

+  private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
   private boolean acceptAnonymous;

   /**
@@ -114,6 +120,18 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
     return true;
   }

+  private String getUserName(HttpServletRequest request) {
+    List<NameValuePair> list = URLEncodedUtils.parse(request.getQueryString(), UTF8_CHARSET);
+    if (list != null) {
+      for (NameValuePair nv : list) {
+        if (PseudoAuthenticator.USER_NAME.equals(nv.getName())) {
+          return nv.getValue();
+        }
+      }
+    }
+    return null;
+  }
+
   /**
    * Authenticates an HTTP client request.
    * <p/>
@@ -139,7 +157,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
   public AuthenticationToken authenticate(HttpServletRequest request, HttpServletResponse response)
     throws IOException, AuthenticationException {
     AuthenticationToken token;
-    String userName = request.getParameter(PseudoAuthenticator.USER_NAME);
+    String userName = getUserName(request);
     if (userName == null) {
       if (getAcceptAnonymous()) {
         token = AuthenticationToken.ANONYMOUS;

@@ -94,7 +94,7 @@ public class TestPseudoAuthenticationHandler {

     HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
     HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
-    Mockito.when(request.getParameter(PseudoAuthenticator.USER_NAME)).thenReturn("user");
+    Mockito.when(request.getQueryString()).thenReturn(PseudoAuthenticator.USER_NAME + "=" + "user");

     AuthenticationToken token = handler.authenticate(request, response);

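Illustration (not part of the patch): the handler now pulls user.name out of the raw query string rather than calling request.getParameter(), because getParameter() on a POST can force the servlet container to read, and therefore consume, the request body. A minimal sketch of the same idea, assuming httpclient is on the classpath and "user.name" as the parameter name:

    import java.nio.charset.Charset;
    import java.util.List;
    import org.apache.http.NameValuePair;
    import org.apache.http.client.utils.URLEncodedUtils;

    public class QueryStringUserName {
      private static final Charset UTF8 = Charset.forName("UTF-8");

      /** Extract user.name from a raw query string without touching the request body. */
      static String userName(String queryString) {
        if (queryString == null) {
          return null;
        }
        List<NameValuePair> pairs = URLEncodedUtils.parse(queryString, UTF8);
        for (NameValuePair nv : pairs) {
          if ("user.name".equals(nv.getName())) {
            return nv.getValue();
          }
        }
        return null;
      }
    }
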
@@ -108,6 +108,8 @@ Trunk (Unreleased)
|
|||
HADOOP-10141. Create KeyProvider API to separate encryption key storage
|
||||
from the applications. (omalley)
|
||||
|
||||
HADOOP-10201. Add listing to KeyProvider API. (Larry McCay via omalley)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-9451. Fault single-layer config if node group topology is enabled.
|
||||
|
@@ -407,6 +409,9 @@ Release 2.4.0 - UNRELEASED
|
|||
HADOOP-10169. Remove the unnecessary synchronized in JvmMetrics class.
|
||||
(Liang Xie via jing9)
|
||||
|
||||
HADOOP-10198. DomainSocket: add support for socketpair.
|
||||
(Colin Patrick McCabe via wang)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn)
|
||||
|
@@ -416,6 +421,9 @@ Release 2.4.0 - UNRELEASED
|
|||
|
||||
HADOOP-10172. Cache SASL server factories (daryn)
|
||||
|
||||
HADOOP-10173. Remove UGI from DIGEST-MD5 SASL server creation (daryn via
|
||||
kihwal)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-9964. Fix deadlocks in TestHttpServer by synchronize
|
||||
|
@@ -489,6 +497,9 @@ Release 2.4.0 - UNRELEASED
|
|||
|
||||
HADOOP-10171. TestRPC fails intermittently on jkd7 (Mit Desai via jeagles)
|
||||
|
||||
HADOOP-10147 HDFS-5678 Upgrade to commons-logging 1.1.3 to avoid potential
|
||||
deadlock in MiniDFSCluster (stevel)
|
||||
|
||||
Release 2.3.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@@ -568,6 +579,12 @@ Release 2.3.0 - UNRELEASED
|
|||
HADOOP-10175. Har files system authority should preserve userinfo.
|
||||
(Chuan Liu via cnauroth)
|
||||
|
||||
HADOOP-10090. Jobtracker metrics not updated properly after execution
|
||||
of a mapreduce job. (ivanmi)
|
||||
|
||||
HADOOP-10193. hadoop-auth's PseudoAuthenticationHandler can consume getInputStream.
|
||||
(gchanan via tucu)
|
||||
|
||||
Release 2.2.0 - 2013-10-13
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@@ -36,8 +36,11 @@ import java.security.KeyStoreException;
 import java.security.NoSuchAlgorithmException;
 import java.security.UnrecoverableKeyException;
 import java.security.cert.CertificateException;
+import java.util.ArrayList;
 import java.util.Date;
+import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;

 /**
@@ -56,6 +59,7 @@ import java.util.Map;
  */
@InterfaceAudience.Private
 public class JavaKeyStoreProvider extends KeyProvider {
+  private static final String KEY_METADATA = "KeyMetadata";
   public static final String SCHEME_NAME = "jceks";
   public static final String KEYSTORE_PASSWORD_NAME =
       "HADOOP_KEYSTORE_PASSWORD";
@@ -117,6 +121,44 @@ public class JavaKeyStoreProvider extends KeyProvider {
     return new KeyVersion(versionName, key.getEncoded());
   }

+  @Override
+  public List<String> getKeys() throws IOException {
+    ArrayList<String> list = new ArrayList<String>();
+    String alias = null;
+    try {
+      Enumeration<String> e = keyStore.aliases();
+      while (e.hasMoreElements()) {
+        alias = e.nextElement();
+        // only include the metadata key names in the list of names
+        if (!alias.contains("@")) {
+          list.add(alias);
+        }
+      }
+    } catch (KeyStoreException e) {
+      throw new IOException("Can't get key " + alias + " from " + path, e);
+    }
+    return list;
+  }
+
+  @Override
+  public List<KeyVersion> getKeyVersions(String name) throws IOException {
+    List<KeyVersion> list = new ArrayList<KeyVersion>();
+    Metadata km = getMetadata(name);
+    if (km != null) {
+      int latestVersion = km.getVersions();
+      KeyVersion v = null;
+      String versionName = null;
+      for (int i = 0; i < latestVersion; i++) {
+        versionName = buildVersionName(name, i);
+        v = getKeyVersion(versionName);
+        if (v != null) {
+          list.add(v);
+        }
+      }
+    }
+    return list;
+  }
+
   @Override
   public Metadata getMetadata(String name) throws IOException {
     if (cache.containsKey(name)) {
@@ -288,7 +330,7 @@ public class JavaKeyStoreProvider extends KeyProvider {

     @Override
     public String getFormat() {
-      return "KeyMetadata";
+      return KEY_METADATA;
     }

     @Override

@@ -254,6 +254,20 @@ public abstract class KeyProvider {
   public abstract KeyVersion getKeyVersion(String versionName
                                             ) throws IOException;

+  /**
+   * Get the key names for all keys.
+   * @return the list of key names
+   * @throws IOException
+   */
+  public abstract List<String> getKeys() throws IOException;
+
+  /**
+   * Get the key material for all versions of a specific key name.
+   * @return the list of key material
+   * @throws IOException
+   */
+  public abstract List<KeyVersion> getKeyVersions(String name) throws IOException;
+
   /**
    * Get the current version of the key, which should be used for encrypting new
    * data.

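Illustration (not part of the patch): together, getKeys() and getKeyVersions() let a caller enumerate everything a provider stores. A hedged sketch, assuming a jceks provider configured through KeyProviderFactory.KEY_PROVIDER_PATH; the URI below is only a placeholder:

    import java.io.IOException;
    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.crypto.key.KeyProvider;
    import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion;
    import org.apache.hadoop.crypto.key.KeyProviderFactory;

    public class ListAllKeys {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Example provider location; any configured KeyProvider works the same way.
        conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, "jceks://file/tmp/example.jceks");
        KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0);
        // Walk every key name and every version of that key.
        for (String name : provider.getKeys()) {
          List<KeyVersion> versions = provider.getKeyVersions(name);
          System.out.println(name + ": " + versions.size() + " version(s)");
        }
      }
    }
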
@@ -20,8 +20,10 @@ package org.apache.hadoop.crypto.key;

 import java.io.IOException;
 import java.net.URI;
+import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;

 import org.apache.hadoop.classification.InterfaceAudience;
@@ -142,4 +144,32 @@ public class UserProvider extends KeyProvider {
       return null;
     }
   }
+
+  @Override
+  public List<String> getKeys() throws IOException {
+    List<String> list = new ArrayList<String>();
+    List<Text> keys = credentials.getAllSecretKeys();
+    for (Text key : keys) {
+      if (key.find("@") == -1) {
+        list.add(key.toString());
+      }
+    }
+    return list;
+  }
+
+  @Override
+  public List<KeyVersion> getKeyVersions(String name) throws IOException {
+    List<KeyVersion> list = new ArrayList<KeyVersion>();
+    Metadata km = getMetadata(name);
+    if (km != null) {
+      int latestVersion = km.getVersions();
+      for (int i = 0; i < latestVersion; i++) {
+        KeyVersion v = getKeyVersion(buildVersionName(name, i));
+        if (v != null) {
+          list.add(v);
+        }
+      }
+    }
+    return list;
+  }
 }

@@ -21,7 +21,6 @@ package org.apache.hadoop.fs;
|
|||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.http.lib.StaticUserWebFilter;
|
||||
import org.apache.hadoop.security.authorize.Service;
|
||||
|
||||
/**
|
||||
* This class contains constants for configuration keys used
|
||||
|
@@ -240,4 +239,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
   /** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */
   public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT =
       10000;
+
+  public static final String HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS =
+    "hadoop.user.group.metrics.percentiles.intervals";
 }

@@ -276,6 +276,24 @@ public class DomainSocket implements Closeable {
     return new DomainSocket(path, fd);
   }

+  /**
+   * Create a pair of UNIX domain sockets which are connected to each other
+   * by calling socketpair(2).
+   *
+   * @return               An array of two UNIX domain sockets connected to
+   *                       each other.
+   * @throws IOException   on error.
+   */
+  public static DomainSocket[] socketpair() throws IOException {
+    int fds[] = socketpair0();
+    return new DomainSocket[] {
+      new DomainSocket("(anonymous0)", fds[0]),
+      new DomainSocket("(anonymous1)", fds[1])
+    };
+  }
+
+  private static native int[] socketpair0() throws IOException;
+
   private static native int accept0(int fd) throws IOException;

   /**

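Illustration (not part of the patch): a minimal sketch of the new socketpair() API, assuming the native hadoop library (libhadoop) is loaded, since DomainSocket is backed by JNI:

    import org.apache.hadoop.net.unix.DomainSocket;

    public class SocketPairEcho {
      public static void main(String[] args) throws Exception {
        // Two already-connected UNIX domain sockets, no filesystem path needed.
        DomainSocket[] pair = DomainSocket.socketpair();
        try {
          pair[0].getOutputStream().write(42);
          System.out.println("read back: " + pair[1].getInputStream().read());
        } finally {
          pair[0].close();
          pair[1].close();
        }
      }
    }
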
@@ -29,7 +29,9 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@@ -73,15 +75,6 @@ public class Credentials implements Writable {
     this.addAll(credentials);
   }

-  /**
-   * Returns the key bytes for the alias
-   * @param alias the alias for the key
-   * @return key for this alias
-   */
-  public byte[] getSecretKey(Text alias) {
-    return secretKeysMap.get(alias);
-  }
-
   /**
    * Returns the Token object for the alias
    * @param alias the alias for the Token
@@ -118,6 +111,15 @@ public class Credentials implements Writable {
     return tokenMap.size();
   }

+  /**
+   * Returns the key bytes for the alias
+   * @param alias the alias for the key
+   * @return key for this alias
+   */
+  public byte[] getSecretKey(Text alias) {
+    return secretKeysMap.get(alias);
+  }
+
   /**
    * @return number of keys in the in-memory map
    */
@@ -142,6 +144,16 @@ public class Credentials implements Writable {
     secretKeysMap.remove(alias);
   }

+  /**
+   * Return all the secret key entries in the in-memory map
+   */
+  public List<Text> getAllSecretKeys() {
+    List<Text> list = new java.util.ArrayList<Text>();
+    list.addAll(secretKeysMap.keySet());
+
+    return list;
+  }
+
   /**
    * Convenience method for reading a token storage file, and loading the Tokens
    * therein in the passed UGI

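Illustration (not part of the patch): getAllSecretKeys() exposes the aliases in the in-memory secret-key map, which the UserProvider change above relies on. A small sketch using only existing Credentials methods:

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.security.Credentials;

    public class SecretKeyNames {
      public static void main(String[] args) {
        Credentials creds = new Credentials();
        creds.addSecretKey(new Text("db.password"), "secret".getBytes());
        creds.addSecretKey(new Text("api.token"), "token".getBytes());
        // getAllSecretKeys() returns the aliases currently held in memory.
        for (Text alias : creds.getAllSecretKeys()) {
          System.out.println(alias + " -> " + creds.getSecretKey(alias).length + " bytes");
        }
      }
    }
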
@@ -138,6 +138,7 @@ public class Groups {
     List<String> groupList = impl.getGroups(user);
     long endMs = Time.monotonicNow();
     long deltaMs = endMs - startMs ;
+    UserGroupInformation.metrics.addGetGroups(deltaMs);
     if (deltaMs > warningDeltaMs) {
       LOG.warn("Potential performance problem: getGroups(user=" + user +") " +
         "took " + deltaMs + " milliseconds.");

@@ -131,7 +131,7 @@ public class SaslRpcServer {
   public SaslServer create(Connection connection,
                            SecretManager<TokenIdentifier> secretManager
       ) throws IOException, InterruptedException {
-    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
+    UserGroupInformation ugi = null;
     final CallbackHandler callback;
     switch (authMethod) {
       case TOKEN: {
@@ -139,6 +139,7 @@ public class SaslRpcServer {
         break;
       }
       case KERBEROS: {
+        ugi = UserGroupInformation.getCurrentUser();
         if (serverId.isEmpty()) {
           throw new AccessControlException(
               "Kerberos principal name does NOT have the expected "
@@ -153,7 +154,9 @@ public class SaslRpcServer {
           "Server does not support SASL " + authMethod);
     }

-    SaslServer saslServer = ugi.doAs(
+    final SaslServer saslServer;
+    if (ugi != null) {
+      saslServer = ugi.doAs(
         new PrivilegedExceptionAction<SaslServer>() {
           @Override
           public SaslServer run() throws SaslException {
@@ -161,6 +164,10 @@ public class SaslRpcServer {
                 SaslRpcServer.SASL_PROPS, callback);
           }
         });
+    } else {
+      saslServer = saslFactory.createSaslServer(mechanism, protocol, serverId,
+          SaslRpcServer.SASL_PROPS, callback);
+    }
     if (saslServer == null) {
       throw new AccessControlException(
           "Unable to find SASL server implementation for " + mechanism);

@@ -19,6 +19,7 @@ package org.apache.hadoop.security;

 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN;
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT;
+import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;

 import java.io.File;
 import java.io.IOException;
@@ -58,6 +59,8 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
 import org.apache.hadoop.metrics2.lib.MutableRate;
 import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
 import org.apache.hadoop.security.authentication.util.KerberosUtil;
@@ -92,14 +95,27 @@ public class UserGroupInformation {
    */
  @Metrics(about="User and group related metrics", context="ugi")
  static class UgiMetrics {
+    final MetricsRegistry registry = new MetricsRegistry("UgiMetrics");
+
    @Metric("Rate of successful kerberos logins and latency (milliseconds)")
    MutableRate loginSuccess;
    @Metric("Rate of failed kerberos logins and latency (milliseconds)")
    MutableRate loginFailure;
+    @Metric("GetGroups") MutableRate getGroups;
+    MutableQuantiles[] getGroupsQuantiles;

    static UgiMetrics create() {
      return DefaultMetricsSystem.instance().register(new UgiMetrics());
    }
+
+    void addGetGroups(long latency) {
+      getGroups.add(latency);
+      if (getGroupsQuantiles != null) {
+        for (MutableQuantiles q : getGroupsQuantiles) {
+          q.add(latency);
+        }
+      }
+    }
  }

  /**
@@ -250,6 +266,20 @@ public class UserGroupInformation {
      groups = Groups.getUserToGroupsMappingService(conf);
    }
    UserGroupInformation.conf = conf;
+
+    if (metrics.getGroupsQuantiles == null) {
+      int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS);
+      if (intervals != null && intervals.length > 0) {
+        final int length = intervals.length;
+        MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length];
+        for (int i = 0; i < length; i++) {
+          getGroupsQuantiles[i] = metrics.registry.newQuantiles(
+            "getGroups" + intervals[i] + "s",
+            "Get groups", "ops", "latency", intervals[i]);
+        }
+        metrics.getGroupsQuantiles = getGroupsQuantiles;
+      }
+    }
  }

  /**

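Illustration (not part of the patch): the quantile metrics are only created when hadoop.user.group.metrics.percentiles.intervals is set to one or more rollover intervals in seconds; the resulting gauges are expected to appear under UgiMetrics with names of the form getGroups<interval>s. A hedged sketch of wiring that up:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.security.UserGroupInformation;

    public class GroupMetricsSetup {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Publish getGroups latency percentiles over 60s and 300s windows.
        conf.set("hadoop.user.group.metrics.percentiles.intervals", "60,300");
        UserGroupInformation.setConfiguration(conf);
        // Group lookups are now timed and fed into the quantile metrics.
        UserGroupInformation.getCurrentUser().getGroupNames();
      }
    }
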
@@ -928,8 +928,10 @@ public class StringUtils {
    * @param args           List of arguments.
    * @return               null if the option was not found; the value of the
    *                       option otherwise.
+   * @throws IllegalArgumentException if the option's argument is not present
    */
-  public static String popOptionWithArgument(String name, List<String> args) {
+  public static String popOptionWithArgument(String name, List<String> args)
+      throws IllegalArgumentException {
     String val = null;
     for (Iterator<String> iter = args.iterator(); iter.hasNext(); ) {
       String cur = iter.next();
@@ -939,7 +941,7 @@ public class StringUtils {
       } else if (cur.equals(name)) {
         iter.remove();
         if (!iter.hasNext()) {
-          throw new RuntimeException("option " + name + " requires 1 " +
+          throw new IllegalArgumentException("option " + name + " requires 1 " +
               "argument.");
         }
         val = iter.next();

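Illustration (not part of the patch): with this change a missing option argument surfaces as IllegalArgumentException rather than a bare RuntimeException, so callers can catch it specifically. A small sketch of both the normal and the failing path:

    import java.util.LinkedList;
    import java.util.List;
    import org.apache.hadoop.util.StringUtils;

    public class PopOptionDemo {
      public static void main(String[] args) {
        List<String> argList =
            new LinkedList<String>(java.util.Arrays.asList("-path", "/foo", "-v"));
        // Consumes "-path /foo" and returns "/foo"; "-v" stays in the list.
        System.out.println(StringUtils.popOptionWithArgument("-path", argList));
        try {
          // "-out" is present with no value, so the call throws.
          StringUtils.popOptionWithArgument("-out",
              new LinkedList<String>(java.util.Arrays.asList("-out")));
        } catch (IllegalArgumentException e) {
          System.out.println("missing argument: " + e.getMessage());
        }
      }
    }
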
@@ -364,6 +364,50 @@ JNIEnv *env, jclass clazz, jstring path)
   return fd;
 }

+#define SOCKETPAIR_ARRAY_LEN 2
+
+JNIEXPORT jarray JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_socketpair0(
+JNIEnv *env, jclass clazz)
+{
+  jarray arr = NULL;
+  int idx, err, fds[SOCKETPAIR_ARRAY_LEN] = { -1, -1 };
+  jthrowable jthr = NULL;
+
+  arr = (*env)->NewIntArray(env, SOCKETPAIR_ARRAY_LEN);
+  jthr = (*env)->ExceptionOccurred(env);
+  if (jthr) {
+    (*env)->ExceptionClear(env);
+    goto done;
+  }
+  if (socketpair(PF_UNIX, SOCK_STREAM, 0, fds) < 0) {
+    err = errno;
+    jthr = newSocketException(env, err,
+        "socketpair(2) error: %s", terror(err));
+    goto done;
+  }
+  (*env)->SetIntArrayRegion(env, arr, 0, SOCKETPAIR_ARRAY_LEN, fds);
+  jthr = (*env)->ExceptionOccurred(env);
+  if (jthr) {
+    (*env)->ExceptionClear(env);
+    goto done;
+  }
+
+done:
+  if (jthr) {
+    (*env)->DeleteLocalRef(env, arr);
+    arr = NULL;
+    for (idx = 0; idx < SOCKETPAIR_ARRAY_LEN; idx++) {
+      if (fds[idx] >= 0) {
+        close(fds[idx]);
+        fds[idx] = -1;
+      }
+    }
+    (*env)->Throw(env, jthr);
+  }
+  return arr;
+}
+
 JNIEXPORT jint JNICALL
 Java_org_apache_hadoop_net_unix_DomainSocket_accept0(
 JNIEnv *env, jclass clazz, jint fd)

@@ -21,6 +21,7 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.security.Credentials;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
|
@@ -160,6 +161,16 @@ public class TestKeyProviderFactory {
         provider.getCurrentKey("key4").getMaterial());
     assertArrayEquals(key3, provider.getCurrentKey("key3").getMaterial());
     assertEquals("key3@0", provider.getCurrentKey("key3").getVersionName());
+
+    List<String> keys = provider.getKeys();
+    assertTrue("Keys should have been returned.", keys.size() == 2);
+    assertTrue("Returned Keys should have included key3.", keys.contains("key3"));
+    assertTrue("Returned Keys should have included key4.", keys.contains("key4"));
+
+    List<KeyVersion> kvl = provider.getKeyVersions("key3");
+    assertTrue("KeyVersions should have been returned for key3.", kvl.size() == 1);
+    assertTrue("KeyVersions should have included key3@0.", kvl.get(0).getVersionName().equals("key3@0"));
+    assertArrayEquals(key3, kvl.get(0).getMaterial());
   }

   @Test

@@ -0,0 +1,87 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.metrics2.impl;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.metrics2.MetricsSource;
|
||||
import org.apache.hadoop.metrics2.MetricsTag;
|
||||
import org.apache.hadoop.metrics2.annotation.Metric;
|
||||
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||
import org.apache.hadoop.metrics2.lib.MetricsAnnotations;
|
||||
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
||||
import org.apache.hadoop.metrics2.lib.MetricsSourceBuilder;
|
||||
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestMetricsSourceAdapter {
|
||||
|
||||
@Test
|
||||
public void testGetMetricsAndJmx() throws Exception {
|
||||
// create test source with a single metric counter of value 0
|
||||
TestSource source = new TestSource("test");
|
||||
MetricsSourceBuilder sb = MetricsAnnotations.newSourceBuilder(source);
|
||||
final MetricsSource s = sb.build();
|
||||
|
||||
List<MetricsTag> injectedTags = new ArrayList<MetricsTag>();
|
||||
MetricsSourceAdapter sa = new MetricsSourceAdapter(
|
||||
"test", "test", "test desc", s, injectedTags, null, null, 1, false);
|
||||
|
||||
// all metrics are initially assumed to have changed
|
||||
MetricsCollectorImpl builder = new MetricsCollectorImpl();
|
||||
Iterable<MetricsRecordImpl> metricsRecords = sa.getMetrics(builder, true);
|
||||
|
||||
// Validate getMetrics and JMX initial values
|
||||
MetricsRecordImpl metricsRecord = metricsRecords.iterator().next();
|
||||
assertEquals(0L,
|
||||
metricsRecord.metrics().iterator().next().value().longValue());
|
||||
|
||||
Thread.sleep(100); // skip JMX cache TTL
|
||||
assertEquals(0L, (Number)sa.getAttribute("C1"));
|
||||
|
||||
// change metric value
|
||||
source.incrementCnt();
|
||||
|
||||
// validate getMetrics and JMX
|
||||
builder = new MetricsCollectorImpl();
|
||||
metricsRecords = sa.getMetrics(builder, true);
|
||||
metricsRecord = metricsRecords.iterator().next();
|
||||
assertTrue(metricsRecord.metrics().iterator().hasNext());
|
||||
Thread.sleep(100); // skip JMX cache TTL
|
||||
assertEquals(1L, (Number)sa.getAttribute("C1"));
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@Metrics(context="test")
|
||||
private static class TestSource {
|
||||
@Metric("C1 desc") MutableCounterLong c1;
|
||||
final MetricsRegistry registry;
|
||||
|
||||
TestSource(String recName) {
|
||||
registry = new MetricsRegistry(recName);
|
||||
}
|
||||
|
||||
public void incrementCnt() {
|
||||
c1.incr();
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -420,7 +420,8 @@ public class TestDomainSocket {
|
|||
* @throws IOException
|
||||
*/
|
||||
void testClientServer1(final Class<? extends WriteStrategy> writeStrategyClass,
|
||||
final Class<? extends ReadStrategy> readStrategyClass) throws Exception {
|
||||
final Class<? extends ReadStrategy> readStrategyClass,
|
||||
final DomainSocket preConnectedSockets[]) throws Exception {
|
||||
final String TEST_PATH = new File(sockDir.getDir(),
|
||||
"test_sock_client_server1").getAbsolutePath();
|
||||
final byte clientMsg1[] = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6 };
|
||||
|
@@ -428,13 +429,15 @@
|
|||
final byte clientMsg2 = 0x45;
|
||||
final ArrayBlockingQueue<Throwable> threadResults =
|
||||
new ArrayBlockingQueue<Throwable>(2);
|
||||
final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
|
||||
final DomainSocket serv = (preConnectedSockets != null) ?
|
||||
null : DomainSocket.bindAndListen(TEST_PATH);
|
||||
Thread serverThread = new Thread() {
|
||||
public void run(){
|
||||
// Run server
|
||||
DomainSocket conn = null;
|
||||
try {
|
||||
conn = serv.accept();
|
||||
conn = preConnectedSockets != null ?
|
||||
preConnectedSockets[0] : serv.accept();
|
||||
byte in1[] = new byte[clientMsg1.length];
|
||||
ReadStrategy reader = readStrategyClass.newInstance();
|
||||
reader.init(conn);
|
||||
|
@@ -459,7 +462,8 @@
|
|||
Thread clientThread = new Thread() {
|
||||
public void run(){
|
||||
try {
|
||||
DomainSocket client = DomainSocket.connect(TEST_PATH);
|
||||
DomainSocket client = preConnectedSockets != null ?
|
||||
preConnectedSockets[1] : DomainSocket.connect(TEST_PATH);
|
||||
WriteStrategy writer = writeStrategyClass.newInstance();
|
||||
writer.init(client);
|
||||
writer.write(clientMsg1);
|
||||
|
@@ -487,25 +491,45 @@
|
|||
}
|
||||
serverThread.join(120000);
|
||||
clientThread.join(120000);
|
||||
if (serv != null) {
|
||||
serv.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=180000)
|
||||
public void testClientServerOutStreamInStream() throws Exception {
|
||||
testClientServer1(OutputStreamWriteStrategy.class,
|
||||
InputStreamReadStrategy.class);
|
||||
InputStreamReadStrategy.class, null);
|
||||
}
|
||||
|
||||
@Test(timeout=180000)
|
||||
public void testClientServerOutStreamInStreamWithSocketpair() throws Exception {
|
||||
testClientServer1(OutputStreamWriteStrategy.class,
|
||||
InputStreamReadStrategy.class, DomainSocket.socketpair());
|
||||
}
|
||||
|
||||
@Test(timeout=180000)
|
||||
public void testClientServerOutStreamInDbb() throws Exception {
|
||||
testClientServer1(OutputStreamWriteStrategy.class,
|
||||
DirectByteBufferReadStrategy.class);
|
||||
DirectByteBufferReadStrategy.class, null);
|
||||
}
|
||||
|
||||
@Test(timeout=180000)
|
||||
public void testClientServerOutStreamInDbbWithSocketpair() throws Exception {
|
||||
testClientServer1(OutputStreamWriteStrategy.class,
|
||||
DirectByteBufferReadStrategy.class, DomainSocket.socketpair());
|
||||
}
|
||||
|
||||
@Test(timeout=180000)
|
||||
public void testClientServerOutStreamInAbb() throws Exception {
|
||||
testClientServer1(OutputStreamWriteStrategy.class,
|
||||
ArrayBackedByteBufferReadStrategy.class);
|
||||
ArrayBackedByteBufferReadStrategy.class, null);
|
||||
}
|
||||
|
||||
@Test(timeout=180000)
|
||||
public void testClientServerOutStreamInAbbWithSocketpair() throws Exception {
|
||||
testClientServer1(OutputStreamWriteStrategy.class,
|
||||
ArrayBackedByteBufferReadStrategy.class, DomainSocket.socketpair());
|
||||
}
|
||||
|
||||
static private class PassedFile {
|
||||
|
|
|
@@ -19,7 +19,6 @@ package org.apache.hadoop.security;
|
|||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.ipc.TestSaslRPC;
|
||||
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
|
||||
import org.apache.hadoop.security.authentication.util.KerberosName;
|
||||
|
@@ -40,9 +39,9 @@ import java.util.Collection;
|
|||
import java.util.LinkedHashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
|
||||
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
|
||||
import static org.apache.hadoop.ipc.TestSaslRPC.*;
|
||||
import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.*;
|
||||
import static org.junit.Assert.*;
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
@@ -55,6 +54,8 @@ public class TestUserGroupInformation {
|
|||
final private static String GROUP3_NAME = "group3";
|
||||
final private static String[] GROUP_NAMES =
|
||||
new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME};
|
||||
// Rollover interval of percentile metrics (in seconds)
|
||||
private static final int PERCENTILES_INTERVAL = 1;
|
||||
private static Configuration conf;
|
||||
|
||||
/**
|
||||
|
@@ -80,7 +81,8 @@ public class TestUserGroupInformation {
|
|||
// doesn't matter what it is, but getGroups needs it set...
|
||||
// use HADOOP_HOME environment variable to prevent interfering with logic
|
||||
// that finds winutils.exe
|
||||
System.setProperty("hadoop.home.dir", System.getenv("HADOOP_HOME"));
|
||||
String home = System.getenv("HADOOP_HOME");
|
||||
System.setProperty("hadoop.home.dir", (home != null ? home : "."));
|
||||
// fake the realm is kerberos is enabled
|
||||
System.setProperty("java.security.krb5.kdc", "");
|
||||
System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
|
||||
|
@@ -150,11 +152,15 @@ public class TestUserGroupInformation {
|
|||
/** Test login method */
|
||||
@Test (timeout = 30000)
|
||||
public void testLogin() throws Exception {
|
||||
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
|
||||
String.valueOf(PERCENTILES_INTERVAL));
|
||||
UserGroupInformation.setConfiguration(conf);
|
||||
// login from unix
|
||||
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
|
||||
assertEquals(UserGroupInformation.getCurrentUser(),
|
||||
UserGroupInformation.getLoginUser());
|
||||
assertTrue(ugi.getGroupNames().length >= 1);
|
||||
verifyGroupMetrics(1);
|
||||
|
||||
// ensure that doAs works correctly
|
||||
UserGroupInformation userGroupInfo =
|
||||
|
@@ -728,6 +734,21 @@ public class TestUserGroupInformation {
|
|||
}
|
||||
}
|
||||
|
||||
private static void verifyGroupMetrics(
|
||||
long groups) throws InterruptedException {
|
||||
MetricsRecordBuilder rb = getMetrics("UgiMetrics");
|
||||
if (groups > 0) {
|
||||
assertCounter("GetGroupsNumOps", groups, rb);
|
||||
double avg = getDoubleGauge("GetGroupsAvgTime", rb);
|
||||
assertTrue(avg >= 0.0);
|
||||
|
||||
// Sleep for an interval+slop to let the percentiles rollover
|
||||
Thread.sleep((PERCENTILES_INTERVAL+1)*1000);
|
||||
// Check that the percentiles were updated
|
||||
assertQuantileGauges("GetGroups1s", rb);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for the case that UserGroupInformation.getCurrentUser()
|
||||
* is called when the AccessControlContext has a Subject associated
|
||||
|
|
|
@@ -13,10 +13,6 @@ Trunk (Unreleased)
|
|||
|
||||
HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
|
||||
|
||||
HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
|
||||
as a collection of storages (see breakdown of tasks below for features and
|
||||
contributors).
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
HDFS-4665. Move TestNetworkTopologyWithNodeGroup to common.
|
||||
|
@@ -243,13 +239,18 @@ Trunk (Unreleased)
|
|||
|
||||
HDFS-5636. Enforce a max TTL per cache pool. (awang via cmccabe)
|
||||
|
||||
HDFS-5651. Remove dfs.namenode.caching.enabled and improve CRM locking.
|
||||
(cmccabe via wang)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HDFS-5349. DNA_CACHE and DNA_UNCACHE should be by blockId only. (cmccabe)
|
||||
|
||||
HDFS-5665. Remove the unnecessary writeLock while initializing CacheManager
|
||||
in FsNameSystem Ctor. (Uma Maheswara Rao G via Andrew Wang)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-9635 Fix potential Stack Overflow in DomainSocket.c (V. Karthik Kumar
|
||||
via cmccabe)
|
||||
|
||||
|
@@ -444,139 +445,27 @@ Trunk (Unreleased)
|
|||
|
||||
HDFS-5626. dfsadmin -report shows incorrect cache values. (cmccabe)
|
||||
|
||||
HDFS-5406. Send incremental block reports for all storages in a
|
||||
single call. (Arpit Agarwal)
|
||||
|
||||
HDFS-5454. DataNode UUID should be assigned prior to FsDataset
|
||||
initialization. (Arpit Agarwal)
|
||||
|
||||
HDFS-5679. TestCacheDirectives should handle the case where native code
|
||||
is not available. (wang)
|
||||
|
||||
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
|
||||
HDFS-5701. Fix the CacheAdmin -addPool -maxTtl option name.
|
||||
(Stephen Chu via wang)
|
||||
|
||||
HDFS-4985. Add storage type to the protocol and expose it in block report
|
||||
and block locations. (Arpit Agarwal)
|
||||
HDFS-5708. The CacheManager throws a NPE in the DataNode logs when
|
||||
processing cache reports that refer to a block not known to the
|
||||
BlockManager. (cmccabe via wang)
|
||||
|
||||
HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
|
||||
HDFS-5659. dfsadmin -report doesn't output cache information properly.
|
||||
(wang)
|
||||
|
||||
HDFS-5000. DataNode configuration should allow specifying storage type.
|
||||
(Arpit Agarwal)
|
||||
HDFS-5705. TestSecondaryNameNodeUpgrade#testChangeNsIDFails may fail due
|
||||
to ConcurrentModificationException. (Ted Yu via brandonli)
|
||||
|
||||
HDFS-4987. Namenode changes to track multiple storages per datanode.
|
||||
(szetszwo)
|
||||
HDFS-5719. FSImage#doRollback() should close prevState before return
|
||||
(Ted Yu via brandonli)
|
||||
|
||||
HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
|
||||
(Junping Du via szetszwo)
|
||||
|
||||
HDFS-5009. Include storage information in the LocatedBlock. (szetszwo)
|
||||
|
||||
HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
|
||||
firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
|
||||
fix a synchronization problem in DatanodeStorageInfo. (szetszwo)
|
||||
|
||||
HDFS-5157. Add StorageType to FsVolume. (Junping Du via szetszwo)
|
||||
|
||||
HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
|
||||
datanodes. (szetszwo)
|
||||
|
||||
HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
|
||||
|
||||
HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
|
||||
|
||||
HDFS-5222. Move block schedule information from DatanodeDescriptor to
|
||||
DatanodeStorageInfo. (szetszwo)
|
||||
|
||||
HDFS-4988. Datanode must support all the volumes as individual storages.
|
||||
(Arpit Agarwal)
|
||||
|
||||
HDFS-5377. Heartbeats from Datandode should include one storage report
|
||||
per storage directory. (Arpit Agarwal)
|
||||
|
||||
HDFS-5398. NameNode changes to process storage reports per storage
|
||||
directory. (Arpit Agarwal)
|
||||
|
||||
HDFS-5390. Send one incremental block report per storage directory.
|
||||
(Arpit Agarwal)
|
||||
|
||||
HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
|
||||
|
||||
HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities. (szetszwo)
|
||||
|
||||
HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
|
||||
|
||||
HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
|
||||
Arpit Agarwal)
|
||||
|
||||
HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
|
||||
|
||||
HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
|
||||
|
||||
HDFS-5448. Datanode should generate its ID on first registration. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
|
||||
|
||||
HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
|
||||
(Contributed by szetszwo)
|
||||
|
||||
HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
|
||||
by szetszwo)
|
||||
|
||||
HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
|
||||
(Contributed by szetszwo)
|
||||
|
||||
HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
|
||||
TestNNThroughputBenchmark (Contributed by szetszwo)
|
||||
|
||||
HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
|
||||
by Junping Du)
|
||||
|
||||
HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
|
||||
|
||||
HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
|
||||
|
||||
HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
|
||||
|
||||
HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
|
||||
|
||||
HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
|
||||
|
||||
HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
|
||||
(Junping Du via Arpit Agarwal)
|
||||
|
||||
HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
|
||||
|
||||
HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
|
||||
szetszwo)
|
||||
|
||||
HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
|
||||
|
||||
HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
|
||||
not accurate. (Eric Sirianni via Arpit Agarwal)
|
||||
|
||||
HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
|
||||
HDFS-5589. Namenode loops caching and uncaching when data should be
|
||||
uncached (awang via cmccabe)
|
||||
|
||||
Release 2.4.0 - UNRELEASED
|
||||
|
||||
|
@@ -608,6 +497,10 @@ Release 2.4.0 - UNRELEASED
|
|||
|
||||
HDFS-5514. FSNamesystem's fsLock should allow custom implementation (daryn)
|
||||
|
||||
HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
|
||||
as a collection of storages (see breakdown of tasks below for features and
|
||||
contributors).
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
HDFS-5267. Remove volatile from LightWeightHashSet. (Junping Du via llu)
|
||||
|
@@ -618,9 +511,6 @@ Release 2.4.0 - UNRELEASED
|
|||
HDFS-5004. Add additional JMX bean for NameNode status data
|
||||
(Trevor Lorimer via cos)
|
||||
|
||||
HDFS-5068. Convert NNThroughputBenchmark to a Tool to allow generic options.
|
||||
(shv)
|
||||
|
||||
HDFS-4994. Audit log getContentSummary() calls. (Robert Parker via kihwal)
|
||||
|
||||
HDFS-5144. Document time unit to NameNodeMetrics. (Akira Ajisaka via
|
||||
|
@@ -768,6 +658,11 @@ Release 2.4.0 - UNRELEASED
|
|||
HDFS-2933. Improve DataNode Web UI Index Page. (Vivek Ganesan via
|
||||
Arpit Agarwal)
|
||||
|
||||
HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper.
|
||||
(Haohui Mai via jing9)
|
||||
|
||||
HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
|
||||
|
@@ -833,6 +728,139 @@ Release 2.4.0 - UNRELEASED
|
|||
HDFS-5690. DataNode fails to start in secure mode when dfs.http.policy equals to
|
||||
HTTP_ONLY. (Haohui Mai via jing9)
|
||||
|
||||
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
|
||||
|
||||
HDFS-4985. Add storage type to the protocol and expose it in block report
|
||||
and block locations. (Arpit Agarwal)
|
||||
|
||||
HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
|
||||
|
||||
HDFS-5000. DataNode configuration should allow specifying storage type.
|
||||
(Arpit Agarwal)
|
||||
|
||||
HDFS-4987. Namenode changes to track multiple storages per datanode.
|
||||
(szetszwo)
|
||||
|
||||
HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
|
||||
(Junping Du via szetszwo)
|
||||
|
||||
HDFS-5009. Include storage information in the LocatedBlock. (szetszwo)
|
||||
|
||||
HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
|
||||
firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
|
||||
fix a synchronization problem in DatanodeStorageInfo. (szetszwo)
|
||||
|
||||
HDFS-5157. Add StorageType to FsVolume. (Junping Du via szetszwo)
|
||||
|
||||
HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
|
||||
datanodes. (szetszwo)
|
||||
|
||||
HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
|
||||
|
||||
HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
|
||||
|
||||
HDFS-5222. Move block schedule information from DatanodeDescriptor to
|
||||
DatanodeStorageInfo. (szetszwo)
|
||||
|
||||
HDFS-4988. Datanode must support all the volumes as individual storages.
|
||||
(Arpit Agarwal)
|
||||
|
||||
HDFS-5377. Heartbeats from Datandode should include one storage report
|
||||
per storage directory. (Arpit Agarwal)
|
||||
|
||||
HDFS-5398. NameNode changes to process storage reports per storage
|
||||
directory. (Arpit Agarwal)
|
||||
|
||||
HDFS-5390. Send one incremental block report per storage directory.
|
||||
(Arpit Agarwal)
|
||||
|
||||
HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
|
||||
|
||||
HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities. (szetszwo)
|
||||
|
||||
HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
|
||||
|
||||
HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
|
||||
Arpit Agarwal)
|
||||
|
||||
HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
|
||||
|
||||
HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
|
||||
|
||||
HDFS-5448. Datanode should generate its ID on first registration. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
|
||||
|
||||
HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
|
||||
(Contributed by szetszwo)
|
||||
|
||||
HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
|
||||
by szetszwo)
|
||||
|
||||
HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
|
||||
(Contributed by szetszwo)
|
||||
|
||||
HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
|
||||
TestNNThroughputBenchmark (Contributed by szetszwo)
|
||||
|
||||
HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
|
||||
by Junping Du)
|
||||
|
||||
HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
|
||||
|
||||
HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
|
||||
|
||||
HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
|
||||
|
||||
HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
|
||||
|
||||
HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
|
||||
|
||||
HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
|
||||
(Junping Du via Arpit Agarwal)
|
||||
|
||||
HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
|
||||
|
||||
HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
|
||||
Agarwal)
|
||||
|
||||
HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
|
||||
szetszwo)
|
||||
|
||||
HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
|
||||
|
||||
HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
|
||||
not accurate. (Eric Sirianni via Arpit Agarwal)
|
||||
|
||||
HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
|
||||
|
||||
HDFS-5406. Send incremental block reports for all storages in a
|
||||
single call. (Arpit Agarwal)
|
||||
|
||||
HDFS-5454. DataNode UUID should be assigned prior to FsDataset
|
||||
initialization. (Arpit Agarwal)
|
||||
|
||||
HDFS-5667. Include DatanodeStorage in StorageReport. (Arpit Agarwal)
|
||||
|
||||
Release 2.3.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@@ -866,6 +894,12 @@ Release 2.3.0 - UNRELEASED
|
|||
HDFS-5662. Can't decommission a DataNode due to file's replication factor
|
||||
larger than the rest of the cluster size. (brandonli)
|
||||
|
||||
HDFS-5068. Convert NNThroughputBenchmark to a Tool to allow generic options.
|
||||
(shv)
|
||||
|
||||
HDFS-5675. Add Mkdirs operation to NNThroughputBenchmark.
|
||||
(Plamen Jeliazkov via shv)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
@@ -1016,6 +1050,11 @@ Release 2.3.0 - UNRELEASED
|
|||
HDFS-5661. Browsing FileSystem via web ui, should use datanode's fqdn instead of ip
|
||||
address. (Benoy Antony via jing9)
|
||||
|
||||
HDFS-5582. hdfs getconf -excludeFile or -includeFile always failed (sathish
|
||||
via cmccabe)
|
||||
|
||||
HDFS-5671. Fix socket leak in DFSInputStream#getBlockReader. (JamesLi via umamahesh)
|
||||
|
||||
Release 2.2.0 - 2013-10-13
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@@ -108,8 +108,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0;
   public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume";
   public static final int DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_DEFAULT = 4;
-  public static final String DFS_NAMENODE_CACHING_ENABLED_KEY = "dfs.namenode.caching.enabled";
-  public static final boolean DFS_NAMENODE_CACHING_ENABLED_DEFAULT = false;
+  public static final String DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT =
+      "dfs.namenode.path.based.cache.block.map.allocation.percent";
+  public static final float DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT = 0.25f;

   public static final String DFS_NAMENODE_HTTP_PORT_KEY = "dfs.http.port";
   public static final int DFS_NAMENODE_HTTP_PORT_DEFAULT = 50070;

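Illustration (not part of the patch): the new key sizes the NameNode's cached-block map as a percentage of the Java heap, with 0.25 as the shipped default. A hedged sketch of overriding it programmatically; setting it in hdfs-site.xml works the same way:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class CacheBlockMapSizing {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Devote 0.5% of the heap to the cached-block map instead of the 0.25% default.
        conf.setFloat(
            DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
            0.5f);
        System.out.println(conf.getFloat(
            DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
            DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT));
      }
    }
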
@@ -1188,11 +1188,21 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
     }
     // Try to create a new remote peer.
     Peer peer = newTcpPeer(dnAddr);
-    return BlockReaderFactory.newBlockReader(
-        dfsClient.getConf(), file, block, blockToken, startOffset,
-        len, verifyChecksum, clientName, peer, chosenNode,
-        dsFactory, peerCache, fileInputStreamCache, false,
+    try {
+      reader = BlockReaderFactory.newBlockReader(dfsClient.getConf(), file,
+          block, blockToken, startOffset, len, verifyChecksum, clientName,
+          peer, chosenNode, dsFactory, peerCache, fileInputStreamCache, false,
         curCachingStrategy);
+      return reader;
+    } catch (IOException ex) {
+      DFSClient.LOG.debug(
+          "Exception while getting block reader, closing stale " + peer, ex);
+      throw ex;
+    } finally {
+      if (reader == null) {
+        IOUtils.closeQuietly(peer);
+      }
+    }
   }


@@ -533,21 +533,7 @@ public class PBHelper {
|
|||
|
||||
static public DatanodeInfoProto convertDatanodeInfo(DatanodeInfo di) {
|
||||
if (di == null) return null;
|
||||
DatanodeInfoProto.Builder builder = DatanodeInfoProto.newBuilder();
|
||||
if (di.getNetworkLocation() != null) {
|
||||
builder.setLocation(di.getNetworkLocation());
|
||||
}
|
||||
|
||||
return builder.
|
||||
setId(PBHelper.convert((DatanodeID) di)).
|
||||
setCapacity(di.getCapacity()).
|
||||
setDfsUsed(di.getDfsUsed()).
|
||||
setRemaining(di.getRemaining()).
|
||||
setBlockPoolUsed(di.getBlockPoolUsed()).
|
||||
setLastUpdate(di.getLastUpdate()).
|
||||
setXceiverCount(di.getXceiverCount()).
|
||||
setAdminState(PBHelper.convert(di.getAdminState())).
|
||||
build();
|
||||
return convert(di);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -591,15 +577,20 @@ public class PBHelper {
|
|||
|
||||
public static DatanodeInfoProto convert(DatanodeInfo info) {
|
||||
DatanodeInfoProto.Builder builder = DatanodeInfoProto.newBuilder();
|
||||
builder.setBlockPoolUsed(info.getBlockPoolUsed());
|
||||
builder.setAdminState(PBHelper.convert(info.getAdminState()));
|
||||
builder.setCapacity(info.getCapacity())
|
||||
.setDfsUsed(info.getDfsUsed())
|
||||
if (info.getNetworkLocation() != null) {
|
||||
builder.setLocation(info.getNetworkLocation());
|
||||
}
|
||||
builder
|
||||
.setId(PBHelper.convert((DatanodeID)info))
|
||||
.setLastUpdate(info.getLastUpdate())
|
||||
.setLocation(info.getNetworkLocation())
|
||||
.setCapacity(info.getCapacity())
|
||||
.setDfsUsed(info.getDfsUsed())
|
||||
.setRemaining(info.getRemaining())
|
||||
.setBlockPoolUsed(info.getBlockPoolUsed())
|
||||
.setCacheCapacity(info.getCacheCapacity())
|
||||
.setCacheUsed(info.getCacheUsed())
|
||||
.setLastUpdate(info.getLastUpdate())
|
||||
.setXceiverCount(info.getXceiverCount())
|
||||
.setAdminState(PBHelper.convert(info.getAdminState()))
|
||||
.build();
|
||||
return builder.build();
|
||||
}
|
||||
|
@@ -1591,13 +1582,17 @@ public class PBHelper {
|
|||
StorageReportProto.Builder builder = StorageReportProto.newBuilder()
|
||||
.setBlockPoolUsed(r.getBlockPoolUsed()).setCapacity(r.getCapacity())
|
||||
.setDfsUsed(r.getDfsUsed()).setRemaining(r.getRemaining())
|
||||
.setStorageUuid(r.getStorageID());
|
||||
.setStorageUuid(r.getStorage().getStorageID())
|
||||
.setStorage(convert(r.getStorage()));
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
public static StorageReport convert(StorageReportProto p) {
|
||||
return new StorageReport(p.getStorageUuid(), p.getFailed(),
|
||||
p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
|
||||
return new StorageReport(
|
||||
p.hasStorage() ?
|
||||
convert(p.getStorage()) :
|
||||
new DatanodeStorage(p.getStorageUuid()),
|
||||
p.getFailed(), p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
|
||||
p.getBlockPoolUsed());
|
||||
}
|
||||
|
||||
|
|
|
@@ -21,12 +21,14 @@ import static org.apache.hadoop.util.ExitUtil.terminate;
|
|||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Date;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.locks.Condition;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
@@ -76,7 +78,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
/**
|
||||
* Pseudorandom number source
|
||||
*/
|
||||
private final Random random = new Random();
|
||||
private static final Random random = new Random();
|
||||
|
||||
/**
|
||||
* The interval at which we scan the namesystem for caching changes.
|
||||
|
@@ -87,17 +89,17 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
* The CacheReplicationMonitor (CRM) lock. Used to synchronize starting and
|
||||
* waiting for rescan operations.
|
||||
*/
|
||||
private final ReentrantLock lock = new ReentrantLock();
|
||||
private final ReentrantLock lock;
|
||||
|
||||
/**
|
||||
* Notifies the scan thread that an immediate rescan is needed.
|
||||
*/
|
||||
private final Condition doRescan = lock.newCondition();
|
||||
private final Condition doRescan;
|
||||
|
||||
/**
|
||||
* Notifies waiting threads that a rescan has finished.
|
||||
*/
|
||||
private final Condition scanFinished = lock.newCondition();
|
||||
private final Condition scanFinished;
|
||||
|
||||
/**
|
||||
* Whether there are pending CacheManager operations that necessitate a
|
||||
|
@@ -121,11 +123,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
*/
|
||||
private boolean shutdown = false;
|
||||
|
||||
/**
|
||||
* The monotonic time at which the current scan started.
|
||||
*/
|
||||
private long startTimeMs;
|
||||
|
||||
/**
|
||||
* Mark status of the current scan.
|
||||
*/
|
||||
|
@@ -142,24 +139,27 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
private long scannedBlocks;
|
||||
|
||||
public CacheReplicationMonitor(FSNamesystem namesystem,
|
||||
CacheManager cacheManager, long intervalMs) {
|
||||
CacheManager cacheManager, long intervalMs, ReentrantLock lock) {
|
||||
this.namesystem = namesystem;
|
||||
this.blockManager = namesystem.getBlockManager();
|
||||
this.cacheManager = cacheManager;
|
||||
this.cachedBlocks = cacheManager.getCachedBlocks();
|
||||
this.intervalMs = intervalMs;
|
||||
this.lock = lock;
|
||||
this.doRescan = this.lock.newCondition();
|
||||
this.scanFinished = this.lock.newCondition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
startTimeMs = 0;
|
||||
long startTimeMs = 0;
|
||||
Thread.currentThread().setName("CacheReplicationMonitor(" +
|
||||
System.identityHashCode(this) + ")");
|
||||
LOG.info("Starting CacheReplicationMonitor with interval " +
|
||||
intervalMs + " milliseconds");
|
||||
try {
|
||||
long curTimeMs = Time.monotonicNow();
|
||||
while (true) {
|
||||
// Not all of the variables accessed here need the CRM lock, but take
|
||||
// it anyway for simplicity
|
||||
lock.lock();
|
||||
try {
|
||||
while (true) {
|
||||
|
@@ -180,12 +180,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
doRescan.await(delta, TimeUnit.MILLISECONDS);
|
||||
curTimeMs = Time.monotonicNow();
|
||||
}
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
// Mark scan as started, clear needsRescan
|
||||
lock.lock();
|
||||
try {
|
||||
isScanning = true;
|
||||
needsRescan = false;
|
||||
} finally {
|
||||
|
@@ -195,7 +189,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
mark = !mark;
|
||||
rescan();
|
||||
curTimeMs = Time.monotonicNow();
|
||||
// Retake the CRM lock to update synchronization-related variables
|
||||
// Update synchronization-related variables.
|
||||
lock.lock();
|
||||
try {
|
||||
isScanning = false;
|
||||
|
@@ -208,32 +202,15 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
scannedBlocks + " block(s) in " + (curTimeMs - startTimeMs) + " " +
|
||||
"millisecond(s).");
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
LOG.info("Shutting down CacheReplicationMonitor.");
|
||||
return;
|
||||
} catch (Throwable t) {
|
||||
LOG.fatal("Thread exiting", t);
|
||||
terminate(1, t);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Similar to {@link CacheReplicationMonitor#waitForRescan()}, except it only
|
||||
* waits if there are pending operations that necessitate a rescan as
|
||||
* indicated by {@link #setNeedsRescan()}.
|
||||
* <p>
|
||||
* Note that this call may release the FSN lock, so operations before and
|
||||
* after are not necessarily atomic.
|
||||
*/
|
||||
public void waitForRescanIfNeeded() {
|
||||
lock.lock();
|
||||
try {
|
||||
if (!needsRescan) {
|
||||
return;
|
||||
}
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
waitForRescan();
|
||||
}
|
||||
|
||||
/**
|
||||
* Waits for a rescan to complete. This doesn't guarantee consistency with
|
||||
* pending operations, only relative recency, since it will not force a new
|
||||
|
@ -242,33 +219,21 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
* Note that this call will release the FSN lock, so operations before and
|
||||
* after are not atomic.
|
||||
*/
|
||||
public void waitForRescan() {
|
||||
// Drop the FSN lock temporarily and retake it after we finish waiting
|
||||
// Need to handle both the read lock and the write lock
|
||||
boolean retakeWriteLock = false;
|
||||
if (namesystem.hasWriteLock()) {
|
||||
namesystem.writeUnlock();
|
||||
retakeWriteLock = true;
|
||||
} else if (namesystem.hasReadLock()) {
|
||||
namesystem.readUnlock();
|
||||
} else {
|
||||
// Expected to have at least one of the locks
|
||||
Preconditions.checkState(false,
|
||||
"Need to be holding either the read or write lock");
|
||||
public void waitForRescanIfNeeded() {
|
||||
Preconditions.checkArgument(!namesystem.hasWriteLock(),
|
||||
"Must not hold the FSN write lock when waiting for a rescan.");
|
||||
Preconditions.checkArgument(lock.isHeldByCurrentThread(),
|
||||
"Must hold the CRM lock when waiting for a rescan.");
|
||||
if (!needsRescan) {
|
||||
return;
|
||||
}
|
||||
// try/finally for retaking FSN lock
|
||||
try {
|
||||
lock.lock();
|
||||
// try/finally for releasing CRM lock
|
||||
try {
|
||||
// If no scan is already ongoing, mark the CRM as dirty and kick
|
||||
if (!isScanning) {
|
||||
needsRescan = true;
|
||||
doRescan.signal();
|
||||
}
|
||||
// Wait until the scan finishes and the count advances
|
||||
final long startCount = scanCount;
|
||||
while (startCount >= scanCount) {
|
||||
while ((!shutdown) && (startCount >= scanCount)) {
|
||||
try {
|
||||
scanFinished.await();
|
||||
} catch (InterruptedException e) {
|
||||
|
@ -277,16 +242,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
break;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
} finally {
|
||||
if (retakeWriteLock) {
|
||||
namesystem.writeLock();
|
||||
} else {
|
||||
namesystem.readLock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -294,42 +249,43 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
* changes that require a rescan.
|
||||
*/
|
||||
public void setNeedsRescan() {
|
||||
lock.lock();
|
||||
try {
|
||||
Preconditions.checkArgument(lock.isHeldByCurrentThread(),
|
||||
"Must hold the CRM lock when setting the needsRescan bit.");
|
||||
this.needsRescan = true;
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Shut down and join the monitor thread.
|
||||
* Shut down the monitor thread.
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
Preconditions.checkArgument(namesystem.hasWriteLock());
|
||||
lock.lock();
|
||||
try {
|
||||
if (shutdown) return;
|
||||
// Since we hold both the FSN write lock and the CRM lock here,
|
||||
// we know that the CRM thread cannot be currently modifying
|
||||
// the cache manager state while we're closing it.
|
||||
// Since the CRM thread checks the value of 'shutdown' after waiting
|
||||
// for a lock, we know that the thread will not modify the cache
|
||||
// manager state after this point.
|
||||
shutdown = true;
|
||||
doRescan.signalAll();
|
||||
scanFinished.signalAll();
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
try {
|
||||
if (this.isAlive()) {
|
||||
this.join(60000);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
|
||||
private void rescan() {
|
||||
private void rescan() throws InterruptedException {
|
||||
scannedDirectives = 0;
|
||||
scannedBlocks = 0;
|
||||
namesystem.writeLock();
|
||||
try {
|
||||
if (shutdown) {
|
||||
throw new InterruptedException("CacheReplicationMonitor was " +
|
||||
"shut down.");
|
||||
}
|
||||
resetStatistics();
|
||||
rescanCacheDirectives();
|
||||
rescanCachedBlockMap();
|
||||
|
@ -356,8 +312,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
FSDirectory fsDir = namesystem.getFSDirectory();
|
||||
final long now = new Date().getTime();
|
||||
for (CacheDirective directive : cacheManager.getCacheDirectives()) {
|
||||
// Reset the directive's statistics
|
||||
directive.resetStatistics();
|
||||
// Skip processing this entry if it has expired
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace("Directive expiry is at " + directive.getExpiryTime());
|
||||
|
@ -460,14 +414,21 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
directive.getReplication()) * blockInfo.getNumBytes();
|
||||
cachedTotal += cachedByBlock;
|
||||
|
||||
if (mark != ocblock.getMark()) {
|
||||
// Mark hasn't been set in this scan, so update replication and mark.
|
||||
if ((mark != ocblock.getMark()) ||
|
||||
(ocblock.getReplication() < directive.getReplication())) {
|
||||
//
|
||||
// Overwrite the block's replication and mark in two cases:
|
||||
//
|
||||
// 1. If the mark on the CachedBlock is different from the mark for
|
||||
// this scan, that means the block hasn't been updated during this
|
||||
// scan, and we should overwrite whatever is there, since it is no
|
||||
// longer valid.
|
||||
//
|
||||
// 2. If the replication in the CachedBlock is less than what the
|
||||
// directive asks for, we want to increase the block's replication
|
||||
// field to what the directive asks for.
|
||||
//
|
||||
ocblock.setReplicationAndMark(directive.getReplication(), mark);
|
||||
} else {
|
||||
// Mark already set in this scan. Set replication to highest value in
|
||||
// any CacheDirective that covers this file.
|
||||
ocblock.setReplicationAndMark((short)Math.max(
|
||||
directive.getReplication(), ocblock.getReplication()), mark);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -483,6 +444,39 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
private String findReasonForNotCaching(CachedBlock cblock,
|
||||
BlockInfo blockInfo) {
|
||||
if (blockInfo == null) {
|
||||
// Somehow, a cache report with the block arrived, but the block
|
||||
// reports from the DataNode haven't (yet?) described such a block.
|
||||
// Alternately, the NameNode might have invalidated the block, but the
|
||||
// DataNode hasn't caught up. In any case, we want to tell the DN
|
||||
// to uncache this.
|
||||
return "not tracked by the BlockManager";
|
||||
} else if (!blockInfo.isComplete()) {
|
||||
// When a cached block changes state from complete to some other state
|
||||
// on the DataNode (perhaps because of append), it will begin the
|
||||
// uncaching process. However, the uncaching process is not
|
||||
// instantaneous, especially if clients have pinned the block. So
|
||||
// there may be a period of time when incomplete blocks remain cached
|
||||
// on the DataNodes.
|
||||
return "not complete";
|
||||
} else if (cblock.getReplication() == 0) {
|
||||
// Since 0 is not a valid value for a cache directive's replication
|
||||
// field, seeing a replication of 0 on a CacheBlock means that it
|
||||
// has never been reached by any sweep.
|
||||
return "not needed by any directives";
|
||||
} else if (cblock.getMark() != mark) {
|
||||
// Although the block was needed in the past, we didn't reach it during
|
||||
// the current sweep. Therefore, it doesn't need to be cached any more.
|
||||
// Need to set the replication to 0 so it doesn't flip back to cached
|
||||
// when the mark flips on the next scan
|
||||
cblock.setReplicationAndMark((short)0, mark);
|
||||
return "no longer needed by any directives";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan through the cached block map.
|
||||
* Any blocks which are under-replicated should be assigned new Datanodes.
|
||||
|
@ -508,11 +502,17 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
iter.remove();
|
||||
}
|
||||
}
|
||||
// If the block's mark doesn't match with the mark of this scan, that
|
||||
// means that this block couldn't be reached during this scan. That means
|
||||
// it doesn't need to be cached any more.
|
||||
int neededCached = (cblock.getMark() != mark) ?
|
||||
0 : cblock.getReplication();
|
||||
BlockInfo blockInfo = blockManager.
|
||||
getStoredBlock(new Block(cblock.getBlockId()));
|
||||
String reason = findReasonForNotCaching(cblock, blockInfo);
|
||||
int neededCached = 0;
|
||||
if (reason != null) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("not caching " + cblock + " because it is " + reason);
|
||||
}
|
||||
} else {
|
||||
neededCached = cblock.getReplication();
|
||||
}
|
||||
int numCached = cached.size();
|
||||
if (numCached >= neededCached) {
|
||||
// If we have enough replicas, drop all pending cached.
|
||||
|
@ -566,9 +566,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
private void addNewPendingUncached(int neededUncached,
|
||||
CachedBlock cachedBlock, List<DatanodeDescriptor> cached,
|
||||
List<DatanodeDescriptor> pendingUncached) {
|
||||
if (!cacheManager.isActive()) {
|
||||
return;
|
||||
}
|
||||
// Figure out which replicas can be uncached.
|
||||
LinkedList<DatanodeDescriptor> possibilities =
|
||||
new LinkedList<DatanodeDescriptor>();
|
||||
|
@ -601,19 +598,18 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
* @param pendingCached A list of DataNodes that will soon cache the
|
||||
* block.
|
||||
*/
|
||||
private void addNewPendingCached(int neededCached,
|
||||
private void addNewPendingCached(final int neededCached,
|
||||
CachedBlock cachedBlock, List<DatanodeDescriptor> cached,
|
||||
List<DatanodeDescriptor> pendingCached) {
|
||||
if (!cacheManager.isActive()) {
|
||||
return;
|
||||
}
|
||||
// To figure out which replicas can be cached, we consult the
|
||||
// blocksMap. We don't want to try to cache a corrupt replica, though.
|
||||
BlockInfo blockInfo = blockManager.
|
||||
getStoredBlock(new Block(cachedBlock.getBlockId()));
|
||||
if (blockInfo == null) {
|
||||
LOG.debug("Not caching block " + cachedBlock + " because it " +
|
||||
"was deleted from all DataNodes.");
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Not caching block " + cachedBlock + " because there " +
|
||||
"is no record of it on the NameNode.");
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!blockInfo.isComplete()) {
|
||||
|
@ -623,35 +619,156 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|||
}
|
||||
return;
|
||||
}
|
||||
List<DatanodeDescriptor> possibilities = new LinkedList<DatanodeDescriptor>();
|
||||
// Filter the list of replicas to only the valid targets
|
||||
List<DatanodeDescriptor> possibilities =
|
||||
new LinkedList<DatanodeDescriptor>();
|
||||
int numReplicas = blockInfo.getCapacity();
|
||||
Collection<DatanodeDescriptor> corrupt =
|
||||
blockManager.getCorruptReplicas(blockInfo);
|
||||
int outOfCapacity = 0;
|
||||
for (int i = 0; i < numReplicas; i++) {
|
||||
DatanodeDescriptor datanode = blockInfo.getDatanode(i);
|
||||
if ((datanode != null) &&
|
||||
((!pendingCached.contains(datanode)) &&
|
||||
((corrupt == null) || (!corrupt.contains(datanode))))) {
|
||||
if (datanode == null) {
|
||||
continue;
|
||||
}
|
||||
if (datanode.isDecommissioned() || datanode.isDecommissionInProgress()) {
|
||||
continue;
|
||||
}
|
||||
if (corrupt != null && corrupt.contains(datanode)) {
|
||||
continue;
|
||||
}
|
||||
if (pendingCached.contains(datanode) || cached.contains(datanode)) {
|
||||
continue;
|
||||
}
|
||||
long pendingCapacity = datanode.getCacheRemaining();
|
||||
// Subtract pending cached blocks from effective capacity
|
||||
Iterator<CachedBlock> it = datanode.getPendingCached().iterator();
|
||||
while (it.hasNext()) {
|
||||
CachedBlock cBlock = it.next();
|
||||
BlockInfo info =
|
||||
blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
|
||||
if (info != null) {
|
||||
pendingCapacity -= info.getNumBytes();
|
||||
}
|
||||
}
|
||||
it = datanode.getPendingUncached().iterator();
|
||||
// Add pending uncached blocks from effective capacity
|
||||
while (it.hasNext()) {
|
||||
CachedBlock cBlock = it.next();
|
||||
BlockInfo info =
|
||||
blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
|
||||
if (info != null) {
|
||||
pendingCapacity += info.getNumBytes();
|
||||
}
|
||||
}
|
||||
if (pendingCapacity < blockInfo.getNumBytes()) {
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace("Datanode " + datanode + " is not a valid possibility for"
|
||||
+ " block " + blockInfo.getBlockId() + " of size "
|
||||
+ blockInfo.getNumBytes() + " bytes, only has "
|
||||
+ datanode.getCacheRemaining() + " bytes of cache remaining.");
|
||||
}
|
||||
outOfCapacity++;
|
||||
continue;
|
||||
}
|
||||
possibilities.add(datanode);
|
||||
}
|
||||
}
|
||||
while (neededCached > 0) {
|
||||
if (possibilities.isEmpty()) {
|
||||
LOG.warn("We need " + neededCached + " more replica(s) than " +
|
||||
"actually exist to provide a cache replication of " +
|
||||
cachedBlock.getReplication() + " for " + cachedBlock);
|
||||
return;
|
||||
}
|
||||
DatanodeDescriptor datanode =
|
||||
possibilities.remove(random.nextInt(possibilities.size()));
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("AddNewPendingCached: datanode " + datanode +
|
||||
" will now cache block " + cachedBlock);
|
||||
}
|
||||
List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities,
|
||||
neededCached, blockManager.getDatanodeManager().getStaleInterval());
|
||||
for (DatanodeDescriptor datanode : chosen) {
|
||||
pendingCached.add(datanode);
|
||||
boolean added = datanode.getPendingCached().add(cachedBlock);
|
||||
assert added;
|
||||
neededCached--;
|
||||
}
|
||||
// We were unable to satisfy the requested replication factor
|
||||
if (neededCached > chosen.size()) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(
|
||||
"Only have " +
|
||||
(cachedBlock.getReplication() - neededCached + chosen.size()) +
|
||||
" of " + cachedBlock.getReplication() + " cached replicas for " +
|
||||
cachedBlock + " (" + outOfCapacity + " nodes have insufficient " +
|
||||
"capacity).");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Chooses datanode locations for caching from a list of valid possibilities.
|
||||
* Non-stale nodes are chosen before stale nodes.
|
||||
*
|
||||
* @param possibilities List of candidate datanodes
|
||||
* @param neededCached Number of replicas needed
|
||||
* @param staleInterval Age of a stale datanode
|
||||
* @return A list of chosen datanodes
|
||||
*/
|
||||
private static List<DatanodeDescriptor> chooseDatanodesForCaching(
|
||||
final List<DatanodeDescriptor> possibilities, final int neededCached,
|
||||
final long staleInterval) {
|
||||
// Make a copy that we can modify
|
||||
List<DatanodeDescriptor> targets =
|
||||
new ArrayList<DatanodeDescriptor>(possibilities);
|
||||
// Selected targets
|
||||
List<DatanodeDescriptor> chosen = new LinkedList<DatanodeDescriptor>();
|
||||
|
||||
// Filter out stale datanodes
|
||||
List<DatanodeDescriptor> stale = new LinkedList<DatanodeDescriptor>();
|
||||
Iterator<DatanodeDescriptor> it = targets.iterator();
|
||||
while (it.hasNext()) {
|
||||
DatanodeDescriptor d = it.next();
|
||||
if (d.isStale(staleInterval)) {
|
||||
it.remove();
|
||||
stale.add(d);
|
||||
}
|
||||
}
|
||||
// Select targets
|
||||
while (chosen.size() < neededCached) {
|
||||
// Try to use stale nodes if we're out of non-stale nodes, else we're done
|
||||
if (targets.isEmpty()) {
|
||||
if (!stale.isEmpty()) {
|
||||
targets = stale;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Select a random target
|
||||
DatanodeDescriptor target =
|
||||
chooseRandomDatanodeByRemainingCapacity(targets);
|
||||
chosen.add(target);
|
||||
targets.remove(target);
|
||||
}
|
||||
return chosen;
|
||||
}
|
||||
|
||||
  /**
   * Choose a single datanode from the provided list of possible
   * targets, weighted by the percentage of free space remaining on the node.
   *
   * @return The chosen datanode
   */
  private static DatanodeDescriptor chooseRandomDatanodeByRemainingCapacity(
      final List<DatanodeDescriptor> targets) {
    // Use a weighted probability to choose the target datanode
    float total = 0;
    for (DatanodeDescriptor d : targets) {
      total += d.getCacheRemainingPercent();
    }
    // Give each datanode a portion of keyspace equal to its relative weight
    // [0, w1) selects d1, [w1, w2) selects d2, etc.
    TreeMap<Integer, DatanodeDescriptor> lottery =
        new TreeMap<Integer, DatanodeDescriptor>();
    int offset = 0;
    for (DatanodeDescriptor d : targets) {
      // Since we're using floats, be paranoid about negative values
      int weight =
          Math.max(1, (int)((d.getCacheRemainingPercent() / total) * 1000000));
      offset += weight;
      lottery.put(offset, d);
    }
    // Choose a number from [0, offset), which is the total amount of weight,
    // to select the winner
    DatanodeDescriptor winner =
        lottery.higherEntry(random.nextInt(offset)).getValue();
    return winner;
  }
}
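The TreeMap-based "lottery" above is easy to misread, so here is a minimal, self-contained sketch of the same idea outside HDFS. The class and method names are invented for illustration; only the cumulative-weight/higherEntry trick is taken from the code above.

import java.util.Random;
import java.util.TreeMap;

public class WeightedLotterySketch {
  /** Pick one entry at random, with probability proportional to its weight. */
  static String pickWeighted(TreeMap<Integer, String> lottery, int totalWeight, Random random) {
    // Keys are cumulative weights; higherEntry() finds the first key strictly
    // greater than the draw, i.e. the owner of the keyspace slice that was hit.
    return lottery.higherEntry(random.nextInt(totalWeight)).getValue();
  }

  public static void main(String[] args) {
    TreeMap<Integer, String> lottery = new TreeMap<Integer, String>();
    int offset = 0;
    // dn1 owns [0, 10), dn2 owns [10, 40), dn3 owns [40, 100).
    offset += 10; lottery.put(offset, "dn1");
    offset += 30; lottery.put(offset, "dn2");
    offset += 60; lottery.put(offset, "dn3");
    Random random = new Random();
    for (int i = 0; i < 5; i++) {
      System.out.println(pickWeighted(lottery, offset, random));
    }
  }
}

As in the monitor, each candidate owns a contiguous slice of the keyspace proportional to its weight, and higherEntry maps a uniform draw back to that slice's owner.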
|
||||
|
|
|
@ -355,11 +355,11 @@ public class DatanodeDescriptor extends DatanodeInfo {
|
|||
setLastUpdate(Time.now());
|
||||
this.volumeFailures = volFailures;
|
||||
for (StorageReport report : reports) {
|
||||
DatanodeStorageInfo storage = storageMap.get(report.getStorageID());
|
||||
DatanodeStorageInfo storage = storageMap.get(report.getStorage().getStorageID());
|
||||
if (storage == null) {
|
||||
// This is seen during cluster initialization when the heartbeat
|
||||
// is received before the initial block reports from each storage.
|
||||
storage = updateStorage(new DatanodeStorage(report.getStorageID()));
|
||||
storage = updateStorage(report.getStorage());
|
||||
}
|
||||
storage.receivedHeartbeat(report);
|
||||
totalCapacity += report.getCapacity();
|
||||
|
|
|
@ -1443,6 +1443,13 @@ public class DatanodeManager {
|
|||
return getClass().getSimpleName() + ": " + host2DatanodeMap;
|
||||
}
|
||||
|
||||
public void clearPendingCachingCommands() {
|
||||
for (DatanodeDescriptor dn : datanodeMap.values()) {
|
||||
dn.getPendingCached().clear();
|
||||
dn.getPendingUncached().clear();
|
||||
}
|
||||
}
|
||||
|
||||
public void setShouldSendCachingCommands(boolean shouldSendCachingCommands) {
|
||||
this.shouldSendCachingCommands = shouldSendCachingCommands;
|
||||
}
|
||||
|
|
|
@ -121,7 +121,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|||
reports = new StorageReport[volumes.volumes.size()];
|
||||
int i = 0;
|
||||
for (FsVolumeImpl volume : volumes.volumes) {
|
||||
reports[i++] = new StorageReport(volume.getStorageID(),
|
||||
reports[i++] = new StorageReport(volume.toDatanodeStorage(),
|
||||
false,
|
||||
volume.getCapacity(),
|
||||
volume.getDfsUsed(),
|
||||
|
@ -237,12 +237,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|||
final List<FsVolumeImpl> volArray = new ArrayList<FsVolumeImpl>(
|
||||
storage.getNumStorageDirs());
|
||||
for (int idx = 0; idx < storage.getNumStorageDirs(); idx++) {
|
||||
// TODO: getStorageTypeFromLocations() is only a temporary workaround and
|
||||
// should be replaced with getting storage type from DataStorage (missing
|
||||
// storage type now) directly.
|
||||
Storage.StorageDirectory sd = storage.getStorageDir(idx);
|
||||
final File dir = sd.getCurrentDir();
|
||||
final StorageType storageType = getStorageTypeFromLocations(dataLocations, dir);
|
||||
final StorageType storageType = getStorageTypeFromLocations(dataLocations, sd.getRoot());
|
||||
volArray.add(new FsVolumeImpl(this, sd.getStorageUuid(), dir, conf,
|
||||
storageType));
|
||||
LOG.info("Added volume - " + dir + ", StorageType: " + storageType);
|
||||
|
|
|
@ -19,10 +19,10 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.Executor;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
|
@ -54,7 +54,7 @@ class FsVolumeImpl implements FsVolumeSpi {
|
|||
private final String storageID;
|
||||
private final StorageType storageType;
|
||||
private final Map<String, BlockPoolSlice> bpSlices
|
||||
= new HashMap<String, BlockPoolSlice>();
|
||||
= new ConcurrentHashMap<String, BlockPoolSlice>();
|
||||
private final File currentDir; // <StorageDirectory>/current
|
||||
private final DF usage;
|
||||
private final long reserved;
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_DEFAULT;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES;
|
||||
|
@ -40,6 +40,7 @@ import java.util.List;
|
|||
import java.util.Map.Entry;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
|
@ -62,7 +63,6 @@ import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
|
|||
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||
|
@ -85,7 +85,7 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
/**
|
||||
* The Cache Manager handles caching on DataNodes.
|
||||
*
|
||||
* This class is instantiated by the FSNamesystem when caching is enabled.
|
||||
* This class is instantiated by the FSNamesystem.
|
||||
* It maintains the mapping of cached blocks to datanodes via processing
|
||||
* datanode cache reports. Based on these reports and addition and removal of
|
||||
* caching directives, we will schedule caching and uncaching work.
|
||||
|
@ -94,6 +94,8 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
public final class CacheManager {
|
||||
public static final Log LOG = LogFactory.getLog(CacheManager.class);
|
||||
|
||||
private static final float MIN_CACHED_BLOCKS_PERCENT = 0.001f;
|
||||
|
||||
// TODO: add pending / underCached / schedule cached blocks stats.
|
||||
|
||||
/**
|
||||
|
@ -148,34 +150,16 @@ public final class CacheManager {
|
|||
*/
|
||||
private final long scanIntervalMs;
|
||||
|
||||
/**
|
||||
* Whether caching is enabled.
|
||||
*
|
||||
* If caching is disabled, we will not process cache reports or store
|
||||
* information about what is cached where. We also do not start the
|
||||
* CacheReplicationMonitor thread. This will save resources, but provide
|
||||
* less functionality.
|
||||
*
|
||||
* Even when caching is disabled, we still store path-based cache
|
||||
* information. This information is stored in the edit log and fsimage. We
|
||||
* don't want to lose it just because a configuration setting was turned off.
|
||||
* However, we will not act on this information if caching is disabled.
|
||||
*/
|
||||
private final boolean enabled;
|
||||
|
||||
/**
|
||||
* Whether the CacheManager is active.
|
||||
*
|
||||
* When the CacheManager is active, it tells the DataNodes what to cache
|
||||
* and uncache. The CacheManager cannot become active if enabled = false.
|
||||
*/
|
||||
private boolean active = false;
|
||||
|
||||
/**
|
||||
* All cached blocks.
|
||||
*/
|
||||
private final GSet<CachedBlock, CachedBlock> cachedBlocks;
|
||||
|
||||
/**
|
||||
* Lock which protects the CacheReplicationMonitor.
|
||||
*/
|
||||
private final ReentrantLock crmLock = new ReentrantLock();
|
||||
|
||||
/**
|
||||
* The CacheReplicationMonitor.
|
||||
*/
|
||||
|
@ -195,56 +179,53 @@ public final class CacheManager {
|
|||
scanIntervalMs = conf.getLong(
|
||||
DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
|
||||
DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
|
||||
this.enabled = conf.getBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY,
|
||||
DFS_NAMENODE_CACHING_ENABLED_DEFAULT);
|
||||
this.cachedBlocks = !enabled ? null :
|
||||
new LightWeightGSet<CachedBlock, CachedBlock>(
|
||||
LightWeightGSet.computeCapacity(0.25, "cachedBlocks"));
|
||||
float cachedBlocksPercent = conf.getFloat(
|
||||
DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
|
||||
DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT);
|
||||
if (cachedBlocksPercent < MIN_CACHED_BLOCKS_PERCENT) {
|
||||
LOG.info("Using minimum value " + MIN_CACHED_BLOCKS_PERCENT +
|
||||
" for " + DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT);
|
||||
cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT;
|
||||
}
|
||||
this.cachedBlocks = new LightWeightGSet<CachedBlock, CachedBlock>(
|
||||
LightWeightGSet.computeCapacity(cachedBlocksPercent,
|
||||
"cachedBlocks"));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Activate the cache manager.
|
||||
*
|
||||
* When the cache manager is active, tell the datanodes where to cache files.
|
||||
*/
|
||||
public void activate() {
|
||||
public void startMonitorThread() {
|
||||
crmLock.lock();
|
||||
try {
|
||||
if (this.monitor == null) {
|
||||
this.monitor = new CacheReplicationMonitor(namesystem, this,
|
||||
scanIntervalMs, crmLock);
|
||||
this.monitor.start();
|
||||
}
|
||||
} finally {
|
||||
crmLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public void stopMonitorThread() {
|
||||
crmLock.lock();
|
||||
try {
|
||||
if (this.monitor != null) {
|
||||
CacheReplicationMonitor prevMonitor = this.monitor;
|
||||
this.monitor = null;
|
||||
IOUtils.closeQuietly(prevMonitor);
|
||||
}
|
||||
} finally {
|
||||
crmLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public void clearDirectiveStats() {
|
||||
assert namesystem.hasWriteLock();
|
||||
if (enabled && (!active)) {
|
||||
LOG.info("Activating CacheManager. " +
|
||||
"Starting replication monitor thread...");
|
||||
active = true;
|
||||
monitor = new CacheReplicationMonitor(namesystem, this,
|
||||
scanIntervalMs);
|
||||
monitor.start();
|
||||
for (CacheDirective directive : directivesById.values()) {
|
||||
directive.resetStatistics();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Deactivate the cache manager.
|
||||
*
|
||||
* When the cache manager is inactive, it does not tell the datanodes where to
|
||||
* cache files.
|
||||
*/
|
||||
public void deactivate() {
|
||||
assert namesystem.hasWriteLock();
|
||||
if (active) {
|
||||
LOG.info("Deactivating CacheManager. " +
|
||||
"stopping CacheReplicationMonitor thread...");
|
||||
active = false;
|
||||
IOUtils.closeQuietly(monitor);
|
||||
monitor = null;
|
||||
LOG.info("CacheReplicationMonitor thread stopped and deactivated.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true only if the cache manager is active.
|
||||
* Must be called under the FSN read or write lock.
|
||||
*/
|
||||
public boolean isActive() {
|
||||
return active;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Unmodifiable view of the collection of CachePools.
|
||||
*/
|
||||
|
@ -481,9 +462,7 @@ public final class CacheManager {
|
|||
directive.addBytesNeeded(stats.getBytesNeeded());
|
||||
directive.addFilesNeeded(directive.getFilesNeeded());
|
||||
|
||||
if (monitor != null) {
|
||||
monitor.setNeedsRescan();
|
||||
}
|
||||
setNeedsRescan();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -515,10 +494,6 @@ public final class CacheManager {
|
|||
long expiryTime = validateExpiryTime(info, pool.getMaxRelativeExpiryMs());
|
||||
// Do quota validation if required
|
||||
if (!flags.contains(CacheFlag.FORCE)) {
|
||||
// Can't kick and wait if caching is disabled
|
||||
if (monitor != null) {
|
||||
monitor.waitForRescan();
|
||||
}
|
||||
checkLimit(pool, path, replication);
|
||||
}
|
||||
// All validation passed
|
||||
|
@ -623,9 +598,7 @@ public final class CacheManager {
|
|||
validateExpiryTime(infoWithDefaults, destPool.getMaxRelativeExpiryMs());
|
||||
|
||||
// Indicate changes to the CRM
|
||||
if (monitor != null) {
|
||||
monitor.setNeedsRescan();
|
||||
}
|
||||
setNeedsRescan();
|
||||
|
||||
// Validation passed
|
||||
removeInternal(prevEntry);
|
||||
|
@ -660,9 +633,7 @@ public final class CacheManager {
|
|||
pool.getDirectiveList().remove(directive);
|
||||
assert directive.getPool() == null;
|
||||
|
||||
if (monitor != null) {
|
||||
monitor.setNeedsRescan();
|
||||
}
|
||||
setNeedsRescan();
|
||||
}
|
||||
|
||||
public void removeDirective(long id, FSPermissionChecker pc)
|
||||
|
@ -695,9 +666,6 @@ public final class CacheManager {
|
|||
if (filter.getReplication() != null) {
|
||||
throw new IOException("Filtering by replication is unsupported.");
|
||||
}
|
||||
if (monitor != null) {
|
||||
monitor.waitForRescanIfNeeded();
|
||||
}
|
||||
ArrayList<CacheDirectiveEntry> replies =
|
||||
new ArrayList<CacheDirectiveEntry>(NUM_PRE_ALLOCATED_ENTRIES);
|
||||
int numReplies = 0;
|
||||
|
@ -806,9 +774,7 @@ public final class CacheManager {
|
|||
bld.append(prefix).append("set limit to " + info.getLimit());
|
||||
prefix = "; ";
|
||||
// New limit changes stats, need to set needs refresh
|
||||
if (monitor != null) {
|
||||
monitor.setNeedsRescan();
|
||||
}
|
||||
setNeedsRescan();
|
||||
}
|
||||
if (info.getMaxRelativeExpiryMs() != null) {
|
||||
final Long maxRelativeExpiry = info.getMaxRelativeExpiryMs();
|
||||
|
@ -854,9 +820,7 @@ public final class CacheManager {
|
|||
directivesById.remove(directive.getId());
|
||||
iter.remove();
|
||||
}
|
||||
if (monitor != null) {
|
||||
monitor.setNeedsRescan();
|
||||
}
|
||||
setNeedsRescan();
|
||||
} catch (IOException e) {
|
||||
LOG.info("removeCachePool of " + poolName + " failed: ", e);
|
||||
throw e;
|
||||
|
@ -867,9 +831,6 @@ public final class CacheManager {
|
|||
public BatchedListEntries<CachePoolEntry>
|
||||
listCachePools(FSPermissionChecker pc, String prevKey) {
|
||||
assert namesystem.hasReadLock();
|
||||
if (monitor != null) {
|
||||
monitor.waitForRescanIfNeeded();
|
||||
}
|
||||
final int NUM_PRE_ALLOCATED_ENTRIES = 16;
|
||||
ArrayList<CachePoolEntry> results =
|
||||
new ArrayList<CachePoolEntry>(NUM_PRE_ALLOCATED_ENTRIES);
|
||||
|
@ -885,9 +846,6 @@ public final class CacheManager {
|
|||
}
|
||||
|
||||
public void setCachedLocations(LocatedBlock block) {
|
||||
if (!enabled) {
|
||||
return;
|
||||
}
|
||||
CachedBlock cachedBlock =
|
||||
new CachedBlock(block.getBlock().getBlockId(),
|
||||
(short)0, false);
|
||||
|
@ -903,12 +861,6 @@ public final class CacheManager {
|
|||
|
||||
public final void processCacheReport(final DatanodeID datanodeID,
|
||||
final List<Long> blockIds) throws IOException {
|
||||
if (!enabled) {
|
||||
LOG.info("Ignoring cache report from " + datanodeID +
|
||||
" because " + DFS_NAMENODE_CACHING_ENABLED_KEY + " = false. " +
|
||||
"number of blocks: " + blockIds.size());
|
||||
return;
|
||||
}
|
||||
namesystem.writeLock();
|
||||
final long startTime = Time.monotonicNow();
|
||||
final long endTime;
|
||||
|
@ -940,39 +892,28 @@ public final class CacheManager {
|
|||
final List<Long> blockIds) {
|
||||
CachedBlocksList cached = datanode.getCached();
|
||||
cached.clear();
|
||||
CachedBlocksList cachedList = datanode.getCached();
|
||||
CachedBlocksList pendingCachedList = datanode.getPendingCached();
|
||||
for (Iterator<Long> iter = blockIds.iterator(); iter.hasNext(); ) {
|
||||
Block block = new Block(iter.next());
|
||||
BlockInfo blockInfo = blockManager.getStoredBlock(block);
|
||||
if (!blockInfo.isComplete()) {
|
||||
LOG.warn("Ignoring block id " + block.getBlockId() + ", because " +
|
||||
"it is in not complete yet. It is in state " +
|
||||
blockInfo.getBlockUCState());
|
||||
continue;
|
||||
}
|
||||
Collection<DatanodeDescriptor> corruptReplicas =
|
||||
blockManager.getCorruptReplicas(blockInfo);
|
||||
if ((corruptReplicas != null) && corruptReplicas.contains(datanode)) {
|
||||
// The NameNode will eventually remove or update the corrupt block.
|
||||
// Until then, we pretend that it isn't cached.
|
||||
LOG.warn("Ignoring cached replica on " + datanode + " of " + block +
|
||||
" because it is corrupt.");
|
||||
continue;
|
||||
}
|
||||
long blockId = iter.next();
|
||||
CachedBlock cachedBlock =
|
||||
new CachedBlock(block.getBlockId(), (short)0, false);
|
||||
new CachedBlock(blockId, (short)0, false);
|
||||
CachedBlock prevCachedBlock = cachedBlocks.get(cachedBlock);
|
||||
// Use the existing CachedBlock if it's present; otherwise,
|
||||
// insert a new one.
|
||||
// Add the block ID from the cache report to the cachedBlocks map
|
||||
// if it's not already there.
|
||||
if (prevCachedBlock != null) {
|
||||
cachedBlock = prevCachedBlock;
|
||||
} else {
|
||||
cachedBlocks.put(cachedBlock);
|
||||
}
|
||||
if (!cachedBlock.isPresent(datanode.getCached())) {
|
||||
datanode.getCached().add(cachedBlock);
|
||||
// Add the block to the datanode's implicit cached block list
|
||||
// if it's not already there. Similarly, remove it from the pending
|
||||
// cached block list if it exists there.
|
||||
if (!cachedBlock.isPresent(cachedList)) {
|
||||
cachedList.add(cachedBlock);
|
||||
}
|
||||
if (cachedBlock.isPresent(datanode.getPendingCached())) {
|
||||
datanode.getPendingCached().remove(cachedBlock);
|
||||
if (cachedBlock.isPresent(pendingCachedList)) {
|
||||
pendingCachedList.remove(cachedBlock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1097,4 +1038,36 @@ public final class CacheManager {
|
|||
}
|
||||
prog.endStep(Phase.LOADING_FSIMAGE, step);
|
||||
}
|
||||
|
||||
public void waitForRescanIfNeeded() {
|
||||
crmLock.lock();
|
||||
try {
|
||||
if (monitor != null) {
|
||||
monitor.waitForRescanIfNeeded();
|
||||
}
|
||||
} finally {
|
||||
crmLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
private void setNeedsRescan() {
|
||||
crmLock.lock();
|
||||
try {
|
||||
if (monitor != null) {
|
||||
monitor.setNeedsRescan();
|
||||
}
|
||||
} finally {
|
||||
crmLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public Thread getCacheReplicationMonitor() {
|
||||
crmLock.lock();
|
||||
try {
|
||||
return monitor;
|
||||
} finally {
|
||||
crmLock.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -405,6 +405,7 @@ public class FSImage implements Closeable {
|
|||
// Directories that don't have previous state do not rollback
|
||||
boolean canRollback = false;
|
||||
FSImage prevState = new FSImage(conf);
|
||||
try {
|
||||
prevState.getStorage().layoutVersion = HdfsConstants.LAYOUT_VERSION;
|
||||
for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
|
||||
StorageDirectory sd = it.next();
|
||||
|
@ -459,6 +460,9 @@ public class FSImage implements Closeable {
|
|||
LOG.info("Rollback of " + sd.getRoot()+ " is complete.");
|
||||
}
|
||||
isUpgradeFinalized = true;
|
||||
} finally {
|
||||
prevState.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void doFinalize(StorageDirectory sd) throws IOException {
|
||||
|
|
|
@ -931,7 +931,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
writeLock();
|
||||
try {
|
||||
if (blockManager != null) blockManager.close();
|
||||
cacheManager.deactivate();
|
||||
} finally {
|
||||
writeUnlock();
|
||||
}
|
||||
|
@ -1001,7 +1000,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
editLogRollerThreshold, editLogRollerInterval));
|
||||
nnEditLogRoller.start();
|
||||
|
||||
cacheManager.activate();
|
||||
cacheManager.startMonitorThread();
|
||||
blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
|
||||
} finally {
|
||||
writeUnlock();
|
||||
|
@ -1052,7 +1051,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
// so that the tailer starts from the right spot.
|
||||
dir.fsImage.updateLastAppliedTxIdFromWritten();
|
||||
}
|
||||
cacheManager.deactivate();
|
||||
cacheManager.stopMonitorThread();
|
||||
cacheManager.clearDirectiveStats();
|
||||
blockManager.getDatanodeManager().clearPendingCachingCommands();
|
||||
blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
|
||||
} finally {
|
||||
writeUnlock();
|
||||
|
@ -7066,6 +7067,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
return (Long) cacheEntry.getPayload();
|
||||
}
|
||||
boolean success = false;
|
||||
if (!flags.contains(CacheFlag.FORCE)) {
|
||||
cacheManager.waitForRescanIfNeeded();
|
||||
}
|
||||
writeLock();
|
||||
Long result = null;
|
||||
try {
|
||||
|
@ -7107,6 +7111,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
if (cacheEntry != null && cacheEntry.isSuccess()) {
|
||||
return;
|
||||
}
|
||||
if (!flags.contains(CacheFlag.FORCE)) {
|
||||
cacheManager.waitForRescanIfNeeded();
|
||||
}
|
||||
writeLock();
|
||||
try {
|
||||
checkOperation(OperationCategory.WRITE);
|
||||
|
@ -7166,6 +7173,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
final FSPermissionChecker pc = isPermissionEnabled ?
|
||||
getPermissionChecker() : null;
|
||||
BatchedListEntries<CacheDirectiveEntry> results;
|
||||
cacheManager.waitForRescanIfNeeded();
|
||||
readLock();
|
||||
boolean success = false;
|
||||
try {
|
||||
|
@ -7289,6 +7297,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
BatchedListEntries<CachePoolEntry> results;
|
||||
checkOperation(OperationCategory.READ);
|
||||
boolean success = false;
|
||||
cacheManager.waitForRescanIfNeeded();
|
||||
readLock();
|
||||
try {
|
||||
checkOperation(OperationCategory.READ);
|
||||
|
|
|
@ -480,6 +480,14 @@ public class NameNode implements NameNodeStatusMXBean {
|
|||
* @param conf the configuration
|
||||
*/
|
||||
protected void initialize(Configuration conf) throws IOException {
|
||||
if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
|
||||
String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
|
||||
if (intervals != null) {
|
||||
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
|
||||
intervals);
|
||||
}
|
||||
}
|
||||
|
||||
UserGroupInformation.setConfiguration(conf);
|
||||
loginAsNameNodeUser(conf);
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ package org.apache.hadoop.hdfs.server.protocol;
|
|||
* Utilization report for a Datanode storage
|
||||
*/
|
||||
public class StorageReport {
|
||||
private final String storageID;
|
||||
private final DatanodeStorage storage;
|
||||
private final boolean failed;
|
||||
private final long capacity;
|
||||
private final long dfsUsed;
|
||||
|
@ -30,9 +30,9 @@ public class StorageReport {
|
|||
|
||||
public static final StorageReport[] EMPTY_ARRAY = {};
|
||||
|
||||
public StorageReport(String sid, boolean failed, long capacity, long dfsUsed,
|
||||
long remaining, long bpUsed) {
|
||||
this.storageID = sid;
|
||||
public StorageReport(DatanodeStorage storage, boolean failed,
|
||||
long capacity, long dfsUsed, long remaining, long bpUsed) {
|
||||
this.storage = storage;
|
||||
this.failed = failed;
|
||||
this.capacity = capacity;
|
||||
this.dfsUsed = dfsUsed;
|
||||
|
@ -40,8 +40,8 @@ public class StorageReport {
|
|||
this.blockPoolUsed = bpUsed;
|
||||
}
|
||||
|
||||
public String getStorageID() {
|
||||
return storageID;
|
||||
public DatanodeStorage getStorage() {
|
||||
return storage;
|
||||
}
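For callers migrating off the removed accessor, a brief sketch of the new shape; the storage ID and sizes below are arbitrary sample values, not taken from the patch.

DatanodeStorage storage = new DatanodeStorage("DS-example-id");  // hypothetical ID
StorageReport report = new StorageReport(storage, false /* failed */,
    1024L /* capacity */, 100L /* dfsUsed */, 924L /* remaining */,
    100L /* blockPoolUsed */);
String id = report.getStorage().getStorageID();  // replaces report.getStorageID()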
|
||||
|
||||
public boolean isFailed() {
|
||||
|
|
|
@ -84,7 +84,12 @@ public class CacheAdmin extends Configured implements Tool {
|
|||
for (int j = 1; j < args.length; j++) {
|
||||
argsList.add(args[j]);
|
||||
}
|
||||
try {
|
||||
return command.run(getConf(), argsList);
|
||||
} catch (IllegalArgumentException e) {
|
||||
System.err.println(prettifyException(e));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] argsArray) throws IOException {
|
||||
|
@ -135,6 +140,20 @@ public class CacheAdmin extends Configured implements Tool {
|
|||
return maxTtl;
|
||||
}
|
||||
|
||||
  private static Expiration parseExpirationString(String ttlString)
      throws IOException {
    Expiration ex = null;
    if (ttlString != null) {
      if (ttlString.equalsIgnoreCase("never")) {
        ex = CacheDirectiveInfo.Expiration.NEVER;
      } else {
        long ttl = DFSUtil.parseRelativeTime(ttlString);
        ex = CacheDirectiveInfo.Expiration.newRelative(ttl);
      }
    }
    return ex;
  }
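A short usage sketch of the helper above, mirroring how the add and modify commands below consume it; the path and pool names are invented for illustration.

// "never" maps to Expiration.NEVER, other strings go through
// DFSUtil.parseRelativeTime(), and a null ttl leaves the expiration unset.
// Malformed values surface as an IOException.
CacheDirectiveInfo.Builder builder = new CacheDirectiveInfo.Builder()
    .setPath(new Path("/example/warm-data"))  // hypothetical path
    .setPool("examplePool");                  // hypothetical pool
Expiration ex = parseExpirationString("never");
if (ex != null) {
  builder.setExpiration(ex);
}
CacheDirectiveInfo directive = builder.build();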
|
||||
interface Command {
|
||||
String getName();
|
||||
String getShortUsage();
|
||||
|
@ -171,6 +190,7 @@ public class CacheAdmin extends Configured implements Tool {
|
|||
listing.addRow("<time-to-live>", "How long the directive is " +
|
||||
"valid. Can be specified in minutes, hours, and days, e.g. " +
|
||||
"30m, 4h, 2d. Valid units are [smhd]." +
|
||||
" \"never\" indicates a directive that never expires." +
|
||||
" If unspecified, the directive never expires.");
|
||||
return getShortUsage() + "\n" +
|
||||
"Add a new cache directive.\n\n" +
|
||||
|
@ -203,16 +223,16 @@ public class CacheAdmin extends Configured implements Tool {
|
|||
}
|
||||
|
||||
String ttlString = StringUtils.popOptionWithArgument("-ttl", args);
|
||||
if (ttlString != null) {
|
||||
try {
|
||||
long ttl = DFSUtil.parseRelativeTime(ttlString);
|
||||
builder.setExpiration(CacheDirectiveInfo.Expiration.newRelative(ttl));
|
||||
Expiration ex = parseExpirationString(ttlString);
|
||||
if (ex != null) {
|
||||
builder.setExpiration(ex);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println(
|
||||
"Error while parsing ttl value: " + e.getMessage());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!args.isEmpty()) {
|
||||
System.err.println("Can't understand argument: " + args.get(0));
|
||||
|
@ -326,7 +346,7 @@ public class CacheAdmin extends Configured implements Tool {
|
|||
listing.addRow("<time-to-live>", "How long the directive is " +
|
||||
"valid. Can be specified in minutes, hours, and days, e.g. " +
|
||||
"30m, 4h, 2d. Valid units are [smhd]." +
|
||||
" If unspecified, the directive never expires.");
|
||||
" \"never\" indicates a directive that never expires.");
|
||||
return getShortUsage() + "\n" +
|
||||
"Modify a cache directive.\n\n" +
|
||||
listing.toString();
|
||||
|
@ -362,18 +382,17 @@ public class CacheAdmin extends Configured implements Tool {
|
|||
modified = true;
|
||||
}
|
||||
String ttlString = StringUtils.popOptionWithArgument("-ttl", args);
|
||||
if (ttlString != null) {
|
||||
long ttl;
|
||||
try {
|
||||
ttl = DFSUtil.parseRelativeTime(ttlString);
|
||||
Expiration ex = parseExpirationString(ttlString);
|
||||
if (ex != null) {
|
||||
builder.setExpiration(ex);
|
||||
modified = true;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println(
|
||||
"Error while parsing ttl value: " + e.getMessage());
|
||||
return 1;
|
||||
}
|
||||
builder.setExpiration(CacheDirectiveInfo.Expiration.newRelative(ttl));
|
||||
modified = true;
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
System.err.println("Can't understand argument: " + args.get(0));
|
||||
System.err.println("Usage is " + getShortUsage());
|
||||
|
@ -578,7 +597,7 @@ public class CacheAdmin extends Configured implements Tool {
|
|||
public String getShortUsage() {
|
||||
return "[" + NAME + " <name> [-owner <owner>] " +
|
||||
"[-group <group>] [-mode <mode>] [-limit <limit>] " +
|
||||
"[-maxttl <maxTtl>]\n";
|
||||
"[-maxTtl <maxTtl>]\n";
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -29,6 +29,7 @@ import java.util.Map;
|
|||
import org.apache.hadoop.HadoopIllegalArgumentException;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DFSUtil;
|
||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||
import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
|
||||
|
@ -85,9 +86,9 @@ public class GetConf extends Configured implements Tool {
|
|||
map.put(BACKUP.getName().toLowerCase(),
|
||||
new BackupNodesCommandHandler());
|
||||
map.put(INCLUDE_FILE.getName().toLowerCase(),
|
||||
new CommandHandler("DFSConfigKeys.DFS_HOSTS"));
|
||||
new CommandHandler(DFSConfigKeys.DFS_HOSTS));
|
||||
map.put(EXCLUDE_FILE.getName().toLowerCase(),
|
||||
new CommandHandler("DFSConfigKeys.DFS_HOSTS_EXCLUDE"));
|
||||
new CommandHandler(DFSConfigKeys.DFS_HOSTS_EXCLUDE));
|
||||
map.put(NNRPCADDRESSES.getName().toLowerCase(),
|
||||
new NNRpcAddressesCommandHandler());
|
||||
map.put(CONFKEY.getName().toLowerCase(),
|
||||
|
|
|
@ -196,12 +196,13 @@ message HeartbeatRequestProto {
}

message StorageReportProto {
  required string storageUuid = 1;
  required string storageUuid = 1 [ deprecated = true ];
  optional bool failed = 2 [ default = false ];
  optional uint64 capacity = 3 [ default = 0 ];
  optional uint64 dfsUsed = 4 [ default = 0 ];
  optional uint64 remaining = 5 [ default = 0 ];
  optional uint64 blockPoolUsed = 6 [ default = 0 ];
  optional DatanodeStorageProto storage = 7; // supersedes StorageUuid
}

/**
|
|
|
@ -1476,13 +1476,13 @@
</property>

<property>
  <name>dfs.namenode.caching.enabled</name>
  <value>false</value>
  <name>dfs.namenode.path.based.cache.block.map.allocation.percent</name>
  <value>0.25</value>
  <description>
    Set to true to enable block caching. This flag enables the NameNode to
    maintain a mapping of cached blocks to DataNodes via processing DataNode
    cache reports. Based on these reports and addition and removal of caching
    directives, the NameNode will schedule caching and uncaching work.
    The percentage of the Java heap which we will allocate to the cached blocks
    map. The cached blocks map is a hash map which uses chained hashing.
    Smaller maps may be accessed more slowly if the number of cached blocks is
    large; larger maps will consume more memory.
  </description>
</property>

|
|
|
@ -242,12 +242,6 @@ Centralized Cache Management in HDFS

  Be sure to configure the following:

  * dfs.namenode.caching.enabled

    This must be set to true to enable caching. If this is false, the NameNode
    will ignore cache reports, and will not ask DataNodes to cache
    blocks.

  * dfs.datanode.max.locked.memory

    The DataNode will treat this as the maximum amount of memory it can use for

@ -281,6 +275,13 @@ Centralized Cache Management in HDFS

  By default, this parameter is set to 10000, which is 10 seconds.

  * dfs.namenode.path.based.cache.block.map.allocation.percent

    The percentage of the Java heap which we will allocate to the cached blocks
    map. The cached blocks map is a hash map which uses chained hashing.
    Smaller maps may be accessed more slowly if the number of cached blocks is
    large; larger maps will consume more memory. The default is 0.25 percent.

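To make the settings above concrete, here is a minimal sketch of setting them programmatically on a Hadoop Configuration; the values are illustrative only, and in a real deployment they would normally live in hdfs-site.xml.

import org.apache.hadoop.conf.Configuration;

public class CacheConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Example value only: cap DataNode cache usage at 256 MB of locked memory.
    conf.setLong("dfs.datanode.max.locked.memory", 256L * 1024 * 1024);
    // Matches the default described above: 0.25 percent of the NameNode heap
    // is reserved for the cached blocks map.
    conf.setFloat("dfs.namenode.path.based.cache.block.map.allocation.percent", 0.25f);
    System.out.println(conf.get("dfs.datanode.max.locked.memory"));
  }
}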
** {OS Limits}

  If you get the error "Cannot start datanode because the configured max
|
|
|
@ -140,6 +140,7 @@ public class MiniDFSCluster {
|
|||
private int nameNodeHttpPort = 0;
|
||||
private final Configuration conf;
|
||||
private int numDataNodes = 1;
|
||||
private StorageType storageType = StorageType.DEFAULT;
|
||||
private boolean format = true;
|
||||
private boolean manageNameDfsDirs = true;
|
||||
private boolean manageNameDfsSharedDirs = true;
|
||||
|
@ -185,6 +186,14 @@ public class MiniDFSCluster {
|
|||
return this;
|
||||
}
|
||||
|
||||
  /**
   * Default: StorageType.DEFAULT
   */
  public Builder storageType(StorageType type) {
    this.storageType = type;
    return this;
  }
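A hedged sketch of how a test might use the new builder knob; the three-DataNode count is arbitrary, StorageType.DEFAULT is the only value this patch itself guarantees, and the usual MiniDFSCluster imports and test scaffolding are assumed rather than shown.

Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(3)
    .storageType(StorageType.DEFAULT)  // now applied to every generated data dir
    .build();
try {
  // ... exercise the cluster ...
} finally {
  cluster.shutdown();
}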
|
||||
/**
|
||||
* Default: true
|
||||
*/
|
||||
|
@ -341,6 +350,7 @@ public class MiniDFSCluster {
|
|||
|
||||
initMiniDFSCluster(builder.conf,
|
||||
builder.numDataNodes,
|
||||
builder.storageType,
|
||||
builder.format,
|
||||
builder.manageNameDfsDirs,
|
||||
builder.manageNameDfsSharedDirs,
|
||||
|
@ -592,7 +602,7 @@ public class MiniDFSCluster {
|
|||
String[] racks, String hosts[],
|
||||
long[] simulatedCapacities) throws IOException {
|
||||
this.nameNodes = new NameNodeInfo[1]; // Single namenode in the cluster
|
||||
initMiniDFSCluster(conf, numDataNodes, format,
|
||||
initMiniDFSCluster(conf, numDataNodes, StorageType.DEFAULT, format,
|
||||
manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs,
|
||||
operation, racks, hosts,
|
||||
simulatedCapacities, null, true, false,
|
||||
|
@ -601,7 +611,7 @@ public class MiniDFSCluster {
|
|||
|
||||
private void initMiniDFSCluster(
|
||||
Configuration conf,
|
||||
int numDataNodes, boolean format, boolean manageNameDfsDirs,
|
||||
int numDataNodes, StorageType storageType, boolean format, boolean manageNameDfsDirs,
|
||||
boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy,
|
||||
boolean manageDataDfsDirs, StartupOption operation, String[] racks,
|
||||
String[] hosts, long[] simulatedCapacities, String clusterId,
|
||||
|
@ -670,7 +680,7 @@ public class MiniDFSCluster {
|
|||
}
|
||||
|
||||
// Start the DataNodes
|
||||
startDataNodes(conf, numDataNodes, manageDataDfsDirs, operation, racks,
|
||||
startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs, operation, racks,
|
||||
hosts, simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, checkDataNodeHostConfig);
|
||||
waitClusterUp();
|
||||
//make sure ProxyUsers uses the latest conf
|
||||
|
@ -990,6 +1000,19 @@ public class MiniDFSCluster {
|
|||
}
|
||||
}
|
||||
|
||||
String makeDataNodeDirs(int dnIndex, StorageType storageType) throws IOException {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
|
||||
File dir = getInstanceStorageDir(dnIndex, j);
|
||||
dir.mkdirs();
|
||||
if (!dir.isDirectory()) {
|
||||
throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
|
||||
}
|
||||
sb.append((j > 0 ? "," : "") + "[" + storageType + "]" + fileAsURI(dir));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Modify the config and start up additional DataNodes. The info port for
|
||||
* DataNodes is guaranteed to use a free port.
|
||||
|
@ -1052,7 +1075,7 @@ public class MiniDFSCluster {
|
|||
String[] racks, String[] hosts,
|
||||
long[] simulatedCapacities,
|
||||
boolean setupHostsFile) throws IOException {
|
||||
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
|
||||
startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
|
||||
simulatedCapacities, setupHostsFile, false, false);
|
||||
}
|
||||
|
||||
|
@ -1066,7 +1089,7 @@ public class MiniDFSCluster {
|
|||
long[] simulatedCapacities,
|
||||
boolean setupHostsFile,
|
||||
boolean checkDataNodeAddrConfig) throws IOException {
|
||||
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
|
||||
startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
|
||||
simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, false);
|
||||
}
|
||||
|
||||
|
@ -1098,7 +1121,7 @@ public class MiniDFSCluster {
|
|||
* @throws IllegalStateException if NameNode has been shutdown
|
||||
*/
|
||||
public synchronized void startDataNodes(Configuration conf, int numDataNodes,
|
||||
boolean manageDfsDirs, StartupOption operation,
|
||||
StorageType storageType, boolean manageDfsDirs, StartupOption operation,
|
||||
String[] racks, String[] hosts,
|
||||
long[] simulatedCapacities,
|
||||
boolean setupHostsFile,
|
||||
|
@ -1154,16 +1177,7 @@ public class MiniDFSCluster {
|
|||
// Set up datanode address
|
||||
setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
|
||||
if (manageDfsDirs) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
|
||||
File dir = getInstanceStorageDir(i, j);
|
||||
dir.mkdirs();
|
||||
if (!dir.isDirectory()) {
|
||||
throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
|
||||
}
|
||||
sb.append((j > 0 ? "," : "") + fileAsURI(dir));
|
||||
}
|
||||
String dirs = sb.toString();
|
||||
String dirs = makeDataNodeDirs(i, storageType);
|
||||
dnConf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
|
||||
conf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
|
||||
}
|
||||
|
|
|
@@ -50,7 +50,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
}

public synchronized void startDataNodes(Configuration conf, int numDataNodes,
boolean manageDfsDirs, StartupOption operation,
StorageType storageType, boolean manageDfsDirs, StartupOption operation,
String[] racks, String[] nodeGroups, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile,
@@ -112,15 +112,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
// Set up datanode address
setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
if (manageDfsDirs) {
File dir1 = getInstanceStorageDir(i, 0);
File dir2 = getInstanceStorageDir(i, 1);
dir1.mkdirs();
dir2.mkdirs();
if (!dir1.isDirectory() || !dir2.isDirectory()) {
throw new IOException("Mkdirs failed to create directory for DataNode "
+ i + ": " + dir1 + " or " + dir2);
}
String dirs = fileAsURI(dir1) + "," + fileAsURI(dir2);
String dirs = makeDataNodeDirs(i, storageType);
dnConf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
}
@@ -198,7 +190,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
String[] racks, String[] nodeGroups, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, nodeGroups,
startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, nodeGroups,
hosts, simulatedCapacities, setupHostsFile, false, false);
}

@@ -213,13 +205,13 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
// This is for initialize from parent class.
@Override
public synchronized void startDataNodes(Configuration conf, int numDataNodes,
boolean manageDfsDirs, StartupOption operation,
StorageType storageType, boolean manageDfsDirs, StartupOption operation,
String[] racks, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile,
boolean checkDataNodeAddrConfig,
boolean checkDataNodeHostConfig) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks,
startDataNodes(conf, numDataNodes, storageType, manageDfsDirs, operation, racks,
NODE_GROUPS, hosts, simulatedCapacities, setupHostsFile,
checkDataNodeAddrConfig, checkDataNodeHostConfig);
}

@@ -257,8 +257,10 @@ public class BlockManagerTestUtil {
DatanodeDescriptor dnd) {
ArrayList<StorageReport> reports = new ArrayList<StorageReport>();
for (DatanodeStorageInfo storage : dnd.getStorageInfos()) {
DatanodeStorage dns = new DatanodeStorage(
storage.getStorageID(), storage.getState(), storage.getStorageType());
StorageReport report = new StorageReport(
storage.getStorageID(), false, storage.getCapacity(),
dns ,false, storage.getCapacity(),
storage.getDfsUsed(), storage.getRemaining(),
storage.getBlockPoolUsed());
reports.add(report);

@@ -470,11 +470,14 @@ public class TestJspHelper {
BlockManagerTestUtil.updateStorage(dnDesc1, new DatanodeStorage("dnStorage1"));
BlockManagerTestUtil.updateStorage(dnDesc2, new DatanodeStorage("dnStorage2"));

DatanodeStorage dns1 = new DatanodeStorage("dnStorage1");
DatanodeStorage dns2 = new DatanodeStorage("dnStorage2");

StorageReport[] report1 = new StorageReport[] {
new StorageReport("dnStorage1", false, 1024, 100, 924, 100)
new StorageReport(dns1, false, 1024, 100, 924, 100)
};
StorageReport[] report2 = new StorageReport[] {
new StorageReport("dnStorage2", false, 2500, 200, 1848, 200)
new StorageReport(dns2, false, 2500, 200, 1848, 200)
};
dnDesc1.updateHeartbeat(report1, 5l, 3l, 10, 2);
dnDesc2.updateHeartbeat(report2, 10l, 2l, 20, 1);

@@ -394,8 +394,9 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
}

synchronized StorageReport getStorageReport(String bpid) {
return new StorageReport(getStorageUuid(), false, getCapacity(),
getUsed(), getFree(), map.get(bpid).getUsed());
return new StorageReport(new DatanodeStorage(getStorageUuid()),
false, getCapacity(), getUsed(), getFree(),
map.get(bpid).getUsed());
}
}

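For reference, a minimal sketch of the new StorageReport call shape used in the hunks above: the report is now keyed by a DatanodeStorage object rather than a raw storage-ID string. The storage ID and the capacity figures below are made-up illustration values, not taken from the patch.

import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;

public class StorageReportSketch {
  public static void main(String[] args) {
    // Wrap the storage identity in a DatanodeStorage before building the report.
    DatanodeStorage storage = new DatanodeStorage("DS-example-uuid"); // hypothetical ID
    StorageReport report = new StorageReport(storage, false /* failed */,
        1024L /* capacity */, 100L /* dfsUsed */, 924L /* remaining */,
        100L /* blockPoolUsed */);
    // The storage type and state travel with the report via getStorage().
    System.out.println(report.getStorage().getStorageID());
  }
}
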
@@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage;
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.util.DataChecksum;
import org.junit.After;
@@ -186,9 +187,8 @@ public class TestDiskError {
// Check permissions on directories in 'dfs.datanode.data.dir'
FileSystem localFS = FileSystem.getLocal(conf);
for (DataNode dn : cluster.getDataNodes()) {
String[] dataDirs =
dn.getConf().getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
for (String dir : dataDirs) {
for (FsVolumeSpi v : dn.getFSDataset().getVolumes()) {
String dir = v.getBasePath();
Path dataDir = new Path(dir);
FsPermission actual = localFS.getFileStatus(dataDir).getPermission();
assertEquals("Permission for dir: " + dataDir + ", is " + actual +

@ -36,16 +36,20 @@ import java.util.Set;
|
|||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.HdfsBlockLocation;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||
import org.apache.hadoop.hdfs.LogVerificationAppender;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
||||
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
|
||||
|
@ -82,7 +86,11 @@ public class TestFsDatasetCache {
|
|||
|
||||
// Most Linux installs allow a default of 64KB locked memory
|
||||
private static final long CACHE_CAPACITY = 64 * 1024;
|
||||
private static final long BLOCK_SIZE = 4096;
|
||||
// mlock always locks the entire page. So we don't need to deal with this
|
||||
// rounding, use the OS page size for the block size.
|
||||
private static final long PAGE_SIZE =
|
||||
NativeIO.POSIX.getCacheManipulator().getOperatingSystemPageSize();
|
||||
private static final long BLOCK_SIZE = PAGE_SIZE;
|
||||
|
||||
private static Configuration conf;
|
||||
private static MiniDFSCluster cluster = null;
|
||||
|
@ -104,14 +112,13 @@ public class TestFsDatasetCache {
|
|||
public void setUp() throws Exception {
|
||||
assumeTrue(!Path.WINDOWS);
|
||||
conf = new HdfsConfiguration();
|
||||
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY, true);
|
||||
conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS,
|
||||
500);
|
||||
conf.setLong(
|
||||
DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 100);
|
||||
conf.setLong(DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 500);
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
|
||||
conf.setLong(DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY,
|
||||
CACHE_CAPACITY);
|
||||
conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
|
||||
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY, true);
|
||||
|
||||
prevCacheManipulator = NativeIO.POSIX.getCacheManipulator();
|
||||
NativeIO.POSIX.setCacheManipulator(new NoMlockCacheManipulator());
|
||||
|
@ -325,7 +332,7 @@ public class TestFsDatasetCache {
|
|||
|
||||
// Create some test files that will exceed total cache capacity
|
||||
final int numFiles = 5;
|
||||
final long fileSize = 15000;
|
||||
final long fileSize = CACHE_CAPACITY / (numFiles-1);
|
||||
|
||||
final Path[] testFiles = new Path[numFiles];
|
||||
final HdfsBlockLocation[][] fileLocs = new HdfsBlockLocation[numFiles][];
|
||||
|
@ -451,4 +458,65 @@ public class TestFsDatasetCache {
|
|||
}
|
||||
}, 100, 10000);
|
||||
}
|
||||
|
||||
@Test(timeout=60000)
|
||||
public void testPageRounder() throws Exception {
|
||||
// Write a small file
|
||||
Path fileName = new Path("/testPageRounder");
|
||||
final int smallBlocks = 512; // This should be smaller than the page size
|
||||
assertTrue("Page size should be greater than smallBlocks!",
|
||||
PAGE_SIZE > smallBlocks);
|
||||
final int numBlocks = 5;
|
||||
final int fileLen = smallBlocks * numBlocks;
|
||||
FSDataOutputStream out =
|
||||
fs.create(fileName, false, 4096, (short)1, smallBlocks);
|
||||
out.write(new byte[fileLen]);
|
||||
out.close();
|
||||
HdfsBlockLocation[] locs = (HdfsBlockLocation[])fs.getFileBlockLocations(
|
||||
fileName, 0, fileLen);
|
||||
// Cache the file and check the sizes match the page size
|
||||
setHeartbeatResponse(cacheBlocks(locs));
|
||||
verifyExpectedCacheUsage(PAGE_SIZE * numBlocks, numBlocks);
|
||||
// Uncache and check that it decrements by the page size too
|
||||
setHeartbeatResponse(uncacheBlocks(locs));
|
||||
verifyExpectedCacheUsage(0, 0);
|
||||
}
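The page-rounding check above works because cache usage is accounted in whole OS pages (as the comment in the earlier hunk notes, mlock always locks the entire page), so each 512-byte block is charged a full PAGE_SIZE. A standalone sketch of that rounding, assuming a 4 KB page size purely for illustration:

public class PageRoundingSketch {
  // Assumed page size for illustration; the test reads the real value from
  // NativeIO.POSIX.getCacheManipulator().getOperatingSystemPageSize().
  static final long PAGE_SIZE = 4096;

  // Round a block length up to the next multiple of the page size.
  static long roundUpToPage(long length) {
    return ((length + PAGE_SIZE - 1) / PAGE_SIZE) * PAGE_SIZE;
  }

  public static void main(String[] args) {
    System.out.println(roundUpToPage(512));  // 4096: a small block still costs one page
    System.out.println(roundUpToPage(4097)); // 8192: just over one page costs two
  }
}
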
|
||||
|
||||
@Test(timeout=60000)
|
||||
public void testUncacheQuiesces() throws Exception {
|
||||
// Create a file
|
||||
Path fileName = new Path("/testUncacheQuiesces");
|
||||
int fileLen = 4096;
|
||||
DFSTestUtil.createFile(fs, fileName, fileLen, (short)1, 0xFDFD);
|
||||
// Cache it
|
||||
DistributedFileSystem dfs = cluster.getFileSystem();
|
||||
dfs.addCachePool(new CachePoolInfo("pool"));
|
||||
dfs.addCacheDirective(new CacheDirectiveInfo.Builder()
|
||||
.setPool("pool").setPath(fileName).setReplication((short)3).build());
|
||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
|
||||
long blocksCached =
|
||||
MetricsAsserts.getLongCounter("BlocksCached", dnMetrics);
|
||||
return blocksCached > 0;
|
||||
}
|
||||
}, 1000, 30000);
|
||||
// Uncache it
|
||||
dfs.removeCacheDirective(1);
|
||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
|
||||
long blocksUncached =
|
||||
MetricsAsserts.getLongCounter("BlocksUncached", dnMetrics);
|
||||
return blocksUncached > 0;
|
||||
}
|
||||
}, 1000, 30000);
|
||||
// Make sure that no additional messages were sent
|
||||
Thread.sleep(10000);
|
||||
MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
|
||||
MetricsAsserts.assertCounter("BlocksCached", 1l, dnMetrics);
|
||||
MetricsAsserts.assertCounter("BlocksUncached", 1l, dnMetrics);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.datanode;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.*;
|
||||
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
|
||||
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
import static org.hamcrest.core.Is.is;
|
||||
import static org.junit.Assert.assertNotSame;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import static org.mockito.Matchers.any;
|
||||
import static org.mockito.Matchers.anyInt;
|
||||
import static org.mockito.Matchers.anyLong;
|
||||
|
||||
public class TestStorageReport {
|
||||
public static final Log LOG = LogFactory.getLog(TestStorageReport.class);
|
||||
|
||||
private static short REPL_FACTOR = 1;
|
||||
private static final StorageType storageType = StorageType.SSD; // pick non-default.
|
||||
|
||||
private static Configuration conf;
|
||||
private MiniDFSCluster cluster;
|
||||
private DistributedFileSystem fs;
|
||||
static String bpid;
|
||||
|
||||
@Before
|
||||
public void startUpCluster() throws IOException {
|
||||
conf = new HdfsConfiguration();
|
||||
cluster = new MiniDFSCluster.Builder(conf)
|
||||
.numDataNodes(REPL_FACTOR)
|
||||
.storageType(storageType)
|
||||
.build();
|
||||
fs = cluster.getFileSystem();
|
||||
bpid = cluster.getNamesystem().getBlockPoolId();
|
||||
}
|
||||
|
||||
@After
|
||||
public void shutDownCluster() throws IOException {
|
||||
if (cluster != null) {
|
||||
fs.close();
|
||||
cluster.shutdown();
|
||||
cluster = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure that storage type and storage state are propagated
|
||||
* in Storage Reports.
|
||||
*/
|
||||
@Test
|
||||
public void testStorageReportHasStorageTypeAndState() throws IOException {
|
||||
|
||||
// Make sure we are not testing with the default type, that would not
|
||||
// be a very good test.
|
||||
assertNotSame(storageType, StorageType.DEFAULT);
|
||||
NameNode nn = cluster.getNameNode();
|
||||
DataNode dn = cluster.getDataNodes().get(0);
|
||||
|
||||
// Insert a spy object for the NN RPC.
|
||||
DatanodeProtocolClientSideTranslatorPB nnSpy =
|
||||
DataNodeTestUtils.spyOnBposToNN(dn, nn);
|
||||
|
||||
// Trigger a heartbeat so there is an interaction with the spy
|
||||
// object.
|
||||
DataNodeTestUtils.triggerHeartbeat(dn);
|
||||
|
||||
// Verify that the callback passed in the expected parameters.
|
||||
ArgumentCaptor<StorageReport[]> captor =
|
||||
ArgumentCaptor.forClass(StorageReport[].class);
|
||||
|
||||
Mockito.verify(nnSpy).sendHeartbeat(
|
||||
any(DatanodeRegistration.class),
|
||||
captor.capture(),
|
||||
anyLong(), anyLong(), anyInt(), anyInt(), anyInt());
|
||||
|
||||
StorageReport[] reports = captor.getValue();
|
||||
|
||||
for (StorageReport report: reports) {
|
||||
assertThat(report.getStorage().getStorageType(), is(storageType));
|
||||
assertThat(report.getStorage().getState(), is(DatanodeStorage.State.NORMAL));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -605,6 +605,98 @@ public class NNThroughputBenchmark implements Tool {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Directory creation statistics.
|
||||
*
|
||||
* Each thread creates the same (+ or -1) number of directories.
|
||||
* Directory names are pre-generated during initialization.
|
||||
*/
|
||||
class MkdirsStats extends OperationStatsBase {
|
||||
// Operation types
|
||||
static final String OP_MKDIRS_NAME = "mkdirs";
|
||||
static final String OP_MKDIRS_USAGE = "-op mkdirs [-threads T] [-dirs N] " +
|
||||
"[-dirsPerDir P]";
|
||||
|
||||
protected FileNameGenerator nameGenerator;
|
||||
protected String[][] dirPaths;
|
||||
|
||||
MkdirsStats(List<String> args) {
|
||||
super();
|
||||
parseArguments(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
String getOpName() {
|
||||
return OP_MKDIRS_NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
void parseArguments(List<String> args) {
|
||||
boolean ignoreUnrelatedOptions = verifyOpArgument(args);
|
||||
int nrDirsPerDir = 2;
|
||||
for (int i = 2; i < args.size(); i++) { // parse command line
|
||||
if(args.get(i).equals("-dirs")) {
|
||||
if(i+1 == args.size()) printUsage();
|
||||
numOpsRequired = Integer.parseInt(args.get(++i));
|
||||
} else if(args.get(i).equals("-threads")) {
|
||||
if(i+1 == args.size()) printUsage();
|
||||
numThreads = Integer.parseInt(args.get(++i));
|
||||
} else if(args.get(i).equals("-dirsPerDir")) {
|
||||
if(i+1 == args.size()) printUsage();
|
||||
nrDirsPerDir = Integer.parseInt(args.get(++i));
|
||||
} else if(!ignoreUnrelatedOptions)
|
||||
printUsage();
|
||||
}
|
||||
nameGenerator = new FileNameGenerator(getBaseDir(), nrDirsPerDir);
|
||||
}
|
||||
|
||||
@Override
|
||||
void generateInputs(int[] opsPerThread) throws IOException {
|
||||
assert opsPerThread.length == numThreads : "Error opsPerThread.length";
|
||||
nameNodeProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE,
|
||||
false);
|
||||
LOG.info("Generate " + numOpsRequired + " inputs for " + getOpName());
|
||||
dirPaths = new String[numThreads][];
|
||||
for(int idx=0; idx < numThreads; idx++) {
|
||||
int threadOps = opsPerThread[idx];
|
||||
dirPaths[idx] = new String[threadOps];
|
||||
for(int jdx=0; jdx < threadOps; jdx++)
|
||||
dirPaths[idx][jdx] = nameGenerator.
|
||||
getNextFileName("ThroughputBench");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* returns client name
|
||||
*/
|
||||
@Override
|
||||
String getExecutionArgument(int daemonId) {
|
||||
return getClientName(daemonId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Do mkdirs operation.
|
||||
*/
|
||||
@Override
|
||||
long executeOp(int daemonId, int inputIdx, String clientName)
|
||||
throws IOException {
|
||||
long start = Time.now();
|
||||
nameNodeProto.mkdirs(dirPaths[daemonId][inputIdx],
|
||||
FsPermission.getDefault(), true);
|
||||
long end = Time.now();
|
||||
return end-start;
|
||||
}
|
||||
|
||||
@Override
|
||||
void printResults() {
|
||||
LOG.info("--- " + getOpName() + " inputs ---");
|
||||
LOG.info("nrDirs = " + numOpsRequired);
|
||||
LOG.info("nrThreads = " + numThreads);
|
||||
LOG.info("nrDirsPerDir = " + nameGenerator.getFilesPerDirectory());
|
||||
printStats();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Open file statistics.
|
||||
*
|
||||
|
@ -846,7 +938,7 @@ public class NNThroughputBenchmark implements Tool {
|
|||
// register datanode
|
||||
dnRegistration = nameNodeProto.registerDatanode(dnRegistration);
|
||||
//first block reports
|
||||
storage = new DatanodeStorage(dnRegistration.getDatanodeUuid());
|
||||
storage = new DatanodeStorage(DatanodeStorage.generateUuid());
|
||||
final StorageBlockReport[] reports = {
|
||||
new StorageBlockReport(storage,
|
||||
new BlockListAsLongs(null, null).getBlockListAsLongs())
|
||||
|
@ -862,8 +954,8 @@ public class NNThroughputBenchmark implements Tool {
|
|||
void sendHeartbeat() throws IOException {
|
||||
// register datanode
|
||||
// TODO:FEDERATION currently a single block pool is supported
|
||||
StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
|
||||
false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
|
||||
StorageReport[] rep = { new StorageReport(storage, false,
|
||||
DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
|
||||
DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration, rep,
|
||||
0L, 0L, 0, 0, 0).getCommands();
|
||||
if(cmds != null) {
|
||||
|
@ -909,7 +1001,7 @@ public class NNThroughputBenchmark implements Tool {
|
|||
@SuppressWarnings("unused") // keep it for future blockReceived benchmark
|
||||
int replicateBlocks() throws IOException {
|
||||
// register datanode
|
||||
StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
|
||||
StorageReport[] rep = { new StorageReport(storage,
|
||||
false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
|
||||
DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration,
|
||||
rep, 0L, 0L, 0, 0, 0).getCommands();
|
||||
|
@ -918,7 +1010,8 @@ public class NNThroughputBenchmark implements Tool {
|
|||
if (cmd.getAction() == DatanodeProtocol.DNA_TRANSFER) {
|
||||
// Send a copy of a block to another datanode
|
||||
BlockCommand bcmd = (BlockCommand)cmd;
|
||||
return transferBlocks(bcmd.getBlocks(), bcmd.getTargets());
|
||||
return transferBlocks(bcmd.getBlocks(), bcmd.getTargets(),
|
||||
bcmd.getTargetStorageIDs());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -931,12 +1024,14 @@ public class NNThroughputBenchmark implements Tool {
|
|||
* that the blocks have been received.
|
||||
*/
|
||||
private int transferBlocks( Block blocks[],
|
||||
DatanodeInfo xferTargets[][]
|
||||
DatanodeInfo xferTargets[][],
|
||||
String targetStorageIDs[][]
|
||||
) throws IOException {
|
||||
for(int i = 0; i < blocks.length; i++) {
|
||||
DatanodeInfo blockTargets[] = xferTargets[i];
|
||||
for(int t = 0; t < blockTargets.length; t++) {
|
||||
DatanodeInfo dnInfo = blockTargets[t];
|
||||
String targetStorageID = targetStorageIDs[i][t];
|
||||
DatanodeRegistration receivedDNReg;
|
||||
receivedDNReg = new DatanodeRegistration(dnInfo,
|
||||
new DataStorage(nsInfo),
|
||||
|
@ -946,7 +1041,7 @@ public class NNThroughputBenchmark implements Tool {
|
|||
blocks[i], ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK,
|
||||
null) };
|
||||
StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
|
||||
receivedDNReg.getDatanodeUuid(), rdBlocks) };
|
||||
targetStorageID, rdBlocks) };
|
||||
nameNodeProto.blockReceivedAndDeleted(receivedDNReg, nameNode
|
||||
.getNamesystem().getBlockPoolId(), report);
|
||||
}
|
||||
|
@ -1035,7 +1130,7 @@ public class NNThroughputBenchmark implements Tool {
|
|||
}
|
||||
|
||||
// create files
|
||||
LOG.info("Creating " + nrFiles + " with " + blocksPerFile + " blocks each.");
|
||||
LOG.info("Creating " + nrFiles + " files with " + blocksPerFile + " blocks each.");
|
||||
FileNameGenerator nameGenerator;
|
||||
nameGenerator = new FileNameGenerator(getBaseDir(), 100);
|
||||
String clientName = getClientName(007);
|
||||
|
@ -1069,7 +1164,7 @@ public class NNThroughputBenchmark implements Tool {
|
|||
loc.getBlock().getLocalBlock(),
|
||||
ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, null) };
|
||||
StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
|
||||
datanodes[dnIdx].dnRegistration.getDatanodeUuid(), rdBlocks) };
|
||||
datanodes[dnIdx].storage.getStorageID(), rdBlocks) };
|
||||
nameNodeProto.blockReceivedAndDeleted(datanodes[dnIdx].dnRegistration, loc
|
||||
.getBlock().getBlockPoolId(), report);
|
||||
}
|
||||
|
@ -1279,6 +1374,7 @@ public class NNThroughputBenchmark implements Tool {
|
|||
System.err.println("Usage: NNThroughputBenchmark"
|
||||
+ "\n\t" + OperationStatsBase.OP_ALL_USAGE
|
||||
+ " | \n\t" + CreateFileStats.OP_CREATE_USAGE
|
||||
+ " | \n\t" + MkdirsStats.OP_MKDIRS_USAGE
|
||||
+ " | \n\t" + OpenFileStats.OP_OPEN_USAGE
|
||||
+ " | \n\t" + DeleteFileStats.OP_DELETE_USAGE
|
||||
+ " | \n\t" + FileStatusStats.OP_FILE_STATUS_USAGE
|
||||
|
@ -1328,6 +1424,10 @@ public class NNThroughputBenchmark implements Tool {
|
|||
opStat = new CreateFileStats(args);
|
||||
ops.add(opStat);
|
||||
}
|
||||
if(runAll || MkdirsStats.OP_MKDIRS_NAME.equals(type)) {
|
||||
opStat = new MkdirsStats(args);
|
||||
ops.add(opStat);
|
||||
}
|
||||
if(runAll || OpenFileStats.OP_OPEN_NAME.equals(type)) {
|
||||
opStat = new OpenFileStats(args);
|
||||
ops.add(opStat);
|
||||
|
|
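For orientation, the new mkdirs operation is driven by the same command-line switches as the other benchmark ops. A hypothetical invocation follows; only the flag names come from the OP_MKDIRS_USAGE string added above, while the main() entry point and the argument values are assumptions.

public class RunMkdirsBenchmark {
  public static void main(String[] args) throws Exception {
    // Assumed entry point; flag names match "-op mkdirs [-threads T] [-dirs N] [-dirsPerDir P]".
    org.apache.hadoop.hdfs.server.namenode.NNThroughputBenchmark.main(new String[] {
        "-op", "mkdirs", "-threads", "4", "-dirs", "1000", "-dirsPerDir", "16"});
  }
}
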
|
@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.namenode;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.security.PrivilegedExceptionAction;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
|
@ -29,25 +28,13 @@ import org.apache.hadoop.classification.InterfaceAudience;
|
|||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileContext;
|
||||
import org.apache.hadoop.fs.Options.Rename;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.hdfs.DFSClientAdapter;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||
import org.apache.hadoop.hdfs.server.common.Util;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.security.token.Token;
|
||||
|
||||
/**
|
||||
* OfflineEditsViewerHelper is a helper class for TestOfflineEditsViewer,
|
||||
|
@ -135,151 +122,11 @@ public class OfflineEditsViewerHelper {
|
|||
* OP_CLEAR_NS_QUOTA (12)
|
||||
*/
|
||||
private CheckpointSignature runOperations() throws IOException {
|
||||
|
||||
LOG.info("Creating edits by performing fs operations");
|
||||
// no check, if it's not it throws an exception which is what we want
|
||||
DistributedFileSystem dfs =
|
||||
(DistributedFileSystem)cluster.getFileSystem();
|
||||
FileContext fc = FileContext.getFileContext(cluster.getURI(0), config);
|
||||
// OP_ADD 0
|
||||
Path pathFileCreate = new Path("/file_create_u\1F431");
|
||||
FSDataOutputStream s = dfs.create(pathFileCreate);
|
||||
// OP_CLOSE 9
|
||||
s.close();
|
||||
// OP_RENAME_OLD 1
|
||||
Path pathFileMoved = new Path("/file_moved");
|
||||
dfs.rename(pathFileCreate, pathFileMoved);
|
||||
// OP_DELETE 2
|
||||
dfs.delete(pathFileMoved, false);
|
||||
// OP_MKDIR 3
|
||||
Path pathDirectoryMkdir = new Path("/directory_mkdir");
|
||||
dfs.mkdirs(pathDirectoryMkdir);
|
||||
// OP_ALLOW_SNAPSHOT 29
|
||||
dfs.allowSnapshot(pathDirectoryMkdir);
|
||||
// OP_DISALLOW_SNAPSHOT 30
|
||||
dfs.disallowSnapshot(pathDirectoryMkdir);
|
||||
// OP_CREATE_SNAPSHOT 26
|
||||
String ssName = "snapshot1";
|
||||
dfs.allowSnapshot(pathDirectoryMkdir);
|
||||
dfs.createSnapshot(pathDirectoryMkdir, ssName);
|
||||
// OP_RENAME_SNAPSHOT 28
|
||||
String ssNewName = "snapshot2";
|
||||
dfs.renameSnapshot(pathDirectoryMkdir, ssName, ssNewName);
|
||||
// OP_DELETE_SNAPSHOT 27
|
||||
dfs.deleteSnapshot(pathDirectoryMkdir, ssNewName);
|
||||
// OP_SET_REPLICATION 4
|
||||
s = dfs.create(pathFileCreate);
|
||||
s.close();
|
||||
dfs.setReplication(pathFileCreate, (short)1);
|
||||
// OP_SET_PERMISSIONS 7
|
||||
Short permission = 0777;
|
||||
dfs.setPermission(pathFileCreate, new FsPermission(permission));
|
||||
// OP_SET_OWNER 8
|
||||
dfs.setOwner(pathFileCreate, new String("newOwner"), null);
|
||||
// OP_CLOSE 9 see above
|
||||
// OP_SET_GENSTAMP 10 see above
|
||||
// OP_SET_NS_QUOTA 11 obsolete
|
||||
// OP_CLEAR_NS_QUOTA 12 obsolete
|
||||
// OP_TIMES 13
|
||||
long mtime = 1285195527000L; // Wed, 22 Sep 2010 22:45:27 GMT
|
||||
long atime = mtime;
|
||||
dfs.setTimes(pathFileCreate, mtime, atime);
|
||||
// OP_SET_QUOTA 14
|
||||
dfs.setQuota(pathDirectoryMkdir, 1000L, HdfsConstants.QUOTA_DONT_SET);
|
||||
// OP_RENAME 15
|
||||
fc.rename(pathFileCreate, pathFileMoved, Rename.NONE);
|
||||
// OP_CONCAT_DELETE 16
|
||||
Path pathConcatTarget = new Path("/file_concat_target");
|
||||
Path[] pathConcatFiles = new Path[2];
|
||||
pathConcatFiles[0] = new Path("/file_concat_0");
|
||||
pathConcatFiles[1] = new Path("/file_concat_1");
|
||||
|
||||
long length = blockSize * 3; // multiple of blocksize for concat
|
||||
short replication = 1;
|
||||
long seed = 1;
|
||||
|
||||
DFSTestUtil.createFile(dfs, pathConcatTarget, length, replication, seed);
|
||||
DFSTestUtil.createFile(dfs, pathConcatFiles[0], length, replication, seed);
|
||||
DFSTestUtil.createFile(dfs, pathConcatFiles[1], length, replication, seed);
|
||||
dfs.concat(pathConcatTarget, pathConcatFiles);
|
||||
// OP_SYMLINK 17
|
||||
Path pathSymlink = new Path("/file_symlink");
|
||||
fc.createSymlink(pathConcatTarget, pathSymlink, false);
|
||||
// OP_GET_DELEGATION_TOKEN 18
|
||||
// OP_RENEW_DELEGATION_TOKEN 19
|
||||
// OP_CANCEL_DELEGATION_TOKEN 20
|
||||
// see TestDelegationToken.java
|
||||
// fake the user to renew token for
|
||||
final Token<?>[] tokens = dfs.addDelegationTokens("JobTracker", null);
|
||||
UserGroupInformation longUgi = UserGroupInformation.createRemoteUser(
|
||||
"JobTracker/foo.com@FOO.COM");
|
||||
try {
|
||||
longUgi.doAs(new PrivilegedExceptionAction<Object>() {
|
||||
@Override
|
||||
public Object run() throws IOException, InterruptedException {
|
||||
for (Token<?> token : tokens) {
|
||||
token.renew(config);
|
||||
token.cancel(config);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
});
|
||||
} catch(InterruptedException e) {
|
||||
throw new IOException(
|
||||
"renewDelegationToken threw InterruptedException", e);
|
||||
}
|
||||
// OP_UPDATE_MASTER_KEY 21
|
||||
// done by getDelegationTokenSecretManager().startThreads();
|
||||
|
||||
// OP_ADD_CACHE_POOL 35
|
||||
final String pool = "poolparty";
|
||||
dfs.addCachePool(new CachePoolInfo(pool));
|
||||
// OP_MODIFY_CACHE_POOL 36
|
||||
dfs.modifyCachePool(new CachePoolInfo(pool)
|
||||
.setOwnerName("carlton")
|
||||
.setGroupName("party")
|
||||
.setMode(new FsPermission((short)0700))
|
||||
.setLimit(1989l));
|
||||
// OP_ADD_PATH_BASED_CACHE_DIRECTIVE 33
|
||||
long id = dfs.addCacheDirective(
|
||||
new CacheDirectiveInfo.Builder().
|
||||
setPath(new Path("/bar")).
|
||||
setReplication((short)1).
|
||||
setPool(pool).
|
||||
build());
|
||||
// OP_MODIFY_PATH_BASED_CACHE_DIRECTIVE 38
|
||||
dfs.modifyCacheDirective(
|
||||
new CacheDirectiveInfo.Builder().
|
||||
setId(id).
|
||||
setPath(new Path("/bar2")).
|
||||
build());
|
||||
// OP_REMOVE_PATH_BASED_CACHE_DIRECTIVE 34
|
||||
dfs.removeCacheDirective(id);
|
||||
// OP_REMOVE_CACHE_POOL 37
|
||||
dfs.removeCachePool(pool);
|
||||
// sync to disk, otherwise we parse partial edits
|
||||
cluster.getNameNode().getFSImage().getEditLog().logSync();
|
||||
|
||||
// OP_REASSIGN_LEASE 22
|
||||
String filePath = "/hard-lease-recovery-test";
|
||||
byte[] bytes = "foo-bar-baz".getBytes();
|
||||
DFSClientAdapter.stopLeaseRenewer(dfs);
|
||||
FSDataOutputStream leaseRecoveryPath = dfs.create(new Path(filePath));
|
||||
leaseRecoveryPath.write(bytes);
|
||||
leaseRecoveryPath.hflush();
|
||||
// Set the hard lease timeout to 1 second.
|
||||
cluster.setLeasePeriod(60 * 1000, 1000);
|
||||
// wait for lease recovery to complete
|
||||
LocatedBlocks locatedBlocks;
|
||||
do {
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException e) {
|
||||
LOG.info("Innocuous exception", e);
|
||||
}
|
||||
locatedBlocks = DFSClientAdapter.callGetBlockLocations(
|
||||
cluster.getNameNodeRpc(), filePath, 0L, bytes.length);
|
||||
} while (locatedBlocks.isUnderConstruction());
|
||||
DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
|
||||
DFSTestUtil.runOperations(cluster, dfs, cluster.getConfiguration(0),
|
||||
dfs.getDefaultBlockSize(), 0);
|
||||
|
||||
// Force a roll so we get an OP_END_LOG_SEGMENT txn
|
||||
return cluster.getNameNodeRpc().rollEditLog();
|
||||
|
|
|
@ -21,7 +21,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
|
|||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
|
||||
import static org.apache.hadoop.hdfs.protocol.CachePoolInfo.RELATIVE_EXPIRY_NEVER;
|
||||
import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
|
||||
|
@ -58,17 +57,21 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
|
|||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||
import org.apache.hadoop.hdfs.LogVerificationAppender;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
|
||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
|
||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator;
|
||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
|
||||
import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
|
||||
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
|
||||
import org.apache.hadoop.hdfs.protocol.CachePoolStats;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
||||
import org.apache.hadoop.io.nativeio.NativeIO;
|
||||
import org.apache.hadoop.io.nativeio.NativeIO.POSIX.CacheManipulator;
|
||||
|
@ -79,6 +82,7 @@ import org.apache.hadoop.test.GenericTestUtils;
|
|||
import org.apache.hadoop.util.GSet;
|
||||
import org.apache.log4j.Level;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
|
@ -104,7 +108,7 @@ public class TestCacheDirectives {
|
|||
EditLogFileOutputStream.setShouldSkipFsyncForTesting(false);
|
||||
}
|
||||
|
||||
private static final long BLOCK_SIZE = 512;
|
||||
private static final long BLOCK_SIZE = 4096;
|
||||
private static final int NUM_DATANODES = 4;
|
||||
// Most Linux installs will allow non-root users to lock 64KB.
|
||||
// In this test though, we stub out mlock so this doesn't matter.
|
||||
|
@ -115,7 +119,6 @@ public class TestCacheDirectives {
|
|||
conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
|
||||
conf.setLong(DFS_DATANODE_MAX_LOCKED_MEMORY_KEY, CACHE_CAPACITY);
|
||||
conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1);
|
||||
conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, true);
|
||||
conf.setLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 1000);
|
||||
conf.setLong(DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1000);
|
||||
// set low limits here for testing purposes
|
||||
|
@ -602,8 +605,8 @@ public class TestCacheDirectives {
|
|||
* Wait for the NameNode to have an expected number of cached blocks
|
||||
* and replicas.
|
||||
* @param nn NameNode
|
||||
* @param expectedCachedBlocks
|
||||
* @param expectedCachedReplicas
|
||||
* @param expectedCachedBlocks if -1, treat as wildcard
|
||||
* @param expectedCachedReplicas if -1, treat as wildcard
|
||||
* @throws Exception
|
||||
*/
|
||||
private static void waitForCachedBlocks(NameNode nn,
|
||||
|
@ -632,17 +635,19 @@ public class TestCacheDirectives {
|
|||
} finally {
|
||||
namesystem.readUnlock();
|
||||
}
|
||||
if ((numCachedBlocks == expectedCachedBlocks) &&
|
||||
(numCachedReplicas == expectedCachedReplicas)) {
|
||||
if (expectedCachedBlocks == -1 ||
|
||||
numCachedBlocks == expectedCachedBlocks) {
|
||||
if (expectedCachedReplicas == -1 ||
|
||||
numCachedReplicas == expectedCachedReplicas) {
|
||||
return true;
|
||||
} else {
|
||||
}
|
||||
}
|
||||
LOG.info(logString + " cached blocks: have " + numCachedBlocks +
|
||||
" / " + expectedCachedBlocks + ". " +
|
||||
"cached replicas: have " + numCachedReplicas +
|
||||
" / " + expectedCachedReplicas);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}, 500, 60000);
|
||||
}
|
||||
|
||||
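The wildcard convention introduced in waitForCachedBlocks above (an expected count of -1 matches any observed value) can be read as the following small helper, shown here only as an illustration of the check, not as code from the patch:

public class WildcardMatchSketch {
  // -1 is treated as "don't care"; any observed value matches it.
  static boolean matches(int expected, int observed) {
    return expected == -1 || expected == observed;
  }

  public static void main(String[] args) {
    System.out.println(matches(-1, 7)); // true: wildcard
    System.out.println(matches(5, 5));  // true: exact match
    System.out.println(matches(5, 7));  // false
  }
}
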
|
@ -796,7 +801,15 @@ public class TestCacheDirectives {
|
|||
}
|
||||
}, 500, 60000);
|
||||
|
||||
// Send a cache report referring to a bogus block. It is important that
|
||||
// the NameNode be robust against this.
|
||||
NamenodeProtocols nnRpc = namenode.getRpcServer();
|
||||
DataNode dn0 = cluster.getDataNodes().get(0);
|
||||
String bpid = cluster.getNamesystem().getBlockPoolId();
|
||||
LinkedList<Long> bogusBlockIds = new LinkedList<Long> ();
|
||||
bogusBlockIds.add(999999L);
|
||||
nnRpc.cacheReport(dn0.getDNRegistrationForBP(bpid), bpid, bogusBlockIds);
|
||||
|
||||
Path rootDir = helper.getDefaultWorkingDirectory(dfs);
|
||||
// Create the pool
|
||||
final String pool = "friendlyPool";
|
||||
|
@ -826,6 +839,24 @@ public class TestCacheDirectives {
|
|||
waitForCachedBlocks(namenode, expected, expected,
|
||||
"testWaitForCachedReplicas:1");
|
||||
}
|
||||
|
||||
// Check that the datanodes have the right cache values
|
||||
DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
|
||||
assertEquals("Unexpected number of live nodes", NUM_DATANODES, live.length);
|
||||
long totalUsed = 0;
|
||||
for (DatanodeInfo dn : live) {
|
||||
final long cacheCapacity = dn.getCacheCapacity();
|
||||
final long cacheUsed = dn.getCacheUsed();
|
||||
final long cacheRemaining = dn.getCacheRemaining();
|
||||
assertEquals("Unexpected cache capacity", CACHE_CAPACITY, cacheCapacity);
|
||||
assertEquals("Capacity not equal to used + remaining",
|
||||
cacheCapacity, cacheUsed + cacheRemaining);
|
||||
assertEquals("Remaining not equal to capacity - used",
|
||||
cacheCapacity - cacheUsed, cacheRemaining);
|
||||
totalUsed += cacheUsed;
|
||||
}
|
||||
assertEquals(expected*BLOCK_SIZE, totalUsed);
|
||||
|
||||
// Uncache and check each path in sequence
|
||||
RemoteIterator<CacheDirectiveEntry> entries =
|
||||
new CacheDirectiveIterator(nnRpc, null);
|
||||
|
@ -838,55 +869,6 @@ public class TestCacheDirectives {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout=120000)
|
||||
public void testAddingCacheDirectiveInfosWhenCachingIsDisabled()
|
||||
throws Exception {
|
||||
cluster.shutdown();
|
||||
HdfsConfiguration conf = createCachingConf();
|
||||
conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, false);
|
||||
MiniDFSCluster cluster =
|
||||
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
|
||||
|
||||
try {
|
||||
cluster.waitActive();
|
||||
DistributedFileSystem dfs = cluster.getFileSystem();
|
||||
NameNode namenode = cluster.getNameNode();
|
||||
// Create the pool
|
||||
String pool = "pool1";
|
||||
namenode.getRpcServer().addCachePool(new CachePoolInfo(pool));
|
||||
// Create some test files
|
||||
final int numFiles = 2;
|
||||
final int numBlocksPerFile = 2;
|
||||
final List<String> paths = new ArrayList<String>(numFiles);
|
||||
for (int i=0; i<numFiles; i++) {
|
||||
Path p = new Path("/testCachePaths-" + i);
|
||||
FileSystemTestHelper.createFile(dfs, p, numBlocksPerFile,
|
||||
(int)BLOCK_SIZE);
|
||||
paths.add(p.toUri().getPath());
|
||||
}
|
||||
// Check the initial statistics at the namenode
|
||||
waitForCachedBlocks(namenode, 0, 0,
|
||||
"testAddingCacheDirectiveInfosWhenCachingIsDisabled:0");
|
||||
// Cache and check each path in sequence
|
||||
int expected = 0;
|
||||
for (int i=0; i<numFiles; i++) {
|
||||
CacheDirectiveInfo directive =
|
||||
new CacheDirectiveInfo.Builder().
|
||||
setPath(new Path(paths.get(i))).
|
||||
setPool(pool).
|
||||
build();
|
||||
dfs.addCacheDirective(directive);
|
||||
waitForCachedBlocks(namenode, expected, 0,
|
||||
"testAddingCacheDirectiveInfosWhenCachingIsDisabled:1");
|
||||
}
|
||||
Thread.sleep(20000);
|
||||
waitForCachedBlocks(namenode, expected, 0,
|
||||
"testAddingCacheDirectiveInfosWhenCachingIsDisabled:2");
|
||||
} finally {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=120000)
|
||||
public void testWaitForCachedReplicasInDirectory() throws Exception {
|
||||
// Create the pool
|
||||
|
@ -965,7 +947,6 @@ public class TestCacheDirectives {
|
|||
(4+3) * numBlocksPerFile * BLOCK_SIZE,
|
||||
3, 2,
|
||||
poolInfo, "testWaitForCachedReplicasInDirectory:2:pool");
|
||||
|
||||
// remove and watch numCached go to 0
|
||||
dfs.removeCacheDirective(id);
|
||||
dfs.removeCacheDirective(id2);
|
||||
|
@ -1374,4 +1355,39 @@ public class TestCacheDirectives {
|
|||
.setExpiration(Expiration.newRelative(RELATIVE_EXPIRY_NEVER - 1))
|
||||
.build());
|
||||
}
|
||||
|
||||
@Test(timeout=60000)
|
||||
public void testExceedsCapacity() throws Exception {
|
||||
// Create a giant file
|
||||
final Path fileName = new Path("/exceeds");
|
||||
final long fileLen = CACHE_CAPACITY * (NUM_DATANODES*2);
|
||||
int numCachedReplicas = (int) ((CACHE_CAPACITY*NUM_DATANODES)/BLOCK_SIZE);
|
||||
DFSTestUtil.createFile(dfs, fileName, fileLen, (short) NUM_DATANODES,
|
||||
0xFADED);
|
||||
// Set up a log appender watcher
|
||||
final LogVerificationAppender appender = new LogVerificationAppender();
|
||||
final Logger logger = Logger.getRootLogger();
|
||||
logger.addAppender(appender);
|
||||
dfs.addCachePool(new CachePoolInfo("pool"));
|
||||
dfs.addCacheDirective(new CacheDirectiveInfo.Builder().setPool("pool")
|
||||
.setPath(fileName).setReplication((short) 1).build());
|
||||
waitForCachedBlocks(namenode, -1, numCachedReplicas,
|
||||
"testExceeds:1");
|
||||
// Check that no DNs saw an excess CACHE message
|
||||
int lines = appender.countLinesWithMessage(
|
||||
"more bytes in the cache: " +
|
||||
DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
|
||||
assertEquals("Namenode should not send extra CACHE commands", 0, lines);
|
||||
// Try creating a file with giant-sized blocks that exceed cache capacity
|
||||
dfs.delete(fileName, false);
|
||||
DFSTestUtil.createFile(dfs, fileName, 4096, fileLen, CACHE_CAPACITY * 2,
|
||||
(short) 1, 0xFADED);
|
||||
// Nothing will get cached, so just force sleep for a bit
|
||||
Thread.sleep(4000);
|
||||
// Still should not see any excess commands
|
||||
lines = appender.countLinesWithMessage(
|
||||
"more bytes in the cache: " +
|
||||
DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
|
||||
assertEquals("Namenode should not send extra CACHE commands", 0, lines);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -140,8 +140,9 @@ public class TestDeadDatanode {

// Ensure heartbeat from dead datanode is rejected with a command
// that asks datanode to register again
StorageReport[] rep = { new StorageReport(reg.getDatanodeUuid(), false, 0, 0,
0, 0) };
StorageReport[] rep = { new StorageReport(
new DatanodeStorage(reg.getDatanodeUuid()),
false, 0, 0, 0, 0) };
DatanodeCommand[] cmd = dnp.sendHeartbeat(reg, rep, 0L, 0L, 0, 0, 0)
.getCommands();
assertEquals(1, cmd.length);
|
|
|
@ -27,6 +27,7 @@ import java.io.File;
|
|||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.LinkedList;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
|
@ -59,6 +60,8 @@ import org.junit.Assert;
|
|||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
import com.google.common.util.concurrent.Uninterruptibles;
|
||||
|
||||
/**
|
||||
* Tests state transition from active->standby, and manual failover
|
||||
* and failback between two namenodes.
|
||||
|
@ -124,6 +127,17 @@ public class TestHAStateTransitions {
|
|||
}
|
||||
}
|
||||
|
||||
private void addCrmThreads(MiniDFSCluster cluster,
|
||||
LinkedList<Thread> crmThreads) {
|
||||
for (int nn = 0; nn <= 1; nn++) {
|
||||
Thread thread = cluster.getNameNode(nn).getNamesystem().
|
||||
getCacheManager().getCacheReplicationMonitor();
|
||||
if (thread != null) {
|
||||
crmThreads.add(thread);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that transitioning a service to the state that it is already
|
||||
* in is a nop, specifically, an exception is not thrown.
|
||||
|
@ -131,19 +145,30 @@ public class TestHAStateTransitions {
|
|||
@Test
|
||||
public void testTransitionToCurrentStateIsANop() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1L);
|
||||
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
|
||||
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
||||
.numDataNodes(1)
|
||||
.build();
|
||||
LinkedList<Thread> crmThreads = new LinkedList<Thread>();
|
||||
try {
|
||||
cluster.waitActive();
|
||||
addCrmThreads(cluster, crmThreads);
|
||||
cluster.transitionToActive(0);
|
||||
addCrmThreads(cluster, crmThreads);
|
||||
cluster.transitionToActive(0);
|
||||
addCrmThreads(cluster, crmThreads);
|
||||
cluster.transitionToStandby(0);
|
||||
addCrmThreads(cluster, crmThreads);
|
||||
cluster.transitionToStandby(0);
|
||||
addCrmThreads(cluster, crmThreads);
|
||||
} finally {
|
||||
cluster.shutdown();
|
||||
}
|
||||
// Verify that all cacheReplicationMonitor threads shut down
|
||||
for (Thread thread : crmThreads) {
|
||||
Uninterruptibles.joinUninterruptibly(thread);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -47,6 +47,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
|
|||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||
import org.apache.hadoop.metrics2.MetricsSource;
|
||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||
import org.apache.hadoop.test.MetricsAsserts;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.log4j.Level;
|
||||
|
@ -108,6 +110,12 @@ public class TestNameNodeMetrics {
|
|||
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
MetricsSource source = DefaultMetricsSystem.instance().getSource("UgiMetrics");
|
||||
if (source != null) {
|
||||
// Run only once since the UGI metrics is cleaned up during teardown
|
||||
MetricsRecordBuilder rb = getMetrics(source);
|
||||
assertQuantileGauges("GetGroups1s", rb);
|
||||
}
|
||||
cluster.shutdown();
|
||||
}
|
||||
|
||||
|
|
|
@ -33,10 +33,15 @@ import java.io.PrintStream;
|
|||
import java.net.InetSocketAddress;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DFSUtil;
|
||||
import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
|
||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||
|
@ -55,7 +60,7 @@ public class TestGetConf {
|
|||
enum TestType {
|
||||
NAMENODE, BACKUP, SECONDARY, NNRPCADDRESSES
|
||||
}
|
||||
|
||||
FileSystem localFileSys;
|
||||
/** Setup federation nameServiceIds in the configuration */
|
||||
private void setupNameServices(HdfsConfiguration conf, int nameServiceIdCount) {
|
||||
StringBuilder nsList = new StringBuilder();
|
||||
|
@ -379,4 +384,70 @@ public class TestGetConf {
|
|||
}
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void TestGetConfExcludeCommand() throws Exception{
|
||||
HdfsConfiguration conf = new HdfsConfiguration();
|
||||
// Set up the hosts/exclude files.
|
||||
localFileSys = FileSystem.getLocal(conf);
|
||||
Path workingDir = localFileSys.getWorkingDirectory();
|
||||
Path dir = new Path(workingDir, System.getProperty("test.build.data", "target/test/data") + "/Getconf/");
|
||||
Path hostsFile = new Path(dir, "hosts");
|
||||
Path excludeFile = new Path(dir, "exclude");
|
||||
|
||||
// Setup conf
|
||||
conf.set(DFSConfigKeys.DFS_HOSTS, hostsFile.toUri().getPath());
|
||||
conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
|
||||
writeConfigFile(hostsFile, null);
|
||||
writeConfigFile(excludeFile, null);
|
||||
String[] args = {"-excludeFile"};
|
||||
String ret = runTool(conf, args, true);
|
||||
assertEquals(excludeFile.toUri().getPath(),ret.trim());
|
||||
cleanupFile(localFileSys, excludeFile.getParent());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestGetConfIncludeCommand() throws Exception{
|
||||
HdfsConfiguration conf = new HdfsConfiguration();
|
||||
// Set up the hosts/exclude files.
|
||||
localFileSys = FileSystem.getLocal(conf);
|
||||
Path workingDir = localFileSys.getWorkingDirectory();
|
||||
Path dir = new Path(workingDir, System.getProperty("test.build.data", "target/test/data") + "/Getconf/");
|
||||
Path hostsFile = new Path(dir, "hosts");
|
||||
Path excludeFile = new Path(dir, "exclude");
|
||||
|
||||
// Setup conf
|
||||
conf.set(DFSConfigKeys.DFS_HOSTS, hostsFile.toUri().getPath());
|
||||
conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
|
||||
writeConfigFile(hostsFile, null);
|
||||
writeConfigFile(excludeFile, null);
|
||||
String[] args = {"-includeFile"};
|
||||
String ret = runTool(conf, args, true);
|
||||
assertEquals(hostsFile.toUri().getPath(),ret.trim());
|
||||
cleanupFile(localFileSys, excludeFile.getParent());
|
||||
}
|
||||
|
||||
private void writeConfigFile(Path name, ArrayList<String> nodes)
|
||||
throws IOException {
|
||||
// delete if it already exists
|
||||
if (localFileSys.exists(name)) {
|
||||
localFileSys.delete(name, true);
|
||||
}
|
||||
|
||||
FSDataOutputStream stm = localFileSys.create(name);
|
||||
|
||||
if (nodes != null) {
|
||||
for (Iterator<String> it = nodes.iterator(); it.hasNext();) {
|
||||
String node = it.next();
|
||||
stm.writeBytes(node);
|
||||
stm.writeBytes("\n");
|
||||
}
|
||||
}
|
||||
stm.close();
|
||||
}
|
||||
|
||||
private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
|
||||
assertTrue(fileSys.exists(name));
|
||||
fileSys.delete(name, true);
|
||||
assertTrue(!fileSys.exists(name));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,8 +26,6 @@ import java.io.FileOutputStream;
|
|||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -36,64 +34,58 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
|
|||
import org.apache.hadoop.hdfs.server.namenode.OfflineEditsViewerHelper;
|
||||
import org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer.Flags;
|
||||
import org.apache.hadoop.test.PathUtils;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.TemporaryFolder;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
|
||||
public class TestOfflineEditsViewer {
|
||||
private static final Log LOG = LogFactory.getLog(TestOfflineEditsViewer.class);
|
||||
private static final Log LOG = LogFactory
|
||||
.getLog(TestOfflineEditsViewer.class);
|
||||
|
||||
private static final Map<FSEditLogOpCodes, Boolean> obsoleteOpCodes =
|
||||
new HashMap<FSEditLogOpCodes, Boolean>();
|
||||
|
||||
private static final Map<FSEditLogOpCodes, Boolean> missingOpCodes =
|
||||
new HashMap<FSEditLogOpCodes, Boolean>();
|
||||
|
||||
static {
|
||||
initializeObsoleteOpCodes();
|
||||
initializeMissingOpCodes();
|
||||
}
|
||||
|
||||
private static String buildDir =
|
||||
PathUtils.getTestDirName(TestOfflineEditsViewer.class);
|
||||
|
||||
private static String cacheDir =
|
||||
System.getProperty("test.cache.data", "build/test/cache");
|
||||
private static String buildDir = PathUtils
|
||||
.getTestDirName(TestOfflineEditsViewer.class);
|
||||
|
||||
// to create edits and get edits filename
|
||||
private static final OfflineEditsViewerHelper nnHelper
|
||||
= new OfflineEditsViewerHelper();
|
||||
private static final OfflineEditsViewerHelper nnHelper = new OfflineEditsViewerHelper();
|
||||
private static final ImmutableSet<FSEditLogOpCodes> skippedOps = skippedOps();
|
||||
|
||||
/**
|
||||
* Initialize obsoleteOpCodes
|
||||
*
|
||||
* Reason for suppressing "deprecation" warnings:
|
||||
*
|
||||
* These are the opcodes that are not used anymore, some
|
||||
* are marked deprecated, we need to include them here to make
|
||||
* sure we exclude them when checking for completeness of testing,
|
||||
* that's why the "deprecation" warnings are suppressed.
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
private static void initializeObsoleteOpCodes() {
|
||||
obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_ADD, true);
|
||||
obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_REMOVE, true);
|
||||
obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_NS_QUOTA, true);
|
||||
obsoleteOpCodes.put(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA, true);
|
||||
private static ImmutableSet<FSEditLogOpCodes> skippedOps() {
|
||||
ImmutableSet.Builder<FSEditLogOpCodes> b = ImmutableSet
|
||||
.<FSEditLogOpCodes> builder();
|
||||
|
||||
// Deprecated opcodes
|
||||
b.add(FSEditLogOpCodes.OP_DATANODE_ADD)
|
||||
.add(FSEditLogOpCodes.OP_DATANODE_REMOVE)
|
||||
.add(FSEditLogOpCodes.OP_SET_NS_QUOTA)
|
||||
.add(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA)
|
||||
.add(FSEditLogOpCodes.OP_SET_GENSTAMP_V1);
|
||||
|
||||
// Cannot test delegation token related code in insecure set up
|
||||
b.add(FSEditLogOpCodes.OP_GET_DELEGATION_TOKEN)
|
||||
.add(FSEditLogOpCodes.OP_RENEW_DELEGATION_TOKEN)
|
||||
.add(FSEditLogOpCodes.OP_CANCEL_DELEGATION_TOKEN);
|
||||
|
||||
// Skip invalid opcode
|
||||
b.add(FSEditLogOpCodes.OP_INVALID);
|
||||
return b.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize missingOpcodes
|
||||
*
|
||||
* Opcodes that are not available except after uprade from
|
||||
* an older version. We don't test these here.
|
||||
*/
|
||||
private static void initializeMissingOpCodes() {
|
||||
obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_GENSTAMP_V1, true);
|
||||
}
|
||||
@Rule
|
||||
public final TemporaryFolder folder = new TemporaryFolder();
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
new File(cacheDir).mkdirs();
|
||||
public void setUp() throws IOException {
|
||||
nnHelper.startCluster(buildDir + "/dfs/");
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() throws IOException {
|
||||
nnHelper.shutdownCluster();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -101,54 +93,42 @@ public class TestOfflineEditsViewer {
|
|||
*/
|
||||
@Test
|
||||
public void testGenerated() throws IOException {
|
||||
|
||||
LOG.info("START - testing with generated edits");
|
||||
|
||||
nnHelper.startCluster(buildDir + "/dfs/");
|
||||
|
||||
// edits generated by nnHelper (MiniDFSCluster), should have all op codes
|
||||
// binary, XML, reparsed binary
|
||||
String edits = nnHelper.generateEdits();
|
||||
String editsParsedXml = cacheDir + "/editsParsed.xml";
|
||||
String editsReparsed = cacheDir + "/editsReparsed";
|
||||
String editsParsedXml = folder.newFile("editsParsed.xml").getAbsolutePath();
|
||||
String editsReparsed = folder.newFile("editsParsed").getAbsolutePath();
|
||||
|
||||
// parse to XML then back to binary
|
||||
assertEquals(0, runOev(edits, editsParsedXml, "xml", false));
|
||||
assertEquals(0, runOev(editsParsedXml, editsReparsed, "binary", false));
|
||||
|
||||
// judgment time
|
||||
assertTrue(
|
||||
"Edits " + edits + " should have all op codes",
|
||||
assertTrue("Edits " + edits + " should have all op codes",
|
||||
hasAllOpCodes(edits));
|
||||
LOG.info("Comparing generated file " + editsReparsed +
|
||||
" with reference file " + edits);
|
||||
LOG.info("Comparing generated file " + editsReparsed
|
||||
+ " with reference file " + edits);
|
||||
assertTrue(
|
||||
"Generated edits and reparsed (bin to XML to bin) should be same",
|
||||
filesEqualIgnoreTrailingZeros(edits, editsReparsed));
|
||||
|
||||
// removes edits so do this at the end
|
||||
nnHelper.shutdownCluster();
|
||||
|
||||
LOG.info("END");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRecoveryMode() throws IOException {
|
||||
LOG.info("START - testing with generated edits");
|
||||
|
||||
nnHelper.startCluster(buildDir + "/dfs/");
|
||||
|
||||
// edits generated by nnHelper (MiniDFSCluster), should have all op codes
|
||||
// binary, XML, reparsed binary
|
||||
String edits = nnHelper.generateEdits();
|
||||
|
||||
FileOutputStream os = new FileOutputStream(edits, true);
|
||||
// Corrupt the file by truncating the end
|
||||
FileChannel editsFile = new FileOutputStream(edits, true).getChannel();
|
||||
FileChannel editsFile = os.getChannel();
|
||||
editsFile.truncate(editsFile.size() - 5);
|
||||
|
||||
String editsParsedXml = cacheDir + "/editsRecoveredParsed.xml";
|
||||
String editsReparsed = cacheDir + "/editsRecoveredReparsed";
|
||||
String editsParsedXml2 = cacheDir + "/editsRecoveredParsed2.xml";
|
||||
String editsParsedXml = folder.newFile("editsRecoveredParsed.xml")
|
||||
.getAbsolutePath();
|
||||
String editsReparsed = folder.newFile("editsRecoveredReparsed")
|
||||
.getAbsolutePath();
|
||||
String editsParsedXml2 = folder.newFile("editsRecoveredParsed2.xml")
|
||||
.getAbsolutePath();
|
||||
|
||||
// Can't read the corrupted file without recovery mode
|
||||
assertEquals(-1, runOev(edits, editsParsedXml, "xml", false));
|
||||
|
@ -162,18 +142,14 @@ public class TestOfflineEditsViewer {
|
|||
assertTrue("Test round trip",
|
||||
filesEqualIgnoreTrailingZeros(editsParsedXml, editsParsedXml2));
|
||||
|
||||
// removes edits so do this at the end
|
||||
nnHelper.shutdownCluster();
|
||||
|
||||
LOG.info("END");
|
||||
os.close();
|
||||
}
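The recovery-mode test now keeps a handle to the append stream so it can be closed once the run finishes. A small standalone sketch of the same truncate-then-close pattern used to simulate a torn edit log (the file name comes from the command line; nothing Hadoop-specific is assumed):

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.channels.FileChannel;

public class TruncateTail {
  // Chop the last 'bytes' bytes off a file to simulate a truncated edit log.
  static void truncateTail(File f, long bytes) throws IOException {
    FileOutputStream os = new FileOutputStream(f, true); // append mode keeps existing data
    try {
      FileChannel ch = os.getChannel();
      ch.truncate(Math.max(0, ch.size() - bytes));
    } finally {
      os.close(); // always release the handle, even if truncate fails
    }
  }

  public static void main(String[] args) throws IOException {
    truncateTail(new File(args[0]), 5);
  }
}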
|
||||
|
||||
@Test
|
||||
public void testStored() throws IOException {
|
||||
|
||||
LOG.info("START - testing with stored reference edits");
|
||||
|
||||
// reference edits stored with source code (see build.xml)
|
||||
final String cacheDir = System.getProperty("test.cache.data",
|
||||
"build/test/cache");
|
||||
// binary, XML, reparsed binary
|
||||
String editsStored = cacheDir + "/editsStored";
|
||||
String editsStoredParsedXml = cacheDir + "/editsStoredParsed.xml";
|
||||
|
@ -183,21 +159,17 @@ public class TestOfflineEditsViewer {
|
|||
|
||||
// parse to XML then back to binary
|
||||
assertEquals(0, runOev(editsStored, editsStoredParsedXml, "xml", false));
|
||||
assertEquals(0, runOev(editsStoredParsedXml, editsStoredReparsed,
|
||||
"binary", false));
|
||||
assertEquals(0,
|
||||
runOev(editsStoredParsedXml, editsStoredReparsed, "binary", false));
|
||||
|
||||
// judgement time
|
||||
assertTrue(
|
||||
"Edits " + editsStored + " should have all op codes",
|
||||
assertTrue("Edits " + editsStored + " should have all op codes",
|
||||
hasAllOpCodes(editsStored));
|
||||
assertTrue(
|
||||
"Reference XML edits and parsed to XML should be same",
|
||||
assertTrue("Reference XML edits and parsed to XML should be same",
|
||||
filesEqual(editsStoredXml, editsStoredParsedXml));
|
||||
assertTrue(
|
||||
"Reference edits and reparsed (bin to XML to bin) should be same",
|
||||
filesEqualIgnoreTrailingZeros(editsStored, editsStoredReparsed));
|
||||
|
||||
LOG.info("END");
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -233,19 +205,14 @@ public class TestOfflineEditsViewer {
|
|||
OfflineEditsViewer oev = new OfflineEditsViewer();
|
||||
if (oev.go(inFilename, outFilename, "stats", new Flags(), visitor) != 0)
|
||||
return false;
|
||||
LOG.info("Statistics for " + inFilename + "\n" +
|
||||
visitor.getStatisticsString());
|
||||
LOG.info("Statistics for " + inFilename + "\n"
|
||||
+ visitor.getStatisticsString());
|
||||
|
||||
boolean hasAllOpCodes = true;
|
||||
for (FSEditLogOpCodes opCode : FSEditLogOpCodes.values()) {
|
||||
// don't need to test obsolete opCodes
|
||||
if(obsoleteOpCodes.containsKey(opCode)) {
|
||||
if (skippedOps.contains(opCode))
|
||||
continue;
|
||||
} else if (missingOpCodes.containsKey(opCode)) {
|
||||
continue;
|
||||
} else if (opCode == FSEditLogOpCodes.OP_INVALID) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Long count = visitor.getStatistics().get(opCode);
|
||||
if ((count == null) || (count == 0)) {
|
||||
|
@ -257,9 +224,9 @@ public class TestOfflineEditsViewer {
|
|||
}
|
||||
|
||||
/**
* Compare two files, ignore trailing zeros at the end,
* for edits log the trailing zeros do not make any difference,
* throw an exception if the files are not the same
* Compare two files, ignore trailing zeros at the end, for edits log the
* trailing zeros do not make any difference, throw an exception if the files
* are not the same
*
* @param filenameSmall first file to compare (doesn't have to be smaller)
* @param filenameLarge second file to compare (doesn't have to be larger)
|
||||
|
@ -288,7 +255,9 @@ public class TestOfflineEditsViewer {
|
|||
large.limit(small.capacity());
|
||||
|
||||
// compares position to limit
|
||||
if(!small.equals(large)) { return false; }
|
||||
if (!small.equals(large)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// everything after limit should be 0xFF
|
||||
int i = large.limit();
Binary file not shown.
|
@ -13,8 +13,8 @@
|
|||
<TXID>2</TXID>
|
||||
<DELEGATION_KEY>
|
||||
<KEY_ID>1</KEY_ID>
|
||||
<EXPIRY_DATE>1388171826188</EXPIRY_DATE>
|
||||
<KEY>c7d869c22c8afce1</KEY>
|
||||
<EXPIRY_DATE>1389121087930</EXPIRY_DATE>
|
||||
<KEY>d48b4b3e6a43707b</KEY>
|
||||
</DELEGATION_KEY>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -24,8 +24,8 @@
|
|||
<TXID>3</TXID>
|
||||
<DELEGATION_KEY>
|
||||
<KEY_ID>2</KEY_ID>
|
||||
<EXPIRY_DATE>1388171826191</EXPIRY_DATE>
|
||||
<KEY>a3c41446507dfca9</KEY>
|
||||
<EXPIRY_DATE>1389121087937</EXPIRY_DATE>
|
||||
<KEY>62b6fae6bff918a9</KEY>
|
||||
</DELEGATION_KEY>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -37,17 +37,17 @@
|
|||
<INODEID>16386</INODEID>
|
||||
<PATH>/file_create_u\0001;F431</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480626844</MTIME>
|
||||
<ATIME>1387480626844</ATIME>
|
||||
<MTIME>1388429889312</MTIME>
|
||||
<ATIME>1388429889312</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
|
||||
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
|
||||
<PERMISSION_STATUS>
|
||||
<USERNAME>andrew</USERNAME>
|
||||
<GROUPNAME>supergroup</GROUPNAME>
|
||||
<MODE>420</MODE>
|
||||
</PERMISSION_STATUS>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>7</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -59,8 +59,8 @@
|
|||
<INODEID>0</INODEID>
|
||||
<PATH>/file_create_u\0001;F431</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480626885</MTIME>
|
||||
<ATIME>1387480626844</ATIME>
|
||||
<MTIME>1388429889328</MTIME>
|
||||
<ATIME>1388429889312</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME></CLIENT_NAME>
|
||||
<CLIENT_MACHINE></CLIENT_MACHINE>
|
||||
|
@ -78,8 +78,8 @@
|
|||
<LENGTH>0</LENGTH>
|
||||
<SRC>/file_create_u\0001;F431</SRC>
|
||||
<DST>/file_moved</DST>
|
||||
<TIMESTAMP>1387480626894</TIMESTAMP>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<TIMESTAMP>1388429889336</TIMESTAMP>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>9</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -89,8 +89,8 @@
|
|||
<TXID>7</TXID>
|
||||
<LENGTH>0</LENGTH>
|
||||
<PATH>/file_moved</PATH>
|
||||
<TIMESTAMP>1387480626905</TIMESTAMP>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<TIMESTAMP>1388429889346</TIMESTAMP>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>10</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -101,7 +101,7 @@
|
|||
<LENGTH>0</LENGTH>
|
||||
<INODEID>16387</INODEID>
|
||||
<PATH>/directory_mkdir</PATH>
|
||||
<TIMESTAMP>1387480626917</TIMESTAMP>
|
||||
<TIMESTAMP>1388429889357</TIMESTAMP>
|
||||
<PERMISSION_STATUS>
|
||||
<USERNAME>andrew</USERNAME>
|
||||
<GROUPNAME>supergroup</GROUPNAME>
|
||||
|
@ -136,7 +136,7 @@
|
|||
<TXID>12</TXID>
|
||||
<SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
|
||||
<SNAPSHOTNAME>snapshot1</SNAPSHOTNAME>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>15</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -147,7 +147,7 @@
|
|||
<SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
|
||||
<SNAPSHOTOLDNAME>snapshot1</SNAPSHOTOLDNAME>
|
||||
<SNAPSHOTNEWNAME>snapshot2</SNAPSHOTNEWNAME>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>16</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -157,7 +157,7 @@
|
|||
<TXID>14</TXID>
|
||||
<SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
|
||||
<SNAPSHOTNAME>snapshot2</SNAPSHOTNAME>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>17</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -169,17 +169,17 @@
|
|||
<INODEID>16388</INODEID>
|
||||
<PATH>/file_create_u\0001;F431</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480626978</MTIME>
|
||||
<ATIME>1387480626978</ATIME>
|
||||
<MTIME>1388429889412</MTIME>
|
||||
<ATIME>1388429889412</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
|
||||
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
|
||||
<PERMISSION_STATUS>
|
||||
<USERNAME>andrew</USERNAME>
|
||||
<GROUPNAME>supergroup</GROUPNAME>
|
||||
<MODE>420</MODE>
|
||||
</PERMISSION_STATUS>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>18</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -191,8 +191,8 @@
|
|||
<INODEID>0</INODEID>
|
||||
<PATH>/file_create_u\0001;F431</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480626985</MTIME>
|
||||
<ATIME>1387480626978</ATIME>
|
||||
<MTIME>1388429889420</MTIME>
|
||||
<ATIME>1388429889412</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME></CLIENT_NAME>
|
||||
<CLIENT_MACHINE></CLIENT_MACHINE>
|
||||
|
@ -253,9 +253,9 @@
|
|||
<LENGTH>0</LENGTH>
|
||||
<SRC>/file_create_u\0001;F431</SRC>
|
||||
<DST>/file_moved</DST>
|
||||
<TIMESTAMP>1387480627035</TIMESTAMP>
|
||||
<TIMESTAMP>1388429889495</TIMESTAMP>
|
||||
<OPTIONS>NONE</OPTIONS>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>25</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -267,17 +267,17 @@
|
|||
<INODEID>16389</INODEID>
|
||||
<PATH>/file_concat_target</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480627043</MTIME>
|
||||
<ATIME>1387480627043</ATIME>
|
||||
<MTIME>1388429889511</MTIME>
|
||||
<ATIME>1388429889511</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
|
||||
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
|
||||
<PERMISSION_STATUS>
|
||||
<USERNAME>andrew</USERNAME>
|
||||
<GROUPNAME>supergroup</GROUPNAME>
|
||||
<MODE>420</MODE>
|
||||
</PERMISSION_STATUS>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>27</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -388,8 +388,8 @@
|
|||
<INODEID>0</INODEID>
|
||||
<PATH>/file_concat_target</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480627148</MTIME>
|
||||
<ATIME>1387480627043</ATIME>
|
||||
<MTIME>1388429889812</MTIME>
|
||||
<ATIME>1388429889511</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME></CLIENT_NAME>
|
||||
<CLIENT_MACHINE></CLIENT_MACHINE>
|
||||
|
@ -423,17 +423,17 @@
|
|||
<INODEID>16390</INODEID>
|
||||
<PATH>/file_concat_0</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480627155</MTIME>
|
||||
<ATIME>1387480627155</ATIME>
|
||||
<MTIME>1388429889825</MTIME>
|
||||
<ATIME>1388429889825</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
|
||||
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
|
||||
<PERMISSION_STATUS>
|
||||
<USERNAME>andrew</USERNAME>
|
||||
<GROUPNAME>supergroup</GROUPNAME>
|
||||
<MODE>420</MODE>
|
||||
</PERMISSION_STATUS>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>40</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -544,8 +544,8 @@
|
|||
<INODEID>0</INODEID>
|
||||
<PATH>/file_concat_0</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480627193</MTIME>
|
||||
<ATIME>1387480627155</ATIME>
|
||||
<MTIME>1388429889909</MTIME>
|
||||
<ATIME>1388429889825</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME></CLIENT_NAME>
|
||||
<CLIENT_MACHINE></CLIENT_MACHINE>
|
||||
|
@ -579,17 +579,17 @@
|
|||
<INODEID>16391</INODEID>
|
||||
<PATH>/file_concat_1</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480627200</MTIME>
|
||||
<ATIME>1387480627200</ATIME>
|
||||
<MTIME>1388429889920</MTIME>
|
||||
<ATIME>1388429889920</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
|
||||
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
|
||||
<PERMISSION_STATUS>
|
||||
<USERNAME>andrew</USERNAME>
|
||||
<GROUPNAME>supergroup</GROUPNAME>
|
||||
<MODE>420</MODE>
|
||||
</PERMISSION_STATUS>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>52</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -700,8 +700,8 @@
|
|||
<INODEID>0</INODEID>
|
||||
<PATH>/file_concat_1</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480627238</MTIME>
|
||||
<ATIME>1387480627200</ATIME>
|
||||
<MTIME>1388429890016</MTIME>
|
||||
<ATIME>1388429889920</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME></CLIENT_NAME>
|
||||
<CLIENT_MACHINE></CLIENT_MACHINE>
|
||||
|
@ -733,12 +733,12 @@
|
|||
<TXID>56</TXID>
|
||||
<LENGTH>0</LENGTH>
|
||||
<TRG>/file_concat_target</TRG>
|
||||
<TIMESTAMP>1387480627246</TIMESTAMP>
|
||||
<TIMESTAMP>1388429890031</TIMESTAMP>
|
||||
<SOURCES>
|
||||
<SOURCE1>/file_concat_0</SOURCE1>
|
||||
<SOURCE2>/file_concat_1</SOURCE2>
|
||||
</SOURCES>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>63</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -750,14 +750,14 @@
|
|||
<INODEID>16392</INODEID>
|
||||
<PATH>/file_symlink</PATH>
|
||||
<VALUE>/file_concat_target</VALUE>
|
||||
<MTIME>1387480627255</MTIME>
|
||||
<ATIME>1387480627255</ATIME>
|
||||
<MTIME>1388429890046</MTIME>
|
||||
<ATIME>1388429890046</ATIME>
|
||||
<PERMISSION_STATUS>
|
||||
<USERNAME>andrew</USERNAME>
|
||||
<GROUPNAME>supergroup</GROUPNAME>
|
||||
<MODE>511</MODE>
|
||||
</PERMISSION_STATUS>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>64</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -771,11 +771,11 @@
|
|||
<OWNER>andrew</OWNER>
|
||||
<RENEWER>JobTracker</RENEWER>
|
||||
<REALUSER></REALUSER>
|
||||
<ISSUE_DATE>1387480627262</ISSUE_DATE>
|
||||
<MAX_DATE>1388085427262</MAX_DATE>
|
||||
<ISSUE_DATE>1388429890059</ISSUE_DATE>
|
||||
<MAX_DATE>1389034690059</MAX_DATE>
|
||||
<MASTER_KEY_ID>2</MASTER_KEY_ID>
|
||||
</DELEGATION_TOKEN_IDENTIFIER>
|
||||
<EXPIRY_TIME>1387567027262</EXPIRY_TIME>
|
||||
<EXPIRY_TIME>1388516290059</EXPIRY_TIME>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
<RECORD>
|
||||
|
@ -788,11 +788,11 @@
|
|||
<OWNER>andrew</OWNER>
|
||||
<RENEWER>JobTracker</RENEWER>
|
||||
<REALUSER></REALUSER>
|
||||
<ISSUE_DATE>1387480627262</ISSUE_DATE>
|
||||
<MAX_DATE>1388085427262</MAX_DATE>
|
||||
<ISSUE_DATE>1388429890059</ISSUE_DATE>
|
||||
<MAX_DATE>1389034690059</MAX_DATE>
|
||||
<MASTER_KEY_ID>2</MASTER_KEY_ID>
|
||||
</DELEGATION_TOKEN_IDENTIFIER>
|
||||
<EXPIRY_TIME>1387567027281</EXPIRY_TIME>
|
||||
<EXPIRY_TIME>1388516290109</EXPIRY_TIME>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
<RECORD>
|
||||
|
@ -805,8 +805,8 @@
|
|||
<OWNER>andrew</OWNER>
|
||||
<RENEWER>JobTracker</RENEWER>
|
||||
<REALUSER></REALUSER>
|
||||
<ISSUE_DATE>1387480627262</ISSUE_DATE>
|
||||
<MAX_DATE>1388085427262</MAX_DATE>
|
||||
<ISSUE_DATE>1388429890059</ISSUE_DATE>
|
||||
<MAX_DATE>1389034690059</MAX_DATE>
|
||||
<MASTER_KEY_ID>2</MASTER_KEY_ID>
|
||||
</DELEGATION_TOKEN_IDENTIFIER>
|
||||
</DATA>
|
||||
|
@ -821,7 +821,7 @@
|
|||
<MODE>493</MODE>
|
||||
<LIMIT>9223372036854775807</LIMIT>
|
||||
<MAXRELATIVEEXPIRY>2305843009213693951</MAXRELATIVEEXPIRY>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>68</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -834,7 +834,7 @@
|
|||
<GROUPNAME>party</GROUPNAME>
|
||||
<MODE>448</MODE>
|
||||
<LIMIT>1989</LIMIT>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>69</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -846,8 +846,8 @@
|
|||
<PATH>/bar</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<POOL>poolparty</POOL>
|
||||
<EXPIRATION>2305844396694321272</EXPIRATION>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<EXPIRATION>2305844397643584141</EXPIRATION>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>70</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -857,7 +857,7 @@
|
|||
<TXID>64</TXID>
|
||||
<ID>1</ID>
|
||||
<PATH>/bar2</PATH>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>71</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -866,7 +866,7 @@
|
|||
<DATA>
|
||||
<TXID>65</TXID>
|
||||
<ID>1</ID>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>72</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -875,7 +875,7 @@
|
|||
<DATA>
|
||||
<TXID>66</TXID>
|
||||
<POOLNAME>poolparty</POOLNAME>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>73</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -887,17 +887,17 @@
|
|||
<INODEID>16393</INODEID>
|
||||
<PATH>/hard-lease-recovery-test</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480627356</MTIME>
|
||||
<ATIME>1387480627356</ATIME>
|
||||
<MTIME>1388429890261</MTIME>
|
||||
<ATIME>1388429890261</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
|
||||
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
|
||||
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
|
||||
<PERMISSION_STATUS>
|
||||
<USERNAME>andrew</USERNAME>
|
||||
<GROUPNAME>supergroup</GROUPNAME>
|
||||
<MODE>420</MODE>
|
||||
</PERMISSION_STATUS>
|
||||
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
|
||||
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
|
||||
<RPC_CALLID>74</RPC_CALLID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
|
@ -954,7 +954,23 @@
|
|||
<OPCODE>OP_REASSIGN_LEASE</OPCODE>
|
||||
<DATA>
|
||||
<TXID>73</TXID>
|
||||
<LEASEHOLDER>DFSClient_NONMAPREDUCE_1147796111_1</LEASEHOLDER>
|
||||
<LEASEHOLDER>DFSClient_NONMAPREDUCE_-1396063717_1</LEASEHOLDER>
|
||||
<PATH>/hard-lease-recovery-test</PATH>
|
||||
<NEWHOLDER>HDFS_NameNode</NEWHOLDER>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
<RECORD>
|
||||
<OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
|
||||
<DATA>
|
||||
<TXID>74</TXID>
|
||||
<GENSTAMPV2>1012</GENSTAMPV2>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
<RECORD>
|
||||
<OPCODE>OP_REASSIGN_LEASE</OPCODE>
|
||||
<DATA>
|
||||
<TXID>75</TXID>
|
||||
<LEASEHOLDER>HDFS_NameNode</LEASEHOLDER>
|
||||
<PATH>/hard-lease-recovery-test</PATH>
|
||||
<NEWHOLDER>HDFS_NameNode</NEWHOLDER>
|
||||
</DATA>
|
||||
|
@ -962,20 +978,20 @@
|
|||
<RECORD>
|
||||
<OPCODE>OP_CLOSE</OPCODE>
|
||||
<DATA>
|
||||
<TXID>74</TXID>
|
||||
<TXID>76</TXID>
|
||||
<LENGTH>0</LENGTH>
|
||||
<INODEID>0</INODEID>
|
||||
<PATH>/hard-lease-recovery-test</PATH>
|
||||
<REPLICATION>1</REPLICATION>
|
||||
<MTIME>1387480629729</MTIME>
|
||||
<ATIME>1387480627356</ATIME>
|
||||
<MTIME>1388429895216</MTIME>
|
||||
<ATIME>1388429890261</ATIME>
|
||||
<BLOCKSIZE>512</BLOCKSIZE>
|
||||
<CLIENT_NAME></CLIENT_NAME>
|
||||
<CLIENT_MACHINE></CLIENT_MACHINE>
|
||||
<BLOCK>
|
||||
<BLOCK_ID>1073741834</BLOCK_ID>
|
||||
<NUM_BYTES>11</NUM_BYTES>
|
||||
<GENSTAMP>1011</GENSTAMP>
|
||||
<GENSTAMP>1012</GENSTAMP>
|
||||
</BLOCK>
|
||||
<PERMISSION_STATUS>
|
||||
<USERNAME>andrew</USERNAME>
|
||||
|
@ -987,7 +1003,7 @@
|
|||
<RECORD>
|
||||
<OPCODE>OP_END_LOG_SEGMENT</OPCODE>
|
||||
<DATA>
|
||||
<TXID>75</TXID>
|
||||
<TXID>77</TXID>
|
||||
</DATA>
|
||||
</RECORD>
|
||||
</EDITS>
@ -77,6 +77,9 @@ Trunk (Unreleased)
|
|||
MAPREDUCE-5189. Add policies and wiring to respond to preemption requests
|
||||
from YARN. (Carlo Curino via cdouglas)
|
||||
|
||||
MAPREDUCE-5196. Add bookkeeping for managing checkpoints of task state.
|
||||
(Carlo Curino via cdouglas)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant.
|
||||
|
@ -193,6 +196,8 @@ Release 2.4.0 - UNRELEASED
|
|||
MAPREDUCE-5550. Task Status message (reporter.setStatus) not shown in UI
|
||||
with Hadoop 2.0 (Gera Shegalov via Sandy Ryza)
|
||||
|
||||
MAPREDUCE-3310. Custom grouping comparator cannot be set for Combiners (tucu)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza)
|
||||
|
@ -258,6 +263,15 @@ Release 2.4.0 - UNRELEASED
|
|||
MAPREDUCE-5687. Fixed failure in TestYARNRunner caused by YARN-1446. (Jian He
|
||||
via vinodkv)
|
||||
|
||||
MAPREDUCE-5694. Fixed MR AppMaster to shutdown the LogManager so as to avoid
|
||||
losing syslog in some conditions. (Mohammad Kamrul Islam via vinodkv)
|
||||
|
||||
MAPREDUCE-5685. Fixed a bug with JobContext getCacheFiles API inside the
|
||||
WrappedReducer class. (Yi Song via vinodkv)
|
||||
|
||||
MAPREDUCE-5689. MRAppMaster does not preempt reducers when scheduled maps
|
||||
cannot be fulfilled. (lohit via kasha)
|
||||
|
||||
Release 2.3.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
@ -36,7 +36,9 @@ import org.apache.hadoop.ipc.Server;
|
|||
import org.apache.hadoop.mapred.SortedRanges.Range;
|
||||
import org.apache.hadoop.mapreduce.MRJobConfig;
|
||||
import org.apache.hadoop.mapreduce.TypeConverter;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
|
||||
import org.apache.hadoop.mapreduce.v2.app.AppContext;
|
||||
import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener;
|
||||
import org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler;
|
||||
|
@ -45,8 +47,8 @@ import org.apache.hadoop.mapreduce.v2.app.job.Task;
|
|||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
|
||||
import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
|
||||
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
|
||||
import org.apache.hadoop.mapreduce.v2.app.security.authorize.MRAMPolicyProvider;
|
||||
|
@ -228,6 +230,22 @@ public class TaskAttemptListenerImpl extends CompositeService
|
|||
TaskAttemptEventType.TA_COMMIT_PENDING));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void preempted(TaskAttemptID taskAttemptID, TaskStatus taskStatus)
|
||||
throws IOException, InterruptedException {
|
||||
LOG.info("Preempted state update from " + taskAttemptID.toString());
|
||||
// An attempt is telling us that it got preempted.
|
||||
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
|
||||
TypeConverter.toYarn(taskAttemptID);
|
||||
|
||||
preemptionPolicy.reportSuccessfulPreemption(attemptID);
|
||||
taskHeartbeatHandler.progressing(attemptID);
|
||||
|
||||
context.getEventHandler().handle(
|
||||
new TaskAttemptEvent(attemptID,
|
||||
TaskAttemptEventType.TA_PREEMPTED));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void done(TaskAttemptID taskAttemptID) throws IOException {
|
||||
LOG.info("Done acknowledgement from " + taskAttemptID.toString());
|
||||
|
@ -250,6 +268,10 @@ public class TaskAttemptListenerImpl extends CompositeService
|
|||
|
||||
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
|
||||
TypeConverter.toYarn(taskAttemptID);
|
||||
|
||||
// handling checkpoints
|
||||
preemptionPolicy.handleFailedContainer(attemptID);
|
||||
|
||||
context.getEventHandler().handle(
|
||||
new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
|
||||
}
|
||||
|
@ -264,6 +286,10 @@ public class TaskAttemptListenerImpl extends CompositeService
|
|||
|
||||
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
|
||||
TypeConverter.toYarn(taskAttemptID);
|
||||
|
||||
// handling checkpoints
|
||||
preemptionPolicy.handleFailedContainer(attemptID);
|
||||
|
||||
context.getEventHandler().handle(
|
||||
new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
|
||||
}
|
||||
|
@ -293,12 +319,6 @@ public class TaskAttemptListenerImpl extends CompositeService
|
|||
return new MapTaskCompletionEventsUpdate(events, shouldReset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ping(TaskAttemptID taskAttemptID) throws IOException {
|
||||
LOG.info("Ping from " + taskAttemptID.toString());
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reportDiagnosticInfo(TaskAttemptID taskAttemptID, String diagnosticInfo)
|
||||
throws IOException {
|
||||
|
@ -321,11 +341,33 @@ public class TaskAttemptListenerImpl extends CompositeService
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean statusUpdate(TaskAttemptID taskAttemptID,
|
||||
public AMFeedback statusUpdate(TaskAttemptID taskAttemptID,
|
||||
TaskStatus taskStatus) throws IOException, InterruptedException {
|
||||
LOG.info("Status update from " + taskAttemptID.toString());
|
||||
|
||||
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptID =
|
||||
TypeConverter.toYarn(taskAttemptID);
|
||||
|
||||
AMFeedback feedback = new AMFeedback();
|
||||
feedback.setTaskFound(true);
|
||||
|
||||
// Propagating preemption to the task if TASK_PREEMPTION is enabled
|
||||
if (getConfig().getBoolean(MRJobConfig.TASK_PREEMPTION, false)
|
||||
&& preemptionPolicy.isPreempted(yarnAttemptID)) {
|
||||
feedback.setPreemption(true);
|
||||
LOG.info("Setting preemption bit for task: "+ yarnAttemptID
|
||||
+ " of type " + yarnAttemptID.getTaskId().getTaskType());
|
||||
}
|
||||
|
||||
if (taskStatus == null) {
|
||||
//We are using statusUpdate only as a simple ping
|
||||
LOG.info("Ping from " + taskAttemptID.toString());
|
||||
taskHeartbeatHandler.progressing(yarnAttemptID);
|
||||
return feedback;
|
||||
}
|
||||
|
||||
// if we are here there is an actual status update to be processed
|
||||
LOG.info("Status update from " + taskAttemptID.toString());
|
||||
|
||||
taskHeartbeatHandler.progressing(yarnAttemptID);
|
||||
TaskAttemptStatus taskAttemptStatus =
|
||||
new TaskAttemptStatus();
|
||||
|
@ -386,7 +428,7 @@ public class TaskAttemptListenerImpl extends CompositeService
|
|||
context.getEventHandler().handle(
|
||||
new TaskAttemptStatusUpdateEvent(taskAttemptStatus.id,
|
||||
taskAttemptStatus));
|
||||
return true;
|
||||
return feedback;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -494,4 +536,18 @@ public class TaskAttemptListenerImpl extends CompositeService
|
|||
return ProtocolSignature.getProtocolSignature(this,
|
||||
protocol, clientVersion, clientMethodsHash);
|
||||
}
|
||||
|
||||
// task checkpoint bookkeeping
|
||||
@Override
|
||||
public TaskCheckpointID getCheckpointID(TaskID taskId) {
|
||||
TaskId tid = TypeConverter.toYarn(taskId);
|
||||
return preemptionPolicy.getCheckpointID(tid);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
|
||||
TaskId tid = TypeConverter.toYarn(taskId);
|
||||
preemptionPolicy.setCheckpointID(tid, cid);
|
||||
}
|
||||
|
||||
}
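The reworked statusUpdate above replaces the old boolean return and the separate ping() call: a null status is treated as a plain ping, and the preemption signal rides back on the reply. A self-contained sketch of that decision flow is below; Feedback, the string attempt ids, and the in-memory preempted set are illustrative stand-ins rather than the Hadoop classes.

import java.util.HashSet;
import java.util.Set;

// Stand-in types for AMFeedback / AMPreemptionPolicy, for illustration only.
class HeartbeatSketch {
  static final class Feedback { boolean taskFound; boolean preempt; }

  private final Set<String> preempted = new HashSet<String>();
  private final boolean preemptionEnabled;

  HeartbeatSketch(boolean preemptionEnabled) { this.preemptionEnabled = preemptionEnabled; }

  void markPreempted(String attemptId) { preempted.add(attemptId); }

  // Mirrors the order of checks in the reworked statusUpdate: the reply always
  // reports the task as found, and the preemption bit is only set when the
  // feature switch is on and the policy has flagged this attempt.
  Feedback statusUpdate(String attemptId, Object taskStatus) {
    Feedback f = new Feedback();
    f.taskFound = true;
    if (preemptionEnabled && preempted.contains(attemptId)) {
      f.preempt = true;
    }
    if (taskStatus == null) {
      return f; // a null status doubles as a plain ping
    }
    // ... process the real status update here ...
    return f;
  }

  public static void main(String[] args) {
    HeartbeatSketch am = new HeartbeatSketch(true);
    am.markPreempted("attempt_1");
    System.out.println(am.statusUpdate("attempt_1", null).preempt); // true
  }
}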
@ -139,6 +139,7 @@ import org.apache.hadoop.yarn.security.client.ClientToAMTokenSecretManager;
|
|||
import org.apache.hadoop.yarn.util.Clock;
|
||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
import org.apache.hadoop.yarn.util.SystemClock;
|
||||
import org.apache.log4j.LogManager;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
|
@ -1395,6 +1396,8 @@ public class MRAppMaster extends CompositeService {
|
|||
} catch (Throwable t) {
|
||||
LOG.fatal("Error starting MRAppMaster", t);
|
||||
System.exit(1);
|
||||
} finally {
|
||||
LogManager.shutdown();
|
||||
}
|
||||
}
@ -47,6 +47,7 @@ public enum TaskAttemptEventType {
|
|||
TA_FAILMSG,
|
||||
TA_UPDATE,
|
||||
TA_TIMED_OUT,
|
||||
TA_PREEMPTED,
|
||||
|
||||
//Producer:TaskCleaner
|
||||
TA_CLEANUP_DONE,
|
||||
|
|
|
@ -304,6 +304,9 @@ public abstract class TaskAttemptImpl implements
|
|||
.addTransition(TaskAttemptStateInternal.RUNNING,
|
||||
TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP, TaskAttemptEventType.TA_KILL,
|
||||
CLEANUP_CONTAINER_TRANSITION)
|
||||
.addTransition(TaskAttemptStateInternal.RUNNING,
|
||||
TaskAttemptStateInternal.KILLED,
|
||||
TaskAttemptEventType.TA_PREEMPTED, new PreemptedTransition())
|
||||
|
||||
// Transitions from COMMIT_PENDING state
|
||||
.addTransition(TaskAttemptStateInternal.COMMIT_PENDING,
|
||||
|
@ -437,6 +440,7 @@ public abstract class TaskAttemptImpl implements
|
|||
TaskAttemptEventType.TA_DONE,
|
||||
TaskAttemptEventType.TA_FAILMSG,
|
||||
TaskAttemptEventType.TA_CONTAINER_CLEANED,
|
||||
TaskAttemptEventType.TA_PREEMPTED,
|
||||
// Container launch events can arrive late
|
||||
TaskAttemptEventType.TA_CONTAINER_LAUNCHED,
|
||||
TaskAttemptEventType.TA_CONTAINER_LAUNCH_FAILED))
|
||||
|
@ -1874,6 +1878,27 @@ public abstract class TaskAttemptImpl implements
|
|||
}
|
||||
}
|
||||
|
||||
private static class PreemptedTransition implements
|
||||
SingleArcTransition<TaskAttemptImpl,TaskAttemptEvent> {
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void transition(TaskAttemptImpl taskAttempt,
|
||||
TaskAttemptEvent event) {
|
||||
taskAttempt.setFinishTime();
|
||||
taskAttempt.taskAttemptListener.unregister(
|
||||
taskAttempt.attemptId, taskAttempt.jvmID);
|
||||
taskAttempt.eventHandler.handle(new ContainerLauncherEvent(
|
||||
taskAttempt.attemptId,
|
||||
taskAttempt.getAssignedContainerID(), taskAttempt.getAssignedContainerMgrAddress(),
|
||||
taskAttempt.container.getContainerToken(),
|
||||
ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP));
|
||||
taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
|
||||
taskAttempt.attemptId,
|
||||
TaskEventType.T_ATTEMPT_KILLED));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private static class CleanupContainerTransition implements
|
||||
SingleArcTransition<TaskAttemptImpl, TaskAttemptEvent> {
|
||||
@SuppressWarnings("unchecked")
|
|
|||
|
||||
int completedMaps = getJob().getCompletedMaps();
|
||||
int completedTasks = completedMaps + getJob().getCompletedReduces();
|
||||
if (lastCompletedTasks != completedTasks) {
|
||||
if ((lastCompletedTasks != completedTasks) ||
|
||||
(scheduledRequests.maps.size() > 0)) {
|
||||
lastCompletedTasks = completedTasks;
|
||||
recalculateReduceSchedule = true;
|
||||
}
@ -19,10 +19,9 @@ package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.mapred.TaskAttemptID;
|
||||
import org.apache.hadoop.mapred.TaskID;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
|
||||
import org.apache.hadoop.mapreduce.v2.app.AppContext;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
|
@ -81,7 +80,7 @@ public interface AMPreemptionPolicy {
|
|||
* successfully preempted (for bookkeeping, counters, etc.)
|
||||
* @param attemptID Task attempt that preempted
|
||||
*/
|
||||
public void reportSuccessfulPreemption(TaskAttemptID attemptID);
|
||||
public void reportSuccessfulPreemption(TaskAttemptId attemptID);
|
||||
|
||||
/**
|
||||
* Callback informing the policy of containers exiting with a failure. This
|
||||
|
@ -98,20 +97,20 @@ public interface AMPreemptionPolicy {
|
|||
public void handleCompletedContainer(TaskAttemptId attemptID);
|
||||
|
||||
/**
* Method to retrieve the latest checkpoint for a given {@link TaskID}
* Method to retrieve the latest checkpoint for a given {@link TaskId}
* @param taskId TaskID
* @return CheckpointID associated with this task or null
*/
public TaskCheckpointID getCheckpointID(TaskID taskId);
public TaskCheckpointID getCheckpointID(TaskId taskId);

/**
* Method to store the latest {@link
* org.apache.hadoop.mapreduce.checkpoint.CheckpointID} for a given {@link
* TaskID}. Assigning a null is akin to removing all previous checkpoints for
* TaskId}. Assigning a null is akin to removing all previous checkpoints for
* this task.
* @param taskId TaskID
* @param cid Checkpoint to assign or <tt>null</tt> to remove it.
*/
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid);
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid);

}
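The interface now keys checkpoint bookkeeping by the YARN-side TaskId instead of the mapred TaskID. A minimal sketch of that get/set contract over a synchronized map follows; the generic key and checkpoint types are placeholders, and this is not the policy implementation added in the new file below.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

// Stand-in checkpoint bookkeeping keyed by a task identifier; assigning null clears
// any previously stored checkpoint, mirroring the setCheckpointID contract above.
class CheckpointStore<K, C> {
  private final Map<K, C> checkpoints =
      Collections.synchronizedMap(new HashMap<K, C>());

  C getCheckpointID(K taskId) {
    return checkpoints.get(taskId);
  }

  void setCheckpointID(K taskId, C cid) {
    if (cid == null) {
      checkpoints.remove(taskId);
    } else {
      checkpoints.put(taskId, cid);
    }
  }

  public static void main(String[] args) {
    CheckpointStore<String, String> store = new CheckpointStore<String, String>();
    store.setCheckpointID("task_1", "ckpt-A");
    System.out.println(store.getCheckpointID("task_1")); // ckpt-A
    store.setCheckpointID("task_1", null);               // clears the entry
    System.out.println(store.getCheckpointID("task_1")); // null
  }
}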
@ -0,0 +1,290 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.mapreduce.JobCounter;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
|
||||
import org.apache.hadoop.mapreduce.v2.app.AppContext;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionContainer;
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionContract;
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
|
||||
/**
* This policy works in combination with an implementation of task
* checkpointing. It computes the tasks to be preempted in response to the RM
* request for preemption. For strict requests, it maps containers to
* corresponding tasks; for fungible requests, it attempts to pick the best
* containers to preempt (reducers in reverse allocation order). The
* TaskAttemptListener will interrogate this policy when handling a task
* heartbeat to check whether the task should be preempted or not. When handling
* fungible requests, the policy discounts the RM ask by the amount of currently
* in-flight preemptions (i.e., tasks that are checkpointing).
*
* This class is also used to maintain the list of checkpoints for existing
* tasks. Centralizing this functionality here allows us to have visibility on
* preemption and checkpoints in a single location, thus coordinating preemption
* and checkpoint management decisions in a single policy.
*/
|
||||
public class CheckpointAMPreemptionPolicy implements AMPreemptionPolicy {
|
||||
|
||||
// task attempts flagged for preemption
|
||||
private final Set<TaskAttemptId> toBePreempted;
|
||||
|
||||
private final Set<TaskAttemptId> countedPreemptions;
|
||||
|
||||
private final Map<TaskId,TaskCheckpointID> checkpoints;
|
||||
|
||||
private final Map<TaskAttemptId,Resource> pendingFlexiblePreemptions;
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
private EventHandler eventHandler;
|
||||
|
||||
static final Log LOG = LogFactory
|
||||
.getLog(CheckpointAMPreemptionPolicy.class);
|
||||
|
||||
public CheckpointAMPreemptionPolicy() {
|
||||
this(Collections.synchronizedSet(new HashSet<TaskAttemptId>()),
|
||||
Collections.synchronizedSet(new HashSet<TaskAttemptId>()),
|
||||
Collections.synchronizedMap(new HashMap<TaskId,TaskCheckpointID>()),
|
||||
Collections.synchronizedMap(new HashMap<TaskAttemptId,Resource>()));
|
||||
}
|
||||
|
||||
CheckpointAMPreemptionPolicy(Set<TaskAttemptId> toBePreempted,
|
||||
Set<TaskAttemptId> countedPreemptions,
|
||||
Map<TaskId,TaskCheckpointID> checkpoints,
|
||||
Map<TaskAttemptId,Resource> pendingFlexiblePreemptions) {
|
||||
this.toBePreempted = toBePreempted;
|
||||
this.countedPreemptions = countedPreemptions;
|
||||
this.checkpoints = checkpoints;
|
||||
this.pendingFlexiblePreemptions = pendingFlexiblePreemptions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(AppContext context) {
|
||||
this.eventHandler = context.getEventHandler();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void preempt(Context ctxt, PreemptionMessage preemptionRequests) {
|
||||
|
||||
if (preemptionRequests != null) {
|
||||
|
||||
// handling non-negotiable preemption
|
||||
|
||||
StrictPreemptionContract cStrict = preemptionRequests.getStrictContract();
|
||||
if (cStrict != null
|
||||
&& cStrict.getContainers() != null
|
||||
&& cStrict.getContainers().size() > 0) {
|
||||
LOG.info("strict preemption :" +
|
||||
preemptionRequests.getStrictContract().getContainers().size() +
|
||||
" containers to kill");
|
||||
|
||||
// handle strict preemptions. These containers are non-negotiable
|
||||
for (PreemptionContainer c :
|
||||
preemptionRequests.getStrictContract().getContainers()) {
|
||||
ContainerId reqCont = c.getId();
|
||||
TaskAttemptId reqTask = ctxt.getTaskAttempt(reqCont);
|
||||
if (reqTask != null) {
|
||||
// ignore requests for preempting containers running maps
|
||||
if (org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE
|
||||
.equals(reqTask.getTaskId().getTaskType())) {
|
||||
toBePreempted.add(reqTask);
|
||||
LOG.info("preempting " + reqCont + " running task:" + reqTask);
|
||||
} else {
|
||||
LOG.info("NOT preempting " + reqCont + " running task:" + reqTask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handling negotiable preemption
|
||||
PreemptionContract cNegot = preemptionRequests.getContract();
|
||||
if (cNegot != null
|
||||
&& cNegot.getResourceRequest() != null
|
||||
&& cNegot.getResourceRequest().size() > 0
|
||||
&& cNegot.getContainers() != null
|
||||
&& cNegot.getContainers().size() > 0) {
|
||||
|
||||
LOG.info("negotiable preemption :" +
|
||||
preemptionRequests.getContract().getResourceRequest().size() +
|
||||
" resourceReq, " +
|
||||
preemptionRequests.getContract().getContainers().size() +
|
||||
" containers");
|
||||
// handle fungible preemption. Here we only look at the total amount of
|
||||
// resources to be preempted and pick enough of our containers to
|
||||
// satisfy that. We only support checkpointing for reducers for now.
|
||||
List<PreemptionResourceRequest> reqResources =
|
||||
preemptionRequests.getContract().getResourceRequest();
|
||||
|
||||
// compute the total amount of pending preemptions (to be discounted
|
||||
// from current request)
|
||||
int pendingPreemptionRam = 0;
|
||||
int pendingPreemptionCores = 0;
|
||||
for (Resource r : pendingFlexiblePreemptions.values()) {
|
||||
pendingPreemptionRam += r.getMemory();
|
||||
pendingPreemptionCores += r.getVirtualCores();
|
||||
}
|
||||
|
||||
// discount preemption request based on currently pending preemption
|
||||
for (PreemptionResourceRequest rr : reqResources) {
|
||||
ResourceRequest reqRsrc = rr.getResourceRequest();
|
||||
if (!ResourceRequest.ANY.equals(reqRsrc.getResourceName())) {
|
||||
// For now, only respond to aggregate requests and ignore locality
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG.info("ResourceRequest:" + reqRsrc);
|
||||
int reqCont = reqRsrc.getNumContainers();
|
||||
int reqMem = reqRsrc.getCapability().getMemory();
|
||||
int totalMemoryToRelease = reqCont * reqMem;
|
||||
int reqCores = reqRsrc.getCapability().getVirtualCores();
|
||||
int totalCoresToRelease = reqCont * reqCores;
|
||||
|
||||
// remove
|
||||
if (pendingPreemptionRam > 0) {
|
||||
// if goes negative we simply exit
|
||||
totalMemoryToRelease -= pendingPreemptionRam;
|
||||
// decrement pending resources; if zero or negative we will
|
||||
// ignore it while processing next PreemptionResourceRequest
|
||||
pendingPreemptionRam -= totalMemoryToRelease;
|
||||
}
|
||||
if (pendingPreemptionCores > 0) {
|
||||
totalCoresToRelease -= pendingPreemptionCores;
|
||||
pendingPreemptionCores -= totalCoresToRelease;
|
||||
}
|
||||
|
||||
// reverse order of allocation (for now)
|
||||
List<Container> listOfCont = ctxt.getContainers(TaskType.REDUCE);
|
||||
Collections.sort(listOfCont, new Comparator<Container>() {
|
||||
@Override
|
||||
public int compare(final Container o1, final Container o2) {
|
||||
return o2.getId().getId() - o1.getId().getId();
|
||||
}
|
||||
});
|
||||
|
||||
// preempt reducers first
|
||||
for (Container cont : listOfCont) {
|
||||
if (totalMemoryToRelease <= 0 && totalCoresToRelease<=0) {
|
||||
break;
|
||||
}
|
||||
TaskAttemptId reduceId = ctxt.getTaskAttempt(cont.getId());
|
||||
int cMem = cont.getResource().getMemory();
|
||||
int cCores = cont.getResource().getVirtualCores();
|
||||
|
||||
if (!toBePreempted.contains(reduceId)) {
|
||||
totalMemoryToRelease -= cMem;
|
||||
totalCoresToRelease -= cCores;
|
||||
toBePreempted.add(reduceId);
|
||||
pendingFlexiblePreemptions.put(reduceId, cont.getResource());
|
||||
}
|
||||
LOG.info("ResourceRequest:" + reqRsrc + " satisfied preempting "
|
||||
+ reduceId);
|
||||
}
|
||||
// if maps were preemptable we would add them to toBePreempted here
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleFailedContainer(TaskAttemptId attemptID) {
|
||||
toBePreempted.remove(attemptID);
|
||||
checkpoints.remove(attemptID.getTaskId());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleCompletedContainer(TaskAttemptId attemptID){
|
||||
LOG.info(" task completed:" + attemptID);
|
||||
toBePreempted.remove(attemptID);
|
||||
pendingFlexiblePreemptions.remove(attemptID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPreempted(TaskAttemptId yarnAttemptID) {
|
||||
if (toBePreempted.contains(yarnAttemptID)) {
|
||||
updatePreemptionCounters(yarnAttemptID);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
@Override
|
||||
public TaskCheckpointID getCheckpointID(TaskId taskId) {
|
||||
return checkpoints.get(taskId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
|
||||
checkpoints.put(taskId, cid);
|
||||
if (cid != null) {
|
||||
updateCheckpointCounters(taskId, cid);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings({ "unchecked" })
|
||||
private void updateCheckpointCounters(TaskId taskId, TaskCheckpointID cid) {
|
||||
JobCounterUpdateEvent jce = new JobCounterUpdateEvent(taskId.getJobId());
|
||||
jce.addCounterUpdate(JobCounter.CHECKPOINTS, 1);
|
||||
eventHandler.handle(jce);
|
||||
jce = new JobCounterUpdateEvent(taskId.getJobId());
|
||||
jce.addCounterUpdate(JobCounter.CHECKPOINT_BYTES, cid.getCheckpointBytes());
|
||||
eventHandler.handle(jce);
|
||||
jce = new JobCounterUpdateEvent(taskId.getJobId());
|
||||
jce.addCounterUpdate(JobCounter.CHECKPOINT_TIME, cid.getCheckpointTime());
|
||||
eventHandler.handle(jce);
|
||||
|
||||
}
|
||||
|
||||
@SuppressWarnings({ "unchecked" })
|
||||
private void updatePreemptionCounters(TaskAttemptId yarnAttemptID) {
|
||||
if (!countedPreemptions.contains(yarnAttemptID)) {
|
||||
countedPreemptions.add(yarnAttemptID);
|
||||
JobCounterUpdateEvent jce = new JobCounterUpdateEvent(yarnAttemptID
|
||||
.getTaskId().getJobId());
|
||||
jce.addCounterUpdate(JobCounter.TASKS_REQ_PREEMPT, 1);
|
||||
eventHandler.handle(jce);
|
||||
}
|
||||
}
|
||||
|
||||
}
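For the fungible path described in the class comment, the accounting reduces to turning an aggregate resource request into an amount still to release, discounting in-flight preemptions, and then walking reducer containers in reverse allocation order. The sketch below shows only that arithmetic with plain ints and a tiny stand-in container type; it is not the shipped policy.

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

public class FungiblePreemptionSketch {
  static final class Cont {
    final int id, mem;
    Cont(int id, int mem) { this.id = id; this.mem = mem; }
  }

  // Returns the container ids picked for preemption for an aggregate request of
  // numContainers * memPerContainer, discounted by memory already being released
  // through in-flight (pending) preemptions.
  static List<Integer> pick(List<Cont> reducers, int numContainers,
      int memPerContainer, int pendingPreemptionMem) {
    int toRelease = numContainers * memPerContainer - pendingPreemptionMem;
    // newest allocations first, mirroring the reverse allocation order above
    List<Cont> sorted = new ArrayList<Cont>(reducers);
    Collections.sort(sorted, new Comparator<Cont>() {
      @Override
      public int compare(Cont a, Cont b) { return b.id - a.id; }
    });
    List<Integer> picked = new ArrayList<Integer>();
    for (Cont c : sorted) {
      if (toRelease <= 0) {
        break;
      }
      toRelease -= c.mem;
      picked.add(c.id);
    }
    return picked;
  }

  public static void main(String[] args) {
    List<Cont> reducers = new ArrayList<Cont>();
    reducers.add(new Cont(1, 1024));
    reducers.add(new Cont(2, 1024));
    reducers.add(new Cont(3, 1024));
    // ask for 2 x 1024 MB while 1024 MB is already being released elsewhere
    System.out.println(pick(reducers, 2, 1024, 1024)); // [3]
  }
}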
@ -19,11 +19,10 @@ package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
|
|||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.mapred.TaskAttemptID;
|
||||
import org.apache.hadoop.mapred.TaskID;
|
||||
import org.apache.hadoop.mapreduce.JobCounter;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
|
||||
import org.apache.hadoop.mapreduce.v2.app.AppContext;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
|
||||
|
@ -89,17 +88,17 @@ public class KillAMPreemptionPolicy implements AMPreemptionPolicy {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) {
|
||||
public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
@Override
|
||||
public TaskCheckpointID getCheckpointID(TaskID taskId) {
|
||||
public TaskCheckpointID getCheckpointID(TaskId taskId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
|
||||
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
|
||||
// ignore
|
||||
}
|
|
|||
*/
|
||||
package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
|
||||
|
||||
import org.apache.hadoop.mapred.TaskAttemptID;
|
||||
import org.apache.hadoop.mapred.TaskID;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
|
||||
import org.apache.hadoop.mapreduce.v2.app.AppContext;
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
|
||||
|
||||
|
@ -50,17 +49,17 @@ public class NoopAMPreemptionPolicy implements AMPreemptionPolicy {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) {
|
||||
public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
@Override
|
||||
public TaskCheckpointID getCheckpointID(TaskID taskId) {
|
||||
public TaskCheckpointID getCheckpointID(TaskId taskId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
|
||||
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
|
||||
// ignore
|
||||
}
|
|
|||
*/
|
||||
package org.apache.hadoop.mapred;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.mockito.Matchers.any;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.never;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
import org.apache.hadoop.mapred.Counters;
|
||||
import org.apache.hadoop.mapred.Counters.Counter;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.EnumCounter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.mapreduce.MRJobConfig;
|
||||
import org.apache.hadoop.mapreduce.TaskType;
|
||||
import org.apache.hadoop.mapreduce.TypeConverter;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent;
|
||||
|
@ -46,21 +43,31 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
|
|||
import org.apache.hadoop.mapreduce.v2.app.AppContext;
|
||||
import org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.Job;
|
||||
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
|
||||
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.CheckpointAMPreemptionPolicy;
|
||||
import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
|
||||
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
|
||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
import org.apache.hadoop.yarn.util.SystemClock;
|
||||
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.*;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
public class TestTaskAttemptListenerImpl {
|
||||
public static class MockTaskAttemptListenerImpl extends TaskAttemptListenerImpl {
|
||||
public static class MockTaskAttemptListenerImpl
|
||||
extends TaskAttemptListenerImpl {
|
||||
|
||||
public MockTaskAttemptListenerImpl(AppContext context,
|
||||
JobTokenSecretManager jobTokenSecretManager,
|
||||
RMHeartbeatHandler rmHeartbeatHandler,
|
||||
TaskHeartbeatHandler hbHandler) {
|
||||
super(context, jobTokenSecretManager, rmHeartbeatHandler, null);
|
||||
TaskHeartbeatHandler hbHandler,
|
||||
AMPreemptionPolicy policy) {
|
||||
|
||||
super(context, jobTokenSecretManager, rmHeartbeatHandler, policy);
|
||||
this.taskHeartbeatHandler = hbHandler;
|
||||
}
|
||||
|
||||
|
@ -87,9 +94,16 @@ public class TestTaskAttemptListenerImpl {
|
|||
RMHeartbeatHandler rmHeartbeatHandler =
|
||||
mock(RMHeartbeatHandler.class);
|
||||
TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
|
||||
Dispatcher dispatcher = mock(Dispatcher.class);
|
||||
EventHandler ea = mock(EventHandler.class);
|
||||
when(dispatcher.getEventHandler()).thenReturn(ea);
|
||||
|
||||
when(appCtx.getEventHandler()).thenReturn(ea);
|
||||
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
|
||||
policy.init(appCtx);
|
||||
MockTaskAttemptListenerImpl listener =
|
||||
new MockTaskAttemptListenerImpl(appCtx, secret,
|
||||
rmHeartbeatHandler, hbHandler);
|
||||
rmHeartbeatHandler, hbHandler, policy);
|
||||
Configuration conf = new Configuration();
|
||||
listener.init(conf);
|
||||
listener.start();
|
||||
|
@ -144,7 +158,7 @@ public class TestTaskAttemptListenerImpl {
|
|||
assertNotNull(jvmid);
|
||||
try {
|
||||
JVMId.forName("jvm_001_002_m_004_006");
|
||||
Assert.fail();
|
||||
fail();
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertEquals(e.getMessage(),
|
||||
"TaskId string : jvm_001_002_m_004_006 is not properly formed");
|
||||
|
@ -190,8 +204,14 @@ public class TestTaskAttemptListenerImpl {
|
|||
RMHeartbeatHandler rmHeartbeatHandler =
|
||||
mock(RMHeartbeatHandler.class);
|
||||
final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
|
||||
TaskAttemptListenerImpl listener =
|
||||
new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) {
|
||||
Dispatcher dispatcher = mock(Dispatcher.class);
|
||||
EventHandler ea = mock(EventHandler.class);
|
||||
when(dispatcher.getEventHandler()).thenReturn(ea);
|
||||
when(appCtx.getEventHandler()).thenReturn(ea);
|
||||
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
|
||||
policy.init(appCtx);
|
||||
TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
|
||||
appCtx, secret, rmHeartbeatHandler, policy) {
|
||||
@Override
|
||||
protected void registerHeartbeatHandler(Configuration conf) {
|
||||
taskHeartbeatHandler = hbHandler;
|
||||
|
@ -219,7 +239,8 @@ public class TestTaskAttemptListenerImpl {
|
|||
isMap ? org.apache.hadoop.mapreduce.v2.api.records.TaskType.MAP
|
||||
: org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE);
|
||||
TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(tid, 0);
|
||||
RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
|
||||
RecordFactory recordFactory =
|
||||
RecordFactoryProvider.getRecordFactory(null);
|
||||
TaskAttemptCompletionEvent tce = recordFactory
|
||||
.newRecordInstance(TaskAttemptCompletionEvent.class);
|
||||
tce.setEventId(eventId);
|
||||
|
@ -244,8 +265,14 @@ public class TestTaskAttemptListenerImpl {
|
|||
RMHeartbeatHandler rmHeartbeatHandler =
|
||||
mock(RMHeartbeatHandler.class);
|
||||
final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
|
||||
TaskAttemptListenerImpl listener =
|
||||
new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) {
|
||||
Dispatcher dispatcher = mock(Dispatcher.class);
|
||||
EventHandler ea = mock(EventHandler.class);
|
||||
when(dispatcher.getEventHandler()).thenReturn(ea);
|
||||
when(appCtx.getEventHandler()).thenReturn(ea);
|
||||
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
|
||||
policy.init(appCtx);
|
||||
TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
|
||||
appCtx, secret, rmHeartbeatHandler, policy) {
|
||||
@Override
|
||||
protected void registerHeartbeatHandler(Configuration conf) {
|
||||
taskHeartbeatHandler = hbHandler;
|
||||
|
@ -270,4 +297,88 @@ public class TestTaskAttemptListenerImpl {
|
|||
|
||||
listener.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCheckpointIDTracking()
|
||||
throws IOException, InterruptedException{
|
||||
|
||||
SystemClock clock = new SystemClock();
|
||||
|
||||
org.apache.hadoop.mapreduce.v2.app.job.Task mockTask =
|
||||
mock(org.apache.hadoop.mapreduce.v2.app.job.Task.class);
|
||||
when(mockTask.canCommit(any(TaskAttemptId.class))).thenReturn(true);
|
||||
Job mockJob = mock(Job.class);
|
||||
when(mockJob.getTask(any(TaskId.class))).thenReturn(mockTask);
|
||||
|
||||
Dispatcher dispatcher = mock(Dispatcher.class);
|
||||
EventHandler ea = mock(EventHandler.class);
|
||||
when(dispatcher.getEventHandler()).thenReturn(ea);
|
||||
|
||||
RMHeartbeatHandler rmHeartbeatHandler =
|
||||
mock(RMHeartbeatHandler.class);
|
||||
|
||||
AppContext appCtx = mock(AppContext.class);
|
||||
when(appCtx.getJob(any(JobId.class))).thenReturn(mockJob);
|
||||
when(appCtx.getClock()).thenReturn(clock);
|
||||
when(appCtx.getEventHandler()).thenReturn(ea);
|
||||
JobTokenSecretManager secret = mock(JobTokenSecretManager.class);
|
||||
final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
|
||||
when(appCtx.getEventHandler()).thenReturn(ea);
|
||||
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
|
||||
policy.init(appCtx);
|
||||
TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
|
||||
appCtx, secret, rmHeartbeatHandler, policy) {
|
||||
@Override
|
||||
protected void registerHeartbeatHandler(Configuration conf) {
|
||||
taskHeartbeatHandler = hbHandler;
|
||||
}
|
||||
};
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
conf.setBoolean(MRJobConfig.TASK_PREEMPTION, true);
|
||||
//conf.setBoolean("preemption.reduce", true);
|
||||
|
||||
listener.init(conf);
|
||||
listener.start();
|
||||
|
||||
TaskAttemptID tid = new TaskAttemptID("12345", 1, TaskType.REDUCE, 1, 0);
|
||||
|
||||
List<Path> partialOut = new ArrayList<Path>();
|
||||
partialOut.add(new Path("/prev1"));
|
||||
partialOut.add(new Path("/prev2"));
|
||||
|
||||
Counters counters = mock(Counters.class);
|
||||
final long CBYTES = 64L * 1024 * 1024;
|
||||
final long CTIME = 4344L;
|
||||
final Path CLOC = new Path("/test/1");
|
||||
Counter cbytes = mock(Counter.class);
|
||||
when(cbytes.getValue()).thenReturn(CBYTES);
|
||||
Counter ctime = mock(Counter.class);
|
||||
when(ctime.getValue()).thenReturn(CTIME);
|
||||
when(counters.findCounter(eq(EnumCounter.CHECKPOINT_BYTES)))
|
||||
.thenReturn(cbytes);
|
||||
when(counters.findCounter(eq(EnumCounter.CHECKPOINT_MS)))
|
||||
.thenReturn(ctime);
|
||||
|
||||
// propagating a TaskStatus that contains a checkpoint id
|
||||
TaskCheckpointID incid = new TaskCheckpointID(new FSCheckpointID(
|
||||
CLOC), partialOut, counters);
|
||||
listener.setCheckpointID(
|
||||
org.apache.hadoop.mapred.TaskID.downgrade(tid.getTaskID()), incid);
|
||||
|
||||
// and try to get it back
|
||||
CheckpointID outcid = listener.getCheckpointID(tid.getTaskID());
|
||||
TaskCheckpointID tcid = (TaskCheckpointID) outcid;
|
||||
assertEquals(CBYTES, tcid.getCheckpointBytes());
|
||||
assertEquals(CTIME, tcid.getCheckpointTime());
|
||||
assertTrue(partialOut.containsAll(tcid.getPartialCommittedOutput()));
|
||||
assertTrue(tcid.getPartialCommittedOutput().containsAll(partialOut));
|
||||
|
||||
//assert it worked
|
||||
assert outcid == incid;
|
||||
|
||||
listener.stop();
|
||||
|
||||
}
|
||||
|
||||
}
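The wiring exercised above, condensed into a hedged sketch (appCtx, secret, rmHeartbeatHandler and conf are assumed to come from the running MRAppMaster rather than from mocks): the listener is now constructed with an initialized preemption policy instead of a null placeholder.

CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appCtx);                 // the policy needs the AppContext before use
TaskAttemptListenerImpl listener =
    new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, policy);
listener.init(conf);
listener.start();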
@ -0,0 +1,329 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.mapreduce.v2.app;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionContract;
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.mapred.TaskAttemptListenerImpl;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
|
||||
import org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext;
|
||||
import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator;
|
||||
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
|
||||
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.CheckpointAMPreemptionPolicy;
|
||||
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionContainer;
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest;
|
||||
import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestCheckpointPreemptionPolicy {
|
||||
|
||||
TaskAttemptListenerImpl pel= null;
|
||||
RMContainerAllocator r;
|
||||
JobId jid;
|
||||
RunningAppContext mActxt;
|
||||
Set<ContainerId> preemptedContainers = new HashSet<ContainerId>();
|
||||
Map<ContainerId,TaskAttemptId> assignedContainers =
|
||||
new HashMap<ContainerId, TaskAttemptId>();
|
||||
private final RecordFactory recordFactory =
|
||||
RecordFactoryProvider.getRecordFactory(null);
|
||||
HashMap<ContainerId,Resource> contToResourceMap =
|
||||
new HashMap<ContainerId, Resource>();
|
||||
|
||||
private int minAlloc = 1024;
|
||||
|
||||
@Before
|
||||
@SuppressWarnings("rawtypes") // mocked generics
|
||||
public void setup() {
|
||||
ApplicationId appId = ApplicationId.newInstance(200, 1);
|
||||
ApplicationAttemptId appAttemptId =
|
||||
ApplicationAttemptId.newInstance(appId, 1);
|
||||
jid = MRBuilderUtils.newJobId(appId, 1);
|
||||
|
||||
mActxt = mock(RunningAppContext.class);
|
||||
EventHandler ea = mock(EventHandler.class);
|
||||
when(mActxt.getEventHandler()).thenReturn(ea);
|
||||
for (int i = 0; i < 40; ++i) {
|
||||
ContainerId cId = ContainerId.newInstance(appAttemptId, i);
|
||||
if (0 == i % 7) {
|
||||
preemptedContainers.add(cId);
|
||||
}
|
||||
TaskId tId = 0 == i % 2
|
||||
? MRBuilderUtils.newTaskId(jid, i / 2, TaskType.MAP)
|
||||
: MRBuilderUtils.newTaskId(jid, i / 2 + 1, TaskType.REDUCE);
|
||||
assignedContainers.put(cId, MRBuilderUtils.newTaskAttemptId(tId, 0));
|
||||
contToResourceMap.put(cId, Resource.newInstance(2 * minAlloc, 2));
|
||||
}
|
||||
|
||||
for (Map.Entry<ContainerId,TaskAttemptId> ent :
|
||||
assignedContainers.entrySet()) {
|
||||
System.out.println("cont:" + ent.getKey().getId() +
|
||||
" type:" + ent.getValue().getTaskId().getTaskType() +
|
||||
" res:" + contToResourceMap.get(ent.getKey()).getMemory() + "MB" );
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStrictPreemptionContract() {
|
||||
|
||||
final Map<ContainerId,TaskAttemptId> containers = assignedContainers;
|
||||
AMPreemptionPolicy.Context mPctxt = new AMPreemptionPolicy.Context() {
|
||||
@Override
|
||||
public TaskAttemptId getTaskAttempt(ContainerId cId) {
|
||||
return containers.get(cId);
|
||||
}
|
||||
@Override
|
||||
public List<Container> getContainers(TaskType t) {
|
||||
List<Container> p = new ArrayList<Container>();
|
||||
for (Map.Entry<ContainerId,TaskAttemptId> ent :
|
||||
assignedContainers.entrySet()) {
|
||||
if (ent.getValue().getTaskId().getTaskType().equals(t)) {
|
||||
p.add(Container.newInstance(ent.getKey(), null, null,
|
||||
contToResourceMap.get(ent.getKey()),
|
||||
Priority.newInstance(0), null));
|
||||
}
|
||||
}
|
||||
return p;
|
||||
}
|
||||
};
|
||||
|
||||
PreemptionMessage pM = generatePreemptionMessage(preemptedContainers,
|
||||
contToResourceMap, Resource.newInstance(1024, 1), true);
|
||||
|
||||
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
|
||||
policy.init(mActxt);
|
||||
policy.preempt(mPctxt, pM);
|
||||
|
||||
|
||||
for (ContainerId c : preemptedContainers) {
|
||||
TaskAttemptId t = assignedContainers.get(c);
|
||||
if (TaskType.MAP.equals(t.getTaskId().getTaskType())) {
|
||||
assert policy.isPreempted(t) == false;
|
||||
} else {
|
||||
assert policy.isPreempted(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testPreemptionContract() {
|
||||
final Map<ContainerId,TaskAttemptId> containers = assignedContainers;
|
||||
AMPreemptionPolicy.Context mPctxt = new AMPreemptionPolicy.Context() {
|
||||
@Override
|
||||
public TaskAttemptId getTaskAttempt(ContainerId cId) {
|
||||
return containers.get(cId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Container> getContainers(TaskType t) {
|
||||
List<Container> p = new ArrayList<Container>();
|
||||
for (Map.Entry<ContainerId,TaskAttemptId> ent :
|
||||
assignedContainers.entrySet()){
|
||||
if(ent.getValue().getTaskId().getTaskType().equals(t)){
|
||||
p.add(Container.newInstance(ent.getKey(), null, null,
|
||||
contToResourceMap.get(ent.getKey()),
|
||||
Priority.newInstance(0), null));
|
||||
}
|
||||
}
|
||||
return p;
|
||||
}
|
||||
};
|
||||
|
||||
PreemptionMessage pM = generatePreemptionMessage(preemptedContainers,
|
||||
contToResourceMap, Resource.newInstance(minAlloc, 1), false);
|
||||
|
||||
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
|
||||
policy.init(mActxt);
|
||||
|
||||
int supposedMemPreemption = pM.getContract().getResourceRequest()
|
||||
.get(0).getResourceRequest().getCapability().getMemory()
|
||||
* pM.getContract().getResourceRequest().get(0).getResourceRequest()
|
||||
.getNumContainers();
|
||||
|
||||
// first round of preemption
|
||||
policy.preempt(mPctxt, pM);
|
||||
List<TaskAttemptId> preempting =
|
||||
validatePreemption(pM, policy, supposedMemPreemption);
|
||||
|
||||
// redundant message
|
||||
policy.preempt(mPctxt, pM);
|
||||
List<TaskAttemptId> preempting2 =
|
||||
validatePreemption(pM, policy, supposedMemPreemption);
|
||||
|
||||
// check that nothing got added
|
||||
assert preempting2.equals(preempting);
|
||||
|
||||
// simulate 2 task completions/successful preemption
|
||||
policy.handleCompletedContainer(preempting.get(0));
|
||||
policy.handleCompletedContainer(preempting.get(1));
|
||||
|
||||
// remove from assignedContainers
|
||||
Iterator<Map.Entry<ContainerId,TaskAttemptId>> it =
|
||||
assignedContainers.entrySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
Map.Entry<ContainerId,TaskAttemptId> ent = it.next();
|
||||
if (ent.getValue().equals(preempting.get(0)) ||
|
||||
ent.getValue().equals(preempting.get(1)))
|
||||
it.remove();
|
||||
}
|
||||
|
||||
// one more message asking for preemption
|
||||
policy.preempt(mPctxt, pM);
|
||||
|
||||
// triggers preemption of 2 more containers (i.e., the preemption set changes)
|
||||
List<TaskAttemptId> preempting3 =
|
||||
validatePreemption(pM, policy, supposedMemPreemption);
|
||||
assert preempting3.equals(preempting2) == false;
|
||||
}
|
||||
|
||||
private List<TaskAttemptId> validatePreemption(PreemptionMessage pM,
|
||||
CheckpointAMPreemptionPolicy policy, int supposedMemPreemption) {
|
||||
Resource effectivelyPreempted = Resource.newInstance(0, 0);
|
||||
|
||||
List<TaskAttemptId> preempting = new ArrayList<TaskAttemptId>();
|
||||
|
||||
for (Map.Entry<ContainerId, TaskAttemptId> ent :
|
||||
assignedContainers.entrySet()) {
|
||||
if (policy.isPreempted(ent.getValue())) {
|
||||
Resources.addTo(effectivelyPreempted,contToResourceMap.get(ent.getKey()));
|
||||
// preempt only reducers
|
||||
if (policy.isPreempted(ent.getValue())){
|
||||
assertEquals(TaskType.REDUCE, ent.getValue().getTaskId().getTaskType());
|
||||
preempting.add(ent.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// preempt enough
|
||||
assert (effectivelyPreempted.getMemory() >= supposedMemPreemption)
|
||||
: " preempted: " + effectivelyPreempted.getMemory();
|
||||
|
||||
// but do not preempt too much
|
||||
assert effectivelyPreempted.getMemory() <= supposedMemPreemption + minAlloc;
|
||||
return preempting;
|
||||
}
|
||||
|
||||
private PreemptionMessage generatePreemptionMessage(
|
||||
Set<ContainerId> containerToPreempt,
|
||||
HashMap<ContainerId, Resource> resPerCont,
|
||||
Resource minimumAllocation, boolean strict) {
|
||||
|
||||
Set<ContainerId> currentContPreemption = Collections.unmodifiableSet(
|
||||
new HashSet<ContainerId>(containerToPreempt));
|
||||
containerToPreempt.clear();
|
||||
Resource tot = Resource.newInstance(0, 0);
|
||||
for(ContainerId c : currentContPreemption){
|
||||
Resources.addTo(tot,
|
||||
resPerCont.get(c));
|
||||
}
|
||||
int numCont = (int) Math.ceil(tot.getMemory() /
|
||||
(double) minimumAllocation.getMemory());
|
||||
ResourceRequest rr = ResourceRequest.newInstance(
|
||||
Priority.newInstance(0), ResourceRequest.ANY,
|
||||
minimumAllocation, numCont);
|
||||
if (strict) {
|
||||
return generatePreemptionMessage(new Allocation(null, null,
|
||||
currentContPreemption, null, null));
|
||||
}
|
||||
return generatePreemptionMessage(new Allocation(null, null,
|
||||
null, currentContPreemption,
|
||||
Collections.singletonList(rr)));
|
||||
}
|
||||
|
||||
|
||||
private PreemptionMessage generatePreemptionMessage(Allocation allocation) {
|
||||
PreemptionMessage pMsg = null;
|
||||
// assemble strict preemption request
|
||||
if (allocation.getStrictContainerPreemptions() != null) {
|
||||
pMsg = recordFactory.newRecordInstance(PreemptionMessage.class);
|
||||
StrictPreemptionContract pStrict =
|
||||
recordFactory.newRecordInstance(StrictPreemptionContract.class);
|
||||
Set<PreemptionContainer> pCont = new HashSet<PreemptionContainer>();
|
||||
for (ContainerId cId : allocation.getStrictContainerPreemptions()) {
|
||||
PreemptionContainer pc =
|
||||
recordFactory.newRecordInstance(PreemptionContainer.class);
|
||||
pc.setId(cId);
|
||||
pCont.add(pc);
|
||||
}
|
||||
pStrict.setContainers(pCont);
|
||||
pMsg.setStrictContract(pStrict);
|
||||
}
|
||||
|
||||
// assemble negotiable preemption request
|
||||
if (allocation.getResourcePreemptions() != null &&
|
||||
allocation.getResourcePreemptions().size() > 0 &&
|
||||
allocation.getContainerPreemptions() != null &&
|
||||
allocation.getContainerPreemptions().size() > 0) {
|
||||
if (pMsg == null) {
|
||||
pMsg = recordFactory.newRecordInstance(PreemptionMessage.class);
|
||||
}
|
||||
PreemptionContract contract =
|
||||
recordFactory.newRecordInstance(PreemptionContract.class);
|
||||
Set<PreemptionContainer> pCont = new HashSet<PreemptionContainer>();
|
||||
for (ContainerId cId : allocation.getContainerPreemptions()) {
|
||||
PreemptionContainer pc =
|
||||
recordFactory.newRecordInstance(PreemptionContainer.class);
|
||||
pc.setId(cId);
|
||||
pCont.add(pc);
|
||||
}
|
||||
List<PreemptionResourceRequest> pRes =
|
||||
new ArrayList<PreemptionResourceRequest>();
|
||||
for (ResourceRequest crr : allocation.getResourcePreemptions()) {
|
||||
PreemptionResourceRequest prr =
|
||||
recordFactory.newRecordInstance(PreemptionResourceRequest.class);
|
||||
prr.setResourceRequest(crr);
|
||||
pRes.add(prr);
|
||||
}
|
||||
contract.setContainers(pCont);
|
||||
contract.setResourceRequest(pRes);
|
||||
pMsg.setContract(contract);
|
||||
}
|
||||
return pMsg;
|
||||
}
|
||||
|
||||
}
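A hedged sketch of the allocator-side flow the tests above exercise; appContext, preemptionContext, preemptionMessage and runningReduces are placeholders for objects the real AM already holds.

CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appContext);                              // RunningAppContext of the AM
policy.preempt(preemptionContext, preemptionMessage); // PreemptionMessage from the RM
for (TaskAttemptId attempt : runningReduces) {
  if (policy.isPreempted(attempt)) {
    // ask this reducer to checkpoint and stop; once its container completes:
    policy.handleCompletedContainer(attempt);
  }
}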
@ -1604,6 +1604,21 @@ public class TestRMContainerAllocator {
|
|||
numPendingReduces,
|
||||
maxReduceRampupLimit, reduceSlowStart);
|
||||
verify(allocator).rampDownReduces(anyInt());
|
||||
|
||||
// Test reduce ramp-down for when there are scheduled maps
|
||||
// Since we have two scheduled Maps, rampDownReducers
|
||||
// should be invoked twice.
|
||||
scheduledMaps = 2;
|
||||
assignedReduces = 2;
|
||||
doReturn(10 * 1024).when(allocator).getMemLimit();
|
||||
allocator.scheduleReduces(
|
||||
totalMaps, succeededMaps,
|
||||
scheduledMaps, scheduledReduces,
|
||||
assignedMaps, assignedReduces,
|
||||
mapResourceReqt, reduceResourceReqt,
|
||||
numPendingReduces,
|
||||
maxReduceRampupLimit, reduceSlowStart);
|
||||
verify(allocator, times(2)).rampDownReduces(anyInt());
|
||||
}
|
||||
|
||||
private static class RecalculateContainerAllocator extends MyContainerAllocator {
|
||||
@ -53,6 +53,7 @@ import org.apache.hadoop.mapreduce.QueueInfo;
|
|||
import org.apache.hadoop.mapreduce.TaskCompletionEvent;
|
||||
import org.apache.hadoop.mapreduce.TaskTrackerInfo;
|
||||
import org.apache.hadoop.mapreduce.TaskType;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.protocol.ClientProtocol;
|
||||
import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
|
||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
||||
|
@ -575,10 +576,17 @@ public class LocalJobRunner implements ClientProtocol {
|
|||
|
||||
// TaskUmbilicalProtocol methods
|
||||
|
||||
@Override
|
||||
public JvmTask getTask(JvmContext context) { return null; }
|
||||
|
||||
public synchronized boolean statusUpdate(TaskAttemptID taskId,
|
||||
@Override
|
||||
public synchronized AMFeedback statusUpdate(TaskAttemptID taskId,
|
||||
TaskStatus taskStatus) throws IOException, InterruptedException {
|
||||
AMFeedback feedback = new AMFeedback();
|
||||
feedback.setTaskFound(true);
|
||||
if (null == taskStatus) {
|
||||
return feedback;
|
||||
}
|
||||
// Serialize as we would if distributed in order to make deep copy
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream dos = new DataOutputStream(baos);
|
||||
|
@ -618,7 +626,7 @@ public class LocalJobRunner implements ClientProtocol {
|
|||
}
|
||||
|
||||
// ignore phase
|
||||
return true;
|
||||
return feedback;
|
||||
}
|
||||
|
||||
/** Return the current values of the counters for this job,
|
||||
|
@ -654,24 +662,24 @@ public class LocalJobRunner implements ClientProtocol {
|
|||
statusUpdate(taskid, taskStatus);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) {
|
||||
// Ignore for now
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reportNextRecordRange(TaskAttemptID taskid,
|
||||
SortedRanges.Range range) throws IOException {
|
||||
LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
|
||||
}
|
||||
|
||||
public boolean ping(TaskAttemptID taskid) throws IOException {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canCommit(TaskAttemptID taskid)
|
||||
throws IOException {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void done(TaskAttemptID taskId) throws IOException {
|
||||
int taskIndex = mapIds.indexOf(taskId);
|
||||
if (taskIndex >= 0) { // mapping
|
||||
|
@ -681,11 +689,13 @@ public class LocalJobRunner implements ClientProtocol {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void fsError(TaskAttemptID taskId, String message)
|
||||
throws IOException {
|
||||
LOG.fatal("FSError: "+ message + "from task: " + taskId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
|
||||
LOG.fatal("shuffleError: "+ message + "from task: " + taskId);
|
||||
}
|
||||
|
@ -695,12 +705,30 @@ public class LocalJobRunner implements ClientProtocol {
|
|||
LOG.fatal("Fatal: "+ msg + "from task: " + taskId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId,
|
||||
int fromEventId, int maxLocs, TaskAttemptID id) throws IOException {
|
||||
return new MapTaskCompletionEventsUpdate(
|
||||
org.apache.hadoop.mapred.TaskCompletionEvent.EMPTY_ARRAY, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
|
||||
throws IOException, InterruptedException {
|
||||
// ignore
|
||||
}
|
||||
|
||||
@Override
|
||||
public TaskCheckpointID getCheckpointID(TaskID taskId) {
|
||||
// ignore
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public LocalJobRunner(Configuration conf) throws IOException {
|
||||
@ -44,6 +44,8 @@ import org.apache.hadoop.io.NullWritable;
|
|||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
|
||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
||||
|
@ -83,11 +85,10 @@ public class TestMRWithDistributedCache extends TestCase {
|
|||
private static final Log LOG =
|
||||
LogFactory.getLog(TestMRWithDistributedCache.class);
|
||||
|
||||
public static class DistributedCacheChecker extends
|
||||
Mapper<LongWritable, Text, NullWritable, NullWritable> {
|
||||
private static class DistributedCacheChecker {
|
||||
|
||||
@Override
|
||||
public void setup(Context context) throws IOException {
|
||||
public void setup(TaskInputOutputContext<?, ?, ?, ?> context)
|
||||
throws IOException {
|
||||
Configuration conf = context.getConfiguration();
|
||||
Path[] localFiles = context.getLocalCacheFiles();
|
||||
URI[] files = context.getCacheFiles();
|
||||
|
@ -101,6 +102,10 @@ public class TestMRWithDistributedCache extends TestCase {
|
|||
TestCase.assertEquals(2, files.length);
|
||||
TestCase.assertEquals(2, archives.length);
|
||||
|
||||
// Check the file name
|
||||
TestCase.assertTrue(files[0].getPath().endsWith("distributed.first"));
|
||||
TestCase.assertTrue(files[1].getPath().endsWith("distributed.second.jar"));
|
||||
|
||||
// Check lengths of the files
|
||||
TestCase.assertEquals(1, fs.getFileStatus(localFiles[0]).getLen());
|
||||
TestCase.assertTrue(fs.getFileStatus(localFiles[1]).getLen() > 1);
|
||||
|
@ -130,6 +135,26 @@ public class TestMRWithDistributedCache extends TestCase {
|
|||
TestCase.assertTrue("second file should be symlinked too",
|
||||
expectedAbsentSymlinkFile.exists());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class DistributedCacheCheckerMapper extends
|
||||
Mapper<LongWritable, Text, NullWritable, NullWritable> {
|
||||
|
||||
@Override
|
||||
protected void setup(Context context) throws IOException,
|
||||
InterruptedException {
|
||||
new DistributedCacheChecker().setup(context);
|
||||
}
|
||||
}
|
||||
|
||||
public static class DistributedCacheCheckerReducer extends
|
||||
Reducer<LongWritable, Text, NullWritable, NullWritable> {
|
||||
|
||||
@Override
|
||||
public void setup(Context context) throws IOException {
|
||||
new DistributedCacheChecker().setup(context);
|
||||
}
|
||||
}
|
||||
|
||||
private void testWithConf(Configuration conf) throws IOException,
|
||||
|
@ -146,7 +171,8 @@ public class TestMRWithDistributedCache extends TestCase {
|
|||
|
||||
|
||||
Job job = Job.getInstance(conf);
|
||||
job.setMapperClass(DistributedCacheChecker.class);
|
||||
job.setMapperClass(DistributedCacheCheckerMapper.class);
|
||||
job.setReducerClass(DistributedCacheCheckerReducer.class);
|
||||
job.setOutputFormatClass(NullOutputFormat.class);
|
||||
FileInputFormat.setInputPaths(job, first);
|
||||
// Creates the Job Configuration
|
||||
@ -0,0 +1,63 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.mapred;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
/**
|
||||
* This class is a simple struct to include both the taskFound information and
|
||||
* a possible preemption request coming from the AM.
|
||||
*/
|
||||
public class AMFeedback implements Writable {
|
||||
|
||||
boolean taskFound;
|
||||
boolean preemption;
|
||||
|
||||
public void setTaskFound(boolean t){
|
||||
taskFound=t;
|
||||
}
|
||||
|
||||
public boolean getTaskFound(){
|
||||
return taskFound;
|
||||
}
|
||||
|
||||
public void setPreemption(boolean preemption) {
|
||||
this.preemption=preemption;
|
||||
}
|
||||
|
||||
public boolean getPreemption() {
|
||||
return preemption;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeBoolean(taskFound);
|
||||
out.writeBoolean(preemption);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
taskFound = in.readBoolean();
|
||||
preemption = in.readBoolean();
|
||||
}
|
||||
|
||||
}
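A minimal, self-contained sketch (not part of the patch) of how AMFeedback round-trips as a Writable; the byte-array streams stand in for the umbilical RPC layer.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.mapred.AMFeedback;

public class AMFeedbackRoundTrip {
  public static void main(String[] args) throws Exception {
    AMFeedback sent = new AMFeedback();
    sent.setTaskFound(true);
    sent.setPreemption(true);   // AM asks the task to checkpoint and yield

    // serialize as the RPC layer would
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    sent.write(new DataOutputStream(bytes));

    // deserialize on the other side
    AMFeedback received = new AMFeedback();
    received.readFields(new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray())));

    System.out.println("taskFound=" + received.getTaskFound()
        + " preemption=" + received.getPreemption());
  }
}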
@ -949,6 +949,23 @@ public class JobConf extends Configuration {
|
|||
return get(KeyFieldBasedPartitioner.PARTITIONER_OPTIONS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the user defined {@link WritableComparable} comparator for
|
||||
* grouping keys of inputs to the combiner.
|
||||
*
|
||||
* @return comparator set by the user for grouping values.
|
||||
* @see #setCombinerKeyGroupingComparator(Class) for details.
|
||||
*/
|
||||
public RawComparator getCombinerKeyGroupingComparator() {
|
||||
Class<? extends RawComparator> theClass = getClass(
|
||||
JobContext.COMBINER_GROUP_COMPARATOR_CLASS, null, RawComparator.class);
|
||||
if (theClass == null) {
|
||||
return getOutputKeyComparator();
|
||||
}
|
||||
|
||||
return ReflectionUtils.newInstance(theClass, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the user defined {@link WritableComparable} comparator for
|
||||
* grouping keys of inputs to the reduce.
|
||||
|
@ -966,6 +983,37 @@ public class JobConf extends Configuration {
|
|||
return ReflectionUtils.newInstance(theClass, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the user defined {@link RawComparator} comparator for
|
||||
* grouping keys in the input to the combiner.
|
||||
* <p/>
|
||||
* <p>This comparator should be provided if the equivalence rules for keys
|
||||
* for sorting the intermediates are different from those for grouping keys
|
||||
* before each call to
|
||||
* {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
|
||||
* <p/>
|
||||
* <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
|
||||
* in a single call to the reduce function if K1 and K2 compare as equal.</p>
|
||||
* <p/>
|
||||
* <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
|
||||
* how keys are sorted, this can be used in conjunction to simulate
|
||||
* <i>secondary sort on values</i>.</p>
|
||||
* <p/>
|
||||
* <p><i>Note</i>: This is not a guarantee of the combiner sort being
|
||||
* <i>stable</i> in any sense. (In any case, with the order of available
|
||||
* map-outputs to the combiner being non-deterministic, it wouldn't make
|
||||
* that much sense.)</p>
|
||||
*
|
||||
* @param theClass the comparator class to be used for grouping keys for the
|
||||
* combiner. It should implement <code>RawComparator</code>.
|
||||
* @see #setOutputKeyComparatorClass(Class)
|
||||
*/
|
||||
public void setCombinerKeyGroupingComparator(
|
||||
Class<? extends RawComparator> theClass) {
|
||||
setClass(JobContext.COMBINER_GROUP_COMPARATOR_CLASS,
|
||||
theClass, RawComparator.class);
|
||||
}
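A hypothetical old-API configuration using the new hook (MyCombiner and MyGroupComparator are placeholder classes; TestOldCombinerGrouping later in this patch does the same with concrete ones):

JobConf job = new JobConf();
job.setCombinerClass(MyCombiner.class);                         // Reducer used as combiner
job.setCombinerKeyGroupingComparator(MyGroupComparator.class);  // grouping for combiner input
job.setOutputValueGroupingComparator(MyGroupComparator.class);  // grouping for reduce input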
|
||||
|
||||
/**
|
||||
* Set the user defined {@link RawComparator} comparator for
|
||||
* grouping keys in the input to the reduce.
|
||||
|
@ -990,6 +1038,7 @@ public class JobConf extends Configuration {
|
|||
* @param theClass the comparator class to be used for grouping keys.
|
||||
* It should implement <code>RawComparator</code>.
|
||||
* @see #setOutputKeyComparatorClass(Class)
|
||||
* @see #setCombinerKeyGroupingComparator(Class)
|
||||
*/
|
||||
public void setOutputValueGroupingComparator(
|
||||
Class<? extends RawComparator> theClass) {
|
||||
@ -187,6 +187,7 @@ abstract public class Task implements Writable, Configurable {
|
|||
protected SecretKey tokenSecret;
|
||||
protected SecretKey shuffleSecret;
|
||||
protected GcTimeUpdater gcUpdater;
|
||||
final AtomicBoolean mustPreempt = new AtomicBoolean(false);
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Constructors
|
||||
|
@ -711,6 +712,7 @@ abstract public class Task implements Writable, Configurable {
|
|||
}
|
||||
try {
|
||||
boolean taskFound = true; // whether TT knows about this task
|
||||
AMFeedback amFeedback = null;
|
||||
// sleep for a bit
|
||||
synchronized(lock) {
|
||||
if (taskDone.get()) {
|
||||
|
@ -728,12 +730,14 @@ abstract public class Task implements Writable, Configurable {
|
|||
taskStatus.statusUpdate(taskProgress.get(),
|
||||
taskProgress.toString(),
|
||||
counters);
|
||||
taskFound = umbilical.statusUpdate(taskId, taskStatus);
|
||||
amFeedback = umbilical.statusUpdate(taskId, taskStatus);
|
||||
taskFound = amFeedback.getTaskFound();
|
||||
taskStatus.clearStatus();
|
||||
}
|
||||
else {
|
||||
// send ping
|
||||
taskFound = umbilical.ping(taskId);
|
||||
amFeedback = umbilical.statusUpdate(taskId, null);
|
||||
taskFound = amFeedback.getTaskFound();
|
||||
}
|
||||
|
||||
// if Task Tracker is not aware of our task ID (probably because it died and
|
||||
|
@ -744,6 +748,17 @@ abstract public class Task implements Writable, Configurable {
|
|||
System.exit(66);
|
||||
}
|
||||
|
||||
// Set a flag that says we should preempt; this is read by
// ReduceTasks at points in the execution where it is
// safe/easy to preempt
|
||||
boolean lastPreempt = mustPreempt.get();
|
||||
mustPreempt.set(mustPreempt.get() || amFeedback.getPreemption());
|
||||
|
||||
if (lastPreempt ^ mustPreempt.get()) {
|
||||
LOG.info("PREEMPTION TASK: setting mustPreempt to " +
|
||||
mustPreempt.get() + " given " + amFeedback.getPreemption() +
|
||||
" for "+ taskId + " task status: " +taskStatus.getPhase());
|
||||
}
|
||||
sendProgress = resetProgressFlag();
|
||||
remainingRetries = MAX_RETRIES;
|
||||
}
|
||||
|
@ -992,10 +1007,17 @@ abstract public class Task implements Writable, Configurable {
|
|||
public void done(TaskUmbilicalProtocol umbilical,
|
||||
TaskReporter reporter
|
||||
) throws IOException, InterruptedException {
|
||||
updateCounters();
|
||||
if (taskStatus.getRunState() == TaskStatus.State.PREEMPTED ) {
|
||||
// If we are preempted, do no output promotion; signal done and exit
|
||||
committer.commitTask(taskContext);
|
||||
umbilical.preempted(taskId, taskStatus);
|
||||
taskDone.set(true);
|
||||
reporter.stopCommunicationThread();
|
||||
return;
|
||||
}
|
||||
LOG.info("Task:" + taskId + " is done."
|
||||
+ " And is in the process of committing");
|
||||
updateCounters();
|
||||
|
||||
boolean commitRequired = isCommitRequired();
|
||||
if (commitRequired) {
|
||||
int retries = MAX_RETRIES;
|
||||
|
@ -1054,7 +1076,7 @@ abstract public class Task implements Writable, Configurable {
|
|||
int retries = MAX_RETRIES;
|
||||
while (true) {
|
||||
try {
|
||||
if (!umbilical.statusUpdate(getTaskID(), taskStatus)) {
|
||||
if (!umbilical.statusUpdate(getTaskID(), taskStatus).getTaskFound()) {
|
||||
LOG.warn("Parent died. Exiting "+taskId);
|
||||
System.exit(66);
|
||||
}
|
||||
|
@ -1098,8 +1120,8 @@ abstract public class Task implements Writable, Configurable {
|
|||
if (isMapTask() && conf.getNumReduceTasks() > 0) {
|
||||
try {
|
||||
Path mapOutput = mapOutputFile.getOutputFile();
|
||||
FileSystem localFS = FileSystem.getLocal(conf);
|
||||
return localFS.getFileStatus(mapOutput).getLen();
|
||||
FileSystem fs = mapOutput.getFileSystem(conf);
|
||||
return fs.getFileStatus(mapOutput).getLen();
|
||||
} catch (IOException e) {
|
||||
LOG.warn ("Could not find output size " , e);
|
||||
}
|
||||
|
@ -1553,7 +1575,8 @@ abstract public class Task implements Writable, Configurable {
|
|||
combinerClass = cls;
|
||||
keyClass = (Class<K>) job.getMapOutputKeyClass();
|
||||
valueClass = (Class<V>) job.getMapOutputValueClass();
|
||||
comparator = (RawComparator<K>) job.getOutputKeyComparator();
|
||||
comparator = (RawComparator<K>)
|
||||
job.getCombinerKeyGroupingComparator();
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
|
@ -1602,7 +1625,7 @@ abstract public class Task implements Writable, Configurable {
|
|||
this.taskId = taskId;
|
||||
keyClass = (Class<K>) context.getMapOutputKeyClass();
|
||||
valueClass = (Class<V>) context.getMapOutputValueClass();
|
||||
comparator = (RawComparator<K>) context.getSortComparator();
|
||||
comparator = (RawComparator<K>) context.getCombinerKeyGroupingComparator();
|
||||
this.committer = committer;
|
||||
}
|
||||
@ -51,7 +51,7 @@ public abstract class TaskStatus implements Writable, Cloneable {
|
|||
@InterfaceAudience.Private
|
||||
@InterfaceStability.Unstable
|
||||
public static enum State {RUNNING, SUCCEEDED, FAILED, UNASSIGNED, KILLED,
|
||||
COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN}
|
||||
COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN, PREEMPTED}
|
||||
|
||||
private final TaskAttemptID taskid;
|
||||
private float progress;
|
||||
@ -24,6 +24,9 @@ import org.apache.hadoop.classification.InterfaceAudience;
|
|||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.ipc.VersionedProtocol;
|
||||
import org.apache.hadoop.mapred.JvmTask;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.security.token.JobTokenSelector;
|
||||
import org.apache.hadoop.security.token.TokenInfo;
|
||||
|
||||
|
@ -64,9 +67,10 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
|
|||
* Version 17 Modified TaskID to be aware of the new TaskTypes
|
||||
* Version 18 Added numRequiredSlots to TaskStatus for MAPREDUCE-516
|
||||
* Version 19 Added fatalError for child to communicate fatal errors to TT
|
||||
* Version 20 Added methods to manage checkpoints
|
||||
* */
|
||||
|
||||
public static final long versionID = 19L;
|
||||
public static final long versionID = 20L;
|
||||
|
||||
/**
|
||||
* Called when a child task process starts, to get its task.
|
||||
|
@ -78,7 +82,8 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
|
|||
JvmTask getTask(JvmContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* Report child's progress to parent.
|
||||
* Report child's progress to parent. Also invoked to report that the task is
* still alive (this used to be done via ping). Returns an AMFeedback used to
* propagate preemption requests.
|
||||
*
|
||||
* @param taskId task-id of the child
|
||||
* @param taskStatus status of the child
|
||||
|
@ -86,7 +91,7 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
|
|||
* @throws InterruptedException
|
||||
* @return True if the task is known
|
||||
*/
|
||||
boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
|
||||
AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
|
||||
throws IOException, InterruptedException;
|
||||
|
||||
/** Report error messages back to parent. Calls should be sparing, since all
|
||||
|
@ -105,11 +110,6 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
|
|||
void reportNextRecordRange(TaskAttemptID taskid, SortedRanges.Range range)
|
||||
throws IOException;
|
||||
|
||||
/** Periodically called by child to check if parent is still alive.
|
||||
* @return True if the task is known
|
||||
*/
|
||||
boolean ping(TaskAttemptID taskid) throws IOException;
|
||||
|
||||
/** Report that the task is successfully completed. Failure is assumed if
|
||||
* the task process exits without calling this.
|
||||
* @param taskid task's id
|
||||
|
@ -161,4 +161,33 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
|
|||
TaskAttemptID id)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
* Report to the AM that the task has been successfully preempted.
|
||||
*
|
||||
* @param taskId task's id
|
||||
* @param taskStatus status of the child
|
||||
* @throws IOException
|
||||
*/
|
||||
void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
|
||||
throws IOException, InterruptedException;
|
||||
|
||||
/**
|
||||
* Return the latest CheckpointID for the given TaskID. This provides
|
||||
* the task with a way to locate the checkpointed data and restart from
|
||||
* that point in the computation.
|
||||
*
|
||||
* @param taskID task's id
|
||||
* @return the most recent checkpoint (if any) for this task
|
||||
* @throws IOException
|
||||
*/
|
||||
TaskCheckpointID getCheckpointID(TaskID taskID);
|
||||
|
||||
/**
|
||||
* Send a CheckpointID for a given TaskID to be stored in the AM,
|
||||
* to later restart a task from this checkpoint.
|
||||
* @param tid
|
||||
* @param cid
|
||||
*/
|
||||
void setCheckpointID(TaskID tid, TaskCheckpointID cid);
|
||||
|
||||
}
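A hedged sketch of the child-side heartbeat after this change; umbilical, taskId and taskStatus are assumed to be the fields the reporter thread in Task already has. statusUpdate now returns an AMFeedback instead of a boolean and also replaces the removed ping().

AMFeedback amFeedback = umbilical.statusUpdate(taskId, taskStatus);
if (!amFeedback.getTaskFound()) {
  // the AM no longer knows this attempt; the real code exits the child JVM here
}
if (amFeedback.getPreemption()) {
  // sets a flag that ReduceTask checks at safe points to checkpoint and stop
}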
@ -948,11 +948,27 @@ public class Job extends JobContextImpl implements JobContext {
|
|||
conf.setOutputValueClass(theClass);
|
||||
}
|
||||
|
||||
/**
|
||||
* Define the comparator that controls which keys are grouped together
|
||||
* for a single call to combiner,
|
||||
* {@link Reducer#reduce(Object, Iterable,
|
||||
* org.apache.hadoop.mapreduce.Reducer.Context)}
|
||||
*
|
||||
* @param cls the raw comparator to use
|
||||
* @throws IllegalStateException if the job is submitted
|
||||
*/
|
||||
public void setCombinerKeyGroupingComparatorClass(
|
||||
Class<? extends RawComparator> cls) throws IllegalStateException {
|
||||
ensureState(JobState.DEFINE);
|
||||
conf.setCombinerKeyGroupingComparator(cls);
|
||||
}
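The new-API counterpart, as a hypothetical snippet (class names other than the setters are placeholders):

Job job = Job.getInstance(new Configuration());
job.setCombinerClass(MyCombiner.class);
job.setCombinerKeyGroupingComparatorClass(MyGroupComparator.class);  // grouping for combiner input
job.setGroupingComparatorClass(MyGroupComparator.class);             // grouping for reduce input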
|
||||
|
||||
/**
|
||||
* Define the comparator that controls how the keys are sorted before they
|
||||
* are passed to the {@link Reducer}.
|
||||
* @param cls the raw comparator
|
||||
* @throws IllegalStateException if the job is submitted
|
||||
* @see #setCombinerKeyGroupingComparatorClass(Class)
|
||||
*/
|
||||
public void setSortComparatorClass(Class<? extends RawComparator> cls
|
||||
) throws IllegalStateException {
|
||||
|
@ -967,6 +983,7 @@ public class Job extends JobContextImpl implements JobContext {
|
|||
* org.apache.hadoop.mapreduce.Reducer.Context)}
|
||||
* @param cls the raw comparator to use
|
||||
* @throws IllegalStateException if the job is submitted
|
||||
* @see #setCombinerKeyGroupingComparatorClass(Class)
|
||||
*/
|
||||
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
|
||||
) throws IllegalStateException {
|
||||
|
|
|
@ -167,12 +167,22 @@ public interface JobContext extends MRJobConfig {
|
|||
*/
|
||||
public String getJar();
|
||||
|
||||
/**
|
||||
* Get the user defined {@link RawComparator} comparator for
|
||||
* grouping keys of inputs to the combiner.
|
||||
*
|
||||
* @return comparator set by the user for grouping values.
|
||||
* @see Job#setCombinerKeyGroupingComparatorClass(Class)
|
||||
*/
|
||||
public RawComparator<?> getCombinerKeyGroupingComparator();
|
||||
|
||||
/**
|
||||
* Get the user defined {@link RawComparator} comparator for
|
||||
* grouping keys of inputs to the reduce.
|
||||
*
|
||||
* @return comparator set by the user for grouping values.
|
||||
* @see Job#setGroupingComparatorClass(Class) for details.
|
||||
* @see Job#setGroupingComparatorClass(Class)
|
||||
* @see #getCombinerKeyGroupingComparator()
|
||||
*/
|
||||
public RawComparator<?> getGroupingComparator();
|
||||
|
||||
|
|
|
@ -93,6 +93,8 @@ public interface MRJobConfig {
|
|||
|
||||
public static final String KEY_COMPARATOR = "mapreduce.job.output.key.comparator.class";
|
||||
|
||||
public static final String COMBINER_GROUP_COMPARATOR_CLASS = "mapreduce.job.combiner.group.comparator.class";
|
||||
|
||||
public static final String GROUP_COMPARATOR_CLASS = "mapreduce.job.output.group.comparator.class";
|
||||
|
||||
public static final String WORKING_DIR = "mapreduce.job.working.dir";
|
||||
@ -36,36 +36,30 @@ import org.apache.hadoop.mapred.Counters;
|
|||
*/
|
||||
public class TaskCheckpointID implements CheckpointID {
|
||||
|
||||
FSCheckpointID rawId;
|
||||
private List<Path> partialOutput;
|
||||
private Counters counters;
|
||||
final FSCheckpointID rawId;
|
||||
private final List<Path> partialOutput;
|
||||
private final Counters counters;
|
||||
|
||||
public TaskCheckpointID() {
|
||||
this.rawId = new FSCheckpointID();
|
||||
this.partialOutput = new ArrayList<Path>();
|
||||
this(new FSCheckpointID(), new ArrayList<Path>(), new Counters());
|
||||
}
|
||||
|
||||
public TaskCheckpointID(FSCheckpointID rawId, List<Path> partialOutput,
|
||||
Counters counters) {
|
||||
this.rawId = rawId;
|
||||
this.counters = counters;
|
||||
if(partialOutput == null)
|
||||
this.partialOutput = new ArrayList<Path>();
|
||||
else
|
||||
this.partialOutput = partialOutput;
|
||||
this.partialOutput = null == partialOutput
|
||||
? new ArrayList<Path>()
|
||||
: partialOutput;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
counters.write(out);
|
||||
if (partialOutput == null) {
|
||||
WritableUtils.writeVLong(out, 0L);
|
||||
} else {
|
||||
WritableUtils.writeVLong(out, partialOutput.size());
|
||||
for (Path p : partialOutput) {
|
||||
Text.writeString(out, p.toString());
|
||||
}
|
||||
}
|
||||
rawId.write(out);
|
||||
}
|
||||
|
||||
|
@ -74,21 +68,22 @@ public class TaskCheckpointID implements CheckpointID{
|
|||
partialOutput.clear();
|
||||
counters.readFields(in);
|
||||
long numPout = WritableUtils.readVLong(in);
|
||||
for(int i=0;i<numPout;i++)
|
||||
for (int i = 0; i < numPout; i++) {
|
||||
partialOutput.add(new Path(Text.readString(in)));
|
||||
}
|
||||
rawId.readFields(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof TaskCheckpointID){
|
||||
return this.rawId.equals(((TaskCheckpointID)other).rawId) &&
|
||||
this.counters.equals(((TaskCheckpointID) other).counters) &&
|
||||
this.partialOutput.containsAll(((TaskCheckpointID) other).partialOutput) &&
|
||||
((TaskCheckpointID) other).partialOutput.containsAll(this.partialOutput);
|
||||
} else {
|
||||
return false;
|
||||
TaskCheckpointID o = (TaskCheckpointID) other;
|
||||
return rawId.equals(o.rawId) &&
|
||||
counters.equals(o.counters) &&
|
||||
partialOutput.containsAll(o.partialOutput) &&
|
||||
o.partialOutput.containsAll(partialOutput);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -166,6 +166,11 @@ class ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
|
|||
return base.getFileTimestamps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RawComparator<?> getCombinerKeyGroupingComparator() {
|
||||
return base.getCombinerKeyGroupingComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RawComparator<?> getGroupingComparator() {
|
||||
return base.getGroupingComparator();
|
||||
|
|
|
@ -159,6 +159,11 @@ class ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
|
|||
return base.getFileTimestamps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RawComparator<?> getCombinerKeyGroupingComparator() {
|
||||
return base.getCombinerKeyGroupingComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RawComparator<?> getGroupingComparator() {
|
||||
return base.getGroupingComparator();
|
||||
|
|
|
@ -168,6 +168,11 @@ public class WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
|
|||
return mapContext.getFileTimestamps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RawComparator<?> getCombinerKeyGroupingComparator() {
|
||||
return mapContext.getCombinerKeyGroupingComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RawComparator<?> getGroupingComparator() {
|
||||
return mapContext.getGroupingComparator();
|
||||
|
|
|
@ -137,7 +137,7 @@ public class WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
|
|||
|
||||
@Override
|
||||
public URI[] getCacheFiles() throws IOException {
|
||||
return reduceContext.getCacheArchives();
|
||||
return reduceContext.getCacheFiles();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -161,6 +161,11 @@ public class WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
|
|||
return reduceContext.getFileTimestamps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RawComparator<?> getCombinerKeyGroupingComparator() {
|
||||
return reduceContext.getCombinerKeyGroupingComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RawComparator<?> getGroupingComparator() {
|
||||
return reduceContext.getGroupingComparator();
|
||||
|
|
|
@ -252,6 +252,17 @@ public class JobContextImpl implements JobContext {
|
|||
return conf.getJar();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the user defined {@link RawComparator} comparator for
|
||||
* grouping keys of inputs to the combiner.
|
||||
*
|
||||
* @return comparator set by the user for grouping values.
|
||||
* @see Job#setCombinerKeyGroupingComparatorClass(Class) for details.
|
||||
*/
|
||||
public RawComparator<?> getCombinerKeyGroupingComparator() {
|
||||
return conf.getCombinerKeyGroupingComparator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the user defined {@link RawComparator} comparator for
|
||||
* grouping keys of inputs to the reduce.
|
||||
|
|
|
@ -582,7 +582,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
|
|||
Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
|
||||
Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
|
||||
RawComparator<K> comparator =
|
||||
(RawComparator<K>)job.getOutputKeyComparator();
|
||||
(RawComparator<K>)job.getCombinerKeyGroupingComparator();
|
||||
try {
|
||||
CombineValuesIterator values = new CombineValuesIterator(
|
||||
kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
|
||||
|
|
|
@ -88,6 +88,8 @@ import org.apache.hadoop.yarn.util.Records;
|
|||
import org.apache.hadoop.yarn.webapp.WebApp;
|
||||
import org.apache.hadoop.yarn.webapp.WebApps;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
/**
|
||||
* This module is responsible for talking to the
|
||||
* JobClient (user facing).
|
||||
|
@ -142,7 +144,8 @@ public class HistoryClientService extends AbstractService {
|
|||
super.serviceStart();
|
||||
}
|
||||
|
||||
private void initializeWebApp(Configuration conf) {
|
||||
@VisibleForTesting
|
||||
protected void initializeWebApp(Configuration conf) {
|
||||
webApp = new HsWebApp(history);
|
||||
InetSocketAddress bindAddress = MRWebAppUtil.getJHSWebBindAddress(conf);
|
||||
// NOTE: there should be a .at(InetSocketAddress)
|
||||
|
|
|
@ -45,6 +45,8 @@ import org.apache.hadoop.yarn.event.Dispatcher;
|
|||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||
import org.apache.hadoop.yarn.logaggregation.AggregatedLogDeletionService;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
/******************************************************************
|
||||
* {@link JobHistoryServer} is responsible for servicing all job history
|
||||
* related requests from client.
|
||||
|
@ -60,10 +62,10 @@ public class JobHistoryServer extends CompositeService {
|
|||
public static final long historyServerTimeStamp = System.currentTimeMillis();
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(JobHistoryServer.class);
|
||||
private HistoryContext historyContext;
|
||||
protected HistoryContext historyContext;
|
||||
private HistoryClientService clientService;
|
||||
private JobHistory jobHistoryService;
|
||||
private JHSDelegationTokenSecretManager jhsDTSecretManager;
|
||||
protected JHSDelegationTokenSecretManager jhsDTSecretManager;
|
||||
private AggregatedLogDeletionService aggLogDelService;
|
||||
private HSAdminServer hsAdminServer;
|
||||
private HistoryServerStateStoreService stateStore;
|
||||
|
@ -129,8 +131,7 @@ public class JobHistoryServer extends CompositeService {
|
|||
historyContext = (HistoryContext)jobHistoryService;
|
||||
stateStore = createStateStore(conf);
|
||||
this.jhsDTSecretManager = createJHSSecretManager(conf, stateStore);
|
||||
clientService = new HistoryClientService(historyContext,
|
||||
this.jhsDTSecretManager);
|
||||
clientService = createHistoryClientService();
|
||||
aggLogDelService = new AggregatedLogDeletionService();
|
||||
hsAdminServer = new HSAdminServer(aggLogDelService, jobHistoryService);
|
||||
addService(stateStore);
|
||||
|
@ -142,6 +143,12 @@ public class JobHistoryServer extends CompositeService {
|
|||
super.serviceInit(config);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
protected HistoryClientService createHistoryClientService() {
|
||||
return new HistoryClientService(historyContext,
|
||||
this.jhsDTSecretManager);
|
||||
}
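A hypothetical illustration (not in the patch) of why createHistoryClientService() and the two fields above became protected: a test can subclass the server and substitute its own client service.

class TestableJobHistoryServer extends JobHistoryServer {
  @Override
  protected HistoryClientService createHistoryClientService() {
    // e.g. return a stubbed service or one bound to an ephemeral port
    return new HistoryClientService(historyContext, jhsDTSecretManager);
  }
}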
|
||||
|
||||
protected JHSDelegationTokenSecretManager createJHSSecretManager(
|
||||
Configuration conf, HistoryServerStateStoreService store) {
|
||||
long secretKeyInterval =
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.hadoop.mapred;
|
|||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
@ -29,20 +28,17 @@ import org.apache.commons.logging.Log;
|
|||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.BytesWritable;
|
||||
import org.apache.hadoop.ipc.ProtocolSignature;
|
||||
import org.apache.hadoop.mapreduce.InputFormat;
|
||||
import org.apache.hadoop.mapreduce.InputSplit;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.OutputFormat;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent;
|
||||
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
|
||||
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
|
||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
||||
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
|
||||
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
|
||||
import org.apache.hadoop.mapreduce.split.JobSplit.SplitMetaInfo;
|
||||
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
|
||||
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
|
||||
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
|
||||
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
|
||||
import org.apache.hadoop.util.ReflectionUtils;
|
||||
|
||||
/**
|
||||
|
@ -110,11 +106,16 @@ public class TestMapProgress extends TestCase {
|
|||
statusUpdate(taskId, taskStatus);
|
||||
}
|
||||
|
||||
public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
|
||||
throws IOException, InterruptedException {
|
||||
statusUpdate(taskId, taskStatus);
|
||||
}
|
||||
|
||||
public boolean canCommit(TaskAttemptID taskid) throws IOException {
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
|
||||
public AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
|
||||
throws IOException, InterruptedException {
|
||||
StringBuffer buf = new StringBuffer("Task ");
|
||||
buf.append(taskId);
|
||||
|
@ -128,7 +129,9 @@ public class TestMapProgress extends TestCase {
|
|||
LOG.info(buf.toString());
|
||||
// ignore phase
|
||||
// ignore counters
|
||||
return true;
|
||||
AMFeedback a = new AMFeedback();
|
||||
a.setTaskFound(true);
|
||||
return a;
|
||||
}
|
||||
|
||||
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) throws IOException {
|
||||
|
@ -145,6 +148,17 @@ public class TestMapProgress extends TestCase {
|
|||
SortedRanges.Range range) throws IOException {
|
||||
LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TaskCheckpointID getCheckpointID(TaskID taskId) {
|
||||
// do nothing
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
|
||||
private FileSystem fs = null;
|
||||
@@ -0,0 +1,191 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import junit.framework.Assert;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.UUID;

public class TestOldCombinerGrouping {
  private static String TEST_ROOT_DIR =
      new File("build", UUID.randomUUID().toString()).getAbsolutePath();

  public static class Map implements
      Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    public void map(LongWritable key, Text value,
        OutputCollector<Text, LongWritable> output, Reporter reporter)
        throws IOException {
      String v = value.toString();
      String k = v.substring(0, v.indexOf(","));
      v = v.substring(v.indexOf(",") + 1);
      output.collect(new Text(k), new LongWritable(Long.parseLong(v)));
    }

    @Override
    public void close() throws IOException {
    }

    @Override
    public void configure(JobConf job) {
    }
  }

  public static class Reduce implements
      Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    public void reduce(Text key, Iterator<LongWritable> values,
        OutputCollector<Text, LongWritable> output, Reporter reporter)
        throws IOException {
      LongWritable maxValue = null;
      while (values.hasNext()) {
        LongWritable value = values.next();
        if (maxValue == null) {
          maxValue = value;
        } else if (value.compareTo(maxValue) > 0) {
          maxValue = value;
        }
      }
      output.collect(key, maxValue);
    }

    @Override
    public void close() throws IOException {
    }

    @Override
    public void configure(JobConf job) {
    }
  }

  public static class Combiner extends Reduce {
  }

  public static class GroupComparator implements RawComparator<Text> {
    @Override
    public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
        int i4) {
      byte[] b1 = new byte[i2];
      System.arraycopy(bytes, i, b1, 0, i2);

      byte[] b2 = new byte[i4];
      System.arraycopy(bytes2, i3, b2, 0, i4);

      return compare(new Text(new String(b1)), new Text(new String(b2)));
    }

    @Override
    public int compare(Text o1, Text o2) {
      String s1 = o1.toString();
      String s2 = o2.toString();
      s1 = s1.substring(0, s1.indexOf("|"));
      s2 = s2.substring(0, s2.indexOf("|"));
      return s1.compareTo(s2);
    }

  }

  @Test
  public void testCombiner() throws Exception {
    if (!new File(TEST_ROOT_DIR).mkdirs()) {
      throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
    }
    File in = new File(TEST_ROOT_DIR, "input");
    if (!in.mkdirs()) {
      throw new RuntimeException("Could not create test dir: " + in);
    }
    File out = new File(TEST_ROOT_DIR, "output");
    PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
    pw.println("A|a,1");
    pw.println("A|b,2");
    pw.println("B|a,3");
    pw.println("B|b,4");
    pw.println("B|c,5");
    pw.close();
    JobConf job = new JobConf();
    job.set("mapreduce.framework.name", "local");
    TextInputFormat.setInputPaths(job, new Path(in.getPath()));
    TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormat(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputValueGroupingComparator(GroupComparator.class);

    job.setCombinerClass(Combiner.class);
    job.setCombinerKeyGroupingComparator(GroupComparator.class);
    job.setInt("min.num.spills.for.combine", 0);

    JobClient client = new JobClient(job);
    RunningJob runningJob = client.submitJob(job);
    runningJob.waitForCompletion();
    if (runningJob.isSuccessful()) {
      Counters counters = runningJob.getCounters();

      long combinerInputRecords = counters.getGroup(
          "org.apache.hadoop.mapreduce.TaskCounter").
          getCounter("COMBINE_INPUT_RECORDS");
      long combinerOutputRecords = counters.getGroup(
          "org.apache.hadoop.mapreduce.TaskCounter").
          getCounter("COMBINE_OUTPUT_RECORDS");
      Assert.assertTrue(combinerInputRecords > 0);
      Assert.assertTrue(combinerInputRecords > combinerOutputRecords);

      BufferedReader br = new BufferedReader(new FileReader(
          new File(out, "part-00000")));
      Set<String> output = new HashSet<String>();
      String line = br.readLine();
      Assert.assertNotNull(line);
      output.add(line.substring(0, 1) + line.substring(4, 5));
      line = br.readLine();
      Assert.assertNotNull(line);
      output.add(line.substring(0, 1) + line.substring(4, 5));
      line = br.readLine();
      Assert.assertNull(line);
      br.close();

      Set<String> expected = new HashSet<String>();
      expected.add("A2");
      expected.add("B5");

      Assert.assertEquals(expected, output);

    } else {
      Assert.fail("Job failed");
    }
  }

}

@@ -27,6 +27,10 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.mapred.SortedRanges.Range;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;


public class TestTaskCommit extends HadoopTestCase {
  Path rootDir =
@@ -131,11 +135,6 @@ public class TestTaskCommit extends HadoopTestCase {
      return null;
    }

    @Override
    public boolean ping(TaskAttemptID taskid) throws IOException {
      return true;
    }

    @Override
    public void reportDiagnosticInfo(TaskAttemptID taskid, String trace)
        throws IOException {
@@ -152,9 +151,11 @@ public class TestTaskCommit extends HadoopTestCase {
    }

    @Override
    public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
    public AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
        throws IOException, InterruptedException {
      return true;
      AMFeedback a = new AMFeedback();
      a.setTaskFound(true);
      return a;
    }

    @Override
@@ -168,6 +169,22 @@ public class TestTaskCommit extends HadoopTestCase {
        long clientVersion, int clientMethodsHash) throws IOException {
      return null;
    }

    @Override
    public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
        throws IOException, InterruptedException {
      fail("Task should not go to commit-pending");
    }

    @Override
    public TaskCheckpointID getCheckpointID(TaskID taskId) {
      return null;
    }

    @Override
    public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
      // ignore
    }
  }

  /**

@@ -0,0 +1,178 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce;

import junit.framework.Assert;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;

public class TestNewCombinerGrouping {
  private static String TEST_ROOT_DIR =
      new File("build", UUID.randomUUID().toString()).getAbsolutePath();

  public static class Map extends
      Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value,
        Context context)
        throws IOException, InterruptedException {
      String v = value.toString();
      String k = v.substring(0, v.indexOf(","));
      v = v.substring(v.indexOf(",") + 1);
      context.write(new Text(k), new LongWritable(Long.parseLong(v)));
    }
  }

  public static class Reduce extends
      Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values,
        Context context)
        throws IOException, InterruptedException {
      LongWritable maxValue = null;
      for (LongWritable value : values) {
        if (maxValue == null) {
          maxValue = value;
        } else if (value.compareTo(maxValue) > 0) {
          maxValue = value;
        }
      }
      context.write(key, maxValue);
    }
  }

  public static class Combiner extends Reduce {
  }

  public static class GroupComparator implements RawComparator<Text> {
    @Override
    public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
        int i4) {
      byte[] b1 = new byte[i2];
      System.arraycopy(bytes, i, b1, 0, i2);

      byte[] b2 = new byte[i4];
      System.arraycopy(bytes2, i3, b2, 0, i4);

      return compare(new Text(new String(b1)), new Text(new String(b2)));
    }

    @Override
    public int compare(Text o1, Text o2) {
      String s1 = o1.toString();
      String s2 = o2.toString();
      s1 = s1.substring(0, s1.indexOf("|"));
      s2 = s2.substring(0, s2.indexOf("|"));
      return s1.compareTo(s2);
    }

  }

  @Test
  public void testCombiner() throws Exception {
    if (!new File(TEST_ROOT_DIR).mkdirs()) {
      throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
    }
    File in = new File(TEST_ROOT_DIR, "input");
    if (!in.mkdirs()) {
      throw new RuntimeException("Could not create test dir: " + in);
    }
    File out = new File(TEST_ROOT_DIR, "output");
    PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
    pw.println("A|a,1");
    pw.println("A|b,2");
    pw.println("B|a,3");
    pw.println("B|b,4");
    pw.println("B|c,5");
    pw.close();
    JobConf conf = new JobConf();
    conf.set("mapreduce.framework.name", "local");
    Job job = new Job(conf);
    TextInputFormat.setInputPaths(job, new Path(in.getPath()));
    TextOutputFormat.setOutputPath(job, new Path(out.getPath()));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setGroupingComparatorClass(GroupComparator.class);

    job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
    job.setCombinerClass(Combiner.class);
    job.getConfiguration().setInt("min.num.spills.for.combine", 0);

    job.submit();
    job.waitForCompletion(false);
    if (job.isSuccessful()) {
      Counters counters = job.getCounters();

      long combinerInputRecords = counters.findCounter(
          "org.apache.hadoop.mapreduce.TaskCounter",
          "COMBINE_INPUT_RECORDS").getValue();
      long combinerOutputRecords = counters.findCounter(
          "org.apache.hadoop.mapreduce.TaskCounter",
          "COMBINE_OUTPUT_RECORDS").getValue();
      Assert.assertTrue(combinerInputRecords > 0);
      Assert.assertTrue(combinerInputRecords > combinerOutputRecords);

      BufferedReader br = new BufferedReader(new FileReader(
          new File(out, "part-r-00000")));
      Set<String> output = new HashSet<String>();
      String line = br.readLine();
      Assert.assertNotNull(line);
      output.add(line.substring(0, 1) + line.substring(4, 5));
      line = br.readLine();
      Assert.assertNotNull(line);
      output.add(line.substring(0, 1) + line.substring(4, 5));
      line = br.readLine();
      Assert.assertNull(line);
      br.close();

      Set<String> expected = new HashSet<String>();
      expected.add("A2");
      expected.add("B5");

      Assert.assertEquals(expected, output);

    } else {
      Assert.fail("Job failed");
    }
  }

}

@@ -39,6 +39,7 @@ import org.apache.hadoop.mapreduce.v2.api.protocolrecords.CancelDelegationTokenR
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDelegationTokenRequest;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.RenewDelegationTokenRequest;
import org.apache.hadoop.mapreduce.v2.hs.HistoryClientService;
import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService;
import org.apache.hadoop.mapreduce.v2.hs.JHSDelegationTokenSecretManager;
import org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer;
@@ -94,6 +95,17 @@ public class TestJHSSecurity {
          return new JHSDelegationTokenSecretManager(initialInterval,
              maxLifetime, renewInterval, 3600000, store);
        }

        @Override
        protected HistoryClientService createHistoryClientService() {
          return new HistoryClientService(historyContext,
              this.jhsDTSecretManager) {
            @Override
            protected void initializeWebApp(Configuration conf) {
              // Don't need it, skip.;
            }
          };
        }
      };
    // final JobHistoryServer jobHistoryServer = jhServer;
    jobHistoryServer.init(conf);

@@ -115,7 +115,7 @@ public class TestUmbilicalProtocolWithJobToken {
      proxy = (TaskUmbilicalProtocol) RPC.getProxy(
          TaskUmbilicalProtocol.class, TaskUmbilicalProtocol.versionID,
          addr, conf);
      proxy.ping(null);
      proxy.statusUpdate(null, null);
    } finally {
      server.stop();
      if (proxy != null) {

@@ -519,7 +519,7 @@
      <dependency>
        <groupId>commons-logging</groupId>
        <artifactId>commons-logging</artifactId>
        <version>1.1.1</version>
        <version>1.1.3</version>
        <exclusions>
          <exclusion>
            <groupId>avalon-framework</groupId>

@@ -64,8 +64,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@@ -105,8 +106,8 @@ public class ResourceSchedulerWrapper implements

  private Configuration conf;
  private ResourceScheduler scheduler;
  private Map<ApplicationAttemptId, String> appQueueMap =
      new ConcurrentHashMap<ApplicationAttemptId, String>();
  private Map<ApplicationId, String> appQueueMap =
      new ConcurrentHashMap<ApplicationId, String>();
  private BufferedWriter jobRuntimeLogBW;

  // Priority of the ResourceSchedulerWrapper shutdown hook.
@@ -240,7 +241,7 @@ public class ResourceSchedulerWrapper implements
          (AppAttemptRemovedSchedulerEvent) schedulerEvent;
      ApplicationAttemptId appAttemptId =
          appRemoveEvent.getApplicationAttemptID();
      String queue = appQueueMap.get(appAttemptId);
      String queue = appQueueMap.get(appAttemptId.getApplicationId());
      SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
      if (! app.getLiveContainers().isEmpty()) { // have 0 or 1
        // should have one container which is AM container
@@ -262,20 +263,18 @@ public class ResourceSchedulerWrapper implements
      schedulerHandleCounter.inc();
      schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();

      if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED
          && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
      if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
          && schedulerEvent instanceof AppRemovedSchedulerEvent) {
        SLSRunner.decreaseRemainingApps();
        AppAttemptRemovedSchedulerEvent appRemoveEvent =
            (AppAttemptRemovedSchedulerEvent) schedulerEvent;
        ApplicationAttemptId appAttemptId =
            appRemoveEvent.getApplicationAttemptID();
        appQueueMap.remove(appRemoveEvent.getApplicationAttemptID());
      } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_ADDED
          && schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
        AppAttemptAddedSchedulerEvent appAddEvent =
            (AppAttemptAddedSchedulerEvent) schedulerEvent;
        AppRemovedSchedulerEvent appRemoveEvent =
            (AppRemovedSchedulerEvent) schedulerEvent;
        appQueueMap.remove(appRemoveEvent.getApplicationID());
      } else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED
          && schedulerEvent instanceof AppAddedSchedulerEvent) {
        AppAddedSchedulerEvent appAddEvent =
            (AppAddedSchedulerEvent) schedulerEvent;
        String queueName = appAddEvent.getQueue();
        appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
        appQueueMap.put(appAddEvent.getApplicationId(), queueName);
      }
    }
  }
@@ -297,7 +296,9 @@ public class ResourceSchedulerWrapper implements
        continue;
      }

      String queue = appQueueMap.get(containerId.getApplicationAttemptId());
      String queue =
          appQueueMap.get(containerId.getApplicationAttemptId()
              .getApplicationId());
      int releasedMemory = 0, releasedVCores = 0;
      if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
        for (RMContainer rmc : app.getLiveContainers()) {
@@ -329,7 +330,7 @@ public class ResourceSchedulerWrapper implements
    // update queue information
    Resource pendingResource = Resources.createResource(0, 0);
    Resource allocatedResource = Resources.createResource(0, 0);
    String queueName = appQueueMap.get(attemptId);
    String queueName = appQueueMap.get(attemptId.getApplicationId());
    // container requested
    for (ResourceRequest request : resourceRequests) {
      if (request.getResourceName().equals(ResourceRequest.ANY)) {

@@ -285,8 +285,11 @@ public class SLSCapacityScheduler extends CapacityScheduler implements
          && schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
        AppAttemptAddedSchedulerEvent appAddEvent =
            (AppAttemptAddedSchedulerEvent) schedulerEvent;
        String queueName = appAddEvent.getQueue();
        appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
        SchedulerApplication app =
            applications.get(appAddEvent.getApplicationAttemptId()
                .getApplicationId());
        appQueueMap.put(appAddEvent.getApplicationAttemptId(), app.getQueue()
            .getQueueName());
      }
    }
  }

@@ -187,6 +187,16 @@ Release 2.4.0 - UNRELEASED
    YARN-1307. Redesign znode structure for Zookeeper based RM state-store for
    better organization and scalability. (Tsuyoshi OZAWA via vinodkv)

    YARN-1172. Convert SecretManagers in RM to services (Tsuyoshi OZAWA via kasha)

    YARN-1523. Use StandbyException instead of RMNotYetReadyException (kasha)

    YARN-1541. Changed ResourceManager to invalidate ApplicationMaster host/port
    information once an AM crashes. (Jian He via vinodkv)

    YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize
    app-attempts separately from apps. (Jian He via vinodkv)

  OPTIMIZATIONS

  BUG FIXES
@@ -267,6 +277,21 @@ Release 2.4.0 - UNRELEASED
    YARN-1451. TestResourceManager relies on the scheduler assigning multiple
    containers in a single node update. (Sandy Ryza via kasha)

    YARN-1527. Fix yarn rmadmin command to print the correct usage info.
    (Akira AJISAKA via jianhe)

    YARN-1522. Fixed a race condition in the test TestApplicationCleanup that was
    causing it to randomly fail. (Liyin Liang via vinodkv)

    YARN-1549. Fixed a bug in ResourceManager's ApplicationMasterService that
    was causing unamanged AMs to not finish correctly. (haosdent via vinodkv)

    YARN-1559. Race between ServerRMProxy and ClientRMProxy setting
    RMProxy#INSTANCE. (kasha and vinodkv via kasha)

    YARN-1560. Fixed TestYarnClient#testAMMRTokens failure with null AMRM token.
    (Ted Yu via jianhe)

Release 2.3.0 - UNRELEASED

  INCOMPATIBLE CHANGES
@@ -473,6 +498,9 @@ Release 2.2.0 - 2013-10-13
    YARN-1278. Fixed NodeManager to not delete local resources for apps on resync
    command from RM - a bug caused by YARN-1149. (Hitesh Shah via vinodkv)

    YARN-1463. Tests should avoid starting http-server where possible or creates
    spnego keytab/principals (vinodkv via kasha)

Release 2.1.1-beta - 2013-09-23

  INCOMPATIBLE CHANGES

@@ -309,13 +309,4 @@
    <Class name="org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore" />
    <Bug pattern="IS2_INCONSISTENT_SYNC" />
  </Match>

  <!-- Ignore INSTANCE not being final as it is created in sub-classes -->
  <Match>
    <Class name="org.apache.hadoop.yarn.client.RMProxy" />
    <Field name="INSTANCE" />
    <Bug pattern="MS_SHOULD_BE_FINAL"/>
  </Match>


</FindBugsFilter>

@@ -24,10 +24,10 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.classification.InterfaceStability.Stable;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.ResourceOption;
import org.apache.hadoop.yarn.exceptions.RMNotYetActiveException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse;
@@ -51,25 +51,25 @@ public interface ResourceManagerAdministrationProtocol extends GetUserMappingsPr
  @Public
  @Stable
  public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request)
      throws RMNotYetActiveException, YarnException, IOException;
      throws StandbyException, YarnException, IOException;

  @Public
  @Stable
  public RefreshNodesResponse refreshNodes(RefreshNodesRequest request)
      throws RMNotYetActiveException, YarnException, IOException;
      throws StandbyException, YarnException, IOException;

  @Public
  @Stable
  public RefreshSuperUserGroupsConfigurationResponse
      refreshSuperUserGroupsConfiguration(
          RefreshSuperUserGroupsConfigurationRequest request)
      throws RMNotYetActiveException, YarnException, IOException;
      throws StandbyException, YarnException, IOException;

  @Public
  @Stable
  public RefreshUserToGroupsMappingsResponse refreshUserToGroupsMappings(
      RefreshUserToGroupsMappingsRequest request)
      throws RMNotYetActiveException, YarnException, IOException;
      throws StandbyException, YarnException, IOException;

  @Public
  @Stable