Merging r1616894 through r1617376 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-6584@1617377 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jing Zhao 2014-08-11 22:27:50 +00:00
commit 04884cd530
56 changed files with 1692 additions and 870 deletions

View File

@ -199,6 +199,9 @@ Trunk (Unreleased)
HADOOP-10936. Change default KeyProvider bitlength to 128. (wang)
HADOOP-10224. JavaKeyStoreProvider has to protect against corrupting
underlying store. (asuresh via tucu)
BUG FIXES
HADOOP-9451. Fault single-layer config if node group topology is enabled.
@ -421,6 +424,9 @@ Trunk (Unreleased)
HADOOP-10939. Fix TestKeyProviderFactory testcases to use default 128 bit
length keys. (Arun Suresh via wang)
HADOOP-10862. Miscellaneous trivial corrections to KMS classes.
(asuresh via tucu)
OPTIMIZATIONS
HADOOP-7761. Improve the performance of raw comparisons. (todd)
@ -547,6 +553,9 @@ Release 2.6.0 - UNRELEASED
HADOOP-10929. Typo in Configuration.getPasswordFromCredentialProviders
(lmccay via brandonli)
HADOOP-10402. Configuration.getValByRegex does not substitute for
variables. (Robert Kanter via kasha)
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -2755,7 +2755,8 @@ public Map<String,String> getValByRegex(String regex) {
item.getValue() instanceof String) {
m = p.matcher((String)item.getKey());
if(m.find()) { // match
result.put((String) item.getKey(), (String) item.getValue());
result.put((String) item.getKey(),
substituteVars(getProps().getProperty((String) item.getKey())));
}
}
}
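
With this change, values returned by getValByRegex() go through the same ${var} expansion as Configuration.get(). A minimal sketch of the resulting behavior (property names here are illustrative, not from the patch):

import java.util.Map;
import org.apache.hadoop.conf.Configuration;

Configuration conf = new Configuration(false);
conf.set("my.base", "/data");
conf.set("my.file", "${my.base}/file.txt");
// Before the fix the returned value was the raw "${my.base}/file.txt";
// after it, the variable is substituted to "/data/file.txt".
Map<String, String> matches = conf.getValByRegex("^my.*file$");
System.out.println(matches.get("my.file")); // -> /data/file.txt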

View File

@ -27,8 +27,11 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.ProviderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.crypto.spec.SecretKeySpec;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
@ -80,6 +83,9 @@
@InterfaceAudience.Private
public class JavaKeyStoreProvider extends KeyProvider {
private static final String KEY_METADATA = "KeyMetadata";
private static Logger LOG =
LoggerFactory.getLogger(JavaKeyStoreProvider.class);
public static final String SCHEME_NAME = "jceks";
public static final String KEYSTORE_PASSWORD_FILE_KEY =
@ -115,6 +121,10 @@ private JavaKeyStoreProvider(URI uri, Configuration conf) throws IOException {
if (pwFile != null) {
ClassLoader cl = Thread.currentThread().getContextClassLoader();
URL pwdFile = cl.getResource(pwFile);
if (pwdFile == null) {
// Provided Password file does not exist
throw new IOException("Password file does not exists");
}
if (pwdFile != null) {
InputStream is = pwdFile.openStream();
try {
@ -129,19 +139,25 @@ private JavaKeyStoreProvider(URI uri, Configuration conf) throws IOException {
password = KEYSTORE_PASSWORD_DEFAULT;
}
try {
Path oldPath = constructOldPath(path);
Path newPath = constructNewPath(path);
keyStore = KeyStore.getInstance(SCHEME_NAME);
FsPermission perm = null;
if (fs.exists(path)) {
// save off permissions in case we need to
// rewrite the keystore in flush()
FileStatus s = fs.getFileStatus(path);
permissions = s.getPermission();
keyStore.load(fs.open(path), password);
// If a previous flush did not run to completion, _NEW may be left
// behind; it should not exist alongside the current file
if (fs.exists(newPath)) {
throw new IOException(
String.format("Keystore not loaded due to some inconsistency "
+ "('%s' and '%s' should not exist together)!!", path, newPath));
}
perm = tryLoadFromPath(path, oldPath);
} else {
permissions = new FsPermission("700");
// required to create an empty keystore. *sigh*
keyStore.load(null, password);
perm = tryLoadIncompleteFlush(oldPath, newPath);
}
// Need to save off permissions in case we need to
// rewrite the keystore in flush()
permissions = perm;
} catch (KeyStoreException e) {
throw new IOException("Can't create keystore", e);
} catch (NoSuchAlgorithmException e) {
@ -154,6 +170,136 @@ private JavaKeyStoreProvider(URI uri, Configuration conf) throws IOException {
writeLock = lock.writeLock();
}
/**
* Try loading from the user-specified path; if that fails for any reason
* other than a bad/wrong password, fall back to the backup path (_OLD).
* @param path Actual path to load from
* @param backupPath Backup path (_OLD)
* @return The permissions of the loaded file
* @throws NoSuchAlgorithmException
* @throws CertificateException
* @throws IOException
*/
private FsPermission tryLoadFromPath(Path path, Path backupPath)
throws NoSuchAlgorithmException, CertificateException,
IOException {
FsPermission perm = null;
try {
perm = loadFromPath(path, password);
// Remove _OLD if it exists
if (fs.exists(backupPath)) {
fs.delete(backupPath, true);
}
LOG.debug("KeyStore loaded successfully !!");
} catch (IOException ioe) {
// If the file is corrupted for a reason other than a wrong
// password, try the _OLD file if it exists
if (!isBadorWrongPassword(ioe)) {
perm = loadFromPath(backupPath, password);
// Rename CURRENT to CORRUPTED
renameOrFail(path, new Path(path.toString() + "_CORRUPTED_"
+ System.currentTimeMillis()));
renameOrFail(backupPath, path);
LOG.debug(String.format(
"KeyStore loaded successfully from '%s' since '%s'"
+ "was corrupted !!", backupPath, path));
} else {
throw ioe;
}
}
return perm;
}
/**
* A flush might have been interrupted, in which case either the _NEW
* or the _OLD file might exist. This method tries to load the KeyStore
* from one of these intermediate files.
* @param oldPath the _OLD file created during flush
* @param newPath the _NEW file created during flush
* @return The permissions of the loaded file
* @throws IOException
* @throws NoSuchAlgorithmException
* @throws CertificateException
*/
private FsPermission tryLoadIncompleteFlush(Path oldPath, Path newPath)
throws IOException, NoSuchAlgorithmException, CertificateException {
FsPermission perm = null;
// Check if _NEW exists (in case flush had finished writing but not
// completed the renaming)
if (fs.exists(newPath)) {
perm = loadAndReturnPerm(newPath, oldPath);
}
// Try loading from _OLD (an earlier flush might not have completed
// writing _NEW)
if ((perm == null) && fs.exists(oldPath)) {
perm = loadAndReturnPerm(oldPath, newPath);
}
// If nothing was loaded, a load with a null stream is
// required to create an empty keystore. *sigh*
if (perm == null) {
keyStore.load(null, password);
LOG.debug("KeyStore initialized anew successfully !!");
perm = new FsPermission("700");
}
return perm;
}
private FsPermission loadAndReturnPerm(Path pathToLoad, Path pathToDelete)
throws NoSuchAlgorithmException, CertificateException,
IOException {
FsPermission perm = null;
try {
perm = loadFromPath(pathToLoad, password);
renameOrFail(pathToLoad, path);
LOG.debug(String.format("KeyStore loaded successfully from '%s'!!",
pathToLoad));
if (fs.exists(pathToDelete)) {
fs.delete(pathToDelete, true);
}
} catch (IOException e) {
// Check for a password issue: we don't want to trash the file
// due to a wrong password
if (isBadorWrongPassword(e)) {
throw e;
}
}
return perm;
}
private boolean isBadorWrongPassword(IOException ioe) {
// Per the documentation, this is supposed to be the way to figure
// out whether the password was correct
if (ioe.getCause() instanceof UnrecoverableKeyException) {
return true;
}
// Unfortunately that doesn't seem to work.
// Workaround:
if ((ioe.getCause() == null)
&& (ioe.getMessage() != null)
&& ((ioe.getMessage().contains("Keystore was tampered")) || (ioe
.getMessage().contains("password was incorrect")))) {
return true;
}
return false;
}
private FsPermission loadFromPath(Path p, char[] password)
throws IOException, NoSuchAlgorithmException, CertificateException {
FileStatus s = fs.getFileStatus(p);
keyStore.load(fs.open(p), password);
return s.getPermission();
}
private Path constructNewPath(Path path) {
Path newPath = new Path(path.toString() + "_NEW");
return newPath;
}
private Path constructOldPath(Path path) {
Path oldPath = new Path(path.toString() + "_OLD");
return oldPath;
}
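
Taken together, the constructor path and the helpers above implement a small recovery routine over the CURRENT, _NEW and _OLD siblings. A hedged restatement as a single method (a sketch only: keystore loading is elided as comments, the corrupt-file fallbacks handled by tryLoadFromPath/tryLoadIncompleteFlush are ignored, and FileSystem/Path are the org.apache.hadoop.fs types used above):

static void recover(FileSystem fs, Path cur, Path nw, Path old)
    throws IOException {
  if (fs.exists(cur)) {
    // CURRENT and _NEW together means an interrupted, inconsistent flush
    if (fs.exists(nw)) {
      throw new IOException(cur + " and " + nw + " should not exist together");
    }
    // load CURRENT; a stale _OLD left by a completed flush is deleted
    if (fs.exists(old)) {
      fs.delete(old, true);
    }
  } else if (fs.exists(nw)) {
    // flush wrote _NEW fully but crashed before renaming it to CURRENT
    // load _NEW, then:
    fs.rename(nw, cur);
  } else if (fs.exists(old)) {
    // flush crashed after moving CURRENT to _OLD, before _NEW was complete
    // load _OLD, then:
    fs.rename(old, cur);
  }
  // else: first use; an empty keystore is initialized
}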
@Override
public KeyVersion getKeyVersion(String versionName) throws IOException {
readLock.lock();
@ -352,11 +498,22 @@ public KeyVersion rollNewVersion(String name,
@Override
public void flush() throws IOException {
Path newPath = constructNewPath(path);
Path oldPath = constructOldPath(path);
writeLock.lock();
try {
if (!changed) {
return;
}
// These might exist if a backup was restored, etc.
if (fs.exists(newPath)) {
renameOrFail(newPath, new Path(newPath.toString()
+ "_ORPHANED_" + System.currentTimeMillis()));
}
if (fs.exists(oldPath)) {
renameOrFail(oldPath, new Path(oldPath.toString()
+ "_ORPHANED_" + System.currentTimeMillis()));
}
// put all of the updates into the keystore
for(Map.Entry<String, Metadata> entry: cache.entrySet()) {
try {
@ -366,25 +523,77 @@ public void flush() throws IOException {
throw new IOException("Can't set metadata key " + entry.getKey(),e );
}
}
// Save old File first
boolean fileExisted = backupToOld(oldPath);
// write out the keystore
FSDataOutputStream out = FileSystem.create(fs, path, permissions);
// Write to the _NEW path first:
try {
keyStore.store(out, password);
} catch (KeyStoreException e) {
throw new IOException("Can't store keystore " + this, e);
} catch (NoSuchAlgorithmException e) {
throw new IOException("No such algorithm storing keystore " + this, e);
} catch (CertificateException e) {
throw new IOException("Certificate exception storing keystore " + this,
e);
writeToNew(newPath);
} catch (IOException ioe) {
// rename _OLD back to CURRENT and rethrow the exception
revertFromOld(oldPath, fileExisted);
throw ioe;
}
out.close();
// Rename _NEW to CURRENT and delete _OLD
cleanupNewAndOld(newPath, oldPath);
changed = false;
} finally {
writeLock.unlock();
}
}
private void cleanupNewAndOld(Path newPath, Path oldPath) throws IOException {
// Rename _NEW to CURRENT
renameOrFail(newPath, path);
// Delete _OLD
if (fs.exists(oldPath)) {
fs.delete(oldPath, true);
}
}
private void writeToNew(Path newPath) throws IOException {
FSDataOutputStream out =
FileSystem.create(fs, newPath, permissions);
try {
keyStore.store(out, password);
} catch (KeyStoreException e) {
throw new IOException("Can't store keystore " + this, e);
} catch (NoSuchAlgorithmException e) {
throw new IOException(
"No such algorithm storing keystore " + this, e);
} catch (CertificateException e) {
throw new IOException(
"Certificate exception storing keystore " + this, e);
}
out.close();
}
private void revertFromOld(Path oldPath, boolean fileExisted)
throws IOException {
if (fileExisted) {
renameOrFail(oldPath, path);
}
}
private boolean backupToOld(Path oldPath)
throws IOException {
boolean fileExisted = false;
if (fs.exists(path)) {
renameOrFail(path, oldPath);
fileExisted = true;
}
return fileExisted;
}
private void renameOrFail(Path src, Path dest)
throws IOException {
if (!fs.rename(src, dest)) {
throw new IOException("Rename unsuccessful : "
+ String.format("'%s' to '%s'", src, dest));
}
}
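
For reference, the write protocol that flush() and the helpers above establish, in order (a summary of the hunks in this file, not new behavior):

// 1. Rename any leftover _NEW/_OLD to *_ORPHANED_<timestamp>.
// 2. backupToOld(oldPath): rename CURRENT to _OLD, if CURRENT existed.
// 3. writeToNew(newPath): write the keystore to _NEW; on IOException,
//    revertFromOld(...) renames _OLD back to CURRENT and the error is rethrown.
// 4. cleanupNewAndOld(...): rename _NEW to CURRENT, then delete _OLD.
// A crash between any two steps leaves a state that the constructor's
// tryLoadFromPath/tryLoadIncompleteFlush recovery can repair.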
@Override
public String toString() {
return uri.toString();

View File

@ -512,7 +512,7 @@ private List<String[]> createKeySets(String[] keyNames) {
List<String> batch = new ArrayList<String>();
int batchLen = 0;
for (String name : keyNames) {
int additionalLen = KMSRESTConstants.KEY_OP.length() + 1 + name.length();
int additionalLen = KMSRESTConstants.KEY.length() + 1 + name.length();
batchLen += additionalLen;
// capping at 1500 to account for the initial URL and encoded names
if (batchLen > 1500) {
@ -536,7 +536,7 @@ public Metadata[] getKeysMetadata(String ... keyNames) throws IOException {
for (String[] keySet : keySets) {
if (keyNames.length > 0) {
Map<String, Object> queryStr = new HashMap<String, Object>();
queryStr.put(KMSRESTConstants.KEY_OP, keySet);
queryStr.put(KMSRESTConstants.KEY, keySet);
URL url = createURL(KMSRESTConstants.KEYS_METADATA_RESOURCE, null,
null, queryStr);
HttpURLConnection conn = createConnection(url, HTTP_GET);

View File

@ -37,7 +37,7 @@ public class KMSRESTConstants {
public static final String EEK_SUB_RESOURCE = "_eek";
public static final String CURRENT_VERSION_SUB_RESOURCE = "_currentversion";
public static final String KEY_OP = "key";
public static final String KEY = "key";
public static final String EEK_OP = "eek_op";
public static final String EEK_GENERATE = "generate";
public static final String EEK_DECRYPT = "decrypt";

View File

@ -178,6 +178,14 @@ public void testVariableSubstitution() throws IOException {
// check that expansion also occurs for getInt()
assertTrue(conf.getInt("intvar", -1) == 42);
assertTrue(conf.getInt("my.int", -1) == 42);
Map<String, String> results = conf.getValByRegex("^my.*file$");
assertTrue(results.keySet().contains("my.relfile"));
assertTrue(results.keySet().contains("my.fullfile"));
assertTrue(results.keySet().contains("my.file"));
assertEquals(-1, results.get("my.relfile").indexOf("${"));
assertEquals(-1, results.get("my.fullfile").indexOf("${"));
assertEquals(-1, results.get("my.file").indexOf("${"));
}
public void testFinalParam() throws IOException {

View File

@ -220,11 +220,76 @@ public void testJksProvider() throws Exception {
assertTrue(s.getPermission().toString().equals("rwx------"));
assertTrue(file + " should exist", file.isFile());
// Corrupt the file and check if JKS can reload from the _OLD file
File oldFile = new File(file.getPath() + "_OLD");
file.renameTo(oldFile);
file.delete();
file.createNewFile();
assertTrue(oldFile.exists());
KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0);
assertTrue(file.exists());
assertTrue(oldFile + "should be deleted", !oldFile.exists());
verifyAfterReload(file, provider);
assertTrue(!oldFile.exists());
// _NEW and current file should not exist together
File newFile = new File(file.getPath() + "_NEW");
newFile.createNewFile();
try {
provider = KeyProviderFactory.getProviders(conf).get(0);
Assert.fail("_NEW and current file should not exist together !!");
} catch (Exception e) {
// Ignore
} finally {
if (newFile.exists()) {
newFile.delete();
}
}
// Load from _NEW file
file.renameTo(newFile);
file.delete();
try {
provider = KeyProviderFactory.getProviders(conf).get(0);
Assert.assertFalse(newFile.exists());
Assert.assertFalse(oldFile.exists());
} catch (Exception e) {
Assert.fail("JKS should load from _NEW file !!");
// Ignore
}
verifyAfterReload(file, provider);
// _NEW exists but is corrupt; must load from _OLD
newFile.createNewFile();
file.renameTo(oldFile);
file.delete();
try {
provider = KeyProviderFactory.getProviders(conf).get(0);
Assert.assertFalse(newFile.exists());
Assert.assertFalse(oldFile.exists());
} catch (Exception e) {
Assert.fail("JKS should load from _OLD file !!");
// Ignore
} finally {
if (newFile.exists()) {
newFile.delete();
}
}
verifyAfterReload(file, provider);
// check permission retention after explicit change
fs.setPermission(path, new FsPermission("777"));
checkPermissionRetention(conf, ourUrl, path);
}
private void verifyAfterReload(File file, KeyProvider provider)
throws IOException {
List<String> existingKeys = provider.getKeys();
assertTrue(existingKeys.contains("key4"));
assertTrue(existingKeys.contains("key3"));
assertTrue(file.exists());
}
public void checkPermissionRetention(Configuration conf, String ourUrl, Path path) throws Exception {
KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0);
// let's add a new key and flush and check that permissions are still set to 777

View File

@ -47,7 +47,6 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.security.Principal;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
@ -59,19 +58,14 @@
@Path(KMSRESTConstants.SERVICE_VERSION)
@InterfaceAudience.Private
public class KMS {
public static final String CREATE_KEY = "CREATE_KEY";
public static final String DELETE_KEY = "DELETE_KEY";
public static final String ROLL_NEW_VERSION = "ROLL_NEW_VERSION";
public static final String GET_KEYS = "GET_KEYS";
public static final String GET_KEYS_METADATA = "GET_KEYS_METADATA";
public static final String GET_KEY_VERSIONS = "GET_KEY_VERSIONS";
public static final String GET_METADATA = "GET_METADATA";
public static final String GET_KEY_VERSION = "GET_KEY_VERSION";
public static final String GET_CURRENT_KEY = "GET_CURRENT_KEY";
public static final String GENERATE_EEK = "GENERATE_EEK";
public static final String DECRYPT_EEK = "DECRYPT_EEK";
public static enum KMSOp {
CREATE_KEY, DELETE_KEY, ROLL_NEW_VERSION,
GET_KEYS, GET_KEYS_METADATA,
GET_KEY_VERSIONS, GET_METADATA, GET_KEY_VERSION, GET_CURRENT_KEY,
GENERATE_EEK, DECRYPT_EEK
}
private KeyProviderCryptoExtension provider;
private KMSAudit kmsAudit;
@ -91,22 +85,22 @@ private static Principal getPrincipal(SecurityContext securityContext)
private static final String UNAUTHORIZED_MSG_WITH_KEY =
"User:{0} not allowed to do ''{1}'' on ''{2}''";
"User:%s not allowed to do '%s' on '%s'";
private static final String UNAUTHORIZED_MSG_WITHOUT_KEY =
"User:{0} not allowed to do ''{1}''";
"User:%s not allowed to do '%s'";
private void assertAccess(KMSACLs.Type aclType, Principal principal,
String operation) throws AccessControlException {
KMSOp operation) throws AccessControlException {
assertAccess(aclType, principal, operation, null);
}
private void assertAccess(KMSACLs.Type aclType, Principal principal,
String operation, String key) throws AccessControlException {
KMSOp operation, String key) throws AccessControlException {
if (!KMSWebApp.getACLs().hasAccess(aclType, principal.getName())) {
KMSWebApp.getUnauthorizedCallsMeter().mark();
kmsAudit.unauthorized(principal, operation, key);
throw new AuthorizationException(MessageFormat.format(
throw new AuthorizationException(String.format(
(key != null) ? UNAUTHORIZED_MSG_WITH_KEY
: UNAUTHORIZED_MSG_WITHOUT_KEY,
principal.getName(), operation, key));
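
The message-template change above (MessageFormat's {0} placeholders to String.format's %s) preserves the output; a small illustration of why the single quotes were doubled before (the user and op values here are made up):

import java.text.MessageFormat;

// MessageFormat treats a single quote as an escape character, so a literal
// quote must be written as ''; String.format passes quotes through verbatim.
String mf = MessageFormat.format(
    "User:{0} not allowed to do ''{1}'' on ''{2}''", "alice", "CREATE_KEY", "k1");
String sf = String.format(
    "User:%s not allowed to do '%s' on '%s'", "alice", "CREATE_KEY", "k1");
// both yield: User:alice not allowed to do 'CREATE_KEY' on 'k1'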
@ -135,7 +129,7 @@ public Response createKey(@Context SecurityContext securityContext,
Principal user = getPrincipal(securityContext);
String name = (String) jsonKey.get(KMSRESTConstants.NAME_FIELD);
KMSClientProvider.checkNotEmpty(name, KMSRESTConstants.NAME_FIELD);
assertAccess(KMSACLs.Type.CREATE, user, CREATE_KEY, name);
assertAccess(KMSACLs.Type.CREATE, user, KMSOp.CREATE_KEY, name);
String cipher = (String) jsonKey.get(KMSRESTConstants.CIPHER_FIELD);
String material = (String) jsonKey.get(KMSRESTConstants.MATERIAL_FIELD);
int length = (jsonKey.containsKey(KMSRESTConstants.LENGTH_FIELD))
@ -146,7 +140,7 @@ public Response createKey(@Context SecurityContext securityContext,
jsonKey.get(KMSRESTConstants.ATTRIBUTES_FIELD);
if (material != null) {
assertAccess(KMSACLs.Type.SET_KEY_MATERIAL, user,
CREATE_KEY + " with user provided material", name);
KMSOp.CREATE_KEY, name);
}
KeyProvider.Options options = new KeyProvider.Options(
KMSWebApp.getConfiguration());
@ -165,7 +159,7 @@ public Response createKey(@Context SecurityContext securityContext,
provider.flush();
kmsAudit.ok(user, CREATE_KEY, name, "UserProvidedMaterial:" +
kmsAudit.ok(user, KMSOp.CREATE_KEY, name, "UserProvidedMaterial:" +
(material != null) + " Description:" + description);
if (!KMSWebApp.getACLs().hasAccess(KMSACLs.Type.GET, user.getName())) {
@ -186,12 +180,12 @@ public Response deleteKey(@Context SecurityContext securityContext,
@PathParam("name") String name) throws Exception {
KMSWebApp.getAdminCallsMeter().mark();
Principal user = getPrincipal(securityContext);
assertAccess(KMSACLs.Type.DELETE, user, DELETE_KEY, name);
assertAccess(KMSACLs.Type.DELETE, user, KMSOp.DELETE_KEY, name);
KMSClientProvider.checkNotEmpty(name, "name");
provider.deleteKey(name);
provider.flush();
kmsAudit.ok(user, DELETE_KEY, name, "");
kmsAudit.ok(user, KMSOp.DELETE_KEY, name, "");
return Response.ok().build();
}
@ -205,13 +199,13 @@ public Response rolloverKey(@Context SecurityContext securityContext,
throws Exception {
KMSWebApp.getAdminCallsMeter().mark();
Principal user = getPrincipal(securityContext);
assertAccess(KMSACLs.Type.ROLLOVER, user, ROLL_NEW_VERSION, name);
assertAccess(KMSACLs.Type.ROLLOVER, user, KMSOp.ROLL_NEW_VERSION, name);
KMSClientProvider.checkNotEmpty(name, "name");
String material = (String)
jsonMaterial.get(KMSRESTConstants.MATERIAL_FIELD);
if (material != null) {
assertAccess(KMSACLs.Type.SET_KEY_MATERIAL, user,
ROLL_NEW_VERSION + " with user provided material", name);
KMSOp.ROLL_NEW_VERSION, name);
}
KeyProvider.KeyVersion keyVersion = (material != null)
? provider.rollNewVersion(name, Base64.decodeBase64(material))
@ -219,7 +213,7 @@ public Response rolloverKey(@Context SecurityContext securityContext,
provider.flush();
kmsAudit.ok(user, ROLL_NEW_VERSION, name, "UserProvidedMaterial:" +
kmsAudit.ok(user, KMSOp.ROLL_NEW_VERSION, name, "UserProvidedMaterial:" +
(material != null) + " NewVersion:" + keyVersion.getVersionName());
if (!KMSWebApp.getACLs().hasAccess(KMSACLs.Type.GET, user.getName())) {
@ -233,15 +227,15 @@ public Response rolloverKey(@Context SecurityContext securityContext,
@Path(KMSRESTConstants.KEYS_METADATA_RESOURCE)
@Produces(MediaType.APPLICATION_JSON)
public Response getKeysMetadata(@Context SecurityContext securityContext,
@QueryParam(KMSRESTConstants.KEY_OP) List<String> keyNamesList)
@QueryParam(KMSRESTConstants.KEY) List<String> keyNamesList)
throws Exception {
KMSWebApp.getAdminCallsMeter().mark();
Principal user = getPrincipal(securityContext);
String[] keyNames = keyNamesList.toArray(new String[keyNamesList.size()]);
assertAccess(KMSACLs.Type.GET_METADATA, user, GET_KEYS_METADATA);
assertAccess(KMSACLs.Type.GET_METADATA, user, KMSOp.GET_KEYS_METADATA);
KeyProvider.Metadata[] keysMeta = provider.getKeysMetadata(keyNames);
Object json = KMSServerJSONUtils.toJSON(keyNames, keysMeta);
kmsAudit.ok(user, GET_KEYS_METADATA, "");
kmsAudit.ok(user, KMSOp.GET_KEYS_METADATA, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}
@ -252,9 +246,9 @@ public Response getKeyNames(@Context SecurityContext securityContext)
throws Exception {
KMSWebApp.getAdminCallsMeter().mark();
Principal user = getPrincipal(securityContext);
assertAccess(KMSACLs.Type.GET_KEYS, user, GET_KEYS);
assertAccess(KMSACLs.Type.GET_KEYS, user, KMSOp.GET_KEYS);
Object json = provider.getKeys();
kmsAudit.ok(user, GET_KEYS, "");
kmsAudit.ok(user, KMSOp.GET_KEYS, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}
@ -276,9 +270,9 @@ public Response getMetadata(@Context SecurityContext securityContext,
Principal user = getPrincipal(securityContext);
KMSClientProvider.checkNotEmpty(name, "name");
KMSWebApp.getAdminCallsMeter().mark();
assertAccess(KMSACLs.Type.GET_METADATA, user, GET_METADATA, name);
assertAccess(KMSACLs.Type.GET_METADATA, user, KMSOp.GET_METADATA, name);
Object json = KMSServerJSONUtils.toJSON(name, provider.getMetadata(name));
kmsAudit.ok(user, GET_METADATA, name, "");
kmsAudit.ok(user, KMSOp.GET_METADATA, name, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}
@ -292,9 +286,9 @@ public Response getCurrentVersion(@Context SecurityContext securityContext,
Principal user = getPrincipal(securityContext);
KMSClientProvider.checkNotEmpty(name, "name");
KMSWebApp.getKeyCallsMeter().mark();
assertAccess(KMSACLs.Type.GET, user, GET_CURRENT_KEY, name);
assertAccess(KMSACLs.Type.GET, user, KMSOp.GET_CURRENT_KEY, name);
Object json = KMSServerJSONUtils.toJSON(provider.getCurrentKey(name));
kmsAudit.ok(user, GET_CURRENT_KEY, name, "");
kmsAudit.ok(user, KMSOp.GET_CURRENT_KEY, name, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}
@ -308,9 +302,9 @@ public Response getKeyVersion(@Context SecurityContext securityContext,
KMSClientProvider.checkNotEmpty(versionName, "versionName");
KMSWebApp.getKeyCallsMeter().mark();
KeyVersion keyVersion = provider.getKeyVersion(versionName);
assertAccess(KMSACLs.Type.GET, user, GET_KEY_VERSION);
assertAccess(KMSACLs.Type.GET, user, KMSOp.GET_KEY_VERSION);
if (keyVersion != null) {
kmsAudit.ok(user, GET_KEY_VERSION, keyVersion.getName(), "");
kmsAudit.ok(user, KMSOp.GET_KEY_VERSION, keyVersion.getName(), "");
}
Object json = KMSServerJSONUtils.toJSON(keyVersion);
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
@ -334,7 +328,7 @@ public Response generateEncryptedKeys(
Object retJSON;
if (edekOp.equals(KMSRESTConstants.EEK_GENERATE)) {
assertAccess(KMSACLs.Type.GENERATE_EEK, user, GENERATE_EEK, name);
assertAccess(KMSACLs.Type.GENERATE_EEK, user, KMSOp.GENERATE_EEK, name);
List<EncryptedKeyVersion> retEdeks =
new LinkedList<EncryptedKeyVersion>();
@ -345,7 +339,7 @@ public Response generateEncryptedKeys(
} catch (Exception e) {
throw new IOException(e);
}
kmsAudit.ok(user, GENERATE_EEK, name, "");
kmsAudit.ok(user, KMSOp.GENERATE_EEK, name, "");
retJSON = new ArrayList();
for (EncryptedKeyVersion edek : retEdeks) {
((ArrayList)retJSON).add(KMSServerJSONUtils.toJSON(edek));
@ -380,7 +374,7 @@ public Response decryptEncryptedKey(@Context SecurityContext securityContext,
(String) jsonPayload.get(KMSRESTConstants.MATERIAL_FIELD);
Object retJSON;
if (eekOp.equals(KMSRESTConstants.EEK_DECRYPT)) {
assertAccess(KMSACLs.Type.DECRYPT_EEK, user, DECRYPT_EEK, keyName);
assertAccess(KMSACLs.Type.DECRYPT_EEK, user, KMSOp.DECRYPT_EEK, keyName);
KMSClientProvider.checkNotNull(ivStr, KMSRESTConstants.IV_FIELD);
byte[] iv = Base64.decodeBase64(ivStr);
KMSClientProvider.checkNotNull(encMaterialStr,
@ -391,7 +385,7 @@ public Response decryptEncryptedKey(@Context SecurityContext securityContext,
new KMSClientProvider.KMSEncryptedKeyVersion(keyName, versionName,
iv, KeyProviderCryptoExtension.EEK, encMaterial));
retJSON = KMSServerJSONUtils.toJSON(retKeyVersion);
kmsAudit.ok(user, DECRYPT_EEK, keyName, "");
kmsAudit.ok(user, KMSOp.DECRYPT_EEK, keyName, "");
} else {
throw new IllegalArgumentException("Wrong " + KMSRESTConstants.EEK_OP +
" value, it must be " + KMSRESTConstants.EEK_GENERATE + " or " +
@ -412,9 +406,9 @@ public Response getKeyVersions(@Context SecurityContext securityContext,
Principal user = getPrincipal(securityContext);
KMSClientProvider.checkNotEmpty(name, "name");
KMSWebApp.getKeyCallsMeter().mark();
assertAccess(KMSACLs.Type.GET, user, GET_KEY_VERSIONS, name);
assertAccess(KMSACLs.Type.GET, user, KMSOp.GET_KEY_VERSIONS, name);
Object json = KMSServerJSONUtils.toJSON(provider.getKeyVersions(name));
kmsAudit.ok(user, GET_KEY_VERSIONS, name, "");
kmsAudit.ok(user, KMSOp.GET_KEY_VERSIONS, name, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}

View File

@ -50,11 +50,11 @@ private static class AuditEvent {
private final AtomicLong accessCount = new AtomicLong(-1);
private final String keyName;
private final String user;
private final String op;
private final KMS.KMSOp op;
private final String extraMsg;
private final long startTime = System.currentTimeMillis();
private AuditEvent(String keyName, String user, String op, String msg) {
private AuditEvent(String keyName, String user, KMS.KMSOp op, String msg) {
this.keyName = keyName;
this.user = user;
this.op = op;
@ -77,7 +77,7 @@ public String getUser() {
return user;
}
public String getOp() {
public KMS.KMSOp getOp() {
return op;
}
@ -90,8 +90,9 @@ public static enum OpStatus {
OK, UNAUTHORIZED, UNAUTHENTICATED, ERROR;
}
private static Set<String> AGGREGATE_OPS_WHITELIST = Sets.newHashSet(
KMS.GET_KEY_VERSION, KMS.GET_CURRENT_KEY, KMS.DECRYPT_EEK, KMS.GENERATE_EEK
private static Set<KMS.KMSOp> AGGREGATE_OPS_WHITELIST = Sets.newHashSet(
KMS.KMSOp.GET_KEY_VERSION, KMS.KMSOp.GET_CURRENT_KEY,
KMS.KMSOp.DECRYPT_EEK, KMS.KMSOp.GENERATE_EEK
);
private Cache<String, AuditEvent> cache;
@ -137,10 +138,10 @@ private void logEvent(AuditEvent event) {
event.getExtraMsg());
}
private void op(OpStatus opStatus, final String op, final String user,
private void op(OpStatus opStatus, final KMS.KMSOp op, final String user,
final String key, final String extraMsg) {
if (!Strings.isNullOrEmpty(user) && !Strings.isNullOrEmpty(key)
&& !Strings.isNullOrEmpty(op)
&& (op != null)
&& AGGREGATE_OPS_WHITELIST.contains(op)) {
String cacheKey = createCacheKey(user, key, op);
if (opStatus == OpStatus.UNAUTHORIZED) {
@ -167,7 +168,7 @@ public AuditEvent call() throws Exception {
}
} else {
List<String> kvs = new LinkedList<String>();
if (!Strings.isNullOrEmpty(op)) {
if (op != null) {
kvs.add("op=" + op);
}
if (!Strings.isNullOrEmpty(key)) {
@ -185,16 +186,16 @@ public AuditEvent call() throws Exception {
}
}
public void ok(Principal user, String op, String key,
public void ok(Principal user, KMS.KMSOp op, String key,
String extraMsg) {
op(OpStatus.OK, op, user.getName(), key, extraMsg);
}
public void ok(Principal user, String op, String extraMsg) {
public void ok(Principal user, KMS.KMSOp op, String extraMsg) {
op(OpStatus.OK, op, user.getName(), null, extraMsg);
}
public void unauthorized(Principal user, String op, String key) {
public void unauthorized(Principal user, KMS.KMSOp op, String key) {
op(OpStatus.UNAUTHORIZED, op, user.getName(), key, "");
}
@ -211,7 +212,7 @@ public void unauthenticated(String remoteHost, String method,
+ " URL:" + url + " ErrorMsg:'" + extraMsg + "'");
}
private static String createCacheKey(String user, String key, String op) {
private static String createCacheKey(String user, String key, KMS.KMSOp op) {
return user + "#" + key + "#" + op;
}

View File

@ -17,6 +17,7 @@
*/
package org.apache.hadoop.crypto.key.kms.server;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import java.io.File;
@ -26,6 +27,7 @@
/**
* Utility class to load KMS configuration files.
*/
@InterfaceAudience.Private
public class KMSConfiguration {
public static final String KMS_CONFIG_DIR = "kms.config.dir";

View File

@ -17,12 +17,15 @@
*/
package org.apache.hadoop.crypto.key.kms.server;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.jmx.JMXJsonServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
@InterfaceAudience.Private
public class KMSJMXServlet extends JMXJsonServlet {
@Override

View File

@ -23,6 +23,7 @@
import java.io.PrintStream;
import java.security.Principal;
import org.apache.hadoop.crypto.key.kms.server.KMS.KMSOp;
import org.apache.log4j.LogManager;
import org.apache.log4j.PropertyConfigurator;
import org.junit.After;
@ -82,16 +83,16 @@ private String getAndResetLogOutput() {
public void testAggregation() throws Exception {
Principal luser = Mockito.mock(Principal.class);
Mockito.when(luser.getName()).thenReturn("luser");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DELETE_KEY, "k1", "testmsg");
kmsAudit.ok(luser, KMS.ROLL_NEW_VERSION, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DELETE_KEY, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.ROLL_NEW_VERSION, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
Thread.sleep(1500);
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
Thread.sleep(1500);
String out = getAndResetLogOutput();
System.out.println(out);
@ -110,15 +111,15 @@ public void testAggregation() throws Exception {
public void testAggregationUnauth() throws Exception {
Principal luser = Mockito.mock(Principal.class);
Mockito.when(luser.getName()).thenReturn("luser");
kmsAudit.unauthorized(luser, KMS.GENERATE_EEK, "k2");
kmsAudit.unauthorized(luser, KMSOp.GENERATE_EEK, "k2");
Thread.sleep(1000);
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.unauthorized(luser, KMS.GENERATE_EEK, "k3");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.unauthorized(luser, KMSOp.GENERATE_EEK, "k3");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
Thread.sleep(2000);
String out = getAndResetLogOutput();
System.out.println(out);

View File

@ -724,6 +724,10 @@ READ3Response read(XDR xdr, SecurityHandler securityHandler,
FSDataInputStream fis = clientCache.getDfsInputStream(userName,
Nfs3Utils.getFileIdPath(handle));
if (fis == null) {
return new READ3Response(Nfs3Status.NFS3ERR_ACCES);
}
try {
readCount = fis.read(offset, readbuffer, 0, count);
} catch (IOException e) {

View File

@ -278,13 +278,11 @@ public void testRead() throws Exception {
readReq.serialize(xdr_req);
// Attempt by an unprivileged user should fail.
/* Hits HDFS-6582. It needs to be fixed first.
READ3Response response1 = nfsd.read(xdr_req.asReadOnlyWrap(),
securityHandlerUnpriviledged,
new InetSocketAddress("localhost", 1234));
assertEquals("Incorrect return code:", Nfs3Status.NFS3ERR_ACCES,
response1.getStatus());
*/
// Attempt by a privileged user should pass.
READ3Response response2 = nfsd.read(xdr_req.asReadOnlyWrap(),

View File

@ -407,6 +407,12 @@ Release 2.6.0 - UNRELEASED
HDFS-6828. Separate block replica dispatching from Balancer. (szetszwo via
jing9)
HDFS-6837. Code cleanup for Balancer and Dispatcher. (szetszwo via
jing9)
HDFS-6838. Code cleanup for unnecessary INode replacement.
(Jing Zhao via wheat9)
OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang)
@ -502,6 +508,9 @@ Release 2.6.0 - UNRELEASED
HDFS-6791. A block could remain under replicated if all of its replicas are on
decommissioned nodes. (Ming Ma via jing9)
HDFS-6582. Missing null check in RpcProgramNfs3#read(XDR, SecurityHandler)
(Abhiraj Butala via brandonli)
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -44,7 +44,8 @@
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.BalancerDatanode;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode.StorageGroup;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Source;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Task;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Util;
@ -184,10 +185,10 @@ public class Balancer {
// all data node lists
private final Collection<Source> overUtilized = new LinkedList<Source>();
private final Collection<Source> aboveAvgUtilized = new LinkedList<Source>();
private final Collection<BalancerDatanode.StorageGroup> belowAvgUtilized
= new LinkedList<BalancerDatanode.StorageGroup>();
private final Collection<BalancerDatanode.StorageGroup> underUtilized
= new LinkedList<BalancerDatanode.StorageGroup>();
private final Collection<StorageGroup> belowAvgUtilized
= new LinkedList<StorageGroup>();
private final Collection<StorageGroup> underUtilized
= new LinkedList<StorageGroup>();
/* Check that this Balancer is compatible with the Block Placement Policy
* used by the Namenode.
@ -209,8 +210,22 @@ private static void checkReplicationPolicyCompatibility(Configuration conf
* when connection fails.
*/
Balancer(NameNodeConnector theblockpool, Parameters p, Configuration conf) {
final long movedWinWidth = conf.getLong(
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
final int moverThreads = conf.getInt(
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT);
final int dispatcherThreads = conf.getInt(
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT);
final int maxConcurrentMovesPerNode = conf.getInt(
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
this.dispatcher = new Dispatcher(theblockpool, p.nodesToBeIncluded,
p.nodesToBeExcluded, conf);
p.nodesToBeExcluded, movedWinWidth, moverThreads, dispatcherThreads,
maxConcurrentMovesPerNode, conf);
this.threshold = p.threshold;
this.policy = p.policy;
}
@ -255,7 +270,7 @@ private long init(List<DatanodeStorageReport> reports) {
// over-utilized, above-average, below-average and under-utilized.
long overLoadedBytes = 0L, underLoadedBytes = 0L;
for(DatanodeStorageReport r : reports) {
final BalancerDatanode dn = dispatcher.newDatanode(r);
final DDatanode dn = dispatcher.newDatanode(r);
for(StorageType t : StorageType.asList()) {
final Double utilization = policy.getUtilization(r, t);
if (utilization == null) { // datanode does not have such storage type
@ -268,9 +283,9 @@ private long init(List<DatanodeStorageReport> reports) {
final long maxSize2Move = computeMaxSize2Move(capacity,
getRemaining(r, t), utilizationDiff, threshold);
final BalancerDatanode.StorageGroup g;
final StorageGroup g;
if (utilizationDiff > 0) {
final Source s = dn.addSource(t, utilization, maxSize2Move, dispatcher);
final Source s = dn.addSource(t, maxSize2Move, dispatcher);
if (thresholdDiff <= 0) { // within threshold
aboveAvgUtilized.add(s);
} else {
@ -279,7 +294,7 @@ private long init(List<DatanodeStorageReport> reports) {
}
g = s;
} else {
g = dn.addStorageGroup(t, utilization, maxSize2Move);
g = dn.addStorageGroup(t, maxSize2Move);
if (thresholdDiff <= 0) { // within threshold
belowAvgUtilized.add(g);
} else {
@ -328,7 +343,7 @@ private void logUtilizationCollections() {
logUtilizationCollection("underutilized", underUtilized);
}
private static <T extends BalancerDatanode.StorageGroup>
private static <T extends StorageGroup>
void logUtilizationCollection(String name, Collection<T> items) {
LOG.info(items.size() + " " + name + ": " + items);
}
@ -381,8 +396,7 @@ private void chooseStorageGroups(final Matcher matcher) {
* datanodes or the candidates are source nodes with (utilization > Avg), and
* the others are target nodes with (utilization < Avg).
*/
private <G extends BalancerDatanode.StorageGroup,
C extends BalancerDatanode.StorageGroup>
private <G extends StorageGroup, C extends StorageGroup>
void chooseStorageGroups(Collection<G> groups, Collection<C> candidates,
Matcher matcher) {
for(final Iterator<G> i = groups.iterator(); i.hasNext();) {
@ -398,9 +412,8 @@ void chooseStorageGroups(Collection<G> groups, Collection<C> candidates,
* For the given datanode, choose a candidate and then schedule it.
* @return true if a candidate is chosen; false if no candidate is chosen.
*/
private <C extends BalancerDatanode.StorageGroup>
boolean choose4One(BalancerDatanode.StorageGroup g,
Collection<C> candidates, Matcher matcher) {
private <C extends StorageGroup> boolean choose4One(StorageGroup g,
Collection<C> candidates, Matcher matcher) {
final Iterator<C> i = candidates.iterator();
final C chosen = chooseCandidate(g, i, matcher);
@ -418,8 +431,7 @@ boolean choose4One(BalancerDatanode.StorageGroup g,
return true;
}
private void matchSourceWithTargetToMove(Source source,
BalancerDatanode.StorageGroup target) {
private void matchSourceWithTargetToMove(Source source, StorageGroup target) {
long size = Math.min(source.availableSizeToMove(), target.availableSizeToMove());
final Task task = new Task(target, size);
source.addTask(task);
@ -430,8 +442,7 @@ private void matchSourceWithTargetToMove(Source source,
}
/** Choose a candidate for the given datanode. */
private <G extends BalancerDatanode.StorageGroup,
C extends BalancerDatanode.StorageGroup>
private <G extends StorageGroup, C extends StorageGroup>
C chooseCandidate(G g, Iterator<C> candidates, Matcher matcher) {
if (g.hasSpaceForScheduling()) {
for(; candidates.hasNext(); ) {
@ -439,7 +450,7 @@ C chooseCandidate(G g, Iterator<C> candidates, Matcher matcher) {
if (!c.hasSpaceForScheduling()) {
candidates.remove();
} else if (matcher.match(dispatcher.getCluster(),
g.getDatanode(), c.getDatanode())) {
g.getDatanodeInfo(), c.getDatanodeInfo())) {
return c;
}
}
@ -457,34 +468,15 @@ private void resetData(Configuration conf) {
dispatcher.reset(conf);
}
// Exit status
enum ReturnStatus {
// These int values will map directly to the balancer process's exit code.
SUCCESS(0),
IN_PROGRESS(1),
ALREADY_RUNNING(-1),
NO_MOVE_BLOCK(-2),
NO_MOVE_PROGRESS(-3),
IO_EXCEPTION(-4),
ILLEGAL_ARGS(-5),
INTERRUPTED(-6);
final int code;
ReturnStatus(int code) {
this.code = code;
}
}
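
ReturnStatus is replaced by a shared ExitStatus enum that is defined outside this diff. Judging from the call sites below (getExitCode(), ILLEGAL_ARGUMENTS instead of ILLEGAL_ARGS), it presumably carries the same codes; a sketch consistent with those call sites:

public enum ExitStatus {
  SUCCESS(0), IN_PROGRESS(1), ALREADY_RUNNING(-1), NO_MOVE_BLOCK(-2),
  NO_MOVE_PROGRESS(-3), IO_EXCEPTION(-4), ILLEGAL_ARGUMENTS(-5),
  INTERRUPTED(-6);

  private final int code;

  ExitStatus(int code) { this.code = code; }

  public int getExitCode() { return code; }
}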
/** Run an iteration for all datanodes. */
private ReturnStatus run(int iteration, Formatter formatter,
private ExitStatus run(int iteration, Formatter formatter,
Configuration conf) {
try {
final List<DatanodeStorageReport> reports = dispatcher.init();
final long bytesLeftToMove = init(reports);
if (bytesLeftToMove == 0) {
System.out.println("The cluster is balanced. Exiting...");
return ReturnStatus.SUCCESS;
return ExitStatus.SUCCESS;
} else {
LOG.info( "Need to move "+ StringUtils.byteDesc(bytesLeftToMove)
+ " to make the cluster balanced." );
@ -498,7 +490,7 @@ private ReturnStatus run(int iteration, Formatter formatter,
final long bytesToMove = chooseStorageGroups();
if (bytesToMove == 0) {
System.out.println("No block can be moved. Exiting...");
return ReturnStatus.NO_MOVE_BLOCK;
return ExitStatus.NO_MOVE_BLOCK;
} else {
LOG.info( "Will move " + StringUtils.byteDesc(bytesToMove) +
" in this iteration");
@ -519,19 +511,19 @@ private ReturnStatus run(int iteration, Formatter formatter,
* Exit if no byte has been moved for 5 consecutive iterations.
*/
if (!dispatcher.dispatchAndCheckContinue()) {
return ReturnStatus.NO_MOVE_PROGRESS;
return ExitStatus.NO_MOVE_PROGRESS;
}
return ReturnStatus.IN_PROGRESS;
return ExitStatus.IN_PROGRESS;
} catch (IllegalArgumentException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.ILLEGAL_ARGS;
return ExitStatus.ILLEGAL_ARGUMENTS;
} catch (IOException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.IO_EXCEPTION;
return ExitStatus.IO_EXCEPTION;
} catch (InterruptedException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.INTERRUPTED;
return ExitStatus.INTERRUPTED;
} finally {
dispatcher.shutdownNow();
}
@ -570,14 +562,14 @@ static int run(Collection<URI> namenodes, final Parameters p,
Collections.shuffle(connectors);
for(NameNodeConnector nnc : connectors) {
final Balancer b = new Balancer(nnc, p, conf);
final ReturnStatus r = b.run(iteration, formatter, conf);
final ExitStatus r = b.run(iteration, formatter, conf);
// clean all lists
b.resetData(conf);
if (r == ReturnStatus.IN_PROGRESS) {
if (r == ExitStatus.IN_PROGRESS) {
done = false;
} else if (r != ReturnStatus.SUCCESS) {
} else if (r != ExitStatus.SUCCESS) {
// must be an error status; return.
return r.code;
return r.getExitCode();
}
}
@ -590,7 +582,7 @@ static int run(Collection<URI> namenodes, final Parameters p,
nnc.close();
}
}
return ReturnStatus.SUCCESS.code;
return ExitStatus.SUCCESS.getExitCode();
}
/* Given elapsedTime in ms, return a printable string */
@ -661,10 +653,10 @@ public int run(String[] args) {
return Balancer.run(namenodes, parse(args), conf);
} catch (IOException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.IO_EXCEPTION.code;
return ExitStatus.IO_EXCEPTION.getExitCode();
} catch (InterruptedException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.INTERRUPTED.code;
return ExitStatus.INTERRUPTED.getExitCode();
} finally {
System.out.format("%-24s ", DateFormat.getDateTimeInstance().format(new Date()));
System.out.println("Balancing took " + time2Str(Time.now()-startTime));

View File

@ -48,7 +48,6 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.protocol.Block;
@ -63,6 +62,7 @@
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode.StorageGroup;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
@ -91,7 +91,6 @@ public class Dispatcher {
// minutes
private final NameNodeConnector nnc;
private final KeyManager keyManager;
private final SaslDataTransferClient saslClient;
/** Set of datanodes to be excluded. */
@ -100,11 +99,10 @@ public class Dispatcher {
private final Set<String> includedNodes;
private final Collection<Source> sources = new HashSet<Source>();
private final Collection<BalancerDatanode.StorageGroup> targets
= new HashSet<BalancerDatanode.StorageGroup>();
private final Collection<StorageGroup> targets = new HashSet<StorageGroup>();
private final GlobalBlockMap globalBlocks = new GlobalBlockMap();
private final MovedBlocks<BalancerDatanode.StorageGroup> movedBlocks;
private final MovedBlocks<StorageGroup> movedBlocks;
/** Map (datanodeUuid,storageType -> StorageGroup) */
private final StorageGroupMap storageGroupMap = new StorageGroupMap();
@ -135,8 +133,7 @@ private DBlock get(Block b) {
}
/** Remove all blocks except for the moved blocks. */
private void removeAllButRetain(
MovedBlocks<BalancerDatanode.StorageGroup> movedBlocks) {
private void removeAllButRetain(MovedBlocks<StorageGroup> movedBlocks) {
for (Iterator<Block> i = map.keySet().iterator(); i.hasNext();) {
if (!movedBlocks.contains(i.next())) {
i.remove();
@ -150,17 +147,15 @@ private static String toKey(String datanodeUuid, StorageType storageType) {
return datanodeUuid + ":" + storageType;
}
private final Map<String, BalancerDatanode.StorageGroup> map
= new HashMap<String, BalancerDatanode.StorageGroup>();
private final Map<String, StorageGroup> map = new HashMap<String, StorageGroup>();
BalancerDatanode.StorageGroup get(String datanodeUuid,
StorageType storageType) {
StorageGroup get(String datanodeUuid, StorageType storageType) {
return map.get(toKey(datanodeUuid, storageType));
}
void put(BalancerDatanode.StorageGroup g) {
final String key = toKey(g.getDatanode().getDatanodeUuid(), g.storageType);
final BalancerDatanode.StorageGroup existing = map.put(key, g);
void put(StorageGroup g) {
final String key = toKey(g.getDatanodeInfo().getDatanodeUuid(), g.storageType);
final StorageGroup existing = map.put(key, g);
Preconditions.checkState(existing == null);
}
@ -177,8 +172,8 @@ void clear() {
private class PendingMove {
private DBlock block;
private Source source;
private BalancerDatanode proxySource;
private BalancerDatanode.StorageGroup target;
private DDatanode proxySource;
private StorageGroup target;
private PendingMove() {
}
@ -235,24 +230,24 @@ private boolean markMovedIfGoodBlock(DBlock block) {
* @return true if a proxy is found; otherwise false
*/
private boolean chooseProxySource() {
final DatanodeInfo targetDN = target.getDatanode();
final DatanodeInfo targetDN = target.getDatanodeInfo();
// if node group is supported, first try to add nodes in the same node group
if (cluster.isNodeGroupAware()) {
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameNodeGroup(loc.getDatanode(), targetDN)
for (StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameNodeGroup(loc.getDatanodeInfo(), targetDN)
&& addTo(loc)) {
return true;
}
}
}
// check if there is a replica on the same rack as the target
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanode(), targetDN) && addTo(loc)) {
for (StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanodeInfo(), targetDN) && addTo(loc)) {
return true;
}
}
// find a non-busy replica
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
for (StorageGroup loc : block.getLocations()) {
if (addTo(loc)) {
return true;
}
@ -261,10 +256,10 @@ && addTo(loc)) {
}
/** add to a proxy source for specific block movement */
private boolean addTo(BalancerDatanode.StorageGroup g) {
final BalancerDatanode bdn = g.getBalancerDatanode();
if (bdn.addPendingBlock(this)) {
proxySource = bdn;
private boolean addTo(StorageGroup g) {
final DDatanode dn = g.getDDatanode();
if (dn.addPendingBlock(this)) {
proxySource = dn;
return true;
}
return false;
@ -281,14 +276,13 @@ private void dispatch() {
DataInputStream in = null;
try {
sock.connect(
NetUtils.createSocketAddr(target.getDatanode().getXferAddr()),
NetUtils.createSocketAddr(target.getDatanodeInfo().getXferAddr()),
HdfsServerConstants.READ_TIMEOUT);
/*
* Unfortunately we don't have a good way to know if the Datanode is
* taking a really long time to move a block, OR something has gone
* wrong and it's never going to finish. To deal with this scenario, we
* set a long timeout (20 minutes) to avoid hanging the balancer
* indefinitely.
* set a long timeout (20 minutes) to avoid hanging indefinitely.
*/
sock.setSoTimeout(BLOCK_MOVE_READ_TIMEOUT);
@ -298,9 +292,10 @@ private void dispatch() {
InputStream unbufIn = sock.getInputStream();
ExtendedBlock eb = new ExtendedBlock(nnc.getBlockpoolID(),
block.getBlock());
Token<BlockTokenIdentifier> accessToken = keyManager.getAccessToken(eb);
final KeyManager km = nnc.getKeyManager();
Token<BlockTokenIdentifier> accessToken = km.getAccessToken(eb);
IOStreamPair saslStreams = saslClient.socketSend(sock, unbufOut,
unbufIn, keyManager, accessToken, target.getDatanode());
unbufIn, km, accessToken, target.getDatanodeInfo());
unbufOut = saslStreams.out;
unbufIn = saslStreams.in;
out = new DataOutputStream(new BufferedOutputStream(unbufOut,
@ -314,21 +309,19 @@ private void dispatch() {
LOG.info("Successfully moved " + this);
} catch (IOException e) {
LOG.warn("Failed to move " + this + ": " + e.getMessage());
/*
* proxy or target may have an issue, insert a small delay before using
* these nodes further. This avoids a potential storm of
* "threads quota exceeded" Warnings when the balancer gets out of sync
* with work going on in datanode.
*/
// Proxy or target may have some issues, delay before using these nodes
// further in order to avoid a potential storm of "threads quota
// exceeded" warnings when the dispatcher gets out of sync with work
// going on in datanodes.
proxySource.activateDelay(DELAY_AFTER_ERROR);
target.getBalancerDatanode().activateDelay(DELAY_AFTER_ERROR);
target.getDDatanode().activateDelay(DELAY_AFTER_ERROR);
} finally {
IOUtils.closeStream(out);
IOUtils.closeStream(in);
IOUtils.closeSocket(sock);
proxySource.removePendingBlock(this);
target.getBalancerDatanode().removePendingBlock(this);
target.getDDatanode().removePendingBlock(this);
synchronized (this) {
reset();
@ -342,8 +335,8 @@ private void dispatch() {
/** Send a block replace request to the output stream */
private void sendRequest(DataOutputStream out, ExtendedBlock eb,
Token<BlockTokenIdentifier> accessToken) throws IOException {
new Sender(out).replaceBlock(eb, target.storageType, accessToken, source
.getDatanode().getDatanodeUuid(), proxySource.datanode);
new Sender(out).replaceBlock(eb, target.storageType, accessToken,
source.getDatanodeInfo().getDatanodeUuid(), proxySource.datanode);
}
/** Receive a block copy response from the input stream */
@ -368,8 +361,7 @@ private void reset() {
}
/** A class for keeping track of block locations in the dispatcher. */
private static class DBlock extends
MovedBlocks.Locations<BalancerDatanode.StorageGroup> {
private static class DBlock extends MovedBlocks.Locations<StorageGroup> {
DBlock(Block block) {
super(block);
}
@ -377,10 +369,10 @@ private static class DBlock extends
/** The class represents a desired move. */
static class Task {
private final BalancerDatanode.StorageGroup target;
private final StorageGroup target;
private long size; // bytes scheduled to move
Task(BalancerDatanode.StorageGroup target, long size) {
Task(StorageGroup target, long size) {
this.target = target;
this.size = size;
}
@ -391,28 +383,25 @@ long getSize() {
}
/** A class that keeps track of a datanode. */
static class BalancerDatanode {
static class DDatanode {
/** A group of storages in a datanode with the same storage type. */
class StorageGroup {
final StorageType storageType;
final double utilization;
final long maxSize2Move;
private long scheduledSize = 0L;
private StorageGroup(StorageType storageType, double utilization,
long maxSize2Move) {
private StorageGroup(StorageType storageType, long maxSize2Move) {
this.storageType = storageType;
this.utilization = utilization;
this.maxSize2Move = maxSize2Move;
}
BalancerDatanode getBalancerDatanode() {
return BalancerDatanode.this;
private DDatanode getDDatanode() {
return DDatanode.this;
}
DatanodeInfo getDatanode() {
return BalancerDatanode.this.datanode;
DatanodeInfo getDatanodeInfo() {
return DDatanode.this.datanode;
}
/** Decide if we still need to move more bytes */
@ -447,7 +436,7 @@ String getDisplayName() {
@Override
public String toString() {
return "" + utilization;
return getDisplayName();
}
}
@ -461,10 +450,10 @@ public String toString() {
@Override
public String toString() {
return getClass().getSimpleName() + ":" + datanode + ":" + storageMap;
return getClass().getSimpleName() + ":" + datanode + ":" + storageMap.values();
}
private BalancerDatanode(DatanodeStorageReport r, int maxConcurrentMoves) {
private DDatanode(DatanodeStorageReport r, int maxConcurrentMoves) {
this.datanode = r.getDatanodeInfo();
this.maxConcurrentMoves = maxConcurrentMoves;
this.pendings = new ArrayList<PendingMove>(maxConcurrentMoves);
@ -475,18 +464,14 @@ private void put(StorageType storageType, StorageGroup g) {
Preconditions.checkState(existing == null);
}
StorageGroup addStorageGroup(StorageType storageType, double utilization,
long maxSize2Move) {
final StorageGroup g = new StorageGroup(storageType, utilization,
maxSize2Move);
StorageGroup addStorageGroup(StorageType storageType, long maxSize2Move) {
final StorageGroup g = new StorageGroup(storageType, maxSize2Move);
put(storageType, g);
return g;
}
Source addSource(StorageType storageType, double utilization,
long maxSize2Move, Dispatcher balancer) {
final Source s = balancer.new Source(storageType, utilization,
maxSize2Move, this);
Source addSource(StorageType storageType, long maxSize2Move, Dispatcher d) {
final Source s = d.new Source(storageType, maxSize2Move, this);
put(storageType, s);
return s;
}
@ -528,7 +513,7 @@ synchronized boolean removePendingBlock(PendingMove pendingBlock) {
}
/** A node that can be the source of a block move */
class Source extends BalancerDatanode.StorageGroup {
class Source extends DDatanode.StorageGroup {
private final List<Task> tasks = new ArrayList<Task>(2);
private long blocksToReceive = 0L;
@ -539,9 +524,8 @@ class Source extends BalancerDatanode.StorageGroup {
*/
private final List<DBlock> srcBlocks = new ArrayList<DBlock>();
private Source(StorageType storageType, double utilization,
long maxSize2Move, BalancerDatanode dn) {
dn.super(storageType, utilization, maxSize2Move);
private Source(StorageType storageType, long maxSize2Move, DDatanode dn) {
dn.super(storageType, maxSize2Move);
}
/** Add a task */
@ -565,7 +549,7 @@ Iterator<DBlock> getBlockIterator() {
*/
private long getBlockList() throws IOException {
final long size = Math.min(MAX_BLOCKS_SIZE_TO_FETCH, blocksToReceive);
final BlocksWithLocations newBlocks = nnc.getBlocks(getDatanode(), size);
final BlocksWithLocations newBlocks = nnc.getBlocks(getDatanodeInfo(), size);
long bytesReceived = 0;
for (BlockWithLocations blk : newBlocks.getBlocks()) {
@ -579,7 +563,7 @@ private long getBlockList() throws IOException {
final String[] datanodeUuids = blk.getDatanodeUuids();
final StorageType[] storageTypes = blk.getStorageTypes();
for (int i = 0; i < datanodeUuids.length; i++) {
final BalancerDatanode.StorageGroup g = storageGroupMap.get(
final StorageGroup g = storageGroupMap.get(
datanodeUuids[i], storageTypes[i]);
if (g != null) { // not unknown
block.addLocation(g);
@ -617,7 +601,7 @@ private boolean isGoodBlockCandidate(DBlock block) {
private PendingMove chooseNextMove() {
for (Iterator<Task> i = tasks.iterator(); i.hasNext();) {
final Task task = i.next();
final BalancerDatanode target = task.target.getBalancerDatanode();
final DDatanode target = task.target.getDDatanode();
PendingMove pendingBlock = new PendingMove();
if (target.addPendingBlock(pendingBlock)) {
// target is not busy, so do a tentative block allocation
@ -670,7 +654,7 @@ private void dispatchBlocks() {
final long startTime = Time.monotonicNow();
this.blocksToReceive = 2 * getScheduledSize();
boolean isTimeUp = false;
int noPendingBlockIteration = 0;
int noPendingMoveIteration = 0;
while (!isTimeUp && getScheduledSize() > 0
&& (!srcBlocks.isEmpty() || blocksToReceive > 0)) {
final PendingMove p = chooseNextMove();
@ -699,11 +683,11 @@ public void run() {
return;
}
} else {
// source node cannot find a pendingBlockToMove, iteration +1
noPendingBlockIteration++;
// source node cannot find a pending block to move, iteration +1
noPendingMoveIteration++;
// in case no blocks can be moved for source node's task,
// jump out of while-loop after 5 iterations.
if (noPendingBlockIteration >= MAX_NO_PENDING_MOVE_ITERATIONS) {
if (noPendingMoveIteration >= MAX_NO_PENDING_MOVE_ITERATIONS) {
resetScheduledSize();
}
}
@ -726,29 +710,19 @@ public void run() {
}
}
Dispatcher(NameNodeConnector theblockpool, Set<String> includedNodes,
Set<String> excludedNodes, Configuration conf) {
this.nnc = theblockpool;
this.keyManager = nnc.getKeyManager();
public Dispatcher(NameNodeConnector nnc, Set<String> includedNodes,
Set<String> excludedNodes, long movedWinWidth, int moverThreads,
int dispatcherThreads, int maxConcurrentMovesPerNode, Configuration conf) {
this.nnc = nnc;
this.excludedNodes = excludedNodes;
this.includedNodes = includedNodes;
final long movedWinWidth = conf.getLong(
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
movedBlocks = new MovedBlocks<BalancerDatanode.StorageGroup>(movedWinWidth);
this.movedBlocks = new MovedBlocks<StorageGroup>(movedWinWidth);
this.cluster = NetworkTopology.getInstance(conf);
this.moveExecutor = Executors.newFixedThreadPool(conf.getInt(
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT));
this.dispatchExecutor = Executors.newFixedThreadPool(conf.getInt(
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT));
this.maxConcurrentMovesPerNode = conf.getInt(
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
this.moveExecutor = Executors.newFixedThreadPool(moverThreads);
this.dispatchExecutor = Executors.newFixedThreadPool(dispatcherThreads);
this.maxConcurrentMovesPerNode = maxConcurrentMovesPerNode;
final boolean fallbackToSimpleAuthAllowed = conf.getBoolean(
CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY,
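With this refactoring the Dispatcher no longer reads its tuning knobs from Configuration itself. A minimal caller-side sketch, assuming the Balancer now supplies them (nnc, includedNodes and excludedNodes are placeholders for values the caller already holds; the actual Balancer code is not part of this hunk):
final long movedWinWidth = conf.getLong(
    DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
    DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
final int moverThreads = conf.getInt(
    DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
    DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT);
final int dispatcherThreads = conf.getInt(
    DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
    DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT);
final int maxConcurrentMoves = conf.getInt(
    DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
    DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
// the new constructor takes the values explicitly instead of re-reading them
final Dispatcher dispatcher = new Dispatcher(nnc, includedNodes, excludedNodes,
    movedWinWidth, moverThreads, dispatcherThreads, maxConcurrentMoves, conf);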
@ -784,7 +758,7 @@ long bytesToMove() {
return b;
}
void add(Source source, BalancerDatanode.StorageGroup target) {
void add(Source source, StorageGroup target) {
sources.add(source);
targets.add(target);
}
@ -826,8 +800,8 @@ List<DatanodeStorageReport> init() throws IOException {
return trimmed;
}
public BalancerDatanode newDatanode(DatanodeStorageReport r) {
return new BalancerDatanode(r, maxConcurrentMovesPerNode);
public DDatanode newDatanode(DatanodeStorageReport r) {
return new DDatanode(r, maxConcurrentMovesPerNode);
}
public boolean dispatchAndCheckContinue() throws InterruptedException {
@ -884,8 +858,8 @@ static void setBlockMoveWaitTime(long time) {
private void waitForMoveCompletion() {
for(;;) {
boolean empty = true;
for (BalancerDatanode.StorageGroup t : targets) {
if (!t.getBalancerDatanode().isPendingQEmpty()) {
for (StorageGroup t : targets) {
if (!t.getDDatanode().isPendingQEmpty()) {
empty = false;
break;
}
@ -907,8 +881,8 @@ private void waitForMoveCompletion() {
* 2. the block does not have a replica on the target;
* 3. doing the move does not reduce the number of racks that the block has
*/
private boolean isGoodBlockCandidate(Source source,
BalancerDatanode.StorageGroup target, DBlock block) {
private boolean isGoodBlockCandidate(Source source, StorageGroup target,
DBlock block) {
if (source.storageType != target.storageType) {
return false;
}
@ -933,17 +907,17 @@ && isOnSameNodeGroupWithReplicas(target, block, source)) {
* Determine whether moving the given block replica from source to target
* would reduce the number of racks of the block replicas.
*/
private boolean reduceNumOfRacks(Source source,
BalancerDatanode.StorageGroup target, DBlock block) {
final DatanodeInfo sourceDn = source.getDatanode();
if (cluster.isOnSameRack(sourceDn, target.getDatanode())) {
private boolean reduceNumOfRacks(Source source, StorageGroup target,
DBlock block) {
final DatanodeInfo sourceDn = source.getDatanodeInfo();
if (cluster.isOnSameRack(sourceDn, target.getDatanodeInfo())) {
// source and target are on the same rack
return false;
}
boolean notOnSameRack = true;
synchronized (block) {
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanode(), target.getDatanode())) {
for (StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanodeInfo(), target.getDatanodeInfo())) {
notOnSameRack = false;
break;
}
@ -953,8 +927,8 @@ private boolean reduceNumOfRacks(Source source,
// target is not on the same rack as any replica
return false;
}
for (BalancerDatanode.StorageGroup g : block.getLocations()) {
if (g != source && cluster.isOnSameRack(g.getDatanode(), sourceDn)) {
for (StorageGroup g : block.getLocations()) {
if (g != source && cluster.isOnSameRack(g.getDatanodeInfo(), sourceDn)) {
// source is on the same rack of another replica
return false;
}
@ -971,10 +945,10 @@ private boolean reduceNumOfRacks(Source source,
* group with target
*/
private boolean isOnSameNodeGroupWithReplicas(
BalancerDatanode.StorageGroup target, DBlock block, Source source) {
final DatanodeInfo targetDn = target.getDatanode();
for (BalancerDatanode.StorageGroup g : block.getLocations()) {
if (g != source && cluster.isOnSameNodeGroup(g.getDatanode(), targetDn)) {
StorageGroup target, DBlock block, Source source) {
final DatanodeInfo targetDn = target.getDatanodeInfo();
for (StorageGroup g : block.getLocations()) {
if (g != source && cluster.isOnSameNodeGroup(g.getDatanodeInfo(), targetDn)) {
return true;
}
}

View File

@ -15,22 +15,30 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
import org.apache.hadoop.yarn.api.records.ApplicationId;
public class RMAppNewSavedEvent extends RMAppEvent {
private final Exception storedException;
public RMAppNewSavedEvent(ApplicationId appId, Exception storedException) {
super(appId, RMAppEventType.APP_NEW_SAVED);
this.storedException = storedException;
}
public Exception getStoredException() {
return storedException;
}
}
package org.apache.hadoop.hdfs.server.balancer;
/**
* Exit status - The value associated with each exit status is directly mapped
* to the process's exit code on the command line.
*/
public enum ExitStatus {
SUCCESS(0),
IN_PROGRESS(1),
ALREADY_RUNNING(-1),
NO_MOVE_BLOCK(-2),
NO_MOVE_PROGRESS(-3),
IO_EXCEPTION(-4),
ILLEGAL_ARGUMENTS(-5),
INTERRUPTED(-6);
private final int code;
private ExitStatus(int code) {
this.code = code;
}
/** @return the command line exit code. */
public int getExitCode() {
return code;
}
}
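The tests updated later in this change compare Balancer.run's return value against these codes via getExitCode(). As a hedged illustration of the intended command-line mapping (the actual tool wiring is not part of this hunk; namenodes, params and conf stand in for values the surrounding code would supply):
int r;
try {
  r = Balancer.run(namenodes, params, conf); // returns one of the codes above
} catch (IOException e) {
  r = ExitStatus.IO_EXCEPTION.getExitCode();
} catch (InterruptedException e) {
  r = ExitStatus.INTERRUPTED.getExitCode();
}
System.exit(r); // the enum value becomes the process exit code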

View File

@ -768,8 +768,6 @@ private static void validateRenameSource(String src, INodesInPath srcIIP)
checkSnapshot(srcInode, null);
}
private class RenameOperation {
private final INodesInPath srcIIP;
private final INodesInPath dstIIP;
@ -802,7 +800,7 @@ private RenameOperation(String src, String dst, INodesInPath srcIIP, INodesInPat
// snapshot is taken on the dst tree, changes will be recorded in the latest
// snapshot of the src tree.
if (isSrcInSnapshot) {
srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
srcChild.recordModification(srcIIP.getLatestSnapshotId());
}
// check srcChild for reference
@ -932,8 +930,7 @@ Block[] unprotectedSetReplication(String src, short replication,
updateCount(iip, 0, dsDelta, true);
}
file = file.setFileReplication(replication, iip.getLatestSnapshotId(),
inodeMap);
file.setFileReplication(replication, iip.getLatestSnapshotId());
final short newBR = file.getBlockReplication();
// check newBR < oldBR case.
@ -1216,8 +1213,7 @@ long unprotectedDelete(INodesInPath iip, BlocksMapUpdateInfo collectedBlocks,
// record modification
final int latestSnapshot = iip.getLatestSnapshotId();
targetNode = targetNode.recordModification(latestSnapshot);
iip.setLastINode(targetNode);
targetNode.recordModification(latestSnapshot);
// Remove the node from the namespace
long removed = removeLastINode(iip);
@ -2126,7 +2122,7 @@ INodeDirectory unprotectedSetQuota(String src, long nsQuota, long dsQuota)
}
final int latest = iip.getLatestSnapshotId();
dirNode = dirNode.recordModification(latest);
dirNode.recordModification(latest);
dirNode.setQuota(nsQuota, dsQuota);
return dirNode;
}

View File

@ -2515,7 +2515,7 @@ LocatedBlock prepareFileForWrite(String src, INodeFile file,
boolean writeToEditLog,
int latestSnapshot, boolean logRetryCache)
throws IOException {
file = file.recordModification(latestSnapshot);
file.recordModification(latestSnapshot);
final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine);
leaseManager.addLease(cons.getFileUnderConstructionFeature()
@ -4214,7 +4214,7 @@ private void finalizeINodeFileUnderConstruction(String src,
Preconditions.checkArgument(uc != null);
leaseManager.removeLease(uc.getClientName(), src);
pendingFile = pendingFile.recordModification(latestSnapshot);
pendingFile.recordModification(latestSnapshot);
// The file is no longer pending.
// Create permanent INode, update blocks. No need to replace the inode here

View File

@ -97,9 +97,9 @@ public final String getUserName() {
/** Set user */
final INode setUser(String user, int latestSnapshotId)
throws QuotaExceededException {
final INode nodeToUpdate = recordModification(latestSnapshotId);
nodeToUpdate.setUser(user);
return nodeToUpdate;
recordModification(latestSnapshotId);
setUser(user);
return this;
}
/**
* @param snapshotId
@ -122,9 +122,9 @@ public final String getGroupName() {
/** Set group */
final INode setGroup(String group, int latestSnapshotId)
throws QuotaExceededException {
final INode nodeToUpdate = recordModification(latestSnapshotId);
nodeToUpdate.setGroup(group);
return nodeToUpdate;
recordModification(latestSnapshotId);
setGroup(group);
return this;
}
/**
@ -148,9 +148,9 @@ public final FsPermission getFsPermission() {
/** Set the {@link FsPermission} of this {@link INode} */
INode setPermission(FsPermission permission, int latestSnapshotId)
throws QuotaExceededException {
final INode nodeToUpdate = recordModification(latestSnapshotId);
nodeToUpdate.setPermission(permission);
return nodeToUpdate;
recordModification(latestSnapshotId);
setPermission(permission);
return this;
}
abstract AclFeature getAclFeature(int snapshotId);
@ -164,18 +164,18 @@ public final AclFeature getAclFeature() {
final INode addAclFeature(AclFeature aclFeature, int latestSnapshotId)
throws QuotaExceededException {
final INode nodeToUpdate = recordModification(latestSnapshotId);
nodeToUpdate.addAclFeature(aclFeature);
return nodeToUpdate;
recordModification(latestSnapshotId);
addAclFeature(aclFeature);
return this;
}
abstract void removeAclFeature();
final INode removeAclFeature(int latestSnapshotId)
throws QuotaExceededException {
final INode nodeToUpdate = recordModification(latestSnapshotId);
nodeToUpdate.removeAclFeature();
return nodeToUpdate;
recordModification(latestSnapshotId);
removeAclFeature();
return this;
}
/**
@ -199,9 +199,9 @@ public final XAttrFeature getXAttrFeature() {
final INode addXAttrFeature(XAttrFeature xAttrFeature, int latestSnapshotId)
throws QuotaExceededException {
final INode nodeToUpdate = recordModification(latestSnapshotId);
nodeToUpdate.addXAttrFeature(xAttrFeature);
return nodeToUpdate;
recordModification(latestSnapshotId);
addXAttrFeature(xAttrFeature);
return this;
}
/**
@ -211,9 +211,9 @@ final INode addXAttrFeature(XAttrFeature xAttrFeature, int latestSnapshotId)
final INode removeXAttrFeature(int lastestSnapshotId)
throws QuotaExceededException {
final INode nodeToUpdate = recordModification(lastestSnapshotId);
nodeToUpdate.removeXAttrFeature();
return nodeToUpdate;
recordModification(lastestSnapshotId);
removeXAttrFeature();
return this;
}
/**
@ -298,11 +298,8 @@ public final boolean shouldRecordInSrcSnapshot(final int latestInDst) {
* @param latestSnapshotId The id of the latest snapshot that has been taken.
* Note that it is {@link Snapshot#CURRENT_STATE_ID}
* if no snapshots have been taken.
* @return The current inode, which usually is the same object of this inode.
* However, in some cases, this inode may be replaced with a new inode
* for maintaining snapshots. The current inode is then the new inode.
*/
abstract INode recordModification(final int latestSnapshotId)
abstract void recordModification(final int latestSnapshotId)
throws QuotaExceededException;
/** Check whether it's a reference. */
@ -652,9 +649,9 @@ public abstract INode updateModificationTime(long mtime, int latestSnapshotId)
/** Set the last modification time of inode. */
public final INode setModificationTime(long modificationTime,
int latestSnapshotId) throws QuotaExceededException {
final INode nodeToUpdate = recordModification(latestSnapshotId);
nodeToUpdate.setModificationTime(modificationTime);
return nodeToUpdate;
recordModification(latestSnapshotId);
setModificationTime(modificationTime);
return this;
}
/**
@ -682,9 +679,9 @@ public final long getAccessTime() {
*/
public final INode setAccessTime(long accessTime, int latestSnapshotId)
throws QuotaExceededException {
final INode nodeToUpdate = recordModification(latestSnapshotId);
nodeToUpdate.setAccessTime(accessTime);
return nodeToUpdate;
recordModification(latestSnapshotId);
setAccessTime(accessTime);
return this;
}

View File

@ -318,7 +318,7 @@ INodeReference.WithName replaceChild4ReferenceWithName(INode oldChild,
}
@Override
public INodeDirectory recordModification(int latestSnapshotId)
public void recordModification(int latestSnapshotId)
throws QuotaExceededException {
if (isInLatestSnapshot(latestSnapshotId)
&& !shouldRecordInSrcSnapshot(latestSnapshotId)) {
@ -330,7 +330,6 @@ public INodeDirectory recordModification(int latestSnapshotId)
// record self in the diff list if necessary
sf.getDiffs().saveSelf2Snapshot(latestSnapshotId, this, null);
}
return this;
}
/**

View File

@ -296,7 +296,7 @@ public INodeFileAttributes getSnapshotINode(final int snapshotId) {
}
@Override
public INodeFile recordModification(final int latestSnapshotId)
public void recordModification(final int latestSnapshotId)
throws QuotaExceededException {
if (isInLatestSnapshot(latestSnapshotId)
&& !shouldRecordInSrcSnapshot(latestSnapshotId)) {
@ -308,7 +308,6 @@ public INodeFile recordModification(final int latestSnapshotId)
// record self in the diff list if necessary
sf.getDiffs().saveSelf2Snapshot(latestSnapshotId, this, null);
}
return this;
}
public FileDiffList getDiffs() {
@ -356,11 +355,10 @@ public final void setFileReplication(short replication) {
/** Set the replication factor of this file. */
public final INodeFile setFileReplication(short replication,
int latestSnapshotId, final INodeMap inodeMap)
throws QuotaExceededException {
final INodeFile nodeToUpdate = recordModification(latestSnapshotId);
nodeToUpdate.setFileReplication(replication);
return nodeToUpdate;
int latestSnapshotId) throws QuotaExceededException {
recordModification(latestSnapshotId);
setFileReplication(replication);
return this;
}
/** @return preferred block size (in bytes) of the file. */

View File

@ -93,9 +93,8 @@ public INode get(long id) {
"", "", new FsPermission((short) 0)), 0, 0) {
@Override
INode recordModification(int latestSnapshotId)
void recordModification(int latestSnapshotId)
throws QuotaExceededException {
return null;
}
@Override

View File

@ -287,11 +287,9 @@ public final void setAccessTime(long accessTime) {
}
@Override
final INode recordModification(int latestSnapshotId)
final void recordModification(int latestSnapshotId)
throws QuotaExceededException {
referred.recordModification(latestSnapshotId);
// reference is never replaced
return this;
}
@Override // used by WithCount

View File

@ -47,12 +47,11 @@ public class INodeSymlink extends INodeWithAdditionalFields {
}
@Override
INode recordModification(int latestSnapshotId) throws QuotaExceededException {
void recordModification(int latestSnapshotId) throws QuotaExceededException {
if (isInLatestSnapshot(latestSnapshotId)) {
INodeDirectory parent = getParent();
parent.saveChild2Snapshot(this, latestSnapshotId, new INodeSymlink(this));
}
return this;
}
/** @return true unconditionally. */

View File

@ -570,10 +570,10 @@ private void runBalancer(Configuration conf,
final int r = Balancer.run(namenodes, p, conf);
if (conf.getInt(DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT) ==0) {
assertEquals(Balancer.ReturnStatus.NO_MOVE_PROGRESS.code, r);
assertEquals(ExitStatus.NO_MOVE_PROGRESS.getExitCode(), r);
return;
} else {
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
}
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
LOG.info("Rebalancing with default ctor.");
@ -717,7 +717,7 @@ public void testUnknownDatanode() throws Exception {
Balancer.Parameters.DEFAULT.threshold,
datanodes, Balancer.Parameters.DEFAULT.nodesToBeIncluded);
final int r = Balancer.run(namenodes, p, conf);
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
} finally {
cluster.shutdown();
}

View File

@ -98,7 +98,7 @@ public void testBalancerWithHANameNodes() throws Exception {
assertEquals(1, namenodes.size());
assertTrue(namenodes.contains(HATestUtil.getLogicalUri(cluster)));
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client,
cluster, Balancer.Parameters.DEFAULT);
} finally {

View File

@ -160,7 +160,7 @@ static void runBalancer(Suite s,
// start rebalancing
final Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(s.conf);
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, s.conf);
Assert.assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
Assert.assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
LOG.info("BALANCER 2");
wait(s.clients, totalUsed, totalCapacity);

View File

@ -176,7 +176,7 @@ private void runBalancer(Configuration conf,
// start rebalancing
Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
waitForHeartBeat(totalUsedSpace, totalCapacity);
LOG.info("Rebalancing with default factor.");
@ -190,8 +190,8 @@ private void runBalancerCanFinish(Configuration conf,
// start rebalancing
Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
Assert.assertTrue(r == Balancer.ReturnStatus.SUCCESS.code ||
(r == Balancer.ReturnStatus.NO_MOVE_PROGRESS.code));
Assert.assertTrue(r == ExitStatus.SUCCESS.getExitCode() ||
(r == ExitStatus.NO_MOVE_PROGRESS.getExitCode()));
waitForHeartBeat(totalUsedSpace, totalCapacity);
LOG.info("Rebalancing with default factor.");
}

View File

@ -100,6 +100,19 @@ Release 2.6.0 - UNRELEASED
YARN-2212. ApplicationMaster needs to find a way to update the AMRMToken
periodically. (xgong)
YARN-2026. Fair scheduler: Consider only active queues for computing fairshare.
(Ashwin Shankar via kasha)
YARN-1954. Added waitFor to AMRMClient(Async). (Tsuyoshi Ozawa via zjshen)
YARN-2302. Refactor TimelineWebServices. (Zhijie Shen via junping_du)
YARN-2337. ResourceManager sets ClientRMService in RMContext multiple times.
(Zhihai Xu via kasha)
YARN-2138. Cleaned up notifyDone* APIs in RMStateStore. (Varun Saxena via
jianhe)
OPTIMIZATIONS
BUG FIXES
@ -154,6 +167,11 @@ Release 2.6.0 - UNRELEASED
YARN-2008. Fixed CapacityScheduler to calculate headroom based on max available
capacity instead of configured max capacity. (Craig Welch via jianhe)
YARN-2400. Fixed TestAMRestart fails intermittently. (Jian He via xgong)
YARN-2361. RMAppAttempt state machine entries for KILLED state has duplicate
event entries. (Zhihai Xu via kasha)
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -22,6 +22,8 @@
import java.util.Collection;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.Public;
@ -37,12 +39,14 @@
import org.apache.hadoop.yarn.exceptions.YarnException;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
AbstractService {
private static final Log LOG = LogFactory.getLog(AMRMClient.class);
/**
* Create a new instance of AMRMClient.
@ -336,4 +340,63 @@ public NMTokenCache getNMTokenCache() {
return nmTokenCache;
}
/**
* Wait for <code>check</code> to return true, polling every 1000 ms.
* See also {@link #waitFor(com.google.common.base.Supplier, int)}
* and {@link #waitFor(com.google.common.base.Supplier, int, int)}
* @param check the condition to wait on
*/
public void waitFor(Supplier<Boolean> check) throws InterruptedException {
waitFor(check, 1000);
}
/**
* Wait for <code>check</code> to return true, polling every
* <code>checkEveryMillis</code> ms.
* See also {@link #waitFor(com.google.common.base.Supplier, int, int)}
* @param check user defined checker
* @param checkEveryMillis interval to call <code>check</code>
*/
public void waitFor(Supplier<Boolean> check, int checkEveryMillis)
throws InterruptedException {
waitFor(check, checkEveryMillis, 1);
}
/**
* Wait for <code>check</code> to return true, polling every
* <code>checkEveryMillis</code> ms. In the main loop, this method logs the
* message "Waiting in main loop." once every <code>logInterval</code>
* iterations to confirm the thread is alive.
* @param check user defined checker
* @param checkEveryMillis interval in ms between calls to <code>check</code>
* @param logInterval number of iterations between progress log messages
*/
public void waitFor(Supplier<Boolean> check, int checkEveryMillis,
int logInterval) throws InterruptedException {
Preconditions.checkNotNull(check, "check should not be null");
Preconditions.checkArgument(checkEveryMillis >= 0,
"checkEveryMillis should be a nonnegative value");
Preconditions.checkArgument(logInterval >= 0,
"logInterval should be a nonnegative value");
int loggingCounter = logInterval;
do {
if (LOG.isDebugEnabled()) {
LOG.debug("Check the condition for main loop.");
}
boolean result = check.get();
if (result) {
LOG.info("Exits the main loop.");
return;
}
if (--loggingCounter <= 0) {
LOG.info("Waiting in main loop.");
loggingCounter = logInterval;
}
Thread.sleep(checkEveryMillis);
} while (true);
}
}
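A hedged usage sketch for the new waitFor overloads (AMRMClientAsync below gains the same methods). The shutdown flag and the callback that sets it are hypothetical, and the call propagates InterruptedException:
// hypothetical flag, flipped by a CallbackHandler on AM_SHUTDOWN
final AtomicBoolean shutdownRequested = new AtomicBoolean(false);
amRMClient.waitFor(new Supplier<Boolean>() {
  @Override
  public Boolean get() {
    return shutdownRequested.get();
  }
}, 500, 20); // re-check every 500 ms, log "Waiting in main loop." every 20 checks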

View File

@ -18,11 +18,15 @@
package org.apache.hadoop.yarn.client.api.async;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Stable;
@ -90,6 +94,7 @@
@Stable
public abstract class AMRMClientAsync<T extends ContainerRequest>
extends AbstractService {
private static final Log LOG = LogFactory.getLog(AMRMClientAsync.class);
protected final AMRMClient<T> client;
protected final CallbackHandler handler;
@ -189,6 +194,65 @@ public abstract void unregisterApplicationMaster(
*/
public abstract int getClusterNodeCount();
/**
* Wait for <code>check</code> to return true, polling every 1000 ms.
* See also {@link #waitFor(com.google.common.base.Supplier, int)}
* and {@link #waitFor(com.google.common.base.Supplier, int, int)}
* @param check the condition to wait on
*/
public void waitFor(Supplier<Boolean> check) throws InterruptedException {
waitFor(check, 1000);
}
/**
* Wait for <code>check</code> to return true, polling every
* <code>checkEveryMillis</code> ms.
* See also {@link #waitFor(com.google.common.base.Supplier, int, int)}
* @param check user defined checker
* @param checkEveryMillis interval to call <code>check</code>
*/
public void waitFor(Supplier<Boolean> check, int checkEveryMillis)
throws InterruptedException {
waitFor(check, checkEveryMillis, 1);
};
/**
* Wait for <code>check</code> to return true, polling every
* <code>checkEveryMillis</code> ms. In the main loop, this method logs the
* message "Waiting in main loop." once every <code>logInterval</code>
* iterations to confirm the thread is alive.
* @param check user defined checker
* @param checkEveryMillis interval in ms between calls to <code>check</code>
* @param logInterval number of iterations between progress log messages
*/
public void waitFor(Supplier<Boolean> check, int checkEveryMillis,
int logInterval) throws InterruptedException {
Preconditions.checkNotNull(check, "check should not be null");
Preconditions.checkArgument(checkEveryMillis >= 0,
"checkEveryMillis should be a nonnegative value");
Preconditions.checkArgument(logInterval >= 0,
"logInterval should be a nonnegative value");
int loggingCounter = logInterval;
do {
if (LOG.isDebugEnabled()) {
LOG.debug("Check the condition for main loop.");
}
boolean result = check.get();
if (result) {
LOG.info("Exits the main loop.");
return;
}
if (--loggingCounter <= 0) {
LOG.info("Waiting in main loop.");
loggingCounter = logInterval;
}
Thread.sleep(checkEveryMillis);
} while (true);
}
public interface CallbackHandler {
/**

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.client.api.async.impl;
import com.google.common.base.Supplier;
import static org.mockito.Matchers.anyFloat;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyString;
@ -180,7 +181,7 @@ private void runHeartBeatThrowOutException(Exception ex) throws Exception{
AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
when(client.allocate(anyFloat())).thenThrow(ex);
AMRMClientAsync<ContainerRequest> asyncClient =
AMRMClientAsync<ContainerRequest> asyncClient =
AMRMClientAsync.createAMRMClientAsync(client, 20, callbackHandler);
asyncClient.init(conf);
asyncClient.start();
@ -228,6 +229,41 @@ public void testAMRMClientAsyncShutDown() throws Exception {
asyncClient.stop();
}
@Test (timeout = 10000)
public void testAMRMClientAsyncShutDownWithWaitFor() throws Exception {
Configuration conf = new Configuration();
final TestCallbackHandler callbackHandler = new TestCallbackHandler();
@SuppressWarnings("unchecked")
AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
final AllocateResponse shutDownResponse = createAllocateResponse(
new ArrayList<ContainerStatus>(), new ArrayList<Container>(), null);
shutDownResponse.setAMCommand(AMCommand.AM_SHUTDOWN);
when(client.allocate(anyFloat())).thenReturn(shutDownResponse);
AMRMClientAsync<ContainerRequest> asyncClient =
AMRMClientAsync.createAMRMClientAsync(client, 10, callbackHandler);
asyncClient.init(conf);
asyncClient.start();
Supplier<Boolean> checker = new Supplier<Boolean>() {
@Override
public Boolean get() {
return callbackHandler.reboot;
}
};
asyncClient.registerApplicationMaster("localhost", 1234, null);
asyncClient.waitFor(checker);
asyncClient.stop();
// stopping should have joined all threads and completed all callbacks
Assert.assertTrue(callbackHandler.callbackCount == 0);
verify(client, times(1)).allocate(anyFloat());
asyncClient.stop();
}
@Test (timeout = 5000)
public void testCallAMRMClientAsyncStopFromCallbackHandler()
throws YarnException, IOException, InterruptedException {
@ -262,6 +298,40 @@ public void testCallAMRMClientAsyncStopFromCallbackHandler()
}
}
@Test (timeout = 5000)
public void testCallAMRMClientAsyncStopFromCallbackHandlerWithWaitFor()
throws YarnException, IOException, InterruptedException {
Configuration conf = new Configuration();
final TestCallbackHandler2 callbackHandler = new TestCallbackHandler2();
@SuppressWarnings("unchecked")
AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
List<ContainerStatus> completed = Arrays.asList(
ContainerStatus.newInstance(newContainerId(0, 0, 0, 0),
ContainerState.COMPLETE, "", 0));
final AllocateResponse response = createAllocateResponse(completed,
new ArrayList<Container>(), null);
when(client.allocate(anyFloat())).thenReturn(response);
AMRMClientAsync<ContainerRequest> asyncClient =
AMRMClientAsync.createAMRMClientAsync(client, 20, callbackHandler);
callbackHandler.asynClient = asyncClient;
asyncClient.init(conf);
asyncClient.start();
Supplier<Boolean> checker = new Supplier<Boolean>() {
@Override
public Boolean get() {
return callbackHandler.notify;
}
};
asyncClient.registerApplicationMaster("localhost", 1234, null);
asyncClient.waitFor(checker);
Assert.assertTrue(checker.get());
}
void runCallBackThrowOutException(TestCallbackHandler2 callbackHandler) throws
InterruptedException, YarnException, IOException {
Configuration conf = new Configuration();
@ -342,7 +412,7 @@ private class TestCallbackHandler implements AMRMClientAsync.CallbackHandler {
private volatile List<ContainerStatus> completedContainers;
private volatile List<Container> allocatedContainers;
Exception savedException = null;
boolean reboot = false;
volatile boolean reboot = false;
Object notifier = new Object();
int callbackCount = 0;
@ -432,7 +502,7 @@ private class TestCallbackHandler2 implements AMRMClientAsync.CallbackHandler {
@SuppressWarnings("rawtypes")
AMRMClientAsync asynClient;
boolean stop = true;
boolean notify = false;
volatile boolean notify = false;
boolean throwOutException = false;
@Override

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.client.api.impl;
import com.google.common.base.Supplier;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@ -814,6 +815,40 @@ public AllocateResponse answer(InvocationOnMock invocation)
assertEquals(0, amClient.ask.size());
assertEquals(0, amClient.release.size());
}
class CountDownSupplier implements Supplier<Boolean> {
int counter = 0;
@Override
public Boolean get() {
counter++;
if (counter >= 3) {
return true;
} else {
return false;
}
}
};
@Test
public void testWaitFor() throws InterruptedException {
AMRMClientImpl<ContainerRequest> amClient = null;
CountDownSupplier countDownChecker = new CountDownSupplier();
try {
// start am rm client
amClient =
(AMRMClientImpl<ContainerRequest>) AMRMClient
.<ContainerRequest> createAMRMClient();
amClient.init(new YarnConfiguration());
amClient.start();
amClient.waitFor(countDownChecker, 1000);
assertEquals(3, countDownChecker.counter);
} finally {
if (amClient != null) {
amClient.stop();
}
}
}
private void sleep(int sleepTime) {
try {

View File

@ -39,6 +39,7 @@
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.applicationhistoryservice.webapp.AHSWebApp;
import org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
import org.apache.hadoop.yarn.server.timeline.TimelineStore;
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
import org.apache.hadoop.yarn.server.timeline.security.TimelineAuthenticationFilterInitializer;
@ -59,12 +60,12 @@ public class ApplicationHistoryServer extends CompositeService {
private static final Log LOG = LogFactory
.getLog(ApplicationHistoryServer.class);
protected ApplicationHistoryClientService ahsClientService;
protected ApplicationHistoryManager historyManager;
protected TimelineStore timelineStore;
protected TimelineDelegationTokenSecretManagerService secretManagerService;
protected TimelineACLsManager timelineACLsManager;
protected WebApp webApp;
private ApplicationHistoryClientService ahsClientService;
private ApplicationHistoryManager historyManager;
private TimelineStore timelineStore;
private TimelineDelegationTokenSecretManagerService secretManagerService;
private TimelineDataManager timelineDataManager;
private WebApp webApp;
public ApplicationHistoryServer() {
super(ApplicationHistoryServer.class.getName());
@ -72,15 +73,18 @@ public ApplicationHistoryServer() {
@Override
protected void serviceInit(Configuration conf) throws Exception {
historyManager = createApplicationHistory();
ahsClientService = createApplicationHistoryClientService(historyManager);
addService(ahsClientService);
addService((Service) historyManager);
// init timeline services first
timelineStore = createTimelineStore(conf);
addIfService(timelineStore);
secretManagerService = createTimelineDelegationTokenSecretManagerService(conf);
addService(secretManagerService);
timelineACLsManager = createTimelineACLsManager(conf);
timelineDataManager = createTimelineDataManager(conf);
// init generic history service afterwards
historyManager = createApplicationHistoryManager(conf);
ahsClientService = createApplicationHistoryClientService(historyManager);
addService(ahsClientService);
addService((Service) historyManager);
DefaultMetricsSystem.initialize("ApplicationHistoryServer");
JvmMetrics.initSingleton("ApplicationHistoryServer", null);
@ -111,21 +115,22 @@ protected void serviceStop() throws Exception {
@Private
@VisibleForTesting
public ApplicationHistoryClientService getClientService() {
ApplicationHistoryClientService getClientService() {
return this.ahsClientService;
}
protected ApplicationHistoryClientService
createApplicationHistoryClientService(
ApplicationHistoryManager historyManager) {
return new ApplicationHistoryClientService(historyManager);
/**
* @return ApplicationTimelineStore
*/
@Private
@VisibleForTesting
public TimelineStore getTimelineStore() {
return timelineStore;
}
protected ApplicationHistoryManager createApplicationHistory() {
return new ApplicationHistoryManagerImpl();
}
protected ApplicationHistoryManager getApplicationHistory() {
@Private
@VisibleForTesting
ApplicationHistoryManager getApplicationHistoryManager() {
return this.historyManager;
}
@ -154,28 +159,35 @@ public static void main(String[] args) {
launchAppHistoryServer(args);
}
protected ApplicationHistoryManager createApplicationHistoryManager(
private ApplicationHistoryClientService
createApplicationHistoryClientService(
ApplicationHistoryManager historyManager) {
return new ApplicationHistoryClientService(historyManager);
}
private ApplicationHistoryManager createApplicationHistoryManager(
Configuration conf) {
return new ApplicationHistoryManagerImpl();
}
protected TimelineStore createTimelineStore(
private TimelineStore createTimelineStore(
Configuration conf) {
return ReflectionUtils.newInstance(conf.getClass(
YarnConfiguration.TIMELINE_SERVICE_STORE, LeveldbTimelineStore.class,
TimelineStore.class), conf);
}
protected TimelineDelegationTokenSecretManagerService
private TimelineDelegationTokenSecretManagerService
createTimelineDelegationTokenSecretManagerService(Configuration conf) {
return new TimelineDelegationTokenSecretManagerService();
}
protected TimelineACLsManager createTimelineACLsManager(Configuration conf) {
return new TimelineACLsManager(conf);
private TimelineDataManager createTimelineDataManager(Configuration conf) {
return new TimelineDataManager(
timelineStore, new TimelineACLsManager(conf));
}
protected void startWebApp() {
private void startWebApp() {
Configuration conf = getConfig();
// Always load pseudo authentication filter to parse "user.name" in an URL
// to identify a HTTP request's user in insecure mode.
@ -199,9 +211,8 @@ protected void startWebApp() {
try {
AHSWebApp ahsWebApp = AHSWebApp.getInstance();
ahsWebApp.setApplicationHistoryManager(historyManager);
ahsWebApp.setTimelineStore(timelineStore);
ahsWebApp.setTimelineDelegationTokenSecretManagerService(secretManagerService);
ahsWebApp.setTimelineACLsManager(timelineACLsManager);
ahsWebApp.setTimelineDataManager(timelineDataManager);
webApp =
WebApps
.$for("applicationhistory", ApplicationHistoryClientService.class,
@ -213,14 +224,6 @@ protected void startWebApp() {
throw new YarnRuntimeException(msg, e);
}
}
/**
* @return ApplicationTimelineStore
*/
@Private
@VisibleForTesting
public TimelineStore getTimelineStore() {
return timelineStore;
}
private void doSecureLogin(Configuration conf) throws IOException {
InetSocketAddress socAddr = getBindAddress(conf);

View File

@ -22,8 +22,7 @@
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.yarn.server.api.ApplicationContext;
import org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryManager;
import org.apache.hadoop.yarn.server.timeline.TimelineStore;
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
import org.apache.hadoop.yarn.server.timeline.security.TimelineDelegationTokenSecretManagerService;
import org.apache.hadoop.yarn.server.timeline.webapp.TimelineWebServices;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
@ -36,9 +35,8 @@
public class AHSWebApp extends WebApp implements YarnWebParams {
private ApplicationHistoryManager applicationHistoryManager;
private TimelineStore timelineStore;
private TimelineDelegationTokenSecretManagerService secretManagerService;
private TimelineACLsManager timelineACLsManager;
private TimelineDataManager timelineDataManager;
private static AHSWebApp instance = null;
@ -68,14 +66,6 @@ public void setApplicationHistoryManager(
this.applicationHistoryManager = applicationHistoryManager;
}
public TimelineStore getTimelineStore() {
return timelineStore;
}
public void setTimelineStore(TimelineStore timelineStore) {
this.timelineStore = timelineStore;
}
public TimelineDelegationTokenSecretManagerService
getTimelineDelegationTokenSecretManagerService() {
return secretManagerService;
@ -86,12 +76,12 @@ public void setTimelineDelegationTokenSecretManagerService(
this.secretManagerService = secretManagerService;
}
public TimelineACLsManager getTimelineACLsManager() {
return timelineACLsManager;
public TimelineDataManager getTimelineDataManager() {
return timelineDataManager;
}
public void setTimelineACLsManager(TimelineACLsManager timelineACLsManager) {
this.timelineACLsManager = timelineACLsManager;
public void setTimelineDataManager(TimelineDataManager timelineDataManager) {
this.timelineDataManager = timelineDataManager;
}
@Override
@ -101,10 +91,9 @@ public void setup() {
bind(TimelineWebServices.class);
bind(GenericExceptionHandler.class);
bind(ApplicationContext.class).toInstance(applicationHistoryManager);
bind(TimelineStore.class).toInstance(timelineStore);
bind(TimelineDelegationTokenSecretManagerService.class).toInstance(
secretManagerService);
bind(TimelineACLsManager.class).toInstance(timelineACLsManager);
bind(TimelineDataManager.class).toInstance(timelineDataManager);
route("/", AHSController.class);
route(pajoin("/apps", APP_STATE), AHSController.class);
route(pajoin("/app", APPLICATION_ID), AHSController.class, "app");

View File

@ -0,0 +1,319 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.timeline;
import static org.apache.hadoop.yarn.util.StringHelper.CSV_JOINER;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEvents;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.timeline.TimelineReader.Field;
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
/**
* This class wraps the timeline store and the ACLs manager. It does some
* nontrivial manipulation of the timeline data before putting it into or after
* getting it from the timeline store, and checks the user's access to it.
*
*/
public class TimelineDataManager {
private static final Log LOG = LogFactory.getLog(TimelineDataManager.class);
private TimelineStore store;
private TimelineACLsManager timelineACLsManager;
public TimelineDataManager(TimelineStore store,
TimelineACLsManager timelineACLsManager) {
this.store = store;
this.timelineACLsManager = timelineACLsManager;
}
/**
* Get the timeline entities that the given user has access to. The meaning
* of each argument has been documented with
* {@link TimelineReader#getEntities}.
*
* @see TimelineReader#getEntities
*/
public TimelineEntities getEntities(
String entityType,
NameValuePair primaryFilter,
Collection<NameValuePair> secondaryFilter,
Long windowStart,
Long windowEnd,
String fromId,
Long fromTs,
Long limit,
EnumSet<Field> fields,
UserGroupInformation callerUGI) throws YarnException, IOException {
TimelineEntities entities = null;
boolean modified = extendFields(fields);
entities = store.getEntities(
entityType,
limit,
windowStart,
windowEnd,
fromId,
fromTs,
primaryFilter,
secondaryFilter,
fields);
if (entities != null) {
Iterator<TimelineEntity> entitiesItr =
entities.getEntities().iterator();
while (entitiesItr.hasNext()) {
TimelineEntity entity = entitiesItr.next();
try {
// check ACLs
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
entitiesItr.remove();
} else {
// clean up system data
if (modified) {
entity.setPrimaryFilters(null);
} else {
cleanupOwnerInfo(entity);
}
}
} catch (YarnException e) {
LOG.error("Error when verifying access for user " + callerUGI
+ " on the events of the timeline entity "
+ new EntityIdentifier(entity.getEntityId(),
entity.getEntityType()), e);
entitiesItr.remove();
}
}
}
if (entities == null) {
return new TimelineEntities();
}
return entities;
}
/**
* Get the single timeline entity that the given user has access to. The
* meaning of each argument has been documented with
* {@link TimelineReader#getEntity}.
*
* @see TimelineReader#getEntity
*/
public TimelineEntity getEntity(
String entityType,
String entityId,
EnumSet<Field> fields,
UserGroupInformation callerUGI) throws YarnException, IOException {
TimelineEntity entity = null;
boolean modified = extendFields(fields);
entity =
store.getEntity(entityId, entityType, fields);
if (entity != null) {
// check ACLs
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
entity = null;
} else {
// clean up the system data
if (modified) {
entity.setPrimaryFilters(null);
} else {
cleanupOwnerInfo(entity);
}
}
}
return entity;
}
/**
* Get the events whose entities the given user has access to. The meaning of
* each argument has been documented with
* {@link TimelineReader#getEntityTimelines}.
*
* @see TimelineReader#getEntityTimelines
*/
public TimelineEvents getEvents(
String entityType,
SortedSet<String> entityIds,
SortedSet<String> eventTypes,
Long windowStart,
Long windowEnd,
Long limit,
UserGroupInformation callerUGI) throws YarnException, IOException {
TimelineEvents events = null;
events = store.getEntityTimelines(
entityType,
entityIds,
limit,
windowStart,
windowEnd,
eventTypes);
if (events != null) {
Iterator<TimelineEvents.EventsOfOneEntity> eventsItr =
events.getAllEvents().iterator();
while (eventsItr.hasNext()) {
TimelineEvents.EventsOfOneEntity eventsOfOneEntity = eventsItr.next();
try {
TimelineEntity entity = store.getEntity(
eventsOfOneEntity.getEntityId(),
eventsOfOneEntity.getEntityType(),
EnumSet.of(Field.PRIMARY_FILTERS));
// check ACLs
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
eventsItr.remove();
}
} catch (Exception e) {
LOG.error("Error when verifying access for user " + callerUGI
+ " on the events of the timeline entity "
+ new EntityIdentifier(eventsOfOneEntity.getEntityId(),
eventsOfOneEntity.getEntityType()), e);
eventsItr.remove();
}
}
}
if (events == null) {
return new TimelineEvents();
}
return events;
}
/**
* Store the timeline entities into the store and set their owner to the
* given user.
*/
public TimelinePutResponse postEntities(
TimelineEntities entities,
UserGroupInformation callerUGI) throws YarnException, IOException {
if (entities == null) {
return new TimelinePutResponse();
}
List<EntityIdentifier> entityIDs = new ArrayList<EntityIdentifier>();
TimelineEntities entitiesToPut = new TimelineEntities();
List<TimelinePutResponse.TimelinePutError> errors =
new ArrayList<TimelinePutResponse.TimelinePutError>();
for (TimelineEntity entity : entities.getEntities()) {
EntityIdentifier entityID =
new EntityIdentifier(entity.getEntityId(), entity.getEntityType());
// check if there is existing entity
TimelineEntity existingEntity = null;
try {
existingEntity =
store.getEntity(entityID.getId(), entityID.getType(),
EnumSet.of(Field.PRIMARY_FILTERS));
if (existingEntity != null
&& !timelineACLsManager.checkAccess(callerUGI, existingEntity)) {
throw new YarnException("The timeline entity " + entityID
+ " was not put by " + callerUGI + " before");
}
} catch (Exception e) {
// Skip the entity which already exists and was put by others
LOG.error("Skip the timeline entity: " + entityID + ", because "
+ e.getMessage());
TimelinePutResponse.TimelinePutError error =
new TimelinePutResponse.TimelinePutError();
error.setEntityId(entityID.getId());
error.setEntityType(entityID.getType());
error.setErrorCode(
TimelinePutResponse.TimelinePutError.ACCESS_DENIED);
errors.add(error);
continue;
}
// inject owner information for the access check if this is the first
// time to post the entity, in case it's the admin who is updating
// the timeline data.
try {
if (existingEntity == null) {
injectOwnerInfo(entity, callerUGI.getShortUserName());
}
} catch (YarnException e) {
// Skip the entity which messes up the primary filter and record the
// error
LOG.error("Skip the timeline entity: " + entityID + ", because "
+ e.getMessage());
TimelinePutResponse.TimelinePutError error =
new TimelinePutResponse.TimelinePutError();
error.setEntityId(entityID.getId());
error.setEntityType(entityID.getType());
error.setErrorCode(
TimelinePutResponse.TimelinePutError.SYSTEM_FILTER_CONFLICT);
errors.add(error);
continue;
}
entityIDs.add(entityID);
entitiesToPut.addEntity(entity);
if (LOG.isDebugEnabled()) {
LOG.debug("Storing the entity " + entityID + ", JSON-style content: "
+ TimelineUtils.dumpTimelineRecordtoJSON(entity));
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Storing entities: " + CSV_JOINER.join(entityIDs));
}
TimelinePutResponse response = store.put(entitiesToPut);
// add the errors of timeline system filter key conflict
response.addErrors(errors);
return response;
}
private static boolean extendFields(EnumSet<Field> fieldEnums) {
boolean modified = false;
if (fieldEnums != null && !fieldEnums.contains(Field.PRIMARY_FILTERS)) {
fieldEnums.add(Field.PRIMARY_FILTERS);
modified = true;
}
return modified;
}
private static void injectOwnerInfo(TimelineEntity timelineEntity,
String owner) throws YarnException {
if (timelineEntity.getPrimaryFilters() != null &&
timelineEntity.getPrimaryFilters().containsKey(
TimelineStore.SystemFilter.ENTITY_OWNER.toString())) {
throw new YarnException(
"User should not use the timeline system filter key: "
+ TimelineStore.SystemFilter.ENTITY_OWNER);
}
timelineEntity.addPrimaryFilter(
TimelineStore.SystemFilter.ENTITY_OWNER
.toString(), owner);
}
private static void cleanupOwnerInfo(TimelineEntity timelineEntity) {
if (timelineEntity.getPrimaryFilters() != null) {
timelineEntity.getPrimaryFilters().remove(
TimelineStore.SystemFilter.ENTITY_OWNER.toString());
}
}
}
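A hedged construction-and-put sketch mirroring the wiring in ApplicationHistoryServer above; the store instance, the Configuration conf, and the entity id/type are assumptions for illustration only:
// store is assumed to be an initialized TimelineStore (e.g. LeveldbTimelineStore)
TimelineDataManager manager =
    new TimelineDataManager(store, new TimelineACLsManager(conf));
TimelineEntity entity = new TimelineEntity();
entity.setEntityId("entity_1");            // illustrative id
entity.setEntityType("HYPOTHETICAL_TYPE"); // illustrative type
entity.setStartTime(System.currentTimeMillis());
TimelineEntities toPut = new TimelineEntities();
toPut.addEntity(entity);
// postEntities records the caller's short user name as the ACL owner;
// getCurrentUser() and postEntities may throw IOException/YarnException
TimelinePutResponse response =
    manager.postEntities(toPut, UserGroupInformation.getCurrentUser());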

View File

@ -18,14 +18,10 @@
package org.apache.hadoop.yarn.server.timeline.webapp;
import static org.apache.hadoop.yarn.util.StringHelper.CSV_JOINER;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
@ -58,14 +54,11 @@
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEvents;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.timeline.EntityIdentifier;
import org.apache.hadoop.yarn.server.timeline.GenericObjectMapper;
import org.apache.hadoop.yarn.server.timeline.NameValuePair;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
import org.apache.hadoop.yarn.server.timeline.TimelineReader.Field;
import org.apache.hadoop.yarn.server.timeline.TimelineStore;
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
import org.apache.hadoop.yarn.webapp.BadRequestException;
import org.apache.hadoop.yarn.webapp.ForbiddenException;
import org.apache.hadoop.yarn.webapp.NotFoundException;
@ -80,14 +73,11 @@ public class TimelineWebServices {
private static final Log LOG = LogFactory.getLog(TimelineWebServices.class);
private TimelineStore store;
private TimelineACLsManager timelineACLsManager;
private TimelineDataManager timelineDataManager;
@Inject
public TimelineWebServices(TimelineStore store,
TimelineACLsManager timelineACLsManager) {
this.store = store;
this.timelineACLsManager = timelineACLsManager;
public TimelineWebServices(TimelineDataManager timelineDataManager) {
this.timelineDataManager = timelineDataManager;
}
@XmlRootElement(name = "about")
@ -148,61 +138,28 @@ public TimelineEntities getEntities(
@QueryParam("limit") String limit,
@QueryParam("fields") String fields) {
init(res);
TimelineEntities entities = null;
try {
EnumSet<Field> fieldEnums = parseFieldsStr(fields, ",");
boolean modified = extendFields(fieldEnums);
UserGroupInformation callerUGI = getUser(req);
entities = store.getEntities(
return timelineDataManager.getEntities(
parseStr(entityType),
parseLongStr(limit),
parsePairStr(primaryFilter, ":"),
parsePairsStr(secondaryFilter, ",", ":"),
parseLongStr(windowStart),
parseLongStr(windowEnd),
parseStr(fromId),
parseLongStr(fromTs),
parsePairStr(primaryFilter, ":"),
parsePairsStr(secondaryFilter, ",", ":"),
fieldEnums);
if (entities != null) {
Iterator<TimelineEntity> entitiesItr =
entities.getEntities().iterator();
while (entitiesItr.hasNext()) {
TimelineEntity entity = entitiesItr.next();
try {
// check ACLs
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
entitiesItr.remove();
} else {
// clean up system data
if (modified) {
entity.setPrimaryFilters(null);
} else {
cleanupOwnerInfo(entity);
}
}
} catch (YarnException e) {
LOG.error("Error when verifying access for user " + callerUGI
+ " on the events of the timeline entity "
+ new EntityIdentifier(entity.getEntityId(),
entity.getEntityType()), e);
entitiesItr.remove();
}
}
}
parseLongStr(limit),
parseFieldsStr(fields, ","),
getUser(req));
} catch (NumberFormatException e) {
throw new BadRequestException(
"windowStart, windowEnd or limit is not a numeric value.");
} catch (IllegalArgumentException e) {
throw new BadRequestException("requested invalid field.");
} catch (IOException e) {
} catch (Exception e) {
LOG.error("Error getting entities", e);
throw new WebApplicationException(e,
Response.Status.INTERNAL_SERVER_ERROR);
}
if (entities == null) {
return new TimelineEntities();
}
return entities;
}
/**
@ -220,33 +177,15 @@ public TimelineEntity getEntity(
init(res);
TimelineEntity entity = null;
try {
EnumSet<Field> fieldEnums = parseFieldsStr(fields, ",");
boolean modified = extendFields(fieldEnums);
entity =
store.getEntity(parseStr(entityId), parseStr(entityType),
fieldEnums);
if (entity != null) {
// check ACLs
UserGroupInformation callerUGI = getUser(req);
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
entity = null;
} else {
// clean up the system data
if (modified) {
entity.setPrimaryFilters(null);
} else {
cleanupOwnerInfo(entity);
}
}
}
entity = timelineDataManager.getEntity(
parseStr(entityType),
parseStr(entityId),
parseFieldsStr(fields, ","),
getUser(req));
} catch (IllegalArgumentException e) {
throw new BadRequestException(
"requested invalid field.");
} catch (IOException e) {
LOG.error("Error getting entity", e);
throw new WebApplicationException(e,
Response.Status.INTERNAL_SERVER_ERROR);
} catch (YarnException e) {
} catch (Exception e) {
LOG.error("Error getting entity", e);
throw new WebApplicationException(e,
Response.Status.INTERNAL_SERVER_ERROR);
@ -275,51 +214,23 @@ public TimelineEvents getEvents(
@QueryParam("windowEnd") String windowEnd,
@QueryParam("limit") String limit) {
init(res);
TimelineEvents events = null;
try {
UserGroupInformation callerUGI = getUser(req);
events = store.getEntityTimelines(
return timelineDataManager.getEvents(
parseStr(entityType),
parseArrayStr(entityId, ","),
parseLongStr(limit),
parseArrayStr(eventType, ","),
parseLongStr(windowStart),
parseLongStr(windowEnd),
parseArrayStr(eventType, ","));
if (events != null) {
Iterator<TimelineEvents.EventsOfOneEntity> eventsItr =
events.getAllEvents().iterator();
while (eventsItr.hasNext()) {
TimelineEvents.EventsOfOneEntity eventsOfOneEntity = eventsItr.next();
try {
TimelineEntity entity = store.getEntity(
eventsOfOneEntity.getEntityId(),
eventsOfOneEntity.getEntityType(),
EnumSet.of(Field.PRIMARY_FILTERS));
// check ACLs
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
eventsItr.remove();
}
} catch (Exception e) {
LOG.error("Error when verifying access for user " + callerUGI
+ " on the events of the timeline entity "
+ new EntityIdentifier(eventsOfOneEntity.getEntityId(),
eventsOfOneEntity.getEntityType()), e);
eventsItr.remove();
}
}
}
parseLongStr(limit),
getUser(req));
} catch (NumberFormatException e) {
throw new BadRequestException(
"windowStart, windowEnd or limit is not a numeric value.");
} catch (IOException e) {
} catch (Exception e) {
LOG.error("Error getting entity timelines", e);
throw new WebApplicationException(e,
Response.Status.INTERNAL_SERVER_ERROR);
}
if (events == null) {
return new TimelineEvents();
}
return events;
}
/**
@ -333,9 +244,6 @@ public TimelinePutResponse postEntities(
@Context HttpServletResponse res,
TimelineEntities entities) {
init(res);
if (entities == null) {
return new TimelinePutResponse();
}
UserGroupInformation callerUGI = getUser(req);
if (callerUGI == null) {
String msg = "The owner of the posted timeline entities is not set";
@ -343,76 +251,8 @@ public TimelinePutResponse postEntities(
throw new ForbiddenException(msg);
}
try {
List<EntityIdentifier> entityIDs = new ArrayList<EntityIdentifier>();
TimelineEntities entitiesToPut = new TimelineEntities();
List<TimelinePutResponse.TimelinePutError> errors =
new ArrayList<TimelinePutResponse.TimelinePutError>();
for (TimelineEntity entity : entities.getEntities()) {
EntityIdentifier entityID =
new EntityIdentifier(entity.getEntityId(), entity.getEntityType());
// check if there is existing entity
TimelineEntity existingEntity = null;
try {
existingEntity =
store.getEntity(entityID.getId(), entityID.getType(),
EnumSet.of(Field.PRIMARY_FILTERS));
if (existingEntity != null
&& !timelineACLsManager.checkAccess(callerUGI, existingEntity)) {
throw new YarnException("The timeline entity " + entityID
+ " was not put by " + callerUGI + " before");
}
} catch (Exception e) {
// Skip the entity which already exists and was put by others
LOG.warn("Skip the timeline entity: " + entityID + ", because "
+ e.getMessage());
TimelinePutResponse.TimelinePutError error =
new TimelinePutResponse.TimelinePutError();
error.setEntityId(entityID.getId());
error.setEntityType(entityID.getType());
error.setErrorCode(
TimelinePutResponse.TimelinePutError.ACCESS_DENIED);
errors.add(error);
continue;
}
// inject owner information for the access check if this is the first
// time to post the entity, in case it's the admin who is updating
// the timeline data.
try {
if (existingEntity == null) {
injectOwnerInfo(entity, callerUGI.getShortUserName());
}
} catch (YarnException e) {
// Skip the entity which messes up the primary filter and record the
// error
LOG.warn("Skip the timeline entity: " + entityID + ", because "
+ e.getMessage());
TimelinePutResponse.TimelinePutError error =
new TimelinePutResponse.TimelinePutError();
error.setEntityId(entityID.getId());
error.setEntityType(entityID.getType());
error.setErrorCode(
TimelinePutResponse.TimelinePutError.SYSTEM_FILTER_CONFLICT);
errors.add(error);
continue;
}
entityIDs.add(entityID);
entitiesToPut.addEntity(entity);
if (LOG.isDebugEnabled()) {
LOG.debug("Storing the entity " + entityID + ", JSON-style content: "
+ TimelineUtils.dumpTimelineRecordtoJSON(entity));
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Storing entities: " + CSV_JOINER.join(entityIDs));
}
TimelinePutResponse response = store.put(entitiesToPut);
// add the errors of timeline system filter key conflict
response.addErrors(errors);
return response;
} catch (IOException e) {
return timelineDataManager.postEntities(entities, callerUGI);
} catch (Exception e) {
LOG.error("Error putting entities", e);
throw new WebApplicationException(e,
Response.Status.INTERNAL_SERVER_ERROR);
@@ -423,6 +263,15 @@ private void init(HttpServletResponse response) {
response.setContentType(null);
}
private static UserGroupInformation getUser(HttpServletRequest req) {
String remoteUser = req.getRemoteUser();
UserGroupInformation callerUGI = null;
if (remoteUser != null) {
callerUGI = UserGroupInformation.createRemoteUser(remoteUser);
}
return callerUGI;
}
private static SortedSet<String> parseArrayStr(String str, String delimiter) {
if (str == null) {
return null;
@@ -495,14 +344,6 @@ private static EnumSet<Field> parseFieldsStr(String str, String delimiter) {
}
}
private static boolean extendFields(EnumSet<Field> fieldEnums) {
boolean modified = false;
if (fieldEnums != null && !fieldEnums.contains(Field.PRIMARY_FILTERS)) {
fieldEnums.add(Field.PRIMARY_FILTERS);
modified = true;
}
return modified;
}
private static Long parseLongStr(String str) {
return str == null ? null : Long.parseLong(str.trim());
}
@@ -511,34 +352,4 @@ private static String parseStr(String str) {
return str == null ? null : str.trim();
}
private static UserGroupInformation getUser(HttpServletRequest req) {
String remoteUser = req.getRemoteUser();
UserGroupInformation callerUGI = null;
if (remoteUser != null) {
callerUGI = UserGroupInformation.createRemoteUser(remoteUser);
}
return callerUGI;
}
private static void injectOwnerInfo(TimelineEntity timelineEntity,
String owner) throws YarnException {
if (timelineEntity.getPrimaryFilters() != null &&
timelineEntity.getPrimaryFilters().containsKey(
TimelineStore.SystemFilter.ENTITY_OWNER.toString())) {
throw new YarnException(
"User should not use the timeline system filter key: "
+ TimelineStore.SystemFilter.ENTITY_OWNER);
}
timelineEntity.addPrimaryFilter(
TimelineStore.SystemFilter.ENTITY_OWNER
.toString(), owner);
}
private static void cleanupOwnerInfo(TimelineEntity timelineEntity) {
if (timelineEntity.getPrimaryFilters() != null) {
timelineEntity.getPrimaryFilters().remove(
TimelineStore.SystemFilter.ENTITY_OWNER.toString());
}
}
}


@@ -69,7 +69,7 @@ public void setup() {
historyServer.init(config);
historyServer.start();
store =
((ApplicationHistoryManagerImpl) historyServer.getApplicationHistory())
((ApplicationHistoryManagerImpl) historyServer.getApplicationHistoryManager())
.getHistoryStore();
}


@@ -49,6 +49,7 @@
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.security.AdminACLsManager;
import org.apache.hadoop.yarn.server.timeline.TestMemoryTimelineStore;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
import org.apache.hadoop.yarn.server.timeline.TimelineStore;
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
import org.apache.hadoop.yarn.server.timeline.security.TimelineAuthenticationFilter;
@@ -89,14 +90,15 @@ protected void configureServlets() {
} catch (Exception e) {
Assert.fail();
}
bind(TimelineStore.class).toInstance(store);
Configuration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, false);
timelineACLsManager = new TimelineACLsManager(conf);
conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
conf.set(YarnConfiguration.YARN_ADMIN_ACL, "admin");
adminACLsManager = new AdminACLsManager(conf);
bind(TimelineACLsManager.class).toInstance(timelineACLsManager);
TimelineDataManager timelineDataManager =
new TimelineDataManager(store, timelineACLsManager);
bind(TimelineDataManager.class).toInstance(timelineDataManager);
serve("/*").with(GuiceContainer.class);
TimelineAuthenticationFilter taFilter = new TimelineAuthenticationFilter();
FilterConfig filterConfig = mock(FilterConfig.class);


@@ -461,7 +461,6 @@ protected void serviceInit(Configuration configuration) throws Exception {
rmDispatcher.register(RMAppManagerEventType.class, rmAppManager);
clientRM = createClientRMService();
rmContext.setClientRMService(clientRM);
addService(clientRM);
rmContext.setClientRMService(clientRM);


@@ -52,13 +52,13 @@
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNewSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppUpdateSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
import org.apache.hadoop.yarn.state.InvalidStateTransitonException;
import org.apache.hadoop.yarn.state.SingleArcTransition;
import org.apache.hadoop.yarn.state.StateMachine;
@@ -132,7 +132,8 @@ public void transition(RMStateStore store, RMStateStoreEvent event) {
LOG.info("Storing info for app: " + appId);
try {
store.storeApplicationStateInternal(appId, appStateData);
store.notifyDoneStoringApplication(appId, null);
store.notifyApplication(new RMAppEvent(appId,
RMAppEventType.APP_NEW_SAVED));
} catch (Exception e) {
LOG.error("Error storing app: " + appId, e);
store.notifyStoreOperationFailed(e);
@@ -156,7 +157,8 @@ public void transition(RMStateStore store, RMStateStoreEvent event) {
LOG.info("Updating info for app: " + appId);
try {
store.updateApplicationStateInternal(appId, appStateData);
store.notifyDoneUpdatingApplication(appId, null);
store.notifyApplication(new RMAppEvent(appId,
RMAppEventType.APP_UPDATE_SAVED));
} catch (Exception e) {
LOG.error("Error updating app: " + appId, e);
store.notifyStoreOperationFailed(e);
@@ -205,8 +207,9 @@ public void transition(RMStateStore store, RMStateStoreEvent event) {
}
store.storeApplicationAttemptStateInternal(attemptState.getAttemptId(),
attemptStateData);
store.notifyDoneStoringApplicationAttempt(attemptState.getAttemptId(),
null);
store.notifyApplicationAttempt(new RMAppAttemptEvent
(attemptState.getAttemptId(),
RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
} catch (Exception e) {
LOG.error("Error storing appAttempt: " + attemptState.getAttemptId(), e);
store.notifyStoreOperationFailed(e);
@@ -233,8 +236,9 @@ public void transition(RMStateStore store, RMStateStoreEvent event) {
}
store.updateApplicationAttemptStateInternal(attemptState.getAttemptId(),
attemptStateData);
store.notifyDoneUpdatingApplicationAttempt(attemptState.getAttemptId(),
null);
store.notifyApplicationAttempt(new RMAppAttemptEvent
(attemptState.getAttemptId(),
RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
} catch (Exception e) {
LOG.error("Error updating appAttempt: " + attemptState.getAttemptId(), e);
store.notifyStoreOperationFailed(e);
@@ -801,47 +805,28 @@ protected void notifyStoreOperationFailed(Exception failureCause) {
}
rmDispatcher.getEventHandler().handle(new RMFatalEvent(type, failureCause));
}
@SuppressWarnings("unchecked")
/**
* In {@link handleStoreEvent}, this method is called to notify the
* application that a new application is stored in the state store
* @param appId id of the application that has been saved
* @param storedException the exception that is thrown when storing the
* application
* This method is called to notify the application that a
* new application is stored or updated in the state store
* @param event App event containing the app id and event type
*/
private void notifyDoneStoringApplication(ApplicationId appId,
Exception storedException) {
rmDispatcher.getEventHandler().handle(
new RMAppNewSavedEvent(appId, storedException));
private void notifyApplication(RMAppEvent event) {
rmDispatcher.getEventHandler().handle(event);
}
@SuppressWarnings("unchecked")
private void notifyDoneUpdatingApplication(ApplicationId appId,
Exception storedException) {
rmDispatcher.getEventHandler().handle(
new RMAppUpdateSavedEvent(appId, storedException));
}
@SuppressWarnings("unchecked")
/**
* In {@link handleStoreEvent}, this method is called to notify the
* application attempt that a new attempt is stored in the state store
* @param appAttempt attempt that has been saved
* This method is called to notify the application attempt
* that a new attempt is stored or updated in the state store
* @param event App attempt event containing the app attempt
* id and event type
*/
private void notifyDoneStoringApplicationAttempt(ApplicationAttemptId attemptId,
Exception storedException) {
rmDispatcher.getEventHandler().handle(
new RMAppAttemptNewSavedEvent(attemptId, storedException));
private void notifyApplicationAttempt(RMAppAttemptEvent event) {
rmDispatcher.getEventHandler().handle(event);
}
@SuppressWarnings("unchecked")
private void notifyDoneUpdatingApplicationAttempt(ApplicationAttemptId attemptId,
Exception updatedException) {
rmDispatcher.getEventHandler().handle(
new RMAppAttemptUpdateSavedEvent(attemptId, updatedException));
}
/**
* EventHandler implementation which forwards events to the FSRMStateStore.
* This hides the EventHandler methods of the store from its public interface
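
A minimal sketch of the forwarding handler this javadoc describes, assuming it simply hands each event to the store's internal handleStoreEvent method:

// Assumed shape: implementing EventHandler on a private inner class, rather
// than on RMStateStore itself, keeps handle() off the store's public API
// while still letting the dispatcher drive the store's state machine.
private final class ForwardingEventHandler
    implements EventHandler<RMStateStoreEvent> {
  @Override
  public void handle(RMStateStoreEvent event) {
    handleStoreEvent(event); // delegate to the enclosing store
  }
}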


@@ -820,17 +820,6 @@ private static final class AddApplicationToSchedulerTransition extends
RMAppTransition {
@Override
public void transition(RMAppImpl app, RMAppEvent event) {
if (event instanceof RMAppNewSavedEvent) {
RMAppNewSavedEvent storeEvent = (RMAppNewSavedEvent) event;
// For HA this exception needs to be handled by giving up
// master status if we got fenced
if (((RMAppNewSavedEvent) event).getStoredException() != null) {
LOG.error(
"Failed to store application: " + storeEvent.getApplicationId(),
storeEvent.getStoredException());
ExitUtil.terminate(1, storeEvent.getStoredException());
}
}
app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
app.submissionContext.getQueue(), app.user));
}
@@ -848,13 +837,6 @@ private static final class FinalStateSavedTransition implements
@Override
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
RMAppUpdateSavedEvent storeEvent = (RMAppUpdateSavedEvent) event;
if (storeEvent.getUpdatedException() != null) {
LOG.error("Failed to update the final state of application"
+ storeEvent.getApplicationId(), storeEvent.getUpdatedException());
ExitUtil.terminate(1, storeEvent.getUpdatedException());
}
if (app.transitionTodo instanceof SingleArcTransition) {
((SingleArcTransition) app.transitionTodo).transition(app,
app.eventCausingFinalSaving);


@@ -1,36 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
import org.apache.hadoop.yarn.api.records.ApplicationId;
public class RMAppUpdateSavedEvent extends RMAppEvent {
private final Exception updatedException;
public RMAppUpdateSavedEvent(ApplicationId appId, Exception updatedException) {
super(appId, RMAppEventType.APP_UPDATE_SAVED);
this.updatedException = updatedException;
}
public Exception getUpdatedException() {
return updatedException;
}
}


@@ -80,11 +80,9 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
@@ -398,7 +396,6 @@ RMAppAttemptEventType.STATUS_UPDATE, new StatusUpdateTransition())
RMAppAttemptState.KILLED,
RMAppAttemptState.KILLED,
EnumSet.of(RMAppAttemptEventType.ATTEMPT_ADDED,
RMAppAttemptEventType.EXPIRE,
RMAppAttemptEventType.LAUNCHED,
RMAppAttemptEventType.LAUNCH_FAILED,
RMAppAttemptEventType.EXPIRE,
@@ -906,8 +903,6 @@ private static final class AttemptStoredTransition extends BaseTransition {
@Override
public void transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) {
appAttempt.checkAttemptStoreError(event);
appAttempt.launchAttempt();
}
}
@@ -1059,14 +1054,6 @@ private static class FinalStateSavedTransition implements
@Override
public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) {
RMAppAttemptUpdateSavedEvent storeEvent = (RMAppAttemptUpdateSavedEvent) event;
if (storeEvent.getUpdatedException() != null) {
LOG.error("Failed to update the final state of application attempt: "
+ storeEvent.getApplicationAttemptId(),
storeEvent.getUpdatedException());
ExitUtil.terminate(1, storeEvent.getUpdatedException());
}
RMAppAttemptEvent causeEvent = appAttempt.eventCausingFinalSaving;
if (appAttempt.transitionTodo instanceof SingleArcTransition) {
@@ -1196,8 +1183,6 @@ private static final class UnmanagedAMAttemptSavedTransition
@Override
public void transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) {
appAttempt.checkAttemptStoreError(event);
// create AMRMToken
appAttempt.amrmToken =
appAttempt.rmContext.getAMRMTokenSecretManager().createAndGetAMRMToken(
@@ -1690,18 +1675,6 @@ private void attemptLaunched() {
rmContext.getAMLivelinessMonitor().register(getAppAttemptId());
}
private void checkAttemptStoreError(RMAppAttemptEvent event) {
RMAppAttemptNewSavedEvent storeEvent = (RMAppAttemptNewSavedEvent) event;
if(storeEvent.getStoredException() != null)
{
// This needs to be handled for HA and give up master status if we got
// fenced
LOG.error("Failed to store attempt: " + getAppAttemptId(),
storeEvent.getStoredException());
ExitUtil.terminate(1, storeEvent.getStoredException());
}
}
private void storeAttempt() {
// store attempt data in a non-blocking manner to prevent dispatcher
// thread starvation and wait for state to be saved


@@ -1,39 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
public class RMAppAttemptNewSavedEvent extends RMAppAttemptEvent {
final Exception storedException;
public RMAppAttemptNewSavedEvent(ApplicationAttemptId appAttemptId,
Exception storedException) {
super(appAttemptId, RMAppAttemptEventType.ATTEMPT_NEW_SAVED);
this.storedException = storedException;
}
public Exception getStoredException() {
return storedException;
}
}


@@ -1,38 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
public class RMAppAttemptUpdateSavedEvent extends RMAppAttemptEvent {
final Exception updatedException;
public RMAppAttemptUpdateSavedEvent(ApplicationAttemptId appAttemptId,
Exception updatedException) {
super(appAttemptId, RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED);
this.updatedException = updatedException;
}
public Exception getUpdatedException() {
return updatedException;
}
}


@@ -116,6 +116,18 @@ public Resource getFairShare() {
return fairShare;
}
/**
* Returns true if the queue has at least one app running. Always returns true
* for AppSchedulables.
*/
public boolean isActive() {
if (this instanceof FSQueue) {
FSQueue queue = (FSQueue) this;
return queue.getNumRunnableApps() > 0;
}
return true;
}
/** Convenient toString implementation for debugging. */
@Override
public String toString() {


@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies;
import java.util.ArrayList;
import java.util.Collection;
import org.apache.hadoop.yarn.api.records.Resource;
@@ -33,7 +34,31 @@
public class ComputeFairShares {
private static final int COMPUTE_FAIR_SHARES_ITERATIONS = 25;
/**
* Compute the fair share of the given schedulables. Fair share is an
* allocation of shares considering only active schedulables, i.e.
* schedulables which have running apps.
*
* @param schedulables the schedulables to compute fair shares for
* @param totalResources the total resources to be shared among them
* @param type the resource type (e.g. memory or CPU) being shared
*/
public static void computeShares(
Collection<? extends Schedulable> schedulables, Resource totalResources,
ResourceType type) {
Collection<Schedulable> activeSchedulables = new ArrayList<Schedulable>();
for (Schedulable sched : schedulables) {
if (sched.isActive()) {
activeSchedulables.add(sched);
} else {
setResourceValue(0, sched.getFairShare(), type);
}
}
computeSharesInternal(activeSchedulables, totalResources, type);
}
/**
* Given a set of Schedulables and a number of slots, compute their weighted
* fair shares. The min and max shares and of the Schedulables are assumed to
@@ -75,7 +100,7 @@ public class ComputeFairShares {
* because resourceUsedWithWeightToResourceRatio is linear-time and the number of
* iterations of binary search is a constant (dependent on desired precision).
*/
public static void computeShares(
private static void computeSharesInternal(
Collection<? extends Schedulable> schedulables, Resource totalResources,
ResourceType type) {
if (schedulables.isEmpty()) {
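
For illustration, the binary search described in the comment above can be sketched as follows (simplified, array-based, and not the actual Hadoop implementation):

// Find a weight-to-resource ratio r such that giving each schedulable
// clamp(r * weight, minShare, maxShare) consumes roughly the total resource.
static double findWeightToResourceRatio(long[] weights, long[] minShares,
    long[] maxShares, long totalResource) {
  double rMax = 1.0;
  while (resourceUsed(rMax, weights, minShares, maxShares) < totalResource) {
    rMax *= 2.0; // grow the upper bound until it overshoots
  }
  double left = 0.0, right = rMax;
  for (int i = 0; i < 25; i++) { // constant number of bisection steps
    double mid = (left + right) / 2.0;
    if (resourceUsed(mid, weights, minShares, maxShares) < totalResource) {
      left = mid;
    } else {
      right = mid;
    }
  }
  return right;
}

// Linear-time helper corresponding to resourceUsedWithWeightToResourceRatio.
static long resourceUsed(double ratio, long[] weights, long[] minShares,
    long[] maxShares) {
  long used = 0;
  for (int i = 0; i < weights.length; i++) {
    long share = (long) (ratio * weights[i]);
    used += Math.min(Math.max(share, minShares[i]), maxShares[i]);
  }
  return used;
}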


@@ -386,7 +386,8 @@ public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
ApplicationState appState =
memStore.getState().getApplicationState().get(app1.getApplicationId());
// AM should be restarted even though max-am-attempt is 1.
MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
MockAM am2 =
rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
Assert.assertTrue(((RMAppAttemptImpl) attempt2).mayBeLastAttempt());
@@ -398,7 +399,8 @@ public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
am2.waitForState(RMAppAttemptState.FAILED);
Assert.assertTrue(! attempt2.shouldCountTowardsMaxAttemptRetry());
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
MockAM am3 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
MockAM am3 =
rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 3, nm1);
RMAppAttempt attempt3 = app1.getCurrentAppAttempt();
Assert.assertTrue(((RMAppAttemptImpl) attempt3).mayBeLastAttempt());
@@ -421,7 +423,8 @@ public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
.getAMContainerExitStatus());
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
MockAM am4 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
MockAM am4 =
rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 4, nm1);
RMAppAttempt attempt4 = app1.getCurrentAppAttempt();
Assert.assertTrue(((RMAppAttemptImpl) attempt4).mayBeLastAttempt());


@@ -65,8 +65,8 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
import org.apache.hadoop.yarn.server.security.MasterKeyData;
@@ -77,10 +77,9 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{
public static final Log LOG = LogFactory.getLog(RMStateStoreTestBase.class);
static class TestDispatcher implements
Dispatcher, EventHandler<RMAppAttemptNewSavedEvent> {
Dispatcher, EventHandler<RMAppAttemptEvent> {
ApplicationAttemptId attemptId;
Exception storedException;
boolean notified = false;
@@ -91,9 +90,8 @@ public void register(Class<? extends Enum> eventType,
}
@Override
public void handle(RMAppAttemptNewSavedEvent event) {
public void handle(RMAppAttemptEvent event) {
assertEquals(attemptId, event.getApplicationAttemptId());
assertEquals(storedException, event.getStoredException());
notified = true;
synchronized (this) {
notifyAll();
@@ -163,7 +161,6 @@ ContainerId storeAttempt(RMStateStore store, ApplicationAttemptId attemptId,
when(mockAttempt.getClientTokenMasterKey())
.thenReturn(clientTokenMasterKey);
dispatcher.attemptId = attemptId;
dispatcher.storedException = null;
store.storeNewApplicationAttempt(mockAttempt);
waitNotify(dispatcher);
return container.getId();
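
The waitNotify helper called above is not shown in this diff; a plausible sketch, assuming it simply blocks on the dispatcher's monitor until handle() flips the flag:

// Assumed shape of waitNotify: block until the store's completion event
// reaches TestDispatcher.handle(), then reset the flag for the next call.
private void waitNotify(TestDispatcher dispatcher) {
  synchronized (dispatcher) {
    while (!dispatcher.notified) {
      try {
        dispatcher.wait(); // woken by notifyAll() in handle()
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        break;
      }
    }
    dispatcher.notified = false;
  }
}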


@@ -60,7 +60,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
@@ -328,15 +327,15 @@ private void assertFailed(RMApp application, String regex) {
private void sendAppUpdateSavedEvent(RMApp application) {
RMAppEvent event =
new RMAppUpdateSavedEvent(application.getApplicationId(), null);
new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_UPDATE_SAVED);
application.handle(event);
rmDispatcher.await();
}
private void sendAttemptUpdateSavedEvent(RMApp application) {
application.getCurrentAppAttempt().handle(
new RMAppAttemptUpdateSavedEvent(application.getCurrentAppAttempt()
.getAppAttemptId(), null));
new RMAppAttemptEvent(application.getCurrentAppAttempt().getAppAttemptId(),
RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
}
protected RMApp testCreateAppNewSaving(
@@ -357,7 +356,7 @@ protected RMApp testCreateAppSubmittedNoRecovery(
RMApp application = testCreateAppNewSaving(submissionContext);
// NEW_SAVING => SUBMITTED event RMAppEventType.APP_SAVED
RMAppEvent event =
new RMAppNewSavedEvent(application.getApplicationId(), null);
new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_NEW_SAVED);
application.handle(event);
assertStartTimeSet(application);
assertAppState(RMAppState.SUBMITTED, application);
@@ -422,7 +421,7 @@ protected RMApp testCreateAppFinishing(
RMApp application = testCreateAppFinalSaving(submissionContext);
// FINAL_SAVING => FINISHING event RMAppEventType.APP_UPDATED
RMAppEvent appUpdated =
new RMAppUpdateSavedEvent(application.getApplicationId(), null);
new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_UPDATE_SAVED);
application.handle(appUpdated);
assertAppState(RMAppState.FINISHING, application);
assertTimesAtFinish(application);
@@ -763,7 +762,7 @@ public void testAppFinalSavingToFinished() throws IOException {
application.handle(event);
assertAppState(RMAppState.FINAL_SAVING, application);
RMAppEvent appUpdated =
new RMAppUpdateSavedEvent(application.getApplicationId(), null);
new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_UPDATE_SAVED);
application.handle(appUpdated);
assertAppState(RMAppState.FINISHED, application);


@@ -81,10 +81,8 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
@@ -570,15 +568,15 @@ private void scheduleApplicationAttempt() {
submitApplicationAttempt();
applicationAttempt.handle(
new RMAppAttemptEvent(
applicationAttempt.getAppAttemptId(),
applicationAttempt.getAppAttemptId(),
RMAppAttemptEventType.ATTEMPT_ADDED));
if(unmanagedAM){
assertEquals(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING,
applicationAttempt.getAppAttemptState());
applicationAttempt.handle(
new RMAppAttemptNewSavedEvent(
applicationAttempt.getAppAttemptId(), null));
new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
}
testAppAttemptScheduledState();
@@ -616,8 +614,8 @@ private Container allocateApplicationAttempt() {
assertEquals(RMAppAttemptState.ALLOCATED_SAVING,
applicationAttempt.getAppAttemptState());
applicationAttempt.handle(
new RMAppAttemptNewSavedEvent(
applicationAttempt.getAppAttemptId(), null));
new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
testAppAttemptAllocatedState(container);
@@ -696,8 +694,8 @@ private void sendAttemptUpdateSavedEvent(RMAppAttempt applicationAttempt) {
assertEquals(RMAppAttemptState.FINAL_SAVING,
applicationAttempt.getAppAttemptState());
applicationAttempt.handle(
new RMAppAttemptUpdateSavedEvent(
applicationAttempt.getAppAttemptId(), null));
new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
}
@Test


@@ -292,6 +292,7 @@ public void testSimpleFairShareCalculation() throws IOException {
// Have two queues which want entire cluster capacity
createSchedulingRequest(10 * 1024, "queue1", "user1");
createSchedulingRequest(10 * 1024, "queue2", "user1");
createSchedulingRequest(10 * 1024, "root.default", "user1");
scheduler.update();
@@ -322,6 +323,7 @@ public void testSimpleHierarchicalFairShareCalculation() throws IOException {
// Have two queues which want entire cluster capacity
createSchedulingRequest(10 * 1024, "parent.queue2", "user1");
createSchedulingRequest(10 * 1024, "parent.queue3", "user1");
createSchedulingRequest(10 * 1024, "root.default", "user1");
scheduler.update();
@@ -766,8 +768,10 @@ public void testFairShareAndWeightsInNestedUserQueueRule() throws Exception {
scheduler.handle(nodeEvent1);
// user1, user2 submit their apps to parentq and create user queues
scheduler.assignToQueue(rmApp1, "root.parentq", "user1");
scheduler.assignToQueue(rmApp2, "root.parentq", "user2");
createSchedulingRequest(10 * 1024, "root.parentq", "user1");
createSchedulingRequest(10 * 1024, "root.parentq", "user2");
// user3 submits app in default queue
createSchedulingRequest(10 * 1024, "root.default", "user3");
scheduler.update();
@@ -1287,7 +1291,7 @@ public void testPreemptionIsNotDelayedToNextRound() throws Exception {
scheduler.update();
Resource toPreempt = scheduler.resToPreempt(scheduler.getQueueManager()
.getLeafQueue("queueA.queueA2", false), clock.getTime());
assertEquals(2980, toPreempt.getMemory());
assertEquals(3277, toPreempt.getMemory());
// verify if the 3 containers required by queueA2 are preempted in the same
// round
@@ -2446,8 +2450,12 @@ public void testQueueMaxAMShare() throws Exception {
scheduler.update();
FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1", true);
assertEquals("Queue queue1's fair share should be 10240",
10240, queue1.getFairShare().getMemory());
assertEquals("Queue queue1's fair share should be 0", 0, queue1
.getFairShare().getMemory());
createSchedulingRequest(1 * 1024, "root.default", "user1");
scheduler.update();
scheduler.handle(updateEvent);
Resource amResource1 = Resource.newInstance(1024, 1);
Resource amResource2 = Resource.newInstance(2048, 2);
@@ -2635,24 +2643,32 @@ public void testQueueMaxAMShareDefault() throws Exception {
FSLeafQueue queue1 =
scheduler.getQueueManager().getLeafQueue("queue1", true);
assertEquals("Queue queue1's fair share should be 1366",
1366, queue1.getFairShare().getMemory());
assertEquals("Queue queue1's fair share should be 0", 0, queue1
.getFairShare().getMemory());
FSLeafQueue queue2 =
scheduler.getQueueManager().getLeafQueue("queue2", true);
assertEquals("Queue queue2's fair share should be 1366",
1366, queue2.getFairShare().getMemory());
assertEquals("Queue queue2's fair share should be 0", 0, queue2
.getFairShare().getMemory());
FSLeafQueue queue3 =
scheduler.getQueueManager().getLeafQueue("queue3", true);
assertEquals("Queue queue3's fair share should be 1366",
1366, queue3.getFairShare().getMemory());
assertEquals("Queue queue3's fair share should be 0", 0, queue3
.getFairShare().getMemory());
FSLeafQueue queue4 =
scheduler.getQueueManager().getLeafQueue("queue4", true);
assertEquals("Queue queue4's fair share should be 1366",
1366, queue4.getFairShare().getMemory());
assertEquals("Queue queue4's fair share should be 0", 0, queue4
.getFairShare().getMemory());
FSLeafQueue queue5 =
scheduler.getQueueManager().getLeafQueue("queue5", true);
assertEquals("Queue queue5's fair share should be 1366",
1366, queue5.getFairShare().getMemory());
assertEquals("Queue queue5's fair share should be 0", 0, queue5
.getFairShare().getMemory());
List<String> queues = Arrays.asList("root.default", "root.queue3",
"root.queue4", "root.queue5");
for (String queue : queues) {
createSchedulingRequest(1 * 1024, queue, "user1");
scheduler.update();
scheduler.handle(updateEvent);
}
Resource amResource1 = Resource.newInstance(2048, 1);
int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();


@@ -0,0 +1,308 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
import static org.junit.Assert.assertEquals;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Collection;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class TestFairSchedulerFairShare extends FairSchedulerTestBase {
private final static String ALLOC_FILE = new File(TEST_DIR,
TestFairSchedulerFairShare.class.getName() + ".xml").getAbsolutePath();
@Before
public void setup() throws IOException {
conf = createConfiguration();
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
}
@After
public void teardown() {
if (resourceManager != null) {
resourceManager.stop();
resourceManager = null;
}
conf = null;
}
private void createClusterWithQueuesAndOneNode(int mem, String policy)
throws IOException {
createClusterWithQueuesAndOneNode(mem, 0, policy);
}
private void createClusterWithQueuesAndOneNode(int mem, int vCores,
String policy) throws IOException {
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
out.println("<?xml version=\"1.0\"?>");
out.println("<allocations>");
out.println("<queue name=\"root\" >");
out.println(" <queue name=\"parentA\" >");
out.println(" <weight>8</weight>");
out.println(" <queue name=\"childA1\" />");
out.println(" <queue name=\"childA2\" />");
out.println(" <queue name=\"childA3\" />");
out.println(" <queue name=\"childA4\" />");
out.println(" </queue>");
out.println(" <queue name=\"parentB\" >");
out.println(" <weight>1</weight>");
out.println(" <queue name=\"childB1\" />");
out.println(" <queue name=\"childB2\" />");
out.println(" </queue>");
out.println("</queue>");
out.println("<defaultQueueSchedulingPolicy>" + policy
+ "</defaultQueueSchedulingPolicy>");
out.println("</allocations>");
out.close();
resourceManager = new MockRM(conf);
resourceManager.start();
scheduler = (FairScheduler) resourceManager.getResourceScheduler();
RMNode node1 = MockNodes.newNodeInfo(1,
Resources.createResource(mem, vCores), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent1);
}
@Test
public void testFairShareNoAppsRunning() throws IOException {
int nodeCapacity = 16 * 1024;
createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
scheduler.update();
// No apps are running in the cluster, verify that the fair share is zero
// for all queues under parentA and parentB.
Collection<FSLeafQueue> leafQueues = scheduler.getQueueManager()
.getLeafQueues();
for (FSLeafQueue leaf : leafQueues) {
if (leaf.getName().startsWith("root.parentA")) {
assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity
* 100, 0);
} else if (leaf.getName().startsWith("root.parentB")) {
assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity
* 100, 0.1);
}
}
}
@Test
public void testFairShareOneAppRunning() throws IOException {
int nodeCapacity = 16 * 1024;
createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
// Run an app in childA1. Verify that its fair share is 100%,
// since it is the only active queue.
// Also verify that the fair share is 0 for childA2, since no app is
// running in it.
createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
scheduler.update();
assertEquals(
100,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentA.childA1", false).getFairShare()
.getMemory()
/ nodeCapacity * 100, 0.1);
assertEquals(
0,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentA.childA2", false).getFairShare()
.getMemory()
/ nodeCapacity * 100, 0.1);
}
@Test
public void testFairShareMultipleActiveQueuesUnderSameParent()
throws IOException {
int nodeCapacity = 16 * 1024;
createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
// Run apps in childA1,childA2,childA3
createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
createSchedulingRequest(2 * 1024, "root.parentA.childA2", "user2");
createSchedulingRequest(2 * 1024, "root.parentA.childA3", "user3");
scheduler.update();
// Verify if fair share is 100 / 3 = 33%
for (int i = 1; i <= 3; i++) {
assertEquals(
33,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentA.childA" + i, false).getFairShare()
.getMemory()
/ nodeCapacity * 100, .9);
}
}
@Test
public void testFairShareMultipleActiveQueuesUnderDifferentParent()
throws IOException {
int nodeCapacity = 16 * 1024;
createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
// Run apps in childA1,childA2 which are under parentA
createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
createSchedulingRequest(3 * 1024, "root.parentA.childA2", "user2");
// Run app in childB1 which is under parentB
createSchedulingRequest(1 * 1024, "root.parentB.childB1", "user3");
// Run app in root.default queue
createSchedulingRequest(1 * 1024, "root.default", "user4");
scheduler.update();
// The two active child queues under parentA would each
// get a fair share of 80/2 = 40% (parentA's weight of 8 out of a total
// weight of 10 entitles it to 80% of the cluster)
for (int i = 1; i <= 2; i++) {
assertEquals(
40,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentA.childA" + i, false).getFairShare()
.getMemory()
/ nodeCapacity * 100, .9);
}
// The child queue under parentB would get a fair share of 10%,
// basically all of parentB's fair share
assertEquals(
10,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentB.childB1", false).getFairShare()
.getMemory()
/ nodeCapacity * 100, .9);
}
@Test
public void testFairShareResetsToZeroWhenAppsComplete() throws IOException {
int nodeCapacity = 16 * 1024;
createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
// Run apps in childA1,childA2 which are under parentA
ApplicationAttemptId app1 = createSchedulingRequest(2 * 1024,
"root.parentA.childA1", "user1");
ApplicationAttemptId app2 = createSchedulingRequest(3 * 1024,
"root.parentA.childA2", "user2");
scheduler.update();
// Verify if both the active queues under parentA get 50% fair
// share
for (int i = 1; i <= 2; i++) {
assertEquals(
50,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentA.childA" + i, false).getFairShare()
.getMemory()
/ nodeCapacity * 100, .9);
}
// Let app under childA1 complete. This should cause the fair share
// of queue childA1 to be reset to zero, since the queue has no apps running.
// Queue childA2's fair share would increase to 100% since it's the only
// active queue.
AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(
app1, RMAppAttemptState.FINISHED, false);
scheduler.handle(appRemovedEvent1);
scheduler.update();
assertEquals(
0,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentA.childA1", false).getFairShare()
.getMemory()
/ nodeCapacity * 100, 0);
assertEquals(
100,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentA.childA2", false).getFairShare()
.getMemory()
/ nodeCapacity * 100, 0.1);
}
@Test
public void testFairShareWithDRFMultipleActiveQueuesUnderDifferentParent()
throws IOException {
int nodeMem = 16 * 1024;
int nodeVCores = 10;
createClusterWithQueuesAndOneNode(nodeMem, nodeVCores, "drf");
// Run apps in childA1,childA2 which are under parentA
createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
createSchedulingRequest(3 * 1024, "root.parentA.childA2", "user2");
// Run app in childB1 which is under parentB
createSchedulingRequest(1 * 1024, "root.parentB.childB1", "user3");
// Run app in root.default queue
createSchedulingRequest(1 * 1024, "root.default", "user4");
scheduler.update();
// The two active child queues under parentA would
// get 80/2=40% memory and vcores
for (int i = 1; i <= 2; i++) {
assertEquals(
40,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentA.childA" + i, false).getFairShare()
.getMemory()
/ nodeMem * 100, .9);
assertEquals(
40,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentA.childA" + i, false).getFairShare()
.getVirtualCores()
/ nodeVCores * 100, .9);
}
// The only active child queue under parentB would get 10% memory and vcores
assertEquals(
10,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentB.childB1", false).getFairShare()
.getMemory()
/ nodeMem * 100, .9);
assertEquals(
10,
(double) scheduler.getQueueManager()
.getLeafQueue("root.parentB.childB1", false).getFairShare()
.getVirtualCores()
/ nodeVCores * 100, .9);
}
}