Merging r1616894 through r1617376 from trunk.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-6584@1617377 13f79535-47bb-0310-9956-ffa450edef68
commit 04884cd530
@@ -199,6 +199,9 @@ Trunk (Unreleased)

HADOOP-10936. Change default KeyProvider bitlength to 128. (wang)

HADOOP-10224. JavaKeyStoreProvider has to protect against corrupting
underlying store. (asuresh via tucu)

BUG FIXES

HADOOP-9451. Fault single-layer config if node group topology is enabled.
@@ -421,6 +424,9 @@ Trunk (Unreleased)
HADOOP-10939. Fix TestKeyProviderFactory testcases to use default 128 bit
length keys. (Arun Suresh via wang)

HADOOP-10862. Miscellaneous trivial corrections to KMS classes.
(asuresh via tucu)

OPTIMIZATIONS

HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -547,6 +553,9 @@ Release 2.6.0 - UNRELEASED
HADOOP-10929. Typo in Configuration.getPasswordFromCredentialProviders
(lmccay via brandonli)

HADOOP-10402. Configuration.getValByRegex does not substitute for
variables. (Robert Kanter via kasha)

Release 2.5.0 - UNRELEASED

INCOMPATIBLE CHANGES
@@ -2755,7 +2755,8 @@ public Map<String,String> getValByRegex(String regex) {
item.getValue() instanceof String) {
m = p.matcher((String)item.getKey());
if(m.find()) { // match
result.put((String) item.getKey(), (String) item.getValue());
result.put((String) item.getKey(),
substituteVars(getProps().getProperty((String) item.getKey())));
}
}
}
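For reference, the effect of the Configuration change above (HADOOP-10402) is that values returned by getValByRegex() now go through variable substitution, matching the behavior of get(). A minimal sketch of the resulting behavior, using hypothetical property names rather than anything from this patch:

import java.util.Map;
import org.apache.hadoop.conf.Configuration;

public class GetValByRegexExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.set("my.base", "/data");                 // hypothetical properties
    conf.set("my.file", "${my.base}/input.txt");
    // Before the patch the raw value "${my.base}/input.txt" came back;
    // with substituteVars() applied, the expanded value is returned.
    Map<String, String> byRegex = conf.getValByRegex("^my.*file$");
    System.out.println(byRegex.get("my.file"));   // prints /data/input.txt
  }
}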
@@ -27,8 +27,11 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.ProviderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.crypto.spec.SecretKeySpec;

import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
@@ -80,6 +83,9 @@
@InterfaceAudience.Private
public class JavaKeyStoreProvider extends KeyProvider {
private static final String KEY_METADATA = "KeyMetadata";
private static Logger LOG =
LoggerFactory.getLogger(JavaKeyStoreProvider.class);

public static final String SCHEME_NAME = "jceks";

public static final String KEYSTORE_PASSWORD_FILE_KEY =
@@ -115,6 +121,10 @@ private JavaKeyStoreProvider(URI uri, Configuration conf) throws IOException {
if (pwFile != null) {
ClassLoader cl = Thread.currentThread().getContextClassLoader();
URL pwdFile = cl.getResource(pwFile);
if (pwdFile == null) {
// Provided Password file does not exist
throw new IOException("Password file does not exists");
}
if (pwdFile != null) {
InputStream is = pwdFile.openStream();
try {
@@ -129,19 +139,25 @@ private JavaKeyStoreProvider(URI uri, Configuration conf) throws IOException {
password = KEYSTORE_PASSWORD_DEFAULT;
}
try {
Path oldPath = constructOldPath(path);
Path newPath = constructNewPath(path);
keyStore = KeyStore.getInstance(SCHEME_NAME);
FsPermission perm = null;
if (fs.exists(path)) {
// save off permissions in case we need to
// rewrite the keystore in flush()
FileStatus s = fs.getFileStatus(path);
permissions = s.getPermission();

keyStore.load(fs.open(path), password);
// flush did not proceed to completion
// _NEW should not exist
if (fs.exists(newPath)) {
throw new IOException(
String.format("Keystore not loaded due to some inconsistency "
+ "('%s' and '%s' should not exist together)!!", path, newPath));
}
perm = tryLoadFromPath(path, oldPath);
} else {
permissions = new FsPermission("700");
// required to create an empty keystore. *sigh*
keyStore.load(null, password);
perm = tryLoadIncompleteFlush(oldPath, newPath);
}
// Need to save off permissions in case we need to
// rewrite the keystore in flush()
permissions = perm;
} catch (KeyStoreException e) {
throw new IOException("Can't create keystore", e);
} catch (NoSuchAlgorithmException e) {
@@ -154,6 +170,136 @@ private JavaKeyStoreProvider(URI uri, Configuration conf) throws IOException {
writeLock = lock.writeLock();
}

/**
* Try loading from the user specified path, else load from the backup
* path in case Exception is not due to bad/wrong password
* @param path Actual path to load from
* @param backupPath Backup path (_OLD)
* @return The permissions of the loaded file
* @throws NoSuchAlgorithmException
* @throws CertificateException
* @throws IOException
*/
private FsPermission tryLoadFromPath(Path path, Path backupPath)
throws NoSuchAlgorithmException, CertificateException,
IOException {
FsPermission perm = null;
try {
perm = loadFromPath(path, password);
// Remove _OLD if exists
if (fs.exists(backupPath)) {
fs.delete(backupPath, true);
}
LOG.debug("KeyStore loaded successfully !!");
} catch (IOException ioe) {
// If file is corrupted for some reason other than
// wrong password try the _OLD file if exits
if (!isBadorWrongPassword(ioe)) {
perm = loadFromPath(backupPath, password);
// Rename CURRENT to CORRUPTED
renameOrFail(path, new Path(path.toString() + "_CORRUPTED_"
+ System.currentTimeMillis()));
renameOrFail(backupPath, path);
LOG.debug(String.format(
"KeyStore loaded successfully from '%s' since '%s'"
+ "was corrupted !!", backupPath, path));
} else {
throw ioe;
}
}
return perm;
}

/**
* The KeyStore might have gone down during a flush, In which case either the
* _NEW or _OLD files might exists. This method tries to load the KeyStore
* from one of these intermediate files.
* @param oldPath the _OLD file created during flush
* @param newPath the _NEW file created during flush
* @return The permissions of the loaded file
* @throws IOException
* @throws NoSuchAlgorithmException
* @throws CertificateException
*/
private FsPermission tryLoadIncompleteFlush(Path oldPath, Path newPath)
throws IOException, NoSuchAlgorithmException, CertificateException {
FsPermission perm = null;
// Check if _NEW exists (in case flush had finished writing but not
// completed the re-naming)
if (fs.exists(newPath)) {
perm = loadAndReturnPerm(newPath, oldPath);
}
// try loading from _OLD (An earlier Flushing MIGHT not have completed
// writing completely)
if ((perm == null) && fs.exists(oldPath)) {
perm = loadAndReturnPerm(oldPath, newPath);
}
// If not loaded yet,
// required to create an empty keystore. *sigh*
if (perm == null) {
keyStore.load(null, password);
LOG.debug("KeyStore initialized anew successfully !!");
perm = new FsPermission("700");
}
return perm;
}

private FsPermission loadAndReturnPerm(Path pathToLoad, Path pathToDelete)
throws NoSuchAlgorithmException, CertificateException,
IOException {
FsPermission perm = null;
try {
perm = loadFromPath(pathToLoad, password);
renameOrFail(pathToLoad, path);
LOG.debug(String.format("KeyStore loaded successfully from '%s'!!",
pathToLoad));
if (fs.exists(pathToDelete)) {
fs.delete(pathToDelete, true);
}
} catch (IOException e) {
// Check for password issue : don't want to trash file due
// to wrong password
if (isBadorWrongPassword(e)) {
throw e;
}
}
return perm;
}

private boolean isBadorWrongPassword(IOException ioe) {
// As per documentation this is supposed to be the way to figure
// if password was correct
if (ioe.getCause() instanceof UnrecoverableKeyException) {
return true;
}
// Unfortunately that doesn't seem to work..
// Workaround :
if ((ioe.getCause() == null)
&& (ioe.getMessage() != null)
&& ((ioe.getMessage().contains("Keystore was tampered")) || (ioe
.getMessage().contains("password was incorrect")))) {
return true;
}
return false;
}

private FsPermission loadFromPath(Path p, char[] password)
throws IOException, NoSuchAlgorithmException, CertificateException {
FileStatus s = fs.getFileStatus(p);
keyStore.load(fs.open(p), password);
return s.getPermission();
}

private Path constructNewPath(Path path) {
Path newPath = new Path(path.toString() + "_NEW");
return newPath;
}

private Path constructOldPath(Path path) {
Path oldPath = new Path(path.toString() + "_OLD");
return oldPath;
}

@Override
public KeyVersion getKeyVersion(String versionName) throws IOException {
readLock.lock();
@@ -352,11 +498,22 @@ public KeyVersion rollNewVersion(String name,

@Override
public void flush() throws IOException {
Path newPath = constructNewPath(path);
Path oldPath = constructOldPath(path);
writeLock.lock();
try {
if (!changed) {
return;
}
// Might exist if a backup has been restored etc.
if (fs.exists(newPath)) {
renameOrFail(newPath, new Path(newPath.toString()
+ "_ORPHANED_" + System.currentTimeMillis()));
}
if (fs.exists(oldPath)) {
renameOrFail(oldPath, new Path(oldPath.toString()
+ "_ORPHANED_" + System.currentTimeMillis()));
}
// put all of the updates into the keystore
for(Map.Entry<String, Metadata> entry: cache.entrySet()) {
try {
@@ -366,25 +523,77 @@ public void flush() throws IOException {
throw new IOException("Can't set metadata key " + entry.getKey(),e );
}
}

// Save old File first
boolean fileExisted = backupToOld(oldPath);
// write out the keystore
FSDataOutputStream out = FileSystem.create(fs, path, permissions);
// Write to _NEW path first :
try {
keyStore.store(out, password);
} catch (KeyStoreException e) {
throw new IOException("Can't store keystore " + this, e);
} catch (NoSuchAlgorithmException e) {
throw new IOException("No such algorithm storing keystore " + this, e);
} catch (CertificateException e) {
throw new IOException("Certificate exception storing keystore " + this,
e);
writeToNew(newPath);
} catch (IOException ioe) {
// rename _OLD back to curent and throw Exception
revertFromOld(oldPath, fileExisted);
throw ioe;
}
out.close();
// Rename _NEW to CURRENT and delete _OLD
cleanupNewAndOld(newPath, oldPath);
changed = false;
} finally {
writeLock.unlock();
}
}

private void cleanupNewAndOld(Path newPath, Path oldPath) throws IOException {
// Rename _NEW to CURRENT
renameOrFail(newPath, path);
// Delete _OLD
if (fs.exists(oldPath)) {
fs.delete(oldPath, true);
}
}

private void writeToNew(Path newPath) throws IOException {
FSDataOutputStream out =
FileSystem.create(fs, newPath, permissions);
try {
keyStore.store(out, password);
} catch (KeyStoreException e) {
throw new IOException("Can't store keystore " + this, e);
} catch (NoSuchAlgorithmException e) {
throw new IOException(
"No such algorithm storing keystore " + this, e);
} catch (CertificateException e) {
throw new IOException(
"Certificate exception storing keystore " + this, e);
}
out.close();
}

private void revertFromOld(Path oldPath, boolean fileExisted)
throws IOException {
if (fileExisted) {
renameOrFail(oldPath, path);
}
}

private boolean backupToOld(Path oldPath)
throws IOException {
boolean fileExisted = false;
if (fs.exists(path)) {
renameOrFail(path, oldPath);
fileExisted = true;
}
return fileExisted;
}

private void renameOrFail(Path src, Path dest)
throws IOException {
if (!fs.rename(src, dest)) {
throw new IOException("Rename unsuccessful : "
+ String.format("'%s' to '%s'", src, dest));
}
}

@Override
public String toString() {
return uri.toString();
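As a reading aid (not part of the patch), the load/flush protocol introduced above can be summarized: flush() backs the current file up to _OLD, writes the new store to _NEW, renames _NEW over the current path, and deletes _OLD, while the load path falls back to _OLD or _NEW if a previous flush was interrupted. The sketch below illustrates only the write ordering, using plain java.io.File instead of the Hadoop FileSystem API the provider actually uses:

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

/** Illustration only: the same rewrite ordering as JavaKeyStoreProvider.flush(). */
public class AtomicRewriteSketch {
  static void rewrite(File current, byte[] newContents) throws IOException {
    File oldCopy = new File(current.getPath() + "_OLD");
    File newCopy = new File(current.getPath() + "_NEW");
    boolean existed = false;
    if (current.exists()) {                    // backupToOld(): keep a fallback copy
      if (!current.renameTo(oldCopy)) throw new IOException("rename to _OLD failed");
      existed = true;
    }
    try (FileOutputStream out = new FileOutputStream(newCopy)) {
      out.write(newContents);                  // writeToNew(): write the fresh store
    } catch (IOException ioe) {
      if (existed) oldCopy.renameTo(current);  // revertFromOld() on failure
      throw ioe;
    }
    if (!newCopy.renameTo(current)) throw new IOException("rename to CURRENT failed");
    oldCopy.delete();                          // cleanupNewAndOld()
  }
}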
@@ -512,7 +512,7 @@ private List<String[]> createKeySets(String[] keyNames) {
List<String> batch = new ArrayList<String>();
int batchLen = 0;
for (String name : keyNames) {
int additionalLen = KMSRESTConstants.KEY_OP.length() + 1 + name.length();
int additionalLen = KMSRESTConstants.KEY.length() + 1 + name.length();
batchLen += additionalLen;
// topping at 1500 to account for initial URL and encoded names
if (batchLen > 1500) {
@@ -536,7 +536,7 @@ public Metadata[] getKeysMetadata(String ... keyNames) throws IOException {
for (String[] keySet : keySets) {
if (keyNames.length > 0) {
Map<String, Object> queryStr = new HashMap<String, Object>();
queryStr.put(KMSRESTConstants.KEY_OP, keySet);
queryStr.put(KMSRESTConstants.KEY, keySet);
URL url = createURL(KMSRESTConstants.KEYS_METADATA_RESOURCE, null,
null, queryStr);
HttpURLConnection conn = createConnection(url, HTTP_GET);
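For orientation, the createKeySets() context above batches key names so that each metadata request URL stays under roughly 1500 characters. A generic, standalone sketch of that batching idea (not the provider's actual private method):

import java.util.ArrayList;
import java.util.List;

public class BatchByLengthSketch {
  /** Split names into batches whose combined "param=name" length stays under cap. */
  static List<List<String>> batches(List<String> names, String param, int cap) {
    List<List<String>> all = new ArrayList<>();
    List<String> batch = new ArrayList<>();
    int len = 0;
    for (String name : names) {
      int add = param.length() + 1 + name.length();  // "key=" + name, as in the diff
      if (!batch.isEmpty() && len + add > cap) {     // cap ~1500 leaves room for the base URL
        all.add(batch);
        batch = new ArrayList<>();
        len = 0;
      }
      batch.add(name);
      len += add;
    }
    if (!batch.isEmpty()) all.add(batch);
    return all;
  }
}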
@@ -37,7 +37,7 @@ public class KMSRESTConstants {
public static final String EEK_SUB_RESOURCE = "_eek";
public static final String CURRENT_VERSION_SUB_RESOURCE = "_currentversion";

public static final String KEY_OP = "key";
public static final String KEY = "key";
public static final String EEK_OP = "eek_op";
public static final String EEK_GENERATE = "generate";
public static final String EEK_DECRYPT = "decrypt";
@@ -178,6 +178,14 @@ public void testVariableSubstitution() throws IOException {
// check that expansion also occurs for getInt()
assertTrue(conf.getInt("intvar", -1) == 42);
assertTrue(conf.getInt("my.int", -1) == 42);

Map<String, String> results = conf.getValByRegex("^my.*file$");
assertTrue(results.keySet().contains("my.relfile"));
assertTrue(results.keySet().contains("my.fullfile"));
assertTrue(results.keySet().contains("my.file"));
assertEquals(-1, results.get("my.relfile").indexOf("${"));
assertEquals(-1, results.get("my.fullfile").indexOf("${"));
assertEquals(-1, results.get("my.file").indexOf("${"));
}

public void testFinalParam() throws IOException {
@@ -220,11 +220,76 @@ public void testJksProvider() throws Exception {
assertTrue(s.getPermission().toString().equals("rwx------"));
assertTrue(file + " should exist", file.isFile());

// Corrupt file and Check if JKS can reload from _OLD file
File oldFile = new File(file.getPath() + "_OLD");
file.renameTo(oldFile);
file.delete();
file.createNewFile();
assertTrue(oldFile.exists());
KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0);
assertTrue(file.exists());
assertTrue(oldFile + "should be deleted", !oldFile.exists());
verifyAfterReload(file, provider);
assertTrue(!oldFile.exists());

// _NEW and current file should not exist together
File newFile = new File(file.getPath() + "_NEW");
newFile.createNewFile();
try {
provider = KeyProviderFactory.getProviders(conf).get(0);
Assert.fail("_NEW and current file should not exist together !!");
} catch (Exception e) {
// Ignore
} finally {
if (newFile.exists()) {
newFile.delete();
}
}

// Load from _NEW file
file.renameTo(newFile);
file.delete();
try {
provider = KeyProviderFactory.getProviders(conf).get(0);
Assert.assertFalse(newFile.exists());
Assert.assertFalse(oldFile.exists());
} catch (Exception e) {
Assert.fail("JKS should load from _NEW file !!");
// Ignore
}
verifyAfterReload(file, provider);

// _NEW exists but corrupt.. must load from _OLD
newFile.createNewFile();
file.renameTo(oldFile);
file.delete();
try {
provider = KeyProviderFactory.getProviders(conf).get(0);
Assert.assertFalse(newFile.exists());
Assert.assertFalse(oldFile.exists());
} catch (Exception e) {
Assert.fail("JKS should load from _OLD file !!");
// Ignore
} finally {
if (newFile.exists()) {
newFile.delete();
}
}
verifyAfterReload(file, provider);

// check permission retention after explicit change
fs.setPermission(path, new FsPermission("777"));
checkPermissionRetention(conf, ourUrl, path);
}

private void verifyAfterReload(File file, KeyProvider provider)
throws IOException {
List<String> existingKeys = provider.getKeys();
assertTrue(existingKeys.contains("key4"));
assertTrue(existingKeys.contains("key3"));
assertTrue(file.exists());
}

public void checkPermissionRetention(Configuration conf, String ourUrl, Path path) throws Exception {
KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0);
// let's add a new key and flush and check that permissions are still set to 777
@@ -47,7 +47,6 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.security.Principal;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
@@ -59,19 +58,14 @@
@Path(KMSRESTConstants.SERVICE_VERSION)
@InterfaceAudience.Private
public class KMS {
public static final String CREATE_KEY = "CREATE_KEY";
public static final String DELETE_KEY = "DELETE_KEY";
public static final String ROLL_NEW_VERSION = "ROLL_NEW_VERSION";
public static final String GET_KEYS = "GET_KEYS";
public static final String GET_KEYS_METADATA = "GET_KEYS_METADATA";
public static final String GET_KEY_VERSIONS = "GET_KEY_VERSIONS";
public static final String GET_METADATA = "GET_METADATA";

public static final String GET_KEY_VERSION = "GET_KEY_VERSION";
public static final String GET_CURRENT_KEY = "GET_CURRENT_KEY";
public static final String GENERATE_EEK = "GENERATE_EEK";
public static final String DECRYPT_EEK = "DECRYPT_EEK";

public static enum KMSOp {
CREATE_KEY, DELETE_KEY, ROLL_NEW_VERSION,
GET_KEYS, GET_KEYS_METADATA,
GET_KEY_VERSIONS, GET_METADATA, GET_KEY_VERSION, GET_CURRENT_KEY,
GENERATE_EEK, DECRYPT_EEK
}

private KeyProviderCryptoExtension provider;
private KMSAudit kmsAudit;

@@ -91,22 +85,22 @@ private static Principal getPrincipal(SecurityContext securityContext)

private static final String UNAUTHORIZED_MSG_WITH_KEY =
"User:{0} not allowed to do ''{1}'' on ''{2}''";
"User:%s not allowed to do '%s' on '%s'";

private static final String UNAUTHORIZED_MSG_WITHOUT_KEY =
"User:{0} not allowed to do ''{1}''";
"User:%s not allowed to do '%s'";

private void assertAccess(KMSACLs.Type aclType, Principal principal,
String operation) throws AccessControlException {
KMSOp operation) throws AccessControlException {
assertAccess(aclType, principal, operation, null);
}

private void assertAccess(KMSACLs.Type aclType, Principal principal,
String operation, String key) throws AccessControlException {
KMSOp operation, String key) throws AccessControlException {
if (!KMSWebApp.getACLs().hasAccess(aclType, principal.getName())) {
KMSWebApp.getUnauthorizedCallsMeter().mark();
kmsAudit.unauthorized(principal, operation, key);
throw new AuthorizationException(MessageFormat.format(
throw new AuthorizationException(String.format(
(key != null) ? UNAUTHORIZED_MSG_WITH_KEY
: UNAUTHORIZED_MSG_WITHOUT_KEY,
principal.getName(), operation, key));
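A side note on the message templates above: the patch switches from MessageFormat (where placeholders are {0}, {1} and a literal apostrophe must be doubled as '') to String.format with %s placeholders. A small standalone comparison, with a made-up user and key purely for illustration:

import java.text.MessageFormat;

public class MessageStyleExample {
  public static void main(String[] args) {
    // MessageFormat style: {n} placeholders, '' produces a single quote
    String a = MessageFormat.format("User:{0} not allowed to do ''{1}'' on ''{2}''",
        "alice", "CREATE_KEY", "k1");
    // String.format style: %s placeholders, plain quotes need no escaping
    String b = String.format("User:%s not allowed to do '%s' on '%s'",
        "alice", "CREATE_KEY", "k1");
    System.out.println(a);  // User:alice not allowed to do 'CREATE_KEY' on 'k1'
    System.out.println(b);  // same output
  }
}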
@@ -135,7 +129,7 @@ public Response createKey(@Context SecurityContext securityContext,
Principal user = getPrincipal(securityContext);
String name = (String) jsonKey.get(KMSRESTConstants.NAME_FIELD);
KMSClientProvider.checkNotEmpty(name, KMSRESTConstants.NAME_FIELD);
assertAccess(KMSACLs.Type.CREATE, user, CREATE_KEY, name);
assertAccess(KMSACLs.Type.CREATE, user, KMSOp.CREATE_KEY, name);
String cipher = (String) jsonKey.get(KMSRESTConstants.CIPHER_FIELD);
String material = (String) jsonKey.get(KMSRESTConstants.MATERIAL_FIELD);
int length = (jsonKey.containsKey(KMSRESTConstants.LENGTH_FIELD))
@@ -146,7 +140,7 @@ public Response createKey(@Context SecurityContext securityContext,
jsonKey.get(KMSRESTConstants.ATTRIBUTES_FIELD);
if (material != null) {
assertAccess(KMSACLs.Type.SET_KEY_MATERIAL, user,
CREATE_KEY + " with user provided material", name);
KMSOp.CREATE_KEY, name);
}
KeyProvider.Options options = new KeyProvider.Options(
KMSWebApp.getConfiguration());
@@ -165,7 +159,7 @@ public Response createKey(@Context SecurityContext securityContext,

provider.flush();

kmsAudit.ok(user, CREATE_KEY, name, "UserProvidedMaterial:" +
kmsAudit.ok(user, KMSOp.CREATE_KEY, name, "UserProvidedMaterial:" +
(material != null) + " Description:" + description);

if (!KMSWebApp.getACLs().hasAccess(KMSACLs.Type.GET, user.getName())) {
@@ -186,12 +180,12 @@ public Response deleteKey(@Context SecurityContext securityContext,
@PathParam("name") String name) throws Exception {
KMSWebApp.getAdminCallsMeter().mark();
Principal user = getPrincipal(securityContext);
assertAccess(KMSACLs.Type.DELETE, user, DELETE_KEY, name);
assertAccess(KMSACLs.Type.DELETE, user, KMSOp.DELETE_KEY, name);
KMSClientProvider.checkNotEmpty(name, "name");
provider.deleteKey(name);
provider.flush();

kmsAudit.ok(user, DELETE_KEY, name, "");
kmsAudit.ok(user, KMSOp.DELETE_KEY, name, "");

return Response.ok().build();
}
@@ -205,13 +199,13 @@ public Response rolloverKey(@Context SecurityContext securityContext,
throws Exception {
KMSWebApp.getAdminCallsMeter().mark();
Principal user = getPrincipal(securityContext);
assertAccess(KMSACLs.Type.ROLLOVER, user, ROLL_NEW_VERSION, name);
assertAccess(KMSACLs.Type.ROLLOVER, user, KMSOp.ROLL_NEW_VERSION, name);
KMSClientProvider.checkNotEmpty(name, "name");
String material = (String)
jsonMaterial.get(KMSRESTConstants.MATERIAL_FIELD);
if (material != null) {
assertAccess(KMSACLs.Type.SET_KEY_MATERIAL, user,
ROLL_NEW_VERSION + " with user provided material", name);
KMSOp.ROLL_NEW_VERSION, name);
}
KeyProvider.KeyVersion keyVersion = (material != null)
? provider.rollNewVersion(name, Base64.decodeBase64(material))
@@ -219,7 +213,7 @@ public Response rolloverKey(@Context SecurityContext securityContext,

provider.flush();

kmsAudit.ok(user, ROLL_NEW_VERSION, name, "UserProvidedMaterial:" +
kmsAudit.ok(user, KMSOp.ROLL_NEW_VERSION, name, "UserProvidedMaterial:" +
(material != null) + " NewVersion:" + keyVersion.getVersionName());

if (!KMSWebApp.getACLs().hasAccess(KMSACLs.Type.GET, user.getName())) {
@@ -233,15 +227,15 @@ public Response rolloverKey(@Context SecurityContext securityContext,
@Path(KMSRESTConstants.KEYS_METADATA_RESOURCE)
@Produces(MediaType.APPLICATION_JSON)
public Response getKeysMetadata(@Context SecurityContext securityContext,
@QueryParam(KMSRESTConstants.KEY_OP) List<String> keyNamesList)
@QueryParam(KMSRESTConstants.KEY) List<String> keyNamesList)
throws Exception {
KMSWebApp.getAdminCallsMeter().mark();
Principal user = getPrincipal(securityContext);
String[] keyNames = keyNamesList.toArray(new String[keyNamesList.size()]);
assertAccess(KMSACLs.Type.GET_METADATA, user, GET_KEYS_METADATA);
assertAccess(KMSACLs.Type.GET_METADATA, user, KMSOp.GET_KEYS_METADATA);
KeyProvider.Metadata[] keysMeta = provider.getKeysMetadata(keyNames);
Object json = KMSServerJSONUtils.toJSON(keyNames, keysMeta);
kmsAudit.ok(user, GET_KEYS_METADATA, "");
kmsAudit.ok(user, KMSOp.GET_KEYS_METADATA, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}

@@ -252,9 +246,9 @@ public Response getKeyNames(@Context SecurityContext securityContext)
throws Exception {
KMSWebApp.getAdminCallsMeter().mark();
Principal user = getPrincipal(securityContext);
assertAccess(KMSACLs.Type.GET_KEYS, user, GET_KEYS);
assertAccess(KMSACLs.Type.GET_KEYS, user, KMSOp.GET_KEYS);
Object json = provider.getKeys();
kmsAudit.ok(user, GET_KEYS, "");
kmsAudit.ok(user, KMSOp.GET_KEYS, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}

@@ -276,9 +270,9 @@ public Response getMetadata(@Context SecurityContext securityContext,
Principal user = getPrincipal(securityContext);
KMSClientProvider.checkNotEmpty(name, "name");
KMSWebApp.getAdminCallsMeter().mark();
assertAccess(KMSACLs.Type.GET_METADATA, user, GET_METADATA, name);
assertAccess(KMSACLs.Type.GET_METADATA, user, KMSOp.GET_METADATA, name);
Object json = KMSServerJSONUtils.toJSON(name, provider.getMetadata(name));
kmsAudit.ok(user, GET_METADATA, name, "");
kmsAudit.ok(user, KMSOp.GET_METADATA, name, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}

@@ -292,9 +286,9 @@ public Response getCurrentVersion(@Context SecurityContext securityContext,
Principal user = getPrincipal(securityContext);
KMSClientProvider.checkNotEmpty(name, "name");
KMSWebApp.getKeyCallsMeter().mark();
assertAccess(KMSACLs.Type.GET, user, GET_CURRENT_KEY, name);
assertAccess(KMSACLs.Type.GET, user, KMSOp.GET_CURRENT_KEY, name);
Object json = KMSServerJSONUtils.toJSON(provider.getCurrentKey(name));
kmsAudit.ok(user, GET_CURRENT_KEY, name, "");
kmsAudit.ok(user, KMSOp.GET_CURRENT_KEY, name, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}

@@ -308,9 +302,9 @@ public Response getKeyVersion(@Context SecurityContext securityContext,
KMSClientProvider.checkNotEmpty(versionName, "versionName");
KMSWebApp.getKeyCallsMeter().mark();
KeyVersion keyVersion = provider.getKeyVersion(versionName);
assertAccess(KMSACLs.Type.GET, user, GET_KEY_VERSION);
assertAccess(KMSACLs.Type.GET, user, KMSOp.GET_KEY_VERSION);
if (keyVersion != null) {
kmsAudit.ok(user, GET_KEY_VERSION, keyVersion.getName(), "");
kmsAudit.ok(user, KMSOp.GET_KEY_VERSION, keyVersion.getName(), "");
}
Object json = KMSServerJSONUtils.toJSON(keyVersion);
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
@@ -334,7 +328,7 @@ public Response generateEncryptedKeys(

Object retJSON;
if (edekOp.equals(KMSRESTConstants.EEK_GENERATE)) {
assertAccess(KMSACLs.Type.GENERATE_EEK, user, GENERATE_EEK, name);
assertAccess(KMSACLs.Type.GENERATE_EEK, user, KMSOp.GENERATE_EEK, name);

List<EncryptedKeyVersion> retEdeks =
new LinkedList<EncryptedKeyVersion>();
@@ -345,7 +339,7 @@ public Response generateEncryptedKeys(
} catch (Exception e) {
throw new IOException(e);
}
kmsAudit.ok(user, GENERATE_EEK, name, "");
kmsAudit.ok(user, KMSOp.GENERATE_EEK, name, "");
retJSON = new ArrayList();
for (EncryptedKeyVersion edek : retEdeks) {
((ArrayList)retJSON).add(KMSServerJSONUtils.toJSON(edek));
@@ -380,7 +374,7 @@ public Response decryptEncryptedKey(@Context SecurityContext securityContext,
(String) jsonPayload.get(KMSRESTConstants.MATERIAL_FIELD);
Object retJSON;
if (eekOp.equals(KMSRESTConstants.EEK_DECRYPT)) {
assertAccess(KMSACLs.Type.DECRYPT_EEK, user, DECRYPT_EEK, keyName);
assertAccess(KMSACLs.Type.DECRYPT_EEK, user, KMSOp.DECRYPT_EEK, keyName);
KMSClientProvider.checkNotNull(ivStr, KMSRESTConstants.IV_FIELD);
byte[] iv = Base64.decodeBase64(ivStr);
KMSClientProvider.checkNotNull(encMaterialStr,
@@ -391,7 +385,7 @@ public Response decryptEncryptedKey(@Context SecurityContext securityContext,
new KMSClientProvider.KMSEncryptedKeyVersion(keyName, versionName,
iv, KeyProviderCryptoExtension.EEK, encMaterial));
retJSON = KMSServerJSONUtils.toJSON(retKeyVersion);
kmsAudit.ok(user, DECRYPT_EEK, keyName, "");
kmsAudit.ok(user, KMSOp.DECRYPT_EEK, keyName, "");
} else {
throw new IllegalArgumentException("Wrong " + KMSRESTConstants.EEK_OP +
" value, it must be " + KMSRESTConstants.EEK_GENERATE + " or " +
@@ -412,9 +406,9 @@ public Response getKeyVersions(@Context SecurityContext securityContext,
Principal user = getPrincipal(securityContext);
KMSClientProvider.checkNotEmpty(name, "name");
KMSWebApp.getKeyCallsMeter().mark();
assertAccess(KMSACLs.Type.GET, user, GET_KEY_VERSIONS, name);
assertAccess(KMSACLs.Type.GET, user, KMSOp.GET_KEY_VERSIONS, name);
Object json = KMSServerJSONUtils.toJSON(provider.getKeyVersions(name));
kmsAudit.ok(user, GET_KEY_VERSIONS, name, "");
kmsAudit.ok(user, KMSOp.GET_KEY_VERSIONS, name, "");
return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
}
@@ -50,11 +50,11 @@ private static class AuditEvent {
private final AtomicLong accessCount = new AtomicLong(-1);
private final String keyName;
private final String user;
private final String op;
private final KMS.KMSOp op;
private final String extraMsg;
private final long startTime = System.currentTimeMillis();

private AuditEvent(String keyName, String user, String op, String msg) {
private AuditEvent(String keyName, String user, KMS.KMSOp op, String msg) {
this.keyName = keyName;
this.user = user;
this.op = op;
@@ -77,7 +77,7 @@ public String getUser() {
return user;
}

public String getOp() {
public KMS.KMSOp getOp() {
return op;
}

@@ -90,8 +90,9 @@ public static enum OpStatus {
OK, UNAUTHORIZED, UNAUTHENTICATED, ERROR;
}

private static Set<String> AGGREGATE_OPS_WHITELIST = Sets.newHashSet(
KMS.GET_KEY_VERSION, KMS.GET_CURRENT_KEY, KMS.DECRYPT_EEK, KMS.GENERATE_EEK
private static Set<KMS.KMSOp> AGGREGATE_OPS_WHITELIST = Sets.newHashSet(
KMS.KMSOp.GET_KEY_VERSION, KMS.KMSOp.GET_CURRENT_KEY,
KMS.KMSOp.DECRYPT_EEK, KMS.KMSOp.GENERATE_EEK
);

private Cache<String, AuditEvent> cache;
@@ -137,10 +138,10 @@ private void logEvent(AuditEvent event) {
event.getExtraMsg());
}

private void op(OpStatus opStatus, final String op, final String user,
private void op(OpStatus opStatus, final KMS.KMSOp op, final String user,
final String key, final String extraMsg) {
if (!Strings.isNullOrEmpty(user) && !Strings.isNullOrEmpty(key)
&& !Strings.isNullOrEmpty(op)
&& (op != null)
&& AGGREGATE_OPS_WHITELIST.contains(op)) {
String cacheKey = createCacheKey(user, key, op);
if (opStatus == OpStatus.UNAUTHORIZED) {
@@ -167,7 +168,7 @@ public AuditEvent call() throws Exception {
}
} else {
List<String> kvs = new LinkedList<String>();
if (!Strings.isNullOrEmpty(op)) {
if (op != null) {
kvs.add("op=" + op);
}
if (!Strings.isNullOrEmpty(key)) {
@@ -185,16 +186,16 @@ public AuditEvent call() throws Exception {
}
}

public void ok(Principal user, String op, String key,
public void ok(Principal user, KMS.KMSOp op, String key,
String extraMsg) {
op(OpStatus.OK, op, user.getName(), key, extraMsg);
}

public void ok(Principal user, String op, String extraMsg) {
public void ok(Principal user, KMS.KMSOp op, String extraMsg) {
op(OpStatus.OK, op, user.getName(), null, extraMsg);
}

public void unauthorized(Principal user, String op, String key) {
public void unauthorized(Principal user, KMS.KMSOp op, String key) {
op(OpStatus.UNAUTHORIZED, op, user.getName(), key, "");
}

@@ -211,7 +212,7 @@ public void unauthenticated(String remoteHost, String method,
+ " URL:" + url + " ErrorMsg:'" + extraMsg + "'");
}

private static String createCacheKey(String user, String key, String op) {
private static String createCacheKey(String user, String key, KMS.KMSOp op) {
return user + "#" + key + "#" + op;
}
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.crypto.key.kms.server;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;

import java.io.File;
@@ -26,6 +27,7 @@
/**
* Utility class to load KMS configuration files.
*/
@InterfaceAudience.Private
public class KMSConfiguration {

public static final String KMS_CONFIG_DIR = "kms.config.dir";
@@ -17,12 +17,15 @@
*/
package org.apache.hadoop.crypto.key.kms.server;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.jmx.JMXJsonServlet;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import java.io.IOException;

@InterfaceAudience.Private
public class KMSJMXServlet extends JMXJsonServlet {

@Override
@@ -23,6 +23,7 @@
import java.io.PrintStream;
import java.security.Principal;

import org.apache.hadoop.crypto.key.kms.server.KMS.KMSOp;
import org.apache.log4j.LogManager;
import org.apache.log4j.PropertyConfigurator;
import org.junit.After;
@@ -82,16 +83,16 @@ private String getAndResetLogOutput() {
public void testAggregation() throws Exception {
Principal luser = Mockito.mock(Principal.class);
Mockito.when(luser.getName()).thenReturn("luser");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DELETE_KEY, "k1", "testmsg");
kmsAudit.ok(luser, KMS.ROLL_NEW_VERSION, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DELETE_KEY, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.ROLL_NEW_VERSION, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
Thread.sleep(1500);
kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
Thread.sleep(1500);
String out = getAndResetLogOutput();
System.out.println(out);
@@ -110,15 +111,15 @@ public void testAggregation() throws Exception {
public void testAggregationUnauth() throws Exception {
Principal luser = Mockito.mock(Principal.class);
Mockito.when(luser.getName()).thenReturn("luser");
kmsAudit.unauthorized(luser, KMS.GENERATE_EEK, "k2");
kmsAudit.unauthorized(luser, KMSOp.GENERATE_EEK, "k2");
Thread.sleep(1000);
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.unauthorized(luser, KMS.GENERATE_EEK, "k3");
kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
kmsAudit.unauthorized(luser, KMSOp.GENERATE_EEK, "k3");
kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
Thread.sleep(2000);
String out = getAndResetLogOutput();
System.out.println(out);
@@ -724,6 +724,10 @@ READ3Response read(XDR xdr, SecurityHandler securityHandler,
FSDataInputStream fis = clientCache.getDfsInputStream(userName,
Nfs3Utils.getFileIdPath(handle));

if (fis == null) {
return new READ3Response(Nfs3Status.NFS3ERR_ACCES);
}

try {
readCount = fis.read(offset, readbuffer, 0, count);
} catch (IOException e) {
@@ -278,13 +278,11 @@ public void testRead() throws Exception {
readReq.serialize(xdr_req);

// Attempt by an unpriviledged user should fail.
/* Hits HDFS-6582. It needs to be fixed first.
READ3Response response1 = nfsd.read(xdr_req.asReadOnlyWrap(),
securityHandlerUnpriviledged,
new InetSocketAddress("localhost", 1234));
assertEquals("Incorrect return code:", Nfs3Status.NFS3ERR_ACCES,
response1.getStatus());
*/

// Attempt by a priviledged user should pass.
READ3Response response2 = nfsd.read(xdr_req.asReadOnlyWrap(),
@@ -407,6 +407,12 @@ Release 2.6.0 - UNRELEASED
HDFS-6828. Separate block replica dispatching from Balancer. (szetszwo via
jing9)

HDFS-6837. Code cleanup for Balancer and Dispatcher. (szetszwo via
jing9)

HDFS-6838. Code cleanup for unnecessary INode replacement.
(Jing Zhao via wheat9)

OPTIMIZATIONS

HDFS-6690. Deduplicate xattr names in memory. (wang)
@@ -502,6 +508,9 @@ Release 2.6.0 - UNRELEASED
HDFS-6791. A block could remain under replicated if all of its replicas are on
decommissioned nodes. (Ming Ma via jing9)

HDFS-6582. Missing null check in RpcProgramNfs3#read(XDR, SecurityHandler)
(Abhiraj Butala via brandonli)

Release 2.5.0 - UNRELEASED

INCOMPATIBLE CHANGES
@@ -44,7 +44,8 @@
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.BalancerDatanode;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode.StorageGroup;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Source;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Task;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Util;
@@ -184,10 +185,10 @@ public class Balancer {
// all data node lists
private final Collection<Source> overUtilized = new LinkedList<Source>();
private final Collection<Source> aboveAvgUtilized = new LinkedList<Source>();
private final Collection<BalancerDatanode.StorageGroup> belowAvgUtilized
= new LinkedList<BalancerDatanode.StorageGroup>();
private final Collection<BalancerDatanode.StorageGroup> underUtilized
= new LinkedList<BalancerDatanode.StorageGroup>();
private final Collection<StorageGroup> belowAvgUtilized
= new LinkedList<StorageGroup>();
private final Collection<StorageGroup> underUtilized
= new LinkedList<StorageGroup>();

/* Check that this Balancer is compatible with the Block Placement Policy
* used by the Namenode.
@@ -209,8 +210,22 @@ private static void checkReplicationPolicyCompatibility(Configuration conf
* when connection fails.
*/
Balancer(NameNodeConnector theblockpool, Parameters p, Configuration conf) {
final long movedWinWidth = conf.getLong(
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
final int moverThreads = conf.getInt(
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT);
final int dispatcherThreads = conf.getInt(
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT);
final int maxConcurrentMovesPerNode = conf.getInt(
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);

this.dispatcher = new Dispatcher(theblockpool, p.nodesToBeIncluded,
p.nodesToBeExcluded, conf);
p.nodesToBeExcluded, movedWinWidth, moverThreads, dispatcherThreads,
maxConcurrentMovesPerNode, conf);
this.threshold = p.threshold;
this.policy = p.policy;
}
@@ -255,7 +270,7 @@ private long init(List<DatanodeStorageReport> reports) {
// over-utilized, above-average, below-average and under-utilized.
long overLoadedBytes = 0L, underLoadedBytes = 0L;
for(DatanodeStorageReport r : reports) {
final BalancerDatanode dn = dispatcher.newDatanode(r);
final DDatanode dn = dispatcher.newDatanode(r);
for(StorageType t : StorageType.asList()) {
final Double utilization = policy.getUtilization(r, t);
if (utilization == null) { // datanode does not have such storage type
@@ -268,9 +283,9 @@ private long init(List<DatanodeStorageReport> reports) {
final long maxSize2Move = computeMaxSize2Move(capacity,
getRemaining(r, t), utilizationDiff, threshold);

final BalancerDatanode.StorageGroup g;
final StorageGroup g;
if (utilizationDiff > 0) {
final Source s = dn.addSource(t, utilization, maxSize2Move, dispatcher);
final Source s = dn.addSource(t, maxSize2Move, dispatcher);
if (thresholdDiff <= 0) { // within threshold
aboveAvgUtilized.add(s);
} else {
@@ -279,7 +294,7 @@ private long init(List<DatanodeStorageReport> reports) {
}
g = s;
} else {
g = dn.addStorageGroup(t, utilization, maxSize2Move);
g = dn.addStorageGroup(t, maxSize2Move);
if (thresholdDiff <= 0) { // within threshold
belowAvgUtilized.add(g);
} else {
@@ -328,7 +343,7 @@ private void logUtilizationCollections() {
logUtilizationCollection("underutilized", underUtilized);
}

private static <T extends BalancerDatanode.StorageGroup>
private static <T extends StorageGroup>
void logUtilizationCollection(String name, Collection<T> items) {
LOG.info(items.size() + " " + name + ": " + items);
}
@@ -381,8 +396,7 @@ private void chooseStorageGroups(final Matcher matcher) {
* datanodes or the candidates are source nodes with (utilization > Avg), and
* the others are target nodes with (utilization < Avg).
*/
private <G extends BalancerDatanode.StorageGroup,
C extends BalancerDatanode.StorageGroup>
private <G extends StorageGroup, C extends StorageGroup>
void chooseStorageGroups(Collection<G> groups, Collection<C> candidates,
Matcher matcher) {
for(final Iterator<G> i = groups.iterator(); i.hasNext();) {
@@ -398,9 +412,8 @@ void chooseStorageGroups(Collection<G> groups, Collection<C> candidates,
* For the given datanode, choose a candidate and then schedule it.
* @return true if a candidate is chosen; false if no candidates is chosen.
*/
private <C extends BalancerDatanode.StorageGroup>
boolean choose4One(BalancerDatanode.StorageGroup g,
Collection<C> candidates, Matcher matcher) {
private <C extends StorageGroup> boolean choose4One(StorageGroup g,
Collection<C> candidates, Matcher matcher) {
final Iterator<C> i = candidates.iterator();
final C chosen = chooseCandidate(g, i, matcher);

@@ -418,8 +431,7 @@ boolean choose4One(BalancerDatanode.StorageGroup g,
return true;
}

private void matchSourceWithTargetToMove(Source source,
BalancerDatanode.StorageGroup target) {
private void matchSourceWithTargetToMove(Source source, StorageGroup target) {
long size = Math.min(source.availableSizeToMove(), target.availableSizeToMove());
final Task task = new Task(target, size);
source.addTask(task);
@@ -430,8 +442,7 @@ private void matchSourceWithTargetToMove(Source source,
}

/** Choose a candidate for the given datanode. */
private <G extends BalancerDatanode.StorageGroup,
C extends BalancerDatanode.StorageGroup>
private <G extends StorageGroup, C extends StorageGroup>
C chooseCandidate(G g, Iterator<C> candidates, Matcher matcher) {
if (g.hasSpaceForScheduling()) {
for(; candidates.hasNext(); ) {
@@ -439,7 +450,7 @@ C chooseCandidate(G g, Iterator<C> candidates, Matcher matcher) {
if (!c.hasSpaceForScheduling()) {
candidates.remove();
} else if (matcher.match(dispatcher.getCluster(),
g.getDatanode(), c.getDatanode())) {
g.getDatanodeInfo(), c.getDatanodeInfo())) {
return c;
}
}
@@ -457,34 +468,15 @@ private void resetData(Configuration conf) {
dispatcher.reset(conf);;
}

// Exit status
enum ReturnStatus {
// These int values will map directly to the balancer process's exit code.
SUCCESS(0),
IN_PROGRESS(1),
ALREADY_RUNNING(-1),
NO_MOVE_BLOCK(-2),
NO_MOVE_PROGRESS(-3),
IO_EXCEPTION(-4),
ILLEGAL_ARGS(-5),
INTERRUPTED(-6);

final int code;

ReturnStatus(int code) {
this.code = code;
}
}

/** Run an iteration for all datanodes. */
private ReturnStatus run(int iteration, Formatter formatter,
private ExitStatus run(int iteration, Formatter formatter,
Configuration conf) {
try {
final List<DatanodeStorageReport> reports = dispatcher.init();
final long bytesLeftToMove = init(reports);
if (bytesLeftToMove == 0) {
System.out.println("The cluster is balanced. Exiting...");
return ReturnStatus.SUCCESS;
return ExitStatus.SUCCESS;
} else {
LOG.info( "Need to move "+ StringUtils.byteDesc(bytesLeftToMove)
+ " to make the cluster balanced." );
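Note that the ReturnStatus enum removed above is superseded by an ExitStatus type whose definition is not part of this diff. Based on the removed values and the names used below (ILLEGAL_ARGUMENTS instead of ILLEGAL_ARGS, getExitCode() instead of the code field), it presumably looks roughly like this sketch; treat the exact shape as an assumption:

public enum ExitStatus {
  // Presumed values, mirroring the removed ReturnStatus mapping to process exit codes.
  SUCCESS(0), IN_PROGRESS(1), ALREADY_RUNNING(-1), NO_MOVE_BLOCK(-2),
  NO_MOVE_PROGRESS(-3), IO_EXCEPTION(-4), ILLEGAL_ARGUMENTS(-5), INTERRUPTED(-6);

  private final int code;

  ExitStatus(int code) {
    this.code = code;
  }

  public int getExitCode() {
    return code;
  }
}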
@@ -498,7 +490,7 @@ private ReturnStatus run(int iteration, Formatter formatter,
final long bytesToMove = chooseStorageGroups();
if (bytesToMove == 0) {
System.out.println("No block can be moved. Exiting...");
return ReturnStatus.NO_MOVE_BLOCK;
return ExitStatus.NO_MOVE_BLOCK;
} else {
LOG.info( "Will move " + StringUtils.byteDesc(bytesToMove) +
" in this iteration");
@@ -519,19 +511,19 @@ private ReturnStatus run(int iteration, Formatter formatter,
* Exit no byte has been moved for 5 consecutive iterations.
*/
if (!dispatcher.dispatchAndCheckContinue()) {
return ReturnStatus.NO_MOVE_PROGRESS;
return ExitStatus.NO_MOVE_PROGRESS;
}

return ReturnStatus.IN_PROGRESS;
return ExitStatus.IN_PROGRESS;
} catch (IllegalArgumentException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.ILLEGAL_ARGS;
return ExitStatus.ILLEGAL_ARGUMENTS;
} catch (IOException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.IO_EXCEPTION;
return ExitStatus.IO_EXCEPTION;
} catch (InterruptedException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.INTERRUPTED;
return ExitStatus.INTERRUPTED;
} finally {
dispatcher.shutdownNow();
}
@@ -570,14 +562,14 @@ static int run(Collection<URI> namenodes, final Parameters p,
Collections.shuffle(connectors);
for(NameNodeConnector nnc : connectors) {
final Balancer b = new Balancer(nnc, p, conf);
final ReturnStatus r = b.run(iteration, formatter, conf);
final ExitStatus r = b.run(iteration, formatter, conf);
// clean all lists
b.resetData(conf);
if (r == ReturnStatus.IN_PROGRESS) {
if (r == ExitStatus.IN_PROGRESS) {
done = false;
} else if (r != ReturnStatus.SUCCESS) {
} else if (r != ExitStatus.SUCCESS) {
//must be an error statue, return.
return r.code;
return r.getExitCode();
}
}

@@ -590,7 +582,7 @@ static int run(Collection<URI> namenodes, final Parameters p,
nnc.close();
}
}
return ReturnStatus.SUCCESS.code;
return ExitStatus.SUCCESS.getExitCode();
}

/* Given elaspedTime in ms, return a printable string */
@@ -661,10 +653,10 @@ public int run(String[] args) {
return Balancer.run(namenodes, parse(args), conf);
} catch (IOException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.IO_EXCEPTION.code;
return ExitStatus.IO_EXCEPTION.getExitCode();
} catch (InterruptedException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.INTERRUPTED.code;
return ExitStatus.INTERRUPTED.getExitCode();
} finally {
System.out.format("%-24s ", DateFormat.getDateTimeInstance().format(new Date()));
System.out.println("Balancing took " + time2Str(Time.now()-startTime));
@@ -48,7 +48,6 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.protocol.Block;
@@ -63,6 +62,7 @@
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode.StorageGroup;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
@@ -91,7 +91,6 @@ public class Dispatcher {
// minutes

private final NameNodeConnector nnc;
private final KeyManager keyManager;
private final SaslDataTransferClient saslClient;

/** Set of datanodes to be excluded. */
@@ -100,11 +99,10 @@ public class Dispatcher {
private final Set<String> includedNodes;

private final Collection<Source> sources = new HashSet<Source>();
private final Collection<BalancerDatanode.StorageGroup> targets
= new HashSet<BalancerDatanode.StorageGroup>();
private final Collection<StorageGroup> targets = new HashSet<StorageGroup>();

private final GlobalBlockMap globalBlocks = new GlobalBlockMap();
private final MovedBlocks<BalancerDatanode.StorageGroup> movedBlocks;
private final MovedBlocks<StorageGroup> movedBlocks;

/** Map (datanodeUuid,storageType -> StorageGroup) */
private final StorageGroupMap storageGroupMap = new StorageGroupMap();
@@ -135,8 +133,7 @@ private DBlock get(Block b) {
}

/** Remove all blocks except for the moved blocks. */
private void removeAllButRetain(
MovedBlocks<BalancerDatanode.StorageGroup> movedBlocks) {
private void removeAllButRetain(MovedBlocks<StorageGroup> movedBlocks) {
for (Iterator<Block> i = map.keySet().iterator(); i.hasNext();) {
if (!movedBlocks.contains(i.next())) {
i.remove();
@@ -150,17 +147,15 @@ private static String toKey(String datanodeUuid, StorageType storageType) {
return datanodeUuid + ":" + storageType;
}

private final Map<String, BalancerDatanode.StorageGroup> map
= new HashMap<String, BalancerDatanode.StorageGroup>();
private final Map<String, StorageGroup> map = new HashMap<String, StorageGroup>();

BalancerDatanode.StorageGroup get(String datanodeUuid,
StorageType storageType) {
StorageGroup get(String datanodeUuid, StorageType storageType) {
return map.get(toKey(datanodeUuid, storageType));
}

void put(BalancerDatanode.StorageGroup g) {
final String key = toKey(g.getDatanode().getDatanodeUuid(), g.storageType);
final BalancerDatanode.StorageGroup existing = map.put(key, g);
void put(StorageGroup g) {
final String key = toKey(g.getDatanodeInfo().getDatanodeUuid(), g.storageType);
final StorageGroup existing = map.put(key, g);
Preconditions.checkState(existing == null);
}

@@ -177,8 +172,8 @@ void clear() {
private class PendingMove {
private DBlock block;
private Source source;
private BalancerDatanode proxySource;
private BalancerDatanode.StorageGroup target;
private DDatanode proxySource;
private StorageGroup target;

private PendingMove() {
}
@@ -235,24 +230,24 @@ private boolean markMovedIfGoodBlock(DBlock block) {
* @return true if a proxy is found; otherwise false
*/
private boolean chooseProxySource() {
final DatanodeInfo targetDN = target.getDatanode();
final DatanodeInfo targetDN = target.getDatanodeInfo();
// if node group is supported, first try add nodes in the same node group
if (cluster.isNodeGroupAware()) {
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameNodeGroup(loc.getDatanode(), targetDN)
for (StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameNodeGroup(loc.getDatanodeInfo(), targetDN)
&& addTo(loc)) {
return true;
}
}
}
// check if there is replica which is on the same rack with the target
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanode(), targetDN) && addTo(loc)) {
for (StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanodeInfo(), targetDN) && addTo(loc)) {
return true;
}
}
// find out a non-busy replica
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
for (StorageGroup loc : block.getLocations()) {
if (addTo(loc)) {
return true;
}
@@ -261,10 +256,10 @@ && addTo(loc)) {
}

/** add to a proxy source for specific block movement */
private boolean addTo(BalancerDatanode.StorageGroup g) {
final BalancerDatanode bdn = g.getBalancerDatanode();
if (bdn.addPendingBlock(this)) {
proxySource = bdn;
private boolean addTo(StorageGroup g) {
final DDatanode dn = g.getDDatanode();
if (dn.addPendingBlock(this)) {
proxySource = dn;
return true;
}
return false;
@@ -281,14 +276,13 @@ private void dispatch() {
DataInputStream in = null;
try {
sock.connect(
NetUtils.createSocketAddr(target.getDatanode().getXferAddr()),
NetUtils.createSocketAddr(target.getDatanodeInfo().getXferAddr()),
HdfsServerConstants.READ_TIMEOUT);
/*
* Unfortunately we don't have a good way to know if the Datanode is
* taking a really long time to move a block, OR something has gone
* wrong and it's never going to finish. To deal with this scenario, we
* set a long timeout (20 minutes) to avoid hanging the balancer
* indefinitely.
* set a long timeout (20 minutes) to avoid hanging indefinitely.
*/
sock.setSoTimeout(BLOCK_MOVE_READ_TIMEOUT);

@@ -298,9 +292,10 @@ private void dispatch() {
InputStream unbufIn = sock.getInputStream();
ExtendedBlock eb = new ExtendedBlock(nnc.getBlockpoolID(),
block.getBlock());
Token<BlockTokenIdentifier> accessToken = keyManager.getAccessToken(eb);
final KeyManager km = nnc.getKeyManager();
Token<BlockTokenIdentifier> accessToken = km.getAccessToken(eb);
IOStreamPair saslStreams = saslClient.socketSend(sock, unbufOut,
unbufIn, keyManager, accessToken, target.getDatanode());
unbufIn, km, accessToken, target.getDatanodeInfo());
unbufOut = saslStreams.out;
unbufIn = saslStreams.in;
out = new DataOutputStream(new BufferedOutputStream(unbufOut,
@@ -314,21 +309,19 @@ private void dispatch() {
LOG.info("Successfully moved " + this);
} catch (IOException e) {
LOG.warn("Failed to move " + this + ": " + e.getMessage());
/*
* proxy or target may have an issue, insert a small delay before using
* these nodes further. This avoids a potential storm of
* "threads quota exceeded" Warnings when the balancer gets out of sync
* with work going on in datanode.
*/
// Proxy or target may have some issues, delay before using these nodes
// further in order to avoid a potential storm of "threads quota
// exceeded" warnings when the dispatcher gets out of sync with work
// going on in datanodes.
proxySource.activateDelay(DELAY_AFTER_ERROR);
target.getBalancerDatanode().activateDelay(DELAY_AFTER_ERROR);
target.getDDatanode().activateDelay(DELAY_AFTER_ERROR);
} finally {
IOUtils.closeStream(out);
|
||||
IOUtils.closeStream(in);
|
||||
IOUtils.closeSocket(sock);
|
||||
|
||||
proxySource.removePendingBlock(this);
|
||||
target.getBalancerDatanode().removePendingBlock(this);
|
||||
target.getDDatanode().removePendingBlock(this);
|
||||
|
||||
synchronized (this) {
|
||||
reset();
|
||||
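The delay-after-error behaviour described in the comment above amounts to a small per-datanode backoff. A minimal sketch of that idea; the ErrorBackoff class and its field are illustrative stand-ins, not the real DDatanode code:

    // Hypothetical backoff helper mirroring the activateDelay()/isDelayed() pattern.
    class ErrorBackoff {
      private volatile long delayUntilMs = 0L;

      // Called after a failed move: skip this node for the given period.
      void activateDelay(long delayMs) {
        delayUntilMs = System.currentTimeMillis() + delayMs;
      }

      // True while the node should not be handed new pending moves.
      boolean isDelayed() {
        return System.currentTimeMillis() < delayUntilMs;
      }
    }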
@ -342,8 +335,8 @@ private void dispatch() {
|
||||
/** Send a block replace request to the output stream */
|
||||
private void sendRequest(DataOutputStream out, ExtendedBlock eb,
|
||||
Token<BlockTokenIdentifier> accessToken) throws IOException {
|
||||
new Sender(out).replaceBlock(eb, target.storageType, accessToken, source
|
||||
.getDatanode().getDatanodeUuid(), proxySource.datanode);
|
||||
new Sender(out).replaceBlock(eb, target.storageType, accessToken,
|
||||
source.getDatanodeInfo().getDatanodeUuid(), proxySource.datanode);
|
||||
}
|
||||
|
||||
/** Receive a block copy response from the input stream */
|
||||
@ -368,8 +361,7 @@ private void reset() {
|
||||
}
|
||||
|
||||
/** A class for keeping track of block locations in the dispatcher. */
|
||||
private static class DBlock extends
|
||||
MovedBlocks.Locations<BalancerDatanode.StorageGroup> {
|
||||
private static class DBlock extends MovedBlocks.Locations<StorageGroup> {
|
||||
DBlock(Block block) {
|
||||
super(block);
|
||||
}
|
||||
@ -377,10 +369,10 @@ private static class DBlock extends
|
||||
|
||||
/** The class represents a desired move. */
|
||||
static class Task {
|
||||
private final BalancerDatanode.StorageGroup target;
|
||||
private final StorageGroup target;
|
||||
private long size; // bytes scheduled to move
|
||||
|
||||
Task(BalancerDatanode.StorageGroup target, long size) {
|
||||
Task(StorageGroup target, long size) {
|
||||
this.target = target;
|
||||
this.size = size;
|
||||
}
|
||||
@ -391,28 +383,25 @@ long getSize() {
|
||||
}
|
||||
|
||||
/** A class that keeps track of a datanode. */
|
||||
static class BalancerDatanode {
|
||||
static class DDatanode {
|
||||
|
||||
/** A group of storages in a datanode with the same storage type. */
|
||||
class StorageGroup {
|
||||
final StorageType storageType;
|
||||
final double utilization;
|
||||
final long maxSize2Move;
|
||||
private long scheduledSize = 0L;
|
||||
|
||||
private StorageGroup(StorageType storageType, double utilization,
|
||||
long maxSize2Move) {
|
||||
private StorageGroup(StorageType storageType, long maxSize2Move) {
|
||||
this.storageType = storageType;
|
||||
this.utilization = utilization;
|
||||
this.maxSize2Move = maxSize2Move;
|
||||
}
|
||||
|
||||
BalancerDatanode getBalancerDatanode() {
|
||||
return BalancerDatanode.this;
|
||||
private DDatanode getDDatanode() {
|
||||
return DDatanode.this;
|
||||
}
|
||||
|
||||
DatanodeInfo getDatanode() {
|
||||
return BalancerDatanode.this.datanode;
|
||||
DatanodeInfo getDatanodeInfo() {
|
||||
return DDatanode.this.datanode;
|
||||
}
|
||||
|
||||
/** Decide if still need to move more bytes */
|
||||
@ -447,7 +436,7 @@ String getDisplayName() {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "" + utilization;
|
||||
return getDisplayName();
|
||||
}
|
||||
}
|
||||
|
||||
@ -461,10 +450,10 @@ public String toString() {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + ":" + datanode + ":" + storageMap;
|
||||
return getClass().getSimpleName() + ":" + datanode + ":" + storageMap.values();
|
||||
}
|
||||
|
||||
private BalancerDatanode(DatanodeStorageReport r, int maxConcurrentMoves) {
|
||||
private DDatanode(DatanodeStorageReport r, int maxConcurrentMoves) {
|
||||
this.datanode = r.getDatanodeInfo();
|
||||
this.maxConcurrentMoves = maxConcurrentMoves;
|
||||
this.pendings = new ArrayList<PendingMove>(maxConcurrentMoves);
|
||||
@ -475,18 +464,14 @@ private void put(StorageType storageType, StorageGroup g) {
|
||||
Preconditions.checkState(existing == null);
|
||||
}
|
||||
|
||||
StorageGroup addStorageGroup(StorageType storageType, double utilization,
|
||||
long maxSize2Move) {
|
||||
final StorageGroup g = new StorageGroup(storageType, utilization,
|
||||
maxSize2Move);
|
||||
StorageGroup addStorageGroup(StorageType storageType, long maxSize2Move) {
|
||||
final StorageGroup g = new StorageGroup(storageType, maxSize2Move);
|
||||
put(storageType, g);
|
||||
return g;
|
||||
}
|
||||
|
||||
Source addSource(StorageType storageType, double utilization,
|
||||
long maxSize2Move, Dispatcher balancer) {
|
||||
final Source s = balancer.new Source(storageType, utilization,
|
||||
maxSize2Move, this);
|
||||
Source addSource(StorageType storageType, long maxSize2Move, Dispatcher d) {
|
||||
final Source s = d.new Source(storageType, maxSize2Move, this);
|
||||
put(storageType, s);
|
||||
return s;
|
||||
}
|
||||
@ -528,7 +513,7 @@ synchronized boolean removePendingBlock(PendingMove pendingBlock) {
|
||||
}
|
||||
|
||||
/** A node that can be the sources of a block move */
|
||||
class Source extends BalancerDatanode.StorageGroup {
|
||||
class Source extends DDatanode.StorageGroup {
|
||||
|
||||
private final List<Task> tasks = new ArrayList<Task>(2);
|
||||
private long blocksToReceive = 0L;
|
||||
@ -539,9 +524,8 @@ class Source extends BalancerDatanode.StorageGroup {
|
||||
*/
|
||||
private final List<DBlock> srcBlocks = new ArrayList<DBlock>();
|
||||
|
||||
private Source(StorageType storageType, double utilization,
|
||||
long maxSize2Move, BalancerDatanode dn) {
|
||||
dn.super(storageType, utilization, maxSize2Move);
|
||||
private Source(StorageType storageType, long maxSize2Move, DDatanode dn) {
|
||||
dn.super(storageType, maxSize2Move);
|
||||
}
|
||||
|
||||
/** Add a task */
|
||||
@ -565,7 +549,7 @@ Iterator<DBlock> getBlockIterator() {
|
||||
*/
|
||||
private long getBlockList() throws IOException {
|
||||
final long size = Math.min(MAX_BLOCKS_SIZE_TO_FETCH, blocksToReceive);
|
||||
final BlocksWithLocations newBlocks = nnc.getBlocks(getDatanode(), size);
|
||||
final BlocksWithLocations newBlocks = nnc.getBlocks(getDatanodeInfo(), size);
|
||||
|
||||
long bytesReceived = 0;
|
||||
for (BlockWithLocations blk : newBlocks.getBlocks()) {
|
||||
@ -579,7 +563,7 @@ private long getBlockList() throws IOException {
|
||||
final String[] datanodeUuids = blk.getDatanodeUuids();
|
||||
final StorageType[] storageTypes = blk.getStorageTypes();
|
||||
for (int i = 0; i < datanodeUuids.length; i++) {
|
||||
final BalancerDatanode.StorageGroup g = storageGroupMap.get(
|
||||
final StorageGroup g = storageGroupMap.get(
|
||||
datanodeUuids[i], storageTypes[i]);
|
||||
if (g != null) { // not unknown
|
||||
block.addLocation(g);
|
||||
@ -617,7 +601,7 @@ private boolean isGoodBlockCandidate(DBlock block) {
|
||||
private PendingMove chooseNextMove() {
|
||||
for (Iterator<Task> i = tasks.iterator(); i.hasNext();) {
|
||||
final Task task = i.next();
|
||||
final BalancerDatanode target = task.target.getBalancerDatanode();
|
||||
final DDatanode target = task.target.getDDatanode();
|
||||
PendingMove pendingBlock = new PendingMove();
|
||||
if (target.addPendingBlock(pendingBlock)) {
|
||||
// target is not busy, so do a tentative block allocation
|
||||
@ -670,7 +654,7 @@ private void dispatchBlocks() {
|
||||
final long startTime = Time.monotonicNow();
|
||||
this.blocksToReceive = 2 * getScheduledSize();
|
||||
boolean isTimeUp = false;
|
||||
int noPendingBlockIteration = 0;
|
||||
int noPendingMoveIteration = 0;
|
||||
while (!isTimeUp && getScheduledSize() > 0
|
||||
&& (!srcBlocks.isEmpty() || blocksToReceive > 0)) {
|
||||
final PendingMove p = chooseNextMove();
|
||||
@ -699,11 +683,11 @@ public void run() {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// source node cannot find a pendingBlockToMove, iteration +1
|
||||
noPendingBlockIteration++;
|
||||
// source node cannot find a pending block to move, iteration +1
|
||||
noPendingMoveIteration++;
|
||||
// in case no blocks can be moved for source node's task,
|
||||
// jump out of while-loop after 5 iterations.
|
||||
if (noPendingBlockIteration >= MAX_NO_PENDING_MOVE_ITERATIONS) {
|
||||
if (noPendingMoveIteration >= MAX_NO_PENDING_MOVE_ITERATIONS) {
|
||||
resetScheduledSize();
|
||||
}
|
||||
}
|
||||
@ -726,29 +710,19 @@ public void run() {
|
||||
}
|
||||
}
|
||||
|
||||
Dispatcher(NameNodeConnector theblockpool, Set<String> includedNodes,
|
||||
Set<String> excludedNodes, Configuration conf) {
|
||||
this.nnc = theblockpool;
|
||||
this.keyManager = nnc.getKeyManager();
|
||||
public Dispatcher(NameNodeConnector nnc, Set<String> includedNodes,
|
||||
Set<String> excludedNodes, long movedWinWidth, int moverThreads,
|
||||
int dispatcherThreads, int maxConcurrentMovesPerNode, Configuration conf) {
|
||||
this.nnc = nnc;
|
||||
this.excludedNodes = excludedNodes;
|
||||
this.includedNodes = includedNodes;
|
||||
|
||||
final long movedWinWidth = conf.getLong(
|
||||
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
|
||||
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
|
||||
movedBlocks = new MovedBlocks<BalancerDatanode.StorageGroup>(movedWinWidth);
|
||||
this.movedBlocks = new MovedBlocks<StorageGroup>(movedWinWidth);
|
||||
|
||||
this.cluster = NetworkTopology.getInstance(conf);
|
||||
|
||||
this.moveExecutor = Executors.newFixedThreadPool(conf.getInt(
|
||||
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
|
||||
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT));
|
||||
this.dispatchExecutor = Executors.newFixedThreadPool(conf.getInt(
|
||||
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
|
||||
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT));
|
||||
this.maxConcurrentMovesPerNode = conf.getInt(
|
||||
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
|
||||
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
|
||||
this.moveExecutor = Executors.newFixedThreadPool(moverThreads);
|
||||
this.dispatchExecutor = Executors.newFixedThreadPool(dispatcherThreads);
|
||||
this.maxConcurrentMovesPerNode = maxConcurrentMovesPerNode;
|
||||
|
||||
final boolean fallbackToSimpleAuthAllowed = conf.getBoolean(
|
||||
CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY,
|
||||
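With the configuration reads moved out of the constructor, the caller is now expected to resolve these tunables itself and pass plain values in. A hedged sketch of that caller-side wiring, reusing the keys that were removed above; it assumes conf, nnc, includedNodes and excludedNodes are already in scope:

    // Illustrative only: read the balancer tunables and hand them to the new constructor.
    final long movedWinWidth = conf.getLong(
        DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
        DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
    final int moverThreads = conf.getInt(
        DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
        DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT);
    final int dispatcherThreads = conf.getInt(
        DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
        DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT);
    final int maxConcurrentMovesPerNode = conf.getInt(
        DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
        DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
    final Dispatcher dispatcher = new Dispatcher(nnc, includedNodes, excludedNodes,
        movedWinWidth, moverThreads, dispatcherThreads,
        maxConcurrentMovesPerNode, conf);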
@ -784,7 +758,7 @@ long bytesToMove() {
|
||||
return b;
|
||||
}
|
||||
|
||||
void add(Source source, BalancerDatanode.StorageGroup target) {
|
||||
void add(Source source, StorageGroup target) {
|
||||
sources.add(source);
|
||||
targets.add(target);
|
||||
}
|
||||
@ -826,8 +800,8 @@ List<DatanodeStorageReport> init() throws IOException {
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
public BalancerDatanode newDatanode(DatanodeStorageReport r) {
|
||||
return new BalancerDatanode(r, maxConcurrentMovesPerNode);
|
||||
public DDatanode newDatanode(DatanodeStorageReport r) {
|
||||
return new DDatanode(r, maxConcurrentMovesPerNode);
|
||||
}
|
||||
|
||||
public boolean dispatchAndCheckContinue() throws InterruptedException {
|
||||
@ -884,8 +858,8 @@ static void setBlockMoveWaitTime(long time) {
|
||||
private void waitForMoveCompletion() {
|
||||
for(;;) {
|
||||
boolean empty = true;
|
||||
for (BalancerDatanode.StorageGroup t : targets) {
|
||||
if (!t.getBalancerDatanode().isPendingQEmpty()) {
|
||||
for (StorageGroup t : targets) {
|
||||
if (!t.getDDatanode().isPendingQEmpty()) {
|
||||
empty = false;
|
||||
break;
|
||||
}
|
||||
@ -907,8 +881,8 @@ private void waitForMoveCompletion() {
|
||||
* 2. the block does not have a replica on the target;
|
||||
* 3. doing the move does not reduce the number of racks that the block has
|
||||
*/
|
||||
private boolean isGoodBlockCandidate(Source source,
|
||||
BalancerDatanode.StorageGroup target, DBlock block) {
|
||||
private boolean isGoodBlockCandidate(Source source, StorageGroup target,
|
||||
DBlock block) {
|
||||
if (source.storageType != target.storageType) {
|
||||
return false;
|
||||
}
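Restating the candidate test from the comment above as straight-line pseudocode; reduceNumOfRacks is the helper visible later in this diff, while blockHasReplicaOn is an assumed name standing in for the replica-on-target check:

    // Illustrative summary of the checks, not the literal method body.
    boolean isGoodCandidate(Source source, StorageGroup target, DBlock block) {
      if (source.storageType != target.storageType) {
        return false;                 // only move between matching storage types
      }
      if (blockHasReplicaOn(target, block)) {
        return false;                 // condition 2: target already holds a replica
      }
      if (reduceNumOfRacks(source, target, block)) {
        return false;                 // condition 3: the move must not shrink the rack count
      }
      return true;
    }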
|
||||
@ -933,17 +907,17 @@ && isOnSameNodeGroupWithReplicas(target, block, source)) {
|
||||
* Determine whether moving the given block replica from source to target
|
||||
* would reduce the number of racks of the block replicas.
|
||||
*/
|
||||
private boolean reduceNumOfRacks(Source source,
|
||||
BalancerDatanode.StorageGroup target, DBlock block) {
|
||||
final DatanodeInfo sourceDn = source.getDatanode();
|
||||
if (cluster.isOnSameRack(sourceDn, target.getDatanode())) {
|
||||
private boolean reduceNumOfRacks(Source source, StorageGroup target,
|
||||
DBlock block) {
|
||||
final DatanodeInfo sourceDn = source.getDatanodeInfo();
|
||||
if (cluster.isOnSameRack(sourceDn, target.getDatanodeInfo())) {
|
||||
// source and target are on the same rack
|
||||
return false;
|
||||
}
|
||||
boolean notOnSameRack = true;
|
||||
synchronized (block) {
|
||||
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
|
||||
if (cluster.isOnSameRack(loc.getDatanode(), target.getDatanode())) {
|
||||
for (StorageGroup loc : block.getLocations()) {
|
||||
if (cluster.isOnSameRack(loc.getDatanodeInfo(), target.getDatanodeInfo())) {
|
||||
notOnSameRack = false;
|
||||
break;
|
||||
}
|
||||
@ -953,8 +927,8 @@ private boolean reduceNumOfRacks(Source source,
|
||||
// target is not on the same rack as any replica
|
||||
return false;
|
||||
}
|
||||
for (BalancerDatanode.StorageGroup g : block.getLocations()) {
|
||||
if (g != source && cluster.isOnSameRack(g.getDatanode(), sourceDn)) {
|
||||
for (StorageGroup g : block.getLocations()) {
|
||||
if (g != source && cluster.isOnSameRack(g.getDatanodeInfo(), sourceDn)) {
|
||||
// source is on the same rack of another replica
|
||||
return false;
|
||||
}
|
||||
@ -971,10 +945,10 @@ private boolean reduceNumOfRacks(Source source,
|
||||
* group with target
|
||||
*/
|
||||
private boolean isOnSameNodeGroupWithReplicas(
|
||||
BalancerDatanode.StorageGroup target, DBlock block, Source source) {
|
||||
final DatanodeInfo targetDn = target.getDatanode();
|
||||
for (BalancerDatanode.StorageGroup g : block.getLocations()) {
|
||||
if (g != source && cluster.isOnSameNodeGroup(g.getDatanode(), targetDn)) {
|
||||
StorageGroup target, DBlock block, Source source) {
|
||||
final DatanodeInfo targetDn = target.getDatanodeInfo();
|
||||
for (StorageGroup g : block.getLocations()) {
|
||||
if (g != source && cluster.isOnSameNodeGroup(g.getDatanodeInfo(), targetDn)) {
|
||||
return true;
|
||||
}
|
||||
}
@ -15,22 +15,30 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.balancer;

/**
* Exit status - The values associated with each exit status are directly
* mapped to the process's exit code on the command line.
*/
public enum ExitStatus {
SUCCESS(0),
IN_PROGRESS(1),
ALREADY_RUNNING(-1),
NO_MOVE_BLOCK(-2),
NO_MOVE_PROGRESS(-3),
IO_EXCEPTION(-4),
ILLEGAL_ARGUMENTS(-5),
INTERRUPTED(-6);

private final int code;

private ExitStatus(int code) {
this.code = code;
}

/** @return the command line exit code. */
public int getExitCode() {
return code;
}
}

package org.apache.hadoop.yarn.server.resourcemanager.rmapp;

import org.apache.hadoop.yarn.api.records.ApplicationId;

public class RMAppNewSavedEvent extends RMAppEvent {

private final Exception storedException;

public RMAppNewSavedEvent(ApplicationId appId, Exception storedException) {
super(appId, RMAppEventType.APP_NEW_SAVED);
this.storedException = storedException;

public Exception getStoredException() {
return storedException;
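A hedged sketch of how an exit-status enum like this is typically consumed by a command-line entry point; the BalancerDriver class, its main method and the doBalance helper are illustrative only and not part of this change:

    // Hypothetical CLI driver: map the enum onto the process exit code.
    public final class BalancerDriver {
      public static void main(String[] args) {
        ExitStatus status;
        try {
          status = doBalance(args);               // assumed worker method
        } catch (IllegalArgumentException e) {
          status = ExitStatus.ILLEGAL_ARGUMENTS;
        } catch (java.io.IOException e) {
          status = ExitStatus.IO_EXCEPTION;
        } catch (InterruptedException e) {
          status = ExitStatus.INTERRUPTED;
        }
        System.exit(status.getExitCode());        // the shell sees 0, -4, -5, -6, ...
      }

      private static ExitStatus doBalance(String[] args)
          throws java.io.IOException, InterruptedException {
        return ExitStatus.SUCCESS;                // placeholder
      }
    }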
@ -768,8 +768,6 @@ private static void validateRenameSource(String src, INodesInPath srcIIP)
|
||||
checkSnapshot(srcInode, null);
|
||||
}
|
||||
|
||||
|
||||
|
||||
private class RenameOperation {
|
||||
private final INodesInPath srcIIP;
|
||||
private final INodesInPath dstIIP;
|
||||
@ -802,7 +800,7 @@ private RenameOperation(String src, String dst, INodesInPath srcIIP, INodesInPat
|
||||
// snapshot is taken on the dst tree, changes will be recorded in the latest
|
||||
// snapshot of the src tree.
|
||||
if (isSrcInSnapshot) {
|
||||
srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
|
||||
srcChild.recordModification(srcIIP.getLatestSnapshotId());
|
||||
}
|
||||
|
||||
// check srcChild for reference
|
||||
@ -932,8 +930,7 @@ Block[] unprotectedSetReplication(String src, short replication,
|
||||
updateCount(iip, 0, dsDelta, true);
|
||||
}
|
||||
|
||||
file = file.setFileReplication(replication, iip.getLatestSnapshotId(),
|
||||
inodeMap);
|
||||
file.setFileReplication(replication, iip.getLatestSnapshotId());
|
||||
|
||||
final short newBR = file.getBlockReplication();
|
||||
// check newBR < oldBR case.
|
||||
@ -1216,8 +1213,7 @@ long unprotectedDelete(INodesInPath iip, BlocksMapUpdateInfo collectedBlocks,
|
||||
|
||||
// record modification
|
||||
final int latestSnapshot = iip.getLatestSnapshotId();
|
||||
targetNode = targetNode.recordModification(latestSnapshot);
|
||||
iip.setLastINode(targetNode);
|
||||
targetNode.recordModification(latestSnapshot);
|
||||
|
||||
// Remove the node from the namespace
|
||||
long removed = removeLastINode(iip);
|
||||
@ -2126,7 +2122,7 @@ INodeDirectory unprotectedSetQuota(String src, long nsQuota, long dsQuota)
|
||||
}
|
||||
|
||||
final int latest = iip.getLatestSnapshotId();
|
||||
dirNode = dirNode.recordModification(latest);
|
||||
dirNode.recordModification(latest);
|
||||
dirNode.setQuota(nsQuota, dsQuota);
|
||||
return dirNode;
|
||||
}
|
||||
|
@ -2515,7 +2515,7 @@ LocatedBlock prepareFileForWrite(String src, INodeFile file,
|
||||
boolean writeToEditLog,
|
||||
int latestSnapshot, boolean logRetryCache)
|
||||
throws IOException {
|
||||
file = file.recordModification(latestSnapshot);
|
||||
file.recordModification(latestSnapshot);
|
||||
final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine);
|
||||
|
||||
leaseManager.addLease(cons.getFileUnderConstructionFeature()
|
||||
@ -4214,7 +4214,7 @@ private void finalizeINodeFileUnderConstruction(String src,
|
||||
Preconditions.checkArgument(uc != null);
|
||||
leaseManager.removeLease(uc.getClientName(), src);
|
||||
|
||||
pendingFile = pendingFile.recordModification(latestSnapshot);
|
||||
pendingFile.recordModification(latestSnapshot);
|
||||
|
||||
// The file is no longer pending.
|
||||
// Create permanent INode, update blocks. No need to replace the inode here
|
||||
|
@ -97,9 +97,9 @@ public final String getUserName() {
|
||||
/** Set user */
|
||||
final INode setUser(String user, int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
final INode nodeToUpdate = recordModification(latestSnapshotId);
|
||||
nodeToUpdate.setUser(user);
|
||||
return nodeToUpdate;
|
||||
recordModification(latestSnapshotId);
|
||||
setUser(user);
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* @param snapshotId
|
||||
@ -122,9 +122,9 @@ public final String getGroupName() {
|
||||
/** Set group */
|
||||
final INode setGroup(String group, int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
final INode nodeToUpdate = recordModification(latestSnapshotId);
|
||||
nodeToUpdate.setGroup(group);
|
||||
return nodeToUpdate;
|
||||
recordModification(latestSnapshotId);
|
||||
setGroup(group);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -148,9 +148,9 @@ public final FsPermission getFsPermission() {
|
||||
/** Set the {@link FsPermission} of this {@link INode} */
|
||||
INode setPermission(FsPermission permission, int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
final INode nodeToUpdate = recordModification(latestSnapshotId);
|
||||
nodeToUpdate.setPermission(permission);
|
||||
return nodeToUpdate;
|
||||
recordModification(latestSnapshotId);
|
||||
setPermission(permission);
|
||||
return this;
|
||||
}
|
||||
|
||||
abstract AclFeature getAclFeature(int snapshotId);
|
||||
@ -164,18 +164,18 @@ public final AclFeature getAclFeature() {
|
||||
|
||||
final INode addAclFeature(AclFeature aclFeature, int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
final INode nodeToUpdate = recordModification(latestSnapshotId);
|
||||
nodeToUpdate.addAclFeature(aclFeature);
|
||||
return nodeToUpdate;
|
||||
recordModification(latestSnapshotId);
|
||||
addAclFeature(aclFeature);
|
||||
return this;
|
||||
}
|
||||
|
||||
abstract void removeAclFeature();
|
||||
|
||||
final INode removeAclFeature(int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
final INode nodeToUpdate = recordModification(latestSnapshotId);
|
||||
nodeToUpdate.removeAclFeature();
|
||||
return nodeToUpdate;
|
||||
recordModification(latestSnapshotId);
|
||||
removeAclFeature();
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -199,9 +199,9 @@ public final XAttrFeature getXAttrFeature() {
|
||||
|
||||
final INode addXAttrFeature(XAttrFeature xAttrFeature, int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
final INode nodeToUpdate = recordModification(latestSnapshotId);
|
||||
nodeToUpdate.addXAttrFeature(xAttrFeature);
|
||||
return nodeToUpdate;
|
||||
recordModification(latestSnapshotId);
|
||||
addXAttrFeature(xAttrFeature);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -211,9 +211,9 @@ final INode addXAttrFeature(XAttrFeature xAttrFeature, int latestSnapshotId)
|
||||
|
||||
final INode removeXAttrFeature(int lastestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
final INode nodeToUpdate = recordModification(lastestSnapshotId);
|
||||
nodeToUpdate.removeXAttrFeature();
|
||||
return nodeToUpdate;
|
||||
recordModification(lastestSnapshotId);
|
||||
removeXAttrFeature();
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -298,11 +298,8 @@ public final boolean shouldRecordInSrcSnapshot(final int latestInDst) {
|
||||
* @param latestSnapshotId The id of the latest snapshot that has been taken.
|
||||
* Note that it is {@link Snapshot#CURRENT_STATE_ID}
|
||||
* if no snapshots have been taken.
|
||||
* @return The current inode, which usually is the same object of this inode.
|
||||
* However, in some cases, this inode may be replaced with a new inode
|
||||
* for maintaining snapshots. The current inode is then the new inode.
|
||||
*/
|
||||
abstract INode recordModification(final int latestSnapshotId)
|
||||
abstract void recordModification(final int latestSnapshotId)
|
||||
throws QuotaExceededException;
|
||||
|
||||
/** Check whether it's a reference. */
|
||||
@ -652,9 +649,9 @@ public abstract INode updateModificationTime(long mtime, int latestSnapshotId)
|
||||
/** Set the last modification time of inode. */
|
||||
public final INode setModificationTime(long modificationTime,
|
||||
int latestSnapshotId) throws QuotaExceededException {
|
||||
final INode nodeToUpdate = recordModification(latestSnapshotId);
|
||||
nodeToUpdate.setModificationTime(modificationTime);
|
||||
return nodeToUpdate;
|
||||
recordModification(latestSnapshotId);
|
||||
setModificationTime(modificationTime);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -682,9 +679,9 @@ public final long getAccessTime() {
|
||||
*/
|
||||
public final INode setAccessTime(long accessTime, int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
final INode nodeToUpdate = recordModification(latestSnapshotId);
|
||||
nodeToUpdate.setAccessTime(accessTime);
|
||||
return nodeToUpdate;
|
||||
recordModification(latestSnapshotId);
|
||||
setAccessTime(accessTime);
|
||||
return this;
|
||||
}
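The calling-convention change that runs through the setters above can be summarised in a short before/after sketch (the inode variable is illustrative):

    // Before: recordModification() could hand back a replacement inode,
    // so callers had to rebind and mutate the returned object.
    INode nodeToUpdate = inode.recordModification(latestSnapshotId);
    nodeToUpdate.setUser(user);

    // After: the modification is recorded in place and the caller keeps
    // using the same reference.
    inode.recordModification(latestSnapshotId);
    inode.setUser(user);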
@ -318,7 +318,7 @@ INodeReference.WithName replaceChild4ReferenceWithName(INode oldChild,
|
||||
}
|
||||
|
||||
@Override
|
||||
public INodeDirectory recordModification(int latestSnapshotId)
|
||||
public void recordModification(int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
if (isInLatestSnapshot(latestSnapshotId)
|
||||
&& !shouldRecordInSrcSnapshot(latestSnapshotId)) {
|
||||
@ -330,7 +330,6 @@ public INodeDirectory recordModification(int latestSnapshotId)
|
||||
// record self in the diff list if necessary
|
||||
sf.getDiffs().saveSelf2Snapshot(latestSnapshotId, this, null);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -296,7 +296,7 @@ public INodeFileAttributes getSnapshotINode(final int snapshotId) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public INodeFile recordModification(final int latestSnapshotId)
|
||||
public void recordModification(final int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
if (isInLatestSnapshot(latestSnapshotId)
|
||||
&& !shouldRecordInSrcSnapshot(latestSnapshotId)) {
|
||||
@ -308,7 +308,6 @@ public INodeFile recordModification(final int latestSnapshotId)
|
||||
// record self in the diff list if necessary
|
||||
sf.getDiffs().saveSelf2Snapshot(latestSnapshotId, this, null);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public FileDiffList getDiffs() {
|
||||
@ -356,11 +355,10 @@ public final void setFileReplication(short replication) {
|
||||
|
||||
/** Set the replication factor of this file. */
|
||||
public final INodeFile setFileReplication(short replication,
|
||||
int latestSnapshotId, final INodeMap inodeMap)
|
||||
throws QuotaExceededException {
|
||||
final INodeFile nodeToUpdate = recordModification(latestSnapshotId);
|
||||
nodeToUpdate.setFileReplication(replication);
|
||||
return nodeToUpdate;
|
||||
int latestSnapshotId) throws QuotaExceededException {
|
||||
recordModification(latestSnapshotId);
|
||||
setFileReplication(replication);
|
||||
return this;
|
||||
}
|
||||
|
||||
/** @return preferred block size (in bytes) of the file. */
|
||||
|
@ -93,9 +93,8 @@ public INode get(long id) {
|
||||
"", "", new FsPermission((short) 0)), 0, 0) {
|
||||
|
||||
@Override
|
||||
INode recordModification(int latestSnapshotId)
|
||||
void recordModification(int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -287,11 +287,9 @@ public final void setAccessTime(long accessTime) {
|
||||
}
|
||||
|
||||
@Override
|
||||
final INode recordModification(int latestSnapshotId)
|
||||
final void recordModification(int latestSnapshotId)
|
||||
throws QuotaExceededException {
|
||||
referred.recordModification(latestSnapshotId);
|
||||
// reference is never replaced
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override // used by WithCount
|
||||
|
@ -47,12 +47,11 @@ public class INodeSymlink extends INodeWithAdditionalFields {
|
||||
}
|
||||
|
||||
@Override
|
||||
INode recordModification(int latestSnapshotId) throws QuotaExceededException {
|
||||
void recordModification(int latestSnapshotId) throws QuotaExceededException {
|
||||
if (isInLatestSnapshot(latestSnapshotId)) {
|
||||
INodeDirectory parent = getParent();
|
||||
parent.saveChild2Snapshot(this, latestSnapshotId, new INodeSymlink(this));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/** @return true unconditionally. */
|
||||
|
@ -570,10 +570,10 @@ private void runBalancer(Configuration conf,
|
||||
final int r = Balancer.run(namenodes, p, conf);
|
||||
if (conf.getInt(DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
|
||||
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT) ==0) {
|
||||
assertEquals(Balancer.ReturnStatus.NO_MOVE_PROGRESS.code, r);
|
||||
assertEquals(ExitStatus.NO_MOVE_PROGRESS.getExitCode(), r);
|
||||
return;
|
||||
} else {
|
||||
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
|
||||
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
|
||||
}
|
||||
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
|
||||
LOG.info("Rebalancing with default ctor.");
|
||||
@ -717,7 +717,7 @@ public void testUnknownDatanode() throws Exception {
|
||||
Balancer.Parameters.DEFAULT.threshold,
|
||||
datanodes, Balancer.Parameters.DEFAULT.nodesToBeIncluded);
|
||||
final int r = Balancer.run(namenodes, p, conf);
|
||||
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
|
||||
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
|
||||
} finally {
|
||||
cluster.shutdown();
|
||||
}
|
||||
|
@ -98,7 +98,7 @@ public void testBalancerWithHANameNodes() throws Exception {
|
||||
assertEquals(1, namenodes.size());
|
||||
assertTrue(namenodes.contains(HATestUtil.getLogicalUri(cluster)));
|
||||
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
|
||||
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
|
||||
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
|
||||
TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client,
|
||||
cluster, Balancer.Parameters.DEFAULT);
|
||||
} finally {
|
||||
|
@ -160,7 +160,7 @@ static void runBalancer(Suite s,
|
||||
// start rebalancing
|
||||
final Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(s.conf);
|
||||
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, s.conf);
|
||||
Assert.assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
|
||||
Assert.assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
|
||||
|
||||
LOG.info("BALANCER 2");
|
||||
wait(s.clients, totalUsed, totalCapacity);
|
||||
|
@ -176,7 +176,7 @@ private void runBalancer(Configuration conf,
|
||||
// start rebalancing
|
||||
Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
|
||||
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
|
||||
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
|
||||
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
|
||||
|
||||
waitForHeartBeat(totalUsedSpace, totalCapacity);
|
||||
LOG.info("Rebalancing with default factor.");
|
||||
@ -190,8 +190,8 @@ private void runBalancerCanFinish(Configuration conf,
|
||||
// start rebalancing
|
||||
Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
|
||||
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
|
||||
Assert.assertTrue(r == Balancer.ReturnStatus.SUCCESS.code ||
|
||||
(r == Balancer.ReturnStatus.NO_MOVE_PROGRESS.code));
|
||||
Assert.assertTrue(r == ExitStatus.SUCCESS.getExitCode() ||
|
||||
(r == ExitStatus.NO_MOVE_PROGRESS.getExitCode()));
|
||||
waitForHeartBeat(totalUsedSpace, totalCapacity);
|
||||
LOG.info("Rebalancing with default factor.");
|
||||
}
|
||||
|
@ -100,6 +100,19 @@ Release 2.6.0 - UNRELEASED
|
||||
YARN-2212. ApplicationMaster needs to find a way to update the AMRMToken
|
||||
periodically. (xgong)
|
||||
|
||||
YARN-2026. Fair scheduler: Consider only active queues for computing fairshare.
|
||||
(Ashwin Shankar via kasha)
|
||||
|
||||
YARN-1954. Added waitFor to AMRMClient(Async). (Tsuyoshi Ozawa via zjshen)
|
||||
|
||||
YARN-2302. Refactor TimelineWebServices. (Zhijie Shen via junping_du)
|
||||
|
||||
YARN-2337. ResourceManager sets ClientRMService in RMContext multiple times.
|
||||
(Zhihai Xu via kasha)
|
||||
|
||||
YARN-2138. Cleaned up notifyDone* APIs in RMStateStore. (Varun Saxena via
|
||||
jianhe)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
@ -154,6 +167,11 @@ Release 2.6.0 - UNRELEASED
|
||||
YARN-2008. Fixed CapacityScheduler to calculate headroom based on max available
|
||||
capacity instead of configured max capacity. (Craig Welch via jianhe)
|
||||
|
||||
YARN-2400. Fixed TestAMRestart fails intermittently. (Jian He via xgong)
|
||||
|
||||
YARN-2361. RMAppAttempt state machine entries for KILLED state has duplicate
|
||||
event entries. (Zhihai Xu via kasha)
|
||||
|
||||
Release 2.5.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -22,6 +22,8 @@
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Public;
|
||||
@ -37,12 +39,14 @@
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Supplier;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
|
||||
@InterfaceAudience.Public
|
||||
@InterfaceStability.Stable
|
||||
public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
|
||||
AbstractService {
|
||||
private static final Log LOG = LogFactory.getLog(AMRMClient.class);
|
||||
|
||||
/**
|
||||
* Create a new instance of AMRMClient.
|
||||
@ -336,4 +340,63 @@ public NMTokenCache getNMTokenCache() {
|
||||
return nmTokenCache;
|
||||
}
|
||||
|
||||
/**
* Wait for <code>check</code> to return true, polling it every 1000 ms.
* See also {@link #waitFor(com.google.common.base.Supplier, int)}
* and {@link #waitFor(com.google.common.base.Supplier, int, int)}
* @param check the condition to wait for
*/
|
||||
public void waitFor(Supplier<Boolean> check) throws InterruptedException {
|
||||
waitFor(check, 1000);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for <code>check</code> to return true for each
|
||||
* <code>checkEveryMillis</code> ms.
|
||||
* See also {@link #waitFor(com.google.common.base.Supplier, int, int)}
|
||||
* @param check user defined checker
|
||||
* @param checkEveryMillis interval to call <code>check</code>
|
||||
*/
|
||||
public void waitFor(Supplier<Boolean> check, int checkEveryMillis)
|
||||
throws InterruptedException {
|
||||
waitFor(check, checkEveryMillis, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for <code>check</code> to return true for each
|
||||
* <code>checkEveryMillis</code> ms. In the main loop, this method will log
|
||||
* the message "waiting in main loop" for each <code>logInterval</code> times
|
||||
* iteration to confirm the thread is alive.
|
||||
* @param check user defined checker
|
||||
* @param checkEveryMillis interval to call <code>check</code>
|
||||
* @param logInterval interval to log for each
|
||||
*/
|
||||
public void waitFor(Supplier<Boolean> check, int checkEveryMillis,
|
||||
int logInterval) throws InterruptedException {
|
||||
Preconditions.checkNotNull(check, "check should not be null");
|
||||
Preconditions.checkArgument(checkEveryMillis >= 0,
|
||||
"checkEveryMillis should be positive value");
|
||||
Preconditions.checkArgument(logInterval >= 0,
|
||||
"logInterval should be positive value");
|
||||
|
||||
int loggingCounter = logInterval;
|
||||
do {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Check the condition for main loop.");
|
||||
}
|
||||
|
||||
boolean result = check.get();
|
||||
if (result) {
|
||||
LOG.info("Exits the main loop.");
|
||||
return;
|
||||
}
|
||||
if (--loggingCounter <= 0) {
|
||||
LOG.info("Waiting in main loop.");
|
||||
loggingCounter = logInterval;
|
||||
}
|
||||
|
||||
Thread.sleep(checkEveryMillis);
|
||||
} while (true);
|
||||
}
|
||||
|
||||
}
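A brief usage sketch for the new waitFor API; the handler field, its isShutdownRequested() flag and the 500 ms / 20-poll values are made-up examples, not part of this patch:

    // Hypothetical caller: block the AM's main thread until the callback handler
    // has observed a shutdown request, polling every 500 ms and logging the
    // "waiting in main loop" message every 20 polls.
    amRMClient.waitFor(new com.google.common.base.Supplier<Boolean>() {
      @Override
      public Boolean get() {
        return handler.isShutdownRequested();
      }
    }, 500, 20);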
@ -18,11 +18,15 @@
|
||||
|
||||
package org.apache.hadoop.yarn.client.api.async;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Supplier;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Public;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Stable;
|
||||
@ -90,6 +94,7 @@
|
||||
@Stable
|
||||
public abstract class AMRMClientAsync<T extends ContainerRequest>
|
||||
extends AbstractService {
|
||||
private static final Log LOG = LogFactory.getLog(AMRMClientAsync.class);
|
||||
|
||||
protected final AMRMClient<T> client;
|
||||
protected final CallbackHandler handler;
|
||||
@ -189,6 +194,65 @@ public abstract void unregisterApplicationMaster(
|
||||
*/
|
||||
public abstract int getClusterNodeCount();
|
||||
|
||||
/**
* Wait for <code>check</code> to return true, polling it every 1000 ms.
* See also {@link #waitFor(com.google.common.base.Supplier, int)}
* and {@link #waitFor(com.google.common.base.Supplier, int, int)}
* @param check the condition to wait for
*/
|
||||
public void waitFor(Supplier<Boolean> check) throws InterruptedException {
|
||||
waitFor(check, 1000);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for <code>check</code> to return true for each
|
||||
* <code>checkEveryMillis</code> ms.
|
||||
* See also {@link #waitFor(com.google.common.base.Supplier, int, int)}
|
||||
* @param check user defined checker
|
||||
* @param checkEveryMillis interval to call <code>check</code>
|
||||
*/
|
||||
public void waitFor(Supplier<Boolean> check, int checkEveryMillis)
|
||||
throws InterruptedException {
|
||||
waitFor(check, checkEveryMillis, 1);
|
||||
};
|
||||
|
||||
/**
|
||||
* Wait for <code>check</code> to return true for each
|
||||
* <code>checkEveryMillis</code> ms. In the main loop, this method will log
|
||||
* the message "waiting in main loop" for each <code>logInterval</code> times
|
||||
* iteration to confirm the thread is alive.
|
||||
* @param check user defined checker
|
||||
* @param checkEveryMillis interval to call <code>check</code>
|
||||
* @param logInterval interval to log for each
|
||||
*/
|
||||
public void waitFor(Supplier<Boolean> check, int checkEveryMillis,
|
||||
int logInterval) throws InterruptedException {
|
||||
Preconditions.checkNotNull(check, "check should not be null");
|
||||
Preconditions.checkArgument(checkEveryMillis >= 0,
|
||||
"checkEveryMillis should be positive value");
|
||||
Preconditions.checkArgument(logInterval >= 0,
|
||||
"logInterval should be positive value");
|
||||
|
||||
int loggingCounter = logInterval;
|
||||
do {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Check the condition for main loop.");
|
||||
}
|
||||
|
||||
boolean result = check.get();
|
||||
if (result) {
|
||||
LOG.info("Exits the main loop.");
|
||||
return;
|
||||
}
|
||||
if (--loggingCounter <= 0) {
|
||||
LOG.info("Waiting in main loop.");
|
||||
loggingCounter = logInterval;
|
||||
}
|
||||
|
||||
Thread.sleep(checkEveryMillis);
|
||||
} while (true);
|
||||
}
|
||||
|
||||
public interface CallbackHandler {
|
||||
|
||||
/**
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hadoop.yarn.client.api.async.impl;
|
||||
|
||||
import com.google.common.base.Supplier;
|
||||
import static org.mockito.Matchers.anyFloat;
|
||||
import static org.mockito.Matchers.anyInt;
|
||||
import static org.mockito.Matchers.anyString;
|
||||
@ -180,7 +181,7 @@ private void runHeartBeatThrowOutException(Exception ex) throws Exception{
|
||||
AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
|
||||
when(client.allocate(anyFloat())).thenThrow(ex);
|
||||
|
||||
AMRMClientAsync<ContainerRequest> asyncClient =
|
||||
AMRMClientAsync<ContainerRequest> asyncClient =
|
||||
AMRMClientAsync.createAMRMClientAsync(client, 20, callbackHandler);
|
||||
asyncClient.init(conf);
|
||||
asyncClient.start();
|
||||
@ -228,6 +229,41 @@ public void testAMRMClientAsyncShutDown() throws Exception {
|
||||
asyncClient.stop();
|
||||
}
|
||||
|
||||
@Test (timeout = 10000)
|
||||
public void testAMRMClientAsyncShutDownWithWaitFor() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
final TestCallbackHandler callbackHandler = new TestCallbackHandler();
|
||||
@SuppressWarnings("unchecked")
|
||||
AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
|
||||
|
||||
final AllocateResponse shutDownResponse = createAllocateResponse(
|
||||
new ArrayList<ContainerStatus>(), new ArrayList<Container>(), null);
|
||||
shutDownResponse.setAMCommand(AMCommand.AM_SHUTDOWN);
|
||||
when(client.allocate(anyFloat())).thenReturn(shutDownResponse);
|
||||
|
||||
AMRMClientAsync<ContainerRequest> asyncClient =
|
||||
AMRMClientAsync.createAMRMClientAsync(client, 10, callbackHandler);
|
||||
asyncClient.init(conf);
|
||||
asyncClient.start();
|
||||
|
||||
Supplier<Boolean> checker = new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
return callbackHandler.reboot;
|
||||
}
|
||||
};
|
||||
|
||||
asyncClient.registerApplicationMaster("localhost", 1234, null);
|
||||
asyncClient.waitFor(checker);
|
||||
|
||||
asyncClient.stop();
|
||||
// stopping should have joined all threads and completed all callbacks
|
||||
Assert.assertTrue(callbackHandler.callbackCount == 0);
|
||||
|
||||
verify(client, times(1)).allocate(anyFloat());
|
||||
asyncClient.stop();
|
||||
}
|
||||
|
||||
@Test (timeout = 5000)
|
||||
public void testCallAMRMClientAsyncStopFromCallbackHandler()
|
||||
throws YarnException, IOException, InterruptedException {
|
||||
@ -262,6 +298,40 @@ public void testCallAMRMClientAsyncStopFromCallbackHandler()
|
||||
}
|
||||
}
|
||||
|
||||
@Test (timeout = 5000)
|
||||
public void testCallAMRMClientAsyncStopFromCallbackHandlerWithWaitFor()
|
||||
throws YarnException, IOException, InterruptedException {
|
||||
Configuration conf = new Configuration();
|
||||
final TestCallbackHandler2 callbackHandler = new TestCallbackHandler2();
|
||||
@SuppressWarnings("unchecked")
|
||||
AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
|
||||
|
||||
List<ContainerStatus> completed = Arrays.asList(
|
||||
ContainerStatus.newInstance(newContainerId(0, 0, 0, 0),
|
||||
ContainerState.COMPLETE, "", 0));
|
||||
final AllocateResponse response = createAllocateResponse(completed,
|
||||
new ArrayList<Container>(), null);
|
||||
|
||||
when(client.allocate(anyFloat())).thenReturn(response);
|
||||
|
||||
AMRMClientAsync<ContainerRequest> asyncClient =
|
||||
AMRMClientAsync.createAMRMClientAsync(client, 20, callbackHandler);
|
||||
callbackHandler.asynClient = asyncClient;
|
||||
asyncClient.init(conf);
|
||||
asyncClient.start();
|
||||
|
||||
Supplier<Boolean> checker = new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
return callbackHandler.notify;
|
||||
}
|
||||
};
|
||||
|
||||
asyncClient.registerApplicationMaster("localhost", 1234, null);
|
||||
asyncClient.waitFor(checker);
|
||||
Assert.assertTrue(checker.get());
|
||||
}
|
||||
|
||||
void runCallBackThrowOutException(TestCallbackHandler2 callbackHandler) throws
|
||||
InterruptedException, YarnException, IOException {
|
||||
Configuration conf = new Configuration();
|
||||
@ -342,7 +412,7 @@ private class TestCallbackHandler implements AMRMClientAsync.CallbackHandler {
|
||||
private volatile List<ContainerStatus> completedContainers;
|
||||
private volatile List<Container> allocatedContainers;
|
||||
Exception savedException = null;
|
||||
boolean reboot = false;
|
||||
volatile boolean reboot = false;
|
||||
Object notifier = new Object();
|
||||
|
||||
int callbackCount = 0;
|
||||
@ -432,7 +502,7 @@ private class TestCallbackHandler2 implements AMRMClientAsync.CallbackHandler {
|
||||
@SuppressWarnings("rawtypes")
|
||||
AMRMClientAsync asynClient;
|
||||
boolean stop = true;
|
||||
boolean notify = false;
|
||||
volatile boolean notify = false;
|
||||
boolean throwOutException = false;
|
||||
|
||||
@Override
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hadoop.yarn.client.api.impl;
|
||||
|
||||
import com.google.common.base.Supplier;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
@ -814,6 +815,40 @@ public AllocateResponse answer(InvocationOnMock invocation)
|
||||
assertEquals(0, amClient.ask.size());
|
||||
assertEquals(0, amClient.release.size());
|
||||
}
|
||||
|
||||
class CountDownSupplier implements Supplier<Boolean> {
|
||||
int counter = 0;
|
||||
@Override
|
||||
public Boolean get() {
|
||||
counter++;
|
||||
if (counter >= 3) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@Test
|
||||
public void testWaitFor() throws InterruptedException {
|
||||
AMRMClientImpl<ContainerRequest> amClient = null;
|
||||
CountDownSupplier countDownChecker = new CountDownSupplier();
|
||||
|
||||
try {
|
||||
// start am rm client
|
||||
amClient =
|
||||
(AMRMClientImpl<ContainerRequest>) AMRMClient
|
||||
.<ContainerRequest> createAMRMClient();
|
||||
amClient.init(new YarnConfiguration());
|
||||
amClient.start();
|
||||
amClient.waitFor(countDownChecker, 1000);
|
||||
assertEquals(3, countDownChecker.counter);
|
||||
} finally {
|
||||
if (amClient != null) {
|
||||
amClient.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void sleep(int sleepTime) {
|
||||
try {
|
||||
|
@ -39,6 +39,7 @@
|
||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||
import org.apache.hadoop.yarn.server.applicationhistoryservice.webapp.AHSWebApp;
|
||||
import org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore;
|
||||
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
|
||||
import org.apache.hadoop.yarn.server.timeline.TimelineStore;
|
||||
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
|
||||
import org.apache.hadoop.yarn.server.timeline.security.TimelineAuthenticationFilterInitializer;
|
||||
@ -59,12 +60,12 @@ public class ApplicationHistoryServer extends CompositeService {
|
||||
private static final Log LOG = LogFactory
|
||||
.getLog(ApplicationHistoryServer.class);
|
||||
|
||||
protected ApplicationHistoryClientService ahsClientService;
|
||||
protected ApplicationHistoryManager historyManager;
|
||||
protected TimelineStore timelineStore;
|
||||
protected TimelineDelegationTokenSecretManagerService secretManagerService;
|
||||
protected TimelineACLsManager timelineACLsManager;
|
||||
protected WebApp webApp;
|
||||
private ApplicationHistoryClientService ahsClientService;
|
||||
private ApplicationHistoryManager historyManager;
|
||||
private TimelineStore timelineStore;
|
||||
private TimelineDelegationTokenSecretManagerService secretManagerService;
|
||||
private TimelineDataManager timelineDataManager;
|
||||
private WebApp webApp;
|
||||
|
||||
public ApplicationHistoryServer() {
|
||||
super(ApplicationHistoryServer.class.getName());
|
||||
@ -72,15 +73,18 @@ public ApplicationHistoryServer() {
|
||||
|
||||
@Override
|
||||
protected void serviceInit(Configuration conf) throws Exception {
|
||||
historyManager = createApplicationHistory();
|
||||
ahsClientService = createApplicationHistoryClientService(historyManager);
|
||||
addService(ahsClientService);
|
||||
addService((Service) historyManager);
|
||||
// init timeline services first
|
||||
timelineStore = createTimelineStore(conf);
|
||||
addIfService(timelineStore);
|
||||
secretManagerService = createTimelineDelegationTokenSecretManagerService(conf);
|
||||
addService(secretManagerService);
|
||||
timelineACLsManager = createTimelineACLsManager(conf);
|
||||
timelineDataManager = createTimelineDataManager(conf);
|
||||
|
||||
// init generic history service afterwards
|
||||
historyManager = createApplicationHistoryManager(conf);
|
||||
ahsClientService = createApplicationHistoryClientService(historyManager);
|
||||
addService(ahsClientService);
|
||||
addService((Service) historyManager);
|
||||
|
||||
DefaultMetricsSystem.initialize("ApplicationHistoryServer");
|
||||
JvmMetrics.initSingleton("ApplicationHistoryServer", null);
|
||||
@ -111,21 +115,22 @@ protected void serviceStop() throws Exception {
|
||||
|
||||
@Private
|
||||
@VisibleForTesting
|
||||
public ApplicationHistoryClientService getClientService() {
|
||||
ApplicationHistoryClientService getClientService() {
|
||||
return this.ahsClientService;
|
||||
}
|
||||
|
||||
protected ApplicationHistoryClientService
|
||||
createApplicationHistoryClientService(
|
||||
ApplicationHistoryManager historyManager) {
|
||||
return new ApplicationHistoryClientService(historyManager);
|
||||
/**
|
||||
* @return ApplicationTimelineStore
|
||||
*/
|
||||
@Private
|
||||
@VisibleForTesting
|
||||
public TimelineStore getTimelineStore() {
|
||||
return timelineStore;
|
||||
}
|
||||
|
||||
protected ApplicationHistoryManager createApplicationHistory() {
|
||||
return new ApplicationHistoryManagerImpl();
|
||||
}
|
||||
|
||||
protected ApplicationHistoryManager getApplicationHistory() {
|
||||
@Private
|
||||
@VisibleForTesting
|
||||
ApplicationHistoryManager getApplicationHistoryManager() {
|
||||
return this.historyManager;
|
||||
}
|
||||
|
||||
@ -154,28 +159,35 @@ public static void main(String[] args) {
|
||||
launchAppHistoryServer(args);
|
||||
}
|
||||
|
||||
protected ApplicationHistoryManager createApplicationHistoryManager(
|
||||
private ApplicationHistoryClientService
|
||||
createApplicationHistoryClientService(
|
||||
ApplicationHistoryManager historyManager) {
|
||||
return new ApplicationHistoryClientService(historyManager);
|
||||
}
|
||||
|
||||
private ApplicationHistoryManager createApplicationHistoryManager(
|
||||
Configuration conf) {
|
||||
return new ApplicationHistoryManagerImpl();
|
||||
}
|
||||
|
||||
protected TimelineStore createTimelineStore(
|
||||
private TimelineStore createTimelineStore(
|
||||
Configuration conf) {
|
||||
return ReflectionUtils.newInstance(conf.getClass(
|
||||
YarnConfiguration.TIMELINE_SERVICE_STORE, LeveldbTimelineStore.class,
|
||||
TimelineStore.class), conf);
|
||||
}
|
||||
|
||||
protected TimelineDelegationTokenSecretManagerService
|
||||
private TimelineDelegationTokenSecretManagerService
|
||||
createTimelineDelegationTokenSecretManagerService(Configuration conf) {
|
||||
return new TimelineDelegationTokenSecretManagerService();
|
||||
}
|
||||
|
||||
protected TimelineACLsManager createTimelineACLsManager(Configuration conf) {
|
||||
return new TimelineACLsManager(conf);
|
||||
private TimelineDataManager createTimelineDataManager(Configuration conf) {
|
||||
return new TimelineDataManager(
|
||||
timelineStore, new TimelineACLsManager(conf));
|
||||
}
|
||||
|
||||
protected void startWebApp() {
|
||||
private void startWebApp() {
|
||||
Configuration conf = getConfig();
|
||||
// Always load pseudo authentication filter to parse "user.name" in an URL
|
||||
// to identify a HTTP request's user in insecure mode.
|
||||
@ -199,9 +211,8 @@ protected void startWebApp() {
|
||||
try {
|
||||
AHSWebApp ahsWebApp = AHSWebApp.getInstance();
|
||||
ahsWebApp.setApplicationHistoryManager(historyManager);
|
||||
ahsWebApp.setTimelineStore(timelineStore);
|
||||
ahsWebApp.setTimelineDelegationTokenSecretManagerService(secretManagerService);
|
||||
ahsWebApp.setTimelineACLsManager(timelineACLsManager);
|
||||
ahsWebApp.setTimelineDataManager(timelineDataManager);
|
||||
webApp =
|
||||
WebApps
|
||||
.$for("applicationhistory", ApplicationHistoryClientService.class,
|
||||
@ -213,14 +224,6 @@ protected void startWebApp() {
|
||||
throw new YarnRuntimeException(msg, e);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @return ApplicationTimelineStore
|
||||
*/
|
||||
@Private
|
||||
@VisibleForTesting
|
||||
public TimelineStore getTimelineStore() {
|
||||
return timelineStore;
|
||||
}
|
||||
|
||||
private void doSecureLogin(Configuration conf) throws IOException {
|
||||
InetSocketAddress socAddr = getBindAddress(conf);
|
||||
|
@ -22,8 +22,7 @@
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.yarn.server.api.ApplicationContext;
import org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryManager;
import org.apache.hadoop.yarn.server.timeline.TimelineStore;
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
import org.apache.hadoop.yarn.server.timeline.security.TimelineDelegationTokenSecretManagerService;
import org.apache.hadoop.yarn.server.timeline.webapp.TimelineWebServices;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
@ -36,9 +35,8 @@
public class AHSWebApp extends WebApp implements YarnWebParams {

private ApplicationHistoryManager applicationHistoryManager;
private TimelineStore timelineStore;
private TimelineDelegationTokenSecretManagerService secretManagerService;
private TimelineACLsManager timelineACLsManager;
private TimelineDataManager timelineDataManager;

private static AHSWebApp instance = null;

@ -68,14 +66,6 @@ public void setApplicationHistoryManager(
this.applicationHistoryManager = applicationHistoryManager;
}

public TimelineStore getTimelineStore() {
return timelineStore;
}

public void setTimelineStore(TimelineStore timelineStore) {
this.timelineStore = timelineStore;
}

public TimelineDelegationTokenSecretManagerService
getTimelineDelegationTokenSecretManagerService() {
return secretManagerService;
@ -86,12 +76,12 @@ public void setTimelineDelegationTokenSecretManagerService(
this.secretManagerService = secretManagerService;
}

public TimelineACLsManager getTimelineACLsManager() {
return timelineACLsManager;
public TimelineDataManager getTimelineDataManager() {
return timelineDataManager;
}

public void setTimelineACLsManager(TimelineACLsManager timelineACLsManager) {
this.timelineACLsManager = timelineACLsManager;
public void setTimelineDataManager(TimelineDataManager timelineDataManager) {
this.timelineDataManager = timelineDataManager;
}

@Override
@ -101,10 +91,9 @@ public void setup() {
bind(TimelineWebServices.class);
bind(GenericExceptionHandler.class);
bind(ApplicationContext.class).toInstance(applicationHistoryManager);
bind(TimelineStore.class).toInstance(timelineStore);
bind(TimelineDelegationTokenSecretManagerService.class).toInstance(
secretManagerService);
bind(TimelineACLsManager.class).toInstance(timelineACLsManager);
bind(TimelineDataManager.class).toInstance(timelineDataManager);
route("/", AHSController.class);
route(pajoin("/apps", APP_STATE), AHSController.class);
route(pajoin("/app", APPLICATION_ID), AHSController.class, "app");
@ -0,0 +1,319 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.yarn.server.timeline;

import static org.apache.hadoop.yarn.util.StringHelper.CSV_JOINER;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEvents;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.timeline.TimelineReader.Field;
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
import org.apache.hadoop.yarn.util.timeline.TimelineUtils;

/**
* The class wraps the timeline store and the ACLs manager. It does some
* non-trivial manipulation of the timeline data before putting or after
* getting it from the timeline store, and checks the user's access to it.
*
*/
public class TimelineDataManager {

private static final Log LOG = LogFactory.getLog(TimelineDataManager.class);

private TimelineStore store;
private TimelineACLsManager timelineACLsManager;

public TimelineDataManager(TimelineStore store,
TimelineACLsManager timelineACLsManager) {
this.store = store;
this.timelineACLsManager = timelineACLsManager;
}

/**
* Get the timeline entities that the given user has access to. The meaning
* of each argument has been documented with
* {@link TimelineReader#getEntities}.
*
* @see TimelineReader#getEntities
*/
public TimelineEntities getEntities(
String entityType,
NameValuePair primaryFilter,
Collection<NameValuePair> secondaryFilter,
Long windowStart,
Long windowEnd,
String fromId,
Long fromTs,
Long limit,
EnumSet<Field> fields,
UserGroupInformation callerUGI) throws YarnException, IOException {
TimelineEntities entities = null;
boolean modified = extendFields(fields);
entities = store.getEntities(
entityType,
limit,
windowStart,
windowEnd,
fromId,
fromTs,
primaryFilter,
secondaryFilter,
fields);
if (entities != null) {
Iterator<TimelineEntity> entitiesItr =
entities.getEntities().iterator();
while (entitiesItr.hasNext()) {
TimelineEntity entity = entitiesItr.next();
try {
// check ACLs
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
entitiesItr.remove();
} else {
// clean up system data
if (modified) {
entity.setPrimaryFilters(null);
} else {
cleanupOwnerInfo(entity);
}
}
} catch (YarnException e) {
LOG.error("Error when verifying access for user " + callerUGI
+ " on the events of the timeline entity "
+ new EntityIdentifier(entity.getEntityId(),
entity.getEntityType()), e);
entitiesItr.remove();
}
}
}
if (entities == null) {
return new TimelineEntities();
}
return entities;
}

/**
* Get the single timeline entity that the given user has access to. The
* meaning of each argument has been documented with
* {@link TimelineReader#getEntity}.
*
* @see TimelineReader#getEntity
*/
public TimelineEntity getEntity(
String entityType,
String entityId,
EnumSet<Field> fields,
UserGroupInformation callerUGI) throws YarnException, IOException {
TimelineEntity entity = null;
boolean modified = extendFields(fields);
entity =
store.getEntity(entityId, entityType, fields);
if (entity != null) {
// check ACLs
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
entity = null;
} else {
// clean up the system data
if (modified) {
entity.setPrimaryFilters(null);
} else {
cleanupOwnerInfo(entity);
}
}
}
return entity;
}

/**
* Get the events whose entities the given user has access to. The meaning of
* each argument has been documented with
* {@link TimelineReader#getEntityTimelines}.
*
* @see TimelineReader#getEntityTimelines
*/
public TimelineEvents getEvents(
String entityType,
SortedSet<String> entityIds,
SortedSet<String> eventTypes,
Long windowStart,
Long windowEnd,
Long limit,
UserGroupInformation callerUGI) throws YarnException, IOException {
TimelineEvents events = null;
events = store.getEntityTimelines(
entityType,
entityIds,
limit,
windowStart,
windowEnd,
eventTypes);
if (events != null) {
Iterator<TimelineEvents.EventsOfOneEntity> eventsItr =
events.getAllEvents().iterator();
while (eventsItr.hasNext()) {
TimelineEvents.EventsOfOneEntity eventsOfOneEntity = eventsItr.next();
try {
TimelineEntity entity = store.getEntity(
eventsOfOneEntity.getEntityId(),
eventsOfOneEntity.getEntityType(),
EnumSet.of(Field.PRIMARY_FILTERS));
// check ACLs
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
eventsItr.remove();
}
} catch (Exception e) {
LOG.error("Error when verifying access for user " + callerUGI
+ " on the events of the timeline entity "
+ new EntityIdentifier(eventsOfOneEntity.getEntityId(),
eventsOfOneEntity.getEntityType()), e);
eventsItr.remove();
}
}
}
if (events == null) {
return new TimelineEvents();
}
return events;
}

/**
* Store the timeline entities into the store and set their owner to the
* given user.
*/
public TimelinePutResponse postEntities(
TimelineEntities entities,
UserGroupInformation callerUGI) throws YarnException, IOException {
if (entities == null) {
return new TimelinePutResponse();
}
List<EntityIdentifier> entityIDs = new ArrayList<EntityIdentifier>();
TimelineEntities entitiesToPut = new TimelineEntities();
List<TimelinePutResponse.TimelinePutError> errors =
new ArrayList<TimelinePutResponse.TimelinePutError>();
for (TimelineEntity entity : entities.getEntities()) {
EntityIdentifier entityID =
new EntityIdentifier(entity.getEntityId(), entity.getEntityType());

// check if there is existing entity
TimelineEntity existingEntity = null;
try {
existingEntity =
store.getEntity(entityID.getId(), entityID.getType(),
EnumSet.of(Field.PRIMARY_FILTERS));
if (existingEntity != null
&& !timelineACLsManager.checkAccess(callerUGI, existingEntity)) {
throw new YarnException("The timeline entity " + entityID
+ " was not put by " + callerUGI + " before");
}
} catch (Exception e) {
// Skip the entity which already exists and was put by others
LOG.error("Skip the timeline entity: " + entityID + ", because "
+ e.getMessage());
TimelinePutResponse.TimelinePutError error =
new TimelinePutResponse.TimelinePutError();
error.setEntityId(entityID.getId());
error.setEntityType(entityID.getType());
error.setErrorCode(
TimelinePutResponse.TimelinePutError.ACCESS_DENIED);
errors.add(error);
continue;
}

// inject owner information for the access check if this is the first
// time to post the entity, in case it's the admin who is updating
// the timeline data.
try {
if (existingEntity == null) {
injectOwnerInfo(entity, callerUGI.getShortUserName());
}
} catch (YarnException e) {
// Skip the entity which messes up the primary filter and record the
// error
LOG.error("Skip the timeline entity: " + entityID + ", because "
+ e.getMessage());
TimelinePutResponse.TimelinePutError error =
new TimelinePutResponse.TimelinePutError();
error.setEntityId(entityID.getId());
error.setEntityType(entityID.getType());
error.setErrorCode(
TimelinePutResponse.TimelinePutError.SYSTEM_FILTER_CONFLICT);
errors.add(error);
continue;
}

entityIDs.add(entityID);
entitiesToPut.addEntity(entity);
if (LOG.isDebugEnabled()) {
LOG.debug("Storing the entity " + entityID + ", JSON-style content: "
+ TimelineUtils.dumpTimelineRecordtoJSON(entity));
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Storing entities: " + CSV_JOINER.join(entityIDs));
}
TimelinePutResponse response = store.put(entitiesToPut);
// add the errors of timeline system filter key conflict
response.addErrors(errors);
return response;
}

private static boolean extendFields(EnumSet<Field> fieldEnums) {
boolean modified = false;
if (fieldEnums != null && !fieldEnums.contains(Field.PRIMARY_FILTERS)) {
fieldEnums.add(Field.PRIMARY_FILTERS);
modified = true;
}
return modified;
}

private static void injectOwnerInfo(TimelineEntity timelineEntity,
String owner) throws YarnException {
if (timelineEntity.getPrimaryFilters() != null &&
timelineEntity.getPrimaryFilters().containsKey(
TimelineStore.SystemFilter.ENTITY_OWNER.toString())) {
throw new YarnException(
"User should not use the timeline system filter key: "
+ TimelineStore.SystemFilter.ENTITY_OWNER);
}
timelineEntity.addPrimaryFilter(
TimelineStore.SystemFilter.ENTITY_OWNER
.toString(), owner);
}

private static void cleanupOwnerInfo(TimelineEntity timelineEntity) {
if (timelineEntity.getPrimaryFilters() != null) {
timelineEntity.getPrimaryFilters().remove(
TimelineStore.SystemFilter.ENTITY_OWNER.toString());
}
}

}
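
For reference, a minimal usage sketch of the new TimelineDataManager follows. It is illustrative only and not part of this change set; it assumes the in-memory TimelineStore used by the tests (MemoryTimelineStore), an invented entity type, and omits error handling.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.timeline.MemoryTimelineStore;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
import org.apache.hadoop.yarn.server.timeline.TimelineStore;
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;

public class TimelineDataManagerSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new YarnConfiguration();
    conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, false);

    // Assumption: MemoryTimelineStore is a service-style TimelineStore,
    // so it is initialized and started before use.
    TimelineStore store = new MemoryTimelineStore();
    store.init(conf);
    store.start();

    TimelineACLsManager aclsManager = new TimelineACLsManager(conf);
    TimelineDataManager dataManager = new TimelineDataManager(store, aclsManager);

    // Post an entity as the current user; the data manager injects the
    // ENTITY_OWNER system filter before the entity reaches the store.
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    TimelineEntity entity = new TimelineEntity();
    entity.setEntityType("SKETCH_ENTITY");   // invented type, for illustration
    entity.setEntityId("entity_1");
    entity.setStartTime(System.currentTimeMillis());
    TimelineEntities entities = new TimelineEntities();
    entities.addEntity(entity);
    dataManager.postEntities(entities, ugi);

    // Read it back; ACL checks and system-filter cleanup now happen here,
    // not in TimelineWebServices.
    TimelineEntity fetched = dataManager.getEntity(
        "SKETCH_ENTITY", "entity_1", null, ugi);
    System.out.println(fetched.getEntityId());
  }
}
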
@ -18,14 +18,10 @@
|
||||
|
||||
package org.apache.hadoop.yarn.server.timeline.webapp;
|
||||
|
||||
import static org.apache.hadoop.yarn.util.StringHelper.CSV_JOINER;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
@ -58,14 +54,11 @@
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineEvents;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.server.timeline.EntityIdentifier;
|
||||
import org.apache.hadoop.yarn.server.timeline.GenericObjectMapper;
|
||||
import org.apache.hadoop.yarn.server.timeline.NameValuePair;
|
||||
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
|
||||
import org.apache.hadoop.yarn.server.timeline.TimelineReader.Field;
|
||||
import org.apache.hadoop.yarn.server.timeline.TimelineStore;
|
||||
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
|
||||
import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
|
||||
import org.apache.hadoop.yarn.webapp.BadRequestException;
|
||||
import org.apache.hadoop.yarn.webapp.ForbiddenException;
|
||||
import org.apache.hadoop.yarn.webapp.NotFoundException;
|
||||
@ -80,14 +73,11 @@ public class TimelineWebServices {
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(TimelineWebServices.class);
|
||||
|
||||
private TimelineStore store;
|
||||
private TimelineACLsManager timelineACLsManager;
|
||||
private TimelineDataManager timelineDataManager;
|
||||
|
||||
@Inject
|
||||
public TimelineWebServices(TimelineStore store,
|
||||
TimelineACLsManager timelineACLsManager) {
|
||||
this.store = store;
|
||||
this.timelineACLsManager = timelineACLsManager;
|
||||
public TimelineWebServices(TimelineDataManager timelineDataManager) {
|
||||
this.timelineDataManager = timelineDataManager;
|
||||
}
|
||||
|
||||
@XmlRootElement(name = "about")
|
||||
@ -148,61 +138,28 @@ public TimelineEntities getEntities(
|
||||
@QueryParam("limit") String limit,
|
||||
@QueryParam("fields") String fields) {
|
||||
init(res);
|
||||
TimelineEntities entities = null;
|
||||
try {
|
||||
EnumSet<Field> fieldEnums = parseFieldsStr(fields, ",");
|
||||
boolean modified = extendFields(fieldEnums);
|
||||
UserGroupInformation callerUGI = getUser(req);
|
||||
entities = store.getEntities(
|
||||
return timelineDataManager.getEntities(
|
||||
parseStr(entityType),
|
||||
parseLongStr(limit),
|
||||
parsePairStr(primaryFilter, ":"),
|
||||
parsePairsStr(secondaryFilter, ",", ":"),
|
||||
parseLongStr(windowStart),
|
||||
parseLongStr(windowEnd),
|
||||
parseStr(fromId),
|
||||
parseLongStr(fromTs),
|
||||
parsePairStr(primaryFilter, ":"),
|
||||
parsePairsStr(secondaryFilter, ",", ":"),
|
||||
fieldEnums);
|
||||
if (entities != null) {
|
||||
Iterator<TimelineEntity> entitiesItr =
|
||||
entities.getEntities().iterator();
|
||||
while (entitiesItr.hasNext()) {
|
||||
TimelineEntity entity = entitiesItr.next();
|
||||
try {
|
||||
// check ACLs
|
||||
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
|
||||
entitiesItr.remove();
|
||||
} else {
|
||||
// clean up system data
|
||||
if (modified) {
|
||||
entity.setPrimaryFilters(null);
|
||||
} else {
|
||||
cleanupOwnerInfo(entity);
|
||||
}
|
||||
}
|
||||
} catch (YarnException e) {
|
||||
LOG.error("Error when verifying access for user " + callerUGI
|
||||
+ " on the events of the timeline entity "
|
||||
+ new EntityIdentifier(entity.getEntityId(),
|
||||
entity.getEntityType()), e);
|
||||
entitiesItr.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
parseLongStr(limit),
|
||||
parseFieldsStr(fields, ","),
|
||||
getUser(req));
|
||||
} catch (NumberFormatException e) {
|
||||
throw new BadRequestException(
|
||||
"windowStart, windowEnd or limit is not a numeric value.");
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new BadRequestException("requested invalid field.");
|
||||
} catch (IOException e) {
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error getting entities", e);
|
||||
throw new WebApplicationException(e,
|
||||
Response.Status.INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
if (entities == null) {
|
||||
return new TimelineEntities();
|
||||
}
|
||||
return entities;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -220,33 +177,15 @@ public TimelineEntity getEntity(
|
||||
init(res);
|
||||
TimelineEntity entity = null;
|
||||
try {
|
||||
EnumSet<Field> fieldEnums = parseFieldsStr(fields, ",");
|
||||
boolean modified = extendFields(fieldEnums);
|
||||
entity =
|
||||
store.getEntity(parseStr(entityId), parseStr(entityType),
|
||||
fieldEnums);
|
||||
if (entity != null) {
|
||||
// check ACLs
|
||||
UserGroupInformation callerUGI = getUser(req);
|
||||
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
|
||||
entity = null;
|
||||
} else {
|
||||
// clean up the system data
|
||||
if (modified) {
|
||||
entity.setPrimaryFilters(null);
|
||||
} else {
|
||||
cleanupOwnerInfo(entity);
|
||||
}
|
||||
}
|
||||
}
|
||||
entity = timelineDataManager.getEntity(
|
||||
parseStr(entityType),
|
||||
parseStr(entityId),
|
||||
parseFieldsStr(fields, ","),
|
||||
getUser(req));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new BadRequestException(
|
||||
"requested invalid field.");
|
||||
} catch (IOException e) {
|
||||
LOG.error("Error getting entity", e);
|
||||
throw new WebApplicationException(e,
|
||||
Response.Status.INTERNAL_SERVER_ERROR);
|
||||
} catch (YarnException e) {
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error getting entity", e);
|
||||
throw new WebApplicationException(e,
|
||||
Response.Status.INTERNAL_SERVER_ERROR);
|
||||
@ -275,51 +214,23 @@ public TimelineEvents getEvents(
|
||||
@QueryParam("windowEnd") String windowEnd,
|
||||
@QueryParam("limit") String limit) {
|
||||
init(res);
|
||||
TimelineEvents events = null;
|
||||
try {
|
||||
UserGroupInformation callerUGI = getUser(req);
|
||||
events = store.getEntityTimelines(
|
||||
return timelineDataManager.getEvents(
|
||||
parseStr(entityType),
|
||||
parseArrayStr(entityId, ","),
|
||||
parseLongStr(limit),
|
||||
parseArrayStr(eventType, ","),
|
||||
parseLongStr(windowStart),
|
||||
parseLongStr(windowEnd),
|
||||
parseArrayStr(eventType, ","));
|
||||
if (events != null) {
|
||||
Iterator<TimelineEvents.EventsOfOneEntity> eventsItr =
|
||||
events.getAllEvents().iterator();
|
||||
while (eventsItr.hasNext()) {
|
||||
TimelineEvents.EventsOfOneEntity eventsOfOneEntity = eventsItr.next();
|
||||
try {
|
||||
TimelineEntity entity = store.getEntity(
|
||||
eventsOfOneEntity.getEntityId(),
|
||||
eventsOfOneEntity.getEntityType(),
|
||||
EnumSet.of(Field.PRIMARY_FILTERS));
|
||||
// check ACLs
|
||||
if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
|
||||
eventsItr.remove();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error when verifying access for user " + callerUGI
|
||||
+ " on the events of the timeline entity "
|
||||
+ new EntityIdentifier(eventsOfOneEntity.getEntityId(),
|
||||
eventsOfOneEntity.getEntityType()), e);
|
||||
eventsItr.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
parseLongStr(limit),
|
||||
getUser(req));
|
||||
} catch (NumberFormatException e) {
|
||||
throw new BadRequestException(
|
||||
"windowStart, windowEnd or limit is not a numeric value.");
|
||||
} catch (IOException e) {
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error getting entity timelines", e);
|
||||
throw new WebApplicationException(e,
|
||||
Response.Status.INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
if (events == null) {
|
||||
return new TimelineEvents();
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -333,9 +244,6 @@ public TimelinePutResponse postEntities(
|
||||
@Context HttpServletResponse res,
|
||||
TimelineEntities entities) {
|
||||
init(res);
|
||||
if (entities == null) {
|
||||
return new TimelinePutResponse();
|
||||
}
|
||||
UserGroupInformation callerUGI = getUser(req);
|
||||
if (callerUGI == null) {
|
||||
String msg = "The owner of the posted timeline entities is not set";
|
||||
@ -343,76 +251,8 @@ public TimelinePutResponse postEntities(
|
||||
throw new ForbiddenException(msg);
|
||||
}
|
||||
try {
|
||||
List<EntityIdentifier> entityIDs = new ArrayList<EntityIdentifier>();
|
||||
TimelineEntities entitiesToPut = new TimelineEntities();
|
||||
List<TimelinePutResponse.TimelinePutError> errors =
|
||||
new ArrayList<TimelinePutResponse.TimelinePutError>();
|
||||
for (TimelineEntity entity : entities.getEntities()) {
|
||||
EntityIdentifier entityID =
|
||||
new EntityIdentifier(entity.getEntityId(), entity.getEntityType());
|
||||
|
||||
// check if there is existing entity
|
||||
TimelineEntity existingEntity = null;
|
||||
try {
|
||||
existingEntity =
|
||||
store.getEntity(entityID.getId(), entityID.getType(),
|
||||
EnumSet.of(Field.PRIMARY_FILTERS));
|
||||
if (existingEntity != null
|
||||
&& !timelineACLsManager.checkAccess(callerUGI, existingEntity)) {
|
||||
throw new YarnException("The timeline entity " + entityID
|
||||
+ " was not put by " + callerUGI + " before");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// Skip the entity which already exists and was put by others
|
||||
LOG.warn("Skip the timeline entity: " + entityID + ", because "
|
||||
+ e.getMessage());
|
||||
TimelinePutResponse.TimelinePutError error =
|
||||
new TimelinePutResponse.TimelinePutError();
|
||||
error.setEntityId(entityID.getId());
|
||||
error.setEntityType(entityID.getType());
|
||||
error.setErrorCode(
|
||||
TimelinePutResponse.TimelinePutError.ACCESS_DENIED);
|
||||
errors.add(error);
|
||||
continue;
|
||||
}
|
||||
|
||||
// inject owner information for the access check if this is the first
|
||||
// time to post the entity, in case it's the admin who is updating
|
||||
// the timeline data.
|
||||
try {
|
||||
if (existingEntity == null) {
|
||||
injectOwnerInfo(entity, callerUGI.getShortUserName());
|
||||
}
|
||||
} catch (YarnException e) {
|
||||
// Skip the entity which messes up the primary filter and record the
|
||||
// error
|
||||
LOG.warn("Skip the timeline entity: " + entityID + ", because "
|
||||
+ e.getMessage());
|
||||
TimelinePutResponse.TimelinePutError error =
|
||||
new TimelinePutResponse.TimelinePutError();
|
||||
error.setEntityId(entityID.getId());
|
||||
error.setEntityType(entityID.getType());
|
||||
error.setErrorCode(
|
||||
TimelinePutResponse.TimelinePutError.SYSTEM_FILTER_CONFLICT);
|
||||
errors.add(error);
|
||||
continue;
|
||||
}
|
||||
|
||||
entityIDs.add(entityID);
|
||||
entitiesToPut.addEntity(entity);
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Storing the entity " + entityID + ", JSON-style content: "
|
||||
+ TimelineUtils.dumpTimelineRecordtoJSON(entity));
|
||||
}
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Storing entities: " + CSV_JOINER.join(entityIDs));
|
||||
}
|
||||
TimelinePutResponse response = store.put(entitiesToPut);
|
||||
// add the errors of timeline system filter key conflict
|
||||
response.addErrors(errors);
|
||||
return response;
|
||||
} catch (IOException e) {
|
||||
return timelineDataManager.postEntities(entities, callerUGI);
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error putting entities", e);
|
||||
throw new WebApplicationException(e,
|
||||
Response.Status.INTERNAL_SERVER_ERROR);
|
||||
@ -423,6 +263,15 @@ private void init(HttpServletResponse response) {
|
||||
response.setContentType(null);
|
||||
}
|
||||
|
||||
private static UserGroupInformation getUser(HttpServletRequest req) {
|
||||
String remoteUser = req.getRemoteUser();
|
||||
UserGroupInformation callerUGI = null;
|
||||
if (remoteUser != null) {
|
||||
callerUGI = UserGroupInformation.createRemoteUser(remoteUser);
|
||||
}
|
||||
return callerUGI;
|
||||
}
|
||||
|
||||
private static SortedSet<String> parseArrayStr(String str, String delimiter) {
|
||||
if (str == null) {
|
||||
return null;
|
||||
@ -495,14 +344,6 @@ private static EnumSet<Field> parseFieldsStr(String str, String delimiter) {
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean extendFields(EnumSet<Field> fieldEnums) {
|
||||
boolean modified = false;
|
||||
if (fieldEnums != null && !fieldEnums.contains(Field.PRIMARY_FILTERS)) {
|
||||
fieldEnums.add(Field.PRIMARY_FILTERS);
|
||||
modified = true;
|
||||
}
|
||||
return modified;
|
||||
}
|
||||
private static Long parseLongStr(String str) {
|
||||
return str == null ? null : Long.parseLong(str.trim());
|
||||
}
|
||||
@ -511,34 +352,4 @@ private static String parseStr(String str) {
|
||||
return str == null ? null : str.trim();
|
||||
}
|
||||
|
||||
private static UserGroupInformation getUser(HttpServletRequest req) {
|
||||
String remoteUser = req.getRemoteUser();
|
||||
UserGroupInformation callerUGI = null;
|
||||
if (remoteUser != null) {
|
||||
callerUGI = UserGroupInformation.createRemoteUser(remoteUser);
|
||||
}
|
||||
return callerUGI;
|
||||
}
|
||||
|
||||
private static void injectOwnerInfo(TimelineEntity timelineEntity,
|
||||
String owner) throws YarnException {
|
||||
if (timelineEntity.getPrimaryFilters() != null &&
|
||||
timelineEntity.getPrimaryFilters().containsKey(
|
||||
TimelineStore.SystemFilter.ENTITY_OWNER.toString())) {
|
||||
throw new YarnException(
|
||||
"User should not use the timeline system filter key: "
|
||||
+ TimelineStore.SystemFilter.ENTITY_OWNER);
|
||||
}
|
||||
timelineEntity.addPrimaryFilter(
|
||||
TimelineStore.SystemFilter.ENTITY_OWNER
|
||||
.toString(), owner);
|
||||
}
|
||||
|
||||
private static void cleanupOwnerInfo(TimelineEntity timelineEntity) {
|
||||
if (timelineEntity.getPrimaryFilters() != null) {
|
||||
timelineEntity.getPrimaryFilters().remove(
|
||||
TimelineStore.SystemFilter.ENTITY_OWNER.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ public void setup() {
historyServer.init(config);
historyServer.start();
store =
((ApplicationHistoryManagerImpl) historyServer.getApplicationHistory())
((ApplicationHistoryManagerImpl) historyServer.getApplicationHistoryManager())
.getHistoryStore();
}

@ -49,6 +49,7 @@
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.security.AdminACLsManager;
import org.apache.hadoop.yarn.server.timeline.TestMemoryTimelineStore;
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
import org.apache.hadoop.yarn.server.timeline.TimelineStore;
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
import org.apache.hadoop.yarn.server.timeline.security.TimelineAuthenticationFilter;
@ -89,14 +90,15 @@ protected void configureServlets() {
} catch (Exception e) {
Assert.fail();
}
bind(TimelineStore.class).toInstance(store);
Configuration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, false);
timelineACLsManager = new TimelineACLsManager(conf);
conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
conf.set(YarnConfiguration.YARN_ADMIN_ACL, "admin");
adminACLsManager = new AdminACLsManager(conf);
bind(TimelineACLsManager.class).toInstance(timelineACLsManager);
TimelineDataManager timelineDataManager =
new TimelineDataManager(store, timelineACLsManager);
bind(TimelineDataManager.class).toInstance(timelineDataManager);
serve("/*").with(GuiceContainer.class);
TimelineAuthenticationFilter taFilter = new TimelineAuthenticationFilter();
FilterConfig filterConfig = mock(FilterConfig.class);
@ -461,7 +461,6 @@ protected void serviceInit(Configuration configuration) throws Exception {
rmDispatcher.register(RMAppManagerEventType.class, rmAppManager);

clientRM = createClientRMService();
rmContext.setClientRMService(clientRM);
addService(clientRM);
rmContext.setClientRMService(clientRM);

@ -52,13 +52,13 @@
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNewSavedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppUpdateSavedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
|
||||
import org.apache.hadoop.yarn.state.InvalidStateTransitonException;
|
||||
import org.apache.hadoop.yarn.state.SingleArcTransition;
|
||||
import org.apache.hadoop.yarn.state.StateMachine;
|
||||
@ -132,7 +132,8 @@ public void transition(RMStateStore store, RMStateStoreEvent event) {
|
||||
LOG.info("Storing info for app: " + appId);
|
||||
try {
|
||||
store.storeApplicationStateInternal(appId, appStateData);
|
||||
store.notifyDoneStoringApplication(appId, null);
|
||||
store.notifyApplication(new RMAppEvent(appId,
|
||||
RMAppEventType.APP_NEW_SAVED));
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error storing app: " + appId, e);
|
||||
store.notifyStoreOperationFailed(e);
|
||||
@ -156,7 +157,8 @@ public void transition(RMStateStore store, RMStateStoreEvent event) {
|
||||
LOG.info("Updating info for app: " + appId);
|
||||
try {
|
||||
store.updateApplicationStateInternal(appId, appStateData);
|
||||
store.notifyDoneUpdatingApplication(appId, null);
|
||||
store.notifyApplication(new RMAppEvent(appId,
|
||||
RMAppEventType.APP_UPDATE_SAVED));
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error updating app: " + appId, e);
|
||||
store.notifyStoreOperationFailed(e);
|
||||
@ -205,8 +207,9 @@ public void transition(RMStateStore store, RMStateStoreEvent event) {
|
||||
}
|
||||
store.storeApplicationAttemptStateInternal(attemptState.getAttemptId(),
|
||||
attemptStateData);
|
||||
store.notifyDoneStoringApplicationAttempt(attemptState.getAttemptId(),
|
||||
null);
|
||||
store.notifyApplicationAttempt(new RMAppAttemptEvent
|
||||
(attemptState.getAttemptId(),
|
||||
RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error storing appAttempt: " + attemptState.getAttemptId(), e);
|
||||
store.notifyStoreOperationFailed(e);
|
||||
@ -233,8 +236,9 @@ public void transition(RMStateStore store, RMStateStoreEvent event) {
|
||||
}
|
||||
store.updateApplicationAttemptStateInternal(attemptState.getAttemptId(),
|
||||
attemptStateData);
|
||||
store.notifyDoneUpdatingApplicationAttempt(attemptState.getAttemptId(),
|
||||
null);
|
||||
store.notifyApplicationAttempt(new RMAppAttemptEvent
|
||||
(attemptState.getAttemptId(),
|
||||
RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error updating appAttempt: " + attemptState.getAttemptId(), e);
|
||||
store.notifyStoreOperationFailed(e);
|
||||
@ -801,47 +805,28 @@ protected void notifyStoreOperationFailed(Exception failureCause) {
|
||||
}
|
||||
rmDispatcher.getEventHandler().handle(new RMFatalEvent(type, failureCause));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
/**
|
||||
* In (@link handleStoreEvent}, this method is called to notify the
|
||||
* application that new application is stored in state store
|
||||
* @param appId id of the application that has been saved
|
||||
* @param storedException the exception that is thrown when storing the
|
||||
* application
|
||||
* This method is called to notify the application that
|
||||
* new application is stored or updated in state store
|
||||
* @param event App event containing the app id and event type
|
||||
*/
|
||||
private void notifyDoneStoringApplication(ApplicationId appId,
|
||||
Exception storedException) {
|
||||
rmDispatcher.getEventHandler().handle(
|
||||
new RMAppNewSavedEvent(appId, storedException));
|
||||
private void notifyApplication(RMAppEvent event) {
|
||||
rmDispatcher.getEventHandler().handle(event);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void notifyDoneUpdatingApplication(ApplicationId appId,
|
||||
Exception storedException) {
|
||||
rmDispatcher.getEventHandler().handle(
|
||||
new RMAppUpdateSavedEvent(appId, storedException));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
/**
|
||||
* In (@link handleStoreEvent}, this method is called to notify the
|
||||
* application attempt that new attempt is stored in state store
|
||||
* @param appAttempt attempt that has been saved
|
||||
* This method is called to notify the application attempt
|
||||
* that new attempt is stored or updated in state store
|
||||
* @param event App attempt event containing the app attempt
|
||||
* id and event type
|
||||
*/
|
||||
private void notifyDoneStoringApplicationAttempt(ApplicationAttemptId attemptId,
|
||||
Exception storedException) {
|
||||
rmDispatcher.getEventHandler().handle(
|
||||
new RMAppAttemptNewSavedEvent(attemptId, storedException));
|
||||
private void notifyApplicationAttempt(RMAppAttemptEvent event) {
|
||||
rmDispatcher.getEventHandler().handle(event);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void notifyDoneUpdatingApplicationAttempt(ApplicationAttemptId attemptId,
|
||||
Exception updatedException) {
|
||||
rmDispatcher.getEventHandler().handle(
|
||||
new RMAppAttemptUpdateSavedEvent(attemptId, updatedException));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* EventHandler implementation which forward events to the FSRMStateStore
|
||||
* This hides the EventHandle methods of the store from its public interface
|
||||
|
@ -820,17 +820,6 @@ private static final class AddApplicationToSchedulerTransition extends
|
||||
RMAppTransition {
|
||||
@Override
|
||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||
if (event instanceof RMAppNewSavedEvent) {
|
||||
RMAppNewSavedEvent storeEvent = (RMAppNewSavedEvent) event;
|
||||
// For HA this exception needs to be handled by giving up
|
||||
// master status if we got fenced
|
||||
if (((RMAppNewSavedEvent) event).getStoredException() != null) {
|
||||
LOG.error(
|
||||
"Failed to store application: " + storeEvent.getApplicationId(),
|
||||
storeEvent.getStoredException());
|
||||
ExitUtil.terminate(1, storeEvent.getStoredException());
|
||||
}
|
||||
}
|
||||
app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
||||
app.submissionContext.getQueue(), app.user));
|
||||
}
|
||||
@ -848,13 +837,6 @@ private static final class FinalStateSavedTransition implements
|
||||
|
||||
@Override
|
||||
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
|
||||
RMAppUpdateSavedEvent storeEvent = (RMAppUpdateSavedEvent) event;
|
||||
if (storeEvent.getUpdatedException() != null) {
|
||||
LOG.error("Failed to update the final state of application"
|
||||
+ storeEvent.getApplicationId(), storeEvent.getUpdatedException());
|
||||
ExitUtil.terminate(1, storeEvent.getUpdatedException());
|
||||
}
|
||||
|
||||
if (app.transitionTodo instanceof SingleArcTransition) {
|
||||
((SingleArcTransition) app.transitionTodo).transition(app,
|
||||
app.eventCausingFinalSaving);
|
||||
|
@ -1,36 +0,0 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
|
||||
public class RMAppUpdateSavedEvent extends RMAppEvent {
|
||||
|
||||
private final Exception updatedException;
|
||||
|
||||
public RMAppUpdateSavedEvent(ApplicationId appId, Exception updatedException) {
|
||||
super(appId, RMAppEventType.APP_UPDATE_SAVED);
|
||||
this.updatedException = updatedException;
|
||||
}
|
||||
|
||||
public Exception getUpdatedException() {
|
||||
return updatedException;
|
||||
}
|
||||
|
||||
}
|
@ -80,11 +80,9 @@
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
||||
@ -398,7 +396,6 @@ RMAppAttemptEventType.STATUS_UPDATE, new StatusUpdateTransition())
|
||||
RMAppAttemptState.KILLED,
|
||||
RMAppAttemptState.KILLED,
|
||||
EnumSet.of(RMAppAttemptEventType.ATTEMPT_ADDED,
|
||||
RMAppAttemptEventType.EXPIRE,
|
||||
RMAppAttemptEventType.LAUNCHED,
|
||||
RMAppAttemptEventType.LAUNCH_FAILED,
|
||||
RMAppAttemptEventType.EXPIRE,
|
||||
@ -906,8 +903,6 @@ private static final class AttemptStoredTransition extends BaseTransition {
|
||||
@Override
|
||||
public void transition(RMAppAttemptImpl appAttempt,
|
||||
RMAppAttemptEvent event) {
|
||||
appAttempt.checkAttemptStoreError(event);
|
||||
|
||||
appAttempt.launchAttempt();
|
||||
}
|
||||
}
|
||||
@ -1059,14 +1054,6 @@ private static class FinalStateSavedTransition implements
|
||||
@Override
|
||||
public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
|
||||
RMAppAttemptEvent event) {
|
||||
RMAppAttemptUpdateSavedEvent storeEvent = (RMAppAttemptUpdateSavedEvent) event;
|
||||
if (storeEvent.getUpdatedException() != null) {
|
||||
LOG.error("Failed to update the final state of application attempt: "
|
||||
+ storeEvent.getApplicationAttemptId(),
|
||||
storeEvent.getUpdatedException());
|
||||
ExitUtil.terminate(1, storeEvent.getUpdatedException());
|
||||
}
|
||||
|
||||
RMAppAttemptEvent causeEvent = appAttempt.eventCausingFinalSaving;
|
||||
|
||||
if (appAttempt.transitionTodo instanceof SingleArcTransition) {
|
||||
@ -1196,8 +1183,6 @@ private static final class UnmanagedAMAttemptSavedTransition
|
||||
@Override
|
||||
public void transition(RMAppAttemptImpl appAttempt,
|
||||
RMAppAttemptEvent event) {
|
||||
appAttempt.checkAttemptStoreError(event);
|
||||
|
||||
// create AMRMToken
|
||||
appAttempt.amrmToken =
|
||||
appAttempt.rmContext.getAMRMTokenSecretManager().createAndGetAMRMToken(
|
||||
@ -1690,18 +1675,6 @@ private void attemptLaunched() {
|
||||
rmContext.getAMLivelinessMonitor().register(getAppAttemptId());
|
||||
}
|
||||
|
||||
private void checkAttemptStoreError(RMAppAttemptEvent event) {
|
||||
RMAppAttemptNewSavedEvent storeEvent = (RMAppAttemptNewSavedEvent) event;
|
||||
if(storeEvent.getStoredException() != null)
|
||||
{
|
||||
// This needs to be handled for HA and give up master status if we got
|
||||
// fenced
|
||||
LOG.error("Failed to store attempt: " + getAppAttemptId(),
|
||||
storeEvent.getStoredException());
|
||||
ExitUtil.terminate(1, storeEvent.getStoredException());
|
||||
}
|
||||
}
|
||||
|
||||
private void storeAttempt() {
|
||||
// store attempt data in a non-blocking manner to prevent dispatcher
|
||||
// thread starvation and wait for state to be saved
|
||||
|
@ -1,39 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
|
||||
|
||||
public class RMAppAttemptNewSavedEvent extends RMAppAttemptEvent {
|
||||
|
||||
final Exception storedException;
|
||||
|
||||
public RMAppAttemptNewSavedEvent(ApplicationAttemptId appAttemptId,
|
||||
Exception storedException) {
|
||||
super(appAttemptId, RMAppAttemptEventType.ATTEMPT_NEW_SAVED);
|
||||
this.storedException = storedException;
|
||||
}
|
||||
|
||||
public Exception getStoredException() {
|
||||
return storedException;
|
||||
}
|
||||
|
||||
}
|
@ -1,38 +0,0 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
|
||||
|
||||
public class RMAppAttemptUpdateSavedEvent extends RMAppAttemptEvent {
|
||||
|
||||
final Exception updatedException;
|
||||
|
||||
public RMAppAttemptUpdateSavedEvent(ApplicationAttemptId appAttemptId,
|
||||
Exception updatedException) {
|
||||
super(appAttemptId, RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED);
|
||||
this.updatedException = updatedException;
|
||||
}
|
||||
|
||||
public Exception getUpdatedException() {
|
||||
return updatedException;
|
||||
}
|
||||
}
|
@ -116,6 +116,18 @@ public Resource getFairShare() {
return fairShare;
}

/**
* Returns true if the queue has at least one app running. Always returns true
* for AppSchedulables.
*/
public boolean isActive() {
if (this instanceof FSQueue) {
FSQueue queue = (FSQueue) this;
return queue.getNumRunnableApps() > 0;
}
return true;
}

/** Convenient toString implementation for debugging. */
@Override
public String toString() {
@ -17,6 +17,7 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies;

import java.util.ArrayList;
import java.util.Collection;

import org.apache.hadoop.yarn.api.records.Resource;
@ -33,7 +34,31 @@
public class ComputeFairShares {

private static final int COMPUTE_FAIR_SHARES_ITERATIONS = 25;

/**
* Compute the fair share of the given schedulables. Fair share is an
* allocation of shares considering only active schedulables, i.e.
* schedulables which have running apps.
*
* @param schedulables
* @param totalResources
* @param type
*/
public static void computeShares(
Collection<? extends Schedulable> schedulables, Resource totalResources,
ResourceType type) {
Collection<Schedulable> activeSchedulables = new ArrayList<Schedulable>();
for (Schedulable sched : schedulables) {
if (sched.isActive()) {
activeSchedulables.add(sched);
} else {
setResourceValue(0, sched.getFairShare(), type);
}
}

computeSharesInternal(activeSchedulables, totalResources, type);
}

/**
* Given a set of Schedulables and a number of slots, compute their weighted
* fair shares. The min and max shares and of the Schedulables are assumed to
@ -75,7 +100,7 @@ public class ComputeFairShares {
* because resourceUsedWithWeightToResourceRatio is linear-time and the number of
* iterations of binary search is a constant (dependent on desired precision).
*/
public static void computeShares(
private static void computeSharesInternal(
Collection<? extends Schedulable> schedulables, Resource totalResources,
ResourceType type) {
if (schedulables.isEmpty()) {
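
The behavioral change above is that only active schedulables take part in the share computation, while inactive ones are pinned to a fair share of 0. A small self-contained sketch of that split follows; it is illustrative only, with invented class and queue names, and a plain weight-proportional division standing in for the real binary-search computation in computeSharesInternal.

import java.util.ArrayList;
import java.util.List;

public class FairShareSketch {
  static class Item {
    final String name;
    final double weight;
    final boolean active;
    double fairShare;
    Item(String name, double weight, boolean active) {
      this.name = name; this.weight = weight; this.active = active;
    }
  }

  // Inactive items get a fair share of 0; the remaining capacity is split
  // among active items in proportion to their weights.
  static void computeShares(List<Item> items, double totalCapacity) {
    List<Item> active = new ArrayList<>();
    double totalWeight = 0;
    for (Item item : items) {
      if (item.active) {
        active.add(item);
        totalWeight += item.weight;
      } else {
        item.fairShare = 0;
      }
    }
    if (totalWeight == 0) {
      return;
    }
    for (Item item : active) {
      item.fairShare = totalCapacity * item.weight / totalWeight;
    }
  }

  public static void main(String[] args) {
    List<Item> queues = new ArrayList<>();
    queues.add(new Item("queue1", 1.0, true));
    queues.add(new Item("queue2", 1.0, true));
    queues.add(new Item("idle", 1.0, false));
    computeShares(queues, 20480);   // e.g. 20 GB of cluster memory
    for (Item q : queues) {
      System.out.println(q.name + " -> " + q.fairShare);
    }
    // prints: queue1 -> 10240.0, queue2 -> 10240.0, idle -> 0.0
  }
}
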
@ -386,7 +386,8 @@ public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
|
||||
ApplicationState appState =
|
||||
memStore.getState().getApplicationState().get(app1.getApplicationId());
|
||||
// AM should be restarted even though max-am-attempt is 1.
|
||||
MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
|
||||
MockAM am2 =
|
||||
rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
|
||||
RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
|
||||
Assert.assertTrue(((RMAppAttemptImpl) attempt2).mayBeLastAttempt());
|
||||
|
||||
@ -398,7 +399,8 @@ public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
|
||||
am2.waitForState(RMAppAttemptState.FAILED);
|
||||
Assert.assertTrue(! attempt2.shouldCountTowardsMaxAttemptRetry());
|
||||
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
|
||||
MockAM am3 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
|
||||
MockAM am3 =
|
||||
rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 3, nm1);
|
||||
RMAppAttempt attempt3 = app1.getCurrentAppAttempt();
|
||||
Assert.assertTrue(((RMAppAttemptImpl) attempt3).mayBeLastAttempt());
|
||||
|
||||
@ -421,7 +423,8 @@ public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
|
||||
.getAMContainerExitStatus());
|
||||
|
||||
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
|
||||
MockAM am4 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
|
||||
MockAM am4 =
|
||||
rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 4, nm1);
|
||||
RMAppAttempt attempt4 = app1.getCurrentAppAttempt();
|
||||
Assert.assertTrue(((RMAppAttemptImpl) attempt4).mayBeLastAttempt());
|
||||
|
||||
|
@ -65,8 +65,8 @@
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
|
||||
import org.apache.hadoop.yarn.server.security.MasterKeyData;
|
||||
@ -77,10 +77,9 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{
|
||||
public static final Log LOG = LogFactory.getLog(RMStateStoreTestBase.class);
|
||||
|
||||
static class TestDispatcher implements
|
||||
Dispatcher, EventHandler<RMAppAttemptNewSavedEvent> {
|
||||
Dispatcher, EventHandler<RMAppAttemptEvent> {
|
||||
|
||||
ApplicationAttemptId attemptId;
|
||||
Exception storedException;
|
||||
|
||||
boolean notified = false;
|
||||
|
||||
@ -91,9 +90,8 @@ public void register(Class<? extends Enum> eventType,
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handle(RMAppAttemptNewSavedEvent event) {
|
||||
public void handle(RMAppAttemptEvent event) {
|
||||
assertEquals(attemptId, event.getApplicationAttemptId());
|
||||
assertEquals(storedException, event.getStoredException());
|
||||
notified = true;
|
||||
synchronized (this) {
|
||||
notifyAll();
|
||||
@ -163,7 +161,6 @@ ContainerId storeAttempt(RMStateStore store, ApplicationAttemptId attemptId,
|
||||
when(mockAttempt.getClientTokenMasterKey())
|
||||
.thenReturn(clientTokenMasterKey);
|
||||
dispatcher.attemptId = attemptId;
|
||||
dispatcher.storedException = null;
|
||||
store.storeNewApplicationAttempt(mockAttempt);
|
||||
waitNotify(dispatcher);
|
||||
return container.getId();
|
||||
|
@ -60,7 +60,6 @@
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
|
||||
@ -328,15 +327,15 @@ private void assertFailed(RMApp application, String regex) {
|
||||
|
||||
private void sendAppUpdateSavedEvent(RMApp application) {
|
||||
RMAppEvent event =
|
||||
new RMAppUpdateSavedEvent(application.getApplicationId(), null);
|
||||
new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_UPDATE_SAVED);
|
||||
application.handle(event);
|
||||
rmDispatcher.await();
|
||||
}
|
||||
|
||||
private void sendAttemptUpdateSavedEvent(RMApp application) {
|
||||
application.getCurrentAppAttempt().handle(
|
||||
new RMAppAttemptUpdateSavedEvent(application.getCurrentAppAttempt()
|
||||
.getAppAttemptId(), null));
|
||||
new RMAppAttemptEvent(application.getCurrentAppAttempt().getAppAttemptId(),
|
||||
RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
|
||||
}
|
||||
|
||||
protected RMApp testCreateAppNewSaving(
|
||||
@ -357,7 +356,7 @@ protected RMApp testCreateAppSubmittedNoRecovery(
|
||||
RMApp application = testCreateAppNewSaving(submissionContext);
|
||||
// NEW_SAVING => SUBMITTED event RMAppEventType.APP_SAVED
|
||||
RMAppEvent event =
|
||||
new RMAppNewSavedEvent(application.getApplicationId(), null);
|
||||
new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_NEW_SAVED);
|
||||
application.handle(event);
|
||||
assertStartTimeSet(application);
|
||||
assertAppState(RMAppState.SUBMITTED, application);
|
||||
@ -422,7 +421,7 @@ protected RMApp testCreateAppFinishing(
|
||||
RMApp application = testCreateAppFinalSaving(submissionContext);
|
||||
// FINAL_SAVING => FINISHING event RMAppEventType.APP_UPDATED
|
||||
RMAppEvent appUpdated =
|
||||
new RMAppUpdateSavedEvent(application.getApplicationId(), null);
|
||||
new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_UPDATE_SAVED);
|
||||
application.handle(appUpdated);
|
||||
assertAppState(RMAppState.FINISHING, application);
|
||||
assertTimesAtFinish(application);
|
||||
@ -763,7 +762,7 @@ public void testAppFinalSavingToFinished() throws IOException {
|
||||
application.handle(event);
|
||||
assertAppState(RMAppState.FINAL_SAVING, application);
|
||||
RMAppEvent appUpdated =
|
||||
new RMAppUpdateSavedEvent(application.getApplicationId(), null);
|
||||
new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_UPDATE_SAVED);
|
||||
application.handle(appUpdated);
|
||||
assertAppState(RMAppState.FINISHED, application);
|
||||
|
||||
|
@ -81,10 +81,8 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
@ -570,15 +568,15 @@ private void scheduleApplicationAttempt() {
    submitApplicationAttempt();
    applicationAttempt.handle(
        new RMAppAttemptEvent(
            applicationAttempt.getAppAttemptId(),
        applicationAttempt.getAppAttemptId(),
            RMAppAttemptEventType.ATTEMPT_ADDED));

    if(unmanagedAM){
      assertEquals(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING,
          applicationAttempt.getAppAttemptState());
      applicationAttempt.handle(
          new RMAppAttemptNewSavedEvent(
              applicationAttempt.getAppAttemptId(), null));
          new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
              RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
    }

    testAppAttemptScheduledState();
@ -616,8 +614,8 @@ private Container allocateApplicationAttempt() {
    assertEquals(RMAppAttemptState.ALLOCATED_SAVING,
        applicationAttempt.getAppAttemptState());
    applicationAttempt.handle(
        new RMAppAttemptNewSavedEvent(
            applicationAttempt.getAppAttemptId(), null));
        new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
            RMAppAttemptEventType.ATTEMPT_NEW_SAVED));

    testAppAttemptAllocatedState(container);

@ -696,8 +694,8 @@ private void sendAttemptUpdateSavedEvent(RMAppAttempt applicationAttempt) {
    assertEquals(RMAppAttemptState.FINAL_SAVING,
        applicationAttempt.getAppAttemptState());
    applicationAttempt.handle(
        new RMAppAttemptUpdateSavedEvent(
            applicationAttempt.getAppAttemptId(), null));
        new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
            RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
  }

  @Test

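The hunks above all apply the same substitution: the specialized saved-state event classes (RMAppUpdateSavedEvent, RMAppNewSavedEvent, RMAppAttemptNewSavedEvent, RMAppAttemptUpdateSavedEvent) are dropped, and the tests fire the plain event classes carrying an explicit event type instead. A minimal sketch of the new-style calls, using only names that appear in these hunks:

    // was: new RMAppUpdateSavedEvent(application.getApplicationId(), null)
    application.handle(
        new RMAppEvent(application.getApplicationId(),
            RMAppEventType.APP_UPDATE_SAVED));

    // was: new RMAppAttemptUpdateSavedEvent(attemptId, null)
    applicationAttempt.handle(
        new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
            RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
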
@ -292,6 +292,7 @@ public void testSimpleFairShareCalculation() throws IOException {
    // Have two queues which want entire cluster capacity
    createSchedulingRequest(10 * 1024, "queue1", "user1");
    createSchedulingRequest(10 * 1024, "queue2", "user1");
    createSchedulingRequest(10 * 1024, "root.default", "user1");

    scheduler.update();

@ -322,6 +323,7 @@ public void testSimpleHierarchicalFairShareCalculation() throws IOException {
    // Have two queues which want entire cluster capacity
    createSchedulingRequest(10 * 1024, "parent.queue2", "user1");
    createSchedulingRequest(10 * 1024, "parent.queue3", "user1");
    createSchedulingRequest(10 * 1024, "root.default", "user1");

    scheduler.update();

@ -766,8 +768,10 @@ public void testFairShareAndWeightsInNestedUserQueueRule() throws Exception {
    scheduler.handle(nodeEvent1);

    // user1,user2 submit their apps to parentq and create user queues
    scheduler.assignToQueue(rmApp1, "root.parentq", "user1");
    scheduler.assignToQueue(rmApp2, "root.parentq", "user2");
    createSchedulingRequest(10 * 1024, "root.parentq", "user1");
    createSchedulingRequest(10 * 1024, "root.parentq", "user2");
    // user3 submits app in default queue
    createSchedulingRequest(10 * 1024, "root.default", "user3");

    scheduler.update();

@ -1287,7 +1291,7 @@ public void testPreemptionIsNotDelayedToNextRound() throws Exception {
    scheduler.update();
    Resource toPreempt = scheduler.resToPreempt(scheduler.getQueueManager()
        .getLeafQueue("queueA.queueA2", false), clock.getTime());
    assertEquals(2980, toPreempt.getMemory());
    assertEquals(3277, toPreempt.getMemory());

    // verify if the 3 containers required by queueA2 are preempted in the same
    // round
@ -2446,8 +2450,12 @@ public void testQueueMaxAMShare() throws Exception {
    scheduler.update();

    FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1", true);
    assertEquals("Queue queue1's fair share should be 10240",
        10240, queue1.getFairShare().getMemory());
    assertEquals("Queue queue1's fair share should be 0", 0, queue1
        .getFairShare().getMemory());

    createSchedulingRequest(1 * 1024, "root.default", "user1");
    scheduler.update();
    scheduler.handle(updateEvent);

    Resource amResource1 = Resource.newInstance(1024, 1);
    Resource amResource2 = Resource.newInstance(2048, 2);
@ -2635,24 +2643,32 @@ public void testQueueMaxAMShareDefault() throws Exception {

    FSLeafQueue queue1 =
        scheduler.getQueueManager().getLeafQueue("queue1", true);
    assertEquals("Queue queue1's fair share should be 1366",
        1366, queue1.getFairShare().getMemory());
    assertEquals("Queue queue1's fair share should be 0", 0, queue1
        .getFairShare().getMemory());
    FSLeafQueue queue2 =
        scheduler.getQueueManager().getLeafQueue("queue2", true);
    assertEquals("Queue queue2's fair share should be 1366",
        1366, queue2.getFairShare().getMemory());
    assertEquals("Queue queue2's fair share should be 0", 0, queue2
        .getFairShare().getMemory());
    FSLeafQueue queue3 =
        scheduler.getQueueManager().getLeafQueue("queue3", true);
    assertEquals("Queue queue3's fair share should be 1366",
        1366, queue3.getFairShare().getMemory());
    assertEquals("Queue queue3's fair share should be 0", 0, queue3
        .getFairShare().getMemory());
    FSLeafQueue queue4 =
        scheduler.getQueueManager().getLeafQueue("queue4", true);
    assertEquals("Queue queue4's fair share should be 1366",
        1366, queue4.getFairShare().getMemory());
    assertEquals("Queue queue4's fair share should be 0", 0, queue4
        .getFairShare().getMemory());
    FSLeafQueue queue5 =
        scheduler.getQueueManager().getLeafQueue("queue5", true);
    assertEquals("Queue queue5's fair share should be 1366",
        1366, queue5.getFairShare().getMemory());
    assertEquals("Queue queue5's fair share should be 0", 0, queue5
        .getFairShare().getMemory());

    List<String> queues = Arrays.asList("root.default", "root.queue3",
        "root.queue4", "root.queue5");
    for (String queue : queues) {
      createSchedulingRequest(1 * 1024, queue, "user1");
      scheduler.update();
      scheduler.handle(updateEvent);
    }

    Resource amResource1 = Resource.newInstance(2048, 1);
    int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();

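The assertion changes in this file all point the same way: a queue with no runnable apps is now expected to report a fair share of zero, so each test first submits an extra request (to root.default, or to queues 3-5) before checking non-zero shares, and the expected preemption amount changes as well. A rough sketch of the rule the new expectations encode, assuming equally weighted queues (hypothetical helper, not part of the patch):

    // Illustrative only: capacity is split among queues with current demand;
    // an idle queue has an instantaneous fair share of 0.
    static int expectedFairShareMb(int clusterMemoryMb, boolean queueHasDemand,
        int queuesWithDemand) {
      return queueHasDemand ? clusterMemoryMb / queuesWithDemand : 0;
    }
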
@ -0,0 +1,308 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Collection;

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class TestFairSchedulerFairShare extends FairSchedulerTestBase {
  private final static String ALLOC_FILE = new File(TEST_DIR,
      TestFairSchedulerFairShare.class.getName() + ".xml").getAbsolutePath();

  @Before
  public void setup() throws IOException {
    conf = createConfiguration();
    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
  }

  @After
  public void teardown() {
    if (resourceManager != null) {
      resourceManager.stop();
      resourceManager = null;
    }
    conf = null;
  }

  private void createClusterWithQueuesAndOneNode(int mem, String policy)
      throws IOException {
    createClusterWithQueuesAndOneNode(mem, 0, policy);
  }

  private void createClusterWithQueuesAndOneNode(int mem, int vCores,
      String policy) throws IOException {
    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
    out.println("<?xml version=\"1.0\"?>");
    out.println("<allocations>");
    out.println("<queue name=\"root\" >");
    out.println(" <queue name=\"parentA\" >");
    out.println(" <weight>8</weight>");
    out.println(" <queue name=\"childA1\" />");
    out.println(" <queue name=\"childA2\" />");
    out.println(" <queue name=\"childA3\" />");
    out.println(" <queue name=\"childA4\" />");
    out.println(" </queue>");
    out.println(" <queue name=\"parentB\" >");
    out.println(" <weight>1</weight>");
    out.println(" <queue name=\"childB1\" />");
    out.println(" <queue name=\"childB2\" />");
    out.println(" </queue>");
    out.println("</queue>");
    out.println("<defaultQueueSchedulingPolicy>" + policy
        + "</defaultQueueSchedulingPolicy>");
    out.println("</allocations>");
    out.close();

    resourceManager = new MockRM(conf);
    resourceManager.start();
    scheduler = (FairScheduler) resourceManager.getResourceScheduler();

    RMNode node1 = MockNodes.newNodeInfo(1,
        Resources.createResource(mem, vCores), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.handle(nodeEvent1);
  }

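For reference, the allocation file that createClusterWithQueuesAndOneNode writes (reconstructed here from the println calls above; inner whitespace is approximate) sets up an 8:1 weight split between the two parents:

    <?xml version="1.0"?>
    <allocations>
      <queue name="root">
        <queue name="parentA">
          <weight>8</weight>
          <queue name="childA1" />
          <queue name="childA2" />
          <queue name="childA3" />
          <queue name="childA4" />
        </queue>
        <queue name="parentB">
          <weight>1</weight>
          <queue name="childB1" />
          <queue name="childB2" />
        </queue>
      </queue>
      <defaultQueueSchedulingPolicy>fair</defaultQueueSchedulingPolicy>
    </allocations>

with "drf" substituted for "fair" when the DRF test passes that policy.
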
  @Test
  public void testFairShareNoAppsRunning() throws IOException {
    int nodeCapacity = 16 * 1024;
    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");

    scheduler.update();
    // No apps are running in the cluster; verify that fair share is zero
    // for all queues under parentA and parentB.
    Collection<FSLeafQueue> leafQueues = scheduler.getQueueManager()
        .getLeafQueues();

    for (FSLeafQueue leaf : leafQueues) {
      if (leaf.getName().startsWith("root.parentA")) {
        assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity
            * 100, 0);
      } else if (leaf.getName().startsWith("root.parentB")) {
        assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity
            * 100, 0.1);
      }
    }
  }

  @Test
  public void testFairShareOneAppRunning() throws IOException {
    int nodeCapacity = 16 * 1024;
    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");

    // Run an app in childA1. Verify that its fair share is 100%, since it is
    // the only active queue. Also verify that the fair share of childA2 is 0,
    // since no app is running in it.
    createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");

    scheduler.update();

    assertEquals(
        100,
        (double) scheduler.getQueueManager()
            .getLeafQueue("root.parentA.childA1", false).getFairShare()
            .getMemory()
            / nodeCapacity * 100, 0.1);
    assertEquals(
        0,
        (double) scheduler.getQueueManager()
            .getLeafQueue("root.parentA.childA2", false).getFairShare()
            .getMemory()
            / nodeCapacity * 100, 0.1);
  }

  @Test
  public void testFairShareMultipleActiveQueuesUnderSameParent()
      throws IOException {
    int nodeCapacity = 16 * 1024;
    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");

    // Run apps in childA1, childA2 and childA3.
    createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
    createSchedulingRequest(2 * 1024, "root.parentA.childA2", "user2");
    createSchedulingRequest(2 * 1024, "root.parentA.childA3", "user3");

    scheduler.update();

    // Verify that each fair share is 100 / 3 = 33%.
    for (int i = 1; i <= 3; i++) {
      assertEquals(
          33,
          (double) scheduler.getQueueManager()
              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
              .getMemory()
              / nodeCapacity * 100, .9);
    }
  }

  @Test
  public void testFairShareMultipleActiveQueuesUnderDifferentParent()
      throws IOException {
    int nodeCapacity = 16 * 1024;
    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");

    // Run apps in childA1,childA2 which are under parentA
    createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
    createSchedulingRequest(3 * 1024, "root.parentA.childA2", "user2");

    // Run app in childB1 which is under parentB
    createSchedulingRequest(1 * 1024, "root.parentB.childB1", "user3");

    // Run app in root.default queue
    createSchedulingRequest(1 * 1024, "root.default", "user4");

    scheduler.update();

    // The two active child queues under parentA would
    // get fair share of 80/2=40%
    for (int i = 1; i <= 2; i++) {
      assertEquals(
          40,
          (double) scheduler.getQueueManager()
              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
              .getMemory()
              / nodeCapacity * 100, .9);
    }

    // The child queue under parentB would get a fair share of 10%,
    // basically all of parentB's fair share
    assertEquals(
        10,
        (double) scheduler.getQueueManager()
            .getLeafQueue("root.parentB.childB1", false).getFairShare()
            .getMemory()
            / nodeCapacity * 100, .9);
  }

  @Test
  public void testFairShareResetsToZeroWhenAppsComplete() throws IOException {
    int nodeCapacity = 16 * 1024;
    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");

    // Run apps in childA1 and childA2, which are under parentA.
    ApplicationAttemptId app1 = createSchedulingRequest(2 * 1024,
        "root.parentA.childA1", "user1");
    ApplicationAttemptId app2 = createSchedulingRequest(3 * 1024,
        "root.parentA.childA2", "user2");

    scheduler.update();

    // Verify that both active queues under parentA get a 50% fair share.
    for (int i = 1; i <= 2; i++) {
      assertEquals(
          50,
          (double) scheduler.getQueueManager()
              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
              .getMemory()
              / nodeCapacity * 100, .9);
    }
    // Let the app under childA1 complete. This should reset childA1's fair
    // share to zero, since the queue has no apps running. childA2's fair
    // share should increase to 100%, since it is now the only active queue.
    AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(
        app1, RMAppAttemptState.FINISHED, false);

    scheduler.handle(appRemovedEvent1);
    scheduler.update();

    assertEquals(
        0,
        (double) scheduler.getQueueManager()
            .getLeafQueue("root.parentA.childA1", false).getFairShare()
            .getMemory()
            / nodeCapacity * 100, 0);
    assertEquals(
        100,
        (double) scheduler.getQueueManager()
            .getLeafQueue("root.parentA.childA2", false).getFairShare()
            .getMemory()
            / nodeCapacity * 100, 0.1);
  }

  @Test
  public void testFairShareWithDRFMultipleActiveQueuesUnderDifferentParent()
      throws IOException {
    int nodeMem = 16 * 1024;
    int nodeVCores = 10;
    createClusterWithQueuesAndOneNode(nodeMem, nodeVCores, "drf");

    // Run apps in childA1,childA2 which are under parentA
    createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
    createSchedulingRequest(3 * 1024, "root.parentA.childA2", "user2");

    // Run app in childB1 which is under parentB
    createSchedulingRequest(1 * 1024, "root.parentB.childB1", "user3");

    // Run app in root.default queue
    createSchedulingRequest(1 * 1024, "root.default", "user4");

    scheduler.update();

    // The two active child queues under parentA would
    // get 80/2=40% memory and vcores
    for (int i = 1; i <= 2; i++) {
      assertEquals(
          40,
          (double) scheduler.getQueueManager()
              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
              .getMemory()
              / nodeMem * 100, .9);
      assertEquals(
          40,
          (double) scheduler.getQueueManager()
              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
              .getVirtualCores()
              / nodeVCores * 100, .9);
    }

    // The only active child queue under parentB would get 10% memory and vcores
    assertEquals(
        10,
        (double) scheduler.getQueueManager()
            .getLeafQueue("root.parentB.childB1", false).getFairShare()
            .getMemory()
            / nodeMem * 100, .9);
    assertEquals(
        10,
        (double) scheduler.getQueueManager()
            .getLeafQueue("root.parentB.childB1", false).getFairShare()
            .getVirtualCores()
            / nodeVCores * 100, .9);
  }
}
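As a quick check on the 40% and 10% expectations used throughout these tests: with parentA weighted 8, parentB weighted 1, and root.default active at what is presumably the scheduler's default weight of 1, parentA receives 8/10 of the 16384 MB node, about 13107 MB; split across its two active children that is roughly 6553 MB each, i.e. about 40% of the node, while parentB's single active child keeps the full 1/10, about 1638 MB or 10%. The assertions allow a 0.9 percentage-point delta around those values.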