diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java
index 0f046ae9f3e..a69ee46bbae 100644
--- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java
+++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java
@@ -327,7 +327,11 @@ public class KerberosAuthenticator implements Authenticator {
}
});
} catch (PrivilegedActionException ex) {
- throw new AuthenticationException(ex.getException());
+ if (ex.getException() instanceof IOException) {
+ throw (IOException) ex.getException();
+ } else {
+ throw new AuthenticationException(ex.getException());
+ }
} catch (LoginException ex) {
throw new AuthenticationException(ex);
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
index 489c35496fa..f206861c02f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
@@ -61,6 +61,7 @@ import javax.security.sasl.Sasl;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
@@ -107,7 +108,7 @@ import com.google.protobuf.CodedOutputStream;
*
* @see Server
*/
-@InterfaceAudience.LimitedPrivate(value = { "Common", "HDFS", "MapReduce", "Yarn" })
+@Public
@InterfaceStability.Evolving
public class Client implements AutoCloseable {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
index eb28ad57d11..1cc9f1d3eeb 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
@@ -74,6 +74,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configuration.IntegerRanges;
@@ -133,7 +134,7 @@ import com.google.protobuf.Message.Builder;
*
* @see Client
*/
-@InterfaceAudience.LimitedPrivate(value = { "Common", "HDFS", "MapReduce", "Yarn" })
+@Public
@InterfaceStability.Evolving
public abstract class Server {
private final boolean authorize;
@@ -439,7 +440,7 @@ public abstract class Server {
/**
* Checks if LogSlowRPC is set true.
- * @return
+ * @return true if LogSlowRPC is set to true, false otherwise.
*/
protected boolean isLogSlowRPC() {
return logSlowRPC;
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index b3436da6702..96b108f0f37 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -765,6 +765,13 @@
+
+ fs.s3a.path.style.access
+ Enable S3 path style access, i.e. disabling the default virtual hosting behaviour.
+ Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
+
+
+
fs.s3a.proxy.host
Hostname of the (optional) proxy server for S3 connections.
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java
index 08a3d93d2fa..4e421da2219 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java
@@ -60,14 +60,14 @@ public class TestLoadBalancingKMSClientProvider {
providers[2].getKMSUrl()));
kp = new KMSClientProvider.Factory().createProvider(new URI(
- "kms://http@host1;host2;host3:16000/kms/foo"), conf);
+ "kms://http@host1;host2;host3:9600/kms/foo"), conf);
assertTrue(kp instanceof LoadBalancingKMSClientProvider);
providers =
((LoadBalancingKMSClientProvider) kp).getProviders();
assertEquals(3, providers.length);
- assertEquals(Sets.newHashSet("http://host1:16000/kms/foo/v1/",
- "http://host2:16000/kms/foo/v1/",
- "http://host3:16000/kms/foo/v1/"),
+ assertEquals(Sets.newHashSet("http://host1:9600/kms/foo/v1/",
+ "http://host2:9600/kms/foo/v1/",
+ "http://host3:9600/kms/foo/v1/"),
Sets.newHashSet(providers[0].getKMSUrl(),
providers[1].getKMSUrl(),
providers[2].getKMSUrl()));
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestAnd.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestAnd.java
index d82a25e07b6..bb5ca4ca1c5 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestAnd.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestAnd.java
@@ -26,12 +26,17 @@ import java.util.Deque;
import java.util.LinkedList;
import org.apache.hadoop.fs.shell.PathData;
+import org.junit.Rule;
+import org.junit.rules.Timeout;
import org.junit.Test;
public class TestAnd {
+ @Rule
+ public Timeout globalTimeout = new Timeout(10000);
+
// test all expressions passing
- @Test(timeout = 1000)
+ @Test
public void testPass() throws IOException {
And and = new And();
@@ -56,7 +61,7 @@ public class TestAnd {
}
// test the first expression failing
- @Test(timeout = 1000)
+ @Test
public void testFailFirst() throws IOException {
And and = new And();
@@ -80,7 +85,7 @@ public class TestAnd {
}
// test the second expression failing
- @Test(timeout = 1000)
+ @Test
public void testFailSecond() throws IOException {
And and = new And();
@@ -105,7 +110,7 @@ public class TestAnd {
}
// test both expressions failing
- @Test(timeout = 1000)
+ @Test
public void testFailBoth() throws IOException {
And and = new And();
@@ -129,7 +134,7 @@ public class TestAnd {
}
// test the first expression stopping
- @Test(timeout = 1000)
+ @Test
public void testStopFirst() throws IOException {
And and = new And();
@@ -154,7 +159,7 @@ public class TestAnd {
}
// test the second expression stopping
- @Test(timeout = 1000)
+ @Test
public void testStopSecond() throws IOException {
And and = new And();
@@ -179,7 +184,7 @@ public class TestAnd {
}
// test first expression stopping and second failing
- @Test(timeout = 1000)
+ @Test
public void testStopFail() throws IOException {
And and = new And();
@@ -204,7 +209,7 @@ public class TestAnd {
}
// test setOptions is called on child
- @Test(timeout = 1000)
+ @Test
public void testSetOptions() throws IOException {
And and = new And();
Expression first = mock(Expression.class);
@@ -224,7 +229,7 @@ public class TestAnd {
}
// test prepare is called on child
- @Test(timeout = 1000)
+ @Test
public void testPrepare() throws IOException {
And and = new And();
Expression first = mock(Expression.class);
@@ -243,7 +248,7 @@ public class TestAnd {
}
// test finish is called on child
- @Test(timeout = 1000)
+ @Test
public void testFinish() throws IOException {
And and = new And();
Expression first = mock(Expression.class);
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestFilterExpression.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestFilterExpression.java
index 5986a06b23f..7ad0574e183 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestFilterExpression.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestFilterExpression.java
@@ -26,12 +26,17 @@ import java.util.Deque;
import org.apache.hadoop.fs.shell.PathData;
import org.junit.Before;
+import org.junit.Rule;
+import org.junit.rules.Timeout;
import org.junit.Test;
public class TestFilterExpression {
private Expression expr;
private FilterExpression test;
+ @Rule
+ public Timeout globalTimeout = new Timeout(10000);
+
@Before
public void setup() {
expr = mock(Expression.class);
@@ -40,13 +45,13 @@ public class TestFilterExpression {
}
// test that the child expression is correctly set
- @Test(timeout = 1000)
+ @Test
public void expression() throws IOException {
assertEquals(expr, test.expression);
}
// test that setOptions method is called
- @Test(timeout = 1000)
+ @Test
public void setOptions() throws IOException {
FindOptions options = mock(FindOptions.class);
test.setOptions(options);
@@ -55,7 +60,7 @@ public class TestFilterExpression {
}
// test the apply method is called and the result returned
- @Test(timeout = 1000)
+ @Test
public void apply() throws IOException {
PathData item = mock(PathData.class);
when(expr.apply(item, -1)).thenReturn(Result.PASS).thenReturn(Result.FAIL);
@@ -66,7 +71,7 @@ public class TestFilterExpression {
}
// test that the finish method is called
- @Test(timeout = 1000)
+ @Test
public void finish() throws IOException {
test.finish();
verify(expr).finish();
@@ -74,7 +79,7 @@ public class TestFilterExpression {
}
// test that the getUsage method is called
- @Test(timeout = 1000)
+ @Test
public void getUsage() {
String[] usage = new String[] { "Usage 1", "Usage 2", "Usage 3" };
when(expr.getUsage()).thenReturn(usage);
@@ -84,7 +89,7 @@ public class TestFilterExpression {
}
// test that the getHelp method is called
- @Test(timeout = 1000)
+ @Test
public void getHelp() {
String[] help = new String[] { "Help 1", "Help 2", "Help 3" };
when(expr.getHelp()).thenReturn(help);
@@ -94,7 +99,7 @@ public class TestFilterExpression {
}
// test that the isAction method is called
- @Test(timeout = 1000)
+ @Test
public void isAction() {
when(expr.isAction()).thenReturn(true).thenReturn(false);
assertTrue(test.isAction());
@@ -104,7 +109,7 @@ public class TestFilterExpression {
}
// test that the isOperator method is called
- @Test(timeout = 1000)
+ @Test
public void isOperator() {
when(expr.isAction()).thenReturn(true).thenReturn(false);
assertTrue(test.isAction());
@@ -114,7 +119,7 @@ public class TestFilterExpression {
}
// test that the getPrecedence method is called
- @Test(timeout = 1000)
+ @Test
public void getPrecedence() {
int precedence = 12345;
when(expr.getPrecedence()).thenReturn(precedence);
@@ -124,7 +129,7 @@ public class TestFilterExpression {
}
// test that the addChildren method is called
- @Test(timeout = 1000)
+ @Test
public void addChildren() {
@SuppressWarnings("unchecked")
Deque expressions = mock(Deque.class);
@@ -134,7 +139,7 @@ public class TestFilterExpression {
}
// test that the addArguments method is called
- @Test(timeout = 1000)
+ @Test
public void addArguments() {
@SuppressWarnings("unchecked")
Deque args = mock(Deque.class);
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestFind.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestFind.java
index 8bfcec66146..716230aa4c4 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestFind.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestFind.java
@@ -39,11 +39,12 @@ import org.apache.hadoop.fs.shell.find.FindOptions;
import org.apache.hadoop.fs.shell.find.Result;
import org.junit.Before;
import org.junit.Rule;
-import org.junit.Test;
import org.junit.rules.Timeout;
+import org.junit.Test;
import org.mockito.InOrder;
public class TestFind {
+
@Rule
public Timeout timeout = new Timeout(10000);
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestIname.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestIname.java
index 6e42fce58fe..c204322f1e9 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestIname.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestIname.java
@@ -25,12 +25,17 @@ import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.shell.PathData;
import org.junit.Before;
+import org.junit.Rule;
+import org.junit.rules.Timeout;
import org.junit.Test;
public class TestIname {
private FileSystem mockFs;
private Name.Iname name;
+ @Rule
+ public Timeout globalTimeout = new Timeout(10000);
+
@Before
public void resetMock() throws IOException {
mockFs = MockFileSystem.setup();
@@ -44,7 +49,7 @@ public class TestIname {
}
// test a matching name (same case)
- @Test(timeout = 1000)
+ @Test
public void applyMatch() throws IOException {
setup("name");
PathData item = new PathData("/directory/path/name", mockFs.getConf());
@@ -52,7 +57,7 @@ public class TestIname {
}
// test a non-matching name
- @Test(timeout = 1000)
+ @Test
public void applyNotMatch() throws IOException {
setup("name");
PathData item = new PathData("/directory/path/notname", mockFs.getConf());
@@ -60,7 +65,7 @@ public class TestIname {
}
// test a matching name (different case)
- @Test(timeout = 1000)
+ @Test
public void applyMixedCase() throws IOException {
setup("name");
PathData item = new PathData("/directory/path/NaMe", mockFs.getConf());
@@ -68,7 +73,7 @@ public class TestIname {
}
// test a matching glob pattern (same case)
- @Test(timeout = 1000)
+ @Test
public void applyGlob() throws IOException {
setup("n*e");
PathData item = new PathData("/directory/path/name", mockFs.getConf());
@@ -76,7 +81,7 @@ public class TestIname {
}
// test a matching glob pattern (different case)
- @Test(timeout = 1000)
+ @Test
public void applyGlobMixedCase() throws IOException {
setup("n*e");
PathData item = new PathData("/directory/path/NaMe", mockFs.getConf());
@@ -84,7 +89,7 @@ public class TestIname {
}
// test a non-matching glob pattern
- @Test(timeout = 1000)
+ @Test
public void applyGlobNotMatch() throws IOException {
setup("n*e");
PathData item = new PathData("/directory/path/notmatch", mockFs.getConf());
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestName.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestName.java
index 2c77fe14b72..81a405f4cfd 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestName.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestName.java
@@ -25,12 +25,17 @@ import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.shell.PathData;
import org.junit.Before;
+import org.junit.Rule;
+import org.junit.rules.Timeout;
import org.junit.Test;
public class TestName {
private FileSystem mockFs;
private Name name;
+ @Rule
+ public Timeout globalTimeout = new Timeout(10000);
+
@Before
public void resetMock() throws IOException {
mockFs = MockFileSystem.setup();
@@ -44,7 +49,7 @@ public class TestName {
}
// test a matching name
- @Test(timeout = 1000)
+ @Test
public void applyMatch() throws IOException {
setup("name");
PathData item = new PathData("/directory/path/name", mockFs.getConf());
@@ -52,7 +57,7 @@ public class TestName {
}
// test a non-matching name
- @Test(timeout = 1000)
+ @Test
public void applyNotMatch() throws IOException {
setup("name");
PathData item = new PathData("/directory/path/notname", mockFs.getConf());
@@ -60,7 +65,7 @@ public class TestName {
}
// test a different case name
- @Test(timeout = 1000)
+ @Test
public void applyMixedCase() throws IOException {
setup("name");
PathData item = new PathData("/directory/path/NaMe", mockFs.getConf());
@@ -68,7 +73,7 @@ public class TestName {
}
// test a matching glob pattern
- @Test(timeout = 1000)
+ @Test
public void applyGlob() throws IOException {
setup("n*e");
PathData item = new PathData("/directory/path/name", mockFs.getConf());
@@ -76,7 +81,7 @@ public class TestName {
}
// test a glob pattern with different case
- @Test(timeout = 1000)
+ @Test
public void applyGlobMixedCase() throws IOException {
setup("n*e");
PathData item = new PathData("/directory/path/NaMe", mockFs.getConf());
@@ -84,7 +89,7 @@ public class TestName {
}
// test a non-matching glob pattern
- @Test(timeout = 1000)
+ @Test
public void applyGlobNotMatch() throws IOException {
setup("n*e");
PathData item = new PathData("/directory/path/notmatch", mockFs.getConf());
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestPrint.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestPrint.java
index 2d276650b96..a5cacc7defb 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestPrint.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestPrint.java
@@ -23,23 +23,28 @@ import static org.mockito.Mockito.*;
import java.io.IOException;
import org.apache.hadoop.fs.shell.PathData;
-import org.junit.Test;
import java.io.PrintStream;
import org.apache.hadoop.fs.FileSystem;
import org.junit.Before;
+import org.junit.Rule;
+import org.junit.rules.Timeout;
+import org.junit.Test;
public class TestPrint {
private FileSystem mockFs;
+ @Rule
+ public Timeout globalTimeout = new Timeout(10000);
+
@Before
public void resetMock() throws IOException {
mockFs = MockFileSystem.setup();
}
// test the full path is printed to stdout
- @Test(timeout = 1000)
+ @Test
public void testPrint() throws IOException {
Print print = new Print();
PrintStream out = mock(PrintStream.class);
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestPrint0.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestPrint0.java
index 3b89438d308..20c9bd69470 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestPrint0.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestPrint0.java
@@ -23,23 +23,28 @@ import static org.mockito.Mockito.*;
import java.io.IOException;
import org.apache.hadoop.fs.shell.PathData;
-import org.junit.Test;
import java.io.PrintStream;
import org.apache.hadoop.fs.FileSystem;
import org.junit.Before;
+import org.junit.Rule;
+import org.junit.rules.Timeout;
+import org.junit.Test;
public class TestPrint0 {
private FileSystem mockFs;
+ @Rule
+ public Timeout globalTimeout = new Timeout(10000);
+
@Before
public void resetMock() throws IOException {
mockFs = MockFileSystem.setup();
}
// test the full path is printed to stdout with a '\0'
- @Test(timeout = 1000)
+ @Test
public void testPrint() throws IOException {
Print.Print0 print = new Print.Print0();
PrintStream out = mock(PrintStream.class);
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestResult.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestResult.java
index 1139220b94d..999ff598d77 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestResult.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/find/TestResult.java
@@ -19,12 +19,17 @@ package org.apache.hadoop.fs.shell.find;
import static org.junit.Assert.*;
+import org.junit.Rule;
+import org.junit.rules.Timeout;
import org.junit.Test;
public class TestResult {
+ @Rule
+ public Timeout globalTimeout = new Timeout(10000);
+
// test the PASS value
- @Test(timeout = 1000)
+ @Test
public void testPass() {
Result result = Result.PASS;
assertTrue(result.isPass());
@@ -32,7 +37,7 @@ public class TestResult {
}
// test the FAIL value
- @Test(timeout = 1000)
+ @Test
public void testFail() {
Result result = Result.FAIL;
assertFalse(result.isPass());
@@ -40,7 +45,7 @@ public class TestResult {
}
// test the STOP value
- @Test(timeout = 1000)
+ @Test
public void testStop() {
Result result = Result.STOP;
assertTrue(result.isPass());
@@ -48,7 +53,7 @@ public class TestResult {
}
// test combine method with two PASSes
- @Test(timeout = 1000)
+ @Test
public void combinePassPass() {
Result result = Result.PASS.combine(Result.PASS);
assertTrue(result.isPass());
@@ -56,7 +61,7 @@ public class TestResult {
}
// test the combine method with a PASS and a FAIL
- @Test(timeout = 1000)
+ @Test
public void combinePassFail() {
Result result = Result.PASS.combine(Result.FAIL);
assertFalse(result.isPass());
@@ -64,7 +69,7 @@ public class TestResult {
}
// test the combine method with a FAIL and a PASS
- @Test(timeout = 1000)
+ @Test
public void combineFailPass() {
Result result = Result.FAIL.combine(Result.PASS);
assertFalse(result.isPass());
@@ -72,7 +77,7 @@ public class TestResult {
}
// test the combine method with two FAILs
- @Test(timeout = 1000)
+ @Test
public void combineFailFail() {
Result result = Result.FAIL.combine(Result.FAIL);
assertFalse(result.isPass());
@@ -80,7 +85,7 @@ public class TestResult {
}
// test the combine method with a PASS and STOP
- @Test(timeout = 1000)
+ @Test
public void combinePassStop() {
Result result = Result.PASS.combine(Result.STOP);
assertTrue(result.isPass());
@@ -88,7 +93,7 @@ public class TestResult {
}
// test the combine method with a STOP and FAIL
- @Test(timeout = 1000)
+ @Test
public void combineStopFail() {
Result result = Result.STOP.combine(Result.FAIL);
assertFalse(result.isPass());
@@ -96,7 +101,7 @@ public class TestResult {
}
// test the combine method with a STOP and a PASS
- @Test(timeout = 1000)
+ @Test
public void combineStopPass() {
Result result = Result.STOP.combine(Result.PASS);
assertTrue(result.isPass());
@@ -104,7 +109,7 @@ public class TestResult {
}
// test the combine method with a FAIL and a STOP
- @Test(timeout = 1000)
+ @Test
public void combineFailStop() {
Result result = Result.FAIL.combine(Result.STOP);
assertFalse(result.isPass());
@@ -112,7 +117,7 @@ public class TestResult {
}
// test the negation of PASS
- @Test(timeout = 1000)
+ @Test
public void negatePass() {
Result result = Result.PASS.negate();
assertFalse(result.isPass());
@@ -120,7 +125,7 @@ public class TestResult {
}
// test the negation of FAIL
- @Test(timeout = 1000)
+ @Test
public void negateFail() {
Result result = Result.FAIL.negate();
assertTrue(result.isPass());
@@ -128,7 +133,7 @@ public class TestResult {
}
// test the negation of STOP
- @Test(timeout = 1000)
+ @Test
public void negateStop() {
Result result = Result.STOP.negate();
assertFalse(result.isPass());
@@ -136,7 +141,7 @@ public class TestResult {
}
// test equals with two PASSes
- @Test(timeout = 1000)
+ @Test
public void equalsPass() {
Result one = Result.PASS;
Result two = Result.PASS.combine(Result.PASS);
@@ -144,7 +149,7 @@ public class TestResult {
}
// test equals with two FAILs
- @Test(timeout = 1000)
+ @Test
public void equalsFail() {
Result one = Result.FAIL;
Result two = Result.FAIL.combine(Result.FAIL);
@@ -152,7 +157,7 @@ public class TestResult {
}
// test equals with two STOPS
- @Test(timeout = 1000)
+ @Test
public void equalsStop() {
Result one = Result.STOP;
Result two = Result.STOP.combine(Result.STOP);
@@ -160,7 +165,7 @@ public class TestResult {
}
// test all combinations of not equals
- @Test(timeout = 1000)
+ @Test
public void notEquals() {
assertFalse(Result.PASS.equals(Result.FAIL));
assertFalse(Result.PASS.equals(Result.STOP));
diff --git a/hadoop-common-project/hadoop-kms/src/main/conf/kms-env.sh b/hadoop-common-project/hadoop-kms/src/main/conf/kms-env.sh
index 7044fa86704..729e63a1218 100644
--- a/hadoop-common-project/hadoop-kms/src/main/conf/kms-env.sh
+++ b/hadoop-common-project/hadoop-kms/src/main/conf/kms-env.sh
@@ -24,7 +24,7 @@
# The HTTP port used by KMS
#
-# export KMS_HTTP_PORT=16000
+# export KMS_HTTP_PORT=9600
# The Admin port used by KMS
#
diff --git a/hadoop-common-project/hadoop-kms/src/main/libexec/kms-config.sh b/hadoop-common-project/hadoop-kms/src/main/libexec/kms-config.sh
index 5e1ffa40c9d..927b4af1fc4 100644
--- a/hadoop-common-project/hadoop-kms/src/main/libexec/kms-config.sh
+++ b/hadoop-common-project/hadoop-kms/src/main/libexec/kms-config.sh
@@ -37,7 +37,7 @@ function hadoop_subproject_init
export HADOOP_CATALINA_CONFIG="${HADOOP_CONF_DIR}"
export HADOOP_CATALINA_LOG="${HADOOP_LOG_DIR}"
- export HADOOP_CATALINA_HTTP_PORT="${KMS_HTTP_PORT:-16000}"
+ export HADOOP_CATALINA_HTTP_PORT="${KMS_HTTP_PORT:-9600}"
export HADOOP_CATALINA_ADMIN_PORT="${KMS_ADMIN_PORT:-$((HADOOP_CATALINA_HTTP_PORT+1))}"
export HADOOP_CATALINA_MAX_THREADS="${KMS_MAX_THREADS:-1000}"
export HADOOP_CATALINA_MAX_HTTP_HEADER_SIZE="${KMS_MAX_HTTP_HEADER_SIZE:-65536}"
diff --git a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm
index 65854cf1105..68663672b23 100644
--- a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm
+++ b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm
@@ -32,7 +32,7 @@ KMS is a Java web-application and it runs using a pre-configured Tomcat bundled
KMS Client Configuration
------------------------
-The KMS client `KeyProvider` uses the **kms** scheme, and the embedded URL must be the URL of the KMS. For example, for a KMS running on `http://localhost:16000/kms`, the KeyProvider URI is `kms://http@localhost:16000/kms`. And, for a KMS running on `https://localhost:16000/kms`, the KeyProvider URI is `kms://https@localhost:16000/kms`
+The KMS client `KeyProvider` uses the **kms** scheme, and the embedded URL must be the URL of the KMS. For example, for a KMS running on `http://localhost:9600/kms`, the KeyProvider URI is `kms://http@localhost:9600/kms`. And, for a KMS running on `https://localhost:9600/kms`, the KeyProvider URI is `kms://https@localhost:9600/kms`
KMS
---
@@ -178,7 +178,7 @@ $H3 Embedded Tomcat Configuration
To configure the embedded Tomcat go to the `share/hadoop/kms/tomcat/conf`.
-KMS pre-configures the HTTP and Admin ports in Tomcat's `server.xml` to 16000 and 16001.
+KMS pre-configures the HTTP and Admin ports in Tomcat's `server.xml` to 9600 and 9601.
Tomcat logs are also preconfigured to go to Hadoop's `logs/` directory.
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
index dc88e08991c..0f8279943d2 100755
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
@@ -811,7 +811,7 @@ public class DFSOutputStream extends FSOutputSummer
try {
if (retries == 0) {
throw new IOException("Unable to close file because the last block"
- + " does not have enough number of replicas.");
+ + last + " does not have enough number of replicas.");
}
retries--;
Thread.sleep(sleeptime);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 104d72379a4..8b50ef884f6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -305,6 +305,14 @@ public class BlockManager implements BlockStatsMXBean {
* processed again after aquiring lock again.
*/
private int numBlocksPerIteration;
+
+ /**
+ * Minimum size of a block that can be sent to the Balancer through getBlocks.
+ * After HDFS-8824 the small blocks are unused anyway, so there is no
+ * point in sending them to the balancer.
+ */
+ private long getBlocksMinBlockSize = -1;
+
/**
* Progress of the Reconstruction queues initialisation.
*/
@@ -414,6 +422,9 @@ public class BlockManager implements BlockStatsMXBean {
this.numBlocksPerIteration = conf.getInt(
DFSConfigKeys.DFS_BLOCK_MISREPLICATION_PROCESSING_LIMIT,
DFSConfigKeys.DFS_BLOCK_MISREPLICATION_PROCESSING_LIMIT_DEFAULT);
+ this.getBlocksMinBlockSize = conf.getLongBytes(
+ DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY,
+ DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_DEFAULT);
this.blockReportLeaseManager = new BlockReportLeaseManager(conf);
bmSafeMode = new BlockManagerSafeMode(this, namesystem, haEnabled, conf);
@@ -1179,6 +1190,9 @@ public class BlockManager implements BlockStatsMXBean {
while(totalSize blocks = st.getChildren("BLOCK");
+ List blocks = st.hasChildren("BLOCK") ?
+ st.getChildren("BLOCK") : new ArrayList();
this.blocks = new Block[blocks.size()];
for (int i = 0; i < blocks.size(); i++) {
this.blocks[i] = FSEditLogOp.blockFromXml(blocks.get(i));
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
index a35246f38b5..08d3da5405c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
@@ -427,6 +427,7 @@ public class DFSAdmin extends FsShell {
"\t[-allowSnapshot ]\n" +
"\t[-disallowSnapshot ]\n" +
"\t[-shutdownDatanode [upgrade]]\n" +
+ "\t[-evictWriters ]\n" +
"\t[-getDatanodeInfo ]\n" +
"\t[-metasave filename]\n" +
"\t[-triggerBlockReport [-incremental] ]\n" +
@@ -1829,6 +1830,9 @@ public class DFSAdmin extends FsShell {
} else if ("-shutdownDatanode".equals(cmd)) {
System.err.println("Usage: hdfs dfsadmin"
+ " [-shutdownDatanode [upgrade]]");
+ } else if ("-evictWriters".equals(cmd)) {
+ System.err.println("Usage: hdfs dfsadmin"
+ + " [-evictWriters ]");
} else if ("-getDatanodeInfo".equals(cmd)) {
System.err.println("Usage: hdfs dfsadmin"
+ " [-getDatanodeInfo ]");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
index 035abd63187..a6c8b4c4c9a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md
@@ -385,6 +385,7 @@ Usage:
hdfs dfsadmin [-allowSnapshot ]
hdfs dfsadmin [-disallowSnapshot ]
hdfs dfsadmin [-shutdownDatanode [upgrade]]
+ hdfs dfsadmin [-evictWriters ]
hdfs dfsadmin [-getDatanodeInfo ]
hdfs dfsadmin [-metasave filename]
hdfs dfsadmin [-triggerBlockReport [-incremental] ]
@@ -419,6 +420,7 @@ Usage:
| `-allowSnapshot` \ | Allowing snapshots of a directory to be created. If the operation completes successfully, the directory becomes snapshottable. See the [HDFS Snapshot Documentation](./HdfsSnapshots.html) for more information. |
| `-disallowSnapshot` \ | Disallowing snapshots of a directory to be created. All snapshots of the directory must be deleted before disallowing snapshots. See the [HDFS Snapshot Documentation](./HdfsSnapshots.html) for more information. |
| `-shutdownDatanode` \ [upgrade] | Submit a shutdown request for the given datanode. See [Rolling Upgrade document](./HdfsRollingUpgrade.html#dfsadmin_-shutdownDatanode) for the detail. |
+| `-evictWriters` \ | Make the datanode evict all clients that are writing a block. This is useful if decommissioning is hung due to slow writers. |
| `-getDatanodeInfo` \ | Get the information about the given datanode. See [Rolling Upgrade document](./HdfsRollingUpgrade.html#dfsadmin_-getDatanodeInfo) for the detail. |
| `-metasave` filename | Save Namenode's primary data structures to *filename* in the directory specified by hadoop.log.dir property. *filename* is overwritten if it exists. *filename* will contain one line for each of the following
1. Datanodes heart beating with Namenode
2. Blocks waiting to be replicated
3. Blocks currently being replicated
4. Blocks waiting to be deleted |
| `-triggerBlockReport` `[-incremental]` \ | Trigger a block report for the given datanode. If 'incremental' is specified, it will be otherwise, it will be a full block report. |
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
index 8a52bbb1e08..d159fc55d0e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
@@ -1272,6 +1272,18 @@ public class DFSTestUtil {
// OP_APPEND 47
FSDataOutputStream s2 = filesystem.append(pathFileCreate, 4096, null);
s2.close();
+
+ // OP_UPDATE_BLOCKS 25
+ final String updateBlockFile = "/update_blocks";
+ FSDataOutputStream fout = filesystem.create(new Path(updateBlockFile), true, 4096, (short)1, 4096L);
+ fout.write(1);
+ fout.hflush();
+ long fileId = ((DFSOutputStream)fout.getWrappedStream()).getFileId();
+ DFSClient dfsclient = DFSClientAdapter.getDFSClient(filesystem);
+ LocatedBlocks blocks = dfsclient.getNamenode().getBlockLocations(updateBlockFile, 0, Integer.MAX_VALUE);
+ dfsclient.getNamenode().abandonBlock(blocks.get(0).getBlock(), fileId, updateBlockFile, dfsclient.clientName);
+ fout.close();
+
// OP_SET_STORAGE_POLICY 45
filesystem.setStoragePolicy(pathFileCreate,
HdfsConstants.HOT_STORAGE_POLICY_NAME);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/StripedFileTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/StripedFileTestUtil.java
index 0f0221c1197..6d0dfa86c82 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/StripedFileTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/StripedFileTestUtil.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
import org.apache.hadoop.hdfs.server.namenode.ErasureCodingPolicyManager;
import org.apache.hadoop.hdfs.util.StripedBlockUtil;
import org.apache.hadoop.hdfs.web.WebHdfsFileSystem.WebHdfsInputStream;
+import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.erasurecode.CodecUtil;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder;
import org.junit.Assert;
@@ -85,16 +86,6 @@ public class StripedFileTestUtil {
return (byte) (pos % mod + 1);
}
- static int readAll(FSDataInputStream in, byte[] buf) throws IOException {
- int readLen = 0;
- int ret;
- while ((ret = in.read(buf, readLen, buf.length - readLen)) >= 0 &&
- readLen <= buf.length) {
- readLen += ret;
- }
- return readLen;
- }
-
static void verifyLength(FileSystem fs, Path srcPath, int fileLength)
throws IOException {
FileStatus status = fs.getFileStatus(srcPath);
@@ -214,11 +205,11 @@ public class StripedFileTestUtil {
static void assertSeekAndRead(FSDataInputStream fsdis, int pos,
int writeBytes) throws IOException {
fsdis.seek(pos);
- byte[] buf = new byte[writeBytes];
- int readLen = StripedFileTestUtil.readAll(fsdis, buf);
- assertEquals(readLen, writeBytes - pos);
- for (int i = 0; i < readLen; i++) {
- assertEquals("Byte at " + i + " should be the same", StripedFileTestUtil.getByte(pos + i), buf[i]);
+ byte[] buf = new byte[writeBytes - pos];
+ IOUtils.readFully(fsdis, buf, 0, buf.length);
+ for (int i = 0; i < buf.length; i++) {
+ assertEquals("Byte at " + i + " should be the same",
+ StripedFileTestUtil.getByte(pos + i), buf[i]);
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
index 741e6410a91..6e4b0f9f42f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
@@ -179,11 +179,15 @@ public class TestGetBlocks {
final int DEFAULT_BLOCK_SIZE = 1024;
CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DEFAULT_BLOCK_SIZE);
+ CONF.setLong(DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY,
+ DEFAULT_BLOCK_SIZE);
+
MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(
REPLICATION_FACTOR).build();
try {
cluster.waitActive();
- long fileLen = 2 * DEFAULT_BLOCK_SIZE;
+ // the third block will not be visible to getBlocks
+ long fileLen = 2 * DEFAULT_BLOCK_SIZE + 1;
DFSTestUtil.createFile(cluster.getFileSystem(), new Path("/tmp.txt"),
fileLen, REPLICATION_FACTOR, 0L);
@@ -196,7 +200,7 @@ public class TestGetBlocks {
DFSUtilClient.getNNAddress(CONF), CONF);
locatedBlocks = dfsclient.getNamenode()
.getBlockLocations("/tmp.txt", 0, fileLen).getLocatedBlocks();
- assertEquals(2, locatedBlocks.size());
+ assertEquals(3, locatedBlocks.size());
notWritten = false;
for (int i = 0; i < 2; i++) {
dataNodes = locatedBlocks.get(i).getLocations();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/FsDatasetTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/FsDatasetTestUtils.java
index f5bf4e9f6a8..867d6c92155 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/FsDatasetTestUtils.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/FsDatasetTestUtils.java
@@ -135,6 +135,13 @@ public interface FsDatasetTestUtils {
* @throws IOException I/O error.
*/
void truncateMeta(long newSize) throws IOException;
+
+ /**
+ * Make the replica unreachable, perhaps by renaming it to an
+ * invalid file name.
+ * @throws IOException On I/O error.
+ */
+ void makeUnreachable() throws IOException;
}
/**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java
index 4628a4651bc..021361b2d8a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java
@@ -24,6 +24,7 @@ import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_
import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
import java.io.Closeable;
import java.io.File;
@@ -38,6 +39,7 @@ import java.util.concurrent.Semaphore;
import com.google.common.base.Supplier;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.datanode.FsDatasetTestUtils.MaterializedReplica;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.datanode.VolumeScanner.ScanResultHandler;
import org.apache.hadoop.conf.Configuration;
@@ -139,6 +141,11 @@ public class TestBlockScanner {
throws Exception {
return DFSTestUtil.getFirstBlock(dfs[nsIdx], getPath(fileIdx));
}
+
+ public MaterializedReplica getMaterializedReplica(int nsIdx, int fileIdx)
+ throws Exception {
+ return cluster.getMaterializedReplica(0, getFileBlock(nsIdx, fileIdx));
+ }
}
/**
@@ -806,4 +813,60 @@ public class TestBlockScanner {
info.blocksScanned = 0;
}
}
+
+ /**
+ * Test that blocks which are in the wrong location are ignored.
+ */
+ @Test(timeout=120000)
+ public void testIgnoreMisplacedBlock() throws Exception {
+ Configuration conf = new Configuration();
+ // Set a really long scan period.
+ conf.setLong(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
+ conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
+ TestScanResultHandler.class.getName());
+ conf.setLong(INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS, 0L);
+ final TestContext ctx = new TestContext(conf, 1);
+ final int NUM_FILES = 4;
+ ctx.createFiles(0, NUM_FILES, 5);
+ MaterializedReplica unreachableReplica = ctx.getMaterializedReplica(0, 1);
+ ExtendedBlock unreachableBlock = ctx.getFileBlock(0, 1);
+ unreachableReplica.makeUnreachable();
+ final TestScanResultHandler.Info info =
+ TestScanResultHandler.getInfo(ctx.volumes.get(0));
+ String storageID = ctx.volumes.get(0).getStorageID();
+ synchronized (info) {
+ info.sem = new Semaphore(NUM_FILES);
+ info.shouldRun = true;
+ info.notify();
+ }
+ // Wait until the NUM_FILES - 1 reachable blocks have been scanned
+ LOG.info("Waiting for the blocks to be scanned.");
+ GenericTestUtils.waitFor(new Supplier() {
+ @Override
+ public Boolean get() {
+ synchronized (info) {
+ if (info.blocksScanned >= NUM_FILES - 1) {
+ LOG.info("info = {}. blockScanned has now reached " +
+ info.blocksScanned, info);
+ return true;
+ } else {
+ LOG.info("info = {}. Waiting for blockScanned to reach " +
+ (NUM_FILES - 1), info);
+ return false;
+ }
+ }
+ }
+ }, 50, 30000);
+ // Only the 3 reachable blocks should have been scanned
+ synchronized (info) {
+ assertFalse(info.goodBlocks.contains(unreachableBlock));
+ assertFalse(info.badBlocks.contains(unreachableBlock));
+ assertEquals("Expected 3 good blocks.", 3, info.goodBlocks.size());
+ info.goodBlocks.clear();
+ assertEquals("Expected 3 blocksScanned", 3, info.blocksScanned);
+ assertEquals("Did not expect bad blocks.", 0, info.badBlocks.size());
+ info.blocksScanned = 0;
+ }
+ info.sem.release(1);
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
index 5f9b60267c5..355f7a1e753 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
@@ -258,10 +258,9 @@ public class TestDataNodeMetrics {
* and reading causes totalReadTime to move.
* @throws Exception
*/
- @Test(timeout=60000)
+ @Test(timeout=120000)
public void testDataNodeTimeSpend() throws Exception {
Configuration conf = new HdfsConfiguration();
- SimulatedFSDataset.setFactory(conf);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
final FileSystem fs = cluster.getFileSystem();
@@ -284,6 +283,7 @@ public class TestDataNodeMetrics {
DFSTestUtil.createFile(fs, new Path("/time.txt." + x.get()),
LONG_FILE_LEN, (short) 1, Time.monotonicNow());
DFSTestUtil.readFile(fs, new Path("/time.txt." + x.get()));
+ fs.delete(new Path("/time.txt." + x.get()), true);
} catch (IOException ioe) {
LOG.error("Caught IOException while ingesting DN metrics", ioe);
return false;
@@ -294,7 +294,7 @@ public class TestDataNodeMetrics {
return endWriteValue > startWriteValue
&& endReadValue > startReadValue;
}
- }, 30, 30000);
+ }, 30, 60000);
} finally {
if (cluster != null) {
cluster.shutdown();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java
index f3c740a3200..f780a14f2a8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java
@@ -170,6 +170,27 @@ public class FsDatasetImplTestUtils implements FsDatasetTestUtils {
truncate(metaFile, newSize);
}
+ @Override
+ public void makeUnreachable() throws IOException {
+ long blockId = Block.getBlockId(blockFile.getAbsolutePath());
+ File origDir = blockFile.getParentFile();
+ File root = origDir.getParentFile().getParentFile();
+ File newDir = null;
+ // Keep incrementing the block ID until the block and metadata
+ // files end up in a different directory. Actually, with the
+ // current replica file placement scheme, this should only ever
+ // require one increment, but this is a bit of defensive coding.
+ do {
+ blockId++;
+ newDir = DatanodeUtil.idToBlockDir(root, blockId);
+ } while (origDir.equals(newDir));
+ Files.createDirectories(newDir.toPath());
+ Files.move(blockFile.toPath(),
+ new File(newDir, blockFile.getName()).toPath());
+ Files.move(metaFile.toPath(),
+ new File(newDir, metaFile.getName()).toPath());
+ }
+
@Override
public String toString() {
return String.format("MaterializedReplica: file=%s", blockFile);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java
index 1904bbc122e..6be39509c9a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java
@@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper;
import org.apache.hadoop.hdfs.util.Canceler;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.log4j.Level;
+
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -62,7 +63,7 @@ public class TestFSImageWithSnapshot {
}
static final long seed = 0;
- static final short REPLICATION = 3;
+ static final short NUM_DATANODES = 3;
static final int BLOCKSIZE = 1024;
static final long txid = 1;
@@ -78,7 +79,7 @@ public class TestFSImageWithSnapshot {
@Before
public void setUp() throws Exception {
conf = new Configuration();
- cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES)
.build();
cluster.waitActive();
fsn = cluster.getNamesystem();
@@ -177,7 +178,7 @@ public class TestFSImageWithSnapshot {
cluster.shutdown();
cluster = new MiniDFSCluster.Builder(conf).format(false)
- .numDataNodes(REPLICATION).build();
+ .numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
fsn = cluster.getNamesystem();
hdfs = cluster.getFileSystem();
@@ -188,7 +189,7 @@ public class TestFSImageWithSnapshot {
hdfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
cluster.shutdown();
cluster = new MiniDFSCluster.Builder(conf).format(false)
- .numDataNodes(REPLICATION).build();
+ .numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
fsn = cluster.getNamesystem();
hdfs = cluster.getFileSystem();
@@ -215,7 +216,7 @@ public class TestFSImageWithSnapshot {
hdfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
cluster.shutdown();
cluster = new MiniDFSCluster.Builder(conf).format(false)
- .numDataNodes(REPLICATION).build();
+ .numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
fsn = cluster.getNamesystem();
hdfs = cluster.getFileSystem();
@@ -248,20 +249,20 @@ public class TestFSImageWithSnapshot {
hdfs.createSnapshot(dir, "s" + ++s);
Path sub1file1 = new Path(sub1, "sub1file1");
Path sub1file2 = new Path(sub1, "sub1file2");
- DFSTestUtil.createFile(hdfs, sub1file1, BLOCKSIZE, REPLICATION, seed);
- DFSTestUtil.createFile(hdfs, sub1file2, BLOCKSIZE, REPLICATION, seed);
+ DFSTestUtil.createFile(hdfs, sub1file1, BLOCKSIZE, (short) 1, seed);
+ DFSTestUtil.createFile(hdfs, sub1file2, BLOCKSIZE, (short) 1, seed);
checkImage(s);
hdfs.createSnapshot(dir, "s" + ++s);
Path sub2 = new Path(dir, "sub2");
Path sub2file1 = new Path(sub2, "sub2file1");
Path sub2file2 = new Path(sub2, "sub2file2");
- DFSTestUtil.createFile(hdfs, sub2file1, BLOCKSIZE, REPLICATION, seed);
- DFSTestUtil.createFile(hdfs, sub2file2, BLOCKSIZE, REPLICATION, seed);
+ DFSTestUtil.createFile(hdfs, sub2file1, BLOCKSIZE, (short) 1, seed);
+ DFSTestUtil.createFile(hdfs, sub2file2, BLOCKSIZE, (short) 1, seed);
checkImage(s);
hdfs.createSnapshot(dir, "s" + ++s);
- hdfs.setReplication(sub1file1, (short) (REPLICATION - 1));
+ hdfs.setReplication(sub1file1, (short) 1);
hdfs.delete(sub1file2, true);
hdfs.setOwner(sub2, "dr.who", "unknown");
hdfs.delete(sub2file1, true);
@@ -300,7 +301,7 @@ public class TestFSImageWithSnapshot {
// restart the cluster, and format the cluster
cluster = new MiniDFSCluster.Builder(conf).format(true)
- .numDataNodes(REPLICATION).build();
+ .numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
fsn = cluster.getNamesystem();
hdfs = cluster.getFileSystem();
@@ -338,8 +339,8 @@ public class TestFSImageWithSnapshot {
Path sub1 = new Path(dir, "sub1");
Path sub1file1 = new Path(sub1, "sub1file1");
Path sub1file2 = new Path(sub1, "sub1file2");
- DFSTestUtil.createFile(hdfs, sub1file1, BLOCKSIZE, REPLICATION, seed);
- DFSTestUtil.createFile(hdfs, sub1file2, BLOCKSIZE, REPLICATION, seed);
+ DFSTestUtil.createFile(hdfs, sub1file1, BLOCKSIZE, (short) 1, seed);
+ DFSTestUtil.createFile(hdfs, sub1file2, BLOCKSIZE, (short) 1, seed);
// 1. create snapshot s0
hdfs.allowSnapshot(dir);
@@ -372,7 +373,7 @@ public class TestFSImageWithSnapshot {
out.close();
cluster.shutdown();
cluster = new MiniDFSCluster.Builder(conf).format(true)
- .numDataNodes(REPLICATION).build();
+ .numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
fsn = cluster.getNamesystem();
hdfs = cluster.getFileSystem();
@@ -394,8 +395,8 @@ public class TestFSImageWithSnapshot {
Path sub1 = new Path(dir, "sub1");
Path sub1file1 = new Path(sub1, "sub1file1");
Path sub1file2 = new Path(sub1, "sub1file2");
- DFSTestUtil.createFile(hdfs, sub1file1, BLOCKSIZE, REPLICATION, seed);
- DFSTestUtil.createFile(hdfs, sub1file2, BLOCKSIZE, REPLICATION, seed);
+ DFSTestUtil.createFile(hdfs, sub1file1, BLOCKSIZE, (short) 1, seed);
+ DFSTestUtil.createFile(hdfs, sub1file2, BLOCKSIZE, (short) 1, seed);
hdfs.allowSnapshot(dir);
hdfs.createSnapshot(dir, "s0");
@@ -410,7 +411,7 @@ public class TestFSImageWithSnapshot {
cluster.shutdown();
cluster = new MiniDFSCluster.Builder(conf).format(false)
- .numDataNodes(REPLICATION).build();
+ .numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
fsn = cluster.getNamesystem();
hdfs = cluster.getFileSystem();
@@ -440,7 +441,7 @@ public class TestFSImageWithSnapshot {
// restart cluster
cluster.shutdown();
cluster = new MiniDFSCluster.Builder(conf).format(false)
- .numDataNodes(REPLICATION).build();
+ .numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
hdfs = cluster.getFileSystem();
@@ -478,7 +479,7 @@ public class TestFSImageWithSnapshot {
Path newDir = new Path(subsubDir, "newdir");
Path newFile = new Path(newDir, "newfile");
hdfs.mkdirs(newDir);
- DFSTestUtil.createFile(hdfs, newFile, BLOCKSIZE, REPLICATION, seed);
+ DFSTestUtil.createFile(hdfs, newFile, BLOCKSIZE, (short) 1, seed);
// create another snapshot
SnapshotTestHelper.createSnapshot(hdfs, dir, "s2");
@@ -491,7 +492,7 @@ public class TestFSImageWithSnapshot {
// restart cluster
cluster.shutdown();
- cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES)
.format(false).build();
cluster.waitActive();
fsn = cluster.getNamesystem();
@@ -504,7 +505,7 @@ public class TestFSImageWithSnapshot {
cluster.shutdown();
cluster = new MiniDFSCluster.Builder(conf).format(false)
- .numDataNodes(REPLICATION).build();
+ .numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
fsn = cluster.getNamesystem();
hdfs = cluster.getFileSystem();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPendingCorruptDnMessages.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPendingCorruptDnMessages.java
index 5f116d95ccf..5063acdc8af 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPendingCorruptDnMessages.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPendingCorruptDnMessages.java
@@ -18,12 +18,14 @@
package org.apache.hadoop.hdfs.server.namenode.ha;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URISyntaxException;
import java.util.List;
+import java.util.concurrent.TimeoutException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -37,19 +39,22 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
-import org.apache.hadoop.util.ThreadUtil;
+import org.apache.hadoop.test.GenericTestUtils;
+
+import com.google.common.base.Supplier;
+
import org.junit.Test;
public class TestPendingCorruptDnMessages {
private static final Path filePath = new Path("/foo.txt");
- @Test
+ @Test (timeout = 60000)
public void testChangedStorageId() throws IOException, URISyntaxException,
- InterruptedException {
+ InterruptedException, TimeoutException {
HdfsConfiguration conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
- MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+ final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(1)
.nnTopology(MiniDFSNNTopology.simpleHATopology())
.build();
@@ -83,27 +88,27 @@ public class TestPendingCorruptDnMessages {
// Wait until the standby NN queues up the corrupt block in the pending DN
// message queue.
- while (cluster.getNamesystem(1).getBlockManager()
- .getPendingDataNodeMessageCount() < 1) {
- ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
- }
-
- assertEquals(1, cluster.getNamesystem(1).getBlockManager()
- .getPendingDataNodeMessageCount());
- String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
+ GenericTestUtils.waitFor(new Supplier() {
+ @Override
+ public Boolean get() {
+ return cluster.getNamesystem(1).getBlockManager()
+ .getPendingDataNodeMessageCount() == 1;
+ }
+ }, 1000, 30000);
+
+ final String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
+ assertNotNull(oldStorageId);
// Reformat/restart the DN.
assertTrue(wipeAndRestartDn(cluster, 0));
- // Give the DN time to start up and register, which will cause the
- // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
- String newStorageId = "";
- do {
- ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
- newStorageId = getRegisteredDatanodeUid(cluster, 1);
- System.out.println("====> oldStorageId: " + oldStorageId +
- " newStorageId: " + newStorageId);
- } while (newStorageId.equals(oldStorageId));
+ GenericTestUtils.waitFor(new Supplier() {
+ @Override
+ public Boolean get() {
+ final String newStorageId = getRegisteredDatanodeUid(cluster, 1);
+ return newStorageId != null && !newStorageId.equals(oldStorageId);
+ }
+ }, 1000, 30000);
assertEquals(0, cluster.getNamesystem(1).getBlockManager()
.getPendingDataNodeMessageCount());
@@ -121,8 +126,8 @@ public class TestPendingCorruptDnMessages {
List registeredDatanodes = cluster.getNamesystem(nnIndex)
.getBlockManager().getDatanodeManager()
.getDatanodeListForReport(DatanodeReportType.ALL);
- assertEquals(1, registeredDatanodes.size());
- return registeredDatanodes.get(0).getDatanodeUuid();
+ return registeredDatanodes.isEmpty() ? null :
+ registeredDatanodes.get(0).getDatanodeUuid();
}
private static boolean wipeAndRestartDn(MiniDFSCluster cluster, int dnIndex)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
index 3ca7fec54dc..63bdf740704 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java
@@ -234,7 +234,7 @@ public class TestDFSAdmin {
final List outs = Lists.newArrayList();
final List errs = Lists.newArrayList();
getReconfigurableProperties("namenode", address, outs, errs);
- assertEquals(4, outs.size());
+ assertEquals(5, outs.size());
assertEquals(DFS_HEARTBEAT_INTERVAL_KEY, outs.get(1));
assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, outs.get(2));
assertEquals(errs.size(), 0);
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
index 56907431d16..47d23892545 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
@@ -77,6 +77,8 @@ import org.codehaus.jackson.node.JsonNodeFactory;
import org.codehaus.jackson.node.ObjectNode;
import com.google.common.annotations.VisibleForTesting;
+import com.sun.jersey.api.client.ClientHandlerException;
+
/**
* The job history events get routed to this class. This class writes the Job
* history events to the DFS directly into a staging dir and then moved to a
@@ -1032,12 +1034,9 @@ public class JobHistoryEventHandler extends AbstractService
+ error.getErrorCode());
}
}
- } catch (IOException ex) {
+ } catch (YarnException | IOException | ClientHandlerException ex) {
LOG.error("Error putting entity " + tEntity.getEntityId() + " to Timeline"
- + "Server", ex);
- } catch (YarnException ex) {
- LOG.error("Error putting entity " + tEntity.getEntityId() + " to Timeline"
- + "Server", ex);
+ + "Server", ex);
}
}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptKillEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptKillEvent.java
index 9bcc838173e..767ef0d7a28 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptKillEvent.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptKillEvent.java
@@ -24,14 +24,27 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
public class TaskAttemptKillEvent extends TaskAttemptEvent {
private final String message;
+ // If true, the next map attempt will be rescheduled (i.e. updated in ask
+ // with higher priority equivalent to that of a fast fail map).
+ private final boolean rescheduleAttempt;
+
+ public TaskAttemptKillEvent(TaskAttemptId attemptID,
+ String message, boolean rescheduleAttempt) {
+ super(attemptID, TaskAttemptEventType.TA_KILL);
+ this.message = message;
+ this.rescheduleAttempt = rescheduleAttempt;
+ }
public TaskAttemptKillEvent(TaskAttemptId attemptID,
String message) {
- super(attemptID, TaskAttemptEventType.TA_KILL);
- this.message = message;
+ this(attemptID, message, false);
}
public String getMessage() {
return message;
}
+
+ public boolean getRescheduleAttempt() {
+ return rescheduleAttempt;
+ }
}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskTAttemptKilledEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskTAttemptKilledEvent.java
new file mode 100644
index 00000000000..897444d7dc8
--- /dev/null
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskTAttemptKilledEvent.java
@@ -0,0 +1,40 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.mapreduce.v2.app.job.event;
+
+import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
+
+/**
+ * Task Attempt killed event.
+ */
+public class TaskTAttemptKilledEvent extends TaskTAttemptEvent {
+
+ // If true, the next map attempt will be rescheduled (i.e. updated in ask
+ // with higher priority equivalent to that of a fast fail map).
+ private final boolean rescheduleAttempt;
+
+ public TaskTAttemptKilledEvent(TaskAttemptId id, boolean rescheduleAttempt) {
+ super(id, TaskEventType.T_ATTEMPT_KILLED);
+ this.rescheduleAttempt = rescheduleAttempt;
+ }
+
+ public boolean getRescheduleAttempt() {
+ return rescheduleAttempt;
+ }
+}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
index c8c5ce90ca8..b7036a53639 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
@@ -1349,7 +1349,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
if (TaskType.MAP == id.getTaskId().getTaskType()) {
// reschedule only map tasks because their outputs maybe unusable
LOG.info(mesg + ". AttemptId:" + id);
- eventHandler.handle(new TaskAttemptKillEvent(id, mesg));
+ // Kill the attempt and indicate that next map attempt should be
+ // rescheduled (i.e. considered as a fast fail map).
+ eventHandler.handle(new TaskAttemptKillEvent(id, mesg, true));
}
}
}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
index 5f0a622ec44..da6617e00e5 100755
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
@@ -98,6 +98,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptTooManyFetchFailureEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptKilledEvent;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
@@ -184,6 +185,7 @@ public abstract class TaskAttemptImpl implements
private int httpPort;
private Locality locality;
private Avataar avataar;
+ private boolean rescheduleNextAttempt = false;
private static final CleanupContainerTransition
CLEANUP_CONTAINER_TRANSITION = new CleanupContainerTransition();
@@ -1377,6 +1379,16 @@ public abstract class TaskAttemptImpl implements
return container != null;
}
+ // Always called in write lock.
+ // Returns the flag stored by setRescheduleNextAttempt(); it is forwarded in
+ // the TaskTAttemptKilledEvent sent for this attempt.
+ private boolean getRescheduleNextAttempt() {
+ return rescheduleNextAttempt;
+ }
+
+ // Always called in write lock.
+ // Records whether the attempt that replaces this one should be rescheduled;
+ // callers pass the flag carried by the TaskAttemptKillEvent being handled.
+ private void setRescheduleNextAttempt(boolean reschedule) {
+ rescheduleNextAttempt = reschedule;
+ }
+
//always called in write lock
private void setFinishTime() {
//set the finish time only if launch time is set
@@ -1745,9 +1757,8 @@ public abstract class TaskAttemptImpl implements
TaskEventType.T_ATTEMPT_FAILED));
break;
case KILLED:
- taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
- taskAttempt.attemptId,
- TaskEventType.T_ATTEMPT_KILLED));
+ taskAttempt.eventHandler.handle(new TaskTAttemptKilledEvent(
+ taskAttempt.attemptId, false));
break;
default:
LOG.error("Task final state is not FAILED or KILLED: " + finalState);
@@ -2014,8 +2025,13 @@ public abstract class TaskAttemptImpl implements
taskAttempt, TaskAttemptStateInternal.KILLED);
taskAttempt.eventHandler.handle(new JobHistoryEvent(taskAttempt.attemptId
.getTaskId().getJobId(), tauce));
- taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
- taskAttempt.attemptId, TaskEventType.T_ATTEMPT_KILLED));
+ boolean rescheduleNextTaskAttempt = false;
+ if (event instanceof TaskAttemptKillEvent) {
+ rescheduleNextTaskAttempt =
+ ((TaskAttemptKillEvent)event).getRescheduleAttempt();
+ }
+ taskAttempt.eventHandler.handle(new TaskTAttemptKilledEvent(
+ taskAttempt.attemptId, rescheduleNextTaskAttempt));
return TaskAttemptStateInternal.KILLED;
}
}
@@ -2044,6 +2060,12 @@ public abstract class TaskAttemptImpl implements
taskAttempt.getID().toString());
return TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP;
} else {
+ // Store reschedule flag so that after clean up is completed, new
+ // attempt is scheduled/rescheduled based on it.
+ if (event instanceof TaskAttemptKillEvent) {
+ taskAttempt.setRescheduleNextAttempt(
+ ((TaskAttemptKillEvent)event).getRescheduleAttempt());
+ }
return TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP;
}
}
@@ -2075,9 +2097,8 @@ public abstract class TaskAttemptImpl implements
((TaskAttemptKillEvent) event).getMessage());
}
- taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
- taskAttempt.attemptId,
- TaskEventType.T_ATTEMPT_KILLED));
+ taskAttempt.eventHandler.handle(new TaskTAttemptKilledEvent(
+ taskAttempt.attemptId, taskAttempt.getRescheduleNextAttempt()));
}
}
@@ -2095,9 +2116,8 @@ public abstract class TaskAttemptImpl implements
taskAttempt.getAssignedContainerID(), taskAttempt.getAssignedContainerMgrAddress(),
taskAttempt.container.getContainerToken(),
ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP));
- taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
- taskAttempt.attemptId,
- TaskEventType.T_ATTEMPT_KILLED));
+ taskAttempt.eventHandler.handle(new TaskTAttemptKilledEvent(
+ taskAttempt.attemptId, false));
}
}
@@ -2137,6 +2157,12 @@ public abstract class TaskAttemptImpl implements
// for it.
finalizeProgress(taskAttempt);
sendContainerCleanup(taskAttempt, event);
+ // Store reschedule flag so that after clean up is completed, new
+ // attempt is scheduled/rescheduled based on it.
+ if (event instanceof TaskAttemptKillEvent) {
+ taskAttempt.setRescheduleNextAttempt(
+ ((TaskAttemptKillEvent)event).getRescheduleAttempt());
+ }
}
}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java
index a392837eb1c..34d9f0ef13f 100755
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java
@@ -76,6 +76,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptKilledEvent;
import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics;
import org.apache.hadoop.mapreduce.v2.app.rm.ContainerFailedEvent;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
@@ -594,10 +595,15 @@ public abstract class TaskImpl implements Task, EventHandler {
// This is always called in the Write Lock
private void addAndScheduleAttempt(Avataar avataar) {
+ addAndScheduleAttempt(avataar, false);
+ }
+
+ // This is always called in the Write Lock
+ private void addAndScheduleAttempt(Avataar avataar, boolean reschedule) {
TaskAttempt attempt = addAttempt(avataar);
inProgressAttempts.add(attempt.getID());
//schedule the nextAttemptNumber
- if (failedAttempts.size() > 0) {
+ if (failedAttempts.size() > 0 || reschedule) {
eventHandler.handle(new TaskAttemptEvent(attempt.getID(),
TaskAttemptEventType.TA_RESCHEDULE));
} else {
@@ -968,7 +974,12 @@ public abstract class TaskImpl implements Task, EventHandler {
task.finishedAttempts.add(taskAttemptId);
task.inProgressAttempts.remove(taskAttemptId);
if (task.successfulAttempt == null) {
- task.addAndScheduleAttempt(Avataar.VIRGIN);
+ boolean rescheduleNewAttempt = false;
+ if (event instanceof TaskTAttemptKilledEvent) {
+ rescheduleNewAttempt =
+ ((TaskTAttemptKilledEvent)event).getRescheduleAttempt();
+ }
+ task.addAndScheduleAttempt(Avataar.VIRGIN, rescheduleNewAttempt);
}
if ((task.commitAttempt != null) && (task.commitAttempt == taskAttemptId)) {
task.commitAttempt = null;
@@ -1187,7 +1198,15 @@ public abstract class TaskImpl implements Task, EventHandler {
// from the map splitInfo. So the bad node might be sent as a location
// to the RM. But the RM would ignore that just like it would ignore
// currently pending container requests affinitized to bad nodes.
- task.addAndScheduleAttempt(Avataar.VIRGIN);
+ boolean rescheduleNextTaskAttempt = false;
+ if (event instanceof TaskTAttemptKilledEvent) {
+ // Decide whether to reschedule next task attempt. If true, this
+ // typically indicates that a successful map attempt was killed on an
+ // unusable node being reported.
+ rescheduleNextTaskAttempt =
+ ((TaskTAttemptKilledEvent)event).getRescheduleAttempt();
+ }
+ task.addAndScheduleAttempt(Avataar.VIRGIN, rescheduleNextTaskAttempt);
return TaskStateInternal.SCHEDULED;
}
}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
index 73745d358e2..0f4b59bd3f2 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
@@ -925,9 +925,11 @@ public class RMContainerAllocator extends RMContainerRequestor
LOG.info("Killing taskAttempt:" + tid
+ " because it is running on unusable node:"
+ taskAttemptNodeId);
+ // If map, reschedule next task attempt.
+ boolean rescheduleNextAttempt = (i == 0) ? true : false;
eventHandler.handle(new TaskAttemptKillEvent(tid,
"TaskAttempt killed because it ran on unusable node"
- + taskAttemptNodeId));
+ + taskAttemptNodeId, rescheduleNextAttempt));
}
}
}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java
index eb6b93292b8..eaf107050d7 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java
@@ -26,6 +26,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicInteger;
import com.google.common.base.Supplier;
import org.apache.hadoop.test.GenericTestUtils;
@@ -56,13 +57,19 @@ import org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
+import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator;
+import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent;
+import org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.junit.Test;
+import org.mockito.Mockito;
/**
* Tests the state machine of MR App.
@@ -201,13 +208,18 @@ public class TestMRApp {
@Test
public void testUpdatedNodes() throws Exception {
int runCount = 0;
+ Dispatcher disp = Mockito.spy(new AsyncDispatcher());
MRApp app = new MRAppWithHistory(2, 2, false, this.getClass().getName(),
- true, ++runCount);
+ true, ++runCount, disp);
Configuration conf = new Configuration();
// after half of the map completion, reduce will start
conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 0.5f);
// uberization forces full slowstart (1.0), so disable that
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
+
+ ContainerAllocEventHandler handler = new ContainerAllocEventHandler();
+ disp.register(ContainerAllocator.EventType.class, handler);
+
final Job job1 = app.submit(conf);
app.waitForState(job1, JobState.RUNNING);
Assert.assertEquals("Num tasks not correct", 4, job1.getTasks().size());
@@ -285,6 +297,12 @@ public class TestMRApp {
events = job1.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 2 more completion events for killed", 4,
events.length);
+ // The 2 map task attempts which were killed above should be requested from
+ // the container allocator with the previous map attempt marked as failed.
+ // If this happens, the allocator will request the containers for these
+ // mappers from the RM at a higher priority of 5 (i.e. with a priority
+ // equivalent to that of a fast fail map).
+ handler.waitForFailedMapContainerReqEvents(2);
// all maps must be back to running
app.waitForState(mapTask1, TaskState.RUNNING);
@@ -324,7 +342,7 @@ public class TestMRApp {
// rerun
// in rerun the 1st map will be recovered from previous run
app = new MRAppWithHistory(2, 2, false, this.getClass().getName(), false,
- ++runCount);
+ ++runCount, (Dispatcher)new AsyncDispatcher());
conf = new Configuration();
conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
@@ -420,6 +438,25 @@ public class TestMRApp {
app.waitForState(job2, JobState.SUCCEEDED);
}
+ /**
+  * Handler for container allocator events that counts the container
+  * requests whose earlier attempt is flagged as failed.
+  */
+ private final class ContainerAllocEventHandler
+ implements EventHandler<ContainerAllocatorEvent> {
+ // Number of matching CONTAINER_REQ events seen since the last reset.
+ private final AtomicInteger failedMapContainerReqEventCnt =
+ new AtomicInteger(0);
+ /**
+  * Counts the event if it is a CONTAINER_REQ whose earlier attempt failed;
+  * every other event is ignored.
+  */
+ @Override
+ public void handle(ContainerAllocatorEvent event) {
+ if (event.getType() == ContainerAllocator.EventType.CONTAINER_REQ &&
+ ((ContainerRequestEvent)event).getEarlierAttemptFailed()) {
+ failedMapContainerReqEventCnt.incrementAndGet();
+ }
+ }
+ /**
+  * Polls every 50 ms until exactly {@code count} matching requests have
+  * been counted, then resets the counter to zero. There is no timeout: the
+  * call keeps waiting if the counter never equals {@code count}.
+  *
+  * @param count the exact number of matching requests to wait for.
+  * @throws InterruptedException if interrupted while sleeping.
+  */
+ public void waitForFailedMapContainerReqEvents(int count)
+ throws InterruptedException {
+ while(failedMapContainerReqEventCnt.get() != count) {
+ Thread.sleep(50);
+ }
+ failedMapContainerReqEventCnt.set(0);
+ }
+ }
+
private static void waitFor(Supplier predicate, int
checkIntervalMillis, int checkTotalMillis) throws InterruptedException {
try {
@@ -590,9 +627,17 @@ public class TestMRApp {
}
private final class MRAppWithHistory extends MRApp {
+ private Dispatcher dispatcher;
public MRAppWithHistory(int maps, int reduces, boolean autoComplete,
- String testName, boolean cleanOnStart, int startCount) {
+ String testName, boolean cleanOnStart, int startCount,
+ Dispatcher disp) {
super(maps, reduces, autoComplete, testName, cleanOnStart, startCount);
+ this.dispatcher = disp;
+ }
+
+ @Override
+ protected Dispatcher createDispatcher() {
+ return dispatcher;
}
@Override
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
index 509f6af6129..98dffba4580 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
@@ -78,9 +78,13 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptTooManyFetchFailureEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptKilledEvent;
import org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.security.Credentials;
@@ -982,7 +986,46 @@ public class TestTaskAttempt{
+ " Task attempt finish time is not the same ",
finishTime, Long.valueOf(taImpl.getFinishTime()));
}
-
+
+ /**
+  * Kills a map task attempt before a container has been assigned to it and
+  * verifies that the attempt ends up KILLED and that the resulting
+  * T_ATTEMPT_KILLED task event does not ask for the next attempt to be
+  * rescheduled, even though the kill event itself carries the flag.
+  *
+  * @param scheduleAttempt if true, a TA_SCHEDULE event is sent before the
+  *        kill; if false, the attempt is killed without being scheduled.
+  */
+ private void containerKillBeforeAssignment(boolean scheduleAttempt)
+ throws Exception {
+ MockEventHandler eventHandler = new MockEventHandler();
+ ApplicationId appId = ApplicationId.newInstance(1, 2);
+ JobId jobId = MRBuilderUtils.newJobId(appId, 1);
+ TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
+
+ TaskAttemptImpl taImpl =
+ new MapTaskAttemptImpl(taskId, 1, eventHandler, mock(Path.class), 1,
+ mock(TaskSplitMetaInfo.class), new JobConf(),
+ mock(TaskAttemptListener.class), mock(Token.class),
+ new Credentials(), SystemClock.getInstance(),
+ mock(AppContext.class));
+ if (scheduleAttempt) {
+ taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
+ TaskAttemptEventType.TA_SCHEDULE));
+ }
+ // Kill the attempt, asking for the next attempt to be rescheduled.
+ taImpl.handle(new TaskAttemptKillEvent(taImpl.getID(), "", true));
+ // assertEquals takes (message, expected, actual).
+ assertEquals("Task attempt is not in KILLED state",
+ TaskAttemptState.KILLED, taImpl.getState());
+ assertEquals("Task attempt's internal state is not KILLED",
+ TaskAttemptStateInternal.KILLED, taImpl.getInternalState());
+ assertFalse("InternalError occurred", eventHandler.internalError);
+ TaskEvent event = eventHandler.lastTaskEvent;
+ assertEquals(TaskEventType.T_ATTEMPT_KILLED, event.getType());
+ // Whether or not the attempt was scheduled first, the new map attempt
+ // should not be rescheduled.
+ assertFalse(((TaskTAttemptKilledEvent)event).getRescheduleAttempt());
+ }
+
+ // Kill an attempt that has not been sent a TA_SCHEDULE event.
+ @Test
+ public void testContainerKillOnNew() throws Exception {
+ containerKillBeforeAssignment(false);
+ }
+
+ // Kill an attempt that has been sent TA_SCHEDULE but no later events.
+ @Test
+ public void testContainerKillOnUnassigned() throws Exception {
+ containerKillBeforeAssignment(true);
+ }
+
@Test
public void testContainerKillAfterAssigned() throws Exception {
ApplicationId appId = ApplicationId.newInstance(1, 2);
@@ -1032,7 +1075,7 @@ public class TestTaskAttempt{
taImpl.getInternalState(), TaskAttemptStateInternal.ASSIGNED);
taImpl.handle(new TaskAttemptEvent(attemptId,
TaskAttemptEventType.TA_KILL));
- assertEquals("Task should be in KILLED state",
+ assertEquals("Task should be in KILL_CONTAINER_CLEANUP state",
TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP,
taImpl.getInternalState());
}
@@ -1089,7 +1132,7 @@ public class TestTaskAttempt{
TaskAttemptEventType.TA_KILL));
assertFalse("InternalError occurred trying to handle TA_KILL",
eventHandler.internalError);
- assertEquals("Task should be in KILLED state",
+ assertEquals("Task should be in KILL_CONTAINER_CLEANUP state",
TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP,
taImpl.getInternalState());
}
@@ -1150,12 +1193,11 @@ public class TestTaskAttempt{
TaskAttemptEventType.TA_KILL));
assertFalse("InternalError occurred trying to handle TA_KILL",
eventHandler.internalError);
- assertEquals("Task should be in KILLED state",
+ assertEquals("Task should be in KILL_CONTAINER_CLEANUP state",
TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP,
taImpl.getInternalState());
}
-
@Test
public void testKillMapTaskWhileSuccessFinishing() throws Exception {
MockEventHandler eventHandler = new MockEventHandler();
@@ -1195,6 +1237,37 @@ public class TestTaskAttempt{
assertFalse("InternalError occurred", eventHandler.internalError);
}
+ /**
+  * Kills a map attempt after it has reported TA_DONE and its container has
+  * been cleaned, and checks that the reschedule flag of the kill event is
+  * forwarded in the resulting T_ATTEMPT_KILLED task event.
+  */
+ @Test
+ public void testKillMapTaskAfterSuccess() throws Exception {
+ MockEventHandler eventHandler = new MockEventHandler();
+ TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler);
+
+ taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
+ TaskAttemptEventType.TA_DONE));
+
+ // assertEquals takes (message, expected, actual).
+ assertEquals("Task attempt is not in SUCCEEDED state",
+ TaskAttemptState.SUCCEEDED, taImpl.getState());
+ assertEquals("Task attempt's internal state is not " +
+ "SUCCESS_FINISHING_CONTAINER",
+ TaskAttemptStateInternal.SUCCESS_FINISHING_CONTAINER,
+ taImpl.getInternalState());
+
+ taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
+ TaskAttemptEventType.TA_CONTAINER_CLEANED));
+ // Send a map task attempt kill event indicating that the next map attempt
+ // has to be rescheduled.
+ taImpl.handle(new TaskAttemptKillEvent(taImpl.getID(), "", true));
+ assertEquals("Task attempt is not in KILLED state",
+ TaskAttemptState.KILLED, taImpl.getState());
+ assertEquals("Task attempt's internal state is not KILLED",
+ TaskAttemptStateInternal.KILLED, taImpl.getInternalState());
+ assertFalse("InternalError occurred", eventHandler.internalError);
+ TaskEvent event = eventHandler.lastTaskEvent;
+ assertEquals(TaskEventType.T_ATTEMPT_KILLED, event.getType());
+ // The attempt killed event sent to TaskImpl must forward the same
+ // reschedule flag that was received in the task attempt kill event.
+ assertTrue(((TaskTAttemptKilledEvent)event).getRescheduleAttempt());
+ }
+
@Test
public void testKillMapTaskWhileFailFinishing() throws Exception {
MockEventHandler eventHandler = new MockEventHandler();
@@ -1406,9 +1479,13 @@ public class TestTaskAttempt{
public static class MockEventHandler implements EventHandler {
public boolean internalError;
+ public TaskEvent lastTaskEvent;
@Override
public void handle(Event event) {
+ if (event instanceof TaskEvent) {
+ lastTaskEvent = (TaskEvent)event;
+ }
if (event instanceof JobEvent) {
JobEvent je = ((JobEvent) event);
if (JobEventType.INTERNAL_ERROR == je.getType()) {
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java
index 84576712b44..4abdff871d8 100755
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java
@@ -55,6 +55,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptKilledEvent;
import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
@@ -92,7 +93,8 @@ public class TestTaskImpl {
private int taskCounter = 0;
private final int partition = 1;
- private InlineDispatcher dispatcher;
+ private InlineDispatcher dispatcher;
+ private MockTaskAttemptEventHandler taskAttemptEventHandler;
private List taskAttempts;
private class MockTaskImpl extends TaskImpl {
@@ -257,7 +259,10 @@ public class TestTaskImpl {
taskSplitMetaInfo = mock(TaskSplitMetaInfo.class);
when(taskSplitMetaInfo.getLocations()).thenReturn(dataLocations);
- taskAttempts = new ArrayList();
+ taskAttempts = new ArrayList();
+
+ taskAttemptEventHandler = new MockTaskAttemptEventHandler();
+ dispatcher.register(TaskAttemptEventType.class, taskAttemptEventHandler);
}
private MockTaskImpl createMockTask(TaskType taskType) {
@@ -294,8 +299,12 @@ public class TestTaskImpl {
}
private void killScheduledTaskAttempt(TaskAttemptId attemptId) {
- mockTask.handle(new TaskTAttemptEvent(attemptId,
- TaskEventType.T_ATTEMPT_KILLED));
+ killScheduledTaskAttempt(attemptId, false);
+ }
+
+ private void killScheduledTaskAttempt(TaskAttemptId attemptId,
+ boolean reschedule) {
+ mockTask.handle(new TaskTAttemptKilledEvent(attemptId, reschedule));
assertTaskScheduledState();
}
@@ -326,11 +335,15 @@ public class TestTaskImpl {
}
private void killRunningTaskAttempt(TaskAttemptId attemptId) {
- mockTask.handle(new TaskTAttemptEvent(attemptId,
- TaskEventType.T_ATTEMPT_KILLED));
+ killRunningTaskAttempt(attemptId, false);
+ }
+
+ private void killRunningTaskAttempt(TaskAttemptId attemptId,
+ boolean reschedule) {
+ mockTask.handle(new TaskTAttemptKilledEvent(attemptId, reschedule));
assertTaskRunningState();
}
-
+
private void failRunningTaskAttempt(TaskAttemptId attemptId) {
mockTask.handle(new TaskTAttemptEvent(attemptId,
TaskEventType.T_ATTEMPT_FAILED));
@@ -423,10 +436,12 @@ public class TestTaskImpl {
*/
public void testKillScheduledTaskAttempt() {
LOG.info("--- START: testKillScheduledTaskAttempt ---");
- mockTask = createMockTask(TaskType.MAP);
+ mockTask = createMockTask(TaskType.MAP);
TaskId taskId = getNewTaskID();
scheduleTaskAttempt(taskId);
- killScheduledTaskAttempt(getLastAttempt().getAttemptId());
+ killScheduledTaskAttempt(getLastAttempt().getAttemptId(), true);
+ assertEquals(TaskAttemptEventType.TA_RESCHEDULE,
+ taskAttemptEventHandler.lastTaskAttemptEvent.getType());
}
@Test
@@ -449,11 +464,13 @@ public class TestTaskImpl {
*/
public void testKillRunningTaskAttempt() {
LOG.info("--- START: testKillRunningTaskAttempt ---");
- mockTask = createMockTask(TaskType.MAP);
+ mockTask = createMockTask(TaskType.MAP);
TaskId taskId = getNewTaskID();
scheduleTaskAttempt(taskId);
launchTaskAttempt(getLastAttempt().getAttemptId());
- killRunningTaskAttempt(getLastAttempt().getAttemptId());
+ killRunningTaskAttempt(getLastAttempt().getAttemptId(), true);
+ assertEquals(TaskAttemptEventType.TA_RESCHEDULE,
+ taskAttemptEventHandler.lastTaskAttemptEvent.getType());
}
@Test
@@ -471,6 +488,28 @@ public class TestTaskImpl {
assertTaskSucceededState();
}
+ @Test
+ /**
+ * Kill the successful map attempt of a succeeded map task, asking for the
+ * next attempt to be rescheduled:
+ * {@link TaskState#SUCCEEDED}->{@link TaskState#SCHEDULED}
+ */
+ public void testKillAttemptForSuccessfulTask() {
+ LOG.info("--- START: testKillAttemptForSuccessfulTask ---");
+ mockTask = createMockTask(TaskType.MAP);
+ TaskId taskId = getNewTaskID();
+ scheduleTaskAttempt(taskId);
+ launchTaskAttempt(getLastAttempt().getAttemptId());
+ commitTaskAttempt(getLastAttempt().getAttemptId());
+ mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(),
+ TaskEventType.T_ATTEMPT_SUCCEEDED));
+ assertTaskSucceededState();
+ // Report the attempt as killed with the reschedule flag set.
+ mockTask.handle(
+ new TaskTAttemptKilledEvent(getLastAttempt().getAttemptId(), true));
+ // The last task attempt event dispatched must be a TA_RESCHEDULE.
+ assertEquals(TaskAttemptEventType.TA_RESCHEDULE,
+ taskAttemptEventHandler.lastTaskAttemptEvent.getType());
+ assertTaskScheduledState();
+ }
+
@Test
public void testTaskProgress() {
LOG.info("--- START: testTaskProgress ---");
@@ -728,8 +767,8 @@ public class TestTaskImpl {
assertEquals(TaskState.FAILED, mockTask.getState());
taskAttempt = taskAttempts.get(3);
taskAttempt.setState(TaskAttemptState.KILLED);
- mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
- TaskEventType.T_ATTEMPT_KILLED));
+ mockTask.handle(new TaskTAttemptKilledEvent(taskAttempt.getAttemptId(),
+ false));
assertEquals(TaskState.FAILED, mockTask.getState());
}
@@ -840,4 +879,14 @@ public class TestTaskImpl {
Counters taskCounters = mockTask.getCounters();
assertEquals("wrong counters for task", specAttemptCounters, taskCounters);
}
+
+ /**
+  * Event handler that remembers the most recent {@link TaskAttemptEvent} it
+  * was given so that tests can inspect it; other events are ignored.
+  */
+ public static class MockTaskAttemptEventHandler implements EventHandler {
+ // Last TaskAttemptEvent handled; null until one has been received.
+ public TaskAttemptEvent lastTaskAttemptEvent;
+ @Override
+ public void handle(Event event) {
+ if (event instanceof TaskAttemptEvent) {
+ lastTaskAttemptEvent = (TaskAttemptEvent)event;
+ }
+ }
+ }
}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java
index ad4e6bc67c9..4deb9ae2f0d 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java
@@ -142,6 +142,7 @@ public class CompletedJob implements org.apache.hadoop.mapreduce.v2.app.job.Job
report.setFinishTime(jobInfo.getFinishTime());
report.setJobName(jobInfo.getJobname());
report.setUser(jobInfo.getUsername());
+ report.setDiagnostics(jobInfo.getErrorInfo());
if ( getTotalMaps() == 0 ) {
report.setMapProgress(1.0f);
@@ -335,6 +336,12 @@ public class CompletedJob implements org.apache.hadoop.mapreduce.v2.app.job.Job
}
}
+ /**
+  * Creates the parser used to read the given job history file. It is a
+  * separate protected method so that subclasses can supply their own parser.
+  *
+  * @param historyFileAbsolute path of the history file to parse.
+  * @return a parser for the file, using the file system that the path
+  *         resolves to under this job's configuration.
+  * @throws IOException if obtaining the file system or creating the parser
+  *         fails.
+  */
+ protected JobHistoryParser createJobHistoryParser(Path historyFileAbsolute)
+ throws IOException {
+ return new JobHistoryParser(historyFileAbsolute.getFileSystem(conf),
+ historyFileAbsolute);
+ }
+
//History data is leisurely loaded when task level data is requested
protected synchronized void loadFullHistoryData(boolean loadTasks,
Path historyFileAbsolute) throws IOException {
@@ -347,7 +354,7 @@ public class CompletedJob implements org.apache.hadoop.mapreduce.v2.app.job.Job
JobHistoryParser parser = null;
try {
final FileSystem fs = historyFileAbsolute.getFileSystem(conf);
- parser = new JobHistoryParser(fs, historyFileAbsolute);
+ parser = createJobHistoryParser(historyFileAbsolute);
final Path jobConfPath = new Path(historyFileAbsolute.getParent(),
JobHistoryUtils.getIntermediateConfFileName(jobId));
final Configuration conf = new Configuration();
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java
index 9608fc8a3ba..c6ddae52ecb 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java
@@ -19,14 +19,18 @@ package org.apache.hadoop.mapreduce.v2.hs;
import static org.junit.Assert.assertEquals;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobACLsManager;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.JobReport;
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
@@ -236,4 +240,27 @@ public class TestJobHistoryEntities {
}
+  /**
+   * Checks that the diagnostics of a {@link CompletedJob} report come from
+   * the error info of the parsed job history.
+   *
+   * The history parser is replaced by a mock (through the overridable
+   * {@code createJobHistoryParser} hook) that returns a failed job whose
+   * error info is a known string; the report built from that job must expose
+   * the same string as its diagnostics.
+   */
+  @Test (timeout=30000)
+  public void testCompletedJobWithDiagnostics() throws Exception {
+    final String expectedDiagnostics = "Job Diagnostics";
+
+    // Stand-in for the history file metadata handed to CompletedJob.
+    HistoryFileInfo fileInfo = mock(HistoryFileInfo.class);
+    when(fileInfo.getConfFile()).thenReturn(fullConfPath);
+    when(fileInfo.getHistoryFile()).thenReturn(fullHistoryPath);
+
+    // A failed job carrying the diagnostics string and no AM attempts.
+    JobInfo failedJobInfo = spy(new JobInfo());
+    when(failedJobInfo.getErrorInfo()).thenReturn(expectedDiagnostics);
+    when(failedJobInfo.getJobStatus()).thenReturn(JobState.FAILED.toString());
+    when(failedJobInfo.getAMInfos()).thenReturn(Collections.emptyList());
+
+    // Parser that yields the failed job instead of reading a history file.
+    final JobHistoryParser stubbedParser = mock(JobHistoryParser.class);
+    when(stubbedParser.parse()).thenReturn(failedJobInfo);
+
+    CompletedJob completedJob =
+        new CompletedJob(conf, jobId, fullHistoryPath, loadTasks, "user",
+            fileInfo, jobAclsManager) {
+          @Override
+          protected JobHistoryParser createJobHistoryParser(
+              Path historyFileAbsolute) throws IOException {
+            return stubbedParser;
+          }
+        };
+
+    final String actualDiagnostics = completedJob.getReport().getDiagnostics();
+    assertEquals(expectedDiagnostics, actualDiagnostics);
+  }
}
diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml
index 8f020736686..f9f4726f849 100644
--- a/hadoop-project/src/site/site.xml
+++ b/hadoop-project/src/site/site.xml
@@ -133,6 +133,7 @@
+
+
+ fs.s3a.path.style.access
+ Enable S3 path style access ie disabling the default virtual hosting behaviour.
+ Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
+
+
+
fs.s3a.proxy.host
Hostname of the (optional) proxy server for S3 connections.
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java
index ae1539d4c8d..4a0bfbbfc07 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java
@@ -19,10 +19,14 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.S3ClientOptions;
+import com.amazonaws.services.s3.model.AmazonS3Exception;
+
import org.apache.commons.lang.StringUtils;
import com.amazonaws.AmazonClientException;
import org.apache.hadoop.conf.Configuration;
-
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
@@ -30,17 +34,19 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.File;
import java.net.URI;
-import java.io.IOException;
+import java.lang.reflect.Field;
import org.apache.hadoop.security.ProviderUtils;
import org.apache.hadoop.security.alias.CredentialProvider;
import org.apache.hadoop.security.alias.CredentialProviderFactory;
-
+import org.apache.http.HttpStatus;
import org.junit.rules.TemporaryFolder;
public class TestS3AConfiguration {
@@ -354,4 +360,39 @@ public class TestS3AConfiguration {
assertEquals("SecretKey incorrect.", "456", creds.getAccessSecret());
}
+
+  /**
+   * Verifies that setting {@code Constants.PATH_STYLE_ACCESS} to true results
+   * in an S3 client whose {@link S3ClientOptions} report path style access,
+   * and then attempts a small write/read round trip through the filesystem.
+   *
+   * An {@link AmazonS3Exception} is tolerated only when its status code is
+   * {@code 301 Moved Permanently}; any other status fails the test.
+   */
+  @Test
+  public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() throws Exception {
+
+    conf = new Configuration();
+    conf.set(Constants.PATH_STYLE_ACCESS, Boolean.toString(true));
+    assertTrue(conf.getBoolean(Constants.PATH_STYLE_ACCESS, false));
+
+    try {
+      fs = S3ATestUtils.createTestFileSystem(conf);
+      // The client options are not exposed publicly, so read them reflectively.
+      final Object object = getClientOptionsField(fs.getAmazonS3Client(), "clientOptions");
+      assertNotNull(object);
+      assertTrue("Unexpected type found for clientOptions!", object instanceof S3ClientOptions);
+      assertTrue("Expected to find path style access to be switched on!", ((S3ClientOptions) object).isPathStyleAccess());
+      byte[] file = ContractTestUtils.toAsciiByteArray("test file");
+      ContractTestUtils.writeAndRead(fs, new Path("/path/style/access/testFile"), file, file.length, conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
+    } catch (final AmazonS3Exception e) {
+      LOG.error("Caught exception: ", e);
+      // Catch/pass standard path style access behaviour when live bucket
+      // isn't in the same region as the s3 client default. See
+      // http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
+      // Expected value goes first so a failure message reads correctly.
+      assertEquals(HttpStatus.SC_MOVED_PERMANENTLY, e.getStatusCode());
+    }
+  }
+
+  /**
+   * Reads a (possibly non-public) field from an S3 client via reflection.
+   *
+   * The field is looked up on the client's runtime class first and then on
+   * each superclass in turn, so the lookup also works when the client is an
+   * instance of a subclass of the class that declares the field.
+   *
+   * @param s3client the client instance to inspect
+   * @param field the name of the field to read
+   * @return the current value of the field on {@code s3client}
+   * @throws NoSuchFieldException if no class in the client's hierarchy
+   *         declares a field with that name
+   * @throws IllegalAccessException if the field value cannot be read
+   */
+  private Object getClientOptionsField(AmazonS3Client s3client, String field)
+      throws NoSuchFieldException, IllegalAccessException {
+    // getDeclaredField only sees fields declared directly on the given class
+    // and throws (never returns null) when the field is absent, so walk up
+    // the hierarchy instead of asserting on the result.
+    for (Class<?> type = s3client.getClass(); type != null;
+        type = type.getSuperclass()) {
+      final Field clientOptionsProps;
+      try {
+        clientOptionsProps = type.getDeclaredField(field);
+      } catch (NoSuchFieldException notDeclaredHere) {
+        // Not declared at this level; try the superclass.
+        continue;
+      }
+      clientOptionsProps.setAccessible(true);
+      return clientOptionsProps.get(s3client);
+    }
+    throw new NoSuchFieldException(field);
+  }
}
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
index d2598a42d0e..cabb7e352aa 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
@@ -191,7 +191,7 @@ public class SimpleCopyListing extends CopyListing {
authority = fs.getUri().getAuthority();
}
- return new Path(scheme, authority, path.toUri().getPath());
+ return new Path(scheme, authority, makeQualified(path).toUri().getPath());
}
/**
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
index 04de8e4d6d9..90e6840f714 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
@@ -674,4 +674,42 @@ public class TestDistCpSync {
testAndVerify(numCreatedModified);
}
+
+  /**
+   * Creates the initial data set for {@code testSync9}: a single file
+   * {@code foo/f1} under the given directory.
+   *
+   * @param dir directory under which the file is created
+   */
+  private void initData9(Path dir) throws Exception {
+    final Path firstFile = new Path(new Path(dir, "foo"), "f1");
+    DFSTestUtil.createFile(dfs, firstFile, BLOCK_SIZE, DATA_NUM, 0L);
+  }
+
+  /**
+   * Applies the change for {@code testSync9}: adds a second file
+   * {@code foo/f2} under the given directory.
+   *
+   * @param dir directory under which the file is created
+   */
+  private void changeData9(Path dir) throws Exception {
+    final Path secondFile = new Path(new Path(dir, "foo"), "f2");
+    DFSTestUtil.createFile(dfs, secondFile, BLOCK_SIZE, DATA_NUM, 0L);
+  }
+
+  /**
+   * Runs a snapshot-diff based sync where the source directory is passed to
+   * DistCp as a relative path ({@code "source"}), and verifies that the
+   * target ends up matching the source.
+   */
+  @Test
+  public void testSync9() throws Exception {
+
+    // The relative name "source" lives under the filesystem working
+    // directory, i.e. /user/$USER/source.
+    final Path relativeSourceDir =
+        new Path(dfs.getWorkingDirectory(), "source");
+
+    // Same starting content and an "s1" snapshot on both sides.
+    initData9(relativeSourceDir);
+    initData9(target);
+    dfs.allowSnapshot(relativeSourceDir);
+    dfs.allowSnapshot(target);
+    dfs.createSnapshot(relativeSourceDir, "s1");
+    dfs.createSnapshot(target, "s1");
+
+    // Change the source only, then capture the change as "s2".
+    changeData9(relativeSourceDir);
+    dfs.createSnapshot(relativeSourceDir, "s2");
+
+    final String[] distCpArgs = {
+        "-update", "-diff", "s1", "s2", "source", target.toString()
+    };
+    final DistCp distCp = new DistCp(conf, OptionsParser.parse(distCpArgs));
+    distCp.execute();
+
+    verifyCopy(dfs.getFileStatus(relativeSourceDir),
+        dfs.getFileStatus(target), false);
+  }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml
index c118603d5e5..dba8fc0a4d1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml
@@ -131,6 +131,12 @@
test-jar
test
+
+ org.apache.hadoop
+ hadoop-yarn-common
+ test-jar
+ test
+
org.apache.hadoop
hadoop-hdfs
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
index cbe03480550..2b85ba8dc81 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
@@ -104,6 +104,7 @@ import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
import org.apache.log4j.LogManager;
import com.google.common.annotations.VisibleForTesting;
+import com.sun.jersey.api.client.ClientHandlerException;
/**
* An ApplicationMaster for executing shell commands on a set of launched
@@ -1149,13 +1150,14 @@ public class ApplicationMaster {
putContainerEntity(timelineClient,
container.getId().getApplicationAttemptId(),
entity));
- } catch (YarnException | IOException e) {
+ } catch (YarnException | IOException | ClientHandlerException e) {
LOG.error("Container start event could not be published for "
+ container.getId().toString(), e);
}
}
- private void publishContainerEndEvent(
+ @VisibleForTesting
+ void publishContainerEndEvent(
final TimelineClient timelineClient, ContainerStatus container,
String domainId, UserGroupInformation ugi) {
final TimelineEntity entity = new TimelineEntity();
@@ -1177,7 +1179,7 @@ public class ApplicationMaster {
putContainerEntity(timelineClient,
container.getContainerId().getApplicationAttemptId(),
entity));
- } catch (YarnException | IOException e) {
+ } catch (YarnException | IOException | ClientHandlerException e) {
LOG.error("Container end event could not be published for "
+ container.getContainerId().toString(), e);
}
@@ -1212,7 +1214,7 @@ public class ApplicationMaster {
try {
TimelinePutResponse response = timelineClient.putEntities(entity);
processTimelineResponseErrors(response);
- } catch (YarnException | IOException e) {
+ } catch (YarnException | IOException | ClientHandlerException e) {
LOG.error("App Attempt "
+ (appEvent.equals(DSEvent.DS_APP_ATTEMPT_START) ? "start" : "end")
+ " event could not be published for "
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
index 65360508caa..2b46fca4b45 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
@@ -18,6 +18,10 @@
package org.apache.hadoop.yarn.applications.distributedshell;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.when;
+
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
@@ -27,6 +31,7 @@ import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.InetAddress;
+import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
@@ -46,14 +51,24 @@ import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.ServerSocketUtil;
+import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.JarFinder;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.api.records.timeline.TimelineDomain;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities;
+import org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster;
+import org.apache.hadoop.yarn.client.api.impl.DirectTimelineWriter;
+import org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl;
+import org.apache.hadoop.yarn.client.api.impl.TimelineWriter;
+import org.apache.hadoop.yarn.client.api.impl.TestTimelineClient;
+import org.apache.hadoop.yarn.client.api.TimelineClient;
import org.apache.hadoop.yarn.client.api.YarnClient;
+
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.MiniYARNCluster;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
@@ -61,6 +76,7 @@ import org.apache.hadoop.yarn.server.timeline.PluginStoreTestUtils;
import org.apache.hadoop.yarn.server.timeline.NameValuePair;
import org.apache.hadoop.yarn.server.timeline.TimelineVersion;
import org.apache.hadoop.yarn.server.timeline.TimelineVersionWatcher;
+import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.junit.After;
import org.junit.Assert;
@@ -69,6 +85,8 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
+import com.sun.jersey.api.client.ClientHandlerException;
+
public class TestDistributedShell {
private static final Log LOG =
@@ -77,6 +95,7 @@ public class TestDistributedShell {
protected MiniYARNCluster yarnCluster = null;
protected MiniDFSCluster hdfsCluster = null;
private FileSystem fs = null;
+ private TimelineWriter spyTimelineWriter;
protected YarnConfiguration conf = null;
private static final int NUM_NMS = 1;
private static final float DEFAULT_TIMELINE_VERSION = 1.0f;
@@ -865,6 +884,37 @@ public class TestDistributedShell {
}
}
+  /**
+   * Checks that publishing a container end event does not propagate an
+   * exception when the timeline writer fails at runtime.
+   *
+   * The timeline client is built with a spied {@link DirectTimelineWriter}
+   * that {@code TestTimelineClient.mockEntityClientResponse} configures with
+   * its runtime-error flag set; the test passes as long as
+   * {@code publishContainerEndEvent} returns normally.
+   */
+  @Test
+  public void testDSTimelineClientWithConnectionRefuse() throws Exception {
+    ApplicationMaster appMaster = new ApplicationMaster();
+
+    // Capture the writer created during client start-up so it can be stubbed.
+    TimelineClientImpl timelineClient = new TimelineClientImpl() {
+      @Override
+      protected TimelineWriter createTimelineWriter(Configuration conf,
+          UserGroupInformation authUgi,
+          com.sun.jersey.api.client.Client jerseyClient,
+          URI resURI) throws IOException {
+        spyTimelineWriter =
+            spy(new DirectTimelineWriter(authUgi, jerseyClient, resURI));
+        return spyTimelineWriter;
+      }
+    };
+    timelineClient.init(conf);
+    timelineClient.start();
+
+    // No status, no error response, but a runtime error on the writer.
+    TestTimelineClient.mockEntityClientResponse(spyTimelineWriter, null,
+        false, true);
+    try {
+      UserGroupInformation ugi = mock(UserGroupInformation.class);
+      when(ugi.getShortUserName()).thenReturn("user1");
+      ContainerStatus completedContainer = ContainerStatus.newInstance(
+          BuilderUtils.newContainerId(1, 1, 1, 1), ContainerState.COMPLETE,
+          "", 1);
+      // The call must return normally: no ClientHandlerException may escape.
+      appMaster.publishContainerEndEvent(timelineClient, completedContainer,
+          "domainId", ugi);
+    } finally {
+      timelineClient.stop();
+    }
+  }
+
protected void waitForNMsToRegister() throws Exception {
int sec = 60;
while (sec >= 0) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java
index ef4622972f3..8c600416954 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java
@@ -24,6 +24,7 @@ import java.lang.reflect.UndeclaredThrowableException;
import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
+import java.net.SocketTimeoutException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
@@ -116,7 +117,9 @@ public class TimelineClientImpl extends TimelineClient {
TimelineClientConnectionRetry connectionRetry;
// Abstract class for an operation that should be retried by timeline client
- private static abstract class TimelineClientRetryOp {
+ @Private
+ @VisibleForTesting
+ public static abstract class TimelineClientRetryOp {
// The operation that should be retried
public abstract Object run() throws IOException;
// The method to indicate if we should retry given the incoming exception
@@ -449,27 +452,8 @@ public class TimelineClientImpl extends TimelineClient {
final PrivilegedExceptionAction> action)
throws IOException, YarnException {
// Set up the retry operation
- TimelineClientRetryOp tokenRetryOp = new TimelineClientRetryOp() {
-
- @Override
- public Object run() throws IOException {
- // Try pass the request, if fail, keep retrying
- authUgi.checkTGTAndReloginFromKeytab();
- try {
- return authUgi.doAs(action);
- } catch (UndeclaredThrowableException e) {
- throw new IOException(e.getCause());
- } catch (InterruptedException e) {
- throw new IOException(e);
- }
- }
-
- @Override
- public boolean shouldRetryOn(Exception e) {
- // Only retry on connection exceptions
- return (e instanceof ConnectException);
- }
- };
+ TimelineClientRetryOp tokenRetryOp =
+ createTimelineClientRetryOpForOperateDelegationToken(action);
return connectionRetry.retryOn(tokenRetryOp);
}
@@ -680,4 +664,50 @@ public class TimelineClientImpl extends TimelineClient {
public void setTimelineWriter(TimelineWriter writer) {
this.timelineWriter = writer;
}
+
+  /**
+   * Builds the retry operation that wraps a delegation token action.
+   *
+   * Kept as an overridable factory so tests can substitute their own retry
+   * operation.
+   *
+   * @param action the privileged action to execute as this client's
+   *        authenticated user
+   * @return a retry operation that runs {@code action}
+   * @throws IOException declared so overriding implementations may fail
+   *         while constructing the operation
+   */
+  @Private
+  @VisibleForTesting
+  public TimelineClientRetryOp
+      createTimelineClientRetryOpForOperateDelegationToken(
+          final PrivilegedExceptionAction<?> action) throws IOException {
+    return new TimelineClientRetryOpForOperateDelegationToken(
+        this.authUgi, action);
+  }
+
+  /**
+   * Retry operation that executes a delegation token action as a given user
+   * and asks to be retried on connection failures and socket timeouts.
+   */
+  @Private
+  @VisibleForTesting
+  public class TimelineClientRetryOpForOperateDelegationToken
+      extends TimelineClientRetryOp {
+
+    private final UserGroupInformation authUgi;
+    private final PrivilegedExceptionAction<?> action;
+
+    /**
+     * @param authUgi the user the action is executed as
+     * @param action the privileged action to execute
+     */
+    public TimelineClientRetryOpForOperateDelegationToken(
+        UserGroupInformation authUgi, PrivilegedExceptionAction<?> action) {
+      this.authUgi = authUgi;
+      this.action = action;
+    }
+
+    /**
+     * Re-logs in from the keytab if needed and runs the action as
+     * {@code authUgi}.
+     *
+     * @return whatever the action returns
+     * @throws IOException if the action fails; an undeclared failure is
+     *         rethrown with its cause wrapped, and an interruption is
+     *         rethrown wrapped after the interrupt flag has been restored
+     */
+    @Override
+    public Object run() throws IOException {
+      // Try pass the request, if fail, keep retrying
+      authUgi.checkTGTAndReloginFromKeytab();
+      try {
+        return authUgi.doAs(action);
+      } catch (UndeclaredThrowableException e) {
+        throw new IOException(e.getCause());
+      } catch (InterruptedException e) {
+        // Keep the interruption visible to callers further up the stack.
+        Thread.currentThread().interrupt();
+        throw new IOException(e);
+      }
+    }
+
+    /**
+     * @param e the exception raised by the last attempt
+     * @return true for {@link ConnectException} and
+     *         {@link SocketTimeoutException}, false otherwise
+     */
+    @Override
+    public boolean shouldRetryOn(Exception e) {
+      // retry on connection exceptions
+      // and SocketTimeoutException
+      return (e instanceof ConnectException
+          || e instanceof SocketTimeoutException);
+    }
+  }
+
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java
index 39fc8deb3ae..41b788dcbac 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java
@@ -27,7 +27,9 @@ import static org.mockito.Mockito.when;
import java.io.IOException;
import java.net.ConnectException;
+import java.net.SocketTimeoutException;
import java.net.URI;
+import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -234,6 +236,8 @@ public class TestTimelineClient {
UserGroupInformation.setConfiguration(conf);
TimelineClientImpl client = createTimelineClient(conf);
+ TimelineClientImpl clientFake =
+ createTimelineClientFakeTimelineClientRetryOp(conf);
TestTimlineDelegationTokenSecretManager dtManager =
new TestTimlineDelegationTokenSecretManager();
try {
@@ -278,8 +282,24 @@ public class TestTimelineClient {
} catch (RuntimeException ce) {
assertException(client, ce);
}
+
+ // Test DelegationTokenOperationsRetry on SocketTimeoutException
+ try {
+ TimelineDelegationTokenIdentifier timelineDT =
+ new TimelineDelegationTokenIdentifier(
+ new Text("tester"), new Text("tester"), new Text("tester"));
+ clientFake.cancelDelegationToken(
+ new Token(timelineDT.getBytes(),
+ dtManager.createPassword(timelineDT),
+ timelineDT.getKind(),
+ new Text("0.0.0.0:8188")));
+ assertFail();
+ } catch (RuntimeException ce) {
+ assertException(clientFake, ce);
+ }
} finally {
client.stop();
+ clientFake.stop();
dtManager.stopThreads();
}
}
@@ -298,7 +318,7 @@ public class TestTimelineClient {
client.connectionRetry.getRetired());
}
- private static ClientResponse mockEntityClientResponse(
+ public static ClientResponse mockEntityClientResponse(
TimelineWriter spyTimelineWriter, ClientResponse.Status status,
boolean hasError, boolean hasRuntimeError) {
ClientResponse response = mock(ClientResponse.class);
@@ -393,6 +413,27 @@ public class TestTimelineClient {
return client;
}
+  /**
+   * Creates and starts a timeline client whose delegation token retry
+   * operation always fails with a {@link SocketTimeoutException}.
+   *
+   * The retry operation is a spy on the real implementation with
+   * {@code run()} stubbed to throw.
+   *
+   * @param conf configuration used to initialise the client
+   * @return the started client; the caller is responsible for stopping it
+   */
+  private TimelineClientImpl createTimelineClientFakeTimelineClientRetryOp(
+      YarnConfiguration conf) {
+    TimelineClientImpl client = new TimelineClientImpl() {
+
+      @Override
+      public TimelineClientRetryOp
+          createTimelineClientRetryOpForOperateDelegationToken(
+              final PrivilegedExceptionAction<?> action) throws IOException {
+        TimelineClientRetryOpForOperateDelegationToken op =
+            spy(new TimelineClientRetryOpForOperateDelegationToken(
+                UserGroupInformation.getCurrentUser(), action));
+        // Every attempt times out, so the client has to go through its
+        // retry handling.
+        doThrow(new SocketTimeoutException("Test socketTimeoutException"))
+            .when(op).run();
+        return op;
+      }
+    };
+    client.init(conf);
+    client.start();
+    return client;
+  }
+
private static class TestTimlineDelegationTokenSecretManager extends
AbstractDelegationTokenSecretManager {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/JerseyTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/JerseyTestBase.java
index 7a225a3999e..d537fa748f9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/JerseyTestBase.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/JerseyTestBase.java
@@ -19,9 +19,10 @@
package org.apache.hadoop.yarn.webapp;
import java.io.IOException;
+import java.util.Random;
import org.apache.hadoop.net.ServerSocketUtil;
-import org.junit.Before;
+
import com.sun.jersey.test.framework.JerseyTest;
import com.sun.jersey.test.framework.WebAppDescriptor;
@@ -30,9 +31,16 @@ public abstract class JerseyTestBase extends JerseyTest {
super(appDescriptor);
}
- @Before
- public void initializeJerseyPort() throws IOException {
- int jerseyPort = ServerSocketUtil.getPort(9998, 10);
- System.setProperty("jersey.test.port", Integer.toString(jerseyPort));
+  /**
+   * Picks the port for the Jersey test container.
+   *
+   * A random offset in [0, 1000) is added to the requested port, then
+   * {@code ServerSocketUtil.getPort} is asked (with 10 tries) for a port
+   * starting from there. If that lookup fails the randomised port is used
+   * unchanged. The result is handed to the superclass implementation.
+   *
+   * @param port the base port requested by the test framework
+   * @return the port chosen by the superclass for the adjusted candidate
+   */
+  @Override
+  protected int getPort(int port) {
+    int candidate = port + new Random().nextInt(1000);
+    try {
+      candidate = ServerSocketUtil.getPort(candidate, 10);
+    } catch (IOException ignored) {
+      // Best effort: if no free port was obtained after the 10 tries, carry
+      // on with the randomised candidate.
+    }
+    return super.getPort(candidate);
   }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index 8d09aa75bcc..b8cca28e82d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -296,20 +296,8 @@ public class ContainerManagerImpl extends CompositeService implements
if (LOG.isDebugEnabled()) {
LOG.debug("Recovering container with state: " + rcs);
}
-
recoverContainer(rcs);
}
-
- String diagnostic = "Application marked finished during recovery";
- for (ApplicationId appId : appsState.getFinishedApplications()) {
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("Application marked finished during recovery: " + appId);
- }
-
- dispatcher.getEventHandler().handle(
- new ApplicationFinishEvent(appId, diagnostic));
- }
} else {
LOG.info("Not a recoverable state store. Nothing to recover.");
}
@@ -1332,11 +1320,6 @@ public class ContainerManagerImpl extends CompositeService implements
} else if (appsFinishedEvent.getReason() == CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER) {
diagnostic = "Application killed by ResourceManager";
}
- try {
- this.context.getNMStateStore().storeFinishedApplication(appID);
- } catch (IOException e) {
- LOG.error("Unable to update application state in store", e);
- }
this.dispatcher.getEventHandler().handle(
new ApplicationFinishEvent(appID,
diagnostic));
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
index 81d6c57de6e..26dea2daa0d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
@@ -84,6 +84,7 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
private static final String APPLICATIONS_KEY_PREFIX =
"ContainerManager/applications/";
+ @Deprecated
private static final String FINISHED_APPS_KEY_PREFIX =
"ContainerManager/finishedApps/";
@@ -392,20 +393,6 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
state.applications.add(
ContainerManagerApplicationProto.parseFrom(entry.getValue()));
}
-
- state.finishedApplications = new ArrayList();
- keyPrefix = FINISHED_APPS_KEY_PREFIX;
- iter.seek(bytes(keyPrefix));
- while (iter.hasNext()) {
- Entry entry = iter.next();
- String key = asString(entry.getKey());
- if (!key.startsWith(keyPrefix)) {
- break;
- }
- ApplicationId appId =
- ConverterUtils.toApplicationId(key.substring(keyPrefix.length()));
- state.finishedApplications.add(appId);
- }
} catch (DBException e) {
throw new IOException(e);
} finally {
@@ -414,6 +401,8 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
}
}
+ cleanupDeprecatedFinishedApps();
+
return state;
}
@@ -433,21 +422,6 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
}
}
- @Override
- public void storeFinishedApplication(ApplicationId appId)
- throws IOException {
- if (LOG.isDebugEnabled()) {
- LOG.debug("storeFinishedApplication.appId: " + appId);
- }
-
- String key = FINISHED_APPS_KEY_PREFIX + appId;
- try {
- db.put(bytes(key), new byte[0]);
- } catch (DBException e) {
- throw new IOException(e);
- }
- }
-
@Override
public void removeApplication(ApplicationId appId)
throws IOException {
@@ -460,8 +434,6 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
try {
String key = APPLICATIONS_KEY_PREFIX + appId;
batch.delete(bytes(key));
- key = FINISHED_APPS_KEY_PREFIX + appId;
- batch.delete(bytes(key));
db.write(batch);
} finally {
batch.close();
@@ -979,6 +951,52 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
}
}
+  /**
+   * Removes all entries stored under the deprecated finished-applications
+   * key prefix.
+   *
+   * This is best effort: any failure is logged as a warning and otherwise
+   * ignored.
+   */
+  @SuppressWarnings("deprecation")
+  private void cleanupDeprecatedFinishedApps() {
+    final String deprecatedPrefix = FINISHED_APPS_KEY_PREFIX;
+    try {
+      cleanupKeysWithPrefix(deprecatedPrefix);
+    } catch (Exception e) {
+      final String warning = "cleanup keys with prefix " + deprecatedPrefix
+          + " from leveldb failed";
+      LOG.warn(warning, e);
+    }
+  }
+
+  /**
+   * Deletes the leveldb entries whose keys start with the given prefix.
+   *
+   * The iterator is positioned at the prefix and advanced until the first
+   * key that does not start with it; every matching key is added to a write
+   * batch, which is written once the scan ends. The batch and the iterator
+   * are closed in all cases.
+   *
+   * @param prefix key prefix identifying the entries to delete
+   * @throws IOException if a {@link DBException} occurs while scanning or
+   *         writing (it is wrapped), or if closing a resource fails
+   */
+  private void cleanupKeysWithPrefix(String prefix) throws IOException {
+    WriteBatch pendingDeletes = null;
+    LeveldbIterator cursor = null;
+    try {
+      cursor = new LeveldbIterator(db);
+      try {
+        pendingDeletes = db.createWriteBatch();
+        cursor.seek(bytes(prefix));
+        while (cursor.hasNext()) {
+          final byte[] rawKey = cursor.next().getKey();
+          final String readableKey = asString(rawKey);
+          if (readableKey.startsWith(prefix)) {
+            pendingDeletes.delete(rawKey);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("cleanup " + readableKey + " from leveldb");
+            }
+          } else {
+            // First key outside the prefix ends the scan.
+            break;
+          }
+        }
+        db.write(pendingDeletes);
+      } catch (DBException dbFailure) {
+        throw new IOException(dbFailure);
+      } finally {
+        if (pendingDeletes != null) {
+          pendingDeletes.close();
+        }
+      }
+    } catch (DBException dbFailure) {
+      throw new IOException(dbFailure);
+    } finally {
+      if (cursor != null) {
+        cursor.close();
+      }
+    }
+  }
+
private String getLogDeleterKey(ApplicationId appId) {
return LOG_DELETER_KEY_PREFIX + appId;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
index d5dce9bb2ee..a887e71e9e1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
@@ -58,10 +58,6 @@ public class NMNullStateStoreService extends NMStateStoreService {
ContainerManagerApplicationProto p) throws IOException {
}
- @Override
- public void storeFinishedApplication(ApplicationId appId) {
- }
-
@Override
public void removeApplication(ApplicationId appId) throws IOException {
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
index 84c5aa982a7..463815ec9c1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
@@ -52,15 +52,11 @@ public abstract class NMStateStoreService extends AbstractService {
public static class RecoveredApplicationsState {
List applications;
- List finishedApplications;
public List getApplications() {
return applications;
}
- public List getFinishedApplications() {
- return finishedApplications;
- }
}
public enum RecoveredContainerStatus {
@@ -258,14 +254,6 @@ public abstract class NMStateStoreService extends AbstractService {
public abstract void storeApplication(ApplicationId appId,
ContainerManagerApplicationProto p) throws IOException;
- /**
- * Record that an application has finished
- * @param appId the application ID
- * @throws IOException
- */
- public abstract void storeFinishedApplication(ApplicationId appId)
- throws IOException;
-
/**
* Remove records corresponding to an application
* @param appId the application ID
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
index 2e014decbc8..9fa3fcc13c8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
@@ -259,6 +259,10 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
assertEquals(1, context.getApplications().size());
app = context.getApplications().get(appId);
assertNotNull(app);
+ // no longer saving FINISH_APP event in NM stateStore,
+ // simulate by resending FINISH_APP event
+ cm.handle(new CMgrCompletedAppsEvent(finishedApps,
+ CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER));
waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
assertTrue(context.getApplicationACLsManager().checkAccess(
UserGroupInformation.createRemoteUser(modUser),
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
index a1c95ab03b9..12798963390 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
@@ -44,7 +44,6 @@ import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl;
public class NMMemoryStateStoreService extends NMStateStoreService {
private Map apps;
- private Set finishedApps;
private Map containerStates;
private Map trackerStates;
private Map deleteTasks;
@@ -59,7 +58,6 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
@Override
protected void initStorage(Configuration conf) {
apps = new HashMap();
- finishedApps = new HashSet();
containerStates = new HashMap();
nmTokenState = new RecoveredNMTokensState();
nmTokenState.applicationMasterKeys =
@@ -86,7 +84,6 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
RecoveredApplicationsState state = new RecoveredApplicationsState();
state.applications = new ArrayList(
apps.values());
- state.finishedApplications = new ArrayList(finishedApps);
return state;
}
@@ -98,16 +95,10 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
apps.put(appId, protoCopy);
}
- @Override
- public synchronized void storeFinishedApplication(ApplicationId appId) {
- finishedApps.add(appId);
- }
-
@Override
public synchronized void removeApplication(ApplicationId appId)
throws IOException {
apps.remove(appId);
- finishedApps.remove(appId);
}
@Override
@@ -393,7 +384,6 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
logDeleterState.remove(appId);
}
-
private static class TrackerState {
Map inProgressMap =
new HashMap();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
index 08b49e75383..47468d6c25e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
@@ -174,7 +174,6 @@ public class TestNMLeveldbStateStoreService {
// test empty when no state
RecoveredApplicationsState state = stateStore.loadApplicationsState();
assertTrue(state.getApplications().isEmpty());
- assertTrue(state.getFinishedApplications().isEmpty());
// store an application and verify recovered
final ApplicationId appId1 = ApplicationId.newInstance(1234, 1);
@@ -188,10 +187,8 @@ public class TestNMLeveldbStateStoreService {
state = stateStore.loadApplicationsState();
assertEquals(1, state.getApplications().size());
assertEquals(appProto1, state.getApplications().get(0));
- assertTrue(state.getFinishedApplications().isEmpty());
- // finish an application and add a new one
- stateStore.storeFinishedApplication(appId1);
+ // add a new app
final ApplicationId appId2 = ApplicationId.newInstance(1234, 2);
builder = ContainerManagerApplicationProto.newBuilder();
builder.setId(((ApplicationIdPBImpl) appId2).getProto());
@@ -203,18 +200,13 @@ public class TestNMLeveldbStateStoreService {
assertEquals(2, state.getApplications().size());
assertTrue(state.getApplications().contains(appProto1));
assertTrue(state.getApplications().contains(appProto2));
- assertEquals(1, state.getFinishedApplications().size());
- assertEquals(appId1, state.getFinishedApplications().get(0));
// test removing an application
- stateStore.storeFinishedApplication(appId2);
stateStore.removeApplication(appId2);
restartStateStore();
state = stateStore.loadApplicationsState();
assertEquals(1, state.getApplications().size());
assertEquals(appProto1, state.getApplications().get(0));
- assertEquals(1, state.getFinishedApplications().size());
- assertEquals(appId1, state.getFinishedApplications().get(0));
}
@Test
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java
index ec2708ebb3c..121c418fc3a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java
@@ -163,7 +163,7 @@ public class NodesListManager extends CompositeService implements
private void setDecomissionedNMs() {
Set excludeList = hostsReader.getExcludedHosts();
for (final String host : excludeList) {
- UnknownNodeId nodeId = new UnknownNodeId(host);
+ NodeId nodeId = createUnknownNodeId(host);
RMNodeImpl rmNode = new RMNodeImpl(nodeId,
rmContext, host, -1, -1, new UnknownNode(host), null, null);
rmContext.getInactiveRMNodes().put(nodeId, rmNode);
@@ -430,38 +430,8 @@ public class NodesListManager extends CompositeService implements
* A NodeId instance needed upon startup for populating inactive nodes Map.
* It only knows the hostname/ip and marks the port to -1 or invalid.
*/
- public static class UnknownNodeId extends NodeId {
-
- private String host;
-
- public UnknownNodeId(String host) {
- this.host = host;
- }
-
- @Override
- public String getHost() {
- return this.host;
- }
-
- @Override
- protected void setHost(String hst) {
-
- }
-
- @Override
- public int getPort() {
- return -1;
- }
-
- @Override
- protected void setPort(int port) {
-
- }
-
- @Override
- protected void build() {
-
- }
+ public static NodeId createUnknownNodeId(String host) {
+ return NodeId.newInstance(host, -1);
}
/**
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
index 5f8317e890a..9b807164e7e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
@@ -786,8 +786,8 @@ public class RMNodeImpl implements RMNode, EventHandler {
if (previousRMNode != null) {
rmNode.updateMetricsForRejoinedNode(previousRMNode.getState());
} else {
- NodesListManager.UnknownNodeId unknownNodeId =
- new NodesListManager.UnknownNodeId(nodeId.getHost());
+ NodeId unknownNodeId =
+ NodesListManager.createUnknownNodeId(nodeId.getHost());
previousRMNode =
rmNode.context.getInactiveRMNodes().remove(unknownNodeId);
if (previousRMNode != null) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
index aabdf9c286b..fbcb91c453b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
@@ -1348,13 +1348,6 @@ public class LeafQueue extends AbstractCSQueue {
// Book-keeping
if (removed) {
- // track reserved resource for metrics, for normal container
- // getReservedResource will be null.
- Resource reservedRes = rmContainer.getReservedResource();
- if (reservedRes != null && !reservedRes.equals(Resources.none())) {
- decReservedResource(node.getPartition(), reservedRes);
- }
-
// Inform the ordering policy
orderingPolicy.containerReleased(application, rmContainer);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
index f474aad2d0d..35329d27f38 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
@@ -246,6 +246,8 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt {
// Update reserved metrics
queue.getMetrics().unreserveResource(getUser(),
rmContainer.getReservedResource());
+ queue.decReservedResource(node.getPartition(),
+ rmContainer.getReservedResource());
return true;
}
return false;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
index 331f3acde65..bb31f6e35f3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
@@ -28,6 +28,8 @@ import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
+import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
@@ -249,7 +251,7 @@ public class TestClientRMService {
Assert.assertTrue(report.getNodeLabels() != null
&& report.getNodeLabels().isEmpty());
}
-
+
rpc.stopProxy(client, conf);
rm.close();
}
@@ -1566,4 +1568,49 @@ public class TestClientRMService {
Assert.assertEquals("Incorrect priority has been returned", expected,
updateApplicationPriority.getApplicationPriority().getPriority());
}
+
+ private void createExcludeFile(String filename) throws IOException {
+ File file = new File(filename);
+ if (file.exists()) {
+ file.delete();
+ }
+
+ FileOutputStream out = new FileOutputStream(file);
+ out.write("decommisssionedHost".getBytes());
+ out.close();
+ }
+
+ @Test
+ public void testRMStartWithDecommissionedNode() throws Exception {
+ String excludeFile = "excludeFile";
+ createExcludeFile(excludeFile);
+ YarnConfiguration conf = new YarnConfiguration();
+ conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH,
+ excludeFile);
+ MockRM rm = new MockRM(conf) {
+ protected ClientRMService createClientRMService() {
+ return new ClientRMService(this.rmContext, scheduler,
+ this.rmAppManager, this.applicationACLsManager, this.queueACLsManager,
+ this.getRMContext().getRMDelegationTokenSecretManager());
+ };
+ };
+ rm.start();
+
+ YarnRPC rpc = YarnRPC.create(conf);
+ InetSocketAddress rmAddress = rm.getClientRMService().getBindAddress();
+ LOG.info("Connecting to ResourceManager at " + rmAddress);
+ ApplicationClientProtocol client =
+ (ApplicationClientProtocol) rpc
+ .getProxy(ApplicationClientProtocol.class, rmAddress, conf);
+
+ // Make call
+ GetClusterNodesRequest request =
+ GetClusterNodesRequest.newInstance(EnumSet.allOf(NodeState.class));
+ List nodeReports = client.getClusterNodes(request).getNodeReports();
+ Assert.assertEquals(1, nodeReports.size());
+
+ rm.stop();
+ rpc.stopProxy(client, conf);
+ new File(excludeFile).delete();
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java
index 6ba360bda4f..7c03574cbcd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java
@@ -690,8 +690,8 @@ public class TestRMNodeTransitions {
@Test
public void testUnknownNodeId() {
- NodesListManager.UnknownNodeId nodeId =
- new NodesListManager.UnknownNodeId("host1");
+ NodeId nodeId =
+ NodesListManager.createUnknownNodeId("host1");
RMNodeImpl node =
new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null, null);
rmContext.getInactiveRMNodes().putIfAbsent(nodeId,node);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java
index 84eba109611..f94c963ec4d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.security.SecurityUtilTestHelper;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
@@ -37,6 +38,8 @@ import org.apache.hadoop.yarn.api.records.Token;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.ContainerType;
+import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
+import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
@@ -50,8 +53,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptS
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@@ -417,5 +425,183 @@ public class TestContainerAllocation {
rm1.close();
}
-
+
+ @Test(timeout = 60000)
+ public void testAllocationForReservedContainer() throws Exception {
+ /**
+ * Test case: Submit two applications (app1/app2) to a queue. Both AMs run
+ * on a node with 8G of resource. App1 allocates a 4G container, then
+ * app2 asks for a 4G container. App2's request will be reserved on the
+ * node.
+ *
+ * Before the next node heartbeat, app1's container is killed. So app2's
+ * container, which was reserved, will be allocated.
+ */
+ // inject node label manager
+ MockRM rm1 = new MockRM();
+
+ rm1.getRMContext().setNodeLabelManager(mgr);
+ rm1.start();
+ MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
+ MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB);
+
+ // launch an app to queue, AM container should be launched in nm1
+ RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+ // launch another app to queue, AM container should be launched in nm1
+ RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "default");
+ MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);
+
+ am1.allocate("*", 4 * GB, 1, new ArrayList());
+ am2.allocate("*", 4 * GB, 1, new ArrayList());
+
+ CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
+ RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
+ LeafQueue leafQueue = (LeafQueue) cs.getQueue("default");
+
+ // Do node heartbeats 2 times
+ // First time will allocate container for app1, second time will reserve
+ // container for app2
+ cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+ cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+
+ // App2 will get preference to be allocated on node1, and node1 will be all
+ // used by App2.
+ FiCaSchedulerApp schedulerApp1 =
+ cs.getApplicationAttempt(am1.getApplicationAttemptId());
+ FiCaSchedulerApp schedulerApp2 =
+ cs.getApplicationAttempt(am2.getApplicationAttemptId());
+
+ // Check if a 4G container allocated for app1, and nothing allocated for app2
+ Assert.assertEquals(2, schedulerApp1.getLiveContainers().size());
+ Assert.assertEquals(1, schedulerApp2.getLiveContainers().size());
+ Assert.assertTrue(schedulerApp2.getReservedContainers().size() > 0);
+
+ // NM1 has available resource = 2G (8G - 2 * 1G - 4G)
+ Assert.assertEquals(2 * GB, cs.getNode(nm1.getNodeId())
+ .getUnallocatedResource().getMemory());
+ Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer());
+ // Usage of queue = 4G + 2 * 1G + 4G (reserved)
+ Assert.assertEquals(10 * GB, cs.getRootQueue().getQueueResourceUsage()
+ .getUsed().getMemory());
+ Assert.assertEquals(4 * GB, cs.getRootQueue().getQueueResourceUsage()
+ .getReserved().getMemory());
+ Assert.assertEquals(4 * GB, leafQueue.getQueueResourceUsage().getReserved()
+ .getMemory());
+
+ // Mark one app1 container as killed/completed and re-kick RM
+ for (RMContainer container : schedulerApp1.getLiveContainers()) {
+ if (container.isAMContainer()) {
+ continue;
+ }
+ cs.markContainerForKillable(container);
+ }
+ // Cancel asks of app1 and re-kick RM
+ am1.allocate("*", 4 * GB, 0, new ArrayList());
+ cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+
+ // Check 4G container cancelled for app1, and one container allocated for
+ // app2
+ Assert.assertEquals(1, schedulerApp1.getLiveContainers().size());
+ Assert.assertEquals(2, schedulerApp2.getLiveContainers().size());
+ Assert.assertFalse(schedulerApp2.getReservedContainers().size() > 0);
+
+ // NM1 has available resource = 2G (8G - 2 * 1G - 4G)
+ Assert.assertEquals(2 * GB, cs.getNode(nm1.getNodeId())
+ .getUnallocatedResource().getMemory());
+ Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer());
+ // Usage of queue = 4G + 2 * 1G
+ Assert.assertEquals(6 * GB, cs.getRootQueue().getQueueResourceUsage()
+ .getUsed().getMemory());
+ Assert.assertEquals(0 * GB, cs.getRootQueue().getQueueResourceUsage()
+ .getReserved().getMemory());
+ Assert.assertEquals(0 * GB, leafQueue.getQueueResourceUsage().getReserved()
+ .getMemory());
+
+ rm1.close();
+ }
+
+ @Test(timeout = 60000)
+ public void testReservedContainerMetricsOnDecommisionedNode() throws Exception {
+ /**
+ * Test case: Submit two applications (app1/app2) to a queue. Both AMs run
+ * on a node with 8G of resource. App1 allocates a 4G container, then
+ * app2 asks for a 4G container. App2's request will be reserved on the
+ * node.
+ *
+ * The node is then removed from the scheduler. All containers on it must be
+ * released, and the queue's used and reserved resources must drop to 0.
+ */
+ // inject node label manager
+ MockRM rm1 = new MockRM();
+
+ rm1.getRMContext().setNodeLabelManager(mgr);
+ rm1.start();
+ MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
+ MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB);
+
+ // launch an app to queue, AM container should be launched in nm1
+ RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+ // launch another app to queue, AM container should be launched in nm1
+ RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "default");
+ MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);
+
+ am1.allocate("*", 4 * GB, 1, new ArrayList());
+ am2.allocate("*", 4 * GB, 1, new ArrayList());
+
+ CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
+ RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
+ LeafQueue leafQueue = (LeafQueue) cs.getQueue("default");
+
+ // Do node heartbeats 2 times
+ // First time will allocate container for app1, second time will reserve
+ // container for app2
+ cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+ cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+
+ // App2 will get preference to be allocated on node1, and node1 will be all
+ // used by App2.
+ FiCaSchedulerApp schedulerApp1 =
+ cs.getApplicationAttempt(am1.getApplicationAttemptId());
+ FiCaSchedulerApp schedulerApp2 =
+ cs.getApplicationAttempt(am2.getApplicationAttemptId());
+
+ // Check if a 4G container allocated for app1, and nothing allocated for app2
+ Assert.assertEquals(2, schedulerApp1.getLiveContainers().size());
+ Assert.assertEquals(1, schedulerApp2.getLiveContainers().size());
+ Assert.assertTrue(schedulerApp2.getReservedContainers().size() > 0);
+
+ // NM1 has available resource = 2G (8G - 2 * 1G - 4G)
+ Assert.assertEquals(2 * GB, cs.getNode(nm1.getNodeId())
+ .getUnallocatedResource().getMemory());
+ Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer());
+ // Usage of queue = 4G + 2 * 1G + 4G (reserved)
+ Assert.assertEquals(10 * GB, cs.getRootQueue().getQueueResourceUsage()
+ .getUsed().getMemory());
+ Assert.assertEquals(4 * GB, cs.getRootQueue().getQueueResourceUsage()
+ .getReserved().getMemory());
+ Assert.assertEquals(4 * GB, leafQueue.getQueueResourceUsage().getReserved()
+ .getMemory());
+
+ // Remove the node
+ cs.handle(new NodeRemovedSchedulerEvent(rmNode1));
+
+ // Check all container cancelled for app1 and app2
+ Assert.assertEquals(0, schedulerApp1.getLiveContainers().size());
+ Assert.assertEquals(0, schedulerApp2.getLiveContainers().size());
+ Assert.assertFalse(schedulerApp2.getReservedContainers().size() > 0);
+
+ // Usage and Reserved capacity of queue is 0
+ Assert.assertEquals(0 * GB, cs.getRootQueue().getQueueResourceUsage()
+ .getUsed().getMemory());
+ Assert.assertEquals(0 * GB, cs.getRootQueue().getQueueResourceUsage()
+ .getReserved().getMemory());
+ Assert.assertEquals(0 * GB, leafQueue.getQueueResourceUsage().getReserved()
+ .getMemory());
+
+ rm1.close();
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ReservationSystem.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ReservationSystem.md
new file mode 100644
index 00000000000..eda8d4d806e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ReservationSystem.md
@@ -0,0 +1,65 @@
+
+
+Reservation System
+==================
+
+
+* [Purpose](#Purpose)
+* [Overview](#Overview)
+* [Flow of a Reservation](#Flow_of_a_Reservation)
+* [Configuring the Reservation System](#Configuring_the_Reservation_System)
+
+Purpose
+-------
+
+This document provides a brief overview of the `YARN ReservationSystem`.
+
+Overview
+--------
+
+The `ReservationSystem` of YARN provides the user the ability to reserve resources over (and ahead of) time, to ensure that important production jobs will be run very predictably. The ReservationSystem performs careful admission control and provides guarantees over absolute amounts of resources (instead of % of cluster size). Reservations can be either malleable or have gang semantics, and can have time-varying resource requirements. The ReservationSystem is a component of the YARN ResourceManager.
+
+
+Flow of a Reservation
+----------------------
+
+![YARN Reservation System | width=600px](./images/yarn_reservation_system.png)
+
+With reference to the figure above, a typical reservation proceeds as follows:
+
+ * **Step 1** The user (or an automated tool on its behalf) submits a reservation request specified by the Reservation Definition Language (RDL). This describes the user's need for resources over time (e.g., a skyline of resources) and temporal constraints (e.g., deadline). This can be done either programmatically through the usual Client-to-RM protocols or via the REST API of the RM.
+
+ * **Step 2** The ReservationSystem leverages a ReservationAgent (GREE in the figure) to find a plausible allocation for the reservation in the Plan, a data structure tracking all reservations currently accepted and the available resources in the system.
+
+ * **Step 3** The SharingPolicy provides a way to enforce invariants on the reservation being accepted, potentially rejecting reservations. For example, the CapacityOvertimePolicy allows enforcement of both the instantaneous max-capacity a user can request across all of his/her reservations and a limit on the integral of resources over a period of time, e.g., the user can reserve up to 50% of the cluster capacity instantaneously, but in any 24h period of time he/she cannot exceed 10% average.
+
+ * **Step 4** Upon a successful validation the ReservationSystem returns to the user a ReservationId (think of it as an airline ticket).
+
+ * **Step 5** When the time comes, a new component called the PlanFollower publishes the state of the plan to the scheduler, by dynamically creating/tweaking/destroying queues.
+
+ * **Step 6** The user can then submit one (or more) jobs to the reservable queue, by simply including the ReservationId as part of the ApplicationSubmissionContext.
+
+ * **Step 7** The Scheduler will then provide containers from a special queue created to ensure the resource reservation is respected. Within the limits of the reservation, the user has guaranteed access to the resources; above that, resource sharing proceeds with standard Capacity/Fairness sharing.
+
+ * **Step 8** The system includes mechanisms to adapt to drops in cluster capacity. This consists of replanning by "moving" the reservation if possible, or rejecting the smallest amount of previously accepted reservations (to ensure that other reservations will receive their full amount).
+
+
+
+
+
+Configuring the Reservation System
+----------------------------------
+
+Configuring the `ReservationSystem` is simple. Currently we have added support for *reservations* in both `CapacityScheduler` and `FairScheduler`. You can mark any **leaf queue** in the **capacity-scheduler.xml** or **fair-scheduler.xml** as available for "reservations" (see [CapacityScheduler](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html#Configuring_ReservationSystem_with_CapacityScheduler) and the [FairScheduler](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html) for details). Then the capacity/fair share within that queue can be used for making reservations. Jobs can still be submitted to the *reservable queue* without a reservation, in which case they will be run in best-effort mode in whatever capacity is left over by the jobs running within active reservations.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerRest.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerRest.md
index c72b7f408a7..dd6ac0448fd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerRest.md
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerRest.md
@@ -34,6 +34,9 @@ ResourceManager REST API's.
* [Cluster Application Priority API](#Cluster_Application_Priority_API)
* [Cluster Delegation Tokens API](#Cluster_Delegation_Tokens_API)
* [Cluster Reservation API List](#Cluster_Reservation_API_List)
+* [Cluster Reservation API Submit](#Cluster_Reservation_API_Submit)
+* [Cluster Reservation API Update](#Cluster_Reservation_API_Update)
+* [Cluster Reservation API Delete](#Cluster_Reservation_API_Delete)
Overview
--------
@@ -3223,8 +3226,8 @@ The Cluster Reservation API can be used to list reservations. When listing reser
| Item | Data Type | Description |
|:---- |:---- |:---- |
| arrival | long | The UTC time representation of the earliest time this reservation can be allocated from. |
-| deadline | long | The UTC time representation of the latest time within which this reservatino can be allocated. |
-| reservation-name | string | A mnemonic name of the reservaiton (not a valid identifier). |
+| deadline | long | The UTC time representation of the latest time within which this reservation can be allocated. |
+| reservation-name | string | A mnemonic name of the reservation (not a valid identifier). |
| reservation-requests | object | A list of "stages" or phases of this reservation, each describing resource requirements and duration |
### Elements of the *reservation-requests* object
@@ -3381,3 +3384,443 @@ Response Body:
```
+
+Cluster Reservation API Submit
+------------------------------
+
+The Cluster Reservation API can be used to submit reservations. When submitting a reservation the user specifies the constraints in terms of resources and time that are required. The resulting page returns a reservation-id that the user can use to get access to the resources by specifying it as part of the [Cluster Submit Applications API](#Cluster_Applications_APISubmit_Application).
+
+### URI
+
+ * http://<rm http address:port>/ws/v1/cluster/reservation/submit
+
+### HTTP Operations Supported
+
+ * POST
+
+### POST Response Examples
+
+POST requests can be used to submit reservations to the ResourceManager. As mentioned above, a reservation-id is returned upon success (in the body of the answer). Successful submissions result in a 200 response. Please note that in order to submit a reservation, you must have an authentication filter setup for the HTTP interface. The functionality requires that a username is set in the HttpServletRequest. If no filter is setup, the response will be an "UNAUTHORIZED" response.
+
+Please note that this feature is currently in the alpha stage and may change in the future.
+
+#### Elements of the POST request object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+| queue | string | The (reservable) queue you are submitting to|
+| reservation-definition | object | A set of constraints representing the need for resources over time of a user. |
+
+Elements of the *reservation-definition* object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+|arrival | long | The UTC time representation of the earliest time this reservation can be allocated from. |
+| deadline | long | The UTC time representation of the latest time within which this reservation can be allocated. |
+| reservation-name | string | A mnemonic name of the reservation (not a valid identifier). |
+| reservation-requests | object | A list of "stages" or phases of this reservation, each describing resource requirements and duration |
+
+Elements of the *reservation-requests* object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+| reservation-request-interpreter | int | A numeric choice of how to interpret the set of ReservationRequest: 0 is an ANY, 1 for ALL, 2 for ORDER, 3 for ORDER\_NO\_GAP |
+| reservation-request | object | The description of the resource and time capabilities for a phase/stage of this reservation |
+
+Elements of the *reservation-request* object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+| duration | long | The duration of a ReservationRequest in milliseconds (amount of consecutive milliseconds a satisfiable allocation for this portion of the reservation should exist for). |
+| num-containers | int | The number of containers required in this phase of the reservation (capture the maximum parallelism of the job(s) in this phase). |
+| min-concurrency | int | The minimum number of containers that must be concurrently allocated to satisfy this allocation (capture min-parallelism, useful to express gang semantics). |
+| capability | object | Allows to specify the size of each container (memory, vCores).|
+
+Elements of the *capability* object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+| memory | int | the number of MB of memory for this container |
+| vCores | int | the number of virtual cores for this container |
+
+
+**JSON response**
+
+This example contains a reservation composed of two stages (alternative to each other as the *reservation-request-interpreter* is set to 0), so that the first is shorter and "taller" and "gang"
+with exactly 220 containers for 60 seconds, while the second alternative is longer with 120 seconds duration and less tall with 110 containers (and a min-concurrency of 1 container, thus no gang semantics).
+
+HTTP Request:
+
+```json
+POST http://rmdns:8088/ws/v1/cluster/reservation/submit
+Content-Type: application/json
+{
+ "queue" : "dedicated",
+ "reservation-definition" : {
+ "arrival" : 1765541532000,
+ "deadline" : 1765542252000,
+ "reservation-name" : "res_1",
+ "reservation-requests" : {
+ "reservation-request-interpreter" : 0,
+ "reservation-request" : [
+ {
+ "duration" : 60000,
+ "num-containers" : 220,
+ "min-concurrency" : 220,
+ "capability" : {
+ "memory" : 1024,
+ "vCores" : 1
+ }
+ },
+ {
+ "duration" : 120000,
+ "num-containers" : 110,
+ "min-concurrency" : 1,
+ "capability" : {
+ "memory" : 1024,
+ "vCores" : 1
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+Response Header:
+
+200 OK
+Cache-Control: no-cache
+Expires: Thu, 17 Dec 2015 23:36:34 GMT, Thu, 17 Dec 2015 23:36:34 GMT
+Date: Thu, 17 Dec 2015 23:36:34 GMT, Thu, 17 Dec 2015 23:36:34 GMT
+Pragma: no-cache, no-cache
+Content-Type: application/json
+Content-Encoding: gzip
+Content-Length: 137
+Server: Jetty(6.1.26)
+
+Response Body:
+
+```json
+{"reservation-id":"reservation_1448064217915_0009"}
+```
+
+**XML response**
+
+HTTP Request:
+
+```xml
+POST http://rmdns:8088/ws/v1/cluster/reservation/submit
+Accept: application/xml
+Content-Type: application/xml
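+<reservation-submission-context>
+  <queue>dedicated</queue>
+  <reservation-definition>
+     <arrival>1765541532000</arrival>
+     <deadline>1765542252000</deadline>
+     <reservation-name>res_1</reservation-name>
+     <reservation-requests>
+        <reservation-request-interpreter>0</reservation-request-interpreter>
+        <reservation-request>
+           <duration>60000</duration>
+           <num-containers>220</num-containers>
+           <min-concurrency>220</min-concurrency>
+           <capability>
+             <memory>1024</memory>
+             <vCores>1</vCores>
+           </capability>
+        </reservation-request>
+        <reservation-request>
+           <duration>120000</duration>
+           <num-containers>110</num-containers>
+           <min-concurrency>1</min-concurrency>
+           <capability>
+             <memory>1024</memory>
+             <vCores>1</vCores>
+           </capability>
+        </reservation-request>
+     </reservation-requests>
+  </reservation-definition>
+</reservation-submission-context>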
+```
+
+Response Header:
+
+200 OK
+Cache-Control: no-cache
+Expires: Thu, 17 Dec 2015 23:49:21 GMT, Thu, 17 Dec 2015 23:49:21 GMT
+Date: Thu, 17 Dec 2015 23:49:21 GMT, Thu, 17 Dec 2015 23:49:21 GMT
+Pragma: no-cache, no-cache
+Content-Type: application/xml
+Content-Encoding: gzip
+Content-Length: 137
+Server: Jetty(6.1.26)
+
+Response Body:
+
+```xml
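+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<reservation-submission-response>
+    <reservation-id>reservation_1448064217915_0010</reservation-id>
+</reservation-submission-response>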
+```
+
+
+Cluster Reservation API Update
+------------------------------
+
+The Cluster Reservation API Update can be used to update existing reservations. Update of a Reservation works similarly to submit described above, but the user submits the reservation-id of an existing reservation to be updated. The semantics is a try-and-swap: a successful operation will modify the existing reservation based on the requested update parameter, while a failed execution will leave the existing reservation unchanged.
+
+### URI
+
+ * http://<rm http address:port>/ws/v1/cluster/reservation/update
+
+### HTTP Operations Supported
+
+ * POST
+
+### POST Response Examples
+
+POST requests can be used to update reservations in the ResourceManager. Successful submissions result in a 200 response, indicating an in-place update of the existing reservation (id does not change). Please note that in order to update a reservation, you must have an authentication filter setup for the HTTP interface. The functionality requires that a username is set in the HttpServletRequest. If no filter is setup, the response will be an "UNAUTHORIZED" response.
+
+Please note that this feature is currently in the alpha stage and may change in the future.
+
+#### Elements of the POST request object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+| reservation-id | string | The id of the reservation to be updated (the system automatically looks up the right queue from this)|
+| reservation-definition | object | A set of constraints representing the need for resources over time of a user. |
+
+Elements of the *reservation-definition* object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+|arrival | long | The UTC time representation of the earliest time this reservation can be allocated from. |
+| deadline | long | The UTC time representation of the latest time within which this reservation can be allocated. |
+| reservation-name | string | A mnemonic name of the reservation (not a valid identifier). |
+| reservation-requests | object | A list of "stages" or phases of this reservation, each describing resource requirements and duration |
+
+Elements of the *reservation-requests* object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+| reservation-request-interpreter | int | A numeric choice of how to interpret the set of ReservationRequest: 0 is an ANY, 1 for ALL, 2 for ORDER, 3 for ORDER\_NO\_GAP |
+| reservation-request | object | The description of the resource and time capabilities for a phase/stage of this reservation |
+
+Elements of the *reservation-request* object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+| duration | long | The duration of a ReservationRequest in milliseconds (amount of consecutive milliseconds a satisfiable allocation for this portion of the reservation should exist for). |
+| num-containers | int | The number of containers required in this phase of the reservation (capture the maximum parallelism of the job(s) in this phase). |
+| min-concurrency | int | The minimum number of containers that must be concurrently allocated to satisfy this allocation (capture min-parallelism, useful to express gang semantics). |
+| capability | object | Allows to specify the size of each container (memory, vCores).|
+
+Elements of the *capability* object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+| memory | int | the number of MB of memory for this container |
+| vCores | int | the number of virtual cores for this container |
+
+
+**JSON response**
+
+This example updates an existing reservation identified by *reservation_1449259268893_0005* with two stages (in order as the *reservation-request-interpreter* is set to 2), with the first stage being a "gang" of 10 containers for 5 minutes (min-concurrency of 10 containers) followed by 50 containers for 1 minute (min-concurrency of 1 container, thus no gang semantics).
+
+HTTP Request:
+
+```json
+POST http://rmdns:8088/ws/v1/cluster/reservation/update
+Accept: application/json
+Content-Type: application/json
+{
+ "reservation-id" : "reservation_1449259268893_0005",
+ "reservation-definition" : {
+ "arrival" : 1765541532000,
+ "deadline" : 1765542252000,
+ "reservation-name" : "res_1",
+ "reservation-requests" : {
+ "reservation-request-interpreter" : 2,
+ "reservation-request" : [
+ {
+ "duration" : 300000,
+ "num-containers" : 10,
+ "min-concurrency" : 10,
+ "capability" : {
+ "memory" : 1024,
+ "vCores" : 1
+ }
+ },
+ {
+ "duration" : 60000,
+ "num-containers" : 50,
+ "min-concurrency" : 1,
+ "capability" : {
+ "memory" : 1024,
+ "vCores" : 1
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+Response Header:
+
+200 OK
+Cache-Control: no-cache
+Expires: Thu, 17 Dec 2015 23:36:34 GMT, Thu, 17 Dec 2015 23:36:34 GMT
+Date: Thu, 17 Dec 2015 23:36:34 GMT, Thu, 17 Dec 2015 23:36:34 GMT
+Pragma: no-cache, no-cache
+Content-Type: application/json
+Content-Encoding: gzip
+Content-Length: 137
+Server: Jetty(6.1.26)
+
+Response Body:
+
+ No response body
+
+**XML response**
+
+HTTP Request:
+
+```xml
+POST http://rmdns:8088/ws/v1/cluster/reservation/update
+Accept: application/xml
+Content-Type: application/xml
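+<reservation-update-context>
+  <reservation-id>reservation_1449259268893_0005</reservation-id>
+  <reservation-definition>
+     <arrival>1765541532000</arrival>
+     <deadline>1765542252000</deadline>
+     <reservation-name>res_1</reservation-name>
+     <reservation-requests>
+        <reservation-request-interpreter>2</reservation-request-interpreter>
+        <reservation-request>
+           <duration>300000</duration>
+           <num-containers>10</num-containers>
+           <min-concurrency>10</min-concurrency>
+           <capability>
+             <memory>1024</memory>
+             <vCores>1</vCores>
+           </capability>
+        </reservation-request>
+        <reservation-request>
+           <duration>60000</duration>
+           <num-containers>50</num-containers>
+           <min-concurrency>1</min-concurrency>
+           <capability>
+             <memory>1024</memory>
+             <vCores>1</vCores>
+           </capability>
+        </reservation-request>
+     </reservation-requests>
+  </reservation-definition>
+</reservation-update-context>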
+```
+
+Response Header:
+
+200 OK
+Cache-Control: no-cache
+Expires: Thu, 17 Dec 2015 23:49:21 GMT, Thu, 17 Dec 2015 23:49:21 GMT
+Date: Thu, 17 Dec 2015 23:49:21 GMT, Thu, 17 Dec 2015 23:49:21 GMT
+Pragma: no-cache, no-cache
+Content-Type: application/xml
+Content-Encoding: gzip
+Content-Length: 137
+Server: Jetty(6.1.26)
+
+Response Body:
+
+ No response body
+
+Cluster Reservation API Delete
+------------------------------
+
+The Cluster Reservation API Delete can be used to delete existing reservations. Delete works similarly to update. The request contains the reservation-id, and if successful the reservation is cancelled, otherwise the reservation remains in the system.
+
+### URI
+
+ * http://<rm http address:port>/ws/v1/cluster/reservation/delete
+
+### HTTP Operations Supported
+
+ * POST
+
+### POST Response Examples
+
+POST requests can be used to delete reservations from the ResourceManager. Successful submissions result in a 200 response, indicating that the delete succeeded. Please note that in order to delete a reservation, you must have an authentication filter setup for the HTTP interface. The functionality requires that a username is set in the HttpServletRequest. If no filter is setup, the response will be an "UNAUTHORIZED" response.
+
+Please note that this feature is currently in the alpha stage and may change in the future.
+
+#### Elements of the POST request object
+
+| Item | Data Type | Description |
+|:---- |:---- |:---- |
+| reservation-id | string | The id of the reservation to be deleted (the system automatically looks up the right queue from this)|
+
+
+**JSON response**
+
+This example deletes an existing reservation identified by *reservation_1449259268893_0006*.
+
+HTTP Request:
+
+```json
+POST http://10.200.91.98:8088/ws/v1/cluster/reservation/delete
+Accept: application/json
+Content-Type: application/json
+{
+ "reservation-id" : "reservation_1449259268893_0006"
+}
+```
+
+Response Header:
+
+200 OK
+Cache-Control: no-cache
+Expires: Fri, 18 Dec 2015 01:31:05 GMT, Fri, 18 Dec 2015 01:31:05 GMT
+Date: Fri, 18 Dec 2015 01:31:05 GMT, Fri, 18 Dec 2015 01:31:05 GMT
+Pragma: no-cache, no-cache
+Content-Type: application/json
+Content-Encoding: gzip
+Transfer-Encoding: chunked
+Server: Jetty(6.1.26)
+
+Response Body:
+
+ No response body
+
+**XML response**
+
+HTTP Request:
+
+```xml
+POST http://10.200.91.98:8088/ws/v1/cluster/reservation/delete
+Accept: application/xml
+Content-Type: application/xml
+<reservation-delete-context>
+<reservation-id>reservation_1449259268893_0006</reservation-id>
+</reservation-delete-context>
+```
+
+Response Header:
+
+200 OK
+Cache-Control: no-cache
+Expires: Fri, 18 Dec 2015 01:33:23 GMT, Fri, 18 Dec 2015 01:33:23 GMT
+Date: Fri, 18 Dec 2015 01:33:23 GMT, Fri, 18 Dec 2015 01:33:23 GMT
+Pragma: no-cache, no-cache
+Content-Type: application/xml
+Content-Encoding: gzip
+Content-Length: 101
+Server: Jetty(6.1.26)
+
+Response Body:
+
+ No response body
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YARN.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YARN.md
index f8e8154774c..974f41dfdcd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YARN.md
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YARN.md
@@ -32,3 +32,5 @@ The Scheduler has a pluggable policy which is responsible for partitioning the c
The ApplicationsManager is responsible for accepting job-submissions, negotiating the first container for executing the application specific ApplicationMaster and provides the service for restarting the ApplicationMaster container on failure. The per-application ApplicationMaster has the responsibility of negotiating appropriate resource containers from the Scheduler, tracking their status and monitoring for progress.
MapReduce in hadoop-2.x maintains **API compatibility** with previous stable release (hadoop-1.x). This means that all MapReduce jobs should still run unchanged on top of YARN with just a recompile.
+
+YARN also supports the notion of **resource reservation** via the [ReservationSystem](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ReservationSystem.html), a component that allows users to specify a profile of resources over-time and temporal constraints (e.g., deadlines), and reserve resources to ensure the predictable execution of important jobs. The *ReservationSystem* tracks resources over-time, performs admission control for reservations, and dynamically instructs the underlying scheduler to ensure that the reservation is fulfilled.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/resources/images/yarn_reservation_system.png b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/resources/images/yarn_reservation_system.png
new file mode 100644
index 00000000000..cbe197510c2
Binary files /dev/null and b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/resources/images/yarn_reservation_system.png differ