Merge trunk into HA branch.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1234924 13f79535-47bb-0310-9956-ffa450edef68
commit e42bbfbca2
|
@ -282,6 +282,9 @@ Release 0.23.1 - Unreleased
|
|||
HADOOP-7982. UserGroupInformation fails to login if thread's context
|
||||
classloader can't load HadoopLoginModule. (todd)
|
||||
|
||||
HADOOP-7986. Adding config for MapReduce History Server protocol in
|
||||
hadoop-policy.xml for service level authorization. (Mahadev Konar via vinodkv)
|
||||
|
||||
Release 0.23.0 - 2011-11-01
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -217,4 +217,14 @@
|
|||
A special value of "*" means all users are allowed.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>security.mrhs.client.protocol.acl</name>
|
||||
<value>*</value>
|
||||
<description>ACL for HSClientProtocol, used by job clients to
|
||||
communicate with the MR History Server for job status etc.
|
||||
The ACL is a comma-separated list of user and group names. The user and
|
||||
group list is separated by a blank. For e.g. "alice,bob users,wheel".
|
||||
A special value of "*" means all users are allowed.</description>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
|
|
|
@ -1,3 +1,19 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<FindBugsFilter>
|
||||
<Match>
|
||||
<Class name="org.apache.hadoop.lib.service.instrumentation.InstrumentationService" />
|
||||
|
|
|
@ -272,6 +272,10 @@ Release 0.23.1 - UNRELEASED
|
|||
HDFS-2803. Add logging to LeaseRenewer for better lease expiration debugging.
|
||||
(Jimmy Xiang via todd)
|
||||
|
||||
HDFS-2817. Combine the two TestSafeMode test suites. (todd)
|
||||
|
||||
HDFS-2818. Fix a missing space issue in HDFS webapps' title tags. (Devaraj K via harsh)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HDFS-2130. Switch default checksum to CRC32C. (todd)
|
||||
|
@ -343,6 +347,9 @@ Release 0.23.1 - UNRELEASED
|
|||
HDFS-2751. Datanode may incorrectly drop OS cache behind reads
|
||||
even for short reads. (todd)
|
||||
|
||||
HDFS-2816. Fix missing license header in httpfs findbugsExcludeFile.xml.
|
||||
(hitesh via tucu)
|
||||
|
||||
Release 0.23.0 - 2011-11-01
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<link rel="stylesheet" type="text/css" href="/static/hadoop.css">
|
||||
<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
|
||||
<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
|
||||
<body>
|
||||
<h1><%=namenodeRole%> '<%=namenodeLabel%>'</h1>
|
||||
<%=NamenodeJspHelper.getVersionTable(fsn)%>
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
<html>
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="/static/hadoop.css">
|
||||
<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
|
||||
<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
|
||||
|
||||
<body>
|
||||
<h1><%=namenodeRole%> '<%=namenodeLabel%>' (<%=namenodeState%>)</h1>
|
||||
|
|
|
@ -37,7 +37,7 @@ String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":" + nn.getNameN
|
|||
<html>
|
||||
|
||||
<link rel="stylesheet" type="text/css" href="/static/hadoop.css">
|
||||
<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
|
||||
<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
|
||||
|
||||
<body>
|
||||
<h1><%=namenodeRole%> '<%=namenodeLabel%>'</h1>
|
||||
|
|
|
@ -113,6 +113,21 @@ public class TestSafeMode {
|
|||
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that, if there are no blocks in the filesystem,
|
||||
* the NameNode doesn't enter the "safemode extension" period.
|
||||
*/
|
||||
@Test(timeout=45000)
|
||||
public void testNoExtensionIfNoBlocks() throws IOException {
|
||||
cluster.getConfiguration(0).setInt(
|
||||
DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 60000);
|
||||
cluster.restartNameNode();
|
||||
// Even though we have safemode extension set high, we should immediately
|
||||
// exit safemode on startup because there are no blocks in the namespace.
|
||||
String status = cluster.getNameNode().getNamesystem().getSafemode();
|
||||
assertEquals("", status);
|
||||
}
|
||||
|
||||
public interface FSRun {
|
||||
public abstract void run(FileSystem fs) throws IOException;
|
||||
}
|
||||
|
@ -193,5 +208,37 @@ public class TestSafeMode {
|
|||
assertFalse("Could not leave SM",
|
||||
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Verify that the NameNode stays in safemode when dfs.safemode.datanode.min
|
||||
* is set to a number greater than the number of live datanodes.
|
||||
*/
|
||||
@Test
|
||||
public void testDatanodeThreshold() throws IOException {
|
||||
cluster.shutdown();
|
||||
Configuration conf = cluster.getConfiguration(0);
|
||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
|
||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1);
|
||||
|
||||
cluster.restartNameNode();
|
||||
fs = (DistributedFileSystem)cluster.getFileSystem();
|
||||
|
||||
String tipMsg = cluster.getNamesystem().getSafemode();
|
||||
assertTrue("Safemode tip message looks right: " + tipMsg,
|
||||
tipMsg.contains("The number of live datanodes 0 needs an additional " +
|
||||
"2 live datanodes to reach the minimum number 1. " +
|
||||
"Safe mode will be turned off automatically."));
|
||||
|
||||
// Start a datanode
|
||||
cluster.startDataNodes(conf, 1, true, null, null);
|
||||
|
||||
// Wait long enough for safemode check to refire
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException ignored) {}
|
||||
|
||||
// We now should be out of safe mode.
|
||||
assertEquals("", cluster.getNamesystem().getSafemode());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
/**
|
||||
* Tests to verify safe mode correctness.
|
||||
*/
|
||||
public class TestSafeMode {
|
||||
|
||||
/**
|
||||
* Verify that the NameNode stays in safemode when dfs.safemode.datanode.min
|
||||
* is set to a number greater than the number of live datanodes.
|
||||
*/
|
||||
@Test
|
||||
public void testDatanodeThreshold() throws IOException {
|
||||
MiniDFSCluster cluster = null;
|
||||
DistributedFileSystem fs = null;
|
||||
try {
|
||||
Configuration conf = new HdfsConfiguration();
|
||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
|
||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1);
|
||||
|
||||
// bring up a cluster with no datanodes
|
||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).format(true).build();
|
||||
cluster.waitActive();
|
||||
fs = (DistributedFileSystem)cluster.getFileSystem();
|
||||
|
||||
assertTrue("No datanode started, but we require one - safemode expected",
|
||||
fs.setSafeMode(SafeModeAction.SAFEMODE_GET));
|
||||
|
||||
String tipMsg = cluster.getNamesystem().getSafeModeTip();
|
||||
assertTrue("Safemode tip message looks right",
|
||||
tipMsg.contains("The number of live datanodes 0 needs an additional " +
|
||||
"2 live datanodes to reach the minimum number 1. " +
|
||||
"Safe mode will be turned off automatically."));
|
||||
|
||||
// Start a datanode
|
||||
cluster.startDataNodes(conf, 1, true, null, null);
|
||||
|
||||
// Wait long enough for safemode check to refire
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException ignored) {}
|
||||
|
||||
// We now should be out of safe mode.
|
||||
assertFalse(
|
||||
"Out of safe mode after starting datanode.",
|
||||
fs.setSafeMode(SafeModeAction.SAFEMODE_GET));
|
||||
} finally {
|
||||
if (fs != null) fs.close();
|
||||
if (cluster != null) cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -517,6 +517,18 @@ Release 0.23.1 - Unreleased
|
|||
MAPREDUCE-3582. Move successfully passing MR1 tests to MR2 maven tree.
|
||||
(ahmed via tucu)
|
||||
|
||||
MAPREDUCE-3698. Client cannot talk to the history server in secure mode.
|
||||
(mahadev)
|
||||
|
||||
MAPREDUCE-3689. RM web UI doesn't handle newline in job name.
|
||||
(Thomas Graves via mahadev)
|
||||
|
||||
MAPREDUCE-3549. write api documentation for web service apis for RM, NM,
|
||||
mapreduce app master, and job history server (Thomas Graves via mahadev)
|
||||
|
||||
MAPREDUCE-3705. ant build fails on 0.23 branch. (Thomas Graves via
|
||||
mahadev)
|
||||
|
||||
Release 0.23.0 - 2011-11-01
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -575,8 +575,6 @@
|
|||
<copy file="${test.src.dir}/mapred/org/apache/hadoop/mapred/test.tar" todir="${test.cache.data}"/>
|
||||
<copy file="${test.src.dir}/mapred/org/apache/hadoop/mapred/test.tgz" todir="${test.cache.data}"/>
|
||||
<copy file="${test.src.dir}/mapred/org/apache/hadoop/mapred/test.tar.gz" todir="${test.cache.data}"/>
|
||||
<copy file="${test.src.dir}/mapred/org/apache/hadoop/cli/testMRConf.xml" todir="${test.cache.data}"/>
|
||||
<copy file="${test.src.dir}/mapred/org/apache/hadoop/cli/data60bytes" todir="${test.cache.data}"/>
|
||||
<copy file="${test.src.dir}/mapred/org/apache/hadoop/mapred/concat.bz2" todir="${test.concat.data}"/>
|
||||
<copy file="${test.src.dir}/mapred/org/apache/hadoop/mapred/concat.gz" todir="${test.concat.data}"/>
|
||||
<copy file="${test.src.dir}/mapred/org/apache/hadoop/mapred/testCompressThenConcat.txt.bz2" todir="${test.concat.data}"/>
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.mapreduce.v2.app.security.authorize;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
|
||||
import org.apache.hadoop.security.authorize.PolicyProvider;
|
||||
import org.apache.hadoop.security.authorize.Service;
|
||||
import org.apache.hadoop.yarn.proto.HSClientProtocol;
|
||||
|
||||
/**
|
||||
* {@link PolicyProvider} for YARN MapReduce protocols.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
@InterfaceStability.Unstable
|
||||
public class ClientHSPolicyProvider extends PolicyProvider {
|
||||
|
||||
private static final Service[] mrHSServices =
|
||||
new Service[] {
|
||||
new Service(
|
||||
JHAdminConfig.MR_HS_SECURITY_SERVICE_AUTHORIZATION,
|
||||
HSClientProtocol.HSClientProtocolService.BlockingInterface.class)
|
||||
};
|
||||
|
||||
@Override
|
||||
public Service[] getServices() {
|
||||
return mrHSServices;
|
||||
}
|
||||
}
|
|
@ -30,7 +30,7 @@ import org.apache.hadoop.mapreduce.Counters;
|
|||
import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt;
|
||||
import org.apache.hadoop.mapreduce.v2.util.MRApps;
|
||||
|
||||
@XmlRootElement(name = "JobTaskAttemptCounters")
|
||||
@XmlRootElement(name = "jobTaskAttemptCounters")
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
public class JobTaskAttemptCounterInfo {
|
||||
|
||||
|
|
|
@ -629,7 +629,7 @@ public class TestAMWebServicesAttempts extends JerseyTest {
|
|||
assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());
|
||||
JSONObject json = response.getEntity(JSONObject.class);
|
||||
assertEquals("incorrect number of elements", 1, json.length());
|
||||
JSONObject info = json.getJSONObject("JobTaskAttemptCounters");
|
||||
JSONObject info = json.getJSONObject("jobTaskAttemptCounters");
|
||||
verifyAMJobTaskAttemptCounters(info, att);
|
||||
}
|
||||
}
|
||||
|
@ -661,7 +661,7 @@ public class TestAMWebServicesAttempts extends JerseyTest {
|
|||
InputSource is = new InputSource();
|
||||
is.setCharacterStream(new StringReader(xml));
|
||||
Document dom = db.parse(is);
|
||||
NodeList nodes = dom.getElementsByTagName("JobTaskAttemptCounters");
|
||||
NodeList nodes = dom.getElementsByTagName("jobTaskAttemptCounters");
|
||||
|
||||
verifyAMTaskCountersXML(nodes, att);
|
||||
}
|
||||
|
|
|
@ -22,13 +22,20 @@ import java.io.IOException;
|
|||
import java.net.InetSocketAddress;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.ipc.RPC;
|
||||
import org.apache.hadoop.mapreduce.v2.api.HSClientProtocol;
|
||||
import org.apache.hadoop.yarn.ipc.ProtoOverHadoopRpcEngine;
|
||||
import org.apache.hadoop.yarn.proto.HSClientProtocol.HSClientProtocolService;
|
||||
|
||||
public class HSClientProtocolPBClientImpl extends MRClientProtocolPBClientImpl
|
||||
implements HSClientProtocol {
|
||||
|
||||
public HSClientProtocolPBClientImpl(long clientVersion,
|
||||
InetSocketAddress addr, Configuration conf) throws IOException {
|
||||
super(clientVersion, addr, conf);
|
||||
super();
|
||||
RPC.setProtocolEngine(conf, HSClientProtocolService.BlockingInterface.class,
|
||||
ProtoOverHadoopRpcEngine.class);
|
||||
proxy = (HSClientProtocolService.BlockingInterface)RPC.getProxy(
|
||||
HSClientProtocolService.BlockingInterface.class, clientVersion, addr, conf);
|
||||
}
|
||||
}
|
|
@ -93,7 +93,9 @@ import com.google.protobuf.ServiceException;
|
|||
|
||||
public class MRClientProtocolPBClientImpl implements MRClientProtocol {
|
||||
|
||||
private MRClientProtocolService.BlockingInterface proxy;
|
||||
protected MRClientProtocolService.BlockingInterface proxy;
|
||||
|
||||
public MRClientProtocolPBClientImpl() {};
|
||||
|
||||
public MRClientProtocolPBClientImpl(long clientVersion, InetSocketAddress addr, Configuration conf) throws IOException {
|
||||
RPC.setProtocolEngine(conf, MRClientProtocolService.BlockingInterface.class, ProtoOverHadoopRpcEngine.class);
|
||||
|
|
|
@ -111,4 +111,9 @@ public class JHAdminConfig {
|
|||
public static final int DEFAULT_MR_HISTORY_WEBAPP_PORT = 19888;
|
||||
public static final String DEFAULT_MR_HISTORY_WEBAPP_ADDRESS =
|
||||
"0.0.0.0:" + DEFAULT_MR_HISTORY_WEBAPP_PORT;
|
||||
/*
|
||||
* HS Service Authorization
|
||||
*/
|
||||
public static final String MR_HS_SECURITY_SERVICE_AUTHORIZATION =
|
||||
"security.mrhs.client.protocol.acl";
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@ package org.apache.hadoop.mapreduce.v2.security.client;
|
|||
|
||||
import java.lang.annotation.Annotation;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
|
||||
import org.apache.hadoop.security.KerberosInfo;
|
||||
|
@ -30,7 +32,7 @@ import org.apache.hadoop.security.token.TokenSelector;
|
|||
import org.apache.hadoop.yarn.proto.HSClientProtocol;
|
||||
|
||||
public class ClientHSSecurityInfo extends SecurityInfo {
|
||||
|
||||
|
||||
@Override
|
||||
public KerberosInfo getKerberosInfo(Class<?> protocol, Configuration conf) {
|
||||
if (!protocol
|
||||
|
|
|
@ -66,7 +66,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
|
|||
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.Job;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.Task;
|
||||
import org.apache.hadoop.mapreduce.v2.app.security.authorize.MRAMPolicyProvider;
|
||||
import org.apache.hadoop.mapreduce.v2.app.security.authorize.ClientHSPolicyProvider;
|
||||
import org.apache.hadoop.mapreduce.v2.hs.webapp.HsWebApp;
|
||||
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
|
||||
import org.apache.hadoop.net.NetUtils;
|
||||
|
@ -136,9 +136,9 @@ public class HistoryClientService extends AbstractService {
|
|||
if (conf.getBoolean(
|
||||
CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
|
||||
false)) {
|
||||
server.refreshServiceAcl(conf, new MRAMPolicyProvider());
|
||||
server.refreshServiceAcl(conf, new ClientHSPolicyProvider());
|
||||
}
|
||||
|
||||
|
||||
server.start();
|
||||
this.bindAddress =
|
||||
NetUtils.createSocketAddr(hostNameResolved.getHostAddress()
|
||||
|
|
|
@ -642,7 +642,7 @@ public class TestHsWebServicesAttempts extends JerseyTest {
|
|||
assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());
|
||||
JSONObject json = response.getEntity(JSONObject.class);
|
||||
assertEquals("incorrect number of elements", 1, json.length());
|
||||
JSONObject info = json.getJSONObject("JobTaskAttemptCounters");
|
||||
JSONObject info = json.getJSONObject("jobTaskAttemptCounters");
|
||||
verifyHsJobTaskAttemptCounters(info, att);
|
||||
}
|
||||
}
|
||||
|
@ -674,7 +674,7 @@ public class TestHsWebServicesAttempts extends JerseyTest {
|
|||
InputSource is = new InputSource();
|
||||
is.setCharacterStream(new StringReader(xml));
|
||||
Document dom = db.parse(is);
|
||||
NodeList nodes = dom.getElementsByTagName("JobTaskAttemptCounters");
|
||||
NodeList nodes = dom.getElementsByTagName("jobTaskAttemptCounters");
|
||||
|
||||
verifyHsTaskCountersXML(nodes, att);
|
||||
}
|
||||
|
|
|
@ -68,7 +68,6 @@ public class ProtoOverHadoopRpcEngine implements RpcEngine {
|
|||
public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
|
||||
InetSocketAddress addr, UserGroupInformation ticket, Configuration conf,
|
||||
SocketFactory factory, int rpcTimeout) throws IOException {
|
||||
|
||||
return new ProtocolProxy<T>(protocol, (T) Proxy.newProxyInstance(protocol
|
||||
.getClassLoader(), new Class[] { protocol }, new Invoker(protocol,
|
||||
addr, ticket, conf, factory, rpcTimeout)), false);
|
||||
|
|
|
@ -40,7 +40,7 @@ public class Jsons {
|
|||
|
||||
public static PrintWriter appendProgressBar(PrintWriter out,
|
||||
float progress) {
|
||||
return appendProgressBar(out, String.format("%.1f", progress * 100));
|
||||
return appendProgressBar(out, String.format("%.1f", progress));
|
||||
}
|
||||
|
||||
public static PrintWriter appendSortable(PrintWriter out, Object value) {
|
||||
|
|
|
@ -66,7 +66,7 @@ class AppsList implements ToJSON {
|
|||
appendLink(out, appInfo.getAppId(), rc.prefix(), "app",
|
||||
appInfo.getAppId()).append(_SEP).
|
||||
append(escapeHtml(appInfo.getUser())).append(_SEP).
|
||||
append(escapeHtml(appInfo.getName())).append(_SEP).
|
||||
append(escapeJavaScript(escapeHtml(appInfo.getName()))).append(_SEP).
|
||||
append(escapeHtml(appInfo.getQueue())).append(_SEP).
|
||||
append(appInfo.getState()).append(_SEP).
|
||||
append(appInfo.getFinalStatus()).append(_SEP);
|
||||
|
|
File diff suppressed because it is too large
File diff suppressed because it is too large
|
@ -0,0 +1,635 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
NodeManager REST API's.
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
NodeManager REST API's.
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0|toDepth=2}
|
||||
|
||||
* Overview
|
||||
|
||||
The NodeManager REST API's allow the user to get status on the node and information about applications and containers running on that node.
|
||||
|
||||
* NodeManager Information API
|
||||
|
||||
The node information resource provides overall information about that particular node.
|
||||
|
||||
** URI
|
||||
|
||||
Both of the following URI's give you the node information.
|
||||
|
||||
------
|
||||
* http://<nm http address:port>/ws/v1/node
|
||||
* http://<nm http address:port>/ws/v1/node/info
|
||||
------
|
||||
|
||||
** HTTP Operations Supported
|
||||
|
||||
------
|
||||
* GET
|
||||
------
|
||||
|
||||
** Query Parameters Supported
|
||||
|
||||
------
|
||||
None
|
||||
------
|
||||
|
||||
** Elements of the <nodeInfo> object
|
||||
|
||||
*---------------+--------------+-------------------------------+
|
||||
|| Item || Data Type || Description |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| id | long | The NodeManager id |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| nodeHostName | string | The host name of the NodeManager |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| totalPmemAllocatedContainersMB | long | The amount of physical memory allocated for use by containers in MB |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| totalVmemAllocatedContainersMB | long | The amount of virtual memory allocated for use by containers in MB |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| lastNodeUpdateTime | long | The last timestamp at which the health report was received (in ms since epoch)|
|
||||
*---------------+--------------+-------------------------------+
|
||||
| healthReport | string | The diagnostic health report of the node |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| nodeHealthy | boolean | true/false indicator of whether the node is healthy |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| nodeManagerVersion | string | Version of the NodeManager |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| nodeManagerBuildVersion | string | NodeManager build string with build version, user, and checksum |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| nodeManagerVersionBuiltOn | string | Timestamp when NodeManager was built (in ms since epoch) |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| hadoopVersion | string | Version of hadoop common |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| hadoopBuildVersion | string | Hadoop common build string with build version, user, and checksum |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| hadoopVersionBuiltOn | string | Timestamp when hadoop common was built (in ms since epoch) |
|
||||
*---------------+--------------+-------------------------------+
|
||||
|
||||
** Response Examples
|
||||
|
||||
<<JSON response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
------
|
||||
GET http://<nm http address:port>/ws/v1/node/info
|
||||
------
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Transfer-Encoding: chunked
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
{
|
||||
"nodeInfo" : {
|
||||
"hadoopVersionBuiltOn" : "Mon Jan 9 14:58:42 UTC 2012",
|
||||
"nodeManagerBuildVersion" : "0.23.1-SNAPSHOT from 1228355 by user1 source checksum 20647f76c36430e888cc7204826a445c",
|
||||
"lastNodeUpdateTime" : 1326222266126,
|
||||
"totalVmemAllocatedContainersMB" : 17203,
|
||||
"nodeHealthy" : true,
|
||||
"healthReport" : "",
|
||||
"totalPmemAllocatedContainersMB" : 8192,
|
||||
"nodeManagerVersionBuiltOn" : "Mon Jan 9 15:01:59 UTC 2012",
|
||||
"nodeManagerVersion" : "0.23.1-SNAPSHOT",
|
||||
"id" : "host.domain.com:45454",
|
||||
"hadoopBuildVersion" : "0.23.1-SNAPSHOT from 1228292 by user1 source checksum 3eba233f2248a089e9b28841a784dd00",
|
||||
"nodeHostName" : "host.domain.com",
|
||||
"hadoopVersion" : "0.23.1-SNAPSHOT"
|
||||
}
|
||||
}
|
||||
+---+
|
||||
|
||||
<<XML response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
-----
|
||||
Accept: application/xml
|
||||
GET http://<nm http address:port>/ws/v1/node/info
|
||||
-----
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/xml
|
||||
Content-Length: 983
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<nodeInfo>
|
||||
<healthReport/>
|
||||
<totalVmemAllocatedContainersMB>17203</totalVmemAllocatedContainersMB>
|
||||
<totalPmemAllocatedContainersMB>8192</totalPmemAllocatedContainersMB>
|
||||
<lastNodeUpdateTime>1326222386134</lastNodeUpdateTime>
|
||||
<nodeHealthy>true</nodeHealthy>
|
||||
<nodeManagerVersion>0.23.1-SNAPSHOT</nodeManagerVersion>
|
||||
<nodeManagerBuildVersion>0.23.1-SNAPSHOT from 1228355 by user1 source checksum 20647f76c36430e888cc7204826a445c</nodeManagerBuildVersion>
|
||||
<nodeManagerVersionBuiltOn>Mon Jan 9 15:01:59 UTC 2012</nodeManagerVersionBuiltOn>
|
||||
<hadoopVersion>0.23.1-SNAPSHOT</hadoopVersion>
|
||||
<hadoopBuildVersion>0.23.1-SNAPSHOT from 1228292 by user1 source checksum 3eba233f2248a089e9b28841a784dd00</hadoopBuildVersion>
|
||||
<hadoopVersionBuiltOn>Mon Jan 9 14:58:42 UTC 2012</hadoopVersionBuiltOn>
|
||||
<id>host.domain.com:45454</id>
|
||||
<nodeHostName>host.domain.com</nodeHostName>
|
||||
</nodeInfo>
|
||||
+---+
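The node information above can be consumed from any HTTP client. As an illustrative sketch only (the NodeManager host and port and the use of the org.json library are assumptions; the jettison JSONObject used elsewhere in Hadoop offers the same calls), a minimal Java client might fetch /ws/v1/node/info and read a couple of fields by name:

+---+
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

import org.json.JSONObject;

public class NodeInfoClient {
  public static void main(String[] args) throws Exception {
    // Hypothetical NodeManager web address; substitute a real <nm http address:port>.
    URL url = new URL("http://host.domain.com:9999/ws/v1/node/info");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestProperty("Accept", "application/json");   // request the JSON format

    InputStream in = conn.getInputStream();
    String body = new Scanner(in, "UTF-8").useDelimiter("\\A").next();
    conn.disconnect();

    // Fields are looked up by name, so their order in the response does not matter.
    JSONObject nodeInfo = new JSONObject(body).getJSONObject("nodeInfo");
    System.out.println("healthy: " + nodeInfo.getBoolean("nodeHealthy"));
    System.out.println("host:    " + nodeInfo.getString("nodeHostName"));
  }
}
+---+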
|
||||
|
||||
* Applications API
|
||||
|
||||
With the Applications API, you can obtain a collection of resources, each of which represents an application. When you run a GET operation on this resource, you obtain a collection of Application Objects. See also {{Application API}} for syntax of the application object.
|
||||
|
||||
** URI
|
||||
|
||||
------
|
||||
* http://<nm http address:port>/ws/v1/node/apps
|
||||
------
|
||||
|
||||
** HTTP Operations Supported
|
||||
|
||||
------
|
||||
* GET
|
||||
------
|
||||
|
||||
** Query Parameters Supported
|
||||
|
||||
Multiple parameters can be specified.
|
||||
|
||||
------
|
||||
* state - application state
|
||||
* user - user name
|
||||
------
|
||||
|
||||
** Elements of the <apps> (Applications) object
|
||||
|
||||
When you make a request for the list of applications, the information will be returned as a collection of app objects.
|
||||
See also {{Application API}} for syntax of the app object.
|
||||
|
||||
*---------------+--------------+-------------------------------+
|
||||
|| Item || Data Type || Description |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| app | array of app objects(JSON)/zero or more app objects(XML) | A collection of application objects |
|
||||
*---------------+--------------+--------------------------------+
|
||||
|
||||
** Response Examples
|
||||
|
||||
<<JSON response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
------
|
||||
GET http://<nm http address:port>/ws/v1/node/apps
|
||||
------
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Transfer-Encoding: chunked
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
{
|
||||
"apps" : {
|
||||
"app" : [
|
||||
{
|
||||
"containerids" : [
|
||||
"container_1326121700862_0003_01_000001",
|
||||
"container_1326121700862_0003_01_000002"
|
||||
],
|
||||
"user" : "user1",
|
||||
"id" : "application_1326121700862_0003",
|
||||
"state" : "RUNNING"
|
||||
},
|
||||
{
|
||||
"user" : "user1",
|
||||
"id" : "application_1326121700862_0002",
|
||||
"state" : "FINISHED"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
+---+
|
||||
|
||||
<<XML response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
------
|
||||
GET http://<nm http address:port>/ws/v1/node/apps
|
||||
Accept: application/xml
|
||||
------
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/xml
|
||||
Content-Length: 400
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<apps>
|
||||
<app>
|
||||
<id>application_1326121700862_0002</id>
|
||||
<state>FINISHED</state>
|
||||
<user>user1</user>
|
||||
</app>
|
||||
<app>
|
||||
<id>application_1326121700862_0003</id>
|
||||
<state>RUNNING</state>
|
||||
<user>user1</user>
|
||||
<containerids>container_1326121700862_0003_01_000002</containerids>
|
||||
<containerids>container_1326121700862_0003_01_000001</containerids>
|
||||
</app>
|
||||
</apps>
|
||||
|
||||
+---+
|
||||
|
||||
* {Application API}
|
||||
|
||||
An application resource contains information about a particular application that was run or is running on this NodeManager.
|
||||
|
||||
** URI
|
||||
|
||||
Use the following URI to obtain an app Object, for an application identified by the {appid} value.
|
||||
|
||||
------
|
||||
* http://<nm http address:port>/ws/v1/node/apps/{appid}
|
||||
------
|
||||
|
||||
** HTTP Operations Supported
|
||||
|
||||
------
|
||||
* GET
|
||||
------
|
||||
|
||||
** Query Parameters Supported
|
||||
|
||||
------
|
||||
None
|
||||
------
|
||||
|
||||
** Elements of the <app> (Application) object
|
||||
|
||||
*---------------+--------------+-------------------------------+
|
||||
|| Item || Data Type || Description |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| id | string | The application id |
|
||||
*---------------+--------------+--------------------------------+
|
||||
| user | string | The user who started the application |
|
||||
*---------------+--------------+--------------------------------+
|
||||
| state | string | The state of the application - valid states are: NEW, INITING, RUNNING, FINISHING_CONTAINERS_WAIT, APPLICATION_RESOURCES_CLEANINGUP, FINISHED |
|
||||
*---------------+--------------+--------------------------------+
|
||||
| containerids | array of containerids(JSON)/zero or more containerids(XML) | The list of containerids currently being used by the application on this node. If not present then no containers are currently running for this application.|
|
||||
*---------------+--------------+--------------------------------+
|
||||
|
||||
** Response Examples
|
||||
|
||||
<<JSON response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
------
|
||||
GET http://<nm http address:port>/ws/v1/node/apps/application_1326121700862_0005
|
||||
------
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Transfer-Encoding: chunked
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
{
|
||||
"app" : {
|
||||
"containerids" : [
|
||||
"container_1326121700862_0005_01_000003",
|
||||
"container_1326121700862_0005_01_000001"
|
||||
],
|
||||
"user" : "user1",
|
||||
"id" : "application_1326121700862_0005",
|
||||
"state" : "RUNNING"
|
||||
}
|
||||
}
|
||||
+---+
|
||||
|
||||
<<XML response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
------
|
||||
GET http://<nm http address:port>/ws/v1/node/apps/application_1326121700862_0005
|
||||
Accept: application/xml
|
||||
------
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/xml
|
||||
Content-Length: 281
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<app>
|
||||
<id>application_1326121700862_0005</id>
|
||||
<state>RUNNING</state>
|
||||
<user>user1</user>
|
||||
<containerids>container_1326121700862_0005_01_000003</containerids>
|
||||
<containerids>container_1326121700862_0005_01_000001</containerids>
|
||||
</app>
|
||||
+---+
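Because containerids is omitted when no containers are running for the application, clients should treat it as an optional element. A small hedged sketch of that check, assuming the org.json library and the JSON body shown above:

+---+
import org.json.JSONArray;
import org.json.JSONObject;

public class NodeAppStatus {
  /** Returns how many containers the application currently has on this node. */
  static int runningContainers(String responseBody) {
    JSONObject app = new JSONObject(responseBody).getJSONObject("app");
    // containerids is only present while containers are running for the application.
    if (!app.has("containerids")) {
      return 0;
    }
    JSONArray ids = app.getJSONArray("containerids");
    return ids.length();
  }
}
+---+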
|
||||
|
||||
|
||||
* Containers API
|
||||
|
||||
With the containers API, you can obtain a collection of resources, each of which represents a container. When you run a GET operation on this resource, you obtain a collection of Container Objects. See also {{Container API}} for syntax of the container object.
|
||||
|
||||
** URI
|
||||
|
||||
------
|
||||
* http://<nm http address:port>/ws/v1/node/containers
|
||||
------
|
||||
|
||||
** HTTP Operations Supported
|
||||
|
||||
------
|
||||
* GET
|
||||
------
|
||||
|
||||
** Query Parameters Supported
|
||||
|
||||
------
|
||||
None
|
||||
------
|
||||
|
||||
** Elements of the <containers> object
|
||||
|
||||
When you make a request for the list of containers, the information will be returned as a collection of container objects.
|
||||
See also {{Container API}} for syntax of the container object.
|
||||
|
||||
*---------------+--------------+-------------------------------+
|
||||
|| Item || Data Type || Description |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| containers | array of container objects(JSON)/zero or more container objects(XML) | A collection of container objects |
|
||||
*---------------+--------------+-------------------------------+
|
||||
|
||||
** Response Examples
|
||||
|
||||
<<JSON response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
------
|
||||
GET http://<nm http address:port>/ws/v1/node/containers
|
||||
------
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Transfer-Encoding: chunked
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
{
|
||||
"containers" : {
|
||||
"container" : [
|
||||
{
|
||||
"nodeId" : "host.domain.com:45454",
|
||||
"totalMemoryNeededMB" : 2048,
|
||||
"state" : "RUNNING",
|
||||
"diagnostics" : "",
|
||||
"containerLogsLink" : "http://host.domain.com:9999/node/containerlogs/container_1326121700862_0006_01_000001/user1",
|
||||
"user" : "user1",
|
||||
"id" : "container_1326121700862_0006_01_000001",
|
||||
"exitCode" : -1000
|
||||
},
|
||||
{
|
||||
"nodeId" : "host.domain.com:45454",
|
||||
"totalMemoryNeededMB" : 2048,
|
||||
"state" : "RUNNING",
|
||||
"diagnostics" : "",
|
||||
"containerLogsLink" : "http://host.domain.com:9999/node/containerlogs/container_1326121700862_0006_01_000003/user1",
|
||||
"user" : "user1",
|
||||
"id" : "container_1326121700862_0006_01_000003",
|
||||
"exitCode" : -1000
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
+---+
|
||||
|
||||
<<XML response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
------
|
||||
GET http://<nm http address:port>/ws/v1/node/containers
|
||||
Accept: application/xml
|
||||
------
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/xml
|
||||
Content-Length: 988
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<containers>
|
||||
<container>
|
||||
<id>container_1326121700862_0006_01_000001</id>
|
||||
<state>RUNNING</state>
|
||||
<exitCode>-1000</exitCode>
|
||||
<diagnostics/>
|
||||
<user>user1</user>
|
||||
<totalMemoryNeededMB>2048</totalMemoryNeededMB>
|
||||
<containerLogsLink>http://host.domain.com:9999/node/containerlogs/container_1326121700862_0006_01_000001/user1</containerLogsLink>
|
||||
<nodeId>host.domain.com:45454</nodeId>
|
||||
</container>
|
||||
<container>
|
||||
<id>container_1326121700862_0006_01_000003</id>
|
||||
<state>DONE</state>
|
||||
<exitCode>0</exitCode>
|
||||
<diagnostics>Container killed by the ApplicationMaster.</diagnostics>
|
||||
<user>user1</user>
|
||||
<totalMemoryNeededMB>2048</totalMemoryNeededMB>
|
||||
<containerLogsLink>http://host.domain.com:9999/node/containerlogs/container_1326121700862_0006_01_000003/user1</containerLogsLink>
|
||||
<nodeId>host.domain.com:45454</nodeId>
|
||||
</container>
|
||||
</containers>
|
||||
+---+
|
||||
|
||||
|
||||
* {Container API}
|
||||
|
||||
A container resource contains information about a particular container that is running on this NodeManager.
|
||||
|
||||
** URI
|
||||
|
||||
Use the following URI to obtain a Container Object, for a container identified by the {containerid} value.
|
||||
|
||||
------
|
||||
* http://<nm http address:port>/ws/v1/node/containers/{containerid}
|
||||
------
|
||||
|
||||
** HTTP Operations Supported
|
||||
|
||||
------
|
||||
* GET
|
||||
------
|
||||
|
||||
** Query Parameters Supported
|
||||
|
||||
------
|
||||
None
|
||||
------
|
||||
|
||||
** Elements of the <container> object
|
||||
|
||||
*---------------+--------------+-------------------------------+
|
||||
|| Item || Data Type || Description |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| id | string | The container id |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| state | string | State of the container - valid states are: NEW, LOCALIZING, LOCALIZATION_FAILED, LOCALIZED, RUNNING, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE, KILLING, CONTAINER_CLEANEDUP_AFTER_KILL, CONTAINER_RESOURCES_CLEANINGUP, DONE|
|
||||
*---------------+--------------+-------------------------------+
|
||||
| nodeId | string | The id of the node the container is on|
|
||||
*---------------+--------------+-------------------------------+
|
||||
| containerLogsLink | string | The http link to the container logs |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| user | string | The user name of the user who started the container |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| exitCode | int | Exit code of the container |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| diagnostics | string | A diagnostic message for failed containers |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| totalMemoryNeededMB | long | Total amount of memory needed by the container (in MB) |
|
||||
*---------------+--------------+-------------------------------+
|
||||
|
||||
** Response Examples
|
||||
|
||||
<<JSON response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
------
|
||||
GET http://<nm http address:port>/ws/v1/node/containers/container_1326121700862_0007_01_000001
|
||||
------
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Transfer-Encoding: chunked
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
{
|
||||
"container" : {
|
||||
"nodeId" : "host.domain.com:45454",
|
||||
"totalMemoryNeededMB" : 2048,
|
||||
"state" : "RUNNING",
|
||||
"diagnostics" : "",
|
||||
"containerLogsLink" : "http://host.domain.com:9999/node/containerlogs/container_1326121700862_0007_01_000001/user1",
|
||||
"user" : "user1",
|
||||
"id" : "container_1326121700862_0007_01_000001",
|
||||
"exitCode" : -1000
|
||||
}
|
||||
}
|
||||
+---+
|
||||
|
||||
<<XML response>>
|
||||
|
||||
HTTP Request:
|
||||
|
||||
------
|
||||
GET http://<nm http address:port>/ws/v1/node/containers/container_1326121700862_0007_01_000001
|
||||
Accept: application/xml
|
||||
------
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/xml
|
||||
Content-Length: 491
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<container>
|
||||
<id>container_1326121700862_0007_01_000001</id>
|
||||
<state>RUNNING</state>
|
||||
<exitCode>-1000</exitCode>
|
||||
<diagnostics/>
|
||||
<user>user1</user>
|
||||
<totalMemoryNeededMB>2048</totalMemoryNeededMB>
|
||||
<containerLogsLink>http://host.domain.com:9999/node/containerlogs/container_1326121700862_0007_01_000001/user1</containerLogsLink>
|
||||
<nodeId>host.domain.com:45454</nodeId>
|
||||
</container>
|
||||
+---+
|
||||
|
File diff suppressed because it is too large
|
@ -0,0 +1,595 @@
|
|||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
Hadoop YARN - Introduction to the web services REST API's.
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
Hadoop YARN - Introduction to the web services REST API's.
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
||||
The Hadoop YARN web service REST APIs are a set of URI resources that give access to the cluster, nodes, applications, and application historical information. The URI resources are grouped into APIs based on the type of information returned. Some URI resources return collections while others return singletons.
|
||||
|
||||
* URI's
|
||||
|
||||
The URIs for the REST-based Web services have the following syntax:
|
||||
|
||||
------
|
||||
http://{http address of service}/ws/{version}/{resourcepath}
|
||||
------
|
||||
|
||||
The elements in this syntax are as follows:
|
||||
|
||||
------
|
||||
{http address of service} - The http address of the service to get information about.
|
||||
Currently supported are the ResourceManager, NodeManager,
|
||||
MapReduce application master, and history server.
|
||||
{version} - The version of the APIs. In this release, the version is v1.
|
||||
{resourcepath} - A path that defines a singleton resource or a collection of resources.
|
||||
------
|
||||
|
||||
* HTTP Requests
|
||||
|
||||
To invoke a REST API, your application calls an HTTP operation on the URI associated with a resource.
|
||||
|
||||
** Summary of HTTP operations
|
||||
|
||||
Currently only GET is supported. It retrieves information about the resource specified.
|
||||
|
||||
** Security
|
||||
|
||||
The web service REST API's go through the same security as the web UI. If your cluster administrators have filters enabled, you must authenticate via the mechanism they specified.
|
||||
|
||||
** Headers Supported
|
||||
|
||||
-----
|
||||
* Accept
|
||||
* Accept-Encoding
|
||||
-----
|
||||
|
||||
Currently the only header fields used are Accept and Accept-Encoding. Accept supports XML and JSON as the response type. Accept-Encoding currently only supports the gzip format and will return gzip compressed output if this is specified; otherwise output is uncompressed. All other header fields are ignored.
|
||||
|
||||
* HTTP Responses
|
||||
|
||||
The next few sections describe some of the syntax and other details of the HTTP Responses of the web service REST APIs.
|
||||
|
||||
** Compression
|
||||
|
||||
This release supports gzip compression if you specify gzip in the Accept-Encoding header of the HTTP request (Accept-Encoding: gzip).
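A minimal Java sketch of requesting and unwrapping gzip output (the ResourceManager URL below is the same hypothetical address used in the examples that follow; only JDK classes are needed):

+---+
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
import java.util.zip.GZIPInputStream;

public class GzipGet {
  public static void main(String[] args) throws Exception {
    URL url = new URL(
        "http://rmhost.domain:8088/ws/v1/cluster/apps/application_1324057493980_0001");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestProperty("Accept", "application/json");
    conn.setRequestProperty("Accept-Encoding", "gzip");   // ask for gzip compressed output

    InputStream in = conn.getInputStream();
    // Only unwrap if the server actually compressed the response.
    if ("gzip".equals(conn.getContentEncoding())) {
      in = new GZIPInputStream(in);
    }
    System.out.println(new Scanner(in, "UTF-8").useDelimiter("\\A").next());
  }
}
+---+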
|
||||
|
||||
** Response Formats
|
||||
|
||||
This release of the web service REST APIs supports responses in JSON and XML formats. JSON is the default. To set the response format, you can specify the format in the Accept header of the HTTP request.
|
||||
|
||||
As specified in HTTP Response Codes, the response body can contain the data that represents the resource or an error message. In the case of success, the response body is in the selected format, either JSON or XML. In the case of error, the response body is in either JSON or XML based on the format requested. The Content-Type header of the response contains the format requested. If the application requests an unsupported format, the response status code is 500.
|
||||
Note that the order of the fields within the response body is not specified and might change. Also, additional fields might be added to a response body. Therefore, your applications should use parsing routines that can extract data from a response body in any order.
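For example, a name-based lookup is insensitive to field order. A small sketch, assuming the org.json library and the app object shown in the response examples below:

+---+
import org.json.JSONObject;

public class AppStateReader {
  /** Extracts two fields by name; their position in the body is irrelevant. */
  static String describe(String jsonBody) {
    JSONObject app = new JSONObject(jsonBody).getJSONObject("app");
    return app.getString("id") + " is " + app.getString("state");
  }

  public static void main(String[] args) {
    // The field order differs between these two bodies; the result is the same.
    System.out.println(describe(
        "{\"app\": {\"id\": \"application_1324057493980_0001\", \"state\": \"ACCEPTED\"}}"));
    System.out.println(describe(
        "{\"app\": {\"state\": \"ACCEPTED\", \"id\": \"application_1324057493980_0001\"}}"));
  }
}
+---+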
|
||||
|
||||
** Response Errors
|
||||
|
||||
After making an HTTP request, an application should check the response status code to verify success or detect an error. If the response status code indicates an error, the response body contains an error message. The first field is the exception type; currently only RemoteException is returned. The following table lists the items within the RemoteException error message:
|
||||
|
||||
*---------------*--------------*-------------------------------*
|
||||
|| Item || Data Type || Description |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| exception | String | Exception type |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| javaClassName | String | Java class name of exception |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| message | String | Detailed message of exception |
|
||||
*---------------+--------------+-------------------------------+
|
||||
|
||||
** Response Examples
|
||||
|
||||
*** JSON response with single resource
|
||||
|
||||
HTTP Request:
|
||||
GET http://rmhost.domain:8088/ws/v1/cluster/apps/application_1324057493980_0001
|
||||
|
||||
Response Status Line:
|
||||
HTTP/1.1 200 OK
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Transfer-Encoding: chunked
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
{
|
||||
"app":
|
||||
{
|
||||
"id":"application_1324057493980_0001",
|
||||
"user":"user1",
|
||||
"name":"",
|
||||
"queue":"default",
|
||||
"state":"ACCEPTED",
|
||||
"finalStatus":"UNDEFINED",
|
||||
"progress":0,
|
||||
"trackingUI":"UNASSIGNED",
|
||||
"diagnostics":"",
|
||||
"clusterId":1324057493980,
|
||||
"startedTime":1324057495921,
|
||||
"finishedTime":0,
|
||||
"elapsedTime":2063,
|
||||
"amContainerLogs":"http:\/\/amNM:2\/node\/containerlogs\/container_1324057493980_0001_01_000001",
|
||||
"amHostHttpAddress":"amNM:2"
|
||||
}
|
||||
}
|
||||
+---+
|
||||
|
||||
*** JSON response with Error response
|
||||
|
||||
Here we request information about an application that doesn't exist yet.
|
||||
|
||||
HTTP Request:
|
||||
GET http://rmhost.domain:8088/ws/v1/cluster/apps/application_1324057493980_9999
|
||||
|
||||
Response Status Line:
|
||||
HTTP/1.1 404 Not Found
|
||||
|
||||
Response Header:
|
||||
|
||||
+---+
|
||||
HTTP/1.1 404 Not Found
|
||||
Content-Type: application/json
|
||||
Transfer-Encoding: chunked
|
||||
Server: Jetty(6.1.26)
|
||||
+---+
|
||||
|
||||
Response Body:
|
||||
|
||||
+---+
|
||||
{
|
||||
"RemoteException" : {
|
||||
"javaClassName" : "org.apache.hadoop.yarn.webapp.NotFoundException",
|
||||
"exception" : "NotFoundException",
|
||||
"message" : "java.lang.Exception: app with id: application_1324057493980_9999 not found"
|
||||
}
|
||||
}
|
||||
+---+
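A hedged sketch of detecting and reporting such an error from Java (same hypothetical ResourceManager address as above; the org.json library is an assumption):

+---+
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

import org.json.JSONObject;

public class ErrorAwareGet {
  public static void main(String[] args) throws Exception {
    URL url = new URL(
        "http://rmhost.domain:8088/ws/v1/cluster/apps/application_1324057493980_9999");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestProperty("Accept", "application/json");

    int status = conn.getResponseCode();
    // On an error the body is on the error stream and wraps a RemoteException object.
    InputStream in = status >= 400 ? conn.getErrorStream() : conn.getInputStream();
    String body = new Scanner(in, "UTF-8").useDelimiter("\\A").next();

    if (status >= 400) {
      JSONObject err = new JSONObject(body).getJSONObject("RemoteException");
      System.err.println(status + " " + err.getString("exception")
          + ": " + err.getString("message"));
    } else {
      System.out.println(body);
    }
  }
}
+---+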
|
||||
|
||||
* Example usage
|
||||
|
||||
You can use any number of ways/languages to use the web services REST API's. This example uses the curl command line interface to do the REST GET calls.
|
||||
|
||||
In this example, a user submits a MapReduce application to the ResourceManager using a command like:
|
||||
|
||||
+---+
|
||||
hadoop jar hadoop-mapreduce-test.jar sleep -Dmapred.job.queue.name=a1 -m 1 -r 1 -rt 1200000 -mt 20
|
||||
+---+
|
||||
|
||||
The client prints information about the job submitted along with the application id, similar to:
|
||||
|
||||
+---+
|
||||
12/01/18 04:25:15 INFO mapred.ResourceMgrDelegate: Submitted application application_1326821518301_0010 to ResourceManager at host.domain.com/10.10.10.10:8040
|
||||
12/01/18 04:25:15 INFO mapreduce.Job: Running job: job_1326821518301_0010
|
||||
12/01/18 04:25:21 INFO mapred.ClientServiceDelegate: The url to track the job: host.domain.com:8088/proxy/application_1326821518301_0010/
|
||||
12/01/18 04:25:22 INFO mapreduce.Job: Job job_1326821518301_0010 running in uber mode : false
|
||||
12/01/18 04:25:22 INFO mapreduce.Job: map 0% reduce 0%
|
||||
+---+
|
||||
|
||||
The user then wishes to track the application. The user starts by getting the information about the application from the ResourceManager. Use the --compressed option to request compressed output; curl handles uncompressing on the client side.
|
||||
|
||||
+---+
|
||||
curl --compressed -H "Accept: application/json" -X GET "http://host.domain.com:8088/ws/v1/cluster/apps/application_1326821518301_0010"
|
||||
+---+
|
||||
|
||||
Output:
|
||||
|
||||
+---+
|
||||
{
|
||||
"app" : {
|
||||
"finishedTime" : 0,
|
||||
"amContainerLogs" : "http://host.domain.com:9999/node/containerlogs/container_1326821518301_0010_01_000001",
|
||||
"trackingUI" : "ApplicationMaster",
|
||||
"state" : "RUNNING",
|
||||
"user" : "user1",
|
||||
"id" : "application_1326821518301_0010",
|
||||
"clusterId" : 1326821518301,
|
||||
"finalStatus" : "UNDEFINED",
|
||||
"amHostHttpAddress" : "host.domain.com:9999",
|
||||
"progress" : 82.44703,
|
||||
"name" : "Sleep job",
|
||||
"startedTime" : 1326860715335,
|
||||
"elapsedTime" : 31814,
|
||||
"diagnostics" : "",
|
||||
"trackingUrl" : "http://host.domain.com:8088/proxy/application_1326821518301_0010/",
|
||||
"queue" : "a1"
|
||||
}
|
||||
}
|
||||
+---+
|
||||
|
||||
The user then wishes to get more details about the running application and goes directly to the MapReduce application master for this application. The ResourceManager lists the trackingUrl that can be used for this application: http://host.domain.com:8088/proxy/application_1326821518301_0010. This URL can either be opened in a web browser or used with the web service REST API's. The user uses the web services REST API's to get the list of jobs this MapReduce application master is running:
|
||||
|
||||
+---+
|
||||
curl --compressed -H "Accept: application/json" -X GET "http://host.domain.com:8088/proxy/application_1326821518301_0010/ws/v1/mapreduce/jobs"
|
||||
+---+
|
||||
|
||||
Output:
|
||||
|
||||
+---+
|
||||
{
|
||||
"jobs" : {
|
||||
"job" : [
|
||||
{
|
||||
"runningReduceAttempts" : 1,
|
||||
"reduceProgress" : 72.104515,
|
||||
"failedReduceAttempts" : 0,
|
||||
"newMapAttempts" : 0,
|
||||
"mapsRunning" : 0,
|
||||
"state" : "RUNNING",
|
||||
"successfulReduceAttempts" : 0,
|
||||
"reducesRunning" : 1,
|
||||
"acls" : [
|
||||
{
|
||||
"value" : " ",
|
||||
"name" : "mapreduce.job.acl-modify-job"
|
||||
},
|
||||
{
|
||||
"value" : " ",
|
||||
"name" : "mapreduce.job.acl-view-job"
|
||||
}
|
||||
],
|
||||
"reducesPending" : 0,
|
||||
"user" : "user1",
|
||||
"reducesTotal" : 1,
|
||||
"mapsCompleted" : 1,
|
||||
"startTime" : 1326860720902,
|
||||
"id" : "job_1326821518301_10_10",
|
||||
"successfulMapAttempts" : 1,
|
||||
"runningMapAttempts" : 0,
|
||||
"newReduceAttempts" : 0,
|
||||
"name" : "Sleep job",
|
||||
"mapsPending" : 0,
|
||||
"elapsedTime" : 64432,
|
||||
"reducesCompleted" : 0,
|
||||
"mapProgress" : 100,
|
||||
"diagnostics" : "",
|
||||
"failedMapAttempts" : 0,
|
||||
"killedReduceAttempts" : 0,
|
||||
"mapsTotal" : 1,
|
||||
"uberized" : false,
|
||||
"killedMapAttempts" : 0,
|
||||
"finishTime" : 0
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
+---+
|
||||
|
||||
The user then wishes to get the task details about the job with job id job_1326821518301_10_10 that was listed above.
|
||||
|
||||
+---+
|
||||
curl --compressed -H "Accept: application/json" -X GET "http://host.domain.com:8088/proxy/application_1326821518301_0010/ws/v1/mapreduce/jobs/job_1326821518301_10_10/tasks"
|
||||
+---+
|
||||
|
||||
Output:
|
||||
|
||||
+---+
|
||||
{
|
||||
"tasks" : {
|
||||
"task" : [
|
||||
{
|
||||
"progress" : 100,
|
||||
"elapsedTime" : 5059,
|
||||
"state" : "SUCCEEDED",
|
||||
"startTime" : 1326860725014,
|
||||
"id" : "task_1326821518301_10_10_m_0",
|
||||
"type" : "MAP",
|
||||
"successfulAttempt" : "attempt_1326821518301_10_10_m_0_0",
|
||||
"finishTime" : 1326860730073
|
||||
},
|
||||
{
|
||||
"progress" : 72.104515,
|
||||
"elapsedTime" : 0,
|
||||
"state" : "RUNNING",
|
||||
"startTime" : 1326860732984,
|
||||
"id" : "task_1326821518301_10_10_r_0",
|
||||
"type" : "REDUCE",
|
||||
"successfulAttempt" : "",
|
||||
"finishTime" : 0
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
+---+
|
||||
|
||||
The map task has finished but the reduce task is still running. The user wishes to get the task attempt information for the reduce task task_1326821518301_10_10_r_0; note that the Accept header isn't strictly required here since JSON is the default output format:
|
||||
|
||||
+---+
|
||||
curl --compressed -X GET "http://host.domain.com:8088/proxy/application_1326821518301_0010/ws/v1/mapreduce/jobs/job_1326821518301_10_10/tasks/task_1326821518301_10_10_r_0/attempts"
|
||||
+---+
|
||||
|
||||
Output:
|
||||
|
||||
+---+
|
||||
{
|
||||
"taskAttempts" : {
|
||||
"taskAttempt" : [
|
||||
{
|
||||
"elapsedMergeTime" : 158,
|
||||
"shuffleFinishTime" : 1326860735378,
|
||||
"assignedContainerId" : "container_1326821518301_0010_01_000003",
|
||||
"progress" : 72.104515,
|
||||
"elapsedTime" : 0,
|
||||
"state" : "RUNNING",
|
||||
"elapsedShuffleTime" : 2394,
|
||||
"mergeFinishTime" : 1326860735536,
|
||||
"rack" : "/10.10.10.0",
|
||||
"elapsedReduceTime" : 0,
|
||||
"nodeHttpAddress" : "host.domain.com:9999",
|
||||
"type" : "REDUCE",
|
||||
"startTime" : 1326860732984,
|
||||
"id" : "attempt_1326821518301_10_10_r_0_0",
|
||||
"finishTime" : 0
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
+---+

The reduce attempt is still running and the user wishes to see the current counter values for that attempt:

+---+
curl --compressed -H "Accept: application/json" -X GET "http://host.domain.com:8088/proxy/application_1326821518301_0010/ws/v1/mapreduce/jobs/job_1326821518301_10_10/tasks/task_1326821518301_10_10_r_0/attempts/attempt_1326821518301_10_10_r_0_0/counters"
+---+

Output:

+---+
{
"JobTaskAttemptCounters" : {
"taskAttemptCounterGroup" : [
{
"counterGroupName" : "org.apache.hadoop.mapreduce.FileSystemCounter",
"counter" : [
{
"value" : 4216,
"name" : "FILE_BYTES_READ"
},
{
"value" : 77151,
"name" : "FILE_BYTES_WRITTEN"
},
{
"value" : 0,
"name" : "FILE_READ_OPS"
},
{
"value" : 0,
"name" : "FILE_LARGE_READ_OPS"
},
{
"value" : 0,
"name" : "FILE_WRITE_OPS"
},
{
"value" : 0,
"name" : "HDFS_BYTES_READ"
},
{
"value" : 0,
"name" : "HDFS_BYTES_WRITTEN"
},
{
"value" : 0,
"name" : "HDFS_READ_OPS"
},
{
"value" : 0,
"name" : "HDFS_LARGE_READ_OPS"
},
{
"value" : 0,
"name" : "HDFS_WRITE_OPS"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.TaskCounter",
"counter" : [
{
"value" : 0,
"name" : "COMBINE_INPUT_RECORDS"
},
{
"value" : 0,
"name" : "COMBINE_OUTPUT_RECORDS"
},
{
"value" : 1767,
"name" : "REDUCE_INPUT_GROUPS"
},
{
"value" : 25104,
"name" : "REDUCE_SHUFFLE_BYTES"
},
{
"value" : 1767,
"name" : "REDUCE_INPUT_RECORDS"
},
{
"value" : 0,
"name" : "REDUCE_OUTPUT_RECORDS"
},
{
"value" : 0,
"name" : "SPILLED_RECORDS"
},
{
"value" : 1,
"name" : "SHUFFLED_MAPS"
},
{
"value" : 0,
"name" : "FAILED_SHUFFLE"
},
{
"value" : 1,
"name" : "MERGED_MAP_OUTPUTS"
},
{
"value" : 50,
"name" : "GC_TIME_MILLIS"
},
{
"value" : 1580,
"name" : "CPU_MILLISECONDS"
},
{
"value" : 141320192,
"name" : "PHYSICAL_MEMORY_BYTES"
},
{
"value" : 1118552064,
"name" : "VIRTUAL_MEMORY_BYTES"
},
{
"value" : 73728000,
"name" : "COMMITTED_HEAP_BYTES"
}
]
},
{
"counterGroupName" : "Shuffle Errors",
"counter" : [
{
"value" : 0,
"name" : "BAD_ID"
},
{
"value" : 0,
"name" : "CONNECTION"
},
{
"value" : 0,
"name" : "IO_ERROR"
},
{
"value" : 0,
"name" : "WRONG_LENGTH"
},
{
"value" : 0,
"name" : "WRONG_MAP"
},
{
"value" : 0,
"name" : "WRONG_REDUCE"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter",
"counter" : [
{
"value" : 0,
"name" : "BYTES_WRITTEN"
}
]
}
],
"id" : "attempt_1326821518301_10_10_r_0_0"
}
}
+---+
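
Rather than reading the counter output by eye, the groups can be walked with the Jackson library that Hadoop already bundles; the org.codehaus.jackson 1.x API is assumed in this sketch, and countersJson stands for the response body of the request above.

+---+
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;

// Illustrative fragment: print every counter as "group / name = value".
JsonNode root = new ObjectMapper().readTree(countersJson);
for (JsonNode group : root.path("JobTaskAttemptCounters").path("taskAttemptCounterGroup")) {
  String groupName = group.path("counterGroupName").getTextValue();
  for (JsonNode counter : group.path("counter")) {
    System.out.println(groupName + " / " + counter.path("name").getTextValue()
        + " = " + counter.path("value").getLongValue());
  }
}
+---+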

Once the job finishes, the user wishes to get the final job information for this job from the history server.

+---+
curl --compressed -X GET "http://host.domain.com:19888/ws/v1/history/mapreduce/jobs/job_1326821518301_10_10"
+---+

Output:

+---+
{
"job" : {
"avgReduceTime" : 1250784,
"failedReduceAttempts" : 0,
"state" : "SUCCEEDED",
"successfulReduceAttempts" : 1,
"acls" : [
{
"value" : " ",
"name" : "mapreduce.job.acl-modify-job"
},
{
"value" : " ",
"name" : "mapreduce.job.acl-view-job"
}
],
"user" : "user1",
"reducesTotal" : 1,
"mapsCompleted" : 1,
"startTime" : 1326860720902,
"id" : "job_1326821518301_10_10",
"avgMapTime" : 5059,
"successfulMapAttempts" : 1,
"name" : "Sleep job",
"avgShuffleTime" : 2394,
"reducesCompleted" : 1,
"diagnostics" : "",
"failedMapAttempts" : 0,
"avgMergeTime" : 2552,
"killedReduceAttempts" : 0,
"mapsTotal" : 1,
"queue" : "a1",
"uberized" : false,
"killedMapAttempts" : 0,
"finishTime" : 1326861986164
}
}
+---+
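
The history record above does not include an elapsedTime field, but the job duration follows directly from the two timestamps it does report (values copied from the output above):

+---+
long startTime  = 1326860720902L;
long finishTime = 1326861986164L;
long elapsedMs  = finishTime - startTime;  // 1265262 ms
// Prints "job ran for 21 min 5 s".
System.out.println("job ran for " + (elapsedMs / 60000) + " min "
    + ((elapsedMs % 60000) / 1000) + " s");
+---+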

The user also gets the final application information from the ResourceManager.

+---+
curl --compressed -H "Accept: application/json" -X GET "http://host.domain.com:8088/ws/v1/cluster/apps/application_1326821518301_0010"
+---+

Output:

+---+
{
"app" : {
"finishedTime" : 1326861991282,
"amContainerLogs" : "http://host.domain.com:9999/node/containerlogs/container_1326821518301_0010_01_000001",
"trackingUI" : "History",
"state" : "FINISHED",
"user" : "user1",
"id" : "application_1326821518301_0010",
"clusterId" : 1326821518301,
"finalStatus" : "SUCCEEDED",
"amHostHttpAddress" : "host.domain.com:9999",
"progress" : 100,
"name" : "Sleep job",
"startedTime" : 1326860715335,
"elapsedTime" : 1275947,
"diagnostics" : "",
"trackingUrl" : "http://host.domain.com:8088/proxy/application_1326821518301_0010/jobhistory/job/job_1326821518301_10_10",
"queue" : "a1"
}
}
+---+
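
Before declaring the whole workflow complete, a client can confirm from this ResourceManager response that the application both finished and succeeded. Continuing the WsGet sketch (again matching on the response text only for brevity):

+---+
// Illustrative fragment, continuing the WsGet sketch above.
String app = WsGet.getJson("http://host.domain.com:8088/ws/v1/cluster/apps/application_1326821518301_0010");
boolean finished  = app.contains("FINISHED");
boolean succeeded = app.contains("SUCCEEDED");
if (!(finished && succeeded)) {
  throw new IllegalStateException("application_1326821518301_0010 did not finish successfully");
}
+---+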

@@ -99,6 +99,8 @@
rev="${yarn.version}" conf="compile->default">
<artifact name="hadoop-mapreduce-client-jobclient" type="tests" ext="jar" m:classifier="tests"/>
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-rumen"
rev="${hadoop-common.version}" conf="compile->default"/>
<dependency org="org.apache.hadoop" name="hadoop-archives"
rev="${hadoop-common.version}" conf="compile->default"/>

@@ -70,6 +70,8 @@
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-archives"
rev="${hadoop-common.version}" conf="common->default"/>
<dependency org="org.apache.hadoop" name="hadoop-rumen"
rev="${hadoop-common.version}" conf="common->default"/>
<dependency org="commons-logging"
name="commons-logging"
rev="${commons-logging.version}"

@@ -1,136 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.tools.rumen;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.CountDownLatch;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.BeforeClass;
import org.junit.Test;

import static org.junit.Assert.*;

public class TestConcurrentRead {
  static final List<LoggedJob> cachedTrace = new ArrayList<LoggedJob>();
  static final String traceFile =
      "rumen/small-trace-test/job-tracker-logs-trace-output.gz";

  static Configuration conf;
  static FileSystem lfs;
  static Path path;

  @BeforeClass
  static public void globalSetUp() throws IOException {
    conf = new Configuration();
    lfs = FileSystem.getLocal(conf);
    Path rootInputDir = new Path(System.getProperty("test.tools.input.dir", ""))
        .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
    path = new Path(rootInputDir, traceFile);
    JobTraceReader reader = new JobTraceReader(path, conf);
    try {
      LoggedJob job;
      while ((job = reader.getNext()) != null) {
        cachedTrace.add(job);
      }
    } finally {
      reader.close();
    }
  }

  void readAndCompare() throws IOException {
    JobTraceReader reader = new JobTraceReader(path, conf);
    try {
      for (Iterator<LoggedJob> it = cachedTrace.iterator(); it.hasNext();) {
        LoggedJob jobExpected = it.next();
        LoggedJob jobRead = reader.getNext();
        assertNotNull(jobRead);
        try {
          jobRead.deepCompare(jobExpected, null);
        } catch (DeepInequalityException e) {
          fail(e.toString());
        }
      }
      assertNull(reader.getNext());
    } finally {
      reader.close();
    }
  }

  class TestThread extends Thread {
    final int repeat;
    final CountDownLatch startSignal, doneSignal;
    final Map<String, Throwable> errors;

    TestThread(int id, int repeat, CountDownLatch startSignal, CountDownLatch doneSignal, Map<String, Throwable> errors) {
      super(String.format("TestThread-%d", id));
      this.repeat = repeat;
      this.startSignal = startSignal;
      this.doneSignal = doneSignal;
      this.errors = errors;
    }

    @Override
    public void run() {
      try {
        startSignal.await();
        for (int i = 0; i < repeat; ++i) {
          try {
            readAndCompare();
          } catch (Throwable e) {
            errors.put(getName(), e);
            break;
          }
        }
        doneSignal.countDown();
      } catch (Throwable e) {
        errors.put(getName(), e);
      }
    }
  }

  @Test
  public void testConcurrentRead() throws InterruptedException {
    int nThr = conf.getInt("test.rumen.concurrent-read.threads", 4);
    int repeat = conf.getInt("test.rumen.concurrent-read.repeat", 10);
    CountDownLatch startSignal = new CountDownLatch(1);
    CountDownLatch doneSignal = new CountDownLatch(nThr);
    Map<String, Throwable> errors = Collections
        .synchronizedMap(new TreeMap<String, Throwable>());
    for (int i = 0; i < nThr; ++i) {
      new TestThread(i, repeat, startSignal, doneSignal, errors).start();
    }
    startSignal.countDown();
    doneSignal.await();
    if (!errors.isEmpty()) {
      StringBuilder sb = new StringBuilder();
      for (Map.Entry<String, Throwable> e : errors.entrySet()) {
        sb.append(String.format("%s:\n%s\n", e.getKey(), e.getValue().toString()));
      }
      fail(sb.toString());
    }
  }
}

@@ -1,105 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.tools.rumen;

import org.apache.hadoop.util.StringUtils;
import org.junit.Test;
import static org.junit.Assert.*;

public class TestParsedLine {
  static final char[] CHARS_TO_ESCAPE = new char[]{'=', '"', '.'};

  String buildLine(String type, String[] kvseq) {
    StringBuilder sb = new StringBuilder();
    sb.append(type);
    for (int i=0; i<kvseq.length; ++i) {
      sb.append(" ");
      if (kvseq[i].equals(".") || kvseq[i].equals("\n")) {
        sb.append(kvseq[i]);
        continue;
      }
      if (i == kvseq.length-1) {
        fail("Incorrect input, expecting value.");
      }
      sb.append(kvseq[i++]);
      sb.append("=\"");
      sb.append(StringUtils.escapeString(kvseq[i], StringUtils.ESCAPE_CHAR,
          CHARS_TO_ESCAPE));
      sb.append("\"");
    }
    return sb.toString();
  }

  void testOneLine(String type, String... kvseq) {
    String line = buildLine(type, kvseq);
    ParsedLine pl = new ParsedLine(line, Hadoop20JHParser.internalVersion);
    assertEquals("Mismatching type", type, pl.getType().toString());
    for (int i = 0; i < kvseq.length; ++i) {
      if (kvseq[i].equals(".") || kvseq[i].equals("\n")) {
        continue;
      }

      assertEquals("Key mismatching for " + kvseq[i], kvseq[i + 1], StringUtils
          .unEscapeString(pl.get(kvseq[i]), StringUtils.ESCAPE_CHAR,
              CHARS_TO_ESCAPE));
      ++i;
    }
  }

  @Test
  public void testEscapedQuote() {
    testOneLine("REC", "A", "x", "B", "abc\"de", "C", "f");
    testOneLine("REC", "B", "abcde\"", "C", "f");
    testOneLine("REC", "A", "x", "B", "\"abcde");
  }

  @Test
  public void testEqualSign() {
    testOneLine("REC1", "A", "x", "B", "abc=de", "C", "f");
    testOneLine("REC2", "B", "=abcde", "C", "f");
    testOneLine("REC3", "A", "x", "B", "abcde=");
  }

  @Test
  public void testSpace() {
    testOneLine("REC1", "A", "x", "B", "abc de", "C", "f");
    testOneLine("REC2", "B", " ab c de", "C", "f");
    testOneLine("REC3", "A", "x", "B", "abc\t de ");
  }

  @Test
  public void testBackSlash() {
    testOneLine("REC1", "A", "x", "B", "abc\\de", "C", "f");
    testOneLine("REC2", "B", "\\ab\\c\\de", "C", "f");
    testOneLine("REC3", "A", "x", "B", "abc\\\\de\\");
    testOneLine("REC4", "A", "x", "B", "abc\\\"de\\\"", "C", "f");
  }

  @Test
  public void testLineDelimiter() {
    testOneLine("REC1", "A", "x", "B", "abc.de", "C", "f");
    testOneLine("REC2", "B", ".ab.de");
    testOneLine("REC3", "A", "x", "B", "abc.de.");
    testOneLine("REC4", "A", "x", "B", "abc.de", ".");
  }

  @Test
  public void testMultipleLines() {
    testOneLine("REC1", "A", "x", "\n", "B", "abc.de", "\n", "C", "f", "\n", ".");
  }
}

File diff suppressed because it is too large

@@ -1,196 +0,0 @@
package org.apache.hadoop.tools.rumen;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.ToolRunner;

import org.junit.Test;
import static org.junit.Assert.*;

public class TestRumenFolder {
  @Test
  public void testFoldingSmallTrace() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    @SuppressWarnings("deprecation")
    final Path rootInputDir =
        new Path(System.getProperty("test.tools.input.dir", ""))
            .makeQualified(lfs);
    @SuppressWarnings("deprecation")
    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs);

    final Path rootInputFile = new Path(rootInputDir, "rumen/small-trace-test");
    final Path tempDir = new Path(rootTempDir, "TestRumenJobTraces");
    lfs.delete(tempDir, true);

    final Path foldedTracePath = new Path(tempDir, "folded-trace.json");

    final Path inputFile =
        new Path(rootInputFile, "folder-input-trace.json.gz");

    System.out.println("folded trace result path = " + foldedTracePath);

    String[] args =
        { "-input-cycle", "100S", "-output-duration", "300S",
          "-skew-buffer-length", "1", "-seed", "100", "-concentration", "2",
          inputFile.toString(), foldedTracePath.toString() };

    final Path foldedGoldFile =
        new Path(rootInputFile, "goldFoldedTrace.json.gz");

    Folder folder = new Folder();
    int result = ToolRunner.run(folder, args);
    assertEquals("Non-zero exit", 0, result);

    TestRumenFolder.<LoggedJob> jsonFileMatchesGold(conf, lfs, foldedTracePath,
        foldedGoldFile, LoggedJob.class, "trace");
  }

  @Test
  public void testStartsAfterOption() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    @SuppressWarnings("deprecation")
    final Path rootInputDir =
        new Path(System.getProperty("test.tools.input.dir", ""))
            .makeQualified(lfs);
    @SuppressWarnings("deprecation")
    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs);

    final Path rootInputFile = new Path(rootInputDir, "rumen/small-trace-test");
    final Path tempDir = new Path(rootTempDir, "TestRumenJobTraces");
    lfs.delete(tempDir, true);

    final Path inputFile =
        new Path(rootInputFile, "goldFoldedTrace.json.gz");

    final Path foldedTracePath = new Path(tempDir,
        "folded-skippedjob-trace.json");
    String[] args =
        { "-input-cycle", "300S", "-output-duration", "300S",
          "-starts-after", "30S",
          inputFile.toString(), foldedTracePath.toString() };

    Folder folder = new Folder();
    int result = ToolRunner.run(folder, args);
    assertEquals("Non-zero exit", 0, result);

    TestRumenFolder.<LoggedJob> checkValidityAfterSkippingJobs(conf, lfs, foldedTracePath,
        inputFile, LoggedJob.class, "trace", 30000, 300000);
  }

  static private <T extends DeepCompare> void
      checkValidityAfterSkippingJobs(Configuration conf,
          FileSystem lfs, Path result, Path inputFile,
          Class<? extends T> clazz, String fileDescription,
          long startsAfter, long duration) throws IOException {

    JsonObjectMapperParser<T> inputFileParser =
        new JsonObjectMapperParser<T>(inputFile, clazz, conf);
    InputStream resultStream = lfs.open(result);
    JsonObjectMapperParser<T> resultParser =
        new JsonObjectMapperParser<T>(resultStream, clazz);
    List<Long> gpSubmitTimes = new LinkedList<Long>();
    List<Long> rpSubmitTimes = new LinkedList<Long>();
    try {
      //Get submitTime of first job
      LoggedJob firstJob = (LoggedJob)inputFileParser.getNext();
      gpSubmitTimes.add(firstJob.getSubmitTime());
      long absoluteStartsAfterTime = firstJob.getSubmitTime() + startsAfter;

      //total duration
      long endTime = firstJob.getSubmitTime() + duration;

      //read original trace
      LoggedJob oriJob = null;
      while((oriJob = (LoggedJob)inputFileParser.getNext()) != null) {
        gpSubmitTimes.add(oriJob.getSubmitTime());
      }

      //check if retained jobs have submittime > starts-after
      LoggedJob job = null;
      while((job = (LoggedJob) resultParser.getNext()) != null) {
        assertTrue("job's submit time in the output trace is less " +
            "than the specified value of starts-after",
            (job.getSubmitTime() >= absoluteStartsAfterTime));
        rpSubmitTimes.add(job.getSubmitTime());
      }

      List<Long> skippedJobs = new LinkedList<Long>();
      skippedJobs.addAll(gpSubmitTimes);
      skippedJobs.removeAll(rpSubmitTimes);

      //check if the skipped job submittime < starts-after
      for(Long submitTime : skippedJobs) {
        assertTrue("skipped job submit time " + submitTime +
            " in the trace is greater " +
            "than the specified value of starts-after "
            + absoluteStartsAfterTime,
            (submitTime < absoluteStartsAfterTime));
      }
    } finally {
      IOUtils.cleanup(null, inputFileParser, resultParser);
    }
  }

  static private <T extends DeepCompare> void jsonFileMatchesGold(
      Configuration conf, FileSystem lfs, Path result, Path gold,
      Class<? extends T> clazz, String fileDescription) throws IOException {
    JsonObjectMapperParser<T> goldParser =
        new JsonObjectMapperParser<T>(gold, clazz, conf);
    InputStream resultStream = lfs.open(result);
    JsonObjectMapperParser<T> resultParser =
        new JsonObjectMapperParser<T>(resultStream, clazz);
    try {
      while (true) {
        DeepCompare goldJob = goldParser.getNext();
        DeepCompare resultJob = resultParser.getNext();
        if ((goldJob == null) || (resultJob == null)) {
          assertTrue(goldJob == resultJob);
          break;
        }

        try {
          resultJob.deepCompare(goldJob, new TreePath(null, "<root>"));
        } catch (DeepInequalityException e) {
          String error = e.path.toString();

          assertFalse(fileDescription + " mismatches: " + error, true);
        }
      }
    } finally {
      IOUtils.cleanup(null, goldParser, resultParser);
    }
  }
}

File diff suppressed because it is too large

@@ -1,338 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.tools.rumen;

import java.util.List;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.TaskStatus.State;
import org.apache.hadoop.mapreduce.TaskType;

import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.*;

public class TestZombieJob {
  final double epsilon = 0.01;
  private final int[] attemptTimesPercentiles = new int[] { 10, 50, 90 };
  private long[] succeededCDF = new long[] { 5268, 5268, 5268, 5268, 5268 };
  private long[] failedCDF = new long[] { 18592, 18592, 18592, 18592, 18592 };
  private double[] expectedPs = new double[] { 0.000001, 0.18707660239708182,
      0.0013027618551328818, 2.605523710265763E-4 };

  private final long[] mapTaskCounts = new long[] { 7838525L, 342277L, 100228L,
      1564L, 1234L };
  private final long[] reduceTaskCounts = new long[] { 4405338L, 139391L,
      1514383L, 139391, 1234L };

  List<LoggedJob> loggedJobs = new ArrayList<LoggedJob>();
  List<JobStory> jobStories = new ArrayList<JobStory>();

  @Before
  public void setUp() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    final Path rootInputDir = new Path(
        System.getProperty("test.tools.input.dir", "")).makeQualified(lfs);
    final Path rootInputFile = new Path(rootInputDir, "rumen/zombie");

    ZombieJobProducer parser = new ZombieJobProducer(new Path(rootInputFile,
        "input-trace.json"), new ZombieCluster(new Path(rootInputFile,
        "input-topology.json"), null, conf), conf);

    JobStory job = null;
    for (int i = 0; i < 4; i++) {
      job = parser.getNextJob();
      ZombieJob zJob = (ZombieJob) job;
      LoggedJob loggedJob = zJob.getLoggedJob();
      System.out.println(i + ":" + job.getNumberMaps() + "m, "
          + job.getNumberReduces() + "r");
      System.out
          .println(loggedJob.getOutcome() + ", " + loggedJob.getJobtype());

      System.out.println("Input Splits -- " + job.getInputSplits().length
          + ", " + job.getNumberMaps());

      System.out.println("Successful Map CDF -------");
      for (LoggedDiscreteCDF cdf : loggedJob.getSuccessfulMapAttemptCDFs()) {
        System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum()
            + "--" + cdf.getMaximum());
        for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
          System.out.println(" " + ranking.getRelativeRanking() + ":"
              + ranking.getDatum());
        }
      }
      System.out.println("Failed Map CDF -----------");
      for (LoggedDiscreteCDF cdf : loggedJob.getFailedMapAttemptCDFs()) {
        System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum()
            + "--" + cdf.getMaximum());
        for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
          System.out.println(" " + ranking.getRelativeRanking() + ":"
              + ranking.getDatum());
        }
      }
      System.out.println("Successful Reduce CDF ----");
      LoggedDiscreteCDF cdf = loggedJob.getSuccessfulReduceAttemptCDF();
      System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
          + cdf.getMaximum());
      for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
        System.out.println(" " + ranking.getRelativeRanking() + ":"
            + ranking.getDatum());
      }
      System.out.println("Failed Reduce CDF --------");
      cdf = loggedJob.getFailedReduceAttemptCDF();
      System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
          + cdf.getMaximum());
      for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
        System.out.println(" " + ranking.getRelativeRanking() + ":"
            + ranking.getDatum());
      }
      System.out.print("map attempts to success -- ");
      for (double p : loggedJob.getMapperTriesToSucceed()) {
        System.out.print(p + ", ");
      }
      System.out.println();
      System.out.println("===============");

      loggedJobs.add(loggedJob);
      jobStories.add(job);
    }
  }

  @Test
  public void testFirstJob() {
    // 20th job seems reasonable: "totalMaps":329,"totalReduces":101
    // successful map: 80 node-local, 196 rack-local, 53 rack-remote, 2 unknown
    // failed map: 0-0-0-1
    // successful reduce: 99 failed reduce: 13
    // map attempts to success -- 0.9969879518072289, 0.0030120481927710845,
    JobStory job = jobStories.get(0);
    assertEquals(1, job.getNumberMaps());
    assertEquals(1, job.getNumberReduces());

    // get splits

    TaskAttemptInfo taInfo = null;
    long expectedRuntime = 2423;
    // get a succeeded map task attempt, expect the exact same task attempt
    taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 1);
    assertEquals(expectedRuntime, taInfo.getRuntime());
    assertEquals(State.SUCCEEDED, taInfo.getRunState());

    // get a succeeded map attempt, but reschedule with different locality.
    taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 2);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());
    taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 0);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());

    expectedRuntime = 97502;
    // get a succeeded reduce task attempt, expect the exact same task attempt
    taInfo = job.getTaskAttemptInfo(TaskType.REDUCE, 14, 0);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());

    // get a failed reduce task attempt, expect the exact same task attempt
    taInfo = job.getTaskAttemptInfo(TaskType.REDUCE, 14, 0);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());

    // get a non-exist reduce task attempt, expect a made-up task attempt
    // TODO fill in test case
  }

  @Test
  public void testSecondJob() {
    // 7th job has many failed tasks.
    // 3204 m, 0 r
    // successful maps 497-586-23-1, failed maps 0-0-0-2714
    // map attempts to success -- 0.8113600833767587, 0.18707660239708182,
    // 0.0013027618551328818, 2.605523710265763E-4,
    JobStory job = jobStories.get(1);
    assertEquals(20, job.getNumberMaps());
    assertEquals(1, job.getNumberReduces());

    TaskAttemptInfo taInfo = null;
    // get a succeeded map task attempt
    taInfo = job.getMapTaskAttemptInfoAdjusted(17, 1, 1);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());

    // get a succeeded map task attempt, with different locality
    taInfo = job.getMapTaskAttemptInfoAdjusted(17, 1, 2);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());
    taInfo = job.getMapTaskAttemptInfoAdjusted(17, 1, 0);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());

    // get a failed map task attempt
    taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 1);
    assertEquals(1927, taInfo.getRuntime());
    assertEquals(State.SUCCEEDED, taInfo.getRunState());

    // get a failed map task attempt, with different locality
    // TODO: this test does not make sense here, because I don't have
    // available data set.
  }

  @Test
  public void testFourthJob() {
    // 7th job has many failed tasks.
    // 3204 m, 0 r
    // successful maps 497-586-23-1, failed maps 0-0-0-2714
    // map attempts to success -- 0.8113600833767587, 0.18707660239708182,
    // 0.0013027618551328818, 2.605523710265763E-4,
    JobStory job = jobStories.get(3);
    assertEquals(131, job.getNumberMaps());
    assertEquals(47, job.getNumberReduces());

    TaskAttemptInfo taInfo = null;
    // get a succeeded map task attempt
    long runtime = 5268;
    taInfo = job.getMapTaskAttemptInfoAdjusted(113, 1, 1);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());
    assertEquals(runtime, taInfo.getRuntime());

    // get a succeeded map task attempt, with different locality
    taInfo = job.getMapTaskAttemptInfoAdjusted(113, 1, 2);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());
    assertEquals(runtime, taInfo.getRuntime() / 2);
    taInfo = job.getMapTaskAttemptInfoAdjusted(113, 1, 0);
    assertEquals(State.SUCCEEDED, taInfo.getRunState());
    assertEquals((long) (runtime / 1.5), taInfo.getRuntime());

    // get a failed map task attempt
    taInfo = job.getMapTaskAttemptInfoAdjusted(113, 0, 1);
    assertEquals(18592, taInfo.getRuntime());
    assertEquals(State.FAILED, taInfo.getRunState());
  }

  @Test
  public void testRecordIOInfo() {
    JobStory job = jobStories.get(3);

    TaskInfo mapTask = job.getTaskInfo(TaskType.MAP, 113);

    TaskInfo reduceTask = job.getTaskInfo(TaskType.REDUCE, 0);

    assertEquals(mapTaskCounts[0], mapTask.getInputBytes());
    assertEquals(mapTaskCounts[1], mapTask.getInputRecords());
    assertEquals(mapTaskCounts[2], mapTask.getOutputBytes());
    assertEquals(mapTaskCounts[3], mapTask.getOutputRecords());
    assertEquals(mapTaskCounts[4], mapTask.getTaskMemory());

    assertEquals(reduceTaskCounts[0], reduceTask.getInputBytes());
    assertEquals(reduceTaskCounts[1], reduceTask.getInputRecords());
    assertEquals(reduceTaskCounts[2], reduceTask.getOutputBytes());
    assertEquals(reduceTaskCounts[3], reduceTask.getOutputRecords());
    assertEquals(reduceTaskCounts[4], reduceTask.getTaskMemory());
  }

  @Test
  public void testMakeUpInfo() {
    // get many non-exist tasks
    // total 3204 map tasks, 3300 is a non-exist task.
    checkMakeUpTask(jobStories.get(3), 113, 1);
  }

  private void checkMakeUpTask(JobStory job, int taskNumber, int locality) {
    TaskAttemptInfo taInfo = null;

    Histogram sampleSucceeded = new Histogram();
    Histogram sampleFailed = new Histogram();
    List<Integer> sampleAttempts = new ArrayList<Integer>();
    for (int i = 0; i < 100000; i++) {
      int attemptId = 0;
      while (true) {
        taInfo = job.getMapTaskAttemptInfoAdjusted(taskNumber, attemptId, 1);
        if (taInfo.getRunState() == State.SUCCEEDED) {
          sampleSucceeded.enter(taInfo.getRuntime());
          break;
        }
        sampleFailed.enter(taInfo.getRuntime());
        attemptId++;
      }
      sampleAttempts.add(attemptId);
    }

    // check state distribution
    int[] countTries = new int[] { 0, 0, 0, 0 };
    for (int attempts : sampleAttempts) {
      assertTrue(attempts < 4);
      countTries[attempts]++;
    }
    /*
     * System.out.print("Generated map attempts to success -- "); for (int
     * count: countTries) { System.out.print((double)count/sampleAttempts.size()
     * + ", "); } System.out.println(); System.out.println("===============");
     */
    for (int i = 0; i < 4; i++) {
      int count = countTries[i];
      double p = (double) count / sampleAttempts.size();
      assertTrue(expectedPs[i] - p < epsilon);
    }

    // check succeeded attempts runtime distribution
    long[] expectedCDF = succeededCDF;
    LoggedDiscreteCDF cdf = new LoggedDiscreteCDF();
    cdf.setCDF(sampleSucceeded, attemptTimesPercentiles, 100);
    /*
     * System.out.println("generated succeeded map runtime distribution");
     * System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
     * + cdf.getMaximum()); for (LoggedSingleRelativeRanking ranking:
     * cdf.getRankings()) { System.out.println(" " +
     * ranking.getRelativeRanking() + ":" + ranking.getDatum()); }
     */
    assertRuntimeEqual(cdf.getMinimum(), expectedCDF[0]);
    assertRuntimeEqual(cdf.getMaximum(), expectedCDF[4]);
    for (int i = 0; i < 3; i++) {
      LoggedSingleRelativeRanking ranking = cdf.getRankings().get(i);
      assertRuntimeEqual(expectedCDF[i + 1], ranking.getDatum());
    }

    // check failed attempts runtime distribution
    expectedCDF = failedCDF;
    cdf = new LoggedDiscreteCDF();
    cdf.setCDF(sampleFailed, attemptTimesPercentiles, 100);

    System.out.println("generated failed map runtime distribution");
    System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
        + cdf.getMaximum());
    for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
      System.out.println(" " + ranking.getRelativeRanking() + ":"
          + ranking.getDatum());
    }
    assertRuntimeEqual(cdf.getMinimum(), expectedCDF[0]);
    assertRuntimeEqual(cdf.getMaximum(), expectedCDF[4]);
    for (int i = 0; i < 3; i++) {
      LoggedSingleRelativeRanking ranking = cdf.getRankings().get(i);
      assertRuntimeEqual(expectedCDF[i + 1], ranking.getDatum());
    }
  }

  private void assertRuntimeEqual(long expected, long generated) {
    if (expected == 0) {
      assertTrue(generated > -1000 && generated < 1000);
    } else {
      long epsilon = Math.max(expected / 10, 5000);
      assertTrue(expected - generated > -epsilon);
      assertTrue(expected - generated < epsilon);
    }
  }

}

@@ -62,6 +62,14 @@
      <item name="Writing Yarn Applications" href="hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html"/>
      <item name="Capacity Scheduler" href="hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html"/>
    </menu>

    <menu name="YARN REST API's" inherit="top">
      <item name="Introduction" href="hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html"/>
      <item name="Resource Manager" href="hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html"/>
      <item name="Node Manager" href="hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html"/>
      <item name="Mapreduce Application Master" href="hadoop-yarn/hadoop-yarn-site/MapredAppMasterRest.html"/>
      <item name="History Server" href="hadoop-yarn/hadoop-yarn-site/HistoryServerRest.html"/>
    </menu>

    <menu name="Auth" inherit="top">
      <item name="Overview" href="hadoop-auth/index.html"/>