YARN-8041. [Router] Federation: Improve Router REST API Metrics. (#4938)
This commit is contained in:
parent
1962851356
commit
5b52123c9d
@ -107,6 +107,20 @@ public final class RouterMetrics {
|
|||||||
private MutableGaugeInt numDeleteReservationFailedRetrieved;
|
private MutableGaugeInt numDeleteReservationFailedRetrieved;
|
||||||
@Metric("# of listReservation failed to be retrieved")
|
@Metric("# of listReservation failed to be retrieved")
|
||||||
private MutableGaugeInt numListReservationFailedRetrieved;
|
private MutableGaugeInt numListReservationFailedRetrieved;
|
||||||
|
@Metric("# of getAppActivities failed to be retrieved")
|
||||||
|
private MutableGaugeInt numGetAppActivitiesFailedRetrieved;
|
||||||
|
@Metric("# of getAppStatistics failed to be retrieved")
|
||||||
|
private MutableGaugeInt numGetAppStatisticsFailedRetrieved;
|
||||||
|
@Metric("# of getAppPriority failed to be retrieved")
|
||||||
|
private MutableGaugeInt numGetAppPriorityFailedRetrieved;
|
||||||
|
@Metric("# of getAppQueue failed to be retrieved")
|
||||||
|
private MutableGaugeInt numGetAppQueueFailedRetrieved;
|
||||||
|
@Metric("# of updateAppQueue failed to be retrieved")
|
||||||
|
private MutableGaugeInt numUpdateAppQueueFailedRetrieved;
|
||||||
|
@Metric("# of getAppTimeout failed to be retrieved")
|
||||||
|
private MutableGaugeInt numGetAppTimeoutFailedRetrieved;
|
||||||
|
@Metric("# of getAppTimeouts failed to be retrieved")
|
||||||
|
private MutableGaugeInt numGetAppTimeoutsFailedRetrieved;
|
||||||
|
|
||||||
// Aggregate metrics are shared, and don't have to be looked up per call
|
// Aggregate metrics are shared, and don't have to be looked up per call
|
||||||
@Metric("Total number of successful Submitted apps and latency(ms)")
|
@Metric("Total number of successful Submitted apps and latency(ms)")
|
||||||
@ -175,6 +189,20 @@ public final class RouterMetrics {
|
|||||||
private MutableRate totalSucceededDeleteReservationRetrieved;
|
private MutableRate totalSucceededDeleteReservationRetrieved;
|
||||||
@Metric("Total number of successful Retrieved ListReservation and latency(ms)")
|
@Metric("Total number of successful Retrieved ListReservation and latency(ms)")
|
||||||
private MutableRate totalSucceededListReservationRetrieved;
|
private MutableRate totalSucceededListReservationRetrieved;
|
||||||
|
@Metric("Total number of successful Retrieved GetAppActivities and latency(ms)")
|
||||||
|
private MutableRate totalSucceededGetAppActivitiesRetrieved;
|
||||||
|
@Metric("Total number of successful Retrieved GetAppStatistics and latency(ms)")
|
||||||
|
private MutableRate totalSucceededGetAppStatisticsRetrieved;
|
||||||
|
@Metric("Total number of successful Retrieved GetAppPriority and latency(ms)")
|
||||||
|
private MutableRate totalSucceededGetAppPriorityRetrieved;
|
||||||
|
@Metric("Total number of successful Retrieved GetAppQueue and latency(ms)")
|
||||||
|
private MutableRate totalSucceededGetAppQueueRetrieved;
|
||||||
|
@Metric("Total number of successful Retrieved UpdateAppQueue and latency(ms)")
|
||||||
|
private MutableRate totalSucceededUpdateAppQueueRetrieved;
|
||||||
|
@Metric("Total number of successful Retrieved GetAppTimeout and latency(ms)")
|
||||||
|
private MutableRate totalSucceededGetAppTimeoutRetrieved;
|
||||||
|
@Metric("Total number of successful Retrieved GetAppTimeouts and latency(ms)")
|
||||||
|
private MutableRate totalSucceededGetAppTimeoutsRetrieved;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provide quantile counters for all latencies.
|
* Provide quantile counters for all latencies.
|
||||||
@ -212,6 +240,13 @@ public final class RouterMetrics {
|
|||||||
private MutableQuantiles updateReservationLatency;
|
private MutableQuantiles updateReservationLatency;
|
||||||
private MutableQuantiles deleteReservationLatency;
|
private MutableQuantiles deleteReservationLatency;
|
||||||
private MutableQuantiles listReservationLatency;
|
private MutableQuantiles listReservationLatency;
|
||||||
|
private MutableQuantiles getAppActivitiesLatency;
|
||||||
|
private MutableQuantiles getAppStatisticsLatency;
|
||||||
|
private MutableQuantiles getAppPriorityLatency;
|
||||||
|
private MutableQuantiles getAppQueueLatency;
|
||||||
|
private MutableQuantiles getUpdateQueueLatency;
|
||||||
|
private MutableQuantiles getAppTimeoutLatency;
|
||||||
|
private MutableQuantiles getAppTimeoutsLatency;
|
||||||
|
|
||||||
private static volatile RouterMetrics instance = null;
|
private static volatile RouterMetrics instance = null;
|
||||||
private static MetricsRegistry registry;
|
private static MetricsRegistry registry;
|
||||||
@ -342,6 +377,27 @@ private RouterMetrics() {
|
|||||||
listReservationLatency =
|
listReservationLatency =
|
||||||
registry.newQuantiles("listReservationLatency",
|
registry.newQuantiles("listReservationLatency",
|
||||||
"latency of list reservation timeouts", "ops", "latency", 10);
|
"latency of list reservation timeouts", "ops", "latency", 10);
|
||||||
|
|
||||||
|
getAppActivitiesLatency = registry.newQuantiles("getAppActivitiesLatency",
|
||||||
|
"latency of get app activities timeouts", "ops", "latency", 10);
|
||||||
|
|
||||||
|
getAppStatisticsLatency = registry.newQuantiles("getAppStatisticsLatency",
|
||||||
|
"latency of get app statistics timeouts", "ops", "latency", 10);
|
||||||
|
|
||||||
|
getAppPriorityLatency = registry.newQuantiles("getAppPriorityLatency",
|
||||||
|
"latency of get app priority timeouts", "ops", "latency", 10);
|
||||||
|
|
||||||
|
getAppQueueLatency = registry.newQuantiles("getAppQueueLatency",
|
||||||
|
"latency of get app queue timeouts", "ops", "latency", 10);
|
||||||
|
|
||||||
|
getUpdateQueueLatency = registry.newQuantiles("getUpdateQueueLatency",
|
||||||
|
"latency of update app queue timeouts", "ops", "latency", 10);
|
||||||
|
|
||||||
|
getAppTimeoutLatency = registry.newQuantiles("getAppTimeoutLatency",
|
||||||
|
"latency of get apptimeout timeouts", "ops", "latency", 10);
|
||||||
|
|
||||||
|
getAppTimeoutsLatency = registry.newQuantiles("getAppTimeoutsLatency",
|
||||||
|
"latency of get apptimeouts timeouts", "ops", "latency", 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static RouterMetrics getMetrics() {
|
public static RouterMetrics getMetrics() {
|
||||||
@ -528,6 +584,41 @@ public long getNumSucceededListReservationRetrieved() {
|
|||||||
return totalSucceededListReservationRetrieved.lastStat().numSamples();
|
return totalSucceededListReservationRetrieved.lastStat().numSamples();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public long getNumSucceededGetAppActivitiesRetrieved() {
|
||||||
|
return totalSucceededGetAppActivitiesRetrieved.lastStat().numSamples();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public long getNumSucceededGetAppStatisticsRetrieved() {
|
||||||
|
return totalSucceededGetAppStatisticsRetrieved.lastStat().numSamples();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public long getNumSucceededGetAppPriorityRetrieved() {
|
||||||
|
return totalSucceededGetAppPriorityRetrieved.lastStat().numSamples();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public long getNumSucceededGetAppQueueRetrieved() {
|
||||||
|
return totalSucceededGetAppQueueRetrieved.lastStat().numSamples();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public long getNumSucceededUpdateAppQueueRetrieved() {
|
||||||
|
return totalSucceededUpdateAppQueueRetrieved.lastStat().numSamples();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public long getNumSucceededGetAppTimeoutRetrieved() {
|
||||||
|
return totalSucceededGetAppTimeoutRetrieved.lastStat().numSamples();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public long getNumSucceededGetAppTimeoutsRetrieved() {
|
||||||
|
return totalSucceededGetAppTimeoutsRetrieved.lastStat().numSamples();
|
||||||
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public double getLatencySucceededAppsCreated() {
|
public double getLatencySucceededAppsCreated() {
|
||||||
return totalSucceededAppsCreated.lastStat().mean();
|
return totalSucceededAppsCreated.lastStat().mean();
|
||||||
@ -693,6 +784,41 @@ public double getLatencySucceededListReservationRetrieved() {
|
|||||||
return totalSucceededListReservationRetrieved.lastStat().mean();
|
return totalSucceededListReservationRetrieved.lastStat().mean();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getLatencySucceededGetAppActivitiesRetrieved() {
|
||||||
|
return totalSucceededGetAppActivitiesRetrieved.lastStat().mean();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getLatencySucceededGetAppStatisticsRetrieved() {
|
||||||
|
return totalSucceededGetAppStatisticsRetrieved.lastStat().mean();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getLatencySucceededGetAppPriorityRetrieved() {
|
||||||
|
return totalSucceededGetAppPriorityRetrieved.lastStat().mean();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getLatencySucceededGetAppQueueRetrieved() {
|
||||||
|
return totalSucceededGetAppQueueRetrieved.lastStat().mean();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getLatencySucceededUpdateAppQueueRetrieved() {
|
||||||
|
return totalSucceededUpdateAppQueueRetrieved.lastStat().mean();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getLatencySucceededGetAppTimeoutRetrieved() {
|
||||||
|
return totalSucceededGetAppTimeoutRetrieved.lastStat().mean();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getLatencySucceededGetAppTimeoutsRetrieved() {
|
||||||
|
return totalSucceededGetAppTimeoutsRetrieved.lastStat().mean();
|
||||||
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public int getAppsFailedCreated() {
|
public int getAppsFailedCreated() {
|
||||||
return numAppsFailedCreated.value();
|
return numAppsFailedCreated.value();
|
||||||
@ -846,6 +972,34 @@ public int getListReservationFailedRetrieved() {
|
|||||||
return numListReservationFailedRetrieved.value();
|
return numListReservationFailedRetrieved.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getAppActivitiesFailedRetrieved() {
|
||||||
|
return numGetAppActivitiesFailedRetrieved.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getAppStatisticsFailedRetrieved() {
|
||||||
|
return numGetAppStatisticsFailedRetrieved.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getAppPriorityFailedRetrieved() {
|
||||||
|
return numGetAppPriorityFailedRetrieved.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getAppQueueFailedRetrieved() {
|
||||||
|
return numGetAppQueueFailedRetrieved.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getUpdateAppQueueFailedRetrieved() {
|
||||||
|
return numUpdateAppQueueFailedRetrieved.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getAppTimeoutFailedRetrieved() {
|
||||||
|
return numGetAppTimeoutFailedRetrieved.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getAppTimeoutsFailedRetrieved() {
|
||||||
|
return numGetAppTimeoutsFailedRetrieved.value();
|
||||||
|
}
|
||||||
|
|
||||||
public void succeededAppsCreated(long duration) {
|
public void succeededAppsCreated(long duration) {
|
||||||
totalSucceededAppsCreated.add(duration);
|
totalSucceededAppsCreated.add(duration);
|
||||||
getNewApplicationLatency.add(duration);
|
getNewApplicationLatency.add(duration);
|
||||||
@ -1011,6 +1165,41 @@ public void succeededListReservationRetrieved(long duration) {
|
|||||||
listReservationLatency.add(duration);
|
listReservationLatency.add(duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void succeededGetAppActivitiesRetrieved(long duration) {
|
||||||
|
totalSucceededGetAppActivitiesRetrieved.add(duration);
|
||||||
|
getAppActivitiesLatency.add(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void succeededGetAppStatisticsRetrieved(long duration) {
|
||||||
|
totalSucceededGetAppStatisticsRetrieved.add(duration);
|
||||||
|
getAppStatisticsLatency.add(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void succeededGetAppPriorityRetrieved(long duration) {
|
||||||
|
totalSucceededGetAppPriorityRetrieved.add(duration);
|
||||||
|
getAppPriorityLatency.add(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void succeededGetAppQueueRetrieved(long duration) {
|
||||||
|
totalSucceededGetAppQueueRetrieved.add(duration);
|
||||||
|
getAppQueueLatency.add(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void succeededUpdateAppQueueRetrieved(long duration) {
|
||||||
|
totalSucceededUpdateAppQueueRetrieved.add(duration);
|
||||||
|
getUpdateQueueLatency.add(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void succeededGetAppTimeoutRetrieved(long duration) {
|
||||||
|
totalSucceededGetAppTimeoutRetrieved.add(duration);
|
||||||
|
getAppTimeoutLatency.add(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void succeededGetAppTimeoutsRetrieved(long duration) {
|
||||||
|
totalSucceededGetAppTimeoutsRetrieved.add(duration);
|
||||||
|
getAppTimeoutsLatency.add(duration);
|
||||||
|
}
|
||||||
|
|
||||||
public void incrAppsFailedCreated() {
|
public void incrAppsFailedCreated() {
|
||||||
numAppsFailedCreated.incr();
|
numAppsFailedCreated.incr();
|
||||||
}
|
}
|
||||||
@ -1063,11 +1252,11 @@ public void incrQueueUserAclsFailedRetrieved() {
|
|||||||
numGetQueueUserAclsFailedRetrieved.incr();
|
numGetQueueUserAclsFailedRetrieved.incr();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void incrContainerReportFailedRetrieved() {
|
public void incrGetContainerReportFailedRetrieved() {
|
||||||
numGetContainerReportFailedRetrieved.incr();
|
numGetContainerReportFailedRetrieved.incr();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void incrContainerFailedRetrieved() {
|
public void incrGetContainersFailedRetrieved() {
|
||||||
numGetContainersFailedRetrieved.incr();
|
numGetContainersFailedRetrieved.incr();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1142,4 +1331,32 @@ public void incrDeleteReservationFailedRetrieved() {
|
|||||||
public void incrListReservationFailedRetrieved() {
|
public void incrListReservationFailedRetrieved() {
|
||||||
numListReservationFailedRetrieved.incr();
|
numListReservationFailedRetrieved.incr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void incrGetAppActivitiesFailedRetrieved() {
|
||||||
|
numGetAppActivitiesFailedRetrieved.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrGetAppStatisticsFailedRetrieved() {
|
||||||
|
numGetAppStatisticsFailedRetrieved.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrGetAppPriorityFailedRetrieved() {
|
||||||
|
numGetAppPriorityFailedRetrieved.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrGetAppQueueFailedRetrieved() {
|
||||||
|
numGetAppQueueFailedRetrieved.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrUpdateAppQueueFailedRetrieved() {
|
||||||
|
numUpdateAppQueueFailedRetrieved.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrGetAppTimeoutFailedRetrieved() {
|
||||||
|
numGetAppTimeoutFailedRetrieved.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrGetAppTimeoutsFailedRetrieved() {
|
||||||
|
numGetAppTimeoutsFailedRetrieved.incr();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
package org.apache.hadoop.yarn.server.router;
|
package org.apache.hadoop.yarn.server.router;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Public;
|
import org.apache.hadoop.classification.InterfaceAudience.Public;
|
||||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
@ -44,6 +45,14 @@
|
|||||||
@Unstable
|
@Unstable
|
||||||
public final class RouterServerUtil {
|
public final class RouterServerUtil {
|
||||||
|
|
||||||
|
private static final String APPLICATION_ID_PREFIX = "application_";
|
||||||
|
|
||||||
|
private static final String APP_ATTEMPT_ID_PREFIX = "appattempt_";
|
||||||
|
|
||||||
|
private static final String CONTAINER_PREFIX = "container_";
|
||||||
|
|
||||||
|
private static final String EPOCH_PREFIX = "e";
|
||||||
|
|
||||||
/** Private constructor: this class exposes static helpers only. */
private RouterServerUtil() {
}
|
||||||
@ -181,6 +190,28 @@ public static void logAndThrowIOException(String errMsg, Throwable t)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Throws an IOException due to an error.
|
||||||
|
*
|
||||||
|
* @param t the throwable raised in the called class.
|
||||||
|
* @param errMsgFormat the error message format string.
|
||||||
|
* @param args referenced by the format specifiers in the format string.
|
||||||
|
* @throws IOException on failure
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public static void logAndThrowIOException(Throwable t, String errMsgFormat, Object... args)
|
||||||
|
throws IOException {
|
||||||
|
String msg = String.format(errMsgFormat, args);
|
||||||
|
if (t != null) {
|
||||||
|
LOG.error(msg, t);
|
||||||
|
throw new IOException(msg, t);
|
||||||
|
} else {
|
||||||
|
LOG.error(msg);
|
||||||
|
throw new IOException(msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Throws an RunTimeException due to an error.
|
* Throws an RunTimeException due to an error.
|
||||||
*
|
*
|
||||||
@ -222,4 +253,197 @@ public static void logAndThrowRunTimeException(Throwable t, String errMsgFormat,
|
|||||||
throw new RuntimeException(msg);
|
throw new RuntimeException(msg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Throws an RunTimeException due to an error.
|
||||||
|
*
|
||||||
|
* @param t the throwable raised in the called class.
|
||||||
|
* @param errMsgFormat the error message format string.
|
||||||
|
* @param args referenced by the format specifiers in the format string.
|
||||||
|
* @return RuntimeException
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public static RuntimeException logAndReturnRunTimeException(
|
||||||
|
Throwable t, String errMsgFormat, Object... args) {
|
||||||
|
String msg = String.format(errMsgFormat, args);
|
||||||
|
if (t != null) {
|
||||||
|
LOG.error(msg, t);
|
||||||
|
return new RuntimeException(msg, t);
|
||||||
|
} else {
|
||||||
|
LOG.error(msg);
|
||||||
|
return new RuntimeException(msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Throws an RunTimeException due to an error.
|
||||||
|
*
|
||||||
|
* @param errMsgFormat the error message format string.
|
||||||
|
* @param args referenced by the format specifiers in the format string.
|
||||||
|
* @return RuntimeException
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public static RuntimeException logAndReturnRunTimeException(
|
||||||
|
String errMsgFormat, Object... args) {
|
||||||
|
return logAndReturnRunTimeException(null, errMsgFormat, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check applicationId is accurate.
|
||||||
|
*
|
||||||
|
* We need to ensure that applicationId cannot be empty and
|
||||||
|
* can be converted to ApplicationId object normally.
|
||||||
|
*
|
||||||
|
* @param applicationId applicationId of type string
|
||||||
|
* @throws IllegalArgumentException If the format of the applicationId is not accurate,
|
||||||
|
* an IllegalArgumentException needs to be thrown.
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public static void validateApplicationId(String applicationId)
|
||||||
|
throws IllegalArgumentException {
|
||||||
|
|
||||||
|
// Make Sure applicationId is not empty.
|
||||||
|
if (applicationId == null || applicationId.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure the prefix information of applicationId is accurate.
|
||||||
|
if (!applicationId.startsWith(APPLICATION_ID_PREFIX)) {
|
||||||
|
throw new IllegalArgumentException("Invalid ApplicationId prefix: "
|
||||||
|
+ applicationId + ". The valid ApplicationId should start with prefix application");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the split position of the string.
|
||||||
|
int pos1 = APPLICATION_ID_PREFIX.length() - 1;
|
||||||
|
int pos2 = applicationId.indexOf('_', pos1 + 1);
|
||||||
|
if (pos2 < 0) {
|
||||||
|
throw new IllegalArgumentException("Invalid ApplicationId: " + applicationId);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Confirm that the parsed rmId and appId are numeric types.
|
||||||
|
String rmId = applicationId.substring(pos1 + 1, pos2);
|
||||||
|
String appId = applicationId.substring(pos2 + 1);
|
||||||
|
if(!NumberUtils.isDigits(rmId) || !NumberUtils.isDigits(appId)){
|
||||||
|
throw new IllegalArgumentException("Invalid ApplicationId: " + applicationId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check appAttemptId is accurate.
|
||||||
|
*
|
||||||
|
* We need to ensure that appAttemptId cannot be empty and
|
||||||
|
* can be converted to ApplicationAttemptId object normally.
|
||||||
|
*
|
||||||
|
* @param appAttemptId appAttemptId of type string.
|
||||||
|
* @throws IllegalArgumentException If the format of the appAttemptId is not accurate,
|
||||||
|
* an IllegalArgumentException needs to be thrown.
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public static void validateApplicationAttemptId(String appAttemptId)
|
||||||
|
throws IllegalArgumentException {
|
||||||
|
|
||||||
|
// Make Sure appAttemptId is not empty.
|
||||||
|
if (appAttemptId == null || appAttemptId.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("Parameter error, the appAttemptId is empty or null.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure the prefix information of appAttemptId is accurate.
|
||||||
|
if (!appAttemptId.startsWith(APP_ATTEMPT_ID_PREFIX)) {
|
||||||
|
throw new IllegalArgumentException("Invalid AppAttemptId prefix: " + appAttemptId);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the split position of the string.
|
||||||
|
int pos1 = APP_ATTEMPT_ID_PREFIX.length() - 1;
|
||||||
|
int pos2 = appAttemptId.indexOf('_', pos1 + 1);
|
||||||
|
if (pos2 < 0) {
|
||||||
|
throw new IllegalArgumentException("Invalid AppAttemptId: " + appAttemptId);
|
||||||
|
}
|
||||||
|
int pos3 = appAttemptId.indexOf('_', pos2 + 1);
|
||||||
|
if (pos3 < 0) {
|
||||||
|
throw new IllegalArgumentException("Invalid AppAttemptId: " + appAttemptId);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Confirm that the parsed rmId and appId and attemptId are numeric types.
|
||||||
|
String rmId = appAttemptId.substring(pos1 + 1, pos2);
|
||||||
|
String appId = appAttemptId.substring(pos2 + 1, pos3);
|
||||||
|
String attemptId = appAttemptId.substring(pos3 + 1);
|
||||||
|
|
||||||
|
if (!NumberUtils.isDigits(rmId) || !NumberUtils.isDigits(appId)
|
||||||
|
|| !NumberUtils.isDigits(attemptId)) {
|
||||||
|
throw new IllegalArgumentException("Invalid AppAttemptId: " + appAttemptId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check containerId is accurate.
|
||||||
|
*
|
||||||
|
* We need to ensure that containerId cannot be empty and
|
||||||
|
* can be converted to ContainerId object normally.
|
||||||
|
*
|
||||||
|
* @param containerId containerId of type string.
|
||||||
|
* @throws IllegalArgumentException If the format of the appAttemptId is not accurate,
|
||||||
|
* an IllegalArgumentException needs to be thrown.
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Unstable
|
||||||
|
public static void validateContainerId(String containerId)
|
||||||
|
throws IllegalArgumentException {
|
||||||
|
|
||||||
|
// Make Sure containerId is not empty.
|
||||||
|
if (containerId == null || containerId.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("Parameter error, the containerId is empty or null.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure the prefix information of containerId is accurate.
|
||||||
|
if (!containerId.startsWith(CONTAINER_PREFIX)) {
|
||||||
|
throw new IllegalArgumentException("Invalid ContainerId prefix: " + containerId);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the split position of the string.
|
||||||
|
int pos1 = CONTAINER_PREFIX.length() - 1;
|
||||||
|
|
||||||
|
String epoch = "0";
|
||||||
|
if (containerId.regionMatches(pos1 + 1, EPOCH_PREFIX, 0, EPOCH_PREFIX.length())) {
|
||||||
|
int pos2 = containerId.indexOf('_', pos1 + 1);
|
||||||
|
if (pos2 < 0) {
|
||||||
|
throw new IllegalArgumentException("Invalid ContainerId: " + containerId);
|
||||||
|
}
|
||||||
|
String epochStr = containerId.substring(pos1 + 1 + EPOCH_PREFIX.length(), pos2);
|
||||||
|
epoch = epochStr;
|
||||||
|
// rewind the current position
|
||||||
|
pos1 = pos2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int pos2 = containerId.indexOf('_', pos1 + 1);
|
||||||
|
if (pos2 < 0) {
|
||||||
|
throw new IllegalArgumentException("Invalid ContainerId: " + containerId);
|
||||||
|
}
|
||||||
|
|
||||||
|
int pos3 = containerId.indexOf('_', pos2 + 1);
|
||||||
|
if (pos3 < 0) {
|
||||||
|
throw new IllegalArgumentException("Invalid ContainerId: " + containerId);
|
||||||
|
}
|
||||||
|
|
||||||
|
int pos4 = containerId.indexOf('_', pos3 + 1);
|
||||||
|
if (pos4 < 0) {
|
||||||
|
throw new IllegalArgumentException("Invalid ContainerId: " + containerId);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Confirm that the parsed appId and clusterTimestamp and attemptId and cid and epoch
|
||||||
|
// are numeric types.
|
||||||
|
String appId = containerId.substring(pos2 + 1, pos3);
|
||||||
|
String clusterTimestamp = containerId.substring(pos1 + 1, pos2);
|
||||||
|
String attemptId = containerId.substring(pos3 + 1, pos4);
|
||||||
|
String cid = containerId.substring(pos4 + 1);
|
||||||
|
|
||||||
|
if (!NumberUtils.isDigits(appId) || !NumberUtils.isDigits(clusterTimestamp)
|
||||||
|
|| !NumberUtils.isDigits(attemptId) || !NumberUtils.isDigits(cid)
|
||||||
|
|| !NumberUtils.isDigits(epoch)) {
|
||||||
|
throw new IllegalArgumentException("Invalid ContainerId: " + containerId);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -855,7 +855,7 @@ public MoveApplicationAcrossQueuesResponse moveApplicationAcrossQueues(
|
|||||||
try {
|
try {
|
||||||
response = clientRMProxy.moveApplicationAcrossQueues(request);
|
response = clientRMProxy.moveApplicationAcrossQueues(request);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
routerMetrics.incrAppAttemptsFailedRetrieved();
|
routerMetrics.incrMoveApplicationAcrossQueuesFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to moveApplicationAcrossQueues for " +
|
RouterServerUtil.logAndThrowException("Unable to moveApplicationAcrossQueues for " +
|
||||||
applicationId + " to SubCluster " + subClusterId.getId(), e);
|
applicationId + " to SubCluster " + subClusterId.getId(), e);
|
||||||
}
|
}
|
||||||
@ -1174,7 +1174,7 @@ public GetApplicationAttemptReportResponse getApplicationAttemptReport(
|
|||||||
try {
|
try {
|
||||||
response = clientRMProxy.getApplicationAttemptReport(request);
|
response = clientRMProxy.getApplicationAttemptReport(request);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
routerMetrics.incrAppAttemptsFailedRetrieved();
|
routerMetrics.incrAppAttemptReportFailedRetrieved();
|
||||||
String msg = String.format(
|
String msg = String.format(
|
||||||
"Unable to get the applicationAttempt report for %s to SubCluster %s.",
|
"Unable to get the applicationAttempt report for %s to SubCluster %s.",
|
||||||
request.getApplicationAttemptId(), subClusterId.getId());
|
request.getApplicationAttemptId(), subClusterId.getId());
|
||||||
@ -1237,7 +1237,7 @@ public GetApplicationAttemptsResponse getApplicationAttempts(
|
|||||||
public GetContainerReportResponse getContainerReport(
|
public GetContainerReportResponse getContainerReport(
|
||||||
GetContainerReportRequest request) throws YarnException, IOException {
|
GetContainerReportRequest request) throws YarnException, IOException {
|
||||||
if(request == null || request.getContainerId() == null){
|
if(request == null || request.getContainerId() == null){
|
||||||
routerMetrics.incrContainerReportFailedRetrieved();
|
routerMetrics.incrGetContainerReportFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Missing getContainerReport request " +
|
RouterServerUtil.logAndThrowException("Missing getContainerReport request " +
|
||||||
"or containerId", null);
|
"or containerId", null);
|
||||||
}
|
}
|
||||||
@ -1249,7 +1249,7 @@ public GetContainerReportResponse getContainerReport(
|
|||||||
try {
|
try {
|
||||||
subClusterId = getApplicationHomeSubCluster(applicationId);
|
subClusterId = getApplicationHomeSubCluster(applicationId);
|
||||||
} catch (YarnException ex) {
|
} catch (YarnException ex) {
|
||||||
routerMetrics.incrContainerReportFailedRetrieved();
|
routerMetrics.incrGetContainerReportFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Application " + applicationId +
|
RouterServerUtil.logAndThrowException("Application " + applicationId +
|
||||||
" does not exist in FederationStateStore.", ex);
|
" does not exist in FederationStateStore.", ex);
|
||||||
}
|
}
|
||||||
@ -1260,7 +1260,7 @@ public GetContainerReportResponse getContainerReport(
|
|||||||
try {
|
try {
|
||||||
response = clientRMProxy.getContainerReport(request);
|
response = clientRMProxy.getContainerReport(request);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrContainerReportFailedRetrieved();
|
routerMetrics.incrGetContainerReportFailedRetrieved();
|
||||||
LOG.error("Unable to get the container report for {} from SubCluster {}.",
|
LOG.error("Unable to get the container report for {} from SubCluster {}.",
|
||||||
applicationId, subClusterId.getId(), ex);
|
applicationId, subClusterId.getId(), ex);
|
||||||
}
|
}
|
||||||
@ -1280,7 +1280,7 @@ public GetContainerReportResponse getContainerReport(
|
|||||||
public GetContainersResponse getContainers(GetContainersRequest request)
|
public GetContainersResponse getContainers(GetContainersRequest request)
|
||||||
throws YarnException, IOException {
|
throws YarnException, IOException {
|
||||||
if (request == null || request.getApplicationAttemptId() == null) {
|
if (request == null || request.getApplicationAttemptId() == null) {
|
||||||
routerMetrics.incrContainerFailedRetrieved();
|
routerMetrics.incrGetContainersFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException(
|
RouterServerUtil.logAndThrowException(
|
||||||
"Missing getContainers request or ApplicationAttemptId.", null);
|
"Missing getContainers request or ApplicationAttemptId.", null);
|
||||||
}
|
}
|
||||||
@ -1291,7 +1291,7 @@ public GetContainersResponse getContainers(GetContainersRequest request)
|
|||||||
try {
|
try {
|
||||||
subClusterId = getApplicationHomeSubCluster(applicationId);
|
subClusterId = getApplicationHomeSubCluster(applicationId);
|
||||||
} catch (YarnException ex) {
|
} catch (YarnException ex) {
|
||||||
routerMetrics.incrContainerFailedRetrieved();
|
routerMetrics.incrGetContainersFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Application " + applicationId +
|
RouterServerUtil.logAndThrowException("Application " + applicationId +
|
||||||
" does not exist in FederationStateStore.", ex);
|
" does not exist in FederationStateStore.", ex);
|
||||||
}
|
}
|
||||||
@ -1302,7 +1302,7 @@ public GetContainersResponse getContainers(GetContainersRequest request)
|
|||||||
try {
|
try {
|
||||||
response = clientRMProxy.getContainers(request);
|
response = clientRMProxy.getContainers(request);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrContainerFailedRetrieved();
|
routerMetrics.incrGetContainersFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get the containers for " +
|
RouterServerUtil.logAndThrowException("Unable to get the containers for " +
|
||||||
applicationId + " from SubCluster " + subClusterId.getId(), ex);
|
applicationId + " from SubCluster " + subClusterId.getId(), ex);
|
||||||
}
|
}
|
||||||
|
@ -44,10 +44,12 @@
|
|||||||
import javax.ws.rs.core.Response.Status;
|
import javax.ws.rs.core.Response.Status;
|
||||||
|
|
||||||
import org.apache.commons.lang3.NotImplementedException;
|
import org.apache.commons.lang3.NotImplementedException;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.security.authorize.AuthorizationException;
|
import org.apache.hadoop.security.authorize.AuthorizationException;
|
||||||
import org.apache.hadoop.util.ReflectionUtils;
|
import org.apache.hadoop.util.ReflectionUtils;
|
||||||
import org.apache.hadoop.util.Sets;
|
import org.apache.hadoop.util.Sets;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||||
@ -1000,11 +1002,11 @@ public NodesInfo getNodes(String states) {
|
|||||||
nodes.addAll(nodesInfo.getNodes());
|
nodes.addAll(nodesInfo.getNodes());
|
||||||
});
|
});
|
||||||
} catch (NotFoundException e) {
|
} catch (NotFoundException e) {
|
||||||
LOG.error("Get all active sub cluster(s) error.", e);
|
LOG.error("get all active sub cluster(s) error.", e);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
LOG.error("getNodes error.", e);
|
LOG.error("getNodes by states = {} error.", states, e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.error("getNodes error with io error.", e);
|
LOG.error("getNodes by states = {} error with io error.", states, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete duplicate from all the node reports got from all the available
|
// Delete duplicate from all the node reports got from all the available
|
||||||
@ -1170,32 +1172,45 @@ public AppActivitiesInfo getAppActivities(HttpServletRequest hsr,
|
|||||||
// Only verify the app_id,
|
// Only verify the app_id,
|
||||||
// because the specific subCluster needs to be found according to the app_id,
|
// because the specific subCluster needs to be found according to the app_id,
|
||||||
// and other verifications are directly handed over to the corresponding subCluster RM
|
// and other verifications are directly handed over to the corresponding subCluster RM
|
||||||
if (appId == null || appId.isEmpty()) {
|
// Check that the appId format is accurate
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
try {
|
||||||
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrGetAppActivitiesFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
|
|
||||||
final HttpServletRequest hsrCopy = clone(hsr);
|
final HttpServletRequest hsrCopy = clone(hsr);
|
||||||
return interceptor.getAppActivities(hsrCopy, appId, time, requestPriorities,
|
AppActivitiesInfo appActivitiesInfo = interceptor.getAppActivities(hsrCopy, appId, time,
|
||||||
allocationRequestIds, groupBy, limit, actions, summarize);
|
requestPriorities, allocationRequestIds, groupBy, limit, actions, summarize);
|
||||||
} catch (IllegalArgumentException e) {
|
if (appActivitiesInfo != null) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e, "Unable to get subCluster by appId: %s.",
|
long stopTime = clock.getTime();
|
||||||
appId);
|
routerMetrics.succeededGetAppActivitiesRetrieved(stopTime - startTime);
|
||||||
} catch (YarnException e) {
|
return appActivitiesInfo;
|
||||||
RouterServerUtil.logAndThrowRunTimeException("getAppActivities Failed.", e);
|
|
||||||
}
|
}
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
return null;
|
routerMetrics.incrGetAppActivitiesFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
|
"Unable to get subCluster by appId: %s.", appId);
|
||||||
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrGetAppActivitiesFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
|
"getAppActivities by appId = %s error .", appId);
|
||||||
|
}
|
||||||
|
routerMetrics.incrGetAppActivitiesFailedRetrieved();
|
||||||
|
throw new RuntimeException("getAppActivities Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ApplicationStatisticsInfo getAppStatistics(HttpServletRequest hsr,
|
public ApplicationStatisticsInfo getAppStatistics(HttpServletRequest hsr,
|
||||||
Set<String> stateQueries, Set<String> typeQueries) {
|
Set<String> stateQueries, Set<String> typeQueries) {
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
||||||
final HttpServletRequest hsrCopy = clone(hsr);
|
final HttpServletRequest hsrCopy = clone(hsr);
|
||||||
Class[] argsClasses = new Class[]{HttpServletRequest.class, Set.class, Set.class};
|
Class[] argsClasses = new Class[]{HttpServletRequest.class, Set.class, Set.class};
|
||||||
@ -1203,19 +1218,38 @@ public ApplicationStatisticsInfo getAppStatistics(HttpServletRequest hsr,
|
|||||||
ClientMethod remoteMethod = new ClientMethod("getAppStatistics", argsClasses, args);
|
ClientMethod remoteMethod = new ClientMethod("getAppStatistics", argsClasses, args);
|
||||||
Map<SubClusterInfo, ApplicationStatisticsInfo> appStatisticsMap = invokeConcurrent(
|
Map<SubClusterInfo, ApplicationStatisticsInfo> appStatisticsMap = invokeConcurrent(
|
||||||
subClustersActive.values(), remoteMethod, ApplicationStatisticsInfo.class);
|
subClustersActive.values(), remoteMethod, ApplicationStatisticsInfo.class);
|
||||||
return RouterWebServiceUtil.mergeApplicationStatisticsInfo(appStatisticsMap.values());
|
ApplicationStatisticsInfo applicationStatisticsInfo =
|
||||||
} catch (IOException e) {
|
RouterWebServiceUtil.mergeApplicationStatisticsInfo(appStatisticsMap.values());
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e, "Get all active sub cluster(s) error.");
|
if (applicationStatisticsInfo != null) {
|
||||||
} catch (YarnException e) {
|
long stopTime = clock.getTime();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e, "getAppStatistics error.");
|
routerMetrics.succeededGetAppStatisticsRetrieved(stopTime - startTime);
|
||||||
|
return applicationStatisticsInfo;
|
||||||
}
|
}
|
||||||
return null;
|
} catch (NotFoundException e) {
|
||||||
|
routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException("get all active sub cluster(s) error.", e);
|
||||||
|
} catch (IOException e) {
|
||||||
|
routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
|
"getAppStatistics error by stateQueries = %s, typeQueries = %s with io error.",
|
||||||
|
StringUtils.join(stateQueries, ","), StringUtils.join(typeQueries, ","));
|
||||||
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
|
"getAppStatistics by stateQueries = %s, typeQueries = %s with yarn error.",
|
||||||
|
StringUtils.join(stateQueries, ","), StringUtils.join(typeQueries, ","));
|
||||||
|
}
|
||||||
|
routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
||||||
|
throw RouterServerUtil.logAndReturnRunTimeException(
|
||||||
|
"getAppStatistics by stateQueries = %s, typeQueries = %s Failed.",
|
||||||
|
StringUtils.join(stateQueries, ","), StringUtils.join(typeQueries, ","));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public NodeToLabelsInfo getNodeToLabels(HttpServletRequest hsr)
|
public NodeToLabelsInfo getNodeToLabels(HttpServletRequest hsr)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
||||||
final HttpServletRequest hsrCopy = clone(hsr);
|
final HttpServletRequest hsrCopy = clone(hsr);
|
||||||
Class[] argsClasses = new Class[]{HttpServletRequest.class};
|
Class[] argsClasses = new Class[]{HttpServletRequest.class};
|
||||||
@ -1223,27 +1257,35 @@ public NodeToLabelsInfo getNodeToLabels(HttpServletRequest hsr)
|
|||||||
ClientMethod remoteMethod = new ClientMethod("getNodeToLabels", argsClasses, args);
|
ClientMethod remoteMethod = new ClientMethod("getNodeToLabels", argsClasses, args);
|
||||||
Map<SubClusterInfo, NodeToLabelsInfo> nodeToLabelsInfoMap =
|
Map<SubClusterInfo, NodeToLabelsInfo> nodeToLabelsInfoMap =
|
||||||
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeToLabelsInfo.class);
|
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeToLabelsInfo.class);
|
||||||
return RouterWebServiceUtil.mergeNodeToLabels(nodeToLabelsInfoMap);
|
NodeToLabelsInfo nodeToLabelsInfo =
|
||||||
} catch (NotFoundException e) {
|
RouterWebServiceUtil.mergeNodeToLabels(nodeToLabelsInfoMap);
|
||||||
LOG.error("Get all active sub cluster(s) error.", e);
|
if (nodeToLabelsInfo != null) {
|
||||||
throw new IOException("Get all active sub cluster(s) error.", e);
|
long stopTime = clock.getTime();
|
||||||
} catch (YarnException e) {
|
routerMetrics.succeededGetNodeToLabelsRetrieved(stopTime - startTime);
|
||||||
LOG.error("getNodeToLabels error.", e);
|
return nodeToLabelsInfo;
|
||||||
throw new IOException("getNodeToLabels error.", e);
|
|
||||||
}
|
}
|
||||||
|
} catch (NotFoundException e) {
|
||||||
|
routerMetrics.incrNodeToLabelsFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowIOException("get all active sub cluster(s) error.", e);
|
||||||
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrNodeToLabelsFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowIOException("getNodeToLabels error.", e);
|
||||||
|
}
|
||||||
|
routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
||||||
|
throw new RuntimeException("getNodeToLabels Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public LabelsToNodesInfo getLabelsToNodes(Set<String> labels)
|
public LabelsToNodesInfo getLabelsToNodes(Set<String> labels)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
||||||
Class[] argsClasses = new Class[]{Set.class};
|
Class[] argsClasses = new Class[]{Set.class};
|
||||||
Object[] args = new Object[]{labels};
|
Object[] args = new Object[]{labels};
|
||||||
ClientMethod remoteMethod = new ClientMethod("getLabelsToNodes", argsClasses, args);
|
ClientMethod remoteMethod = new ClientMethod("getLabelsToNodes", argsClasses, args);
|
||||||
Map<SubClusterInfo, LabelsToNodesInfo> labelsToNodesInfoMap =
|
Map<SubClusterInfo, LabelsToNodesInfo> labelsToNodesInfoMap =
|
||||||
invokeConcurrent(subClustersActive.values(), remoteMethod, LabelsToNodesInfo.class);
|
invokeConcurrent(subClustersActive.values(), remoteMethod, LabelsToNodesInfo.class);
|
||||||
|
|
||||||
Map<NodeLabelInfo, NodeIDsInfo> labelToNodesMap = new HashMap<>();
|
Map<NodeLabelInfo, NodeIDsInfo> labelToNodesMap = new HashMap<>();
|
||||||
labelsToNodesInfoMap.values().forEach(labelsToNode -> {
|
labelsToNodesInfoMap.values().forEach(labelsToNode -> {
|
||||||
Map<NodeLabelInfo, NodeIDsInfo> values = labelsToNode.getLabelsToNodes();
|
Map<NodeLabelInfo, NodeIDsInfo> values = labelsToNode.getLabelsToNodes();
|
||||||
@ -1255,13 +1297,23 @@ public LabelsToNodesInfo getLabelsToNodes(Set<String> labels)
|
|||||||
labelToNodesMap.put(key, newValue);
|
labelToNodesMap.put(key, newValue);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return new LabelsToNodesInfo(labelToNodesMap);
|
LabelsToNodesInfo labelsToNodesInfo = new LabelsToNodesInfo(labelToNodesMap);
|
||||||
} catch (NotFoundException e) {
|
if (labelsToNodesInfo != null) {
|
||||||
RouterServerUtil.logAndThrowIOException("Get all active sub cluster(s) error.", e);
|
long stopTime = clock.getTime();
|
||||||
} catch (YarnException e) {
|
routerMetrics.succeededGetLabelsToNodesRetrieved(stopTime - startTime);
|
||||||
RouterServerUtil.logAndThrowIOException("getLabelsToNodes error.", e);
|
return labelsToNodesInfo;
|
||||||
}
|
}
|
||||||
return null;
|
} catch (NotFoundException e) {
|
||||||
|
routerMetrics.incrLabelsToNodesFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowIOException("get all active sub cluster(s) error.", e);
|
||||||
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrLabelsToNodesFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowIOException(
|
||||||
|
e, "getLabelsToNodes by labels = %s with yarn error.", StringUtils.join(labels, ","));
|
||||||
|
}
|
||||||
|
routerMetrics.incrLabelsToNodesFailedRetrieved();
|
||||||
|
throw RouterServerUtil.logAndReturnRunTimeException(
|
||||||
|
"getLabelsToNodes by labels = %s Failed.", StringUtils.join(labels, ","));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1280,6 +1332,7 @@ public Response replaceLabelsOnNode(Set<String> newNodeLabelsName,
|
|||||||
public NodeLabelsInfo getClusterNodeLabels(HttpServletRequest hsr)
|
public NodeLabelsInfo getClusterNodeLabels(HttpServletRequest hsr)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
||||||
final HttpServletRequest hsrCopy = clone(hsr);
|
final HttpServletRequest hsrCopy = clone(hsr);
|
||||||
Class[] argsClasses = new Class[]{HttpServletRequest.class};
|
Class[] argsClasses = new Class[]{HttpServletRequest.class};
|
||||||
@ -1289,13 +1342,21 @@ public NodeLabelsInfo getClusterNodeLabels(HttpServletRequest hsr)
|
|||||||
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
|
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
|
||||||
Set<NodeLabel> hashSets = Sets.newHashSet();
|
Set<NodeLabel> hashSets = Sets.newHashSet();
|
||||||
nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
|
nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
|
||||||
return new NodeLabelsInfo(hashSets);
|
NodeLabelsInfo nodeLabelsInfo = new NodeLabelsInfo(hashSets);
|
||||||
} catch (NotFoundException e) {
|
if (nodeLabelsInfo != null) {
|
||||||
RouterServerUtil.logAndThrowIOException("Get all active sub cluster(s) error.", e);
|
long stopTime = clock.getTime();
|
||||||
} catch (YarnException e) {
|
routerMetrics.succeededGetClusterNodeLabelsRetrieved(stopTime - startTime);
|
||||||
RouterServerUtil.logAndThrowIOException("getClusterNodeLabels error.", e);
|
return nodeLabelsInfo;
|
||||||
}
|
}
|
||||||
return null;
|
} catch (NotFoundException e) {
|
||||||
|
routerMetrics.incrClusterNodeLabelsFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowIOException("get all active sub cluster(s) error.", e);
|
||||||
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrClusterNodeLabelsFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowIOException("getClusterNodeLabels with yarn error.", e);
|
||||||
|
}
|
||||||
|
routerMetrics.incrClusterNodeLabelsFailedRetrieved();
|
||||||
|
throw new RuntimeException("getClusterNodeLabels Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1314,6 +1375,7 @@ public Response removeFromClusterNodeLabels(Set<String> oldNodeLabels,
|
|||||||
public NodeLabelsInfo getLabelsOnNode(HttpServletRequest hsr, String nodeId)
|
public NodeLabelsInfo getLabelsOnNode(HttpServletRequest hsr, String nodeId)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
||||||
final HttpServletRequest hsrCopy = clone(hsr);
|
final HttpServletRequest hsrCopy = clone(hsr);
|
||||||
Class[] argsClasses = new Class[]{HttpServletRequest.class, String.class};
|
Class[] argsClasses = new Class[]{HttpServletRequest.class, String.class};
|
||||||
@ -1323,36 +1385,58 @@ public NodeLabelsInfo getLabelsOnNode(HttpServletRequest hsr, String nodeId)
|
|||||||
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
|
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
|
||||||
Set<NodeLabel> hashSets = Sets.newHashSet();
|
Set<NodeLabel> hashSets = Sets.newHashSet();
|
||||||
nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
|
nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
|
||||||
return new NodeLabelsInfo(hashSets);
|
NodeLabelsInfo nodeLabelsInfo = new NodeLabelsInfo(hashSets);
|
||||||
} catch (NotFoundException e) {
|
if (nodeLabelsInfo != null) {
|
||||||
RouterServerUtil.logAndThrowIOException("Get all active sub cluster(s) error.", e);
|
long stopTime = clock.getTime();
|
||||||
} catch (YarnException e) {
|
routerMetrics.succeededGetLabelsToNodesRetrieved(stopTime - startTime);
|
||||||
RouterServerUtil.logAndThrowIOException("getClusterNodeLabels error.", e);
|
return nodeLabelsInfo;
|
||||||
}
|
}
|
||||||
return null;
|
} catch (NotFoundException e) {
|
||||||
|
routerMetrics.incrLabelsToNodesFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowIOException("get all active sub cluster(s) error.", e);
|
||||||
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrLabelsToNodesFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowIOException(
|
||||||
|
e, "getLabelsOnNode nodeId = %s with yarn error.", nodeId);
|
||||||
|
}
|
||||||
|
routerMetrics.incrLabelsToNodesFailedRetrieved();
|
||||||
|
throw RouterServerUtil.logAndReturnRunTimeException(
|
||||||
|
"getLabelsOnNode by nodeId = %s Failed.", nodeId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AppPriority getAppPriority(HttpServletRequest hsr, String appId)
|
public AppPriority getAppPriority(HttpServletRequest hsr, String appId)
|
||||||
throws AuthorizationException {
|
throws AuthorizationException {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
// Check that the appId format is accurate
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
try {
|
||||||
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrGetAppPriorityFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.getAppPriority(hsr, appId);
|
AppPriority appPriority = interceptor.getAppPriority(hsr, appId);
|
||||||
|
if (appPriority != null) {
|
||||||
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededGetAppPriorityRetrieved(stopTime - startTime);
|
||||||
|
return appPriority;
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrGetAppPriorityFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e,
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
"Unable to get the getAppPriority appId: %s.", appId);
|
"Unable to get the getAppPriority appId: %s.", appId);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException("getAppPriority Failed.", e);
|
routerMetrics.incrGetAppPriorityFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException("getAppPriority error.", e);
|
||||||
}
|
}
|
||||||
|
routerMetrics.incrGetAppPriorityFailedRetrieved();
|
||||||
return null;
|
throw new RuntimeException("getAppPriority Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1360,50 +1444,74 @@ public Response updateApplicationPriority(AppPriority targetPriority,
|
|||||||
HttpServletRequest hsr, String appId) throws AuthorizationException,
|
HttpServletRequest hsr, String appId) throws AuthorizationException,
|
||||||
YarnException, InterruptedException, IOException {
|
YarnException, InterruptedException, IOException {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
// Check that the appId format is accurate
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
try {
|
||||||
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (targetPriority == null) {
|
if (targetPriority == null) {
|
||||||
|
routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
||||||
throw new IllegalArgumentException("Parameter error, the targetPriority is empty or null.");
|
throw new IllegalArgumentException("Parameter error, the targetPriority is empty or null.");
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.updateApplicationPriority(targetPriority, hsr, appId);
|
Response response = interceptor.updateApplicationPriority(targetPriority, hsr, appId);
|
||||||
|
if (response != null) {
|
||||||
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededUpdateAppPriorityRetrieved(stopTime - startTime);
|
||||||
|
return response;
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e,
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
"Unable to get the updateApplicationPriority appId: %s.", appId);
|
"Unable to get the updateApplicationPriority appId: %s.", appId);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException("updateApplicationPriority Failed.", e);
|
routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException("updateApplicationPriority error.", e);
|
||||||
}
|
}
|
||||||
|
routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
||||||
return null;
|
throw new RuntimeException("updateApplicationPriority Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AppQueue getAppQueue(HttpServletRequest hsr, String appId)
|
public AppQueue getAppQueue(HttpServletRequest hsr, String appId)
|
||||||
throws AuthorizationException {
|
throws AuthorizationException {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
// Check that the appId format is accurate
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
try {
|
||||||
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrGetAppQueueFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.getAppQueue(hsr, appId);
|
AppQueue queue = interceptor.getAppQueue(hsr, appId);
|
||||||
} catch (IllegalArgumentException e) {
|
if (queue != null) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e,
|
long stopTime = clock.getTime();
|
||||||
"Unable to get queue by appId: %s.", appId);
|
routerMetrics.succeededGetAppQueueRetrieved((stopTime - startTime));
|
||||||
} catch (YarnException e) {
|
return queue;
|
||||||
RouterServerUtil.logAndThrowRunTimeException("getAppQueue Failed.", e);
|
|
||||||
}
|
}
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
return null;
|
routerMetrics.incrGetAppQueueFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException(e, "Unable to get queue by appId: %s.", appId);
|
||||||
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrGetAppQueueFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException("getAppQueue error.", e);
|
||||||
|
}
|
||||||
|
routerMetrics.incrGetAppQueueFailedRetrieved();
|
||||||
|
throw new RuntimeException("getAppQueue Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1411,27 +1519,40 @@ public Response updateAppQueue(AppQueue targetQueue, HttpServletRequest hsr,
|
|||||||
String appId) throws AuthorizationException, YarnException,
|
String appId) throws AuthorizationException, YarnException,
|
||||||
InterruptedException, IOException {
|
InterruptedException, IOException {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
// Check that the appId format is accurate
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
try {
|
||||||
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (targetQueue == null) {
|
if (targetQueue == null) {
|
||||||
|
routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
||||||
throw new IllegalArgumentException("Parameter error, the targetQueue is null.");
|
throw new IllegalArgumentException("Parameter error, the targetQueue is null.");
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.updateAppQueue(targetQueue, hsr, appId);
|
Response response = interceptor.updateAppQueue(targetQueue, hsr, appId);
|
||||||
|
if (response != null) {
|
||||||
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededUpdateAppQueueRetrieved(stopTime - startTime);
|
||||||
|
return response;
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e,
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
"Unable to update app queue by appId: %s.", appId);
|
"Unable to update app queue by appId: %s.", appId);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException("updateAppQueue Failed.", e);
|
routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException("updateAppQueue error.", e);
|
||||||
}
|
}
|
||||||
|
routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
||||||
return null;
|
throw new RuntimeException("updateAppQueue Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1497,7 +1618,16 @@ public Response listReservation(String queue, String reservationId,
|
|||||||
throw new IllegalArgumentException("Parameter error, the reservationId is empty or null.");
|
throw new IllegalArgumentException("Parameter error, the reservationId is empty or null.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check that the reservationId format is accurate
|
||||||
try {
|
try {
|
||||||
|
ReservationId.parseReservationId(reservationId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrListReservationFailedRetrieved();
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
long startTime1 = clock.getTime();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByReservationId(reservationId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByReservationId(reservationId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
@ -1505,11 +1635,13 @@ public Response listReservation(String queue, String reservationId,
|
|||||||
Response response = interceptor.listReservation(queue, reservationId, startTime, endTime,
|
Response response = interceptor.listReservation(queue, reservationId, startTime, endTime,
|
||||||
includeResourceAllocations, hsrCopy);
|
includeResourceAllocations, hsrCopy);
|
||||||
if (response != null) {
|
if (response != null) {
|
||||||
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededListReservationRetrieved(stopTime - startTime1);
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
routerMetrics.incrListReservationFailedRetrieved();
|
routerMetrics.incrListReservationFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException("listReservation Failed.", e);
|
RouterServerUtil.logAndThrowRunTimeException("listReservation error.", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
routerMetrics.incrListReservationFailedRetrieved();
|
routerMetrics.incrListReservationFailedRetrieved();
|
||||||
@ -1521,47 +1653,80 @@ public AppTimeoutInfo getAppTimeout(HttpServletRequest hsr, String appId,
|
|||||||
String type) throws AuthorizationException {
|
String type) throws AuthorizationException {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
if (appId == null || appId.isEmpty()) {
|
||||||
|
routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check that the appId format is accurate
|
||||||
|
try {
|
||||||
|
ApplicationId.fromString(appId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
if (type == null || type.isEmpty()) {
|
if (type == null || type.isEmpty()) {
|
||||||
|
routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
||||||
throw new IllegalArgumentException("Parameter error, the type is empty or null.");
|
throw new IllegalArgumentException("Parameter error, the type is empty or null.");
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.getAppTimeout(hsr, appId, type);
|
AppTimeoutInfo appTimeoutInfo = interceptor.getAppTimeout(hsr, appId, type);
|
||||||
|
if (appTimeoutInfo != null) {
|
||||||
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededGetAppTimeoutRetrieved((stopTime - startTime));
|
||||||
|
return appTimeoutInfo;
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e,
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
"Unable to get the getAppTimeout appId: %s.", appId);
|
"Unable to get the getAppTimeout appId: %s.", appId);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException("getAppTimeout Failed.", e);
|
routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException("getAppTimeout error.", e);
|
||||||
}
|
}
|
||||||
return null;
|
routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
||||||
|
throw new RuntimeException("getAppTimeout Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AppTimeoutsInfo getAppTimeouts(HttpServletRequest hsr, String appId)
|
public AppTimeoutsInfo getAppTimeouts(HttpServletRequest hsr, String appId)
|
||||||
throws AuthorizationException {
|
throws AuthorizationException {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
// Check that the appId format is accurate
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
try {
|
||||||
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrGetAppTimeoutsFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.getAppTimeouts(hsr, appId);
|
AppTimeoutsInfo appTimeoutsInfo = interceptor.getAppTimeouts(hsr, appId);
|
||||||
|
if (appTimeoutsInfo != null) {
|
||||||
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededGetAppTimeoutsRetrieved((stopTime - startTime));
|
||||||
|
return appTimeoutsInfo;
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrGetAppTimeoutsFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e,
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
"Unable to get the getAppTimeouts appId: %s.", appId);
|
"Unable to get the getAppTimeouts appId: %s.", appId);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException("getAppTimeouts Failed.", e);
|
routerMetrics.incrGetAppTimeoutsFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException("getAppTimeouts error.", e);
|
||||||
}
|
}
|
||||||
return null;
|
|
||||||
|
routerMetrics.incrGetAppTimeoutsFailedRetrieved();
|
||||||
|
throw new RuntimeException("getAppTimeouts Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1569,47 +1734,76 @@ public Response updateApplicationTimeout(AppTimeoutInfo appTimeout,
|
|||||||
HttpServletRequest hsr, String appId) throws AuthorizationException,
|
HttpServletRequest hsr, String appId) throws AuthorizationException,
|
||||||
YarnException, InterruptedException, IOException {
|
YarnException, InterruptedException, IOException {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
// Check that the appId format is accurate
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
try {
|
||||||
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (appTimeout == null) {
|
if (appTimeout == null) {
|
||||||
|
routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
||||||
throw new IllegalArgumentException("Parameter error, the appTimeout is null.");
|
throw new IllegalArgumentException("Parameter error, the appTimeout is null.");
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = Time.now();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.updateApplicationTimeout(appTimeout, hsr, appId);
|
Response response = interceptor.updateApplicationTimeout(appTimeout, hsr, appId);
|
||||||
|
if (response != null) {
|
||||||
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededUpdateAppTimeoutsRetrieved((stopTime - startTime));
|
||||||
|
return response;
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e,
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
"Unable to get the updateApplicationTimeout appId: %s.", appId);
|
"Unable to get the updateApplicationTimeout appId: %s.", appId);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException("updateApplicationTimeout Failed.", e);
|
routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException("updateApplicationTimeout error.", e);
|
||||||
}
|
}
|
||||||
return null;
|
|
||||||
|
routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
||||||
|
throw new RuntimeException("updateApplicationTimeout Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AppAttemptsInfo getAppAttempts(HttpServletRequest hsr, String appId) {
|
public AppAttemptsInfo getAppAttempts(HttpServletRequest hsr, String appId) {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
// Check that the appId format is accurate
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
try {
|
||||||
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrAppAttemptsFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = Time.now();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.getAppAttempts(hsr, appId);
|
AppAttemptsInfo appAttemptsInfo = interceptor.getAppAttempts(hsr, appId);
|
||||||
|
if (appAttemptsInfo != null) {
|
||||||
|
long stopTime = Time.now();
|
||||||
|
routerMetrics.succeededAppAttemptsRetrieved(stopTime - startTime);
|
||||||
|
return appAttemptsInfo;
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrAppAttemptsFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e,
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
"Unable to get the AppAttempt appId: %s.", appId);
|
"Unable to get the AppAttempt appId: %s.", appId);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException("getAppAttempts Failed.", e);
|
routerMetrics.incrAppAttemptsFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException("getAppAttempts error.", e);
|
||||||
}
|
}
|
||||||
return null;
|
|
||||||
|
routerMetrics.incrAppAttemptsFailedRetrieved();
|
||||||
|
throw new RuntimeException("getAppAttempts Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1622,59 +1816,87 @@ public RMQueueAclInfo checkUserAccessToQueue(String queue, String username,
|
|||||||
public AppAttemptInfo getAppAttempt(HttpServletRequest req,
|
public AppAttemptInfo getAppAttempt(HttpServletRequest req,
|
||||||
HttpServletResponse res, String appId, String appAttemptId) {
|
HttpServletResponse res, String appId, String appAttemptId) {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
// Check that the appId/appAttemptId format is accurate
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
||||||
}
|
|
||||||
if (appAttemptId == null || appAttemptId.isEmpty()) {
|
|
||||||
throw new IllegalArgumentException("Parameter error, the appAttemptId is empty or null.");
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
|
RouterServerUtil.validateApplicationAttemptId(appAttemptId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrAppAttemptReportFailedRetrieved();
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call the getAppAttempt method
|
||||||
|
try {
|
||||||
|
long startTime = Time.now();
|
||||||
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.getAppAttempt(req, res, appId, appAttemptId);
|
AppAttemptInfo appAttemptInfo = interceptor.getAppAttempt(req, res, appId, appAttemptId);
|
||||||
|
if (appAttemptInfo != null) {
|
||||||
|
long stopTime = Time.now();
|
||||||
|
routerMetrics.succeededAppAttemptReportRetrieved(stopTime - startTime);
|
||||||
|
return appAttemptInfo;
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrAppAttemptReportFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(e,
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
"Unable to get the AppAttempt appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
"Unable to getAppAttempt by appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
RouterServerUtil.logAndThrowRunTimeException("getContainer Failed.", e);
|
routerMetrics.incrAppAttemptReportFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
||||||
|
"getAppAttempt error, appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
routerMetrics.incrAppAttemptReportFailedRetrieved();
|
||||||
|
throw RouterServerUtil.logAndReturnRunTimeException(
|
||||||
|
"getAppAttempt failed, appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ContainersInfo getContainers(HttpServletRequest req,
|
public ContainersInfo getContainers(HttpServletRequest req,
|
||||||
HttpServletResponse res, String appId, String appAttemptId) {
|
HttpServletResponse res, String appId, String appAttemptId) {
|
||||||
|
|
||||||
ContainersInfo containersInfo = new ContainersInfo();
|
// Check that the appId/appAttemptId format is accurate
|
||||||
|
|
||||||
Map<SubClusterId, SubClusterInfo> subClustersActive;
|
|
||||||
try {
|
try {
|
||||||
subClustersActive = getActiveSubclusters();
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
} catch (NotFoundException e) {
|
RouterServerUtil.validateApplicationAttemptId(appAttemptId);
|
||||||
LOG.error("Get all active sub cluster(s) error.", e);
|
} catch (IllegalArgumentException e) {
|
||||||
return containersInfo;
|
routerMetrics.incrGetContainersFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
|
ContainersInfo containersInfo = new ContainersInfo();
|
||||||
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
||||||
Class[] argsClasses = new Class[]{
|
Class[] argsClasses = new Class[]{
|
||||||
HttpServletRequest.class, HttpServletResponse.class, String.class, String.class};
|
HttpServletRequest.class, HttpServletResponse.class, String.class, String.class};
|
||||||
Object[] args = new Object[]{req, res, appId, appAttemptId};
|
Object[] args = new Object[]{req, res, appId, appAttemptId};
|
||||||
ClientMethod remoteMethod = new ClientMethod("getContainers", argsClasses, args);
|
ClientMethod remoteMethod = new ClientMethod("getContainers", argsClasses, args);
|
||||||
Map<SubClusterInfo, ContainersInfo> containersInfoMap =
|
Map<SubClusterInfo, ContainersInfo> containersInfoMap =
|
||||||
invokeConcurrent(subClustersActive.values(), remoteMethod, ContainersInfo.class);
|
invokeConcurrent(subClustersActive.values(), remoteMethod, ContainersInfo.class);
|
||||||
if (containersInfoMap != null) {
|
if (containersInfoMap != null && !containersInfoMap.isEmpty()) {
|
||||||
containersInfoMap.values().forEach(containers ->
|
containersInfoMap.values().forEach(containers ->
|
||||||
containersInfo.addAll(containers.getContainers()));
|
containersInfo.addAll(containers.getContainers()));
|
||||||
}
|
}
|
||||||
} catch (Exception ex) {
|
if (containersInfo != null) {
|
||||||
LOG.error("Failed to return GetContainers.", ex);
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededGetContainersRetrieved(stopTime - startTime);
|
||||||
|
return containersInfo;
|
||||||
|
}
|
||||||
|
} catch (NotFoundException e) {
|
||||||
|
routerMetrics.incrGetContainersFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException(e, "getContainers error, appId = %s, " +
|
||||||
|
" appAttemptId = %s, Probably getActiveSubclusters error.", appId, appAttemptId);
|
||||||
|
} catch (IOException | YarnException e) {
|
||||||
|
routerMetrics.incrGetContainersFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowRunTimeException(e, "getContainers error, appId = %s, " +
|
||||||
|
" appAttemptId = %s.", appId, appAttemptId);
|
||||||
}
|
}
|
||||||
|
|
||||||
return containersInfo;
|
routerMetrics.incrGetContainersFailedRetrieved();
|
||||||
|
throw RouterServerUtil.logAndReturnRunTimeException(
|
||||||
|
"getContainers failed, appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1682,32 +1904,45 @@ public ContainerInfo getContainer(HttpServletRequest req,
|
|||||||
HttpServletResponse res, String appId, String appAttemptId,
|
HttpServletResponse res, String appId, String appAttemptId,
|
||||||
String containerId) {
|
String containerId) {
|
||||||
|
|
||||||
if (appId == null || appId.isEmpty()) {
|
// FederationInterceptorREST#getContainer is logically
|
||||||
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
// the same as FederationClientInterceptor#getContainerReport,
|
||||||
}
|
// so use the same Metric.
|
||||||
if (appAttemptId == null || appAttemptId.isEmpty()) {
|
|
||||||
throw new IllegalArgumentException("Parameter error, the appAttemptId is empty or null.");
|
// Check that the appId/appAttemptId/containerId format is accurate
|
||||||
}
|
try {
|
||||||
if (containerId == null || containerId.isEmpty()) {
|
RouterServerUtil.validateApplicationId(appId);
|
||||||
throw new IllegalArgumentException("Parameter error, the containerId is empty or null.");
|
RouterServerUtil.validateApplicationAttemptId(appAttemptId);
|
||||||
|
RouterServerUtil.validateContainerId(containerId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrGetContainerReportFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = Time.now();
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
||||||
|
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.getContainer(req, res, appId, appAttemptId, containerId);
|
ContainerInfo containerInfo =
|
||||||
|
interceptor.getContainer(req, res, appId, appAttemptId, containerId);
|
||||||
|
if (containerInfo != null) {
|
||||||
|
long stopTime = Time.now();
|
||||||
|
routerMetrics.succeededGetContainerReportRetrieved(stopTime - startTime);
|
||||||
|
return containerInfo;
|
||||||
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
String msg = String.format(
|
String msg = String.format(
|
||||||
"Unable to get the AppAttempt appId: %s, appAttemptId: %s, containerId: %s.", appId,
|
"Unable to get the AppAttempt appId: %s, appAttemptId: %s, containerId: %s.", appId,
|
||||||
appAttemptId, containerId);
|
appAttemptId, containerId);
|
||||||
|
routerMetrics.incrGetContainerReportFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException(msg, e);
|
RouterServerUtil.logAndThrowRunTimeException(msg, e);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrGetContainerReportFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException("getContainer Failed.", e);
|
RouterServerUtil.logAndThrowRunTimeException("getContainer Failed.", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
routerMetrics.incrGetContainerReportFailedRetrieved();
|
||||||
|
throw new RuntimeException("getContainer Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1735,31 +1970,45 @@ public void setNextInterceptor(RESTRequestInterceptor next) {
|
|||||||
public Response signalToContainer(String containerId, String command,
|
public Response signalToContainer(String containerId, String command,
|
||||||
HttpServletRequest req) {
|
HttpServletRequest req) {
|
||||||
|
|
||||||
if (containerId == null || containerId.isEmpty()) {
|
// Check if containerId is empty or null
|
||||||
throw new IllegalArgumentException("Parameter error, the containerId is empty or null.");
|
try {
|
||||||
|
RouterServerUtil.validateContainerId(containerId);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
routerMetrics.incrSignalToContainerFailedRetrieved();
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if command is empty or null
|
||||||
if (command == null || command.isEmpty()) {
|
if (command == null || command.isEmpty()) {
|
||||||
|
routerMetrics.incrSignalToContainerFailedRetrieved();
|
||||||
throw new IllegalArgumentException("Parameter error, the command is empty or null.");
|
throw new IllegalArgumentException("Parameter error, the command is empty or null.");
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
long startTime = Time.now();
|
||||||
|
|
||||||
ContainerId containerIdObj = ContainerId.fromString(containerId);
|
ContainerId containerIdObj = ContainerId.fromString(containerId);
|
||||||
ApplicationId applicationId = containerIdObj.getApplicationAttemptId().getApplicationId();
|
ApplicationId applicationId = containerIdObj.getApplicationAttemptId().getApplicationId();
|
||||||
|
|
||||||
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(applicationId.toString());
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(applicationId.toString());
|
||||||
|
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
||||||
return interceptor.signalToContainer(containerId, command, req);
|
|
||||||
|
|
||||||
|
Response response = interceptor.signalToContainer(containerId, command, req);
|
||||||
|
if (response != null) {
|
||||||
|
long stopTime = Time.now();
|
||||||
|
routerMetrics.succeededSignalToContainerRetrieved(stopTime - startTime);
|
||||||
|
return response;
|
||||||
|
}
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrSignalToContainerFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException("signalToContainer Failed.", e);
|
RouterServerUtil.logAndThrowRunTimeException("signalToContainer Failed.", e);
|
||||||
} catch (AuthorizationException e) {
|
} catch (AuthorizationException e) {
|
||||||
|
routerMetrics.incrSignalToContainerFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowRunTimeException("signalToContainer Author Failed.", e);
|
RouterServerUtil.logAndThrowRunTimeException("signalToContainer Author Failed.", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
routerMetrics.incrSignalToContainerFailedRetrieved();
|
||||||
|
throw new RuntimeException("signalToContainer Failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -1777,6 +2026,7 @@ private <R> Map<SubClusterInfo, R> invokeConcurrent(Collection<SubClusterInfo> c
|
|||||||
// Send the requests in parallel
|
// Send the requests in parallel
|
||||||
CompletionService<R> compSvc = new ExecutorCompletionService<>(this.threadpool);
|
CompletionService<R> compSvc = new ExecutorCompletionService<>(this.threadpool);
|
||||||
|
|
||||||
|
// Error Msg
|
||||||
for (final SubClusterInfo info : clusterIds) {
|
for (final SubClusterInfo info : clusterIds) {
|
||||||
compSvc.submit(() -> {
|
compSvc.submit(() -> {
|
||||||
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
||||||
@ -1831,6 +2081,8 @@ private SubClusterInfo getHomeSubClusterInfoByAppId(String appId)
|
|||||||
}
|
}
|
||||||
subClusterInfo = federationFacade.getSubCluster(subClusterId);
|
subClusterInfo = federationFacade.getSubCluster(subClusterId);
|
||||||
return subClusterInfo;
|
return subClusterInfo;
|
||||||
|
} catch (IllegalArgumentException e){
|
||||||
|
throw new IllegalArgumentException(e);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
RouterServerUtil.logAndThrowException(e,
|
RouterServerUtil.logAndThrowException(e,
|
||||||
"Get HomeSubClusterInfo by applicationId %s failed.", appId);
|
"Get HomeSubClusterInfo by applicationId %s failed.", appId);
|
||||||
|
@ -386,12 +386,12 @@ public void getApplicationAttempts() {
|
|||||||
|
|
||||||
public void getContainerReport() {
|
public void getContainerReport() {
|
||||||
LOG.info("Mocked: failed getContainerReport call");
|
LOG.info("Mocked: failed getContainerReport call");
|
||||||
metrics.incrContainerReportFailedRetrieved();
|
metrics.incrGetContainerReportFailedRetrieved();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void getContainer() {
|
public void getContainers() {
|
||||||
LOG.info("Mocked: failed getContainer call");
|
LOG.info("Mocked: failed getContainer call");
|
||||||
metrics.incrContainerFailedRetrieved();
|
metrics.incrGetContainersFailedRetrieved();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void getResourceTypeInfo() {
|
public void getResourceTypeInfo() {
|
||||||
@ -478,6 +478,41 @@ public void getListReservationFailed() {
|
|||||||
LOG.info("Mocked: failed getListReservationFailed call");
|
LOG.info("Mocked: failed getListReservationFailed call");
|
||||||
metrics.incrListReservationFailedRetrieved();
|
metrics.incrListReservationFailedRetrieved();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getAppActivitiesFailed() {
|
||||||
|
LOG.info("Mocked: failed getAppActivitiesFailed call");
|
||||||
|
metrics.incrGetAppActivitiesFailedRetrieved();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppStatisticsFailed() {
|
||||||
|
LOG.info("Mocked: failed getAppStatisticsFailed call");
|
||||||
|
metrics.incrGetAppStatisticsFailedRetrieved();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppPriorityFailed() {
|
||||||
|
LOG.info("Mocked: failed getAppPriorityFailed call");
|
||||||
|
metrics.incrGetAppPriorityFailedRetrieved();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppQueueFailed() {
|
||||||
|
LOG.info("Mocked: failed getAppQueueFailed call");
|
||||||
|
metrics.incrGetAppQueueFailedRetrieved();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getUpdateQueueFailed() {
|
||||||
|
LOG.info("Mocked: failed getUpdateQueueFailed call");
|
||||||
|
metrics.incrUpdateAppQueueFailedRetrieved();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppTimeoutFailed() {
|
||||||
|
LOG.info("Mocked: failed getAppTimeoutFailed call");
|
||||||
|
metrics.incrGetAppTimeoutFailedRetrieved();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppTimeoutsFailed() {
|
||||||
|
LOG.info("Mocked: failed getAppTimeoutsFailed call");
|
||||||
|
metrics.incrGetAppTimeoutsFailedRetrieved();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Records successes for all calls
|
// Records successes for all calls
|
||||||
@ -564,7 +599,7 @@ public void getContainerReport(long duration) {
|
|||||||
metrics.succeededGetContainerReportRetrieved(duration);
|
metrics.succeededGetContainerReportRetrieved(duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void getContainer(long duration) {
|
public void getContainers(long duration) {
|
||||||
LOG.info("Mocked: successful getContainer call with duration {}", duration);
|
LOG.info("Mocked: successful getContainer call with duration {}", duration);
|
||||||
metrics.succeededGetContainersRetrieved(duration);
|
metrics.succeededGetContainersRetrieved(duration);
|
||||||
}
|
}
|
||||||
@ -653,6 +688,41 @@ public void getListReservationRetrieved(long duration) {
|
|||||||
LOG.info("Mocked: successful getListReservation call with duration {}", duration);
|
LOG.info("Mocked: successful getListReservation call with duration {}", duration);
|
||||||
metrics.succeededListReservationRetrieved(duration);
|
metrics.succeededListReservationRetrieved(duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getAppActivitiesRetrieved(long duration) {
|
||||||
|
LOG.info("Mocked: successful getAppActivities call with duration {}", duration);
|
||||||
|
metrics.succeededGetAppActivitiesRetrieved(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppStatisticsRetrieved(long duration) {
|
||||||
|
LOG.info("Mocked: successful getAppStatistics call with duration {}", duration);
|
||||||
|
metrics.succeededGetAppStatisticsRetrieved(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppPriorityRetrieved(long duration) {
|
||||||
|
LOG.info("Mocked: successful getAppPriority call with duration {}", duration);
|
||||||
|
metrics.succeededGetAppPriorityRetrieved(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppQueueRetrieved(long duration) {
|
||||||
|
LOG.info("Mocked: successful getAppQueue call with duration {}", duration);
|
||||||
|
metrics.succeededGetAppQueueRetrieved(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getUpdateQueueRetrieved(long duration) {
|
||||||
|
LOG.info("Mocked: successful getUpdateQueue call with duration {}", duration);
|
||||||
|
metrics.succeededUpdateAppQueueRetrieved(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppTimeoutRetrieved(long duration) {
|
||||||
|
LOG.info("Mocked: successful getAppTimeout call with duration {}", duration);
|
||||||
|
metrics.succeededGetAppTimeoutRetrieved(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getAppTimeoutsRetrieved(long duration) {
|
||||||
|
LOG.info("Mocked: successful getAppTimeouts call with duration {}", duration);
|
||||||
|
metrics.succeededGetAppTimeoutsRetrieved(duration);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -827,12 +897,12 @@ public void testGetContainerReportFailed() {
|
|||||||
@Test
|
@Test
|
||||||
public void testSucceededGetContainers() {
|
public void testSucceededGetContainers() {
|
||||||
long totalGoodBefore = metrics.getNumSucceededGetContainersRetrieved();
|
long totalGoodBefore = metrics.getNumSucceededGetContainersRetrieved();
|
||||||
goodSubCluster.getContainer(150);
|
goodSubCluster.getContainers(150);
|
||||||
Assert.assertEquals(totalGoodBefore + 1,
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
metrics.getNumSucceededGetContainersRetrieved());
|
metrics.getNumSucceededGetContainersRetrieved());
|
||||||
Assert.assertEquals(150,
|
Assert.assertEquals(150,
|
||||||
metrics.getLatencySucceededGetContainersRetrieved(), ASSERT_DOUBLE_DELTA);
|
metrics.getLatencySucceededGetContainersRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
goodSubCluster.getContainer(300);
|
goodSubCluster.getContainers(300);
|
||||||
Assert.assertEquals(totalGoodBefore + 2,
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
metrics.getNumSucceededGetContainersRetrieved());
|
metrics.getNumSucceededGetContainersRetrieved());
|
||||||
Assert.assertEquals(225, metrics.getLatencySucceededGetContainersRetrieved(),
|
Assert.assertEquals(225, metrics.getLatencySucceededGetContainersRetrieved(),
|
||||||
@ -840,9 +910,9 @@ public void testSucceededGetContainers() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetContainerFailed() {
|
public void testGetContainersFailed() {
|
||||||
long totalBadBefore = metrics.getContainersFailedRetrieved();
|
long totalBadBefore = metrics.getContainersFailedRetrieved();
|
||||||
badSubCluster.getContainer();
|
badSubCluster.getContainers();
|
||||||
Assert.assertEquals(totalBadBefore + 1, metrics.getContainersFailedRetrieved());
|
Assert.assertEquals(totalBadBefore + 1, metrics.getContainersFailedRetrieved());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1234,4 +1304,165 @@ public void testGetListReservationRetrievedFailed() {
|
|||||||
Assert.assertEquals(totalBadBefore + 1,
|
Assert.assertEquals(totalBadBefore + 1,
|
||||||
metrics.getListReservationFailedRetrieved());
|
metrics.getListReservationFailedRetrieved());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppActivitiesRetrieved() {
|
||||||
|
long totalGoodBefore = metrics.getNumSucceededGetAppActivitiesRetrieved();
|
||||||
|
goodSubCluster.getAppActivitiesRetrieved(150);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
|
metrics.getNumSucceededGetAppActivitiesRetrieved());
|
||||||
|
Assert.assertEquals(150,
|
||||||
|
metrics.getLatencySucceededGetAppActivitiesRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
goodSubCluster.getAppActivitiesRetrieved(300);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
|
metrics.getNumSucceededGetAppActivitiesRetrieved());
|
||||||
|
Assert.assertEquals(225,
|
||||||
|
metrics.getLatencySucceededGetAppActivitiesRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppActivitiesRetrievedFailed() {
|
||||||
|
long totalBadBefore = metrics.getAppActivitiesFailedRetrieved();
|
||||||
|
badSubCluster.getAppActivitiesFailed();
|
||||||
|
Assert.assertEquals(totalBadBefore + 1,
|
||||||
|
metrics.getAppActivitiesFailedRetrieved());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppStatisticsLatencyRetrieved() {
|
||||||
|
long totalGoodBefore = metrics.getNumSucceededGetAppStatisticsRetrieved();
|
||||||
|
goodSubCluster.getAppStatisticsRetrieved(150);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
|
metrics.getNumSucceededGetAppStatisticsRetrieved());
|
||||||
|
Assert.assertEquals(150,
|
||||||
|
metrics.getLatencySucceededGetAppStatisticsRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
goodSubCluster.getAppStatisticsRetrieved(300);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
|
metrics.getNumSucceededGetAppStatisticsRetrieved());
|
||||||
|
Assert.assertEquals(225,
|
||||||
|
metrics.getLatencySucceededGetAppStatisticsRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppStatisticsRetrievedFailed() {
|
||||||
|
long totalBadBefore = metrics.getAppStatisticsFailedRetrieved();
|
||||||
|
badSubCluster.getAppStatisticsFailed();
|
||||||
|
Assert.assertEquals(totalBadBefore + 1,
|
||||||
|
metrics.getAppStatisticsFailedRetrieved());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppPriorityLatencyRetrieved() {
|
||||||
|
long totalGoodBefore = metrics.getNumSucceededGetAppPriorityRetrieved();
|
||||||
|
goodSubCluster.getAppPriorityRetrieved(150);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
|
metrics.getNumSucceededGetAppPriorityRetrieved());
|
||||||
|
Assert.assertEquals(150,
|
||||||
|
metrics.getLatencySucceededGetAppPriorityRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
goodSubCluster.getAppPriorityRetrieved(300);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
|
metrics.getNumSucceededGetAppPriorityRetrieved());
|
||||||
|
Assert.assertEquals(225,
|
||||||
|
metrics.getLatencySucceededGetAppPriorityRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppPriorityRetrievedFailed() {
|
||||||
|
long totalBadBefore = metrics.getAppPriorityFailedRetrieved();
|
||||||
|
badSubCluster.getAppPriorityFailed();
|
||||||
|
Assert.assertEquals(totalBadBefore + 1,
|
||||||
|
metrics.getAppPriorityFailedRetrieved());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppQueueLatencyRetrieved() {
|
||||||
|
long totalGoodBefore = metrics.getNumSucceededGetAppQueueRetrieved();
|
||||||
|
goodSubCluster.getAppQueueRetrieved(150);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
|
metrics.getNumSucceededGetAppQueueRetrieved());
|
||||||
|
Assert.assertEquals(150,
|
||||||
|
metrics.getLatencySucceededGetAppQueueRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
goodSubCluster.getAppQueueRetrieved(300);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
|
metrics.getNumSucceededGetAppQueueRetrieved());
|
||||||
|
Assert.assertEquals(225,
|
||||||
|
metrics.getLatencySucceededGetAppQueueRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppQueueRetrievedFailed() {
|
||||||
|
long totalBadBefore = metrics.getAppQueueFailedRetrieved();
|
||||||
|
badSubCluster.getAppQueueFailed();
|
||||||
|
Assert.assertEquals(totalBadBefore + 1,
|
||||||
|
metrics.getAppQueueFailedRetrieved());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateAppQueueLatencyRetrieved() {
|
||||||
|
long totalGoodBefore = metrics.getNumSucceededUpdateAppQueueRetrieved();
|
||||||
|
goodSubCluster.getUpdateQueueRetrieved(150);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
|
metrics.getNumSucceededUpdateAppQueueRetrieved());
|
||||||
|
Assert.assertEquals(150,
|
||||||
|
metrics.getLatencySucceededUpdateAppQueueRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
goodSubCluster.getUpdateQueueRetrieved(300);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
|
metrics.getNumSucceededUpdateAppQueueRetrieved());
|
||||||
|
Assert.assertEquals(225,
|
||||||
|
metrics.getLatencySucceededUpdateAppQueueRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateAppQueueRetrievedFailed() {
|
||||||
|
long totalBadBefore = metrics.getUpdateAppQueueFailedRetrieved();
|
||||||
|
badSubCluster.getUpdateQueueFailed();
|
||||||
|
Assert.assertEquals(totalBadBefore + 1,
|
||||||
|
metrics.getUpdateAppQueueFailedRetrieved());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppTimeoutLatencyRetrieved() {
|
||||||
|
long totalGoodBefore = metrics.getNumSucceededGetAppTimeoutRetrieved();
|
||||||
|
goodSubCluster.getAppTimeoutRetrieved(150);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
|
metrics.getNumSucceededGetAppTimeoutRetrieved());
|
||||||
|
Assert.assertEquals(150,
|
||||||
|
metrics.getLatencySucceededGetAppTimeoutRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
goodSubCluster.getAppTimeoutRetrieved(300);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
|
metrics.getNumSucceededGetAppTimeoutRetrieved());
|
||||||
|
Assert.assertEquals(225,
|
||||||
|
metrics.getLatencySucceededGetAppTimeoutRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppTimeoutRetrievedFailed() {
|
||||||
|
long totalBadBefore = metrics.getAppTimeoutFailedRetrieved();
|
||||||
|
badSubCluster.getAppTimeoutFailed();
|
||||||
|
Assert.assertEquals(totalBadBefore + 1,
|
||||||
|
metrics.getAppTimeoutFailedRetrieved());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppTimeoutsLatencyRetrieved() {
|
||||||
|
long totalGoodBefore = metrics.getNumSucceededGetAppTimeoutsRetrieved();
|
||||||
|
goodSubCluster.getAppTimeoutsRetrieved(150);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
|
metrics.getNumSucceededGetAppTimeoutsRetrieved());
|
||||||
|
Assert.assertEquals(150,
|
||||||
|
metrics.getLatencySucceededGetAppTimeoutsRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
goodSubCluster.getAppTimeoutsRetrieved(300);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
|
metrics.getNumSucceededGetAppTimeoutsRetrieved());
|
||||||
|
Assert.assertEquals(225,
|
||||||
|
metrics.getLatencySucceededGetAppTimeoutsRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAppTimeoutsRetrievedFailed() {
|
||||||
|
long totalBadBefore = metrics.getAppTimeoutsFailedRetrieved();
|
||||||
|
badSubCluster.getAppTimeoutsFailed();
|
||||||
|
Assert.assertEquals(totalBadBefore + 1,
|
||||||
|
metrics.getAppTimeoutsFailedRetrieved());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -117,6 +117,7 @@
|
|||||||
import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
|
import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
|
||||||
import org.apache.hadoop.yarn.util.SystemClock;
|
import org.apache.hadoop.yarn.util.SystemClock;
|
||||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
import org.apache.hadoop.yarn.webapp.BadRequestException;
|
||||||
import org.apache.hadoop.yarn.webapp.NotFoundException;
|
import org.apache.hadoop.yarn.webapp.NotFoundException;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -343,6 +344,23 @@ public ContainersInfo getContainers(HttpServletRequest req, HttpServletResponse
|
|||||||
throw new RuntimeException("RM is stopped");
|
throw new RuntimeException("RM is stopped");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try format conversion for app_id
|
||||||
|
ApplicationId applicationId = null;
|
||||||
|
try {
|
||||||
|
applicationId = ApplicationId.fromString(appId);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new BadRequestException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try format conversion for app_attempt_id
|
||||||
|
ApplicationAttemptId applicationAttemptId = null;
|
||||||
|
try {
|
||||||
|
applicationAttemptId =
|
||||||
|
ApplicationAttemptId.fromString(appAttemptId);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new BadRequestException(e);
|
||||||
|
}
|
||||||
|
|
||||||
// We avoid to check if the Application exists in the system because we need
|
// We avoid to check if the Application exists in the system because we need
|
||||||
// to validate that each subCluster returns 1 container.
|
// to validate that each subCluster returns 1 container.
|
||||||
ContainersInfo containers = new ContainersInfo();
|
ContainersInfo containers = new ContainersInfo();
|
||||||
@ -453,8 +471,7 @@ public ContainerInfo getContainer(HttpServletRequest req, HttpServletResponse re
|
|||||||
throw new RuntimeException("RM is stopped");
|
throw new RuntimeException("RM is stopped");
|
||||||
}
|
}
|
||||||
|
|
||||||
ContainerId newContainerId = ContainerId.newContainerId(
|
ContainerId newContainerId = ContainerId.fromString(containerId);
|
||||||
ApplicationAttemptId.fromString(appAttemptId), Integer.valueOf(containerId));
|
|
||||||
|
|
||||||
Resource allocatedResource = Resource.newInstance(1024, 2);
|
Resource allocatedResource = Resource.newInstance(1024, 2);
|
||||||
|
|
||||||
@ -505,15 +522,15 @@ public AppAttemptInfo getAppAttempt(HttpServletRequest req, HttpServletResponse
|
|||||||
throw new NotFoundException("app with id: " + appId + " not found");
|
throw new NotFoundException("app with id: " + appId + " not found");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ApplicationAttemptId attemptId = ApplicationAttemptId.fromString(appAttemptId);
|
||||||
|
|
||||||
ApplicationReport newApplicationReport = ApplicationReport.newInstance(
|
ApplicationReport newApplicationReport = ApplicationReport.newInstance(
|
||||||
applicationId, ApplicationAttemptId.newInstance(applicationId, Integer.parseInt(appAttemptId)),
|
applicationId, attemptId, "user", "queue", "appname", "host", 124, null,
|
||||||
"user", "queue", "appname", "host", 124, null,
|
|
||||||
YarnApplicationState.RUNNING, "diagnostics", "url", 1, 2, 3, 4,
|
YarnApplicationState.RUNNING, "diagnostics", "url", 1, 2, 3, 4,
|
||||||
FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.53789f, "YARN", null);
|
FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.53789f, "YARN", null);
|
||||||
|
|
||||||
ApplicationAttemptReport attempt = ApplicationAttemptReport.newInstance(
|
ApplicationAttemptReport attempt = ApplicationAttemptReport.newInstance(
|
||||||
ApplicationAttemptId.newInstance(applicationId, Integer.parseInt(appAttemptId)),
|
attemptId, "host", 124, "url", "oUrl", "diagnostics",
|
||||||
"host", 124, "url", "oUrl", "diagnostics",
|
|
||||||
YarnApplicationAttemptState.FINISHED, ContainerId.newContainerId(
|
YarnApplicationAttemptState.FINISHED, ContainerId.newContainerId(
|
||||||
newApplicationReport.getCurrentApplicationAttemptId(), 1));
|
newApplicationReport.getCurrentApplicationAttemptId(), 1));
|
||||||
|
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
import javax.ws.rs.core.Response;
|
import javax.ws.rs.core.Response;
|
||||||
import javax.ws.rs.core.Response.Status;
|
import javax.ws.rs.core.Response.Status;
|
||||||
|
|
||||||
|
import org.apache.hadoop.test.LambdaTestUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.http.HttpConfig;
|
import org.apache.hadoop.http.HttpConfig;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
@ -41,6 +42,7 @@
|
|||||||
import org.apache.hadoop.yarn.api.records.NodeLabel;
|
import org.apache.hadoop.yarn.api.records.NodeLabel;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;
|
import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;
|
||||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.server.federation.policies.manager.UniformBroadcastPolicyManager;
|
import org.apache.hadoop.yarn.server.federation.policies.manager.UniformBroadcastPolicyManager;
|
||||||
@ -634,23 +636,28 @@ public void testGetContainers()
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetContainersNotExists() {
|
public void testGetContainersNotExists() throws Exception {
|
||||||
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
|
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
|
||||||
ContainersInfo response = interceptor.getContainers(null, null, appId.toString(), null);
|
LambdaTestUtils.intercept(IllegalArgumentException.class,
|
||||||
Assert.assertTrue(response.getContainers().isEmpty());
|
"Parameter error, the appAttemptId is empty or null.",
|
||||||
|
() -> interceptor.getContainers(null, null, appId.toString(), null));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetContainersWrongFormat() {
|
public void testGetContainersWrongFormat() throws Exception {
|
||||||
ContainersInfo response = interceptor.getContainers(null, null, "Application_wrong_id", null);
|
|
||||||
|
|
||||||
Assert.assertNotNull(response);
|
|
||||||
Assert.assertTrue(response.getContainers().isEmpty());
|
|
||||||
|
|
||||||
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
|
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
|
||||||
response = interceptor.getContainers(null, null, appId.toString(), "AppAttempt_wrong_id");
|
ApplicationAttemptId appAttempt = ApplicationAttemptId.newInstance(appId, 1);
|
||||||
|
|
||||||
Assert.assertTrue(response.getContainers().isEmpty());
|
// Test Case 1: appId is wrong format, appAttemptId is accurate.
|
||||||
|
LambdaTestUtils.intercept(IllegalArgumentException.class,
|
||||||
|
"Invalid ApplicationId prefix: Application_wrong_id. " +
|
||||||
|
"The valid ApplicationId should start with prefix application",
|
||||||
|
() -> interceptor.getContainers(null, null, "Application_wrong_id", appAttempt.toString()));
|
||||||
|
|
||||||
|
// Test Case2: appId is accurate, appAttemptId is wrong format.
|
||||||
|
LambdaTestUtils.intercept(IllegalArgumentException.class,
|
||||||
|
"Invalid AppAttemptId prefix: AppAttempt_wrong_id",
|
||||||
|
() -> interceptor.getContainers(null, null, appId.toString(), "AppAttempt_wrong_id"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -739,20 +746,28 @@ public void testGetLabelsOnNode() throws Exception {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetContainer()
|
public void testGetContainer() throws Exception {
|
||||||
throws IOException, InterruptedException, YarnException {
|
//
|
||||||
// Submit application to multiSubCluster
|
|
||||||
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
|
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
|
||||||
ApplicationSubmissionContextInfo context = new ApplicationSubmissionContextInfo();
|
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
|
||||||
context.setApplicationId(appId.toString());
|
ContainerId appContainerId = ContainerId.newContainerId(appAttemptId, 1);
|
||||||
|
String applicationId = appId.toString();
|
||||||
|
String attemptId = appAttemptId.toString();
|
||||||
|
String containerId = appContainerId.toString();
|
||||||
|
|
||||||
|
// Submit application to multiSubCluster
|
||||||
|
ApplicationSubmissionContextInfo context = new ApplicationSubmissionContextInfo();
|
||||||
|
context.setApplicationId(applicationId);
|
||||||
Assert.assertNotNull(interceptor.submitApplication(context, null));
|
Assert.assertNotNull(interceptor.submitApplication(context, null));
|
||||||
|
|
||||||
ApplicationAttemptId appAttemptId =
|
// Test Case1: Wrong ContainerId
|
||||||
ApplicationAttemptId.newInstance(appId, 1);
|
LambdaTestUtils.intercept(IllegalArgumentException.class, "Invalid ContainerId prefix: 0",
|
||||||
|
() -> interceptor.getContainer(null, null, applicationId, attemptId, "0"));
|
||||||
|
|
||||||
ContainerInfo containerInfo = interceptor.getContainer(null, null,
|
// Test Case2: Correct ContainerId
|
||||||
appId.toString(), appAttemptId.toString(), "0");
|
|
||||||
|
ContainerInfo containerInfo = interceptor.getContainer(null, null, applicationId,
|
||||||
|
attemptId, containerId);
|
||||||
Assert.assertNotNull(containerInfo);
|
Assert.assertNotNull(containerInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -800,9 +815,10 @@ public void testGetAppAttempt()
|
|||||||
// Generate ApplicationAttemptId information
|
// Generate ApplicationAttemptId information
|
||||||
Assert.assertNotNull(interceptor.submitApplication(context, null));
|
Assert.assertNotNull(interceptor.submitApplication(context, null));
|
||||||
ApplicationAttemptId expectAppAttemptId = ApplicationAttemptId.newInstance(appId, 1);
|
ApplicationAttemptId expectAppAttemptId = ApplicationAttemptId.newInstance(appId, 1);
|
||||||
|
String appAttemptId = expectAppAttemptId.toString();
|
||||||
|
|
||||||
org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo
|
org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo
|
||||||
appAttemptInfo = interceptor.getAppAttempt(null, null, appId.toString(), "1");
|
appAttemptInfo = interceptor.getAppAttempt(null, null, appId.toString(), appAttemptId);
|
||||||
|
|
||||||
Assert.assertNotNull(appAttemptInfo);
|
Assert.assertNotNull(appAttemptInfo);
|
||||||
Assert.assertEquals(expectAppAttemptId.toString(), appAttemptInfo.getAppAttemptId());
|
Assert.assertEquals(expectAppAttemptId.toString(), appAttemptInfo.getAppAttemptId());
|
||||||
|
Loading…
x
Reference in New Issue
Block a user