+ */
+ public static final Integer HTTP_STATUS_CATEGORY_QUOTIENT = 100;
private AbfsHttpConstants() {}
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java
index 8bc31c4f92b..8a5e9db8553 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java
@@ -66,6 +66,10 @@ public enum AzureServiceErrorCode {
return this.errorCode;
}
+ public String getErrorMessage() {
+ return this.errorMessage;
+ }
+
public static List<AzureServiceErrorCode> getAzureServiceCode(int httpStatusCode) {
List<AzureServiceErrorCode> errorCodes = new ArrayList<>();
if (httpStatusCode == UNKNOWN.httpStatusCode) {
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java
index 00da9b66013..ad99020390a 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java
@@ -28,6 +28,7 @@ import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.azurebfs.AbfsStatistic;
import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
@@ -73,6 +74,12 @@ public class AbfsRestOperation {
private AbfsHttpOperation result;
private AbfsCounters abfsCounters;
+ /**
+ * Stores the failure reason of the most recent API call made by this
+ * AbfsRestOperation object; it is sent with the next retry's request-id header.
+ */
+ private String failureReason;
+
/**
* Checks if there is non-null HTTP response.
* @return true if there is a non-null HTTP response from the ABFS call.
@@ -208,7 +215,7 @@ public class AbfsRestOperation {
private void completeExecute(TracingContext tracingContext)
throws AzureBlobFileSystemException {
// see if we have latency reports from the previous requests
- String latencyHeader = this.client.getAbfsPerfTracker().getClientLatency();
+ String latencyHeader = getClientLatency();
if (latencyHeader != null && !latencyHeader.isEmpty()) {
AbfsHttpHeader httpHeader =
new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_ABFS_CLIENT_LATENCY, latencyHeader);
@@ -237,6 +244,11 @@ public class AbfsRestOperation {
LOG.trace("{} REST operation complete", operationType);
}
+ @VisibleForTesting
+ String getClientLatency() {
+ return client.getAbfsPerfTracker().getClientLatency();
+ }
+
/**
* Executes a single HTTP operation to complete the REST operation. If it
* fails, there may be a retry. The retryCount is incremented with each
@@ -248,9 +260,9 @@ public class AbfsRestOperation {
try {
// initialize the HTTP request and open the connection
- httpOperation = new AbfsHttpOperation(url, method, requestHeaders);
+ httpOperation = createHttpOperation();
incrementCounter(AbfsStatistic.CONNECTIONS_MADE, 1);
- tracingContext.constructHeader(httpOperation);
+ tracingContext.constructHeader(httpOperation, failureReason);
switch(client.getAuthType()) {
case Custom:
@@ -303,6 +315,7 @@ public class AbfsRestOperation {
} catch (UnknownHostException ex) {
String hostname = null;
hostname = httpOperation.getHost();
+ failureReason = RetryReason.getAbbreviation(ex, null, null);
LOG.warn("Unknown host name: {}. Retrying to resolve the host name...",
hostname);
if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) {
@@ -314,6 +327,8 @@ public class AbfsRestOperation {
LOG.debug("HttpRequestFailure: {}, {}", httpOperation, ex);
}
+ failureReason = RetryReason.getAbbreviation(ex, -1, "");
+
if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) {
throw new InvalidAbfsRestOperationException(ex);
}
@@ -326,6 +341,8 @@ public class AbfsRestOperation {
LOG.debug("HttpRequest: {}: {}", operationType, httpOperation);
if (client.getRetryPolicy().shouldRetry(retryCount, httpOperation.getStatusCode())) {
+ int status = httpOperation.getStatusCode();
+ failureReason = RetryReason.getAbbreviation(null, status, httpOperation.getStorageErrorMessage());
return false;
}
@@ -334,6 +351,15 @@ public class AbfsRestOperation {
return true;
}
+ /**
+ * Creates a new {@link AbfsHttpOperation} object from the url, method, and
+ * requestHeaders fields of this AbfsRestOperation.
+ */
+ @VisibleForTesting
+ AbfsHttpOperation createHttpOperation() throws IOException {
+ return new AbfsHttpOperation(url, method, requestHeaders);
+ }
+
/**
* Incrementing Abfs counters with a long value.
*
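The two package-private hooks annotated @VisibleForTesting above (getClientLatency() and createHttpOperation()) exist so a unit test can drive the retry loop without real network calls. A minimal, hypothetical Mockito sketch (not the patch's own test; it assumes a restOperation instance built by the test, an enclosing test method that declares throws Exception, and Mockito on the test classpath):

    // Hypothetical sketch: stub the @VisibleForTesting hooks on a spied AbfsRestOperation
    // so every attempt appears throttled (HTTP 503). The retry loop then records a
    // failureReason, which is appended to the next attempt's client-request-id header.
    AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class);
    Mockito.when(httpOperation.getStatusCode())
        .thenReturn(HttpURLConnection.HTTP_UNAVAILABLE);

    AbfsRestOperation op = Mockito.spy(restOperation);   // restOperation built elsewhere
    Mockito.doReturn(httpOperation).when(op).createHttpOperation();
    Mockito.doReturn("").when(op).getClientLatency();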
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java
new file mode 100644
index 00000000000..40e8cdc1e07
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ClientErrorRetryReason;
+import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ConnectionResetRetryReason;
+import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ConnectionTimeoutRetryReason;
+import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ReadTimeoutRetryReason;
+import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.RetryReasonCategory;
+import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ServerErrorRetryReason;
+import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownHostRetryReason;
+import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownIOExceptionRetryReason;
+import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownSocketExceptionRetryReason;
+
+
+/**
+ * This utility class exposes methods to convert a server response-error to a
+ * category of error.
+ */
+final class RetryReason {
+
+ /**
+ * Linked-list of the implementations of RetryReasonCategory. The objects in the
+ * list are arranged by the rank of their significance.
+ *
+ * <ul>
+ * <li>ServerError (statusCode==5XX) and ClientError (statusCode==4XX) are
+ * independent of the other retryReason categories.</li>
+ * <li>Since {@link java.net.SocketException} is a subclass of
+ * {@link java.io.IOException}, {@link UnknownIOExceptionRetryReason} is
+ * placed before {@link UnknownSocketExceptionRetryReason}.</li>
+ * <li>Since connectionTimeout, readTimeout, and connectionReset failures are
+ * differentiated only by the exception message,
+ * {@link ConnectionTimeoutRetryReason}, {@link ReadTimeoutRetryReason}, and
+ * {@link ConnectionResetRetryReason} are placed above
+ * {@link UnknownIOExceptionRetryReason}; there is no ordering among the
+ * three.</li>
+ * <li>Since {@link java.net.UnknownHostException} is a subclass of
+ * {@link java.io.IOException}, {@link UnknownHostRetryReason} is placed
+ * above {@link UnknownIOExceptionRetryReason}.</li>
+ * </ul>
+ */
+ private static List<RetryReasonCategory> rankedReasonCategories
+ = new LinkedList<RetryReasonCategory>() {{
+ add(new ServerErrorRetryReason());
+ add(new ClientErrorRetryReason());
+ add(new UnknownIOExceptionRetryReason());
+ add(new UnknownSocketExceptionRetryReason());
+ add(new ConnectionTimeoutRetryReason());
+ add(new ReadTimeoutRetryReason());
+ add(new UnknownHostRetryReason());
+ add(new ConnectionResetRetryReason());
+ }};
+
+ private RetryReason() {
+
+ }
+
+ /**
+ * Method to get the correct abbreviation for a given set of exception,
+ * statusCode, and storageErrorMessage.
+ *
+ * @param ex exception caught during server communication.
+ * @param statusCode statusCode in the server response.
+ * @param storageErrorMessage storageErrorMessage in the server response.
+ *
+ * @return abbreviation for the given set of exception, statusCode, and storageErrorMessage.
+ */
+ static String getAbbreviation(Exception ex,
+ Integer statusCode,
+ String storageErrorMessage) {
+ String result = null;
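+ // rankedReasonCategories is ordered from least to most significant, so the
+ // last category that can capture the failure supplies the abbreviation.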
+ for (RetryReasonCategory retryReasonCategory : rankedReasonCategories) {
+ final String abbreviation
+ = retryReasonCategory.captureAndGetAbbreviation(ex,
+ statusCode, storageErrorMessage);
+ if (abbreviation != null) {
+ result = abbreviation;
+ }
+ }
+ return result;
+ }
+}
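Because getAbbreviation() walks every ranked category and keeps the last match, a more specific category overrides a generic one. A small, hypothetical illustration (not part of the patch; it assumes it lives in the org.apache.hadoop.fs.azurebfs.services package, since RetryReason and getAbbreviation are package-private):

    package org.apache.hadoop.fs.azurebfs.services;

    import java.net.SocketTimeoutException;
    import java.net.UnknownHostException;

    public final class RetryReasonExample {
      public static void main(String[] args) {
        // SocketTimeoutException is an IOException, so UnknownIOExceptionRetryReason
        // matches first ("IOE"), but the higher-ranked ReadTimeoutRetryReason also
        // matches the "Read timed out" message and wins: prints "RT".
        System.out.println(RetryReason.getAbbreviation(
            new SocketTimeoutException("Read timed out"), null, null));
        // UnknownHostRetryReason outranks the generic IOException category: prints "UH".
        System.out.println(RetryReason.getAbbreviation(
            new UnknownHostException("account.dfs.core.windows.net"), null, null));
        // A 503 with no recognised storage error message falls back to the status code: prints "503".
        System.out.println(RetryReason.getAbbreviation(null, 503, null));
      }
    }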
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java
new file mode 100644
index 00000000000..8a0af183e30
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+public final class RetryReasonConstants {
+
+ private RetryReasonConstants() {
+
+ }
+ public static final String CONNECTION_TIMEOUT_JDK_MESSAGE = "connect timed out";
+ public static final String READ_TIMEOUT_JDK_MESSAGE = "Read timed out";
+ public static final String CONNECTION_RESET_MESSAGE = "Connection reset";
+ public static final String OPERATION_BREACH_MESSAGE = "Operations per second is over the account limit.";
+ public static final String CONNECTION_RESET_ABBREVIATION = "CR";
+ public static final String CONNECTION_TIMEOUT_ABBREVIATION = "CT";
+ public static final String READ_TIMEOUT_ABBREVIATION = "RT";
+ public static final String INGRESS_LIMIT_BREACH_ABBREVIATION = "ING";
+ public static final String EGRESS_LIMIT_BREACH_ABBREVIATION = "EGR";
+ public static final String OPERATION_LIMIT_BREACH_ABBREVIATION = "OPR";
+ public static final String UNKNOWN_HOST_EXCEPTION_ABBREVIATION = "UH";
+ public static final String IO_EXCEPTION_ABBREVIATION = "IOE";
+ public static final String SOCKET_EXCEPTION_ABBREVIATION = "SE";
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java
new file mode 100644
index 00000000000..cf1c47e3eb0
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_STATUS_CATEGORY_QUOTIENT;
+
+/**
+ * Category that can capture server-response errors for 4XX status-code.
+ */
+public class ClientErrorRetryReason extends RetryReasonCategory {
+
+ @Override
+ Boolean canCapture(final Exception ex,
+ final Integer statusCode,
+ final String serverErrorMessage) {
+ if (statusCode == null || statusCode / HTTP_STATUS_CATEGORY_QUOTIENT != 4) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ String getAbbreviation(final Integer statusCode,
+ final String serverErrorMessage) {
+ return statusCode + "";
+ }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java
new file mode 100644
index 00000000000..702f8875646
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_ABBREVIATION;
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE;
+
+/**
+ * Category that can capture server-response errors for connection-reset exception.
+ */
+public class ConnectionResetRetryReason extends
+ RetryReasonCategory {
+
+ @Override
+ Boolean canCapture(final Exception ex,
+ final Integer statusCode,
+ final String serverErrorMessage) {
+ return checkExceptionMessage(ex, CONNECTION_RESET_MESSAGE);
+ }
+
+ @Override
+ String getAbbreviation(final Integer statusCode,
+ final String serverErrorMessage) {
+ return CONNECTION_RESET_ABBREVIATION;
+ }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java
new file mode 100644
index 00000000000..28f35dcc805
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION;
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE;
+
+/**
+ * Category that can capture server-response errors for connection-timeout.
+ */
+public class ConnectionTimeoutRetryReason extends
+ RetryReasonCategory {
+
+ @Override
+ String getAbbreviation(final Integer statusCode,
+ final String serverErrorMessage) {
+ return CONNECTION_TIMEOUT_ABBREVIATION;
+ }
+
+ @Override
+ Boolean canCapture(final Exception ex,
+ final Integer statusCode,
+ final String serverErrorMessage) {
+ return checkExceptionMessage(ex, CONNECTION_TIMEOUT_JDK_MESSAGE);
+ }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java
new file mode 100644
index 00000000000..4663d9a52bb
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_ABBREVIATION;
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_JDK_MESSAGE;
+
+/**
+ * Category that can capture server-response errors for read-timeout.
+ */
+public class ReadTimeoutRetryReason extends RetryReasonCategory {
+
+ @Override
+ Boolean canCapture(final Exception ex,
+ final Integer statusCode,
+ final String serverErrorMessage) {
+ return checkExceptionMessage(ex, READ_TIMEOUT_JDK_MESSAGE);
+ }
+
+ @Override
+ String getAbbreviation(final Integer statusCode,
+ final String serverErrorMessage) {
+ return READ_TIMEOUT_ABBREVIATION;
+ }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java
new file mode 100644
index 00000000000..893451b496f
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+import java.util.Locale;
+
+/**
+ * Provides methods to decide whether a given server-response error can be
+ * categorised into a certain category. Each category is a separate
+ * implementation of this abstract class.
+ */
+public abstract class RetryReasonCategory {
+
+ /**
+ * Returns whether the given server-response error can be categorised by the implementation.
+ *
+ * @param ex exception captured in the server response.
+ * @param statusCode statusCode on the server response
+ * @param serverErrorMessage serverErrorMessage on the server response.
+ *
+ * @return true if the server-response error can be categorised by the
+ * implementation; false otherwise.
+ */
+ abstract Boolean canCapture(Exception ex,
+ Integer statusCode,
+ String serverErrorMessage);
+
+ /**
+ * Returns the abbreviation corresponding to the server response error.
+ *
+ * @param statusCode statusCode on the server response
+ * @param serverErrorMessage serverErrorMessage on the server response.
+ *
+ * @return abbreviation on the basis of the statusCode and the serverErrorMessage
+ */
+ abstract String getAbbreviation(Integer statusCode, String serverErrorMessage);
+
+ /**
+ * Converts the server-error response to an abbreviation if the response can be
+ * categorised by the implementation.
+ *
+ * @param ex exception received while making API request
+ * @param statusCode statusCode received in the server-response
+ * @param serverErrorMessage error-message received in the server-response
+ *
+ * @return abbreviation if the server-response can be categorised by the implementation.
+ * null if the server-response can not be categorised by the implementation.
+ */
+ public String captureAndGetAbbreviation(Exception ex,
+ Integer statusCode,
+ String serverErrorMessage) {
+ if (canCapture(ex, statusCode, serverErrorMessage)) {
+ return getAbbreviation(statusCode, serverErrorMessage);
+ }
+ return null;
+ }
+
+ /**
+ * Checks if a required search-string is in the exception's message.
+ */
+ Boolean checkExceptionMessage(final Exception exceptionCaptured,
+ final String search) {
+ if (search == null) {
+ return false;
+ }
+ if (exceptionCaptured != null
+ && exceptionCaptured.getMessage() != null
+ && exceptionCaptured.getMessage()
+ .toLowerCase(Locale.US)
+ .contains(search.toLowerCase(Locale.US))) {
+ return true;
+ }
+ return false;
+ }
+}
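captureAndGetAbbreviation() is the only public entry point of a category: it returns an abbreviation when canCapture() recognises the failure, and null otherwise so that a differently ranked category can handle it. A hypothetical illustration (not part of the patch):

    package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;

    import java.net.SocketException;

    public final class RetryReasonCategoryExample {
      public static void main(String[] args) {
        RetryReasonCategory connectionReset = new ConnectionResetRetryReason();
        // Message contains "Connection reset" -> prints "CR".
        System.out.println(connectionReset.captureAndGetAbbreviation(
            new SocketException("Connection reset by peer"), null, null));
        // Message does not match -> prints "null"; another category must capture it.
        System.out.println(connectionReset.captureAndGetAbbreviation(
            new SocketException("Network is unreachable"), null, null));
      }
    }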
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java
new file mode 100644
index 00000000000..dd67a0cb8cb
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+import static java.net.HttpURLConnection.HTTP_UNAVAILABLE;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_STATUS_CATEGORY_QUOTIENT;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT;
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION;
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.INGRESS_LIMIT_BREACH_ABBREVIATION;
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_BREACH_MESSAGE;
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_LIMIT_BREACH_ABBREVIATION;
+
+/**
+ * Category that can capture server-response errors for 5XX status-code.
+ */
+public class ServerErrorRetryReason extends RetryReasonCategory {
+
+ @Override
+ Boolean canCapture(final Exception ex,
+ final Integer statusCode,
+ final String serverErrorMessage) {
+ if (statusCode == null || statusCode / HTTP_STATUS_CATEGORY_QUOTIENT != 5) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ String getAbbreviation(final Integer statusCode,
+ final String serverErrorMessage) {
+ if (statusCode == HTTP_UNAVAILABLE && serverErrorMessage != null) {
+ String splitServerErrorMessage = serverErrorMessage.split(System.lineSeparator(),
+ 2)[0];
+ if (INGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage().equalsIgnoreCase(
+ splitServerErrorMessage)) {
+ return INGRESS_LIMIT_BREACH_ABBREVIATION;
+ }
+ if (EGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage().equalsIgnoreCase(
+ splitServerErrorMessage)) {
+ return EGRESS_LIMIT_BREACH_ABBREVIATION;
+ }
+ if (OPERATION_BREACH_MESSAGE.equalsIgnoreCase(
+ splitServerErrorMessage)) {
+ return OPERATION_LIMIT_BREACH_ABBREVIATION;
+ }
+ return HTTP_UNAVAILABLE + "";
+ }
+ return statusCode + "";
+ }
+}
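For 503 responses, the first line of the storage error message decides whether the throttling was ingress, egress, or operations-per-second; anything else falls back to the bare status code. A hypothetical sketch (not part of the patch; it assumes the same package, since RetryReason.getAbbreviation is package-private):

    package org.apache.hadoop.fs.azurebfs.services;

    import static java.net.HttpURLConnection.HTTP_UNAVAILABLE;
    import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT;
    import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_BREACH_MESSAGE;

    public final class ServerErrorAbbreviationExample {
      public static void main(String[] args) {
        // Ingress throttling message -> prints "ING".
        System.out.println(RetryReason.getAbbreviation(
            null, HTTP_UNAVAILABLE, INGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage()));
        // Operations-per-second throttling message -> prints "OPR".
        System.out.println(RetryReason.getAbbreviation(
            null, HTTP_UNAVAILABLE, OPERATION_BREACH_MESSAGE));
        // Any other 503 -> prints "503".
        System.out.println(RetryReason.getAbbreviation(null, HTTP_UNAVAILABLE, "Server busy"));
      }
    }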
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java
new file mode 100644
index 00000000000..c329348d81f
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+import java.net.UnknownHostException;
+
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.UNKNOWN_HOST_EXCEPTION_ABBREVIATION;
+
+/**
+ * Category that can capture server-response errors for {@link UnknownHostException}.
+ */
+public class UnknownHostRetryReason extends RetryReasonCategory {
+
+ @Override
+ Boolean canCapture(final Exception ex,
+ final Integer statusCode,
+ final String serverErrorMessage) {
+ if (ex instanceof UnknownHostException) {
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ String getAbbreviation(final Integer statusCode,
+ final String serverErrorMessage) {
+ return UNKNOWN_HOST_EXCEPTION_ABBREVIATION;
+ }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java
new file mode 100644
index 00000000000..8a69ebb928d
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+import java.io.IOException;
+
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.IO_EXCEPTION_ABBREVIATION;
+
+
+/**
+ * Category that can capture server-response errors for {@link IOException}.
+ */
+public class UnknownIOExceptionRetryReason extends
+ RetryReasonCategory {
+
+ @Override
+ Boolean canCapture(final Exception ex,
+ final Integer statusCode,
+ final String serverErrorMessage) {
+ if (ex instanceof IOException) {
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ String getAbbreviation(final Integer statusCode,
+ final String serverErrorMessage) {
+ return IO_EXCEPTION_ABBREVIATION;
+ }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java
new file mode 100644
index 00000000000..18e9f115fea
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+import java.net.SocketException;
+
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.SOCKET_EXCEPTION_ABBREVIATION;
+
+/**
+ * Category that can capture server-response errors for {@link SocketException}.
+ */
+public class UnknownSocketExceptionRetryReason extends
+ RetryReasonCategory {
+
+ @Override
+ Boolean canCapture(final Exception ex,
+ final Integer statusCode,
+ final String serverErrorMessage) {
+ if (ex instanceof SocketException) {
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ String getAbbreviation(final Integer statusCode,
+ final String serverErrorMessage) {
+ return SOCKET_EXCEPTION_ABBREVIATION;
+ }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java
new file mode 100644
index 00000000000..7d8078620af
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A retryReasonCategory defines methods applicable to server-response errors.
+ */
+@Private
+@Evolving
+package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Evolving;
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java
index 5a115451df1..9a2ccda36fb 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java
@@ -152,8 +152,10 @@ public class TracingContext {
* X_MS_CLIENT_REQUEST_ID header of the http operation
* @param httpOperation AbfsHttpOperation instance to set header into
* connection
+ * @param previousFailure failure reason (if any) seen on the previous attempt
+ * of the same operation from AbfsClient; appended to the request-id header.
*/
- public void constructHeader(AbfsHttpOperation httpOperation) {
+ public void constructHeader(AbfsHttpOperation httpOperation, String previousFailure) {
clientRequestId = UUID.randomUUID().toString();
switch (format) {
case ALL_ID_FORMAT: // Optional IDs (e.g. streamId) may be empty
@@ -161,6 +163,7 @@ public class TracingContext {
clientCorrelationID + ":" + clientRequestId + ":" + fileSystemID + ":"
+ primaryRequestId + ":" + streamID + ":" + opType + ":"
+ retryCount;
+ header = addFailureReasons(header, previousFailure);
break;
case TWO_ID_FORMAT:
header = clientCorrelationID + ":" + clientRequestId;
@@ -174,6 +177,14 @@ public class TracingContext {
httpOperation.setRequestProperty(HttpHeaderConfigurations.X_MS_CLIENT_REQUEST_ID, header);
}
+ private String addFailureReasons(final String header,
+ final String previousFailure) {
+ if (previousFailure == null) {
+ return header;
+ }
+ return String.format("%s_%s", header, previousFailure);
+ }
+
/**
* Return header representing the request associated with the tracingContext
* @return Header string set into X_MS_CLIENT_REQUEST_ID
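With this change, a retried request's x-ms-client-request-id carries the previous attempt's failure abbreviation as a suffix after the retry count. A hypothetical illustration of the suffixing (the identifier values are made up; the real fields come from TracingContext):

    // Mirrors addFailureReasons(): the abbreviation is appended only when a
    // previous attempt actually failed.
    String header = "corr-id:client-req-id:fs-id:primary-req-id:stream-id:RD:1"; // made-up IDs
    String previousFailure = "CT";  // previous attempt hit a connect timeout
    String sent = (previousFailure == null)
        ? header
        : String.format("%s_%s", header, previousFailure);
    // sent -> "corr-id:client-req-id:fs-id:primary-req-id:stream-id:RD:1_CT"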
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java
new file mode 100644
index 00000000000..bfa524a25e6
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java
@@ -0,0 +1,302 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.router.webapp.dao;
+
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import java.util.ArrayList;
+import java.util.List;
+
+@XmlRootElement
+@XmlAccessorType(XmlAccessType.FIELD)
+public class FederationClusterInfo extends ClusterInfo {
+
+ @XmlElement(name = "subCluster")
+ private List<ClusterInfo> list = new ArrayList<>();
+
+ public FederationClusterInfo() {
+ } // JAXB needs this
+
+ public FederationClusterInfo(ArrayList<ClusterInfo> list) {
+ this.list = list;
+ }
+
+ public List<ClusterInfo> getList() {
+ return list;
+ }
+
+ public void setList(List<ClusterInfo> list) {
+ this.list = list;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterUserInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterUserInfo.java
new file mode 100644
index 00000000000..b4a19b7919d
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterUserInfo.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.router.webapp.dao;
+
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import java.util.ArrayList;
+import java.util.List;
+
+@XmlRootElement
+@XmlAccessorType(XmlAccessType.FIELD)
+public class FederationClusterUserInfo extends ClusterUserInfo {
+ @XmlElement(name = "subCluster")
+ private List<ClusterUserInfo> list = new ArrayList<>();
+
+ public FederationClusterUserInfo() {
+ } // JAXB needs this
+
+ public FederationClusterUserInfo(ArrayList<ClusterUserInfo> list) {
+ this.list = list;
+ }
+
+ public List<ClusterUserInfo> getList() {
+ return list;
+ }
+
+ public void setList(List<ClusterUserInfo> list) {
+ this.list = list;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java
index a3756174573..955948c91c8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java
@@ -568,6 +568,16 @@ public class TestRouterMetrics {
LOG.info("Mocked: failed getBulkActivitie call");
metrics.incrGetBulkActivitiesFailedRetrieved();
}
+
+ public void getClusterInfoFailed() {
+ LOG.info("Mocked: failed getClusterInfo call");
+ metrics.incrGetClusterInfoFailedRetrieved();
+ }
+
+ public void getClusterUserInfoFailed() {
+ LOG.info("Mocked: failed getClusterUserInfo call");
+ metrics.incrGetClusterUserInfoFailedRetrieved();
+ }
}
// Records successes for all calls
@@ -838,6 +848,16 @@ public class TestRouterMetrics {
LOG.info("Mocked: successful AddToClusterNodeLabels call with duration {}", duration);
metrics.succeededAddToClusterNodeLabelsRetrieved(duration);
}
+
+ public void getClusterInfoRetrieved(long duration) {
+ LOG.info("Mocked: successful GetClusterInfoRetrieved call with duration {}", duration);
+ metrics.succeededGetClusterInfoRetrieved(duration);
+ }
+
+ public void getClusterUserInfoRetrieved(long duration) {
+ LOG.info("Mocked: successful GetClusterUserInfoRetrieved call with duration {}", duration);
+ metrics.succeededGetClusterUserInfoRetrieved(duration);
+ }
}
@Test
@@ -1848,4 +1868,48 @@ public class TestRouterMetrics {
Assert.assertEquals(225,
metrics.getLatencySucceededAddToClusterNodeLabelsRetrieved(), ASSERT_DOUBLE_DELTA);
}
+
+ @Test
+ public void testGetClusterInfoRetrievedFailed() {
+ long totalBadBefore = metrics.getClusterInfoFailedRetrieved();
+ badSubCluster.getClusterInfoFailed();
+ Assert.assertEquals(totalBadBefore + 1, metrics.getClusterInfoFailedRetrieved());
+ }
+
+ @Test
+ public void testGetClusterInfoRetrieved() {
+ long totalGoodBefore = metrics.getNumSucceededGetClusterInfoRetrieved();
+ goodSubCluster.getClusterInfoRetrieved(150);
+ Assert.assertEquals(totalGoodBefore + 1,
+ metrics.getNumSucceededGetClusterInfoRetrieved());
+ Assert.assertEquals(150,
+ metrics.getLatencySucceededGetClusterInfoRetrieved(), ASSERT_DOUBLE_DELTA);
+ goodSubCluster.getClusterInfoRetrieved(300);
+ Assert.assertEquals(totalGoodBefore + 2,
+ metrics.getNumSucceededGetClusterInfoRetrieved());
+ Assert.assertEquals(225,
+ metrics.getLatencySucceededGetClusterInfoRetrieved(), ASSERT_DOUBLE_DELTA);
+ }
+
+ @Test
+ public void testGetClusterUserInfoRetrievedFailed() {
+ long totalBadBefore = metrics.getClusterUserInfoFailedRetrieved();
+ badSubCluster.getClusterUserInfoFailed();
+ Assert.assertEquals(totalBadBefore + 1, metrics.getClusterUserInfoFailedRetrieved());
+ }
+
+ @Test
+ public void testGetClusterUserInfoRetrieved() {
+ long totalGoodBefore = metrics.getNumSucceededGetClusterUserInfoRetrieved();
+ goodSubCluster.getClusterUserInfoRetrieved(150);
+ Assert.assertEquals(totalGoodBefore + 1,
+ metrics.getNumSucceededGetClusterUserInfoRetrieved());
+ Assert.assertEquals(150,
+ metrics.getLatencySucceededGetClusterUserInfoRetrieved(), ASSERT_DOUBLE_DELTA);
+ goodSubCluster.getClusterUserInfoRetrieved(300);
+ Assert.assertEquals(totalGoodBefore + 2,
+ metrics.getNumSucceededGetClusterUserInfoRetrieved());
+ Assert.assertEquals(225,
+ metrics.getLatencySucceededGetClusterUserInfoRetrieved(), ASSERT_DOUBLE_DELTA);
+ }
}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java
index 653224a7d37..c34167f9219 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java
@@ -111,7 +111,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppState;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ApplicationSubmissionContextInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppsInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NewApplication;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodesInfo;
@@ -161,7 +163,6 @@ import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
import static org.apache.hadoop.yarn.server.router.webapp.BaseRouterWebServicesTest.QUEUE_DEFAULT;
import static org.apache.hadoop.yarn.server.router.webapp.BaseRouterWebServicesTest.QUEUE_DEFAULT_FULL;
import static org.apache.hadoop.yarn.server.router.webapp.BaseRouterWebServicesTest.QUEUE_DEDICATED;
@@ -1363,4 +1364,17 @@ public class MockDefaultRequestInterceptorREST
}
throw new YarnException("removeFromClusterNodeLabels Error");
}
+
+ @Override
+ public ClusterInfo getClusterInfo() {
+ ClusterInfo clusterInfo = new ClusterInfo(mockRM);
+ return clusterInfo;
+ }
+
+ @Override
+ public ClusterUserInfo getClusterUserInfo(HttpServletRequest hsr) {
+ String remoteUser = hsr.getRemoteUser();
+ UserGroupInformation callerUGI = UserGroupInformation.createRemoteUser(remoteUser);
+ return new ClusterUserInfo(mockRM, callerUGI);
+ }
}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
index a2831657dc8..784fbd15ce1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
@@ -73,10 +73,13 @@ import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationHome
import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSubCluster;
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreTestUtil;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppState;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ApplicationSubmissionContextInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppsInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NewApplication;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
@@ -128,9 +131,12 @@ import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationBulkActivitiesInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationSchedulerTypeInfo;
+import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterInfo;
+import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterUserInfo;
import org.apache.hadoop.yarn.util.LRUCacheHashMap;
import org.apache.hadoop.yarn.util.MonotonicClock;
import org.apache.hadoop.yarn.util.Times;
+import org.apache.hadoop.yarn.util.YarnVersionInfo;
import org.apache.hadoop.yarn.webapp.BadRequestException;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
import org.junit.Assert;
@@ -2127,4 +2133,86 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
LambdaTestUtils.intercept(YarnRuntimeException.class, "removeFromClusterNodeLabels Error",
() -> interceptor.removeFromClusterNodeLabels(oldNodeLabels1, null));
}
+
+ @Test
+ public void testGetClusterUserInfo() {
+ String requestUserName = "test-user";
+ HttpServletRequest hsr = mock(HttpServletRequest.class);
+ when(hsr.getRemoteUser()).thenReturn(requestUserName);
+ ClusterUserInfo clusterUserInfo = interceptor.getClusterUserInfo(hsr);
+
+ Assert.assertNotNull(clusterUserInfo);
+ Assert.assertTrue(clusterUserInfo instanceof FederationClusterUserInfo);
+
+ FederationClusterUserInfo federationClusterUserInfo =
+ (FederationClusterUserInfo) clusterUserInfo;
+
+ List<ClusterUserInfo> fedClusterUserInfoList = federationClusterUserInfo.getList();
+ Assert.assertNotNull(fedClusterUserInfoList);
+ Assert.assertEquals(4, fedClusterUserInfoList.size());
+
+ List<String> subClusterIds = subClusters.stream().map(
+ subClusterId -> subClusterId.getId()).collect(Collectors.toList());
+ MockRM mockRM = interceptor.getMockRM();
+
+ for (ClusterUserInfo fedClusterUserInfo : fedClusterUserInfoList) {
+ // Check subClusterId
+ String subClusterId = fedClusterUserInfo.getSubClusterId();
+ Assert.assertNotNull(subClusterId);
+ Assert.assertTrue(subClusterIds.contains(subClusterId));
+
+ // Check requestedUser
+ String requestedUser = fedClusterUserInfo.getRequestedUser();
+ Assert.assertNotNull(requestedUser);
+ Assert.assertEquals(requestUserName, requestedUser);
+
+ // Check rmLoginUser
+ String rmLoginUser = fedClusterUserInfo.getRmLoginUser();
+ Assert.assertNotNull(rmLoginUser);
+ Assert.assertEquals(mockRM.getRMLoginUser(), rmLoginUser);
+ }
+ }
+
+ @Test
+ public void testGetClusterInfo() {
+ ClusterInfo clusterInfos = interceptor.getClusterInfo();
+ Assert.assertNotNull(clusterInfos);
+ Assert.assertTrue(clusterInfos instanceof FederationClusterInfo);
+
+ FederationClusterInfo federationClusterInfos =
+ (FederationClusterInfo) (clusterInfos);
+
+ List<ClusterInfo> fedClusterInfosList = federationClusterInfos.getList();
+ Assert.assertNotNull(fedClusterInfosList);
+ Assert.assertEquals(4, fedClusterInfosList.size());
+
+ List<String> subClusterIds = subClusters.stream().map(
+ subClusterId -> subClusterId.getId()).collect(Collectors.toList());
+
+ MockRM mockRM = interceptor.getMockRM();
+ String yarnVersion = YarnVersionInfo.getVersion();
+
+ for (ClusterInfo clusterInfo : fedClusterInfosList) {
+ String subClusterId = clusterInfo.getSubClusterId();
+ // Check subClusterId
+ Assert.assertTrue(subClusterIds.contains(subClusterId));
+
+ // Check state
+ String clusterState = mockRM.getServiceState().toString();
+ Assert.assertEquals(clusterState, clusterInfo.getState());
+
+ // Check rmStateStoreName
+ String rmStateStoreName =
+ mockRM.getRMContext().getStateStore().getClass().getName();
+ Assert.assertEquals(rmStateStoreName, clusterInfo.getRMStateStore());
+
+ // Check RM Version
+ Assert.assertEquals(yarnVersion, clusterInfo.getRMVersion());
+
+ // Check haZooKeeperConnectionState
+ String rmHAZookeeperConnectionState = mockRM.getRMContext().getHAZookeeperConnectionState();
+ Assert.assertEquals(rmHAZookeeperConnectionState,
+ clusterInfo.getHAZookeeperConnectionState());
+ }
+ }
}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestableFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestableFederationInterceptorREST.java
index 31fd756b664..0e37b7c9749 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestableFederationInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestableFederationInterceptorREST.java
@@ -117,4 +117,8 @@ public class TestableFederationInterceptorREST
}
super.shutdown();
}
+
+ public MockRM getMockRM() {
+ return mockRM;
+ }
}
\ No newline at end of file
From b6a9d7b4429970e332e5e07f0cf3265ee5d0e909 Mon Sep 17 00:00:00 2001
From: Viraj Jasani
Date: Fri, 17 Mar 2023 15:33:50 -0700
Subject: [PATCH 16/78] HADOOP-18631. (ADDENDUM) Use LogCapturer to match audit
log pattern and remove hdfs async audit log configs (#5451)
---
.../org/apache/hadoop/hdfs/DFSConfigKeys.java | 37 ------
.../hdfs/server/namenode/FSNamesystem.java | 10 +-
.../src/main/resources/hdfs-default.xml | 29 -----
.../hdfs/server/namenode/TestAuditLogs.java | 106 ++++++++----------
.../hadoop/hdfs/server/namenode/TestFsck.java | 67 ++++++-----
5 files changed, 89 insertions(+), 160 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index e5e21e4307a..3286ffb4f09 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -733,43 +733,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME = "default";
public static final String DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY = "dfs.namenode.audit.log.token.tracking.id";
public static final boolean DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT = false;
- /**
- * Deprecated. Use log4j properties instead.
- * Set system env variable HDFS_AUDIT_LOGGER, which in tern assigns the value to
- * "hdfs.audit.logger" for log4j properties to determine log level and appender.
- */
- @Deprecated
- public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY = "dfs.namenode.audit.log.async";
- @Deprecated
- public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT = false;
-
- /**
- * Deprecated. Use log4j properties instead.
- * Set value to Async appender "blocking" property as part of log4j properties configuration.
- *
- * For example,
- * log4j.appender.ASYNCAPPENDER=org.apache.log4j.AsyncAppender
- * log4j.appender.ASYNCAPPENDER.blocking=false
- */
- @Deprecated
- public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_KEY =
- "dfs.namenode.audit.log.async.blocking";
- @Deprecated
- public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_DEFAULT = true;
-
- /**
- * Deprecated. Use log4j properties instead.
- * Set value to Async appender "bufferSize" property as part of log4j properties configuration.
- *
- * For example,
- * log4j.appender.ASYNCAPPENDER=org.apache.log4j.AsyncAppender
- * log4j.appender.ASYNCAPPENDER.bufferSize=128
- */
- @Deprecated
- public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_KEY =
- "dfs.namenode.audit.log.async.buffer.size";
- @Deprecated
- public static final int DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_DEFAULT = 128;
public static final String DFS_NAMENODE_AUDIT_LOG_DEBUG_CMDLIST = "dfs.namenode.audit.log.debug.cmdlist";
public static final String DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY =
"dfs.namenode.metrics.logger.period.seconds";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 0e46dca9dff..107439defee 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -48,8 +48,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
@@ -1069,11 +1067,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
}
}
- @SuppressWarnings("deprecation")
private static void checkForAsyncLogEnabledByOldConfigs(Configuration conf) {
- if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY, DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
- LOG.warn("Use log4j properties to enable async log for audit logs. {} is deprecated",
- DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY);
+ // dfs.namenode.audit.log.async is no longer in use. Use log4j properties instead.
+ if (conf.getBoolean("dfs.namenode.audit.log.async", false)) {
+ LOG.warn("Use log4j properties to enable async log for audit logs. "
+ + "dfs.namenode.audit.log.async is no longer in use.");
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 5643a9b5c5e..bdd048004d3 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -5099,35 +5099,6 @@
- <property>
-   <name>dfs.namenode.audit.log.async</name>
-   <value>false</value>
-   <description>
-     If true, enables asynchronous audit log.
-   </description>
- </property>
-
- <property>
-   <name>dfs.namenode.audit.log.async.blocking</name>
-   <value>true</value>
-   <description>
-     Only used when enables asynchronous audit log. Sets whether audit log async
-     appender should wait if there is no space available in the event buffer or
-     immediately return. Default value is true.
-   </description>
- </property>
-
- <property>
-   <name>dfs.namenode.audit.log.async.buffer.size</name>
-   <value>128</value>
-   <description>
-     Only used when enables asynchronous audit log. Sets the number of audit
-     logs allowed in the event buffer before the calling thread is blocked
-     (if dfs.namenode.audit.log.async.blocking is true) or until logs are
-     summarized and discarded. Default value is 128.
-   </description>
- </property>
-
<property>
  <name>dfs.namenode.audit.log.token.tracking.id</name>
  <value>false</value>
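The removed hdfs-default.xml entries above have no replacement keys; as the deprecated DFSConfigKeys javadoc notes, asynchronous audit logging is now driven purely by log4j properties. A minimal sketch, reusing the ASYNCAPPENDER lines quoted in that deleted javadoc and assuming the stock Hadoop log4j setup where the audit logger is selected via hdfs.audit.logger / the HDFS_AUDIT_LOGGER environment variable:

    log4j.appender.ASYNCAPPENDER=org.apache.log4j.AsyncAppender
    log4j.appender.ASYNCAPPENDER.blocking=false
    log4j.appender.ASYNCAPPENDER.bufferSize=128

The property names are taken from the examples in the deleted javadoc; adjust blocking (old default: true) and bufferSize (old default: 128) to match whatever the removed dfs.namenode.audit.log.async.* settings were before migration.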
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java
index 698178e4e96..0f736696751 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java
@@ -20,12 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode;
import static org.junit.Assert.*;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
import java.io.InputStream;
-import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -46,12 +41,15 @@ import org.apache.hadoop.hdfs.web.WebHdfsTestUtil;
import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
import org.apache.log4j.Appender;
import org.apache.log4j.AsyncAppender;
import org.apache.log4j.Logger;
import org.junit.After;
+import org.junit.AfterClass;
import org.junit.Before;
+import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -66,11 +64,10 @@ public class TestAuditLogs {
private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(TestAuditLogs.class);
- private static final File AUDIT_LOG_FILE =
- new File(System.getProperty("hadoop.log.dir"), "hdfs-audit.log");
-
final boolean useAsyncEdits;
+ private static LogCapturer auditLogCapture;
+
@Parameters
public static Collection
+ <property>
+   <name>fs.oss.fast.upload.buffer</name>
+   <value>disk</value>
+   <description>
+     The buffering mechanism to use.
+     Values: disk, array, bytebuffer, array_disk, bytebuffer_disk.
+
+     "disk" will use the directories listed in fs.oss.buffer.dir as
+     the location(s) to save data prior to being uploaded.
+
+     "array" uses arrays in the JVM heap.
+
+     "bytebuffer" uses off-heap memory within the JVM.
+
+     Both "array" and "bytebuffer" will consume memory in a single stream up to the number
+     of blocks set by:
+
+     fs.oss.multipart.upload.size * fs.oss.upload.active.blocks.
+
+     If using either of these mechanisms, keep this value low.
+
+     The total number of threads performing work across all streams is set by
+     fs.oss.multipart.download.threads (currently fast upload shares the same thread pool
+     with download; the pool size is specified in "fs.oss.multipart.download.threads"),
+     with fs.oss.max.total.tasks setting the number of queued work items.
+
+     "array_disk" and "bytebuffer_disk" support fallback to disk.
+   </description>
+ </property>
+
+ <property>
+   <name>fs.oss.fast.upload.memory.limit</name>
+   <value>1073741824</value>
+   <description>
+     Memory limit of "array_disk" and "bytebuffer_disk" upload buffers.
+     Will fall back to disk buffers if used memory reaches the limit.
+   </description>
+ </property>
+
<property>
  <name>fs.oss.buffer.dir</name>
-   <description>Comma separated list of directories to buffer OSS data before uploading to Aliyun OSS</description>
+   <value>${env.LOCAL_DIRS:-${hadoop.tmp.dir}}/oss</value>
+   <description>Comma separated list of directories to buffer
+     OSS data before uploading to Aliyun OSS.
+     Yarn container path will be used as default value on yarn applications,
+     otherwise fall back to hadoop.tmp.dir.
+   </description>
</property>
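A hedged Java sketch of how a client might opt into the buffering options documented above; the bucket URI, output path, and 256 MB limit are illustrative placeholders rather than values taken from this patch:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OssFastUploadBufferExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Buffer uploads in off-heap ByteBuffers, falling back to disk once the
    // memory limit below is reached ("bytebuffer_disk" per the description above).
    conf.set("fs.oss.fast.upload.buffer", "bytebuffer_disk");
    // Cap the in-memory buffers at 256 MB (the shipped default is 1073741824, i.e. 1 GB).
    conf.setLong("fs.oss.fast.upload.memory.limit", 256L * 1024 * 1024);
    try (FileSystem fs = FileSystem.get(URI.create("oss://example-bucket/"), conf)) {
      // Any write now goes through the block factory selected above.
      fs.create(new Path("/tmp/fast-upload-demo.bin")).close();
    }
  }
}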
diff --git a/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/TestAliyunOSSBlockOutputStream.java b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/TestAliyunOSSBlockOutputStream.java
index 69aa0a5a795..891890dfc40 100644
--- a/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/TestAliyunOSSBlockOutputStream.java
+++ b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/TestAliyunOSSBlockOutputStream.java
@@ -22,6 +22,8 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.aliyun.oss.OSSDataBlocks.ByteBufferBlockFactory;
+import org.apache.hadoop.fs.aliyun.oss.statistics.BlockOutputStreamStatistics;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.After;
@@ -37,12 +39,19 @@ import java.util.ArrayList;
import java.util.LinkedHashSet;
import static org.apache.hadoop.fs.aliyun.oss.Constants.BUFFER_DIR_KEY;
+import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BUFFER;
+import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BUFFER_ARRAY_DISK;
+import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BUFFER_DISK;
+import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BUFFER_MEMORY_LIMIT;
+import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BYTEBUFFER;
+import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BYTEBUFFER_DISK;
import static org.apache.hadoop.fs.aliyun.oss.Constants.MULTIPART_UPLOAD_PART_SIZE_DEFAULT;
import static org.apache.hadoop.fs.aliyun.oss.Constants.MULTIPART_UPLOAD_PART_SIZE_KEY;
import static org.apache.hadoop.fs.contract.ContractTestUtils.IO_CHUNK_BUFFER_SIZE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
/**
@@ -54,6 +63,7 @@ public class TestAliyunOSSBlockOutputStream {
private static final int PART_SIZE = 1024 * 1024;
private static String testRootPath =
AliyunOSSTestUtils.generateUniqueTestPath();
+ private static final long MEMORY_LIMIT = 10 * 1024 * 1024;
@Rule
public Timeout testTimeout = new Timeout(30 * 60 * 1000);
@@ -65,6 +75,7 @@ public class TestAliyunOSSBlockOutputStream {
conf.setInt(IO_CHUNK_BUFFER_SIZE,
conf.getInt(MULTIPART_UPLOAD_PART_SIZE_KEY, 0));
conf.setInt(Constants.UPLOAD_ACTIVE_BLOCKS_KEY, 20);
+ conf.setLong(FAST_UPLOAD_BUFFER_MEMORY_LIMIT, MEMORY_LIMIT);
fs = AliyunOSSTestUtils.createTestFileSystem(conf);
}
@@ -82,7 +93,7 @@ public class TestAliyunOSSBlockOutputStream {
@Test
public void testZeroByteUpload() throws IOException {
ContractTestUtils.createAndVerifyFile(fs, getTestPath(), 0);
- bufferDirShouldEmpty();
+ bufferShouldReleased(true);
}
@Test
@@ -106,20 +117,21 @@ public class TestAliyunOSSBlockOutputStream {
assertEquals(size - 1, statistics.getBytesRead());
assertEquals(3, statistics.getWriteOps());
assertEquals(size - 1, statistics.getBytesWritten());
+ bufferShouldReleased();
ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size);
assertEquals(14, statistics.getReadOps());
assertEquals(2 * size - 1, statistics.getBytesRead());
assertEquals(6, statistics.getWriteOps());
assertEquals(2 * size - 1, statistics.getBytesWritten());
+ bufferShouldReleased();
ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size + 1);
-
assertEquals(22, statistics.getReadOps());
assertEquals(3 * size, statistics.getBytesRead());
assertEquals(10, statistics.getWriteOps());
assertEquals(3 * size, statistics.getBytesWritten());
- bufferDirShouldEmpty();
+ bufferShouldReleased();
}
@Test
@@ -133,19 +145,21 @@ public class TestAliyunOSSBlockOutputStream {
assertEquals(size - 1, statistics.getBytesRead());
assertEquals(8, statistics.getWriteOps());
assertEquals(size - 1, statistics.getBytesWritten());
+ bufferShouldReleased();
ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size);
assertEquals(34, statistics.getReadOps());
assertEquals(2 * size - 1, statistics.getBytesRead());
assertEquals(16, statistics.getWriteOps());
assertEquals(2 * size - 1, statistics.getBytesWritten());
+ bufferShouldReleased();
ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size + 1);
assertEquals(52, statistics.getReadOps());
assertEquals(3 * size, statistics.getBytesRead());
assertEquals(25, statistics.getWriteOps());
assertEquals(3 * size, statistics.getBytesWritten());
- bufferDirShouldEmpty();
+ bufferShouldReleased();
}
@Test
@@ -159,16 +173,18 @@ public class TestAliyunOSSBlockOutputStream {
assertEquals(size, statistics.getBytesRead());
assertEquals(52, statistics.getWriteOps());
assertEquals(size, statistics.getBytesWritten());
- bufferDirShouldEmpty();
+ bufferShouldReleased();
}
@Test
public void testHugeUpload() throws IOException {
ContractTestUtils.createAndVerifyFile(fs, getTestPath(), PART_SIZE - 1);
+ bufferShouldReleased();
ContractTestUtils.createAndVerifyFile(fs, getTestPath(), PART_SIZE);
+ bufferShouldReleased();
ContractTestUtils.createAndVerifyFile(fs, getTestPath(),
MULTIPART_UPLOAD_PART_SIZE_DEFAULT + 1);
- bufferDirShouldEmpty();
+ bufferShouldReleased();
}
@Test
@@ -199,15 +215,43 @@ public class TestAliyunOSSBlockOutputStream {
public void testSmallUpload() throws IOException {
long size = fs.getConf().getInt(MULTIPART_UPLOAD_PART_SIZE_KEY, 1024);
ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size - 1);
- bufferDirShouldEmpty();
+ bufferShouldReleased();
}
- private void bufferDirShouldEmpty() throws IOException {
- Path bufferPath = new Path(fs.getConf().get(BUFFER_DIR_KEY));
- FileStatus[] files = bufferPath.getFileSystem(
- fs.getConf()).listStatus(bufferPath);
- // Temporary file should be deleted
- assertEquals(0, files.length);
+ private void bufferShouldReleased() throws IOException {
+ bufferShouldReleased(false);
+ }
+
+ private void bufferShouldReleased(boolean zeroSizeFile) throws IOException {
+ String bufferDir = fs.getConf().get(BUFFER_DIR_KEY);
+ String bufferType = fs.getConf().get(FAST_UPLOAD_BUFFER);
+ if (bufferType.equals(FAST_UPLOAD_BUFFER_DISK)) {
+ assertNotNull(bufferDir);
+ Path bufferPath = new Path(fs.getConf().get(BUFFER_DIR_KEY));
+ FileStatus[] files = bufferPath.getFileSystem(
+ fs.getConf()).listStatus(bufferPath);
+ // Temporary file should be deleted
+ assertEquals(0, files.length);
+ } else {
+ if (bufferType.equals(FAST_UPLOAD_BYTEBUFFER)) {
+ OSSDataBlocks.ByteBufferBlockFactory
+ blockFactory = (OSSDataBlocks.ByteBufferBlockFactory)
+ ((AliyunOSSFileSystem)fs).getBlockFactory();
+ assertEquals("outstanding buffers in " + blockFactory,
+ 0, blockFactory.getOutstandingBufferCount());
+ }
+ }
+ BlockOutputStreamStatistics statistics =
+ ((AliyunOSSFileSystem)fs).getBlockOutputStreamStatistics();
+ assertEquals(statistics.getBlocksAllocated(),
+ statistics.getBlocksReleased());
+ if (zeroSizeFile) {
+ assertEquals(statistics.getBlocksAllocated(), 0);
+ } else {
+ assertTrue(statistics.getBlocksAllocated() >= 1);
+ }
+ assertEquals(statistics.getBytesReleased(),
+ statistics.getBytesAllocated());
}
@Test
@@ -249,4 +293,127 @@ public class TestAliyunOSSBlockOutputStream {
assertNotEquals("round robin not working",
tmp1.getParent(), tmp2.getParent());
}
+
+ @Test
+ public void testByteBufferIO() throws IOException {
+ try (OSSDataBlocks.ByteBufferBlockFactory factory =
+ new OSSDataBlocks.ByteBufferBlockFactory((AliyunOSSFileSystem)fs)) {
+ int limit = 128;
+ OSSDataBlocks.ByteBufferBlockFactory.ByteBufferBlock block
+ = factory.create(1, limit, null);
+ assertEquals("outstanding buffers in " + factory,
+ 1, factory.getOutstandingBufferCount());
+
+ byte[] buffer = ContractTestUtils.toAsciiByteArray("test data");
+ int bufferLen = buffer.length;
+ block.write(buffer, 0, bufferLen);
+ assertEquals(bufferLen, block.dataSize());
+ assertEquals("capacity in " + block,
+ limit - bufferLen, block.remainingCapacity());
+ assertTrue("hasCapacity(64) in " + block, block.hasCapacity(64));
+ assertTrue("No capacity in " + block,
+ block.hasCapacity(limit - bufferLen));
+
+ // now start the write
+ OSSDataBlocks.BlockUploadData blockUploadData = block.startUpload();
+ ByteBufferBlockFactory.ByteBufferBlock.ByteBufferInputStream
+ stream =
+ (ByteBufferBlockFactory.ByteBufferBlock.ByteBufferInputStream)
+ blockUploadData.getUploadStream();
+ assertTrue("Mark not supported in " + stream, stream.markSupported());
+ assertTrue("!hasRemaining() in " + stream, stream.hasRemaining());
+
+ int expected = bufferLen;
+ assertEquals("wrong available() in " + stream,
+ expected, stream.available());
+
+ assertEquals('t', stream.read());
+ stream.mark(limit);
+ expected--;
+ assertEquals("wrong available() in " + stream,
+ expected, stream.available());
+
+ // read into a byte array with an offset
+ int offset = 5;
+ byte[] in = new byte[limit];
+ assertEquals(2, stream.read(in, offset, 2));
+ assertEquals('e', in[offset]);
+ assertEquals('s', in[offset + 1]);
+ expected -= 2;
+ assertEquals("wrong available() in " + stream,
+ expected, stream.available());
+
+ // read to end
+ byte[] remainder = new byte[limit];
+ int c;
+ int index = 0;
+ while ((c = stream.read()) >= 0) {
+ remainder[index++] = (byte) c;
+ }
+ assertEquals(expected, index);
+ assertEquals('a', remainder[--index]);
+
+ assertEquals("wrong available() in " + stream,
+ 0, stream.available());
+ assertTrue("hasRemaining() in " + stream, !stream.hasRemaining());
+
+ // go the mark point
+ stream.reset();
+ assertEquals('e', stream.read());
+
+ // when the stream is closed, the data should be returned
+ stream.close();
+ assertEquals("outstanding buffers in " + factory,
+ 1, factory.getOutstandingBufferCount());
+ block.close();
+ assertEquals("outstanding buffers in " + factory,
+ 0, factory.getOutstandingBufferCount());
+ stream.close();
+ assertEquals("outstanding buffers in " + factory,
+ 0, factory.getOutstandingBufferCount());
+ }
+ }
+
+ @Test
+ public void testFastUploadArrayDisk() throws IOException {
+ testFastUploadFallback(FAST_UPLOAD_BUFFER_ARRAY_DISK);
+ }
+
+ @Test
+ public void testFastUploadByteBufferDisk() throws IOException {
+ testFastUploadFallback(FAST_UPLOAD_BYTEBUFFER_DISK);
+ }
+
+ private void testFastUploadFallback(String name) throws IOException {
+ Configuration conf = fs.getConf();
+ fs.close();
+
+ conf.set(FAST_UPLOAD_BUFFER, name);
+
+ fs = AliyunOSSTestUtils.createTestFileSystem(conf);
+ long size = 5 * MEMORY_LIMIT;
+ ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size);
+ OSSDataBlocks.MemoryBlockFactory
+ blockFactory = ((OSSDataBlocks.MemoryAndDiskBlockFactory)
+ ((AliyunOSSFileSystem)fs).getBlockFactory()).getMemoryFactory();
+ assertEquals(blockFactory.getMemoryUsed(), 0);
+
+ Path bufferPath = new Path(fs.getConf().get(BUFFER_DIR_KEY));
+ FileStatus[] files = bufferPath.getFileSystem(
+ fs.getConf()).listStatus(bufferPath);
+ // Temporary file should be deleted
+ assertEquals(0, files.length);
+
+ BlockOutputStreamStatistics statistics =
+ ((AliyunOSSFileSystem)fs).getBlockOutputStreamStatistics();
+ assertEquals(statistics.getBlocksAllocated(),
+ statistics.getBlocksReleased());
+ assertTrue(statistics.getBlocksAllocated() > 1);
+ assertEquals(statistics.getBytesReleased(),
+ statistics.getBytesAllocated());
+ assertTrue(statistics.getBytesAllocated() >= MEMORY_LIMIT);
+ assertTrue(statistics.getDiskBlocksAllocated() > 0);
+ assertEquals(statistics.getDiskBlocksAllocated(),
+ statistics.getDiskBlocksReleased());
+ }
}
From 700147b4ac18ceca5137e9d7fc8f53a5619768d4 Mon Sep 17 00:00:00 2001
From: zhangshuyan <81411509+zhangshuyan0@users.noreply.github.com>
Date: Tue, 28 Mar 2023 16:14:59 +0800
Subject: [PATCH 31/78] HDFS-16964. Improve processing of excess redundancy
after failover. (#5510). Contributed by Shuyan Zhang.
Signed-off-by: He Xiaoqiao
---
.../server/blockmanagement/BlockManager.java | 59 ++++++++++++-------
1 file changed, 38 insertions(+), 21 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index e5a6cf73b69..ec8bbf82498 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -3987,17 +3987,11 @@ public class BlockManager implements BlockStatsMXBean {
}
if (shouldProcessExtraRedundancy(num, expectedRedundancy)) {
- if (num.replicasOnStaleNodes() > 0) {
- // If any of the replicas of this block are on nodes that are
- // considered "stale", then these replicas may in fact have
- // already been deleted. So, we cannot safely act on the
- // over-replication until a later point in time, when
- // the "stale" nodes have block reported.
+ // extra redundancy block
+ if (!processExtraRedundancyBlockWithoutPostpone(block, expectedRedundancy,
+ null, null)) {
return MisReplicationResult.POSTPONE;
}
-
- // extra redundancy block
- processExtraRedundancyBlock(block, expectedRedundancy, null, null);
return MisReplicationResult.OVER_REPLICATED;
}
@@ -4020,12 +4014,26 @@ public class BlockManager implements BlockStatsMXBean {
}
}
+ /**
+ * Process blocks with redundant replicas. If there are replicas in
+ * stale storages, mark them in the postponedMisreplicatedBlocks.
+ */
+ private void processExtraRedundancyBlock(final BlockInfo block,
+ final short replication, final DatanodeDescriptor addedNode,
+ DatanodeDescriptor delNodeHint) {
+ if (!processExtraRedundancyBlockWithoutPostpone(block, replication,
+ addedNode, delNodeHint)) {
+ postponeBlock(block);
+ }
+ }
+
/**
* Find how many of the containing nodes are "extra", if any.
* If there are any extras, call chooseExcessRedundancies() to
* mark them in the excessRedundancyMap.
+ * @return true if all redundancy replicas are removed.
*/
- private void processExtraRedundancyBlock(final BlockInfo block,
+ private boolean processExtraRedundancyBlockWithoutPostpone(final BlockInfo block,
final short replication, final DatanodeDescriptor addedNode,
DatanodeDescriptor delNodeHint) {
assert namesystem.hasWriteLock();
@@ -4035,17 +4043,17 @@ public class BlockManager implements BlockStatsMXBean {
Collection<DatanodeStorageInfo> nonExcess = new ArrayList<>();
Collection<DatanodeDescriptor> corruptNodes = corruptReplicas
.getNodes(block);
+ boolean hasStaleStorage = false;
+ Set<DatanodeStorageInfo> staleStorages = new HashSet<>();
for (DatanodeStorageInfo storage : blocksMap.getStorages(block)) {
if (storage.getState() != State.NORMAL) {
continue;
}
final DatanodeDescriptor cur = storage.getDatanodeDescriptor();
if (storage.areBlockContentsStale()) {
- LOG.trace("BLOCK* processExtraRedundancyBlock: Postponing {}"
- + " since storage {} does not yet have up-to-date information.",
- block, storage);
- postponeBlock(block);
- return;
+ hasStaleStorage = true;
+ staleStorages.add(storage);
+ continue;
}
if (!isExcess(cur, block)) {
if (cur.isInService()) {
@@ -4058,6 +4066,13 @@ public class BlockManager implements BlockStatsMXBean {
}
chooseExcessRedundancies(nonExcess, block, replication, addedNode,
delNodeHint);
+ if (hasStaleStorage) {
+ LOG.trace("BLOCK* processExtraRedundancyBlockWithoutPostpone: Postponing {}"
+ + " since storages {} does not yet have up-to-date information.",
+ block, staleStorages);
+ return false;
+ }
+ return true;
}
private void chooseExcessRedundancies(
@@ -4071,12 +4086,14 @@ public class BlockManager implements BlockStatsMXBean {
if (storedBlock.isStriped()) {
chooseExcessRedundancyStriped(bc, nonExcess, storedBlock, delNodeHint);
} else {
- final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy(
- bc.getStoragePolicyID());
- final List<StorageType> excessTypes = storagePolicy.chooseExcess(
- replication, DatanodeStorageInfo.toStorageTypes(nonExcess));
- chooseExcessRedundancyContiguous(nonExcess, storedBlock, replication,
- addedNode, delNodeHint, excessTypes);
+ if (nonExcess.size() > replication) {
+ final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy(
+ bc.getStoragePolicyID());
+ final List<StorageType> excessTypes = storagePolicy.chooseExcess(
+ replication, DatanodeStorageInfo.toStorageTypes(nonExcess));
+ chooseExcessRedundancyContiguous(nonExcess, storedBlock, replication,
+ addedNode, delNodeHint, excessTypes);
+ }
}
}
From aa602381c595db4b958709c03874ad54597ba197 Mon Sep 17 00:00:00 2001
From: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Date: Tue, 28 Mar 2023 22:48:46 +0800
Subject: [PATCH 32/78] YARN-11426. Improve YARN NodeLabel Memory Display.
(#5335)
YARN-11426. Improve YARN NodeLabel Memory Display.
Co-authored-by: slfan1989
Reviewed-by: Inigo Goiri
Reviewed-by: Chris Nauroth
Signed-off-by: Shilun Fan
---
.../hadoop-yarn/hadoop-yarn-api/pom.xml | 5 ++++
.../hadoop/yarn/api/records/Resource.java | 11 ++++++++
.../hadoop/yarn/api/records/TestResource.java | 25 +++++++++++++++++++
.../webapp/NodeLabelsPage.java | 2 +-
4 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml
index 61747c2cd80..e4b8ee28227 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml
@@ -115,6 +115,11 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
+ <dependency>
+   <groupId>org.mockito</groupId>
+   <artifactId>mockito-core</artifactId>
+   <scope>test</scope>
+ </dependency>
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
index 0c10e017685..80e569d5a9e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.classification.InterfaceStability.Stable;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
@@ -543,4 +544,14 @@ public abstract class Resource implements Comparable {
ri.setMaximumAllocation(Long.MAX_VALUE);
return ri;
}
+
+ @VisibleForTesting
+ protected void setResources(ResourceInformation[] resources) {
+ this.resources = resources;
+ }
+
+ public String getFormattedString(long memory) {
+ return getFormattedString(
+ StringUtils.byteDesc(memory * 1024 * 1024));
+ }
}
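The new getFormattedString(long) overload treats its argument as megabytes: it scales by 1024 * 1024 to get a byte count, lets StringUtils.byteDesc pick a human-readable unit, and hands the result to the existing string-based formatter. A quick worked example, assuming a 1-vcore resource as in the test that follows:

    resource.getFormattedString(1024);
    // 1024 MB * 1024 * 1024 = 1073741824 bytes -> byteDesc(...) = "1 GB",
    // so the output is expected to render along the lines of "<memory:1 GB, vCores:1>".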
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/records/TestResource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/records/TestResource.java
index 638ecf9d322..060ca39c697 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/records/TestResource.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/records/TestResource.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.yarn.api.records;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.when;
/**
* The class to test {@link Resource}.
@@ -42,4 +44,27 @@ class TestResource {
"Cast to Integer.MAX_VALUE if the long is greater than "
+ "Integer.MAX_VALUE");
}
+
+ @Test
+ public void testResourceFormatted() {
+ Resource resource = spy(Resource.class);
+ resource.setResources(new ResourceInformation[0]);
+ when(resource.getVirtualCores()).thenReturn(1);
+
+ // We set 10MB
+ String expectedResult1 = "<memory:10 MB, vCores:1>";
+ assertEquals(expectedResult1, resource.getFormattedString(10));
+
+ // We set 1024 MB = 1GB
+ String expectedResult2 = "<memory:1 GB, vCores:1>";
+ assertEquals(expectedResult2, resource.getFormattedString(1024));
+
+ // We set 1024 * 1024 MB = 1024 GB = 1TB
+ String expectedResult3 = "<memory:1 TB, vCores:1>";
+ assertEquals(expectedResult3, resource.getFormattedString(1024 * 1024));
+
+ // We set 1024 * 1024 * 1024 MB = 1024 * 1024 GB = 1 * 1024 TB = 1 PB
+ String expectedResult4 = "<memory:1 PB, vCores:1>";
+ assertEquals(expectedResult4, resource.getFormattedString(1024 * 1024 * 1024));
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodeLabelsPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodeLabelsPage.java
index 6ff76281007..c4df6aa0e27 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodeLabelsPage.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodeLabelsPage.java
@@ -75,7 +75,7 @@ public class NodeLabelsPage extends RmView {
} else {
row = row.td(String.valueOf(nActiveNMs));
}
- row.td(info.getResource().toString()).__();
+ row.td(info.getResource().toFormattedString()).__();
}
tbody.__().__();
}
From 5bc8f2532746453cb0b57a4c3552b02fae984ffd Mon Sep 17 00:00:00 2001
From: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Date: Wed, 29 Mar 2023 00:33:19 +0800
Subject: [PATCH 33/78] YARN-11446. [Federation] Add
updateSchedulerConfiguration, getSchedulerConfiguration REST APIs for Router.
(#5476)
---
.../hadoop/yarn/webapp/dao/ConfInfo.java | 11 ++
.../yarn/webapp/dao/SchedConfUpdateInfo.java | 11 ++
.../yarn/server/router/RouterMetrics.java | 62 ++++++++
.../webapp/FederationInterceptorREST.java | 134 +++++++++++++++++-
.../router/webapp/dao/FederationConfInfo.java | 55 +++++++
.../yarn/server/router/TestRouterMetrics.java | 66 +++++++++
.../MockDefaultRequestInterceptorREST.java | 28 +++-
.../webapp/TestFederationInterceptorREST.java | 69 +++++++++
8 files changed, 430 insertions(+), 6 deletions(-)
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationConfInfo.java
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/ConfInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/ConfInfo.java
index 1971efa5684..7ca396f49d5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/ConfInfo.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/ConfInfo.java
@@ -33,6 +33,8 @@ public class ConfInfo {
private ArrayList property = new ArrayList<>();
+ private String subClusterId;
+
public ConfInfo() {
} // JAXB needs this
@@ -74,5 +76,14 @@ public class ConfInfo {
public String getValue() {
return value;
}
+
+ }
+
+ public String getSubClusterId() {
+ return subClusterId;
+ }
+
+ public void setSubClusterId(String subClusterId) {
+ this.subClusterId = subClusterId;
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/SchedConfUpdateInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/SchedConfUpdateInfo.java
index 45462919ed1..8f3ad5d66e0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/SchedConfUpdateInfo.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/SchedConfUpdateInfo.java
@@ -44,6 +44,9 @@ public class SchedConfUpdateInfo {
@XmlElement(name = "update-queue")
private ArrayList updateQueueInfo = new ArrayList<>();
+ @XmlElement(name = "subClusterId")
+ private String subClusterId = "";
+
private HashMap global = new HashMap<>();
public SchedConfUpdateInfo() {
@@ -82,4 +85,12 @@ public class SchedConfUpdateInfo {
public void setGlobalParams(HashMap globalInfo) {
this.global = globalInfo;
}
+
+ public String getSubClusterId() {
+ return subClusterId;
+ }
+
+ public void setSubClusterId(String subClusterId) {
+ this.subClusterId = subClusterId;
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java
index 3a581dfbd1f..a84a315b93c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java
@@ -159,6 +159,10 @@ public final class RouterMetrics {
private MutableGaugeInt numAddToClusterNodeLabelsFailedRetrieved;
@Metric("# of removeFromClusterNodeLabels failed to be retrieved")
private MutableGaugeInt numRemoveFromClusterNodeLabelsFailedRetrieved;
+ @Metric("# of numUpdateSchedulerConfiguration failed to be retrieved")
+ private MutableGaugeInt numUpdateSchedulerConfigurationFailedRetrieved;
+ @Metric("# of numGetSchedulerConfiguration failed to be retrieved")
+ private MutableGaugeInt numGetSchedulerConfigurationFailedRetrieved;
@Metric("# of getClusterInfo failed to be retrieved")
private MutableGaugeInt numGetClusterInfoFailedRetrieved;
@Metric("# of getClusterUserInfo failed to be retrieved")
@@ -287,6 +291,10 @@ public final class RouterMetrics {
private MutableRate totalSucceededAddToClusterNodeLabelsRetrieved;
@Metric("Total number of successful Retrieved RemoveFromClusterNodeLabels and latency(ms)")
private MutableRate totalSucceededRemoveFromClusterNodeLabelsRetrieved;
+ @Metric("Total number of successful Retrieved updateSchedulerConfiguration and latency(ms)")
+ private MutableRate totalSucceededUpdateSchedulerConfigurationRetrieved;
+ @Metric("Total number of successful Retrieved getSchedulerConfiguration and latency(ms)")
+ private MutableRate totalSucceededGetSchedulerConfigurationRetrieved;
@Metric("Total number of successful Retrieved GetClusterInfoRetrieved and latency(ms)")
private MutableRate totalSucceededGetClusterInfoRetrieved;
@Metric("Total number of successful Retrieved GetClusterUserInfoRetrieved and latency(ms)")
@@ -358,6 +366,8 @@ public final class RouterMetrics {
private MutableQuantiles replaceLabelsOnNodeLatency;
private MutableQuantiles addToClusterNodeLabelsLatency;
private MutableQuantiles removeFromClusterNodeLabelsLatency;
+ private MutableQuantiles updateSchedulerConfigLatency;
+ private MutableQuantiles getSchedulerConfigurationLatency;
private MutableQuantiles getClusterInfoLatency;
private MutableQuantiles getClusterUserInfoLatency;
private MutableQuantiles updateNodeResourceLatency;
@@ -572,6 +582,12 @@ public final class RouterMetrics {
removeFromClusterNodeLabelsLatency = registry.newQuantiles("removeFromClusterNodeLabelsLatency",
"latency of remove cluster nodelabels timeouts", "ops", "latency", 10);
+ updateSchedulerConfigLatency = registry.newQuantiles("updateSchedulerConfigurationLatency",
+ "latency of update scheduler configuration timeouts", "ops", "latency", 10);
+
+ getSchedulerConfigurationLatency = registry.newQuantiles("getSchedulerConfigurationLatency",
+ "latency of get scheduler configuration timeouts", "ops", "latency", 10);
+
getClusterInfoLatency = registry.newQuantiles("getClusterInfoLatency",
"latency of get cluster info timeouts", "ops", "latency", 10);
@@ -879,6 +895,16 @@ public final class RouterMetrics {
return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().numSamples();
}
+ @VisibleForTesting
+ public long getNumSucceededUpdateSchedulerConfigurationRetrieved() {
+ return totalSucceededUpdateSchedulerConfigurationRetrieved.lastStat().numSamples();
+ }
+
+ @VisibleForTesting
+ public long getNumSucceededGetSchedulerConfigurationRetrieved() {
+ return totalSucceededGetSchedulerConfigurationRetrieved.lastStat().numSamples();
+ }
+
@VisibleForTesting
public long getNumSucceededGetClusterInfoRetrieved() {
return totalSucceededGetClusterInfoRetrieved.lastStat().numSamples();
@@ -1189,6 +1215,16 @@ public final class RouterMetrics {
return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().mean();
}
+ @VisibleForTesting
+ public double getLatencySucceededUpdateSchedulerConfigurationRetrieved() {
+ return totalSucceededUpdateSchedulerConfigurationRetrieved.lastStat().mean();
+ }
+
+ @VisibleForTesting
+ public double getLatencySucceededGetSchedulerConfigurationRetrieved() {
+ return totalSucceededGetSchedulerConfigurationRetrieved.lastStat().mean();
+ }
+
@VisibleForTesting
public double getLatencySucceededGetClusterInfoRetrieved() {
return totalSucceededGetClusterInfoRetrieved.lastStat().mean();
@@ -1454,6 +1490,14 @@ public final class RouterMetrics {
return numRemoveFromClusterNodeLabelsFailedRetrieved.value();
}
+ public int getUpdateSchedulerConfigurationFailedRetrieved() {
+ return numUpdateSchedulerConfigurationFailedRetrieved.value();
+ }
+
+ public int getSchedulerConfigurationFailedRetrieved() {
+ return numGetSchedulerConfigurationFailedRetrieved.value();
+ }
+
public int getClusterInfoFailedRetrieved() {
return numGetClusterInfoFailedRetrieved.value();
}
@@ -1773,6 +1817,16 @@ public final class RouterMetrics {
removeFromClusterNodeLabelsLatency.add(duration);
}
+ public void succeededUpdateSchedulerConfigurationRetrieved(long duration) {
+ totalSucceededUpdateSchedulerConfigurationRetrieved.add(duration);
+ updateSchedulerConfigLatency.add(duration);
+ }
+
+ public void succeededGetSchedulerConfigurationRetrieved(long duration) {
+ totalSucceededGetSchedulerConfigurationRetrieved.add(duration);
+ getSchedulerConfigurationLatency.add(duration);
+ }
+
public void succeededGetClusterInfoRetrieved(long duration) {
totalSucceededGetClusterInfoRetrieved.add(duration);
getClusterInfoLatency.add(duration);
@@ -2013,6 +2067,14 @@ public final class RouterMetrics {
numRemoveFromClusterNodeLabelsFailedRetrieved.incr();
}
+ public void incrUpdateSchedulerConfigurationFailedRetrieved() {
+ numUpdateSchedulerConfigurationFailedRetrieved.incr();
+ }
+
+ public void incrGetSchedulerConfigurationFailedRetrieved() {
+ numGetSchedulerConfigurationFailedRetrieved.incr();
+ }
+
public void incrGetClusterInfoFailedRetrieved() {
numGetClusterInfoFailedRetrieved.incr();
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
index 857e4c52c6f..9975823ec2b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
@@ -44,7 +44,6 @@ import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;
import org.apache.commons.collections.CollectionUtils;
-import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.impl.prefetch.Validate;
@@ -129,6 +128,7 @@ import org.apache.hadoop.yarn.server.router.webapp.dao.FederationBulkActivitiesI
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.SubClusterResult;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationSchedulerTypeInfo;
+import org.apache.hadoop.yarn.server.router.webapp.dao.FederationConfInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterUserInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterInfo;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@@ -136,6 +136,7 @@ import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
import org.apache.hadoop.yarn.util.LRUCacheHashMap;
+import org.apache.hadoop.yarn.webapp.dao.ConfInfo;
import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.MonotonicClock;
@@ -848,6 +849,29 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
}
}
+ /**
+ * Get the active subcluster in the federation.
+ *
+ * @param subClusterId subClusterId.
+ * @return subClusterInfo.
+ * @throws NotFoundException If the subclusters cannot be found.
+ */
+ private SubClusterInfo getActiveSubCluster(String subClusterId)
+ throws NotFoundException {
+ try {
+ SubClusterId pSubClusterId = SubClusterId.newInstance(subClusterId);
+ Map<SubClusterId, SubClusterInfo> subClusterInfoMap =
+ federationFacade.getSubClusters(true);
+ SubClusterInfo subClusterInfo = subClusterInfoMap.get(pSubClusterId);
+ if (subClusterInfo == null) {
+ throw new NotFoundException(subClusterId + " not found.");
+ }
+ return subClusterInfo;
+ } catch (YarnException e) {
+ throw new NotFoundException(e.getMessage());
+ }
+ }
+
/**
* The YARN Router will forward to the request to all the SubClusters to find
* where the node is running.
@@ -2906,17 +2930,117 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
throw new RuntimeException("getContainer Failed.");
}
+ /**
+ * This method updates the Scheduler configuration, and it is reachable by
+ * using {@link RMWSConsts#SCHEDULER_CONF}.
+ *
+ * @param mutationInfo the information for making scheduler configuration
+ * changes (supports adding, removing, or updating a queue, as well
+ * as global scheduler conf changes)
+ * @param hsr the servlet request
+ * @return Response containing the status code
+ * @throws AuthorizationException if the user is not authorized to invoke this
+ * method
+ * @throws InterruptedException if interrupted
+ */
@Override
public Response updateSchedulerConfiguration(SchedConfUpdateInfo mutationInfo,
- HttpServletRequest hsr)
- throws AuthorizationException, InterruptedException {
- throw new NotImplementedException("Code is not implemented");
+ HttpServletRequest hsr) throws AuthorizationException, InterruptedException {
+
+ // Make Sure mutationInfo is not null.
+ if (mutationInfo == null) {
+ routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
+ throw new IllegalArgumentException(
+ "Parameter error, the schedConfUpdateInfo is empty or null.");
+ }
+
+ // In federated mode, we may have a mix of multiple schedulers.
+ // In order to ensure accurate update scheduler configuration,
+ // we need users to explicitly set subClusterId.
+ String pSubClusterId = mutationInfo.getSubClusterId();
+ if (StringUtils.isBlank(pSubClusterId)) {
+ routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
+ throw new IllegalArgumentException("Parameter error, " +
+ "the subClusterId is empty or null.");
+ }
+
+ // Get the subClusterInfo, then update the scheduler configuration.
+ try {
+ long startTime = clock.getTime();
+ SubClusterInfo subClusterInfo = getActiveSubCluster(pSubClusterId);
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
+ subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ Response response = interceptor.updateSchedulerConfiguration(mutationInfo, hsr);
+ if (response != null) {
+ long endTime = clock.getTime();
+ routerMetrics.succeededUpdateSchedulerConfigurationRetrieved(endTime - startTime);
+ return Response.status(response.getStatus()).entity(response.getEntity()).build();
+ }
+ } catch (NotFoundException e) {
+ routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
+ RouterServerUtil.logAndThrowRunTimeException(e,
+ "Get subCluster error. subClusterId = %s", pSubClusterId);
+ } catch (Exception e) {
+ routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
+ RouterServerUtil.logAndThrowRunTimeException(e,
+ "UpdateSchedulerConfiguration error. subClusterId = %s", pSubClusterId);
+ }
+
+ routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
+ throw new RuntimeException("UpdateSchedulerConfiguration error. subClusterId = "
+ + pSubClusterId);
}
+ /**
+ * This method retrieves all the Scheduler configuration, and it is reachable
+ * by using {@link RMWSConsts#SCHEDULER_CONF}.
+ *
+ * @param hsr the servlet request
+ * @return Response containing the status code
+ * @throws AuthorizationException if the user is not authorized to invoke this
+ * method.
+ */
@Override
public Response getSchedulerConfiguration(HttpServletRequest hsr)
throws AuthorizationException {
- throw new NotImplementedException("Code is not implemented");
+ try {
+ long startTime = clock.getTime();
+ FederationConfInfo federationConfInfo = new FederationConfInfo();
+ Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
+ final HttpServletRequest hsrCopy = clone(hsr);
+ Class[] argsClasses = new Class[]{HttpServletRequest.class};
+ Object[] args = new Object[]{hsrCopy};
+ ClientMethod remoteMethod = new ClientMethod("getSchedulerConfiguration", argsClasses, args);
+ Map<SubClusterInfo, Response> responseMap =
+ invokeConcurrent(subClustersActive.values(), remoteMethod, Response.class);
+ responseMap.forEach((subClusterInfo, response) -> {
+ SubClusterId subClusterId = subClusterInfo.getSubClusterId();
+ if (response == null) {
+ String errorMsg = subClusterId + " Can't getSchedulerConfiguration.";
+ federationConfInfo.getErrorMsgs().add(errorMsg);
+ } else if (response.getStatus() == Status.BAD_REQUEST.getStatusCode()) {
+ String errorMsg = String.valueOf(response.getEntity());
+ federationConfInfo.getErrorMsgs().add(errorMsg);
+ } else if (response.getStatus() == Status.OK.getStatusCode()) {
+ ConfInfo fedConfInfo = ConfInfo.class.cast(response.getEntity());
+ fedConfInfo.setSubClusterId(subClusterId.getId());
+ federationConfInfo.getList().add(fedConfInfo);
+ }
+ });
+ long endTime = clock.getTime();
+ routerMetrics.succeededGetSchedulerConfigurationRetrieved(endTime - startTime);
+ return Response.status(Status.OK).entity(federationConfInfo).build();
+ } catch (NotFoundException e) {
+ RouterServerUtil.logAndThrowRunTimeException("get all active sub cluster(s) error.", e);
+ routerMetrics.incrGetSchedulerConfigurationFailedRetrieved();
+ } catch (Exception e) {
+ routerMetrics.incrGetSchedulerConfigurationFailedRetrieved();
+ RouterServerUtil.logAndThrowRunTimeException("getSchedulerConfiguration error.", e);
+ return Response.status(Status.BAD_REQUEST).entity("getSchedulerConfiguration error.").build();
+ }
+
+ routerMetrics.incrGetSchedulerConfigurationFailedRetrieved();
+ throw new RuntimeException("getSchedulerConfiguration error.");
}
@Override
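Because the Router-side update path above now insists on an explicit subClusterId, here is a hedged client-side sketch of building a valid mutation; the sub-cluster id "SC-1" and the capacity-scheduler key are placeholders, not values from this patch:

import java.util.HashMap;

import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;

public class FederatedSchedConfUpdateExample {
  public static SchedConfUpdateInfo buildUpdate() {
    SchedConfUpdateInfo update = new SchedConfUpdateInfo();
    // Mandatory in federated mode: name the sub-cluster whose scheduler is being mutated.
    update.setSubClusterId("SC-1");
    // One illustrative global change; queue add/remove/update entries work the same way.
    HashMap<String, String> global = new HashMap<>();
    global.put("yarn.scheduler.capacity.maximum-applications", "20000");
    update.setGlobalParams(global);
    return update;
  }
}

The resulting object is what FederationInterceptorREST#updateSchedulerConfiguration expects; without the subClusterId it fails fast with the IllegalArgumentException exercised in the test further below.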
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationConfInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationConfInfo.java
new file mode 100644
index 00000000000..6a5e611a4f8
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationConfInfo.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.router.webapp.dao;
+
+import org.apache.hadoop.yarn.webapp.dao.ConfInfo;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import java.util.ArrayList;
+import java.util.List;
+
+@XmlRootElement
+@XmlAccessorType(XmlAccessType.FIELD)
+public class FederationConfInfo extends ConfInfo {
+ @XmlElement(name = "subCluster")
+ private List<ConfInfo> list = new ArrayList<>();
+
+ @XmlElement(name = "errorMsgs")
+ private List<String> errorMsgs = new ArrayList<>();
+ public FederationConfInfo() {
+ } // JAXB needs this
+
+ public List<ConfInfo> getList() {
+ return list;
+ }
+
+ public void setList(List<ConfInfo> list) {
+ this.list = list;
+ }
+
+ public List<String> getErrorMsgs() {
+ return errorMsgs;
+ }
+
+ public void setErrorMsgs(List<String> errorMsgs) {
+ this.errorMsgs = errorMsgs;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java
index 4af7e8c7f5a..f8dc03a04c6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java
@@ -569,6 +569,16 @@ public class TestRouterMetrics {
metrics.incrGetBulkActivitiesFailedRetrieved();
}
+ public void getSchedulerConfigurationFailed() {
+ LOG.info("Mocked: failed getSchedulerConfiguration call");
+ metrics.incrGetSchedulerConfigurationFailedRetrieved();
+ }
+
+ public void updateSchedulerConfigurationFailedRetrieved() {
+ LOG.info("Mocked: failed updateSchedulerConfiguration call");
+ metrics.incrUpdateSchedulerConfigurationFailedRetrieved();
+ }
+
public void getClusterInfoFailed() {
LOG.info("Mocked: failed getClusterInfo call");
metrics.incrGetClusterInfoFailedRetrieved();
@@ -859,6 +869,16 @@ public class TestRouterMetrics {
metrics.succeededAddToClusterNodeLabelsRetrieved(duration);
}
+ public void getSchedulerConfigurationRetrieved(long duration) {
+ LOG.info("Mocked: successful GetSchedulerConfiguration call with duration {}", duration);
+ metrics.succeededGetSchedulerConfigurationRetrieved(duration);
+ }
+
+ public void getUpdateSchedulerConfigurationRetrieved(long duration) {
+ LOG.info("Mocked: successful UpdateSchedulerConfiguration call with duration {}", duration);
+ metrics.succeededUpdateSchedulerConfigurationRetrieved(duration);
+ }
+
public void getClusterInfoRetrieved(long duration) {
LOG.info("Mocked: successful GetClusterInfoRetrieved call with duration {}", duration);
metrics.succeededGetClusterInfoRetrieved(duration);
@@ -1889,6 +1909,52 @@ public class TestRouterMetrics {
metrics.getLatencySucceededAddToClusterNodeLabelsRetrieved(), ASSERT_DOUBLE_DELTA);
}
+ @Test
+ public void testGetSchedulerConfigurationRetrievedFailed() {
+ long totalBadBefore = metrics.getSchedulerConfigurationFailedRetrieved();
+ badSubCluster.getSchedulerConfigurationFailed();
+ Assert.assertEquals(totalBadBefore + 1,
+ metrics.getSchedulerConfigurationFailedRetrieved());
+ }
+
+ @Test
+ public void testGetSchedulerConfigurationRetrieved() {
+ long totalGoodBefore = metrics.getNumSucceededGetSchedulerConfigurationRetrieved();
+ goodSubCluster.getSchedulerConfigurationRetrieved(150);
+ Assert.assertEquals(totalGoodBefore + 1,
+ metrics.getNumSucceededGetSchedulerConfigurationRetrieved());
+ Assert.assertEquals(150,
+ metrics.getLatencySucceededGetSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA);
+ goodSubCluster.getSchedulerConfigurationRetrieved(300);
+ Assert.assertEquals(totalGoodBefore + 2,
+ metrics.getNumSucceededGetSchedulerConfigurationRetrieved());
+ Assert.assertEquals(225,
+ metrics.getLatencySucceededGetSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA);
+ }
+
+ @Test
+ public void testUpdateSchedulerConfigurationRetrievedFailed() {
+ long totalBadBefore = metrics.getUpdateSchedulerConfigurationFailedRetrieved();
+ badSubCluster.updateSchedulerConfigurationFailedRetrieved();
+ Assert.assertEquals(totalBadBefore + 1,
+ metrics.getUpdateSchedulerConfigurationFailedRetrieved());
+ }
+
+ @Test
+ public void testUpdateSchedulerConfigurationRetrieved() {
+ long totalGoodBefore = metrics.getNumSucceededUpdateSchedulerConfigurationRetrieved();
+ goodSubCluster.getUpdateSchedulerConfigurationRetrieved(150);
+ Assert.assertEquals(totalGoodBefore + 1,
+ metrics.getNumSucceededUpdateSchedulerConfigurationRetrieved());
+ Assert.assertEquals(150,
+ metrics.getLatencySucceededUpdateSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA);
+ goodSubCluster.getUpdateSchedulerConfigurationRetrieved(300);
+ Assert.assertEquals(totalGoodBefore + 2,
+ metrics.getNumSucceededUpdateSchedulerConfigurationRetrieved());
+ Assert.assertEquals(225,
+ metrics.getLatencySucceededUpdateSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA);
+ }
+
@Test
public void testGetClusterInfoRetrievedFailed() {
long totalBadBefore = metrics.getClusterInfoFailedRetrieved();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java
index c34167f9219..d4e1b5145cf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java
@@ -102,6 +102,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueu
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerTestUtilities;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.conf.MutableCSConfigurationProvider;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodeIDsInfo;
@@ -159,6 +160,8 @@ import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.hadoop.yarn.webapp.BadRequestException;
import org.apache.hadoop.yarn.webapp.ForbiddenException;
import org.apache.hadoop.yarn.webapp.NotFoundException;
+import org.apache.hadoop.yarn.webapp.dao.ConfInfo;
+import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -1007,7 +1010,7 @@ public class MockDefaultRequestInterceptorREST
}
if (resContext.getReservationId() == null) {
- throw new BadRequestException("Update operations must specify an existing ReservaitonId");
+ throw new BadRequestException("Update operations must specify an existing ReservationId");
}
ReservationRequestInterpreter[] values = ReservationRequestInterpreter.values();
@@ -1366,6 +1369,29 @@ public class MockDefaultRequestInterceptorREST
}
@Override
+ public Response updateSchedulerConfiguration(SchedConfUpdateInfo mutationInfo,
+ HttpServletRequest req) throws AuthorizationException, InterruptedException {
+ RMContext rmContext = mockRM.getRMContext();
+ MutableCSConfigurationProvider provider = new MutableCSConfigurationProvider(rmContext);
+ try {
+ Configuration conf = new Configuration();
+ conf.set(YarnConfiguration.SCHEDULER_CONFIGURATION_STORE_CLASS,
+ YarnConfiguration.MEMORY_CONFIGURATION_STORE);
+ provider.init(conf);
+ provider.logAndApplyMutation(UserGroupInformation.getCurrentUser(), mutationInfo);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ return Response.status(Status.OK).
+ entity("Configuration change successfully applied.").build();
+ }
+
+ @Override
+ public Response getSchedulerConfiguration(HttpServletRequest req) throws AuthorizationException {
+ return Response.status(Status.OK).entity(new ConfInfo(mockRM.getConfig()))
+ .build();
+ }
+
public ClusterInfo getClusterInfo() {
ClusterInfo clusterInfo = new ClusterInfo(mockRM);
return clusterInfo;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
index 784fbd15ce1..19bba51e270 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
@@ -126,6 +126,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationDelet
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ActivitiesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeAllocationInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.BulkActivitiesInfo;
+import org.apache.hadoop.yarn.server.router.webapp.dao.FederationConfInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo;
@@ -138,6 +139,9 @@ import org.apache.hadoop.yarn.util.MonotonicClock;
import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.util.YarnVersionInfo;
import org.apache.hadoop.yarn.webapp.BadRequestException;
+import org.apache.hadoop.yarn.webapp.dao.ConfInfo;
+import org.apache.hadoop.yarn.webapp.dao.QueueConfigInfo;
+import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
import org.junit.Assert;
import org.junit.Test;
@@ -171,6 +175,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
private final static int NUM_SUBCLUSTER = 4;
private static final int BAD_REQUEST = 400;
private static final int ACCEPTED = 202;
+ private static final int OK = 200;
private static String user = "test-user";
private TestableFederationInterceptorREST interceptor;
private MemoryFederationStateStore stateStore;
@@ -2134,6 +2139,35 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
() -> interceptor.removeFromClusterNodeLabels(oldNodeLabels1, null));
}
+ @Test
+ public void testGetSchedulerConfiguration() throws Exception {
+ Response response = interceptor.getSchedulerConfiguration(null);
+ Assert.assertNotNull(response);
+ Assert.assertEquals(OK, response.getStatus());
+
+ Object entity = response.getEntity();
+ Assert.assertNotNull(entity);
+ Assert.assertTrue(entity instanceof FederationConfInfo);
+
+ FederationConfInfo federationConfInfo = FederationConfInfo.class.cast(entity);
+ List<ConfInfo> confInfos = federationConfInfo.getList();
+ Assert.assertNotNull(confInfos);
+ Assert.assertEquals(4, confInfos.size());
+
+ List<String> errors = federationConfInfo.getErrorMsgs();
+ Assert.assertEquals(0, errors.size());
+
+ Set<String> subClusterSet = subClusters.stream()
+ .map(subClusterId -> subClusterId.getId()).collect(Collectors.toSet());
+
+ for (ConfInfo confInfo : confInfos) {
+ List<ConfInfo.ConfItem> confItems = confInfo.getItems();
+ Assert.assertNotNull(confItems);
+ Assert.assertTrue(confItems.size() > 0);
+ Assert.assertTrue(subClusterSet.contains(confInfo.getSubClusterId()));
+ }
+ }
+
@Test
public void testGetClusterUserInfo() {
String requestUserName = "test-user";
@@ -2173,6 +2207,41 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
}
}
+ @Test
+ public void testUpdateSchedulerConfigurationErrorMsg() throws Exception {
+ SchedConfUpdateInfo mutationInfo = new SchedConfUpdateInfo();
+ LambdaTestUtils.intercept(IllegalArgumentException.class,
+ "Parameter error, the subClusterId is empty or null.",
+ () -> interceptor.updateSchedulerConfiguration(mutationInfo, null));
+
+ LambdaTestUtils.intercept(IllegalArgumentException.class,
+ "Parameter error, the schedConfUpdateInfo is empty or null.",
+ () -> interceptor.updateSchedulerConfiguration(null, null));
+ }
+
+ @Test
+ public void testUpdateSchedulerConfiguration()
+ throws AuthorizationException, InterruptedException {
+ SchedConfUpdateInfo updateInfo = new SchedConfUpdateInfo();
+ updateInfo.setSubClusterId("1");
+ Map<String, String> goodUpdateMap = new HashMap<>();
+ goodUpdateMap.put("goodKey", "goodVal");
+ QueueConfigInfo goodUpdateInfo = new
+ QueueConfigInfo("root.default", goodUpdateMap);
+ updateInfo.getUpdateQueueInfo().add(goodUpdateInfo);
+ Response response = interceptor.updateSchedulerConfiguration(updateInfo, null);
+
+ Assert.assertNotNull(response);
+ Assert.assertEquals(OK, response.getStatus());
+
+ String expectMsg = "Configuration change successfully applied.";
+ Object entity = response.getEntity();
+ Assert.assertNotNull(entity);
+
+ String entityMsg = String.valueOf(entity);
+ Assert.assertEquals(expectMsg, entityMsg);
+ }
+
@Test
public void testGetClusterInfo() {
ClusterInfo clusterInfos = interceptor.getClusterInfo();
From b4bcbb9515b5b264156b379034b8e9c923bcb25d Mon Sep 17 00:00:00 2001
From: Viraj Jasani
Date: Wed, 29 Mar 2023 10:43:13 -0700
Subject: [PATCH 34/78] HDFS-16959. RBF: State store cache loading metrics
(#5497)
---
.../src/site/markdown/Metrics.md | 24 +++++++------
.../federation/metrics/StateStoreMetrics.java | 28 +++++++++++++++
.../federation/store/CachedRecordStore.java | 2 ++
.../driver/TestStateStoreDriverBase.java | 36 +++++++++++++++++++
.../store/driver/TestStateStoreFile.java | 12 +++++++
.../driver/TestStateStoreFileSystem.java | 12 +++++++
.../store/driver/TestStateStoreZK.java | 14 ++++++++
7 files changed, 117 insertions(+), 11 deletions(-)
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
index a551e3ae15f..0777fc42abe 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
@@ -592,17 +592,19 @@ StateStoreMetrics
-----------------
StateStoreMetrics shows the statistics of the State Store component in Router-based federation.
-| Name | Description |
-|:---- |:---- |
-| `ReadsNumOps` | Number of GET transactions for State Store within an interval time of metric |
-| `ReadsAvgTime` | Average time of GET transactions for State Store in milliseconds |
-| `WritesNumOps` | Number of PUT transactions for State Store within an interval time of metric |
-| `WritesAvgTime` | Average time of PUT transactions for State Store in milliseconds |
-| `RemovesNumOps` | Number of REMOVE transactions for State Store within an interval time of metric |
-| `RemovesAvgTime` | Average time of REMOVE transactions for State Store in milliseconds |
-| `FailuresNumOps` | Number of failed transactions for State Store within an interval time of metric |
-| `FailuresAvgTime` | Average time of failed transactions for State Store in milliseconds |
-| `Cache`*BaseRecord*`Size` | Number of store records to cache in State Store |
+| Name | Description |
+|:------------------------------------------|:-----------------------------------------------------------------------------------|
+| `ReadsNumOps` | Number of GET transactions for State Store within an interval time of metric |
+| `ReadsAvgTime` | Average time of GET transactions for State Store in milliseconds |
+| `WritesNumOps` | Number of PUT transactions for State Store within an interval time of metric |
+| `WritesAvgTime` | Average time of PUT transactions for State Store in milliseconds |
+| `RemovesNumOps` | Number of REMOVE transactions for State Store within an interval time of metric |
+| `RemovesAvgTime` | Average time of REMOVE transactions for State Store in milliseconds |
+| `FailuresNumOps` | Number of failed transactions for State Store within an interval time of metric |
+| `FailuresAvgTime` | Average time of failed transactions for State Store in milliseconds |
+| `Cache`*BaseRecord*`Size` | Number of store records to cache in State Store |
+| `Cache`*BaseRecord*`LoadNumOps` | Number of times store records are loaded in the State Store Cache from State Store |
+| `Cache`*BaseRecord*`LoadAvgTime` | Average time of loading State Store Cache from State Store in milliseconds |
yarn context
============
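The two new `Load` rows form a NumOps/AvgTime rate pair. As a minimal sketch, assuming the MutableRate-based wiring that StateStoreMetrics.setCacheLoading introduces further down in this patch (the class and method below are illustrative only), the pair is produced like this:

import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableRate;

public class CacheLoadMetricSketch {
  private final MetricsRegistry registry = new MetricsRegistry("StateStoreMetrics");
  // One MutableRate per record type, e.g. "CacheMountTableLoad".
  private final MutableRate mountTableLoad =
      registry.newRate("CacheMountTableLoad", "MountTable", false);

  public void recordLoad(long elapsedMs) {
    // Each cache refresh adds one sample: NumOps counts the refreshes,
    // AvgTime reports the mean load duration in milliseconds.
    mountTableLoad.add(elapsedMs);
  }
}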
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java
index 371b33e05e2..b5c4047acd1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.federation.metrics;
import static org.apache.hadoop.metrics2.impl.MsInfo.ProcessName;
import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -54,6 +55,7 @@ public class StateStoreMetrics implements StateStoreMBean {
private MutableRate failures;
private Map<String, MutableGaugeInt> cacheSizes;
+ private final Map<String, MutableRate> cacheLoadMetrics = new HashMap<>();
protected StateStoreMetrics() {}
@@ -150,6 +152,32 @@ public class StateStoreMetrics implements StateStoreMBean {
counter.set(count);
}
+ /**
+ * Set the cache loading metrics for the state store interface.
+ *
+ * @param name Name of the record of the cache.
+ * @param value The time duration interval as the cache value.
+ */
+ public void setCacheLoading(String name, long value) {
+ String cacheLoad = "Cache" + name + "Load";
+ MutableRate cacheLoadMetric = cacheLoadMetrics.get(cacheLoad);
+ if (cacheLoadMetric == null) {
+ cacheLoadMetric = registry.newRate(cacheLoad, name, false);
+ cacheLoadMetrics.put(cacheLoad, cacheLoadMetric);
+ }
+ cacheLoadMetrics.get(cacheLoad).add(value);
+ }
+
+ /**
+ * Retrieve unmodifiable map of cache loading metrics.
+ *
+ * @return unmodifiable map of cache loading metrics.
+ */
+ @VisibleForTesting
+ public Map<String, MutableRate> getCacheLoadMetrics() {
+ return Collections.unmodifiableMap(cacheLoadMetrics);
+ }
+
@VisibleForTesting
public void reset() {
reads.resetMinMax();
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java
index 6fea9b9946d..08dcc1c6e46 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java
@@ -113,6 +113,7 @@ public abstract class CachedRecordStore
if (force || isUpdateTime()) {
List<R> newRecords = null;
long t = -1;
+ long startTime = Time.monotonicNow();
try {
QueryResult<R> result = getDriver().get(getRecordClass());
newRecords = result.getRecords();
@@ -143,6 +144,7 @@ public abstract class CachedRecordStore
StateStoreMetrics metrics = getDriver().getMetrics();
if (metrics != null) {
String recordName = getRecordClass().getSimpleName();
+ metrics.setCacheLoading(recordName, Time.monotonicNow() - startTime);
metrics.setCacheSize(recordName, this.records.size());
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java
index 4eb38b06b12..48d84f9326b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java
@@ -48,6 +48,8 @@ import org.apache.hadoop.hdfs.server.federation.store.records.Query;
import org.apache.hadoop.hdfs.server.federation.store.records.QueryResult;
import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
import org.apache.hadoop.hdfs.server.federation.store.records.StateStoreVersion;
+import org.apache.hadoop.metrics2.lib.MutableRate;
+
import org.junit.After;
import org.junit.AfterClass;
import org.slf4j.Logger;
@@ -76,6 +78,10 @@ public class TestStateStoreDriverBase {
return stateStore.getDriver();
}
+ protected StateStoreService getStateStoreService() {
+ return stateStore;
+ }
+
@After
public void cleanMetrics() {
if (stateStore != null) {
@@ -574,6 +580,36 @@ public class TestStateStoreDriverBase {
return getters;
}
+ public long getMountTableCacheLoadSamples(StateStoreDriver driver) throws IOException {
+ final MutableRate mountTableCache = getMountTableCache(driver);
+ return mountTableCache.lastStat().numSamples();
+ }
+
+ private static MutableRate getMountTableCache(StateStoreDriver driver) throws IOException {
+ StateStoreMetrics metrics = stateStore.getMetrics();
+ final Query<MountTable> query = new Query<>(MountTable.newInstance());
+ driver.getMultiple(MountTable.class, query);
+ final Map<String, MutableRate> cacheLoadMetrics = metrics.getCacheLoadMetrics();
+ final MutableRate mountTableCache = cacheLoadMetrics.get("CacheMountTableLoad");
+ assertNotNull("CacheMountTableLoad should be present in the state store metrics",
+ mountTableCache);
+ return mountTableCache;
+ }
+
+ public void testCacheLoadMetrics(StateStoreDriver driver, long numRefresh,
+ double expectedHigherThan) throws IOException, IllegalArgumentException {
+ final MutableRate mountTableCache = getMountTableCache(driver);
+ // CacheMountTableLoadNumOps
+ final long mountTableCacheLoadNumOps = getMountTableCacheLoadSamples(driver);
+ assertEquals("Num of samples collected should match", numRefresh, mountTableCacheLoadNumOps);
+ // CacheMountTableLoadAvgTime ms
+ final double mountTableCacheLoadAvgTimeMs = mountTableCache.lastStat().mean();
+ assertTrue(
+ "Mean time duration for cache load is expected to be higher than " + expectedHigherThan
+ + " ms." + " Actual value: " + mountTableCacheLoadAvgTimeMs,
+ mountTableCacheLoadAvgTimeMs > expectedHigherThan);
+ }
+
/**
* Get the type of field.
*
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java
index a8a9020744c..b01500b2ea1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java
@@ -73,4 +73,16 @@ public class TestStateStoreFile extends TestStateStoreDriverBase {
throws IllegalArgumentException, IllegalAccessException, IOException {
testMetrics(getStateStoreDriver());
}
+
+ @Test
+ public void testCacheLoadMetrics() throws IOException {
+ // inject value of CacheMountTableLoad as -1 initially, if tests get CacheMountTableLoadAvgTime
+ // value as -1 ms, that would mean no other sample with value >= 0 would have been received and
+ // hence this would be failure to assert that mount table avg load time is higher than -1
+ getStateStoreService().getMetrics().setCacheLoading("MountTable", -1);
+ long curMountTableLoadNum = getMountTableCacheLoadSamples(getStateStoreDriver());
+ getStateStoreService().refreshCaches(true);
+ testCacheLoadMetrics(getStateStoreDriver(), curMountTableLoadNum + 1, -1);
+ }
+
}
\ No newline at end of file
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java
index dbd4b9bdae2..8c06e6b8ed1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java
@@ -115,4 +115,16 @@ public class TestStateStoreFileSystem extends TestStateStoreDriverBase {
testInsertWithErrorDuringWrite(driver, MembershipState.class);
}
+
+ @Test
+ public void testCacheLoadMetrics() throws IOException {
+ // inject value of CacheMountTableLoad as -1 initially, if tests get CacheMountTableLoadAvgTime
+ // value as -1 ms, that would mean no other sample with value >= 0 would have been received and
+ // hence this would be failure to assert that mount table avg load time is higher than -1
+ getStateStoreService().getMetrics().setCacheLoading("MountTable", -1);
+ long curMountTableLoadNum = getMountTableCacheLoadSamples(getStateStoreDriver());
+ getStateStoreService().refreshCaches(true);
+ getStateStoreService().refreshCaches(true);
+ testCacheLoadMetrics(getStateStoreDriver(), curMountTableLoadNum + 2, -1);
+ }
}
\ No newline at end of file
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java
index 3ad106697ac..f94e415b4d5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java
@@ -206,4 +206,18 @@ public class TestStateStoreZK extends TestStateStoreDriverBase {
stateStoreDriver.setEnableConcurrent(true);
testFetchErrors(stateStoreDriver);
}
+
+ @Test
+ public void testCacheLoadMetrics() throws IOException {
+ // inject value of CacheMountTableLoad as -1 initially, if tests get CacheMountTableLoadAvgTime
+ // value as -1 ms, that would mean no other sample with value >= 0 would have been received and
+ // hence this would be failure to assert that mount table avg load time is higher than -1
+ getStateStoreService().getMetrics().setCacheLoading("MountTable", -1);
+ long curMountTableLoadNum = getMountTableCacheLoadSamples(getStateStoreDriver());
+ getStateStoreService().refreshCaches(true);
+ getStateStoreService().refreshCaches(true);
+ getStateStoreService().refreshCaches(true);
+ testCacheLoadMetrics(getStateStoreDriver(), curMountTableLoadNum + 3, -1);
+ }
+
}
\ No newline at end of file
From 016362a28b516630c748809261a40ac63fdbc4f4 Mon Sep 17 00:00:00 2001
From: Galsza <109229906+Galsza@users.noreply.github.com>
Date: Thu, 30 Mar 2023 01:12:02 +0200
Subject: [PATCH 35/78] HADOOP-18548. Hadoop Archive tool (HAR) should acquire
delegation tokens from source and destination file systems (#5355)
Signed-off-by: Chris Nauroth
---
.../org/apache/hadoop/tools/HadoopArchives.java | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
index 471f3549449..6d082380ffa 100644
--- a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
+++ b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
@@ -37,6 +37,8 @@ import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.Parser;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.security.Credentials;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -487,6 +489,11 @@ public class HadoopArchives implements Tool {
+ " should be a directory but is a file");
}
conf.set(DST_DIR_LABEL, outputPath.toString());
+ Credentials credentials = conf.getCredentials();
+ Path[] allPaths = new Path[] {parentPath, dest};
+ TokenCache.obtainTokensForNamenodes(credentials, allPaths, conf);
+ conf.setCredentials(credentials);
+
Path stagingArea;
try {
stagingArea = JobSubmissionFiles.getStagingDir(new Cluster(conf),
@@ -498,11 +505,11 @@ public class HadoopArchives implements Tool {
NAME+"_"+Integer.toString(new Random().nextInt(Integer.MAX_VALUE), 36));
FsPermission mapredSysPerms =
new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
- FileSystem.mkdirs(jobDirectory.getFileSystem(conf), jobDirectory,
- mapredSysPerms);
+ FileSystem jobfs = jobDirectory.getFileSystem(conf);
+ FileSystem.mkdirs(jobfs, jobDirectory,
+ mapredSysPerms);
conf.set(JOB_DIR_LABEL, jobDirectory.toString());
//get a tmp directory for input splits
- FileSystem jobfs = jobDirectory.getFileSystem(conf);
Path srcFiles = new Path(jobDirectory, "_har_src_files");
conf.set(SRC_LIST_LABEL, srcFiles.toString());
SequenceFile.Writer srcWriter = SequenceFile.createWriter(jobfs, conf,
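The change above is the standard MapReduce pattern for collecting delegation tokens before job submission, so the archive job can run against secure (Kerberos) clusters. A self-contained sketch of that pattern, with an illustrative class and method name:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.security.Credentials;

public final class HarTokenSketch {
  static void addDelegationTokens(JobConf conf, Path srcParent, Path archiveDest)
      throws IOException {
    Credentials credentials = conf.getCredentials();
    // Obtain (or reuse) a delegation token for each path's filesystem and
    // store it in the job credentials.
    TokenCache.obtainTokensForNamenodes(
        credentials, new Path[] {srcParent, archiveDest}, conf);
    conf.setCredentials(credentials);
  }
}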
From 389b3ea6e3acd9cb3e3ab22b2e021499692a68f0 Mon Sep 17 00:00:00 2001
From: sreeb-msft <111426823+sreeb-msft@users.noreply.github.com>
Date: Fri, 31 Mar 2023 23:45:15 +0530
Subject: [PATCH 36/78] HADOOP-18012. ABFS: Enable config controlled ETag check
for Rename idempotency (#5488)
To support recovery from network failures during rename, the abfs client
fetches the etag of the source file and, when recovering from a
failure, uses this etag to determine whether the rename succeeded
before the failure happened.
* This works for files, but not directories
* It adds the overhead of a HEAD request before each rename.
* The option can be disabled by setting "fs.azure.enable.rename.resilience"
to false
Contributed by Sree Bhattacharyya
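A minimal sketch of switching the new behaviour off; the key and its default come from the ConfigurationKeys and FileSystemConfigurations changes in this patch, while the wrapper class is only illustrative:

import org.apache.hadoop.conf.Configuration;

public final class RenameResilienceConfig {
  public static Configuration withoutRenameResilience(Configuration base) {
    Configuration conf = new Configuration(base);
    // Default is true; false skips the pre-rename HEAD request and the
    // etag-based recovery it enables.
    conf.setBoolean("fs.azure.enable.rename.resilience", false);
    return conf;
  }
}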
---
.../hadoop/fs/azurebfs/AbfsConfiguration.java | 11 +
.../fs/azurebfs/AzureBlobFileSystem.java | 6 +-
.../fs/azurebfs/AzureBlobFileSystemStore.java | 4 +-
.../azurebfs/constants/ConfigurationKeys.java | 3 +
.../constants/FileSystemConfigurations.java | 1 +
.../fs/azurebfs/services/AbfsClient.java | 199 ++++++---
.../services/AbfsClientRenameResult.java | 12 +
.../azurebfs/services/AbfsRestOperation.java | 53 ++-
...ITestAzureBlobFileSystemDelegationSAS.java | 8 +-
.../fs/azurebfs/ITestCustomerProvidedKey.java | 6 +-
.../services/TestAbfsRenameRetryRecovery.java | 414 +++++++++++++++++-
11 files changed, 621 insertions(+), 96 deletions(-)
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
index 124c4d9de72..0bcb97a8496 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
@@ -333,6 +333,10 @@ public class AbfsConfiguration{
FS_AZURE_ENABLE_ABFS_LIST_ITERATOR, DefaultValue = DEFAULT_ENABLE_ABFS_LIST_ITERATOR)
private boolean enableAbfsListIterator;
+ @BooleanConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_ABFS_RENAME_RESILIENCE, DefaultValue = DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE)
+ private boolean renameResilience;
+
public AbfsConfiguration(final Configuration rawConfig, String accountName)
throws IllegalAccessException, InvalidConfigurationValueException, IOException {
this.rawConfig = ProviderUtils.excludeIncompatibleCredentialProviders(
@@ -1139,4 +1143,11 @@ public class AbfsConfiguration{
this.enableAbfsListIterator = enableAbfsListIterator;
}
+ public boolean getRenameResilience() {
+ return renameResilience;
+ }
+
+ void setRenameResilience(boolean actualResilience) {
+ renameResilience = actualResilience;
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java
index 5534b5fb44a..9c9d6f561d7 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java
@@ -201,9 +201,9 @@ public class AzureBlobFileSystem extends FileSystem
tracingHeaderFormat = abfsConfiguration.getTracingHeaderFormat();
this.setWorkingDirectory(this.getHomeDirectory());
+ TracingContext tracingContext = new TracingContext(clientCorrelationId,
+ fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, listener);
if (abfsConfiguration.getCreateRemoteFileSystemDuringInitialization()) {
- TracingContext tracingContext = new TracingContext(clientCorrelationId,
- fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, listener);
if (this.tryGetFileStatus(new Path(AbfsHttpConstants.ROOT_PATH), tracingContext) == null) {
try {
this.createFileSystem(tracingContext);
@@ -442,7 +442,7 @@ public class AzureBlobFileSystem extends FileSystem
}
// Non-HNS account need to check dst status on driver side.
- if (!abfsStore.getIsNamespaceEnabled(tracingContext) && dstFileStatus == null) {
+ if (!getIsNamespaceEnabled(tracingContext) && dstFileStatus == null) {
dstFileStatus = tryGetFileStatus(qualifiedDstPath, tracingContext);
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
index 3cee9b4f90c..79ffc796c3a 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
@@ -923,9 +923,11 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport {
do {
try (AbfsPerfInfo perfInfo = startTracking("rename", "renamePath")) {
+ boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext);
final AbfsClientRenameResult abfsClientRenameResult =
client.renamePath(sourceRelativePath, destinationRelativePath,
- continuation, tracingContext, sourceEtag, false);
+ continuation, tracingContext, sourceEtag, false,
+ isNamespaceEnabled);
AbfsRestOperation op = abfsClientRenameResult.getOp();
perfInfo.registerResult(op.getResult());
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
index e3052cd7bbc..872364a8e61 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
@@ -238,6 +238,9 @@ public final class ConfigurationKeys {
/** Key for rate limit capacity, as used by IO operations which try to throttle themselves. */
public static final String FS_AZURE_ABFS_IO_RATE_LIMIT = "fs.azure.io.rate.limit";
+ /** Add extra resilience to rename failures, at the expense of performance. */
+ public static final String FS_AZURE_ABFS_RENAME_RESILIENCE = "fs.azure.enable.rename.resilience";
+
public static String accountProperty(String property, String account) {
return property + "." + account;
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
index 68b492a5791..32f9966e30a 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
@@ -118,6 +118,7 @@ public final class FileSystemConfigurations {
public static final int STREAM_ID_LEN = 12;
public static final boolean DEFAULT_ENABLE_ABFS_LIST_ITERATOR = true;
+ public static final boolean DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE = true;
/**
* Limit of queued block upload operations before writes
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
index 2c367333300..77b8dcb2b98 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
@@ -55,6 +55,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants;
import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;
import org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException;
@@ -68,6 +69,7 @@ import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
import org.apache.hadoop.util.concurrent.HadoopExecutors;
+import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.apache.commons.lang3.StringUtils.isNotEmpty;
import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS;
import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader;
@@ -77,8 +79,8 @@ import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.S
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME;
import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*;
-import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND;
/**
* AbfsClient.
@@ -106,9 +108,12 @@ public class AbfsClient implements Closeable {
private final ListeningScheduledExecutorService executorService;
- /** logging the rename failure if metadata is in an incomplete state. */
- private static final LogExactlyOnce ABFS_METADATA_INCOMPLETE_RENAME_FAILURE =
- new LogExactlyOnce(LOG);
+ private boolean renameResilience;
+
+ /**
+ * logging the rename failure if metadata is in an incomplete state.
+ */
+ private static final LogExactlyOnce ABFS_METADATA_INCOMPLETE_RENAME_FAILURE = new LogExactlyOnce(LOG);
private AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredentials,
final AbfsConfiguration abfsConfiguration,
@@ -123,6 +128,7 @@ public class AbfsClient implements Closeable {
this.accountName = abfsConfiguration.getAccountName().substring(0, abfsConfiguration.getAccountName().indexOf(AbfsHttpConstants.DOT));
this.authType = abfsConfiguration.getAuthType(accountName);
this.intercept = AbfsThrottlingInterceptFactory.getInstance(accountName, abfsConfiguration);
+ this.renameResilience = abfsConfiguration.getRenameResilience();
String encryptionKey = this.abfsConfiguration
.getClientProvidedEncryptionKey();
@@ -504,27 +510,55 @@ public class AbfsClient implements Closeable {
* took place.
* As rename recovery is only attempted if the source etag is non-empty,
* in normal rename operations rename recovery will never happen.
- * @param source path to source file
- * @param destination destination of rename.
- * @param continuation continuation.
- * @param tracingContext trace context
- * @param sourceEtag etag of source file. may be null or empty
+ *
+ * @param source path to source file
+ * @param destination destination of rename.
+ * @param continuation continuation.
+ * @param tracingContext trace context
+ * @param sourceEtag etag of source file. may be null or empty
* @param isMetadataIncompleteState was there a rename failure due to
* incomplete metadata state?
+ * @param isNamespaceEnabled whether namespace enabled account or not
* @return AbfsClientRenameResult result of rename operation indicating the
* AbfsRest operation, rename recovery and incomplete metadata state failure.
* @throws AzureBlobFileSystemException failure, excluding any recovery from overload failures.
*/
public AbfsClientRenameResult renamePath(
- final String source,
- final String destination,
- final String continuation,
- final TracingContext tracingContext,
- final String sourceEtag,
- boolean isMetadataIncompleteState)
+ final String source,
+ final String destination,
+ final String continuation,
+ final TracingContext tracingContext,
+ String sourceEtag,
+ boolean isMetadataIncompleteState,
+ boolean isNamespaceEnabled)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
+ final boolean hasEtag = !isEmpty(sourceEtag);
+
+ boolean shouldAttemptRecovery = renameResilience && isNamespaceEnabled;
+ if (!hasEtag && shouldAttemptRecovery) {
+ // in case eTag is already not supplied to the API
+ // and rename resilience is expected and it is an HNS enabled account
+ // fetch the source etag to be used later in recovery
+ try {
+ final AbfsRestOperation srcStatusOp = getPathStatus(source,
+ false, tracingContext);
+ if (srcStatusOp.hasResult()) {
+ final AbfsHttpOperation result = srcStatusOp.getResult();
+ sourceEtag = extractEtagHeader(result);
+ // and update the directory status.
+ boolean isDir = checkIsDir(result);
+ shouldAttemptRecovery = !isDir;
+ LOG.debug("Retrieved etag of source for rename recovery: {}; isDir={}", sourceEtag, isDir);
+ }
+ } catch (AbfsRestOperationException e) {
+ throw new AbfsRestOperationException(e.getStatusCode(), SOURCE_PATH_NOT_FOUND.getErrorCode(),
+ e.getMessage(), e);
+ }
+
+ }
+
String encodedRenameSource = urlEncode(FORWARD_SLASH + this.getFileSystem() + source);
if (authType == AuthType.SAS) {
final AbfsUriQueryBuilder srcQueryBuilder = new AbfsUriQueryBuilder();
@@ -541,12 +575,7 @@ public class AbfsClient implements Closeable {
appendSASTokenToQuery(destination, SASTokenProvider.RENAME_DESTINATION_OPERATION, abfsUriQueryBuilder);
final URL url = createRequestUrl(destination, abfsUriQueryBuilder.toString());
- final AbfsRestOperation op = new AbfsRestOperation(
- AbfsRestOperationType.RenamePath,
- this,
- HTTP_METHOD_PUT,
- url,
- requestHeaders);
+ final AbfsRestOperation op = createRenameRestOperation(url, requestHeaders);
try {
incrementAbfsRenamePath();
op.execute(tracingContext);
@@ -557,48 +586,74 @@ public class AbfsClient implements Closeable {
// isMetadataIncompleteState is used for renameRecovery(as the 2nd param).
return new AbfsClientRenameResult(op, isMetadataIncompleteState, isMetadataIncompleteState);
} catch (AzureBlobFileSystemException e) {
- // If we have no HTTP response, throw the original exception.
- if (!op.hasResult()) {
- throw e;
- }
-
- // ref: HADOOP-18242. Rename failure occurring due to a rare case of
- // tracking metadata being in incomplete state.
- if (op.getResult().getStorageErrorCode()
- .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode())
- && !isMetadataIncompleteState) {
- //Logging
- ABFS_METADATA_INCOMPLETE_RENAME_FAILURE
- .info("Rename Failure attempting to resolve tracking metadata state and retrying.");
+ // If we have no HTTP response, throw the original exception.
+ if (!op.hasResult()) {
+ throw e;
+ }
+ // ref: HADOOP-18242. Rename failure occurring due to a rare case of
+ // tracking metadata being in incomplete state.
+ if (op.getResult().getStorageErrorCode()
+ .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode())
+ && !isMetadataIncompleteState) {
+ //Logging
+ ABFS_METADATA_INCOMPLETE_RENAME_FAILURE
+ .info("Rename Failure attempting to resolve tracking metadata state and retrying.");
+ // rename recovery should be attempted in this case also
+ shouldAttemptRecovery = true;
+ isMetadataIncompleteState = true;
+ String sourceEtagAfterFailure = sourceEtag;
+ if (isEmpty(sourceEtagAfterFailure)) {
// Doing a HEAD call resolves the incomplete metadata state and
// then we can retry the rename operation.
AbfsRestOperation sourceStatusOp = getPathStatus(source, false,
tracingContext);
- isMetadataIncompleteState = true;
// Extract the sourceEtag, using the status Op, and set it
// for future rename recovery.
AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult();
- String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult);
- renamePath(source, destination, continuation, tracingContext,
- sourceEtagAfterFailure, isMetadataIncompleteState);
+ sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult);
}
- // if we get out of the condition without a successful rename, then
- // it isn't metadata incomplete state issue.
- isMetadataIncompleteState = false;
+ renamePath(source, destination, continuation, tracingContext,
+ sourceEtagAfterFailure, isMetadataIncompleteState, isNamespaceEnabled);
+ }
+ // if we get out of the condition without a successful rename, then
+ // it isn't metadata incomplete state issue.
+ isMetadataIncompleteState = false;
- boolean etagCheckSucceeded = renameIdempotencyCheckOp(
- source,
- sourceEtag, op, destination, tracingContext);
- if (!etagCheckSucceeded) {
- // idempotency did not return different result
- // throw back the exception
- throw e;
- }
+ // setting default rename recovery success to false
+ boolean etagCheckSucceeded = false;
+ if (shouldAttemptRecovery) {
+ etagCheckSucceeded = renameIdempotencyCheckOp(
+ source,
+ sourceEtag, op, destination, tracingContext);
+ }
+ if (!etagCheckSucceeded) {
+ // idempotency did not return different result
+ // throw back the exception
+ throw e;
+ }
return new AbfsClientRenameResult(op, true, isMetadataIncompleteState);
}
}
+ private boolean checkIsDir(AbfsHttpOperation result) {
+ String resourceType = result.getResponseHeader(
+ HttpHeaderConfigurations.X_MS_RESOURCE_TYPE);
+ return resourceType != null
+ && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY);
+ }
+
+ @VisibleForTesting
+ AbfsRestOperation createRenameRestOperation(URL url, List<AbfsHttpHeader> requestHeaders) {
+ AbfsRestOperation op = new AbfsRestOperation(
+ AbfsRestOperationType.RenamePath,
+ this,
+ HTTP_METHOD_PUT,
+ url,
+ requestHeaders);
+ return op;
+ }
+
private void incrementAbfsRenamePath() {
abfsCounters.incrementCounter(RENAME_PATH_ATTEMPTS, 1);
}
@@ -628,28 +683,44 @@ public class AbfsClient implements Closeable {
TracingContext tracingContext) {
Preconditions.checkArgument(op.hasResult(), "Operations has null HTTP response");
- if ((op.isARetriedRequest())
- && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND)
- && isNotEmpty(sourceEtag)) {
-
- // Server has returned HTTP 404, which means rename source no longer
- // exists. Check on destination status and if its etag matches
- // that of the source, consider it to be a success.
- LOG.debug("rename {} to {} failed, checking etag of destination",
- source, destination);
+ // removing isDir from debug logs as it can be misleading
+ LOG.debug("rename({}, {}) failure {}; retry={} etag {}",
+ source, destination, op.getResult().getStatusCode(), op.isARetriedRequest(), sourceEtag);
+ if (!(op.isARetriedRequest()
+ && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND))) {
+ // only attempt recovery if the failure was a 404 on a retried rename request.
+ return false;
+ }
+ if (isNotEmpty(sourceEtag)) {
+ // Server has returned HTTP 404, we have an etag, so see
+ // if the rename has actually taken place,
+ LOG.info("rename {} to {} failed, checking etag of destination",
+ source, destination);
try {
- final AbfsRestOperation destStatusOp = getPathStatus(destination,
- false, tracingContext);
+ final AbfsRestOperation destStatusOp = getPathStatus(destination, false, tracingContext);
final AbfsHttpOperation result = destStatusOp.getResult();
- return result.getStatusCode() == HttpURLConnection.HTTP_OK
- && sourceEtag.equals(extractEtagHeader(result));
- } catch (AzureBlobFileSystemException ignored) {
+ final boolean recovered = result.getStatusCode() == HttpURLConnection.HTTP_OK
+ && sourceEtag.equals(extractEtagHeader(result));
+ LOG.info("File rename has taken place: recovery {}",
+ recovered ? "succeeded" : "failed");
+ return recovered;
+
+ } catch (AzureBlobFileSystemException ex) {
// GetFileStatus on the destination failed, the rename did not take place
+ // or some other failure. log and swallow.
+ LOG.debug("Failed to get status of path {}", destination, ex);
}
+ } else {
+ LOG.debug("No source etag; unable to probe for the operation's success");
}
- return false;
+ return false;
+ }
+
+ @VisibleForTesting
+ boolean isSourceDestEtagEqual(String sourceEtag, AbfsHttpOperation result) {
+ return sourceEtag.equals(extractEtagHeader(result));
}
public AbfsRestOperation append(final String path, final byte[] buffer,
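Read end to end, the reworked renameIdempotencyCheckOp above reduces to the following decision. This condensation is illustrative only; the helper below is not part of the patch:

final class RenameRecoverySketch {
  static boolean renameRecovered(boolean isRetriedRequest, int statusCode,
      String sourceEtag, String destinationEtag) {
    // Recovery is only considered when a retried rename came back 404.
    if (!(isRetriedRequest && statusCode == 404)) {
      return false;
    }
    // Without a source etag there is nothing to compare against.
    if (sourceEtag == null || sourceEtag.isEmpty()) {
      return false;
    }
    // If the destination now carries the source's etag, the earlier attempt
    // actually succeeded and the rename is reported as recovered.
    return sourceEtag.equals(destinationEtag);
  }
}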
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java
index 86e3473a9fe..76648cfc44b 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java
@@ -58,4 +58,16 @@ public class AbfsClientRenameResult {
public boolean isIncompleteMetadataState() {
return isIncompleteMetadataState;
}
+
+ @Override
+ public String toString() {
+ return "AbfsClientRenameResult{"
+ + "op="
+ + op
+ + ", renameRecovered="
+ + renameRecovered
+ + ", isIncompleteMetadataState="
+ + isIncompleteMetadataState
+ + '}';
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java
index a9a72635422..6402be72ddc 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java
@@ -276,26 +276,8 @@ public class AbfsRestOperation {
incrementCounter(AbfsStatistic.CONNECTIONS_MADE, 1);
tracingContext.constructHeader(httpOperation, failureReason);
- switch(client.getAuthType()) {
- case Custom:
- case OAuth:
- LOG.debug("Authenticating request with OAuth2 access token");
- httpOperation.setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION,
- client.getAccessToken());
- break;
- case SAS:
- // do nothing; the SAS token should already be appended to the query string
- httpOperation.setMaskForSAS(); //mask sig/oid from url for logs
- break;
- case SharedKey:
- // sign the HTTP request
- LOG.debug("Signing request with shared key");
- // sign the HTTP request
- client.getSharedKeyCredentials().signRequest(
- httpOperation.getConnection(),
- hasRequestBody ? bufferLength : 0);
- break;
- }
+ signRequest(httpOperation, hasRequestBody ? bufferLength : 0);
+
} catch (IOException e) {
LOG.debug("Auth failure: {}, {}", method, url);
throw new AbfsRestOperationException(-1, null,
@@ -376,6 +358,37 @@ public class AbfsRestOperation {
return true;
}
+ /**
+ * Sign an operation.
+ * @param httpOperation operation to sign
+ * @param bytesToSign how many bytes to sign for shared key auth.
+ * @throws IOException failure
+ */
+ @VisibleForTesting
+ public void signRequest(final AbfsHttpOperation httpOperation, int bytesToSign) throws IOException {
+ switch(client.getAuthType()) {
+ case Custom:
+ case OAuth:
+ LOG.debug("Authenticating request with OAuth2 access token");
+ httpOperation.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION,
+ client.getAccessToken());
+ break;
+ case SAS:
+ // do nothing; the SAS token should already be appended to the query string
+ httpOperation.setMaskForSAS(); //mask sig/oid from url for logs
+ break;
+ case SharedKey:
+ default:
+ // sign the HTTP request
+ LOG.debug("Signing request with shared key");
+ // sign the HTTP request
+ client.getSharedKeyCredentials().signRequest(
+ httpOperation.getConnection(),
+ bytesToSign);
+ break;
+ }
+ }
+
/**
* Creates new object of {@link AbfsHttpOperation} with the url, method, and
* requestHeaders fields of the AbfsRestOperation object.
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java
index b164689ef80..5735423aaf9 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java
@@ -70,6 +70,8 @@ public class ITestAzureBlobFileSystemDelegationSAS extends AbstractAbfsIntegrati
private static final Logger LOG =
LoggerFactory.getLogger(ITestAzureBlobFileSystemDelegationSAS.class);
+ private boolean isHNSEnabled;
+
public ITestAzureBlobFileSystemDelegationSAS() throws Exception {
// These tests rely on specific settings in azure-auth-keys.xml:
String sasProvider = getRawConfiguration().get(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE);
@@ -85,7 +87,7 @@ public class ITestAzureBlobFileSystemDelegationSAS extends AbstractAbfsIntegrati
@Override
public void setup() throws Exception {
- boolean isHNSEnabled = this.getConfiguration().getBoolean(
+ isHNSEnabled = this.getConfiguration().getBoolean(
TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false);
Assume.assumeTrue(isHNSEnabled);
createFilesystemForSASTests();
@@ -401,7 +403,7 @@ public class ITestAzureBlobFileSystemDelegationSAS extends AbstractAbfsIntegrati
fs.create(new Path(src)).close();
AbfsRestOperation abfsHttpRestOperation = fs.getAbfsClient()
.renamePath(src, "/testABC" + "/abc.txt", null,
- getTestTracingContext(fs, false), null, false)
+ getTestTracingContext(fs, false), null, false, isHNSEnabled)
.getOp();
AbfsHttpOperation result = abfsHttpRestOperation.getResult();
String url = result.getMaskedUrl();
@@ -419,7 +421,7 @@ public class ITestAzureBlobFileSystemDelegationSAS extends AbstractAbfsIntegrati
intercept(IOException.class, "sig=XXXX",
() -> getFileSystem().getAbfsClient()
.renamePath("testABC/test.xt", "testABC/abc.txt", null,
- getTestTracingContext(getFileSystem(), false), null, false));
+ getTestTracingContext(getFileSystem(), false), null, false, isHNSEnabled));
}
@Test
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java
index bd8dbdf871b..76b8a77fffc 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java
@@ -99,10 +99,14 @@ public class ITestCustomerProvidedKey extends AbstractAbfsIntegrationTest {
private static final int FILE_SIZE = 10 * ONE_MB;
private static final int FILE_SIZE_FOR_COPY_BETWEEN_ACCOUNTS = 24 * ONE_MB;
+ private boolean isNamespaceEnabled;
+
public ITestCustomerProvidedKey() throws Exception {
boolean isCPKTestsEnabled = getConfiguration()
.getBoolean(FS_AZURE_TEST_CPK_ENABLED, false);
Assume.assumeTrue(isCPKTestsEnabled);
+ isNamespaceEnabled = getConfiguration()
+ .getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false);
}
@Test
@@ -526,7 +530,7 @@ public class ITestCustomerProvidedKey extends AbstractAbfsIntegrationTest {
AbfsClient abfsClient = fs.getAbfsClient();
AbfsRestOperation abfsRestOperation = abfsClient
.renamePath(testFileName, newName, null,
- getTestTracingContext(fs, false), null, false)
+ getTestTracingContext(fs, false), null, false, isNamespaceEnabled)
.getOp();
assertCPKHeaders(abfsRestOperation, false);
assertNoCPKResponseHeadersPresent(abfsRestOperation);
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java
index f5cbceaddd8..cef1c9ae5a1 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java
@@ -18,19 +18,44 @@
package org.apache.hadoop.fs.azurebfs.services;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.SocketException;
+import java.net.URL;
+import java.time.Duration;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys;
+import org.apache.hadoop.fs.statistics.IOStatistics;
import org.assertj.core.api.Assertions;
+import org.junit.Assume;
import org.junit.Test;
+import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.EtagSource;
import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest;
import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;
+import org.apache.hadoop.fs.azurebfs.commit.ResilientCommitByRename;
+import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
+import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.PATH_ALREADY_EXISTS;
import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND;
+import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE;
+import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS;
+import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter;
+import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupCounterStatistic;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
@@ -45,7 +70,11 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest {
private static final Logger LOG =
LoggerFactory.getLogger(TestAbfsRenameRetryRecovery.class);
+ private boolean isNamespaceEnabled;
+
public TestAbfsRenameRetryRecovery() throws Exception {
+ isNamespaceEnabled = getConfiguration()
+ .getBoolean(TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false);
}
/**
@@ -90,7 +119,7 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest {
// We need to throw an exception once a rename is triggered with
// destination having no parent, but after a retry it needs to succeed.
when(mockClient.renamePath(sourcePath, destNoParentPath, null, null,
- null, false))
+ null, false, isNamespaceEnabled))
.thenThrow(destParentNotFound)
.thenReturn(recoveredMetaDataIncompleteResult);
@@ -98,12 +127,12 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest {
intercept(AzureBlobFileSystemException.class,
() -> mockClient.renamePath(sourcePath,
destNoParentPath, null, null,
- null, false));
+ null, false, isNamespaceEnabled));
AbfsClientRenameResult resultOfSecondRenameCall =
mockClient.renamePath(sourcePath,
destNoParentPath, null, null,
- null, false);
+ null, false, isNamespaceEnabled);
// the second rename call should be the recoveredResult due to
// metaDataIncomplete
@@ -119,10 +148,387 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest {
// Verify renamePath occurred two times implying a retry was attempted.
verify(mockClient, times(2))
- .renamePath(sourcePath, destNoParentPath, null, null, null, false);
+ .renamePath(sourcePath, destNoParentPath, null, null, null, false,
+ isNamespaceEnabled);
}
+ AbfsClient getMockAbfsClient() throws IOException {
+ AzureBlobFileSystem fs = getFileSystem();
+
+ // adding mock objects to current AbfsClient
+ AbfsClient spyClient = Mockito.spy(fs.getAbfsStore().getClient());
+
+ Mockito.doAnswer(answer -> {
+ AbfsRestOperation op = new AbfsRestOperation(AbfsRestOperationType.RenamePath,
+ spyClient, HTTP_METHOD_PUT, answer.getArgument(0), answer.getArgument(1));
+ AbfsRestOperation spiedOp = Mockito.spy(op);
+ addSpyBehavior(spiedOp, op, spyClient);
+ return spiedOp;
+ }).when(spyClient).createRenameRestOperation(Mockito.any(URL.class), anyList());
+
+ return spyClient;
+
+ }
+
+ /**
+ * Spies on a rest operation to inject transient failure.
+ * the first createHttpOperation() invocation will return an abfs rest operation
+ * which will fail.
+ * @param spiedRestOp spied operation whose createHttpOperation() will fail first time
+ * @param normalRestOp normal operation the good operation
+ * @param client client.
+ * @throws IOException failure
+ */
+ private void addSpyBehavior(final AbfsRestOperation spiedRestOp,
+ final AbfsRestOperation normalRestOp,
+ final AbfsClient client)
+ throws IOException {
+ AbfsHttpOperation failingOperation = Mockito.spy(normalRestOp.createHttpOperation());
+ AbfsHttpOperation normalOp1 = normalRestOp.createHttpOperation();
+ executeThenFail(client, normalRestOp, failingOperation, normalOp1);
+ AbfsHttpOperation normalOp2 = normalRestOp.createHttpOperation();
+ normalOp2.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION,
+ client.getAccessToken());
+
+ when(spiedRestOp.createHttpOperation())
+ .thenReturn(failingOperation)
+ .thenReturn(normalOp2);
+ }
+
+ /**
+ * Mock an idempotency failure by executing the normal operation, then
+ * raising an IOE.
+ * @param normalRestOp the rest operation used to sign the requests.
+ * @param failingOperation failing operation
+ * @param normalOp good operation
+ * @throws IOException failure
+ */
+ private void executeThenFail(final AbfsClient client,
+ final AbfsRestOperation normalRestOp,
+ final AbfsHttpOperation failingOperation,
+ final AbfsHttpOperation normalOp)
+ throws IOException {
+
+ Mockito.doAnswer(answer -> {
+ LOG.info("Executing first attempt with post-operation fault injection");
+ final byte[] buffer = answer.getArgument(0);
+ final int offset = answer.getArgument(1);
+ final int length = answer.getArgument(2);
+ normalRestOp.signRequest(normalOp, length);
+ normalOp.sendRequest(buffer, offset, length);
+ normalOp.processResponse(buffer, offset, length);
+ LOG.info("Actual outcome is {} \"{}\" \"{}\"; injecting failure",
+ normalOp.getStatusCode(),
+ normalOp.getStorageErrorCode(),
+ normalOp.getStorageErrorMessage());
+ throw new SocketException("connection-reset");
+ }).when(failingOperation).sendRequest(Mockito.nullable(byte[].class),
+ Mockito.nullable(int.class), Mockito.nullable(int.class));
+
+ }
+
+ /**
+ * This is the good outcome: resilient rename.
+ */
+ @Test
+ public void testRenameRecoveryEtagMatchFsLevel() throws IOException {
+ AzureBlobFileSystem fs = getFileSystem();
+ AzureBlobFileSystemStore abfsStore = fs.getAbfsStore();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+ Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+ AbfsClient mockClient = getMockAbfsClient();
+
+ String base = "/" + getMethodName();
+ String path1 = base + "/dummyFile1";
+ String path2 = base + "/dummyFile2";
+
+ touch(new Path(path1));
+
+ setAbfsClient(abfsStore, mockClient);
+
+ // checking correct count in AbfsCounters
+ AbfsCounters counter = mockClient.getAbfsCounters();
+ IOStatistics ioStats = counter.getIOStatistics();
+
+ Long connMadeBeforeRename = lookupCounterStatistic(ioStats, CONNECTIONS_MADE.getStatName());
+ Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName());
+
+ // 404 and retry, send sourceEtag as null
+ // source eTag matches -> rename should pass even when execute throws exception
+ fs.rename(new Path(path1), new Path(path2));
+
+ // validating stat counters after rename
+ // 4 calls should have happened in total for rename
+ // 1 -> original rename rest call, 2 -> first retry,
+ // +2 for getPathStatus calls
+ assertThatStatisticCounter(ioStats,
+ CONNECTIONS_MADE.getStatName())
+ .isEqualTo(4 + connMadeBeforeRename);
+ // the RENAME_PATH_ATTEMPTS stat should be incremented by 1
+ // retries happen internally within AbfsRestOperation execute()
+ // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called
+ assertThatStatisticCounter(ioStats,
+ RENAME_PATH_ATTEMPTS.getStatName())
+ .isEqualTo(1 + renamePathAttemptsBeforeRename);
+
+ }
+
+ /**
+ * Execute a failing rename but have the file at the far end not match.
+ * This is done by explicitly passing in a made-up etag for the source
+ * etag and creating a file at the far end.
+ * The first rename will actually fail with a path exists exception,
+ * but as that is swallowed, it's not a problem.
+ */
+ @Test
+ public void testRenameRecoveryEtagMismatchFsLevel() throws Exception {
+ AzureBlobFileSystem fs = getFileSystem();
+ AzureBlobFileSystemStore abfsStore = fs.getAbfsStore();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+ Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+ AbfsClient mockClient = getMockAbfsClient();
+
+ String base = "/" + getMethodName();
+ String path1 = base + "/dummyFile1";
+ String path2 = base + "/dummyFile2";
+
+ fs.create(new Path(path2));
+
+ setAbfsClient(abfsStore, mockClient);
+
+ // source eTag does not match -> rename should be a failure
+ assertEquals(false, fs.rename(new Path(path1), new Path(path2)));
+
+ }
+
+ @Test
+ public void testRenameRecoveryFailsForDirFsLevel() throws Exception {
+ AzureBlobFileSystem fs = getFileSystem();
+ AzureBlobFileSystemStore abfsStore = fs.getAbfsStore();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+ Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+ AbfsClient mockClient = getMockAbfsClient();
+
+ String dir1 = "/dummyDir1";
+ String dir2 = "/dummyDir2";
+
+ Path path1 = new Path(dir1);
+ Path path2 = new Path(dir2);
+
+ fs.mkdirs(path1);
+
+ setAbfsClient(abfsStore, mockClient);
+
+ // checking correct count in AbfsCounters
+ AbfsCounters counter = mockClient.getAbfsCounters();
+ IOStatistics ioStats = counter.getIOStatistics();
+
+ Long connMadeBeforeRename = lookupCounterStatistic(ioStats, CONNECTIONS_MADE.getStatName());
+ Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName());
+
+ // source eTag does not match -> rename should be a failure
+ boolean renameResult = fs.rename(path1, path2);
+ assertEquals(false, renameResult);
+
+ // validating stat counters after rename
+ // 3 calls should have happened in total for rename
+ // 1 -> original rename rest call, 2 -> first retry,
+ // +1 for getPathStatus calls
+ // last getPathStatus call should be skipped
+ assertThatStatisticCounter(ioStats,
+ CONNECTIONS_MADE.getStatName())
+ .isEqualTo(3 + connMadeBeforeRename);
+
+ // the RENAME_PATH_ATTEMPTS stat should be incremented by 1
+ // retries happen internally within AbfsRestOperation execute()
+ // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called
+ assertThatStatisticCounter(ioStats,
+ RENAME_PATH_ATTEMPTS.getStatName())
+ .isEqualTo(1 + renamePathAttemptsBeforeRename);
+ }
+
+ /**
+ * Assert that an exception was raised with a specific error code.
+ * @param code expected error code
+ * @param e exception to check
+ * @throws AbfsRestOperationException rethrown if the error code does not match
+ */
+ private static void expectErrorCode(final AzureServiceErrorCode code,
+ final AbfsRestOperationException e) throws AbfsRestOperationException {
+ if (e.getErrorCode() != code) {
+ throw e;
+ }
+ }
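
expectErrorCode() is used together with intercept(), which runs a callable, catches the expected exception type and returns it for further inspection. The following is a rough, hypothetical re-implementation of that pattern with plain JDK types; ServiceException, ErrorCode and this intercept() are invented stand-ins, not the Hadoop test utilities.

```java
import java.util.concurrent.Callable;

public final class InterceptSketch {

  enum ErrorCode { SOURCE_PATH_NOT_FOUND, PATH_ALREADY_EXISTS }

  /** Invented exception type carrying a service error code. */
  static class ServiceException extends Exception {
    private final ErrorCode code;
    ServiceException(ErrorCode code, String msg) { super(msg); this.code = code; }
    ErrorCode getErrorCode() { return code; }
  }

  /** Run the callable and return the expected exception, failing if none is thrown. */
  static <E extends Exception> E intercept(Class<E> clazz, Callable<?> eval) throws Exception {
    try {
      Object result = eval.call();
      throw new AssertionError("Expected " + clazz.getSimpleName() + " but got " + result);
    } catch (Exception e) {
      if (clazz.isInstance(e)) {
        return clazz.cast(e);
      }
      throw e; // not the exception we were looking for: propagate
    }
  }

  /** Rethrow unless the exception carries the expected error code. */
  static void expectErrorCode(ErrorCode expected, ServiceException e) throws ServiceException {
    if (e.getErrorCode() != expected) {
      throw e;
    }
  }

  public static void main(String[] args) throws Exception {
    expectErrorCode(ErrorCode.PATH_ALREADY_EXISTS,
        intercept(ServiceException.class, () -> {
          throw new ServiceException(ErrorCode.PATH_ALREADY_EXISTS, "destination exists");
        }));
    System.out.println("error code matched as expected");
  }
}
```
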
+
+ /**
+ * Directory rename failure is unrecoverable.
+ */
+ @Test
+ public void testDirRenameRecoveryUnsupported() throws Exception {
+ AzureBlobFileSystem fs = getFileSystem();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+ Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+ AbfsClient spyClient = getMockAbfsClient();
+
+ String base = "/" + getMethodName();
+ String path1 = base + "/dummyDir1";
+ String path2 = base + "/dummyDir2";
+
+ fs.mkdirs(new Path(path1));
+
+ // directory rename recovery is unsupported -> the injected failure surfaces as an exception
+ expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () ->
+ spyClient.renamePath(path1, path2, null, testTracingContext, null, false,
+ isNamespaceEnabled)));
+ }
+
+ /**
+ * Even with injected failures, a rename onto an existing destination path
+ * must still be rejected.
+ */
+ @Test
+ public void testExistingPathCorrectlyRejected() throws Exception {
+ AzureBlobFileSystem fs = getFileSystem();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+ Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+ AbfsClient spyClient = getMockAbfsClient();
+
+ String base = "/" + getMethodName();
+ String path1 = base + "/dummyDir1";
+ String path2 = base + "/dummyDir2";
+
+
+ touch(new Path(path1));
+ touch(new Path(path2));
+
+ // destination already exists -> throw exception
+ expectErrorCode(PATH_ALREADY_EXISTS, intercept(AbfsRestOperationException.class, () ->
+ spyClient.renamePath(path1, path2, null, testTracingContext, null, false,
+ isNamespaceEnabled)));
+ }
+
+ /**
+ * Test that rename recovery remains unsupported for
+ * FNS configurations.
+ */
+ @Test
+ public void testRenameRecoveryUnsupportedForFlatNamespace() throws Exception {
+ Assume.assumeTrue(!isNamespaceEnabled);
+ AzureBlobFileSystem fs = getFileSystem();
+ AzureBlobFileSystemStore abfsStore = fs.getAbfsStore();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+ AbfsClient mockClient = getMockAbfsClient();
+
+ String base = "/" + getMethodName();
+ String path1 = base + "/dummyFile1";
+ String path2 = base + "/dummyFile2";
+
+ touch(new Path(path1));
+
+ setAbfsClient(abfsStore, mockClient);
+
+ // checking correct count in AbfsCounters
+ AbfsCounters counter = mockClient.getAbfsCounters();
+ IOStatistics ioStats = counter.getIOStatistics();
+
+ Long connMadeBeforeRename = lookupCounterStatistic(ioStats, CONNECTIONS_MADE.getStatName());
+ Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName());
+
+ expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () ->
+ mockClient.renamePath(path1, path2, null, testTracingContext, null, false,
+ isNamespaceEnabled)));
+
+ // validating stat counters after rename
+
+ // only 2 calls should have happened in total for rename
+ // 1 -> original rename rest call, 2 -> first retry,
+ // no getPathStatus calls
+ // last getPathStatus call should be skipped
+ assertThatStatisticCounter(ioStats,
+ CONNECTIONS_MADE.getStatName())
+ .isEqualTo(2 + connMadeBeforeRename);
+
+ // the RENAME_PATH_ATTEMPTS stat should be incremented by 1
+ // retries happen internally within AbfsRestOperation execute()
+ // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called
+ assertThatStatisticCounter(ioStats,
+ RENAME_PATH_ATTEMPTS.getStatName())
+ .isEqualTo(1 + renamePathAttemptsBeforeRename);
+ }
+
+ /**
+ * Test the resilient commit code works through fault injection, including
+ * reporting recovery.
+ */
+ @Test
+ public void testResilientCommitOperation() throws Throwable {
+ AzureBlobFileSystem fs = getFileSystem();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+ final AzureBlobFileSystemStore store = fs.getAbfsStore();
+ Assume.assumeTrue(store.getIsNamespaceEnabled(testTracingContext));
+
+ // patch in the mock abfs client to the filesystem, for the resilient
+ // commit API to pick up.
+ setAbfsClient(store, getMockAbfsClient());
+
+ String base = "/" + getMethodName();
+ String path1 = base + "/dummyDir1";
+ String path2 = base + "/dummyDir2";
+
+
+ final Path source = new Path(path1);
+ touch(source);
+ final String sourceTag = ((EtagSource) fs.getFileStatus(source)).getEtag();
+
+ final ResilientCommitByRename commit = fs.createResilientCommitSupport(source);
+ final Pair outcome =
+ commit.commitSingleFileByRename(source, new Path(path2), sourceTag);
+ Assertions.assertThat(outcome.getKey())
+ .describedAs("recovery flag")
+ .isTrue();
+ }
+
+ /**
+ * Test that the resilient commit code does not report recovery when the
+ * supplied source etag does not match: the injected failure surfaces as
+ * a FileNotFoundException.
+ */
+ @Test
+ public void testResilientCommitOperationTagMismatch() throws Throwable {
+ AzureBlobFileSystem fs = getFileSystem();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+ final AzureBlobFileSystemStore store = fs.getAbfsStore();
+ Assume.assumeTrue(store.getIsNamespaceEnabled(testTracingContext));
+
+ // patch in the mock abfs client to the filesystem, for the resilient
+ // commit API to pick up.
+ setAbfsClient(store, getMockAbfsClient());
+
+ String base = "/" + getMethodName();
+ String path1 = base + "/dummyDir1";
+ String path2 = base + "/dummyDir2";
+
+
+ final Path source = new Path(path1);
+ touch(source);
+ final String sourceTag = ((EtagSource) fs.getFileStatus(source)).getEtag();
+
+ final ResilientCommitByRename commit = fs.createResilientCommitSupport(source);
+ intercept(FileNotFoundException.class, () ->
+ commit.commitSingleFileByRename(source, new Path(path2), "not the right tag"));
+ }
+
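
The FS-level and commit-level tests above all exercise the same recovery rule: when a rename attempt fails transiently, probe the destination and, if its etag equals the etag the source had before the attempt, treat the rename as having succeeded (and report recovery); otherwise surface the failure. Below is a hedged sketch of that decision only; Store, RenameOutcome and getEtagOrNull() are illustrative stand-ins, not the ABFS implementation.

```java
import java.io.IOException;

public final class RenameRecoverySketch {

  /** Invented abstraction of the store operations the recovery logic needs. */
  interface Store {
    void rename(String src, String dst) throws IOException;
    /** @return the etag of a file at the path, or null if absent or a directory. */
    String getEtagOrNull(String path) throws IOException;
  }

  /** Whether the rename took effect and whether recovery was needed to decide that. */
  static final class RenameOutcome {
    final boolean renamed;
    final boolean recovered;
    RenameOutcome(boolean renamed, boolean recovered) {
      this.renamed = renamed;
      this.recovered = recovered;
    }
  }

  static RenameOutcome renameWithRecovery(Store store, String src, String dst,
      String sourceEtag) throws IOException {
    try {
      store.rename(src, dst);
      return new RenameOutcome(true, false);
    } catch (IOException transientFailure) {
      // Recovery is only attempted when a source etag is known: directories and
      // flat-namespace accounts have none, so the failure is rethrown as-is.
      if (sourceEtag == null || sourceEtag.isEmpty()) {
        throw transientFailure;
      }
      String destEtag = store.getEtagOrNull(dst);
      if (sourceEtag.equals(destEtag)) {
        // The earlier attempt evidently completed server-side.
        return new RenameOutcome(true, true);
      }
      throw transientFailure;
    }
  }
}
```
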
/**
* Method to create an AbfsRestOperationException.
* @param statusCode status code to be used.
From eb1d3ebe2fb12eb36f507b2fceaea724c0f863d9 Mon Sep 17 00:00:00 2001
From: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Date: Sat, 1 Apr 2023 06:29:18 +0800
Subject: [PATCH 37/78] YARN-11442. Refactor FederationInterceptorREST Code.
(#5420)
---
.../utils/FederationStateStoreFacade.java | 21 +
.../webapp/FederationInterceptorREST.java | 679 ++++++------------
.../router/webapp/RouterWebServiceUtil.java | 7 +-
.../webapp/TestFederationInterceptorREST.java | 99 +--
4 files changed, 301 insertions(+), 505 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/utils/FederationStateStoreFacade.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/utils/FederationStateStoreFacade.java
index ebad527b6d4..e7cfb2e3112 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/utils/FederationStateStoreFacade.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/utils/FederationStateStoreFacade.java
@@ -26,6 +26,7 @@ import java.util.ArrayList;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.Random;
+import java.util.Collection;
import javax.cache.Cache;
import javax.cache.CacheManager;
@@ -93,6 +94,7 @@ import org.apache.hadoop.yarn.server.federation.store.records.SubClusterState;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterDeregisterRequest;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterDeregisterResponse;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
+import org.apache.hadoop.yarn.webapp.NotFoundException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -1211,4 +1213,23 @@ public final class FederationStateStoreFacade {
}
return false;
}
+
+ /**
+ * Get active subclusters.
+ *
+ * @return the active subclusters, as a Collection of SubClusterInfo.
+ * @throws NotFoundException if there are no active subclusters.
+ */
+ public Collection getActiveSubClusters()
+ throws NotFoundException {
+ try {
+ Map subClusterMap = getSubClusters(true);
+ if (MapUtils.isEmpty(subClusterMap)) {
+ throw new NotFoundException("Not Found SubClusters.");
+ }
+ return subClusterMap.values();
+ } catch (Exception e) {
+ LOG.error("getActiveSubClusters failed.", e);
+ return null;
+ }
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
index 9975823ec2b..5d73ef20e59 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
@@ -29,12 +29,13 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
-import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
@@ -45,6 +46,7 @@ import javax.ws.rs.core.Response.Status;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.impl.prefetch.Validate;
import org.apache.hadoop.io.Text;
@@ -148,6 +150,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
+import static org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade.getRandomActiveSubCluster;
import static org.apache.hadoop.yarn.server.router.webapp.RouterWebServiceUtil.extractToken;
import static org.apache.hadoop.yarn.server.router.webapp.RouterWebServiceUtil.getKerberosUserGroupInformation;
@@ -159,8 +162,7 @@ import static org.apache.hadoop.yarn.server.router.webapp.RouterWebServiceUtil.g
*/
public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
- private static final Logger LOG =
- LoggerFactory.getLogger(FederationInterceptorREST.class);
+ private static final Logger LOG = LoggerFactory.getLogger(FederationInterceptorREST.class);
private int numSubmitRetries;
private FederationStateStoreFacade federationFacade;
@@ -205,10 +207,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
interceptors = new HashMap<>();
routerMetrics = RouterMetrics.getMetrics();
- threadpool = HadoopExecutors.newCachedThreadPool(
- new ThreadFactoryBuilder()
- .setNameFormat("FederationInterceptorREST #%d")
- .build());
+ threadpool = HadoopExecutors.newCachedThreadPool(new ThreadFactoryBuilder()
+ .setNameFormat("FederationInterceptorREST #%d")
+ .build());
returnPartialReport = conf.getBoolean(
YarnConfiguration.ROUTER_WEBAPP_PARTIAL_RESULTS_ENABLED,
@@ -235,13 +236,11 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
}
@VisibleForTesting
- protected DefaultRequestInterceptorREST getInterceptorForSubCluster(
- SubClusterId subClusterId) {
+ protected DefaultRequestInterceptorREST getInterceptorForSubCluster(SubClusterId subClusterId) {
if (interceptors.containsKey(subClusterId)) {
return interceptors.get(subClusterId);
} else {
- LOG.error(
- "The interceptor for SubCluster {} does not exist in the cache.",
+ LOG.error("The interceptor for SubCluster {} does not exist in the cache.",
subClusterId);
return null;
}
@@ -255,44 +254,63 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
String interceptorClassName = conf.get(
YarnConfiguration.ROUTER_WEBAPP_DEFAULT_INTERCEPTOR_CLASS,
YarnConfiguration.DEFAULT_ROUTER_WEBAPP_DEFAULT_INTERCEPTOR_CLASS);
- DefaultRequestInterceptorREST interceptorInstance = null;
+
+ DefaultRequestInterceptorREST interceptorInstance;
try {
Class> interceptorClass = conf.getClassByName(interceptorClassName);
- if (DefaultRequestInterceptorREST.class
- .isAssignableFrom(interceptorClass)) {
- interceptorInstance = (DefaultRequestInterceptorREST) ReflectionUtils
- .newInstance(interceptorClass, conf);
+ if (DefaultRequestInterceptorREST.class.isAssignableFrom(interceptorClass)) {
+ interceptorInstance =
+ (DefaultRequestInterceptorREST) ReflectionUtils.newInstance(interceptorClass, conf);
String userName = getUser().getUserName();
interceptorInstance.init(userName);
} else {
- throw new YarnRuntimeException(
- "Class: " + interceptorClassName + " not instance of "
- + DefaultRequestInterceptorREST.class.getCanonicalName());
+ throw new YarnRuntimeException("Class: " + interceptorClassName + " not instance of "
+ + DefaultRequestInterceptorREST.class.getCanonicalName());
}
} catch (ClassNotFoundException e) {
- throw new YarnRuntimeException(
- "Could not instantiate ApplicationMasterRequestInterceptor: "
- + interceptorClassName,
- e);
+ throw new YarnRuntimeException("Could not instantiate ApplicationMasterRequestInterceptor: " +
+ interceptorClassName, e);
}
- String webAppAddresswithScheme =
- WebAppUtils.getHttpSchemePrefix(this.getConf()) + webAppAddress;
- interceptorInstance.setWebAppAddress(webAppAddresswithScheme);
+ String webAppAddressWithScheme = WebAppUtils.getHttpSchemePrefix(conf) + webAppAddress;
+ interceptorInstance.setWebAppAddress(webAppAddressWithScheme);
interceptorInstance.setSubClusterId(subClusterId);
interceptors.put(subClusterId, interceptorInstance);
return interceptorInstance;
}
+ protected DefaultRequestInterceptorREST getOrCreateInterceptorForSubCluster(
+ SubClusterInfo subClusterInfo) {
+ if (subClusterInfo != null) {
+ final SubClusterId subClusterId = subClusterInfo.getSubClusterId();
+ final String webServiceAddress = subClusterInfo.getRMWebServiceAddress();
+ return getOrCreateInterceptorForSubCluster(subClusterId, webServiceAddress);
+ }
+ return null;
+ }
+
+ protected DefaultRequestInterceptorREST getOrCreateInterceptorByAppId(String appId)
+ throws YarnException {
+ // We first check the applicationId
+ RouterServerUtil.validateApplicationId(appId);
+
+ // Get the home SubCluster by appId
+ SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
+ return getOrCreateInterceptorForSubCluster(subClusterInfo);
+ }
+
+ protected DefaultRequestInterceptorREST getOrCreateInterceptorByNodeId(String nodeId) {
+ SubClusterInfo subClusterInfo = getNodeSubcluster(nodeId);
+ return getOrCreateInterceptorForSubCluster(subClusterInfo);
+ }
+
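
The new getOrCreateInterceptor* helpers centralize one pattern: resolve a routing key (appId or nodeId) to its subcluster, then reuse a cached per-subcluster client, creating it on first use. A rough, hypothetical sketch of that cache-or-create routing follows; Client, resolveHomeCluster() and the map are invented, while the real code keys its cache by SubClusterId and validates the appId first.

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public final class RoutingSketch {

  /** Invented stand-in for the per-subcluster REST interceptor. */
  static final class Client {
    private final String clusterId;
    Client(String clusterId) { this.clusterId = clusterId; }
    String describe() { return "client for " + clusterId; }
  }

  private final Map<String, Client> clients = new ConcurrentHashMap<>();

  /** Pretend lookup of the home cluster for an application id. */
  private String resolveHomeCluster(String appId) {
    return "subcluster-" + (Math.abs(appId.hashCode()) % 2);
  }

  Client getOrCreateClientByAppId(String appId) {
    if (appId == null || appId.isEmpty()) {
      throw new IllegalArgumentException("appId must not be empty");
    }
    String clusterId = resolveHomeCluster(appId);
    // Create once per subcluster, then reuse on subsequent requests.
    return clients.computeIfAbsent(clusterId, Client::new);
  }

  public static void main(String[] args) {
    RoutingSketch router = new RoutingSketch();
    System.out.println(router.getOrCreateClientByAppId("application_1_0001").describe());
  }
}
```
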
@VisibleForTesting
protected DefaultRequestInterceptorREST getOrCreateInterceptorForSubCluster(
SubClusterId subClusterId, String webAppAddress) {
- DefaultRequestInterceptorREST interceptor =
- getInterceptorForSubCluster(subClusterId);
- String webAppAddresswithScheme = WebAppUtils.getHttpSchemePrefix(
- this.getConf()) + webAppAddress;
- if (interceptor == null || !webAppAddresswithScheme.equals(interceptor.
- getWebAppAddress())){
+ DefaultRequestInterceptorREST interceptor = getInterceptorForSubCluster(subClusterId);
+ String webAppAddressWithScheme =
+ WebAppUtils.getHttpSchemePrefix(this.getConf()) + webAppAddress;
+ if (interceptor == null || !webAppAddressWithScheme.equals(interceptor.getWebAppAddress())) {
interceptor = createInterceptorForSubCluster(subClusterId, webAppAddress);
}
return interceptor;
@@ -372,8 +390,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
List blackList, HttpServletRequest hsr, int retryCount)
throws YarnException, IOException, InterruptedException {
- SubClusterId subClusterId =
- federationFacade.getRandomActiveSubCluster(subClustersActive, blackList);
+ SubClusterId subClusterId = getRandomActiveSubCluster(subClustersActive, blackList);
LOG.info("getNewApplication try #{} on SubCluster {}.", retryCount, subClusterId);
@@ -462,8 +479,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
* Router submits the request to the selected SubCluster (e.g. SC2).
*/
@Override
- public Response submitApplication(ApplicationSubmissionContextInfo newApp,
- HttpServletRequest hsr)
+ public Response submitApplication(ApplicationSubmissionContextInfo newApp, HttpServletRequest hsr)
throws AuthorizationException, IOException, InterruptedException {
long startTime = clock.getTime();
@@ -548,6 +564,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// Step3. We get subClusterInfo based on subClusterId.
SubClusterInfo subClusterInfo = federationFacade.getSubCluster(subClusterId);
+ if (subClusterInfo == null) {
+ throw new YarnException("Can't Find SubClusterId = " + subClusterId);
+ }
// Step4. Submit the request, if the response is HttpServletResponse.SC_ACCEPTED,
// We return the response, otherwise we throw an exception.
@@ -587,43 +606,29 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
* operation.
*/
@Override
- public AppInfo getApp(HttpServletRequest hsr, String appId,
- Set unselectedFields) {
+ public AppInfo getApp(HttpServletRequest hsr, String appId, Set unselectedFields) {
- long startTime = clock.getTime();
-
- ApplicationId applicationId = null;
try {
- applicationId = ApplicationId.fromString(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrAppsFailedRetrieved();
- return null;
- }
+ long startTime = clock.getTime();
- SubClusterInfo subClusterInfo = null;
- SubClusterId subClusterId = null;
- try {
- subClusterId =
- federationFacade.getApplicationHomeSubCluster(applicationId);
- if (subClusterId == null) {
+ // Get SubClusterInfo according to applicationId
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
+ if (interceptor == null) {
routerMetrics.incrAppsFailedRetrieved();
return null;
}
- subClusterInfo = federationFacade.getSubCluster(subClusterId);
+ AppInfo response = interceptor.getApp(hsr, appId, unselectedFields);
+ long stopTime = clock.getTime();
+ routerMetrics.succeededAppsRetrieved(stopTime - startTime);
+ return response;
} catch (YarnException e) {
routerMetrics.incrAppsFailedRetrieved();
+ LOG.error("getApp Error, applicationId = {}.", appId, e);
return null;
+ } catch (IllegalArgumentException e) {
+ routerMetrics.incrAppsFailedRetrieved();
+ throw e;
}
-
- DefaultRequestInterceptorREST interceptor =
- getOrCreateInterceptorForSubCluster(
- subClusterId, subClusterInfo.getRMWebServiceAddress());
- AppInfo response = interceptor.getApp(hsr, appId, unselectedFields);
-
- long stopTime = clock.getTime();
- routerMetrics.succeededAppsRetrieved(stopTime - startTime);
-
- return response;
}
/**
@@ -643,13 +648,12 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
* operation.
*/
@Override
- public Response updateAppState(AppState targetState, HttpServletRequest hsr,
- String appId) throws AuthorizationException, YarnException,
- InterruptedException, IOException {
+ public Response updateAppState(AppState targetState, HttpServletRequest hsr, String appId)
+ throws AuthorizationException, YarnException, InterruptedException, IOException {
long startTime = clock.getTime();
- ApplicationId applicationId = null;
+ ApplicationId applicationId;
try {
applicationId = ApplicationId.fromString(appId);
} catch (IllegalArgumentException e) {
@@ -660,8 +664,8 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
.build();
}
- SubClusterInfo subClusterInfo = null;
- SubClusterId subClusterId = null;
+ SubClusterInfo subClusterInfo;
+ SubClusterId subClusterId;
try {
subClusterId =
federationFacade.getApplicationHomeSubCluster(applicationId);
@@ -724,60 +728,35 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
AppsInfo apps = new AppsInfo();
long startTime = clock.getTime();
- Map subClustersActive = null;
- try {
- subClustersActive = federationFacade.getSubClusters(true);
- } catch (YarnException e) {
- routerMetrics.incrMultipleAppsFailedRetrieved();
- return null;
- }
-
- // Send the requests in parallel
- CompletionService compSvc =
- new ExecutorCompletionService<>(this.threadpool);
-
// HttpServletRequest does not work with ExecutorCompletionService.
// Create a duplicate hsr.
final HttpServletRequest hsrCopy = clone(hsr);
- for (final SubClusterInfo info : subClustersActive.values()) {
- compSvc.submit(new Callable() {
- @Override
- public AppsInfo call() {
- DefaultRequestInterceptorREST interceptor =
- getOrCreateInterceptorForSubCluster(
- info.getSubClusterId(), info.getRMWebServiceAddress());
- AppsInfo rmApps = interceptor.getApps(hsrCopy, stateQuery,
- statesQuery, finalStatusQuery, userQuery, queueQuery, count,
- startedBegin, startedEnd, finishBegin, finishEnd,
- applicationTypes, applicationTags, name, unselectedFields);
+ Collection subClusterInfos = federationFacade.getActiveSubClusters();
- if (rmApps == null) {
- routerMetrics.incrMultipleAppsFailedRetrieved();
- LOG.error("Subcluster {} failed to return appReport.", info.getSubClusterId());
- return null;
- }
+ List appsInfos = subClusterInfos.parallelStream().map(subCluster -> {
+ try {
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(subCluster);
+ AppsInfo rmApps = interceptor.getApps(hsrCopy, stateQuery, statesQuery, finalStatusQuery,
+ userQuery, queueQuery, count, startedBegin, startedEnd, finishBegin, finishEnd,
+ applicationTypes, applicationTags, name, unselectedFields);
+ if (rmApps != null) {
return rmApps;
}
- });
- }
-
- // Collect all the responses in parallel
- for (int i = 0; i < subClustersActive.size(); i++) {
- try {
- Future future = compSvc.take();
- AppsInfo appsResponse = future.get();
+ } catch (Exception e) {
+ LOG.warn("Failed to get application report.", e);
+ }
+ routerMetrics.incrMultipleAppsFailedRetrieved();
+ LOG.error("Subcluster {} failed to return appReport.", subCluster.getSubClusterId());
+ return null;
+ }).collect(Collectors.toList());
+ appsInfos.forEach(appsInfo -> {
+ if (appsInfo != null) {
+ apps.addAll(appsInfo.getApps());
long stopTime = clock.getTime();
routerMetrics.succeededMultipleAppsRetrieved(stopTime - startTime);
-
- if (appsResponse != null) {
- apps.addAll(appsResponse.getApps());
- }
- } catch (Throwable e) {
- routerMetrics.incrMultipleAppsFailedRetrieved();
- LOG.warn("Failed to get application report", e);
}
- }
+ });
if (apps.getApps().isEmpty()) {
return null;
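
The getApps() refactor above replaces the explicit ExecutorCompletionService fan-out with a parallelStream() map/collect over the active subclusters; per-cluster failures are logged and turned into nulls, which are then skipped when merging. Here is a minimal, hypothetical sketch of the same fan-out/merge shape; fetchApps() and the string "apps" are invented stand-ins for the per-subcluster REST call and its results.

```java
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

public final class FanOutSketch {

  /** Pretend per-subcluster call; a real implementation would issue a REST request. */
  private static List<String> fetchApps(String subCluster) {
    if ("sc-1".equals(subCluster)) {
      throw new IllegalStateException("simulated subcluster outage");
    }
    return Arrays.asList(subCluster + "/app_0001", subCluster + "/app_0002");
  }

  public static void main(String[] args) {
    List<String> subClusters = Arrays.asList("sc-0", "sc-1", "sc-2");

    // Fan out in parallel; convert per-cluster failures into null results.
    List<List<String>> partials = subClusters.parallelStream()
        .map(sc -> {
          try {
            return fetchApps(sc);
          } catch (Exception e) {
            System.err.println("Subcluster " + sc + " failed: " + e.getMessage());
            return null;
          }
        })
        .collect(Collectors.toList());

    // Merge, skipping the failed subclusters.
    List<String> merged = partials.stream()
        .filter(Objects::nonNull)
        .flatMap(List::stream)
        .collect(Collectors.toList());

    System.out.println(merged);
  }
}
```

As in the completion-service loop it replaces, a failing subcluster contributes nothing to the merged result rather than failing the whole request.
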
@@ -803,15 +782,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
if (hsr == null) {
return null;
}
- @SuppressWarnings("unchecked")
- final Map parameterMap =
- (Map) hsr.getParameterMap();
+
+ final Map parameterMap = hsr.getParameterMap();
final String pathInfo = hsr.getPathInfo();
final String user = hsr.getRemoteUser();
final Principal principal = hsr.getUserPrincipal();
- final String mediaType =
- RouterWebServiceUtil.getMediaTypeFromHttpServletRequest(
- hsr, AppsInfo.class);
+ final String mediaType = RouterWebServiceUtil.getMediaTypeFromHttpServletRequest(
+ hsr, AppsInfo.class);
return new HttpServletRequestWrapper(hsr) {
public Map getParameterMap() {
return parameterMap;
@@ -835,20 +812,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
};
}
- /**
- * Get the active subclusters in the federation.
- * @return Map from subcluster id to its info.
- * @throws NotFoundException If the subclusters cannot be found.
- */
- private Map getActiveSubclusters()
- throws NotFoundException {
- try {
- return federationFacade.getSubClusters(true);
- } catch (YarnException e) {
- throw new NotFoundException(e.getMessage());
- }
- }
-
/**
* Get the active subcluster in the federation.
*
@@ -860,13 +823,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
throws NotFoundException {
try {
SubClusterId pSubClusterId = SubClusterId.newInstance(subClusterId);
- Map subClusterInfoMap =
- federationFacade.getSubClusters(true);
- SubClusterInfo subClusterInfo = subClusterInfoMap.get(pSubClusterId);
- if (subClusterInfo == null) {
- throw new NotFoundException(subClusterId + " not found.");
- }
- return subClusterInfo;
+ return federationFacade.getSubCluster(pSubClusterId);
} catch (YarnException e) {
throw new NotFoundException(e.getMessage());
}
@@ -890,14 +847,14 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
*/
@Override
public NodeInfo getNode(String nodeId) {
- final Map subClustersActive =
- getActiveSubclusters();
+
+ final Collection subClustersActive = federationFacade.getActiveSubClusters();
+
if (subClustersActive.isEmpty()) {
- throw new NotFoundException(
- FederationPolicyUtils.NO_ACTIVE_SUBCLUSTER_AVAILABLE);
+ throw new NotFoundException(FederationPolicyUtils.NO_ACTIVE_SUBCLUSTER_AVAILABLE);
}
- final Map results =
- getNode(subClustersActive.values(), nodeId);
+
+ final Map results = getNode(subClustersActive, nodeId);
// Collect the responses
NodeInfo nodeInfo = null;
@@ -922,65 +879,53 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
/**
* Get a node and the subcluster where it is.
+ *
* @param subClusters Subclusters where to search.
- * @param nodeId Identifier of the node we are looking for.
+ * @param nodeId Identifier of the node we are looking for.
* @return Map between subcluster and node.
*/
- private Map getNode(
- Collection subClusters, String nodeId) {
+ private Map getNode(Collection subClusters,
+ String nodeId) {
- // Send the requests in parallel
- CompletionService compSvc =
- new ExecutorCompletionService(this.threadpool);
- final Map> futures = new HashMap<>();
- for (final SubClusterInfo subcluster : subClusters) {
- final SubClusterId subclusterId = subcluster.getSubClusterId();
- Future result = compSvc.submit(() -> {
- try {
- DefaultRequestInterceptorREST interceptor =
- getOrCreateInterceptorForSubCluster(
- subclusterId, subcluster.getRMWebServiceAddress());
- return interceptor.getNode(nodeId);
- } catch (Exception e) {
- LOG.error("Subcluster {} failed to return nodeInfo.", subclusterId, e);
- return null;
- }
- });
- futures.put(subcluster, result);
- }
+ // Parallel traversal of subClusters
+ Stream> pairStream = subClusters.parallelStream().map(
+ subClusterInfo -> {
+ final SubClusterId subClusterId = subClusterInfo.getSubClusterId();
+ try {
+ DefaultRequestInterceptorREST interceptor =
+ getOrCreateInterceptorForSubCluster(subClusterInfo);
+ return Pair.of(subClusterInfo, interceptor.getNode(nodeId));
+ } catch (Exception e) {
+ LOG.error("Subcluster {} failed to return nodeInfo.", subClusterId, e);
+ return null;
+ }
+ });
// Collect the results
final Map results = new HashMap<>();
- for (Entry> entry : futures.entrySet()) {
- try {
- final Future future = entry.getValue();
- final NodeInfo nodeInfo = future.get();
- // Check if the node was found in this SubCluster
- if (nodeInfo != null) {
- SubClusterInfo subcluster = entry.getKey();
- results.put(subcluster, nodeInfo);
- }
- } catch (Throwable e) {
- LOG.warn("Failed to get node report ", e);
+ pairStream.forEach(pair -> {
+ if (pair != null) {
+ SubClusterInfo subCluster = pair.getKey();
+ NodeInfo nodeInfo = pair.getValue();
+ results.put(subCluster, nodeInfo);
}
- }
+ });
return results;
}
/**
* Get the subcluster a node belongs to.
+ *
* @param nodeId Identifier of the node we are looking for.
* @return The subcluster containing the node.
* @throws NotFoundException If the node cannot be found.
*/
- private SubClusterInfo getNodeSubcluster(String nodeId)
- throws NotFoundException {
+ private SubClusterInfo getNodeSubcluster(String nodeId) throws NotFoundException {
+
+ final Collection subClusters = federationFacade.getActiveSubClusters();
+ final Map results = getNode(subClusters, nodeId);
- final Collection subClusters =
- getActiveSubclusters().values();
- final Map results =
- getNode(subClusters, nodeId);
SubClusterInfo subcluster = null;
NodeInfo nodeInfo = null;
for (Entry entry : results.entrySet()) {
@@ -992,8 +937,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
}
}
if (subcluster == null) {
- throw new NotFoundException(
- "Cannot find " + nodeId + " in any subcluster");
+ throw new NotFoundException("Cannot find " + nodeId + " in any subcluster");
}
return subcluster;
}
@@ -1022,15 +966,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
NodesInfo nodes = new NodesInfo();
try {
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
Class[] argsClasses = new Class[]{String.class};
Object[] args = new Object[]{states};
ClientMethod remoteMethod = new ClientMethod("getNodes", argsClasses, args);
Map nodesMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, NodesInfo.class);
- nodesMap.values().stream().forEach(nodesInfo -> {
- nodes.addAll(nodesInfo.getNodes());
- });
+ invokeConcurrent(subClustersActive, remoteMethod, NodesInfo.class);
+ nodesMap.values().forEach(nodesInfo -> nodes.addAll(nodesInfo.getNodes()));
} catch (NotFoundException e) {
LOG.error("get all active sub cluster(s) error.", e);
throw e;
@@ -1049,14 +991,20 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
return RouterWebServiceUtil.deleteDuplicateNodesInfo(nodes.getNodes());
}
+ /**
+ * This method changes the resources of a specific node, and it is reachable
+ * by using {@link RMWSConsts#NODE_RESOURCE}.
+ *
+ * @param hsr The servlet request.
+ * @param nodeId The node whose resource we want to change.
+ * It is a PathParam.
+ * @param resourceOption The resource change.
+ * @return the updated resources of the node.
+ */
@Override
public ResourceInfo updateNodeResource(HttpServletRequest hsr,
String nodeId, ResourceOptionInfo resourceOption) {
- SubClusterInfo subcluster = getNodeSubcluster(nodeId);
- DefaultRequestInterceptorREST interceptor =
- getOrCreateInterceptorForSubCluster(
- subcluster.getSubClusterId(),
- subcluster.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByNodeId(nodeId);
return interceptor.updateNodeResource(hsr, nodeId, resourceOption);
}
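
Several of the methods in this class, such as getNodes() above, fan a call out to every subcluster by describing it as a method name plus argument types and values and handing that descriptor to invokeConcurrent(). The sketch below shows, in hypothetical form, how such a descriptor can be applied reflectively to a set of per-subcluster clients; ClientCall, RestClient and invokeOnAll() are invented and are not the Hadoop ClientMethod/invokeConcurrent implementation.

```java
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public final class ReflectiveFanOutSketch {

  /** Invented stand-in for the per-subcluster REST client. */
  public static class RestClient {
    private final String id;
    public RestClient(String id) { this.id = id; }
    public String getNodes(String states) { return id + ":nodes(" + states + ")"; }
  }

  /** Method name + signature + arguments, playing the role of the descriptor. */
  static final class ClientCall {
    final String name; final Class<?>[] types; final Object[] args;
    ClientCall(String name, Class<?>[] types, Object[] args) {
      this.name = name; this.types = types; this.args = args;
    }
  }

  static <R> Map<String, R> invokeOnAll(List<RestClient> clients, ClientCall call,
      Class<R> returnType) {
    Map<String, R> results = new LinkedHashMap<>();
    clients.parallelStream().forEach(client -> {
      try {
        Method m = RestClient.class.getMethod(call.name, call.types);
        R value = returnType.cast(m.invoke(client, call.args));
        synchronized (results) { results.put(client.id, value); }
      } catch (Exception e) {
        System.err.println("call failed on " + client.id + ": " + e);
      }
    });
    return results;
  }

  public static void main(String[] args) {
    List<RestClient> clients = Arrays.asList(new RestClient("sc-0"), new RestClient("sc-1"));
    ClientCall call = new ClientCall("getNodes",
        new Class<?>[]{String.class}, new Object[]{"RUNNING"});
    System.out.println(invokeOnAll(clients, call, String.class));
  }
}
```
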
@@ -1064,50 +1012,30 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
public ClusterMetricsInfo getClusterMetricsInfo() {
ClusterMetricsInfo metrics = new ClusterMetricsInfo();
- final Map subClustersActive;
- try {
- subClustersActive = getActiveSubclusters();
- } catch (Exception e) {
- LOG.error(e.getLocalizedMessage());
- return metrics;
- }
+ Collection subClusterInfos = federationFacade.getActiveSubClusters();
- // Send the requests in parallel
- CompletionService compSvc =
- new ExecutorCompletionService(this.threadpool);
-
- for (final SubClusterInfo info : subClustersActive.values()) {
- compSvc.submit(new Callable() {
- @Override
- public ClusterMetricsInfo call() {
+ Stream clusterMetricsInfoStream = subClusterInfos.parallelStream()
+ .map(subClusterInfo -> {
DefaultRequestInterceptorREST interceptor =
- getOrCreateInterceptorForSubCluster(
- info.getSubClusterId(), info.getRMWebServiceAddress());
+ getOrCreateInterceptorForSubCluster(subClusterInfo);
try {
- ClusterMetricsInfo metrics = interceptor.getClusterMetricsInfo();
- return metrics;
+ return interceptor.getClusterMetricsInfo();
} catch (Exception e) {
LOG.error("Subcluster {} failed to return Cluster Metrics.",
- info.getSubClusterId());
+ subClusterInfo.getSubClusterId());
return null;
}
- }
- });
- }
+ });
- // Collect all the responses in parallel
- for (int i = 0; i < subClustersActive.size(); i++) {
+ clusterMetricsInfoStream.forEach(clusterMetricsInfo -> {
try {
- Future future = compSvc.take();
- ClusterMetricsInfo metricsResponse = future.get();
-
- if (metricsResponse != null) {
- RouterWebServiceUtil.mergeMetrics(metrics, metricsResponse);
+ if (clusterMetricsInfo != null) {
+ RouterWebServiceUtil.mergeMetrics(metrics, clusterMetricsInfo);
}
} catch (Throwable e) {
- LOG.warn("Failed to get nodes report ", e);
+ LOG.warn("Failed to get nodes report.", e);
}
- }
+ });
return metrics;
}
@@ -1131,31 +1059,15 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
@Override
public AppState getAppState(HttpServletRequest hsr, String appId)
throws AuthorizationException {
-
- ApplicationId applicationId = null;
try {
- applicationId = ApplicationId.fromString(appId);
- } catch (IllegalArgumentException e) {
- return null;
- }
-
- SubClusterInfo subClusterInfo = null;
- SubClusterId subClusterId = null;
- try {
- subClusterId =
- federationFacade.getApplicationHomeSubCluster(applicationId);
- if (subClusterId == null) {
- return null;
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
+ if (interceptor != null) {
+ return interceptor.getAppState(hsr, appId);
}
- subClusterInfo = federationFacade.getSubCluster(subClusterId);
- } catch (YarnException e) {
- return null;
+ } catch (YarnException | IllegalArgumentException e) {
+ LOG.error("getHomeSubClusterInfoByAppId error, applicationId = {}.", appId, e);
}
-
- DefaultRequestInterceptorREST interceptor =
- getOrCreateInterceptorForSubCluster(subClusterId,
- subClusterInfo.getRMWebServiceAddress());
- return interceptor.getAppState(hsr, appId);
+ return null;
}
@Override
@@ -1176,12 +1088,12 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
public ClusterInfo getClusterInfo() {
try {
long startTime = Time.now();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
Class[] argsClasses = new Class[]{};
Object[] args = new Object[]{};
ClientMethod remoteMethod = new ClientMethod("getClusterInfo", argsClasses, args);
Map subClusterInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, ClusterInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, ClusterInfo.class);
FederationClusterInfo federationClusterInfo = new FederationClusterInfo();
subClusterInfoMap.forEach((subClusterInfo, clusterInfo) -> {
SubClusterId subClusterId = subClusterInfo.getSubClusterId();
@@ -1216,13 +1128,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
public ClusterUserInfo getClusterUserInfo(HttpServletRequest hsr) {
try {
long startTime = Time.now();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{HttpServletRequest.class};
Object[] args = new Object[]{hsrCopy};
ClientMethod remoteMethod = new ClientMethod("getClusterUserInfo", argsClasses, args);
Map subClusterInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, ClusterUserInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, ClusterUserInfo.class);
FederationClusterUserInfo federationClusterUserInfo = new FederationClusterUserInfo();
subClusterInfoMap.forEach((subClusterInfo, clusterUserInfo) -> {
SubClusterId subClusterId = subClusterInfo.getSubClusterId();
@@ -1246,7 +1158,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
/**
* This method retrieves the current scheduler status, and it is reachable by
* using {@link RMWSConsts#SCHEDULER}.
- *
* For the federation mode, the SchedulerType information of the cluster
* cannot be integrated and displayed, and the specific cluster information needs to be marked.
*
@@ -1256,12 +1167,12 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
public SchedulerTypeInfo getSchedulerInfo() {
try {
long startTime = Time.now();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
Class[] argsClasses = new Class[]{};
Object[] args = new Object[]{};
ClientMethod remoteMethod = new ClientMethod("getSchedulerInfo", argsClasses, args);
Map subClusterInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, SchedulerTypeInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, SchedulerTypeInfo.class);
FederationSchedulerTypeInfo federationSchedulerTypeInfo = new FederationSchedulerTypeInfo();
subClusterInfoMap.forEach((subClusterInfo, schedulerTypeInfo) -> {
SubClusterId subClusterId = subClusterInfo.getSubClusterId();
@@ -1319,17 +1230,18 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// Step2. Call dumpSchedulerLogs of each subcluster.
try {
long startTime = clock.getTime();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{String.class, HttpServletRequest.class};
Object[] args = new Object[]{time, hsrCopy};
ClientMethod remoteMethod = new ClientMethod("dumpSchedulerLogs", argsClasses, args);
Map dumpSchedulerLogsMap = invokeConcurrent(
- subClustersActive.values(), remoteMethod, String.class);
+ subClustersActive, remoteMethod, String.class);
StringBuilder stringBuilder = new StringBuilder();
dumpSchedulerLogsMap.forEach((subClusterInfo, msg) -> {
SubClusterId subClusterId = subClusterInfo.getSubClusterId();
- stringBuilder.append("subClusterId" + subClusterId + " : " + msg + "; ");
+ stringBuilder.append("subClusterId")
+ .append(subClusterId).append(" : ").append(msg).append("; ");
});
long stopTime = clock.getTime();
routerMetrics.succeededDumpSchedulerLogsRetrieved(stopTime - startTime);
@@ -1369,12 +1281,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// Query SubClusterInfo according to id,
// if the nodeId cannot get SubClusterInfo, an exception will be thrown directly.
- SubClusterInfo subClusterInfo = getNodeSubcluster(nodeId);
-
// Call the corresponding subCluster to get ActivitiesInfo.
long startTime = clock.getTime();
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByNodeId(nodeId);
final HttpServletRequest hsrCopy = clone(hsr);
ActivitiesInfo activitiesInfo = interceptor.getActivities(hsrCopy, nodeId, groupBy);
if (activitiesInfo != null) {
@@ -1382,10 +1291,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
routerMetrics.succeededGetActivitiesLatencyRetrieved(stopTime - startTime);
return activitiesInfo;
}
- } catch (IllegalArgumentException e) {
- routerMetrics.incrGetActivitiesFailedRetrieved();
- throw e;
- } catch (NotFoundException e) {
+ } catch (IllegalArgumentException | NotFoundException e) {
routerMetrics.incrGetActivitiesFailedRetrieved();
throw e;
}
@@ -1413,13 +1319,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
Validate.checkNotNegative(activitiesCount, "activitiesCount");
// Step2. Call the interface of subCluster concurrently and get the returned result.
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{HttpServletRequest.class, String.class, int.class};
Object[] args = new Object[]{hsrCopy, groupBy, activitiesCount};
ClientMethod remoteMethod = new ClientMethod("getBulkActivities", argsClasses, args);
Map appStatisticsMap = invokeConcurrent(
- subClustersActive.values(), remoteMethod, BulkActivitiesInfo.class);
+ subClustersActive, remoteMethod, BulkActivitiesInfo.class);
// Step3. Generate Federation objects and set subCluster information.
long startTime = clock.getTime();
@@ -1460,22 +1366,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
Set allocationRequestIds, String groupBy, String limit,
Set actions, boolean summarize) {
- // Only verify the app_id,
- // because the specific subCluster needs to be found according to the app_id,
- // and other verifications are directly handed over to the corresponding subCluster RM
- // Check that the appId format is accurate
- try {
- RouterServerUtil.validateApplicationId(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrGetAppActivitiesFailedRetrieved();
- throw e;
- }
-
try {
long startTime = clock.getTime();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
final HttpServletRequest hsrCopy = clone(hsr);
AppActivitiesInfo appActivitiesInfo = interceptor.getAppActivities(hsrCopy, appId, time,
requestPriorities, allocationRequestIds, groupBy, limit, actions, summarize);
@@ -1502,13 +1395,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
Set stateQueries, Set typeQueries) {
try {
long startTime = clock.getTime();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{HttpServletRequest.class, Set.class, Set.class};
Object[] args = new Object[]{hsrCopy, stateQueries, typeQueries};
ClientMethod remoteMethod = new ClientMethod("getAppStatistics", argsClasses, args);
Map appStatisticsMap = invokeConcurrent(
- subClustersActive.values(), remoteMethod, ApplicationStatisticsInfo.class);
+ subClustersActive, remoteMethod, ApplicationStatisticsInfo.class);
ApplicationStatisticsInfo applicationStatisticsInfo =
RouterWebServiceUtil.mergeApplicationStatisticsInfo(appStatisticsMap.values());
if (applicationStatisticsInfo != null) {
@@ -1541,13 +1434,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
throws IOException {
try {
long startTime = clock.getTime();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{HttpServletRequest.class};
Object[] args = new Object[]{hsrCopy};
ClientMethod remoteMethod = new ClientMethod("getNodeToLabels", argsClasses, args);
Map nodeToLabelsInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, NodeToLabelsInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, NodeToLabelsInfo.class);
NodeToLabelsInfo nodeToLabelsInfo =
RouterWebServiceUtil.mergeNodeToLabels(nodeToLabelsInfoMap);
if (nodeToLabelsInfo != null) {
@@ -1570,13 +1463,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
public NodeLabelsInfo getRMNodeLabels(HttpServletRequest hsr) throws IOException {
try {
long startTime = clock.getTime();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{HttpServletRequest.class};
Object[] args = new Object[]{hsrCopy};
ClientMethod remoteMethod = new ClientMethod("getRMNodeLabels", argsClasses, args);
Map nodeToLabelsInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, NodeLabelsInfo.class);
NodeLabelsInfo nodeToLabelsInfo =
RouterWebServiceUtil.mergeNodeLabelsInfo(nodeToLabelsInfoMap);
if (nodeToLabelsInfo != null) {
@@ -1600,12 +1493,12 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
throws IOException {
try {
long startTime = clock.getTime();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
Class[] argsClasses = new Class[]{Set.class};
Object[] args = new Object[]{labels};
ClientMethod remoteMethod = new ClientMethod("getLabelsToNodes", argsClasses, args);
Map labelsToNodesInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, LabelsToNodesInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, LabelsToNodesInfo.class);
Map labelToNodesMap = new HashMap<>();
labelsToNodesInfoMap.values().forEach(labelsToNode -> {
Map values = labelsToNode.getLabelsToNodes();
@@ -1666,7 +1559,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// Step2. We map the NodeId and NodeToLabelsEntry in the request.
Map nodeIdToLabels = new HashMap<>();
- newNodeToLabels.getNodeToLabels().stream().forEach(nodeIdToLabel -> {
+ newNodeToLabels.getNodeToLabels().forEach(nodeIdToLabel -> {
String nodeId = nodeIdToLabel.getNodeId();
nodeIdToLabels.put(nodeId, nodeIdToLabel);
});
@@ -1686,11 +1579,11 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
long startTime = clock.getTime();
final HttpServletRequest hsrCopy = clone(hsr);
StringBuilder builder = new StringBuilder();
- subClusterToNodeToLabelsEntryList.forEach((subCluster, nodeToLabelsEntryList) -> {
- SubClusterId subClusterId = subCluster.getSubClusterId();
+ subClusterToNodeToLabelsEntryList.forEach((subClusterInfo, nodeToLabelsEntryList) -> {
+ SubClusterId subClusterId = subClusterInfo.getSubClusterId();
try {
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subCluster.getSubClusterId(), subCluster.getRMWebServiceAddress());
+ subClusterInfo);
interceptor.replaceLabelsOnNodes(nodeToLabelsEntryList, hsrCopy);
builder.append("subCluster-").append(subClusterId.getId()).append(":Success,");
} catch (Exception e) {
@@ -1703,9 +1596,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// Step5. return call result.
return Response.status(Status.OK).entity(builder.toString()).build();
- } catch (NotFoundException e) {
- routerMetrics.incrReplaceLabelsOnNodesFailedRetrieved();
- throw e;
} catch (Exception e) {
routerMetrics.incrReplaceLabelsOnNodesFailedRetrieved();
throw e;
@@ -1743,8 +1633,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// and then call the replaceLabelsOnNode of the subCluster.
long startTime = clock.getTime();
SubClusterInfo subClusterInfo = getNodeSubcluster(nodeId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByNodeId(nodeId);
final HttpServletRequest hsrCopy = clone(hsr);
interceptor.replaceLabelsOnNode(newNodeLabelsName, hsrCopy, nodeId);
@@ -1753,10 +1642,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
routerMetrics.succeededReplaceLabelsOnNodeRetrieved(stopTime - startTime);
String msg = "subCluster#" + subClusterInfo.getSubClusterId().getId() + ":Success;";
return Response.status(Status.OK).entity(msg).build();
- } catch (NotFoundException e) {
- routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved();
- throw e;
- } catch (Exception e){
+ } catch (Exception e) {
routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved();
throw e;
}
@@ -1767,13 +1653,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
throws IOException {
try {
long startTime = clock.getTime();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{HttpServletRequest.class};
Object[] args = new Object[]{hsrCopy};
ClientMethod remoteMethod = new ClientMethod("getClusterNodeLabels", argsClasses, args);
Map nodeToLabelsInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, NodeLabelsInfo.class);
Set hashSets = Sets.newHashSet();
nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
NodeLabelsInfo nodeLabelsInfo = new NodeLabelsInfo(hashSets);
@@ -1820,18 +1706,17 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
try {
long startTime = clock.getTime();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActives = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{NodeLabelsInfo.class, HttpServletRequest.class};
Object[] args = new Object[]{newNodeLabels, hsrCopy};
ClientMethod remoteMethod = new ClientMethod("addToClusterNodeLabels", argsClasses, args);
Map responseInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, Response.class);
+ invokeConcurrent(subClustersActives, remoteMethod, Response.class);
StringBuffer buffer = new StringBuffer();
// SubCluster-0:SUCCESS,SubCluster-1:SUCCESS
- responseInfoMap.forEach((subClusterInfo, response) -> {
- buildAppendMsg(subClusterInfo, buffer, response);
- });
+ responseInfoMap.forEach((subClusterInfo, response) ->
+ buildAppendMsg(subClusterInfo, buffer, response));
long stopTime = clock.getTime();
routerMetrics.succeededAddToClusterNodeLabelsRetrieved((stopTime - startTime));
return Response.status(Status.OK).entity(buffer.toString()).build();
@@ -1868,19 +1753,18 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
try {
long startTime = clock.getTime();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActives = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{Set.class, HttpServletRequest.class};
Object[] args = new Object[]{oldNodeLabels, hsrCopy};
ClientMethod remoteMethod =
new ClientMethod("removeFromClusterNodeLabels", argsClasses, args);
Map responseInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, Response.class);
+ invokeConcurrent(subClustersActives, remoteMethod, Response.class);
StringBuffer buffer = new StringBuffer();
// SubCluster-0:SUCCESS,SubCluster-1:SUCCESS
- responseInfoMap.forEach((subClusterInfo, response) -> {
- buildAppendMsg(subClusterInfo, buffer, response);
- });
+ responseInfoMap.forEach((subClusterInfo, response) ->
+ buildAppendMsg(subClusterInfo, buffer, response));
long stopTime = clock.getTime();
routerMetrics.succeededRemoveFromClusterNodeLabelsRetrieved(stopTime - startTime);
return Response.status(Status.OK).entity(buffer.toString()).build();
@@ -1897,7 +1781,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
}
/**
- * Bbulid Append information.
+ * Build Append information.
*
* @param subClusterInfo subCluster information.
* @param buffer StringBuffer.
@@ -1920,13 +1804,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
throws IOException {
try {
long startTime = clock.getTime();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{HttpServletRequest.class, String.class};
Object[] args = new Object[]{hsrCopy, nodeId};
ClientMethod remoteMethod = new ClientMethod("getLabelsOnNode", argsClasses, args);
Map nodeToLabelsInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, NodeLabelsInfo.class);
Set hashSets = Sets.newHashSet();
nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
NodeLabelsInfo nodeLabelsInfo = new NodeLabelsInfo(hashSets);
@@ -1952,19 +1836,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
public AppPriority getAppPriority(HttpServletRequest hsr, String appId)
throws AuthorizationException {
- // Check that the appId format is accurate
- try {
- RouterServerUtil.validateApplicationId(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrGetAppPriorityFailedRetrieved();
- throw e;
- }
-
try {
long startTime = clock.getTime();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
AppPriority appPriority = interceptor.getAppPriority(hsr, appId);
if (appPriority != null) {
long stopTime = clock.getTime();
@@ -1988,14 +1862,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
HttpServletRequest hsr, String appId) throws AuthorizationException,
YarnException, InterruptedException, IOException {
- // Check that the appId format is accurate
- try {
- RouterServerUtil.validateApplicationId(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrUpdateAppPriorityFailedRetrieved();
- throw e;
- }
-
if (targetPriority == null) {
routerMetrics.incrUpdateAppPriorityFailedRetrieved();
throw new IllegalArgumentException("Parameter error, the targetPriority is empty or null.");
@@ -2003,9 +1869,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
try {
long startTime = clock.getTime();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
Response response = interceptor.updateApplicationPriority(targetPriority, hsr, appId);
if (response != null) {
long stopTime = clock.getTime();
@@ -2028,19 +1892,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
public AppQueue getAppQueue(HttpServletRequest hsr, String appId)
throws AuthorizationException {
- // Check that the appId format is accurate
- try {
- RouterServerUtil.validateApplicationId(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrGetAppQueueFailedRetrieved();
- throw e;
- }
-
try {
long startTime = clock.getTime();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
AppQueue queue = interceptor.getAppQueue(hsr, appId);
if (queue != null) {
long stopTime = clock.getTime();
@@ -2063,14 +1917,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
String appId) throws AuthorizationException, YarnException,
InterruptedException, IOException {
- // Check that the appId format is accurate
- try {
- RouterServerUtil.validateApplicationId(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrUpdateAppQueueFailedRetrieved();
- throw e;
- }
-
if (targetQueue == null) {
routerMetrics.incrUpdateAppQueueFailedRetrieved();
throw new IllegalArgumentException("Parameter error, the targetQueue is null.");
@@ -2078,9 +1924,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
try {
long startTime = clock.getTime();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
Response response = interceptor.updateAppQueue(targetQueue, hsr, appId);
if (response != null) {
long stopTime = clock.getTime();
@@ -2197,8 +2041,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
byte[] password = token.getPassword().array();
Text kind = new Text(token.getKind());
Text service = new Text(token.getService());
- Token tk = new Token<>(identifier, password, kind, service);
- return tk;
+ return new Token<>(identifier, password, kind, service);
}
/**
@@ -2342,9 +2185,8 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
private Response invokeCreateNewReservation(Map subClustersActive,
List blackList, HttpServletRequest hsr, int retryCount)
- throws YarnException, IOException, InterruptedException {
- SubClusterId subClusterId =
- federationFacade.getRandomActiveSubCluster(subClustersActive, blackList);
+ throws YarnException {
+ SubClusterId subClusterId = getRandomActiveSubCluster(subClustersActive, blackList);
LOG.info("createNewReservation try #{} on SubCluster {}.", retryCount, subClusterId);
SubClusterInfo subClusterInfo = subClustersActive.get(subClusterId);
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
@@ -2591,19 +2433,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
public AppTimeoutInfo getAppTimeout(HttpServletRequest hsr, String appId,
String type) throws AuthorizationException {
- if (appId == null || appId.isEmpty()) {
- routerMetrics.incrGetAppTimeoutFailedRetrieved();
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
- }
-
- // Check that the appId format is accurate
- try {
- ApplicationId.fromString(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrGetAppTimeoutFailedRetrieved();
- throw e;
- }
-
if (type == null || type.isEmpty()) {
routerMetrics.incrGetAppTimeoutFailedRetrieved();
throw new IllegalArgumentException("Parameter error, the type is empty or null.");
@@ -2611,9 +2440,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
try {
long startTime = clock.getTime();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
AppTimeoutInfo appTimeoutInfo = interceptor.getAppTimeout(hsr, appId, type);
if (appTimeoutInfo != null) {
long stopTime = clock.getTime();
@@ -2636,19 +2463,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
public AppTimeoutsInfo getAppTimeouts(HttpServletRequest hsr, String appId)
throws AuthorizationException {
- // Check that the appId format is accurate
- try {
- RouterServerUtil.validateApplicationId(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrGetAppTimeoutsFailedRetrieved();
- throw e;
- }
-
try {
long startTime = clock.getTime();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
AppTimeoutsInfo appTimeoutsInfo = interceptor.getAppTimeouts(hsr, appId);
if (appTimeoutsInfo != null) {
long stopTime = clock.getTime();
@@ -2673,14 +2490,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
HttpServletRequest hsr, String appId) throws AuthorizationException,
YarnException, InterruptedException, IOException {
- // Check that the appId format is accurate
- try {
- RouterServerUtil.validateApplicationId(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
- throw e;
- }
-
if (appTimeout == null) {
routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
throw new IllegalArgumentException("Parameter error, the appTimeout is null.");
@@ -2688,9 +2497,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
try {
long startTime = Time.now();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
Response response = interceptor.updateApplicationTimeout(appTimeout, hsr, appId);
if (response != null) {
long stopTime = clock.getTime();
@@ -2713,19 +2520,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
@Override
public AppAttemptsInfo getAppAttempts(HttpServletRequest hsr, String appId) {
- // Check that the appId format is accurate
- try {
- RouterServerUtil.validateApplicationId(appId);
- } catch (IllegalArgumentException e) {
- routerMetrics.incrAppAttemptsFailedRetrieved();
- throw e;
- }
-
try {
long startTime = Time.now();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
AppAttemptsInfo appAttemptsInfo = interceptor.getAppAttempts(hsr, appId);
if (appAttemptsInfo != null) {
long stopTime = Time.now();
@@ -2768,14 +2565,14 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// Traverse SubCluster and call checkUserAccessToQueue Api
try {
long startTime = Time.now();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{String.class, String.class, String.class,
HttpServletRequest.class};
Object[] args = new Object[]{queue, username, queueAclType, hsrCopy};
ClientMethod remoteMethod = new ClientMethod("checkUserAccessToQueue", argsClasses, args);
Map rmQueueAclInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, RMQueueAclInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, RMQueueAclInfo.class);
FederationRMQueueAclInfo aclInfo = new FederationRMQueueAclInfo();
rmQueueAclInfoMap.forEach((subClusterInfo, rMQueueAclInfo) -> {
SubClusterId subClusterId = subClusterInfo.getSubClusterId();
@@ -2803,7 +2600,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// Check that the appId/appAttemptId format is accurate
try {
- RouterServerUtil.validateApplicationId(appId);
RouterServerUtil.validateApplicationAttemptId(appAttemptId);
} catch (IllegalArgumentException e) {
routerMetrics.incrAppAttemptReportFailedRetrieved();
@@ -2813,9 +2609,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// Call the getAppAttempt method
try {
long startTime = Time.now();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
AppAttemptInfo appAttemptInfo = interceptor.getAppAttempt(req, res, appId, appAttemptId);
if (appAttemptInfo != null) {
long stopTime = Time.now();
@@ -2853,13 +2647,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
try {
long startTime = clock.getTime();
ContainersInfo containersInfo = new ContainersInfo();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
Class[] argsClasses = new Class[]{
HttpServletRequest.class, HttpServletResponse.class, String.class, String.class};
Object[] args = new Object[]{req, res, appId, appAttemptId};
ClientMethod remoteMethod = new ClientMethod("getContainers", argsClasses, args);
Map containersInfoMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, ContainersInfo.class);
+ invokeConcurrent(subClustersActive, remoteMethod, ContainersInfo.class);
if (containersInfoMap != null && !containersInfoMap.isEmpty()) {
containersInfoMap.values().forEach(containers ->
containersInfo.addAll(containers.getContainers()));
@@ -2895,7 +2689,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
// Check that the appId/appAttemptId/containerId format is accurate
try {
- RouterServerUtil.validateApplicationId(appId);
RouterServerUtil.validateApplicationAttemptId(appAttemptId);
RouterServerUtil.validateContainerId(containerId);
} catch (IllegalArgumentException e) {
@@ -2905,9 +2698,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
try {
long startTime = Time.now();
- SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
- DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
- subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
+ DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId);
ContainerInfo containerInfo =
interceptor.getContainer(req, res, appId, appAttemptId, containerId);
if (containerInfo != null) {
@@ -3006,13 +2797,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
try {
long startTime = clock.getTime();
FederationConfInfo federationConfInfo = new FederationConfInfo();
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{HttpServletRequest.class};
Object[] args = new Object[]{hsrCopy};
ClientMethod remoteMethod = new ClientMethod("getSchedulerConfiguration", argsClasses, args);
Map responseMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, Response.class);
+ invokeConcurrent(subClustersActive, remoteMethod, Response.class);
responseMap.forEach((subClusterInfo, response) -> {
SubClusterId subClusterId = subClusterInfo.getSubClusterId();
if (response == null) {
@@ -3022,7 +2813,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
String errorMsg = String.valueOf(response.getEntity());
federationConfInfo.getErrorMsgs().add(errorMsg);
} else if (response.getStatus() == Status.OK.getStatusCode()) {
- ConfInfo fedConfInfo = ConfInfo.class.cast(response.getEntity());
+ ConfInfo fedConfInfo = (ConfInfo) response.getEntity();
fedConfInfo.setSubClusterId(subClusterId.getId());
federationConfInfo.getList().add(fedConfInfo);
}
@@ -3175,7 +2966,11 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
*/
private SubClusterInfo getHomeSubClusterInfoByAppId(String appId)
throws YarnException {
- SubClusterInfo subClusterInfo = null;
+
+ if (StringUtils.isBlank(appId)) {
+      throw new IllegalArgumentException("applicationId can't be null or empty.");
+ }
+
try {
ApplicationId applicationId = ApplicationId.fromString(appId);
SubClusterId subClusterId = federationFacade.getApplicationHomeSubCluster(applicationId);
@@ -3183,8 +2978,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
RouterServerUtil.logAndThrowException(null,
"Can't get HomeSubCluster by applicationId %s", applicationId);
}
- subClusterInfo = federationFacade.getSubCluster(subClusterId);
- return subClusterInfo;
+ return federationFacade.getSubCluster(subClusterId);
} catch (IllegalArgumentException e){
throw new IllegalArgumentException(e);
} catch (YarnException e) {
@@ -3210,8 +3004,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
RouterServerUtil.logAndThrowException(null,
"Can't get HomeSubCluster by reservationId %s", resId);
}
- SubClusterInfo subClusterInfo = federationFacade.getSubCluster(subClusterId);
- return subClusterInfo;
+ return federationFacade.getSubCluster(subClusterId);
} catch (YarnException | IOException e) {
RouterServerUtil.logAndThrowException(e,
"Get HomeSubClusterInfo by reservationId %s failed.", resId);
@@ -3236,12 +3029,10 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
@VisibleForTesting
public Map invokeConcurrentGetNodeLabel()
throws IOException, YarnException {
- Map subClustersActive = getActiveSubclusters();
+ Collection subClustersActive = federationFacade.getActiveSubClusters();
Class[] argsClasses = new Class[]{String.class};
Object[] args = new Object[]{null};
ClientMethod remoteMethod = new ClientMethod("getNodes", argsClasses, args);
- Map nodesMap =
- invokeConcurrent(subClustersActive.values(), remoteMethod, NodesInfo.class);
- return nodesMap;
+ return invokeConcurrent(subClustersActive, remoteMethod, NodesInfo.class);
}
}
\ No newline at end of file
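The hunks above repeatedly replace the `getHomeSubClusterInfoByAppId` + `getOrCreateInterceptorForSubCluster` pair with a single `getOrCreateInterceptorByAppId(appId)` call, so appId validation and home-sub-cluster resolution live in one place. A minimal sketch of what such a helper could look like, built only from methods that already appear in this diff (the exact committed implementation may differ):

```java
// Sketch only: chains the two lookups the call sites used to perform inline.
private DefaultRequestInterceptorREST getOrCreateInterceptorByAppId(String appId)
    throws YarnException {
  // Resolve the home sub-cluster for the application; this also rejects blank
  // or malformed application ids via getHomeSubClusterInfoByAppId.
  SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
  // Reuse (or lazily create) the REST interceptor bound to that sub-cluster.
  return getOrCreateInterceptorForSubCluster(
      subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
}
```

Centralizing the lookup this way is what allows the per-endpoint `RouterServerUtil.validateApplicationId` blocks to be dropped without losing the format check.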
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java
index 7af470dc583..07afc9180ac 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java
@@ -111,8 +111,8 @@ public final class RouterWebServiceUtil {
* @param formParam the form parameters as input for a specific REST call
* @param additionalParam the query parameters as input for a specific REST
* call in case the call has no servlet request
+ * @param conf configuration.
* @param client same client used to reduce number of clients created
- * @param conf configuration
* @return the retrieved entity from the REST call
*/
protected static T genericForward(final String webApp,
@@ -510,6 +510,11 @@ public final class RouterWebServiceUtil {
/**
* Extract from HttpServletRequest the MediaType in output.
+ *
+ * @param request the servlet request.
+ * @param returnType the return type of the REST call.
+     * @param <T> Generic Type T.
+ * @return MediaType.
*/
protected static String getMediaTypeFromHttpServletRequest(
HttpServletRequest request, final Class returnType) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
index 19bba51e270..5279902b58a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
@@ -145,8 +145,6 @@ import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
import org.junit.Assert;
import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.RM_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.RM_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
@@ -170,11 +168,11 @@ import static org.mockito.Mockito.when;
* reused to validate different request interceptor chains.
*/
public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
- private static final Logger LOG =
- LoggerFactory.getLogger(TestFederationInterceptorREST.class);
+
private final static int NUM_SUBCLUSTER = 4;
private static final int BAD_REQUEST = 400;
private static final int ACCEPTED = 202;
+ private static final String TEST_USER = "test-user";
private static final int OK = 200;
private static String user = "test-user";
private TestableFederationInterceptorREST interceptor;
@@ -195,7 +193,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
stateStoreUtil = new FederationStateStoreTestUtil(stateStore);
interceptor.setConf(this.getConf());
- interceptor.init(user);
+ interceptor.init(TEST_USER);
subClusters = new ArrayList<>();
@@ -282,8 +280,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
* ApplicationId has to belong to one of the SubCluster in the cluster.
*/
@Test
- public void testGetNewApplication()
- throws YarnException, IOException, InterruptedException {
+ public void testGetNewApplication() throws IOException, InterruptedException {
Response response = interceptor.createNewApplication(null);
@@ -359,8 +356,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
* request.
*/
@Test
- public void testSubmitApplicationEmptyRequest()
- throws YarnException, IOException, InterruptedException {
+ public void testSubmitApplicationEmptyRequest() throws IOException, InterruptedException {
// ApplicationSubmissionContextInfo null
Response response = interceptor.submitApplication(null, null);
@@ -384,8 +380,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
* application in wrong format.
*/
@Test
- public void testSubmitApplicationWrongFormat()
- throws YarnException, IOException, InterruptedException {
+ public void testSubmitApplicationWrongFormat() throws IOException, InterruptedException {
ApplicationSubmissionContextInfo context =
new ApplicationSubmissionContextInfo();
@@ -506,8 +501,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
* application does not exist in StateStore.
*/
@Test
- public void testGetApplicationNotExists()
- throws YarnException, IOException, InterruptedException {
+ public void testGetApplicationNotExists() {
ApplicationId appId =
ApplicationId.newInstance(System.currentTimeMillis(), 1);
@@ -522,8 +516,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
* application in wrong format.
*/
@Test
- public void testGetApplicationWrongFormat()
- throws YarnException, IOException, InterruptedException {
+ public void testGetApplicationWrongFormat() {
AppInfo response = interceptor.getApp(null, "Application_wrong_id", null);
@@ -535,8 +528,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
* subcluster provided one application.
*/
@Test
- public void testGetApplicationsReport()
- throws YarnException, IOException, InterruptedException {
+ public void testGetApplicationsReport() {
AppsInfo responseGet = interceptor.getApps(null, null, null, null, null,
null, null, null, null, null, null, null, null, null, null);
@@ -645,8 +637,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
* application does not exist in StateStore.
*/
@Test
- public void testGetApplicationStateNotExists()
- throws YarnException, IOException, InterruptedException {
+ public void testGetApplicationStateNotExists() throws IOException {
ApplicationId appId =
ApplicationId.newInstance(Time.now(), 1);
@@ -662,7 +653,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
*/
@Test
public void testGetApplicationStateWrongFormat()
- throws YarnException, IOException, InterruptedException {
+ throws IOException {
AppState response = interceptor.getAppState(null, "Application_wrong_id");
@@ -865,8 +856,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
}
@Test
- public void testGetAppAttempts()
- throws IOException, InterruptedException, YarnException {
+ public void testGetAppAttempts() throws IOException, InterruptedException {
// Submit application to multiSubCluster
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
ApplicationSubmissionContextInfo context = new ApplicationSubmissionContextInfo();
@@ -897,8 +887,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
}
@Test
- public void testGetAppAttempt()
- throws IOException, InterruptedException, YarnException {
+ public void testGetAppAttempt() throws IOException, InterruptedException {
// Generate ApplicationId information
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
@@ -922,7 +911,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
}
@Test
- public void testGetAppTimeout() throws IOException, InterruptedException, YarnException {
+ public void testGetAppTimeout() throws IOException, InterruptedException {
// Generate ApplicationId information
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
@@ -942,7 +931,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
}
@Test
- public void testGetAppTimeouts() throws IOException, InterruptedException, YarnException {
+ public void testGetAppTimeouts() throws IOException, InterruptedException {
// Generate ApplicationId information
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
@@ -1022,8 +1011,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
}
@Test
- public void testGetAppPriority() throws IOException, InterruptedException,
- YarnException {
+ public void testGetAppPriority() throws IOException, InterruptedException {
// Submit application to multiSubCluster
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
@@ -1072,7 +1060,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
}
@Test
- public void testGetAppQueue() throws IOException, InterruptedException, YarnException {
+ public void testGetAppQueue() throws IOException, InterruptedException {
String queueName = "queueName";
// Submit application to multiSubCluster
@@ -1090,7 +1078,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
}
@Test
- public void testGetAppsInfoCache() throws IOException, InterruptedException, YarnException {
+ public void testGetAppsInfoCache() {
AppsInfo responseGet = interceptor.getApps(
null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);
@@ -1102,7 +1090,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
LRUCacheHashMap appsInfoCache =
interceptor.getAppInfosCaches();
Assert.assertNotNull(appsInfoCache);
- Assert.assertTrue(!appsInfoCache.isEmpty());
+ Assert.assertFalse(appsInfoCache.isEmpty());
Assert.assertEquals(1, appsInfoCache.size());
Assert.assertTrue(appsInfoCache.containsKey(cacheKey));
@@ -1113,7 +1101,6 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
@Test
public void testGetAppStatistics() throws IOException, InterruptedException, YarnException {
- AppState appStateRUNNING = new AppState(YarnApplicationState.RUNNING.name());
// Submit application to multiSubCluster
ApplicationId appId = ApplicationId.newInstance(Time.now(), 1);
@@ -1200,6 +1187,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Assert.assertNotNull(entity);
Assert.assertNotNull(entity instanceof ReservationListInfo);
+ Assert.assertTrue(entity instanceof ReservationListInfo);
ReservationListInfo listInfo = (ReservationListInfo) entity;
Assert.assertNotNull(listInfo);
@@ -1267,6 +1255,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Assert.assertNotNull(entity);
Assert.assertNotNull(entity instanceof ReservationListInfo);
+ Assert.assertTrue(entity instanceof ReservationListInfo);
ReservationListInfo listInfo = (ReservationListInfo) entity;
Assert.assertNotNull(listInfo);
@@ -1310,6 +1299,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Assert.assertNotNull(entity);
Assert.assertNotNull(entity instanceof ReservationListInfo);
+ Assert.assertTrue(entity instanceof ReservationListInfo);
ReservationListInfo listInfo = (ReservationListInfo) entity;
Assert.assertNotNull(listInfo);
@@ -1373,8 +1363,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
throws IOException, InterruptedException {
ReservationSubmissionRequestInfo resSubmissionRequestInfo =
getReservationSubmissionRequestInfo(reservationId);
- Response response = interceptor.submitReservation(resSubmissionRequestInfo, null);
- return response;
+ return interceptor.submitReservation(resSubmissionRequestInfo, null);
}
public static ReservationSubmissionRequestInfo getReservationSubmissionRequestInfo(
@@ -1402,15 +1391,13 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
long arrival = Time.now();
// deadline by when the resource(s) must be allocated.
- // The reason for choosing 1.05 is because this gives an integer
+ // The reason for choosing 1.05 is that this gives an integer
// DURATION * 0.05 = 3000(ms)
// deadline = arrival + 3000ms
long deadline = (long) (arrival + 1.05 * DURATION);
- ReservationSubmissionRequest submissionRequest = createSimpleReservationRequest(
+ return createSimpleReservationRequest(
reservationId, numContainers, arrival, deadline, DURATION, memory, vcore);
-
- return submissionRequest;
}
public static ReservationSubmissionRequest createSimpleReservationRequest(
@@ -1423,9 +1410,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Collections.singletonList(r), ReservationRequestInterpreter.R_ALL);
ReservationDefinition rDef = ReservationDefinition.newInstance(
arrival, deadline, reqs, "testClientRMService#reservation", "0", Priority.UNDEFINED);
- ReservationSubmissionRequest request = ReservationSubmissionRequest.newInstance(
- rDef, QUEUE_DEDICATED_FULL, reservationId);
- return request;
+ return ReservationSubmissionRequest.newInstance(rDef, QUEUE_DEDICATED_FULL, reservationId);
}
@Test
@@ -1497,7 +1482,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
interceptor.checkUserAccessToQueue(queue, userName, queueACL.name(), mockHsr);
Assert.assertNotNull(aclInfo);
Assert.assertTrue(aclInfo instanceof FederationRMQueueAclInfo);
- FederationRMQueueAclInfo fedAclInfo = FederationRMQueueAclInfo.class.cast(aclInfo);
+ FederationRMQueueAclInfo fedAclInfo = (FederationRMQueueAclInfo) aclInfo;
List aclInfos = fedAclInfo.getList();
Assert.assertNotNull(aclInfos);
Assert.assertEquals(4, aclInfos.size());
@@ -1513,7 +1498,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
interceptor.checkUserAccessToQueue(queue, userName, queueACL.name(), mockHsr);
Assert.assertNotNull(aclInfo);
Assert.assertTrue(aclInfo instanceof FederationRMQueueAclInfo);
- FederationRMQueueAclInfo fedAclInfo = FederationRMQueueAclInfo.class.cast(aclInfo);
+ FederationRMQueueAclInfo fedAclInfo = (FederationRMQueueAclInfo) aclInfo;
List aclInfos = fedAclInfo.getList();
Assert.assertNotNull(aclInfos);
Assert.assertEquals(4, aclInfos.size());
@@ -1589,13 +1574,12 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Assert.assertTrue(typeInfo instanceof FederationSchedulerTypeInfo);
FederationSchedulerTypeInfo federationSchedulerTypeInfo =
- FederationSchedulerTypeInfo.class.cast(typeInfo);
+ (FederationSchedulerTypeInfo) typeInfo;
Assert.assertNotNull(federationSchedulerTypeInfo);
List schedulerTypeInfos = federationSchedulerTypeInfo.getList();
Assert.assertNotNull(schedulerTypeInfos);
Assert.assertEquals(4, schedulerTypeInfos.size());
- List subClusterIds =
- subClusters.stream().map(subClusterId -> subClusterId.getId()).
+ List subClusterIds = subClusters.stream().map(SubClusterId::getId).
collect(Collectors.toList());
for (SchedulerTypeInfo schedulerTypeInfo : schedulerTypeInfos) {
@@ -1609,8 +1593,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
SchedulerInfo schedulerInfo = schedulerTypeInfo.getSchedulerInfo();
Assert.assertNotNull(schedulerInfo);
Assert.assertTrue(schedulerInfo instanceof CapacitySchedulerInfo);
- CapacitySchedulerInfo capacitySchedulerInfo =
- CapacitySchedulerInfo.class.cast(schedulerInfo);
+ CapacitySchedulerInfo capacitySchedulerInfo = (CapacitySchedulerInfo) schedulerInfo;
Assert.assertNotNull(capacitySchedulerInfo);
// 3. The parent queue name should be root
@@ -1702,7 +1685,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Assert.assertNotNull(entity);
Assert.assertTrue(entity instanceof DelegationToken);
- DelegationToken dtoken = DelegationToken.class.cast(entity);
+ DelegationToken dtoken = (DelegationToken) entity;
Assert.assertEquals(TEST_RENEWER, dtoken.getRenewer());
Assert.assertEquals(TEST_RENEWER, dtoken.getOwner());
Assert.assertEquals("RM_DELEGATION_TOKEN", dtoken.getKind());
@@ -1751,7 +1734,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Object entity = response.getEntity();
Assert.assertNotNull(entity);
Assert.assertTrue(entity instanceof DelegationToken);
- DelegationToken dtoken = DelegationToken.class.cast(entity);
+ DelegationToken dtoken = (DelegationToken) entity;
final String yarnTokenHeader = "Hadoop-YARN-RM-Delegation-Token";
when(request.getHeader(yarnTokenHeader)).thenReturn(dtoken.getToken());
@@ -1764,7 +1747,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Assert.assertTrue(renewEntity instanceof DelegationToken);
// renewDelegation, we only return renewDate, other values are NULL.
- DelegationToken renewDToken = DelegationToken.class.cast(renewEntity);
+ DelegationToken renewDToken = (DelegationToken) renewEntity;
Assert.assertNull(renewDToken.getRenewer());
Assert.assertNull(renewDToken.getOwner());
Assert.assertNull(renewDToken.getKind());
@@ -1789,7 +1772,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Object entity = response.getEntity();
Assert.assertNotNull(entity);
Assert.assertTrue(entity instanceof DelegationToken);
- DelegationToken dtoken = DelegationToken.class.cast(entity);
+ DelegationToken dtoken = (DelegationToken) entity;
final String yarnTokenHeader = "Hadoop-YARN-RM-Delegation-Token";
when(request.getHeader(yarnTokenHeader)).thenReturn(dtoken.getToken());
@@ -1903,7 +1886,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
// We cannot guarantee the calling order of the sub-clusters,
// We guarantee that the returned result contains the information of each subCluster.
Assert.assertNotNull(dumpSchedulerLogsMsg);
- subClusters.stream().forEach(subClusterId -> {
+ subClusters.forEach(subClusterId -> {
String subClusterMsg =
"subClusterId" + subClusterId + " : Capacity scheduler logs are being created.; ";
Assert.assertTrue(dumpSchedulerLogsMsg.contains(subClusterMsg));
@@ -1978,7 +1961,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Assert.assertTrue(bulkActivitiesInfo instanceof FederationBulkActivitiesInfo);
FederationBulkActivitiesInfo federationBulkActivitiesInfo =
- FederationBulkActivitiesInfo.class.cast(bulkActivitiesInfo);
+ (FederationBulkActivitiesInfo) bulkActivitiesInfo;
Assert.assertNotNull(federationBulkActivitiesInfo);
List activitiesInfos = federationBulkActivitiesInfo.getList();
@@ -2033,9 +2016,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
// we confirm the result by contains
String expectedMsg =
"SubCluster-0:SUCCESS,SubCluster-1:SUCCESS,SubCluster-2:SUCCESS,SubCluster-3:SUCCESS";
- Arrays.stream(entities).forEach(item -> {
- Assert.assertTrue(expectedMsg.contains(item));
- });
+ Arrays.stream(entities).forEach(item -> Assert.assertTrue(expectedMsg.contains(item)));
}
@Test
@@ -2098,9 +2079,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
// we confirm the result by contains
String expectedMsg =
"SubCluster-0:SUCCESS,SubCluster-1:SUCCESS,SubCluster-2:SUCCESS,SubCluster-3:SUCCESS";
- Arrays.stream(entities).forEach(item -> {
- Assert.assertTrue(expectedMsg.contains(item));
- });
+ Arrays.stream(entities).forEach(item -> Assert.assertTrue(expectedMsg.contains(item)));
}
@Test
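Many of the test cleanups above swap `Foo.class.cast(obj)` for a plain `(Foo) obj` cast. Both compile to the same checked conversion; `Class.cast` only earns its verbosity when the target `Class` object is known solely at runtime. A tiny illustrative snippet (names are made up for the example):

```java
// Illustration of the cast idiom standardized by this patch.
public final class CastIdiomDemo {
  public static void main(String[] args) {
    Object value = "hello";
    // Plain cast: preferred when the target type is known at compile time.
    String viaPlainCast = (String) value;
    // Class.cast: equivalent checked conversion, useful when the Class object
    // is only available at runtime (e.g. passed in as a parameter).
    String viaClassCast = String.class.cast(value);
    System.out.println(viaPlainCast.equals(viaClassCast)); // prints: true
  }
}
```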
From 811441d5bc4fc6f788a277558274160bf4c242a9 Mon Sep 17 00:00:00 2001
From: zhtttylz
Date: Sat, 1 Apr 2023 18:18:20 +0800
Subject: [PATCH 38/78] HDFS-16951. Add description of GETSERVERDEFAULTS to
WebHDFS doc (#5491)
Co-authored-by: Zhtttylz
Reviewed-by: Shilun Fan
Signed-off-by: Shilun Fan
---
.../hadoop-hdfs/src/site/markdown/WebHDFS.md | 48 +++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
index 46b5613fe72..5e5924ad36e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
@@ -57,6 +57,7 @@ The HTTP REST API supports the complete [FileSystem](../../api/org/apache/hadoop
* [`GETSNAPSHOTLIST`](#Get_Snapshot_List)
* [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileBlockLocations)
* [`GETECPOLICY`](#Get_EC_Policy) (see [HDFSErasureCoding](./HDFSErasureCoding.html#Administrative_commands).getErasureCodingPolicy)
+ * [`GETSERVERDEFAULTS`](#Get_Server_Defaults) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServerDefaults)
* HTTP PUT
* [`CREATE`](#Create_and_Write_to_a_File) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).create)
* [`MKDIRS`](#Make_a_Directory) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).mkdirs)
@@ -1109,6 +1110,35 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getAclSta
See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).access
+### Get Server Defaults
+
+* Submit a HTTP GET request.
+
+        curl -i "http://<HOST>:<PORT>/webhdfs/v1/?op=GETSERVERDEFAULTS"
+
+    The client receives a response with a [`ServerDefaults` JSON object](#Server_Defaults_JSON_Schema):
+
+ HTTP/1.1 200 OK
+ Content-Type: application/json
+ Transfer-Encoding: chunked
+
+ {
+ "FsServerDefaults": {
+ "replication": 3,
+ "encryptDataTransfer": "false",
+ "defaultStoragePolicyId":7,
+ "writePacketSize": 65536,
+ "fileBufferSize": 4096,
+ "checksumType": 2,
+ "trashInterval": 10080,
+ "keyProviderUri": "",
+ "blockSize": 134217728,
+ "bytesPerChecksum": 512
+ }
+ }
+
+See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServerDefaults
+
Storage Policy Operations
-------------------------
@@ -3042,6 +3072,24 @@ var blockLocationProperties =
}
};
```
+### Server Defaults JSON Schema
+
+```json
+{
+ "FsServerDefaults": {
+ "replication": 3,
+ "encryptDataTransfer": false,
+ "defaultStoragePolicyId": 7,
+ "writePacketSize": 65536,
+ "fileBufferSize": 4096,
+ "checksumType": 2,
+ "trashInterval": 10080,
+ "keyProviderUri": "",
+ "blockSize": 134217728,
+ "bytesPerChecksum": 512
+ }
+}
+```
HTTP Query Parameter Dictionary
-------------------------------
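On the client side, the new `GETSERVERDEFAULTS` op is the REST counterpart of `FileSystem.getServerDefaults(Path)`. A hedged sketch of reading the same values through the `webhdfs://` scheme; the NameNode HTTP address is a placeholder, and whether the call actually goes over this REST op (rather than falling back to client-side defaults) depends on the Hadoop version in use:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.Path;

// Sketch only: "namenode:9870" is a placeholder, not a value from the patch.
public final class GetServerDefaultsExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    try (FileSystem fs = FileSystem.get(URI.create("webhdfs://namenode:9870/"), conf)) {
      FsServerDefaults defaults = fs.getServerDefaults(new Path("/"));
      System.out.println("blockSize=" + defaults.getBlockSize()
          + " replication=" + defaults.getReplication()
          + " bytesPerChecksum=" + defaults.getBytesPerChecksum());
    }
  }
}
```

The fields printed here correspond to keys in the `FsServerDefaults` JSON object shown above.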
From 14c5810d5ef284216a88ee1d0c158fc451cf7fda Mon Sep 17 00:00:00 2001
From: Chris Nauroth
Date: Mon, 3 Apr 2023 22:53:29 +0000
Subject: [PATCH 39/78] HADOOP-18680: Insufficient heap during full test runs
in Docker container.
Closes #5522
Signed-off-by: Ayush Saxena
---
dev-support/docker/Dockerfile_aarch64 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dev-support/docker/Dockerfile_aarch64 b/dev-support/docker/Dockerfile_aarch64
index dd0348961f4..14a53780127 100644
--- a/dev-support/docker/Dockerfile_aarch64
+++ b/dev-support/docker/Dockerfile_aarch64
@@ -74,7 +74,7 @@ ENV PATH "${PATH}:/opt/protobuf/bin"
###
# Avoid out of memory errors in builds
###
-ENV MAVEN_OPTS -Xms256m -Xmx1536m
+ENV MAVEN_OPTS -Xms256m -Xmx3072m
# Skip gpg verification when downloading Yetus via yetus-wrapper
ENV HADOOP_SKIP_YETUS_VERIFICATION true
From 937caf7de9e50268ff49af86825eac698fb98d2d Mon Sep 17 00:00:00 2001
From: Viraj Jasani
Date: Tue, 4 Apr 2023 06:39:53 -0700
Subject: [PATCH 40/78] HDFS-16967. RBF: File based state stores should allow
concurrent access to the records (#5523)
Reviewed-by: Inigo Goiri
Reviewed-by: Simbarashe Dzinamarira
Signed-off-by: Takanobu Asanuma
---
.../federation/router/RBFConfigKeys.java | 9 +
.../driver/impl/StateStoreFileBaseImpl.java | 197 ++++++++++++++----
.../store/driver/impl/StateStoreFileImpl.java | 7 +
.../driver/impl/StateStoreFileSystemImpl.java | 9 +-
.../src/main/resources/hdfs-rbf-default.xml | 28 +++
.../driver/TestStateStoreDriverBase.java | 1 +
.../store/driver/TestStateStoreFile.java | 32 ++-
.../driver/TestStateStoreFileSystem.java | 47 +++--
8 files changed, 268 insertions(+), 62 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java
index c0ee9504597..f47d6ceb269 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java
@@ -255,6 +255,15 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
public static final int FEDERATION_STORE_ZK_ASYNC_MAX_THREADS_DEFAULT =
-1;
+ // HDFS Router-based federation File based store implementation specific configs
+ public static final String FEDERATION_STORE_FILE_ASYNC_THREADS =
+ FEDERATION_STORE_PREFIX + "driver.file.async.threads";
+ public static final int FEDERATION_STORE_FILE_ASYNC_THREADS_DEFAULT = 0;
+
+ public static final String FEDERATION_STORE_FS_ASYNC_THREADS =
+ FEDERATION_STORE_PREFIX + "driver.fs.async.threads";
+ public static final int FEDERATION_STORE_FS_ASYNC_THREADS_DEFAULT = 0;
+
// HDFS Router safe mode
public static final String DFS_ROUTER_SAFEMODE_ENABLE =
FEDERATION_ROUTER_PREFIX + "safemode.enable";
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java
index c93d919aea0..ec3c89b65bc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java
@@ -25,14 +25,24 @@ import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
+
import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
import org.apache.hadoop.hdfs.server.federation.store.StateStoreUnavailableException;
import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils;
@@ -69,6 +79,8 @@ public abstract class StateStoreFileBaseImpl
/** If it is initialized. */
private boolean initialized = false;
+ private ExecutorService concurrentStoreAccessPool;
+
/**
* Get the reader of a record for the file system.
@@ -137,6 +149,8 @@ public abstract class StateStoreFileBaseImpl
*/
protected abstract String getRootDir();
+ protected abstract int getConcurrentFilesAccessNumThreads();
+
/**
* Set the driver as initialized.
*
@@ -168,9 +182,31 @@ public abstract class StateStoreFileBaseImpl
return false;
}
setInitialized(true);
+ int threads = getConcurrentFilesAccessNumThreads();
+ if (threads > 1) {
+ this.concurrentStoreAccessPool =
+ new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
+ new LinkedBlockingQueue<>(),
+ new ThreadFactoryBuilder()
+ .setNameFormat("state-store-file-based-concurrent-%d")
+ .setDaemon(true).build());
+ LOG.info("File based state store will be accessed concurrently with {} max threads", threads);
+ } else {
+ LOG.info("File based state store will be accessed serially");
+ }
return true;
}
+ @Override
+ public void close() throws Exception {
+ if (this.concurrentStoreAccessPool != null) {
+ this.concurrentStoreAccessPool.shutdown();
+ boolean isTerminated = this.concurrentStoreAccessPool.awaitTermination(5, TimeUnit.SECONDS);
+ LOG.info("Concurrent store access pool is terminated: {}", isTerminated);
+ this.concurrentStoreAccessPool = null;
+ }
+ }
+
@Override
public boolean initRecordStorage(
String className, Class recordClass) {
@@ -198,22 +234,29 @@ public abstract class StateStoreFileBaseImpl
verifyDriverReady();
long start = monotonicNow();
StateStoreMetrics metrics = getMetrics();
- List ret = new ArrayList<>();
+ List result = Collections.synchronizedList(new ArrayList<>());
try {
String path = getPathForClass(clazz);
List children = getChildren(path);
- for (String child : children) {
- String pathRecord = path + "/" + child;
- if (child.endsWith(TMP_MARK)) {
- LOG.debug("There is a temporary file {} in {}", child, path);
- if (isOldTempRecord(child)) {
- LOG.warn("Removing {} as it's an old temporary record", child);
- remove(pathRecord);
- }
- } else {
- T record = getRecord(pathRecord, clazz);
- ret.add(record);
+ List> callables = new ArrayList<>();
+ children.forEach(child -> callables.add(
+ () -> getRecordsFromFileAndRemoveOldTmpRecords(clazz, result, path, child)));
+ if (this.concurrentStoreAccessPool != null) {
+ // Read records concurrently
+ List> futures = this.concurrentStoreAccessPool.invokeAll(callables);
+ for (Future future : futures) {
+ future.get();
}
+ } else {
+ // Read records serially
+ callables.forEach(e -> {
+ try {
+ e.call();
+ } catch (Exception ex) {
+ LOG.error("Failed to retrieve record using file operations.", ex);
+ throw new RuntimeException(ex);
+ }
+ });
}
} catch (Exception e) {
if (metrics != null) {
@@ -227,7 +270,37 @@ public abstract class StateStoreFileBaseImpl
if (metrics != null) {
metrics.addRead(monotonicNow() - start);
}
- return new QueryResult(ret, getTime());
+ return new QueryResult<>(result, getTime());
+ }
+
+ /**
+ * Get the state store record from the given path (path/child) and add the record to the
+ * result list.
+ *
+ * @param clazz Class of the record.
+   * @param result The list of result records. Records are added to it unless the given
+   *               path represents an old temp file.
+   * @param path The parent path.
+   * @param child The child path under the parent path. Together, path and child form the file
+   *              location for the given record.
+   * @param <T> Record class of the records.
+ * @return Void.
+ * @throws IOException If the file read operation fails.
+ */
+ private Void getRecordsFromFileAndRemoveOldTmpRecords(Class clazz,
+ List result, String path, String child) throws IOException {
+ String pathRecord = path + "/" + child;
+ if (child.endsWith(TMP_MARK)) {
+ LOG.debug("There is a temporary file {} in {}", child, path);
+ if (isOldTempRecord(child)) {
+ LOG.warn("Removing {} as it's an old temporary record", child);
+ remove(pathRecord);
+ }
+ } else {
+ T record = getRecord(pathRecord, clazz);
+ result.add(record);
+ }
+ return null;
}
/**
@@ -260,23 +333,17 @@ public abstract class StateStoreFileBaseImpl
*/
private T getRecord(
final String path, final Class clazz) throws IOException {
- BufferedReader reader = getReader(path);
- try {
+ try (BufferedReader reader = getReader(path)) {
String line;
while ((line = reader.readLine()) != null) {
if (!line.startsWith("#") && line.length() > 0) {
try {
- T record = newRecord(line, clazz, false);
- return record;
+ return newRecord(line, clazz, false);
} catch (Exception ex) {
LOG.error("Cannot parse line {} in file {}", line, path, ex);
}
}
}
- } finally {
- if (reader != null) {
- reader.close();
- }
}
throw new IOException("Cannot read " + path + " for record " +
clazz.getSimpleName());
@@ -330,13 +397,12 @@ public abstract class StateStoreFileBaseImpl
record.setDateModified(this.getTime());
toWrite.put(recordPath, record);
} else if (errorIfExists) {
- LOG.error("Attempt to insert record {} that already exists",
- recordPath);
+ LOG.error("Attempt to insert record {} that already exists", recordPath);
if (metrics != null) {
metrics.addFailure(monotonicNow() - start);
}
return false;
- } else {
+ } else {
LOG.debug("Not updating {}", record);
}
} else {
@@ -345,36 +411,81 @@ public abstract class StateStoreFileBaseImpl
}
// Write the records
- boolean success = true;
- for (Entry entry : toWrite.entrySet()) {
- String recordPath = entry.getKey();
- String recordPathTemp = recordPath + "." + now() + TMP_MARK;
- boolean recordWrittenSuccessfully = true;
- try (BufferedWriter writer = getWriter(recordPathTemp)) {
- T record = entry.getValue();
- String line = serializeString(record);
- writer.write(line);
- } catch (IOException e) {
- LOG.error("Cannot write {}", recordPathTemp, e);
- recordWrittenSuccessfully = false;
- success = false;
+ final AtomicBoolean success = new AtomicBoolean(true);
+ final List> callables = new ArrayList<>();
+ toWrite.entrySet().forEach(entry -> callables.add(() -> writeRecordToFile(success, entry)));
+ if (this.concurrentStoreAccessPool != null) {
+ // Write records concurrently
+ List> futures = null;
+ try {
+ futures = this.concurrentStoreAccessPool.invokeAll(callables);
+ } catch (InterruptedException e) {
+ success.set(false);
+ LOG.error("Failed to put record concurrently.", e);
}
- // Commit
- if (recordWrittenSuccessfully && !rename(recordPathTemp, recordPath)) {
- LOG.error("Failed committing record into {}", recordPath);
- success = false;
+ if (futures != null) {
+ for (Future future : futures) {
+ try {
+ future.get();
+ } catch (InterruptedException | ExecutionException e) {
+ success.set(false);
+ LOG.error("Failed to retrieve results from concurrent record put runs.", e);
+ }
+ }
}
+ } else {
+ // Write records serially
+ callables.forEach(callable -> {
+ try {
+ callable.call();
+ } catch (Exception e) {
+ success.set(false);
+ LOG.error("Failed to put record.", e);
+ }
+ });
}
long end = monotonicNow();
if (metrics != null) {
- if (success) {
+ if (success.get()) {
metrics.addWrite(end - start);
} else {
metrics.addFailure(end - start);
}
}
- return success;
+ return success.get();
+ }
+
+ /**
+   * Writes the state store record to the file. The record is first written to a temp location
+   * and then renamed to the final location that is passed with the entry key.
+   *
+   * @param success The atomic boolean that gets updated to false if the file write operation fails.
+   * @param entry The entry of the record path and the state store record to be written to the file
+   * by first writing to a temp location and then renaming it to the record path.
+   * @param <T> Record class of the records.
+ * @return Void.
+ */
+ private Void writeRecordToFile(AtomicBoolean success,
+ Entry entry) {
+ String recordPath = entry.getKey();
+ String recordPathTemp = recordPath + "." + now() + TMP_MARK;
+ boolean recordWrittenSuccessfully = true;
+ try (BufferedWriter writer = getWriter(recordPathTemp)) {
+ T record = entry.getValue();
+ String line = serializeString(record);
+ writer.write(line);
+ } catch (IOException e) {
+ LOG.error("Cannot write {}", recordPathTemp, e);
+ recordWrittenSuccessfully = false;
+ success.set(false);
+ }
+ // Commit
+ if (recordWrittenSuccessfully && !rename(recordPathTemp, recordPath)) {
+ LOG.error("Failed committing record into {}", recordPath);
+ success.set(false);
+ }
+ return null;
}
@Override
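The core of this change is a "concurrent if a pool is configured, serial otherwise" execution pattern: the records to read or write are wrapped as `Callable<Void>` tasks and either handed to `ExecutorService.invokeAll` or invoked inline. A self-contained, hedged sketch of that pattern outside the state store classes (thread count and record names are illustrative):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

// Sketch of the concurrent-or-serial pattern; the patch itself uses a
// ThreadPoolExecutor with a named daemon ThreadFactory instead of this pool.
public final class ConcurrentOrSerialDemo {
  public static void main(String[] args) throws Exception {
    int threads = 4; // <= 1 would mean serial, mirroring the new config keys
    ExecutorService pool = threads > 1 ? Executors.newFixedThreadPool(threads) : null;
    AtomicBoolean success = new AtomicBoolean(true);

    List<Callable<Void>> tasks = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
      final int id = i;
      tasks.add(() -> {
        // Stand-in for reading or writing one state store record file.
        System.out.println("record-" + id + " on " + Thread.currentThread().getName());
        return null;
      });
    }

    if (pool != null) {
      // Concurrent path: invokeAll blocks until every task has finished.
      for (Future<Void> future : pool.invokeAll(tasks)) {
        try {
          future.get();
        } catch (Exception e) {
          success.set(false);
        }
      }
      pool.shutdown();
      pool.awaitTermination(5, TimeUnit.SECONDS);
    } else {
      // Serial path: run the same callables inline, one at a time.
      for (Callable<Void> task : tasks) {
        try {
          task.call();
        } catch (Exception e) {
          success.set(false);
        }
      }
    }
    System.out.println("all records handled: " + success.get());
  }
}
```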
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java
index 6ca26637161..1df26e07843 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java
@@ -109,6 +109,12 @@ public class StateStoreFileImpl extends StateStoreFileBaseImpl {
return this.rootDirectory;
}
+ @Override
+ protected int getConcurrentFilesAccessNumThreads() {
+ return getConf().getInt(RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS,
+ RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS_DEFAULT);
+ }
+
@Override
protected BufferedReader getReader(String filename) {
BufferedReader reader = null;
@@ -144,6 +150,7 @@ public class StateStoreFileImpl extends StateStoreFileBaseImpl {
@Override
public void close() throws Exception {
+ super.close();
setInitialized(false);
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java
index ee34d8a4cab..d05682398ec 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java
@@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory;
/**
* {@link StateStoreDriver} implementation based on a filesystem. The common
* implementation uses HDFS as a backend. The path can be specified setting
- * dfs.federation.router.driver.fs.path=hdfs://host:port/path/to/store.
+ * dfs.federation.router.store.driver.fs.path=hdfs://host:port/path/to/store.
*/
public class StateStoreFileSystemImpl extends StateStoreFileBaseImpl {
@@ -117,8 +117,15 @@ public class StateStoreFileSystemImpl extends StateStoreFileBaseImpl {
return this.workPath;
}
+ @Override
+ protected int getConcurrentFilesAccessNumThreads() {
+ return getConf().getInt(RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS,
+ RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS_DEFAULT);
+ }
+
@Override
public void close() throws Exception {
+ super.close();
if (fs != null) {
fs.close();
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml
index 79a16cc2022..780fb76a2da 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml
@@ -894,4 +894,32 @@
If this is below 0, the auto-refresh is disabled.
  </description>
 </property>
+
+  <property>
+    <name>dfs.federation.router.store.driver.file.async.threads</name>
+    <value>0</value>
+    <description>
+      Max threads used by StateStoreFileImpl to access state store files concurrently.
+      The only class currently being supported:
+      org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl.
+      Default value is 0, which means StateStoreFileImpl would work in sync mode, meaning it
+      would access one file at a time.
+      Use a positive integer value to enable concurrent file access.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.federation.router.store.driver.fs.async.threads</name>
+    <value>0</value>
+    <description>
+      Max threads used by StateStoreFileSystemImpl to access state store files from the given
+      filesystem concurrently.
+      The only class currently being supported:
+      org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl.
+      Default value is 0, which means StateStoreFileSystemImpl would work in sync mode, meaning it
+      would access one file from the filesystem at a time.
+      Use a positive integer value to enable concurrent file access from the given filesystem.
+    </description>
+  </property>
+
 </configuration>
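Enabling the concurrent access path is just a matter of setting the two new keys (declared as `FEDERATION_STORE_FILE_ASYNC_THREADS` and `FEDERATION_STORE_FS_ASYNC_THREADS` in `RBFConfigKeys`) to a positive thread count. A small illustrative sketch; the value 16 is an example, not a recommendation from the patch:

```java
import org.apache.hadoop.conf.Configuration;

// Illustrative only: turns on concurrent record access for both file-based
// state store drivers; 0 (the default) keeps the serial behaviour.
public final class EnableAsyncStateStoreAccess {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.setInt("dfs.federation.router.store.driver.file.async.threads", 16);
    conf.setInt("dfs.federation.router.store.driver.fs.async.threads", 16);
    System.out.println(
        conf.getInt("dfs.federation.router.store.driver.file.async.threads", 0));
  }
}
```

The same values can also be set in the site configuration using the property names documented above.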
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java
index 48d84f9326b..73d0774ace3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java
@@ -94,6 +94,7 @@ public class TestStateStoreDriverBase {
public static void tearDownCluster() {
if (stateStore != null) {
stateStore.stop();
+ stateStore = null;
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java
index b01500b2ea1..5b5b3fc1f81 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java
@@ -18,31 +18,55 @@
package org.apache.hadoop.hdfs.server.federation.store.driver;
import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.getStateStoreConfiguration;
+import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl;
+
+import org.junit.After;
import org.junit.Before;
-import org.junit.BeforeClass;
import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
/**
* Test the local file implementation of the State Store driver.
*/
+@RunWith(Parameterized.class)
public class TestStateStoreFile extends TestStateStoreDriverBase {
- @BeforeClass
- public static void setupCluster() throws Exception {
+ private final String numFileAsyncThreads;
+
+ public TestStateStoreFile(String numFileAsyncThreads) {
+ this.numFileAsyncThreads = numFileAsyncThreads;
+ }
+
+ @Parameterized.Parameters(name = "numFileAsyncThreads-{0}")
+  public static List<String[]> data() {
+ return Arrays.asList(new String[][] {{"20"}, {"0"}});
+ }
+
+ private static void setupCluster(String numFsAsyncThreads) throws Exception {
Configuration conf = getStateStoreConfiguration(StateStoreFileImpl.class);
+ conf.setInt(FEDERATION_STORE_FILE_ASYNC_THREADS, Integer.parseInt(numFsAsyncThreads));
getStateStore(conf);
}
@Before
- public void startup() throws IOException {
+ public void startup() throws Exception {
+ setupCluster(numFileAsyncThreads);
removeAll(getStateStoreDriver());
}
+ @After
+ public void tearDown() throws Exception {
+ tearDownCluster();
+ }
+
@Test
public void testInsert()
throws IllegalArgumentException, IllegalAccessException, IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java
index 8c06e6b8ed1..4d383ae63fc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.federation.store.driver;
import java.io.BufferedWriter;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
@@ -26,12 +28,15 @@ import org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUt
import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileBaseImpl;
import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl;
import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState;
-import org.junit.AfterClass;
+
+import org.junit.After;
import org.junit.Before;
-import org.junit.BeforeClass;
import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
import org.mockito.stubbing.Answer;
+import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doThrow;
@@ -41,16 +46,22 @@ import static org.mockito.Mockito.spy;
/**
* Test the FileSystem (e.g., HDFS) implementation of the State Store driver.
*/
+@RunWith(Parameterized.class)
public class TestStateStoreFileSystem extends TestStateStoreDriverBase {
private static MiniDFSCluster dfsCluster;
- @BeforeClass
- public static void setupCluster() throws Exception {
- Configuration conf = FederationStateStoreTestUtils
- .getStateStoreConfiguration(StateStoreFileSystemImpl.class);
- conf.set(StateStoreFileSystemImpl.FEDERATION_STORE_FS_PATH,
- "/hdfs-federation/");
+ private final String numFsAsyncThreads;
+
+ public TestStateStoreFileSystem(String numFsAsyncThreads) {
+ this.numFsAsyncThreads = numFsAsyncThreads;
+ }
+
+ private static void setupCluster(String numFsAsyncThreads) throws Exception {
+ Configuration conf =
+ FederationStateStoreTestUtils.getStateStoreConfiguration(StateStoreFileSystemImpl.class);
+ conf.set(StateStoreFileSystemImpl.FEDERATION_STORE_FS_PATH, "/hdfs-federation/");
+ conf.setInt(FEDERATION_STORE_FS_ASYNC_THREADS, Integer.parseInt(numFsAsyncThreads));
// Create HDFS cluster to back the state store
MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
@@ -60,18 +71,26 @@ public class TestStateStoreFileSystem extends TestStateStoreDriverBase {
getStateStore(conf);
}
- @AfterClass
- public static void tearDownCluster() {
- if (dfsCluster != null) {
- dfsCluster.shutdown();
- }
+ @Parameterized.Parameters(name = "numFsAsyncThreads-{0}")
+  public static List<String[]> data() {
+ return Arrays.asList(new String[][] {{"20"}, {"0"}});
}
@Before
- public void startup() throws IOException {
+ public void startup() throws Exception {
+ setupCluster(numFsAsyncThreads);
removeAll(getStateStoreDriver());
}
+ @After
+ public void tearDown() throws Exception {
+ tearDownCluster();
+ if (dfsCluster != null) {
+ dfsCluster.shutdown();
+ dfsCluster = null;
+ }
+ }
+
@Test
public void testInsert()
throws IllegalArgumentException, IllegalAccessException, IOException {
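Both test classes above use the same JUnit 4 Parameterized pattern: every test method runs once per thread-count value, so the sync ("0") and async ("20") code paths are exercised by the same assertions. A stripped-down sketch of the pattern (class and method names here are illustrative only):

    import java.util.Arrays;
    import java.util.List;

    import org.junit.Test;
    import org.junit.runner.RunWith;
    import org.junit.runners.Parameterized;

    import static org.junit.Assert.assertTrue;

    @RunWith(Parameterized.class)
    public class ThreadCountParameterizedSketch {
      private final int numAsyncThreads;

      public ThreadCountParameterizedSketch(String numAsyncThreads) {
        this.numAsyncThreads = Integer.parseInt(numAsyncThreads);
      }

      // Each entry becomes one run of every @Test method in this class.
      @Parameterized.Parameters(name = "numAsyncThreads-{0}")
      public static List<String[]> data() {
        return Arrays.asList(new String[][] {{"20"}, {"0"}});
      }

      @Test
      public void testRunsForEachThreadCount() {
        assertTrue(numAsyncThreads >= 0);
      }
    }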
From dfb2ca0a64a6ff377a8d6796b635298f46dc67ec Mon Sep 17 00:00:00 2001
From: HarshitGupta11 <50410275+HarshitGupta11@users.noreply.github.com>
Date: Wed, 5 Apr 2023 17:12:11 +0530
Subject: [PATCH 41/78] HADOOP-18684. S3A filesystem to support binding to
other URI schemes (#5521)
Contributed by Harshit Gupta
---
.../java/org/apache/hadoop/fs/s3a/S3A.java | 11 ++--
.../apache/hadoop/fs/s3a/S3AFileSystem.java | 8 ++-
.../hadoop/fs/s3a/ITestS3AUrlScheme.java | 51 +++++++++++++++++++
.../s3a/fileContext/ITestS3AFileContext.java | 25 ++++++++-
4 files changed, 89 insertions(+), 6 deletions(-)
create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AUrlScheme.java
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3A.java
index ec433fa95c2..34779996963 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3A.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3A.java
@@ -18,14 +18,16 @@
package org.apache.hadoop.fs.s3a;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
/**
* S3A implementation of AbstractFileSystem.
@@ -37,7 +39,8 @@ public class S3A extends DelegateToFileSystem {
public S3A(URI theUri, Configuration conf)
throws IOException, URISyntaxException {
- super(theUri, new S3AFileSystem(), conf, "s3a", false);
+ super(theUri, new S3AFileSystem(), conf,
+ theUri.getScheme().isEmpty() ? FS_S3A : theUri.getScheme(), false);
}
@Override
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index cb17b80fb6a..e96feb0243a 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -419,6 +419,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
*/
private final Set<Path> deleteOnExit = new TreeSet<>();
+ /**
+ * Scheme for the current filesystem.
+ */
+ private String scheme = FS_S3A;
+
/** Add any deprecated keys. */
@SuppressWarnings("deprecation")
private static void addDeprecatedKeys() {
@@ -642,6 +647,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
vectoredActiveRangeReads = intOption(conf,
AWS_S3_VECTOR_ACTIVE_RANGE_READS, DEFAULT_AWS_S3_VECTOR_ACTIVE_RANGE_READS, 1);
vectoredIOContext = populateVectoredIOContext(conf);
+ scheme = (this.uri != null && this.uri.getScheme() != null) ? this.uri.getScheme() : FS_S3A;
} catch (AmazonClientException e) {
// amazon client exception: stop all services then throw the translation
cleanupWithLogger(LOG, span);
@@ -1201,7 +1207,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
*/
@Override
public String getScheme() {
- return "s3a";
+ return this.scheme;
}
/**
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AUrlScheme.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AUrlScheme.java
new file mode 100644
index 00000000000..cfe46440c75
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AUrlScheme.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class ITestS3AUrlScheme extends AbstractS3ATestBase {
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ conf.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
+ return conf;
+ }
+
+ @Test
+ public void testFSScheme() throws IOException, URISyntaxException {
+ FileSystem fs = FileSystem.get(new URI("s3://mybucket/path"),
+ getConfiguration());
+ try {
+ assertEquals("s3", fs.getScheme());
+ Path path = fs.makeQualified(new Path("tmp/path"));
+ assertEquals("s3", path.toUri().getScheme());
+ } finally {
+ fs.close();
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContext.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContext.java
index 7e4273a4c70..d29a017a643 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContext.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContext.java
@@ -13,11 +13,34 @@
*/
package org.apache.hadoop.fs.s3a.fileContext;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.TestFileContext;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+
+import static org.junit.Assert.assertEquals;
/**
* Implementation of TestFileContext for S3a.
*/
-public class ITestS3AFileContext extends TestFileContext{
+public class ITestS3AFileContext extends TestFileContext {
+ @Test
+ public void testScheme()
+ throws URISyntaxException, UnsupportedFileSystemException {
+ Configuration conf = new Configuration();
+ URI uri = new URI("s3://mybucket/path");
+ conf.set("fs.AbstractFileSystem.s3.impl",
+ "org.apache.hadoop.fs.s3a.S3A");
+ FileContext fc = FileContext.getFileContext(uri, conf);
+ assertEquals("s3", fc.getDefaultFileSystem().getUri().getScheme());
+ Path path = fc.makeQualified(new Path("tmp/path"));
+ assertEquals("s3", path.toUri().getScheme());
+ }
}
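Taken together, the two tests show what the production change enables: once fs.s3.impl (and, for FileContext, fs.AbstractFileSystem.s3.impl) point at the S3A classes, the filesystem reports the scheme it was actually bound with instead of a hard-coded "s3a". A hedged usage sketch based on those bindings (the bucket name is a placeholder):

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    public class S3SchemeBindingExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Bind the "s3" scheme to the S3A FileSystem and AbstractFileSystem implementations.
        conf.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
        conf.set("fs.AbstractFileSystem.s3.impl", "org.apache.hadoop.fs.s3a.S3A");

        try (FileSystem fs = FileSystem.get(new URI("s3://example-bucket/"), conf)) {
          System.out.println(fs.getScheme()); // prints "s3", not "s3a"
        }
      }
    }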
From 69b90b5698df0e36a507c5288ec0fb93a48a5a55 Mon Sep 17 00:00:00 2001
From: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:35:24 +0800
Subject: [PATCH 42/78] YARN-11436. [Federation] MemoryFederationStateStore
Support Version. (#5518)
---
...tionStateVersionIncompatibleException.java | 37 +++++++++++++++++++
.../impl/MemoryFederationStateStore.java | 37 ++++++++++++++++---
.../impl/TestMemoryFederationStateStore.java | 37 +++++++++++++++++++
3 files changed, 106 insertions(+), 5 deletions(-)
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/exception/FederationStateVersionIncompatibleException.java
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/exception/FederationStateVersionIncompatibleException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/exception/FederationStateVersionIncompatibleException.java
new file mode 100644
index 00000000000..090c2807739
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/exception/FederationStateVersionIncompatibleException.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.federation.store.exception;
+
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+public class FederationStateVersionIncompatibleException extends YarnException {
+
+ private static final long serialVersionUID = 1L;
+
+ public FederationStateVersionIncompatibleException(Throwable cause) {
+ super(cause);
+ }
+
+ public FederationStateVersionIncompatibleException(String message) {
+ super(message);
+ }
+
+ public FederationStateVersionIncompatibleException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java
index 273e736e887..4aad86fbb16 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java
@@ -31,17 +31,18 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.Comparator;
-import org.apache.commons.lang3.NotImplementedException;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
+import org.apache.hadoop.yarn.server.federation.store.exception.FederationStateVersionIncompatibleException;
import org.apache.hadoop.yarn.server.federation.store.metrics.FederationStateStoreClientMetrics;
import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterRequest;
import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterResponse;
@@ -97,6 +98,7 @@ import org.apache.hadoop.yarn.server.federation.store.utils.FederationMembership
import org.apache.hadoop.yarn.server.federation.store.utils.FederationPolicyStoreInputValidator;
import org.apache.hadoop.yarn.server.federation.store.utils.FederationStateStoreUtils;
import org.apache.hadoop.yarn.server.records.Version;
+import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
import org.apache.hadoop.yarn.util.MonotonicClock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -116,6 +118,9 @@ public class MemoryFederationStateStore implements FederationStateStore {
private int maxAppsInStateStore;
private AtomicInteger sequenceNum;
private AtomicInteger masterKeyId;
+ private static final Version CURRENT_VERSION_INFO = Version
+ .newInstance(1, 1);
+ private byte[] version;
private final MonotonicClock clock = new MonotonicClock();
@@ -134,6 +139,7 @@ public class MemoryFederationStateStore implements FederationStateStore {
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_MAX_APPLICATIONS);
sequenceNum = new AtomicInteger();
masterKeyId = new AtomicInteger();
+ version = ((VersionPBImpl) CURRENT_VERSION_INFO).getProto().toByteArray();
}
@Override
@@ -367,22 +373,43 @@ public class MemoryFederationStateStore implements FederationStateStore {
@Override
public Version getCurrentVersion() {
- throw new NotImplementedException("Code is not implemented");
+ return CURRENT_VERSION_INFO;
}
@Override
public Version loadVersion() throws Exception {
- throw new NotImplementedException("Code is not implemented");
+ if (version != null) {
+ VersionProto versionProto = VersionProto.parseFrom(version);
+ return new VersionPBImpl(versionProto);
+ }
+ return null;
}
@Override
public void storeVersion() throws Exception {
- throw new NotImplementedException("Code is not implemented");
+ version = ((VersionPBImpl) CURRENT_VERSION_INFO).getProto().toByteArray();
}
@Override
public void checkVersion() throws Exception {
- throw new NotImplementedException("Code is not implemented");
+ Version loadedVersion = loadVersion();
+ LOG.info("Loaded Router State Version Info = {}.", loadedVersion);
+ Version currentVersion = getCurrentVersion();
+ if (loadedVersion != null && loadedVersion.equals(currentVersion)) {
+ return;
+ }
+    // If there is no version info, treat it as CURRENT_VERSION_INFO.
+ if (loadedVersion == null) {
+ loadedVersion = currentVersion;
+ }
+ if (loadedVersion.isCompatibleTo(currentVersion)) {
+ LOG.info("Storing Router State Version Info {}.", currentVersion);
+ storeVersion();
+ } else {
+ throw new FederationStateVersionIncompatibleException(
+ "Expecting Router state version " + currentVersion +
+ ", but loading version " + loadedVersion);
+ }
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestMemoryFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestMemoryFederationStateStore.java
index 5548dab1b8c..bb7e130b5e7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestMemoryFederationStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestMemoryFederationStateStore.java
@@ -27,6 +27,8 @@ import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKey;
import org.apache.hadoop.yarn.server.federation.store.records.RouterRMDTSecretManagerState;
import org.apache.hadoop.yarn.server.federation.store.records.RouterStoreToken;
+import org.apache.hadoop.yarn.server.records.Version;
+import org.junit.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -88,4 +90,39 @@ public class TestMemoryFederationStateStore extends FederationStateStoreBaseTest
assertTrue(tokenIdentifier instanceof RMDelegationTokenIdentifier);
assertEquals(identifier, tokenIdentifier);
}
+
+ @Test
+ public void testGetCurrentVersion() {
+ MemoryFederationStateStore memoryStateStore =
+ MemoryFederationStateStore.class.cast(this.getStateStore());
+ Version version = memoryStateStore.getCurrentVersion();
+    assertEquals(1, version.getMajorVersion());
+    assertEquals(1, version.getMinorVersion());
+ }
+
+ @Test
+ public void testStoreVersion() throws Exception {
+ MemoryFederationStateStore memoryStateStore =
+ MemoryFederationStateStore.class.cast(this.getStateStore());
+ memoryStateStore.storeVersion();
+ Version version = memoryStateStore.getCurrentVersion();
+    assertEquals(1, version.getMajorVersion());
+    assertEquals(1, version.getMinorVersion());
+ }
+
+ @Test
+ public void testLoadVersion() throws Exception {
+ MemoryFederationStateStore memoryStateStore =
+ MemoryFederationStateStore.class.cast(this.getStateStore());
+ Version version = memoryStateStore.loadVersion();
+    assertEquals(1, version.getMajorVersion());
+    assertEquals(1, version.getMinorVersion());
+ }
+
+ @Test
+ public void testCheckVersion() throws Exception {
+ MemoryFederationStateStore memoryStateStore =
+ MemoryFederationStateStore.class.cast(this.getStateStore());
+ memoryStateStore.checkVersion();
+ }
}
\ No newline at end of file
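The checkVersion() logic added in this patch is the standard state-store handshake: load what was persisted, accept an equal or compatible version (re-storing the current one), and fail fast otherwise. A condensed sketch of that control flow, with the YARN Version record simplified to a major/minor pair (this is an illustration, not the MemoryFederationStateStore code):

    final class VersionCheckSketch {
      private int[] stored;                      // persisted {major, minor}, null if nothing stored
      private final int[] current = {1, 1};      // mirrors CURRENT_VERSION_INFO in the patch

      void checkVersion() throws Exception {
        int[] loaded = (stored != null) ? stored : current;  // missing info is treated as current
        if (loaded[0] == current[0] && loaded[1] == current[1]) {
          return;                                // exact match, nothing to do
        }
        if (loaded[0] == current[0]) {           // same major version: treated as compatible here
          stored = current.clone();              // store/refresh the current version
        } else {
          throw new Exception("Expecting version " + current[0] + "." + current[1]
              + ", but loaded " + loaded[0] + "." + loaded[1]);
        }
      }
    }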
From 422bf3b24c82803cb0e8ed25fa0b12b5f5cccc1b Mon Sep 17 00:00:00 2001
From: Viraj Jasani
Date: Wed, 5 Apr 2023 14:06:38 -0700
Subject: [PATCH 43/78] HDFS-16973. RBF: MountTableResolver cache size lookup
should take read lock (#5533)
---
.../federation/resolver/MountTableResolver.java | 13 +++++++++----
.../federation/resolver/TestMountTableResolver.java | 10 ++++++++++
2 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java
index 4b21ec0aa63..adb0f91d042 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java
@@ -678,11 +678,16 @@ public class MountTableResolver
* @return Size of the cache.
* @throws IOException If the cache is not initialized.
*/
- protected long getCacheSize() throws IOException{
- if (this.locationCache != null) {
- return this.locationCache.size();
+ protected long getCacheSize() throws IOException {
+ this.readLock.lock();
+ try {
+ if (this.locationCache != null) {
+ return this.locationCache.size();
+ }
+ throw new IOException("localCache is null");
+ } finally {
+ this.readLock.unlock();
}
- throw new IOException("localCache is null");
}
@VisibleForTesting
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java
index 15d3caa5e4e..998b79782de 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java
@@ -552,6 +552,16 @@ public class TestMountTableResolver {
assertEquals(100000, mountTable.getMountPoints("/").size());
assertEquals(100000, mountTable.getMounts("/").size());
+ // test concurrency for mount table cache size when it gets updated frequently
+ for (int i = 0; i < 20; i++) {
+ mountTable.getDestinationForPath("/" + i);
+ if (i >= 10) {
+ assertEquals(TEST_MAX_CACHE_SIZE, mountTable.getCacheSize());
+ } else {
+ assertEquals(i + 1, mountTable.getCacheSize());
+ }
+ }
+ assertEquals(TEST_MAX_CACHE_SIZE, mountTable.getCacheSize());
// Add 1000 entries in deep list
mountTable.refreshEntries(emptyList);
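The fix is a plain reader/writer discipline: getCacheSize() now reads under the resolver's read lock, so it cannot observe the cache mid-swap while refreshEntries() holds the write lock. A generic sketch of that pattern (the map type and method names are illustrative, not MountTableResolver's):

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.concurrent.locks.ReadWriteLock;
    import java.util.concurrent.locks.ReentrantReadWriteLock;

    final class GuardedCacheSketch {
      private final ReadWriteLock lock = new ReentrantReadWriteLock();
      private Map<String, String> cache = new HashMap<>();

      long getCacheSize() throws IOException {
        lock.readLock().lock();
        try {
          if (cache != null) {
            return cache.size();
          }
          throw new IOException("cache is null");
        } finally {
          lock.readLock().unlock();
        }
      }

      void refresh(Map<String, String> newEntries) {
        lock.writeLock().lock();
        try {
          cache = new HashMap<>(newEntries); // writers replace the cache under the write lock
        } finally {
          lock.writeLock().unlock();
        }
      }
    }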
From 47c22e388ee5631c99a7f926d11a8747aa51e5e4 Mon Sep 17 00:00:00 2001
From: Simbarashe Dzinamarira
Date: Wed, 5 Apr 2023 16:44:29 -0700
Subject: [PATCH 44/78] HDFS-16943. RBF: Implements MySQL based
StateStoreDriver. (#5469)
---
.../store/driver/StateStoreDriver.java | 4 +
.../store/driver/impl/StateStoreBaseImpl.java | 4 +
.../driver/impl/StateStoreMySQLImpl.java | 425 ++++++++++++++++++
.../impl/StateStoreSerializableImpl.java | 4 +
.../src/main/resources/hdfs-rbf-default.xml | 3 +-
.../store/driver/TestStateStoreMySQL.java | 102 +++++
6 files changed, 541 insertions(+), 1 deletion(-)
create mode 100644 hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java
create mode 100644 hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreMySQL.java
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java
index a4e9c1ce82b..778ac3ecea5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.server.federation.store.driver;
import java.net.InetAddress;
import java.util.Collection;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
import org.apache.hadoop.hdfs.server.federation.store.StateStoreService;
@@ -35,6 +37,8 @@ import org.slf4j.LoggerFactory;
* provider. Driver implementations will extend this class and implement some of
* the default methods.
*/
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
public abstract class StateStoreDriver implements StateStoreRecordOperations {
private static final Logger LOG =
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java
index 30686f104b7..f7a6174226e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java
@@ -23,6 +23,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils;
import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver;
import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
@@ -39,6 +41,8 @@ import org.apache.hadoop.hdfs.server.federation.store.records.QueryResult;
* optimization, such as custom get/put/remove queries, depending on the
* capabilities of the data store.
*/
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
public abstract class StateStoreBaseImpl extends StateStoreDriver {
@Override
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java
new file mode 100644
index 00000000000..72644bb816e
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java
@@ -0,0 +1,425 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.store.driver.impl;
+
+import com.zaxxer.hikari.HikariConfig;
+import com.zaxxer.hikari.HikariDataSource;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
+import org.apache.hadoop.hdfs.server.federation.router.security.token.SQLConnectionFactory;
+import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils;
+import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
+import org.apache.hadoop.hdfs.server.federation.store.records.DisabledNameservice;
+import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState;
+import org.apache.hadoop.hdfs.server.federation.store.records.MountTable;
+import org.apache.hadoop.hdfs.server.federation.store.records.Query;
+import org.apache.hadoop.hdfs.server.federation.store.records.QueryResult;
+import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
+import org.apache.hadoop.util.Time;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils.*;
+
+/**
+ * StateStoreDriver implementation based on MySQL.
+ * There is a separate table for each record type. Each table has just two
+ * columns, recordKey and recordValue.
+ */
+public class StateStoreMySQLImpl extends StateStoreSerializableImpl {
+ public static final String SQL_STATE_STORE_CONF_PREFIX = "state-store-mysql.";
+ public static final String CONNECTION_URL =
+ SQL_STATE_STORE_CONF_PREFIX + "connection.url";
+ public static final String CONNECTION_USERNAME =
+ SQL_STATE_STORE_CONF_PREFIX + "connection.username";
+ public static final String CONNECTION_PASSWORD =
+ SQL_STATE_STORE_CONF_PREFIX + "connection.password";
+ public static final String CONNECTION_DRIVER =
+ SQL_STATE_STORE_CONF_PREFIX + "connection.driver";
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(StateStoreSerializableImpl.class);
+ private SQLConnectionFactory connectionFactory;
+ /** If the driver has been initialized. */
+ private boolean initialized = false;
+  private static final Set<String> VALID_TABLES = Collections.unmodifiableSet(
+ new HashSet<>(Arrays.asList(
+ MembershipState.class.getSimpleName(),
+ RouterState.class.getSimpleName(),
+ MountTable.class.getSimpleName(),
+ DisabledNameservice.class.getSimpleName()
+ ))
+ );
+
+ @Override
+ public boolean initDriver() {
+ Configuration conf = getConf();
+ connectionFactory = new MySQLStateStoreHikariDataSourceConnectionFactory(conf);
+ initialized = true;
+ LOG.info("MySQL state store connection factory initialized");
+ return true;
+ }
+
+ @Override
+  public <T extends BaseRecord> boolean initRecordStorage(String className, Class<T> clazz) {
+ String tableName = getAndValidateTableNameForClass(clazz);
+ try (Connection connection = connectionFactory.getConnection();
+ ResultSet resultSet = connection
+ .getMetaData()
+ .getTables(null, null, tableName, null)) {
+ if (resultSet.next()) {
+ return true;
+ }
+ } catch (SQLException e) {
+ LOG.error("Could not check if table {} able exists", tableName);
+ }
+
+ try (Connection connection = connectionFactory.getConnection();
+ Statement statement = connection.createStatement()) {
+ String sql = String.format("CREATE TABLE %s ("
+ + "recordKey VARCHAR (255) NOT NULL,"
+ + "recordValue VARCHAR (2047) NOT NULL, "
+ + "PRIMARY KEY(recordKey))", tableName);
+ statement.execute(sql);
+ return true;
+ } catch (SQLException e) {
+ LOG.error(String.format("Cannot create table %s for record type %s.",
+ tableName, className), e.getMessage());
+ return false;
+ }
+ }
+
+ @Override
+ public boolean isDriverReady() {
+ return this.initialized;
+ }
+
+ @Override
+ public void close() throws Exception {
+ connectionFactory.shutdown();
+ }
+
+ @Override
+  public <T extends BaseRecord> QueryResult<T> get(Class<T> clazz)
+ throws IOException {
+ String tableName = getAndValidateTableNameForClass(clazz);
+ verifyDriverReady();
+ long start = Time.monotonicNow();
+ StateStoreMetrics metrics = getMetrics();
+    List<T> ret = new ArrayList<>();
+ try (Connection connection = connectionFactory.getConnection();
+ PreparedStatement statement = connection.prepareStatement(
+ String.format("SELECT * FROM %s", tableName))) {
+ try (ResultSet result = statement.executeQuery()) {
+ while(result.next()) {
+ String recordValue = result.getString("recordValue");
+ T record = newRecord(recordValue, clazz, false);
+ ret.add(record);
+ }
+ }
+ } catch (SQLException e) {
+ if (metrics != null) {
+ metrics.addFailure(Time.monotonicNow() - start);
+ }
+ String msg = "Cannot fetch records for " + clazz.getSimpleName();
+ LOG.error(msg, e);
+ throw new IOException(msg, e);
+ }
+
+ if (metrics != null) {
+ metrics.addRead(Time.monotonicNow() - start);
+ }
+ return new QueryResult<>(ret, getTime());
+ }
+
+ @Override
+  public <T extends BaseRecord> boolean putAll(
+      List<T> records, boolean allowUpdate, boolean errorIfExists) throws IOException {
+ if (records.isEmpty()) {
+ return true;
+ }
+
+ verifyDriverReady();
+ StateStoreMetrics metrics = getMetrics();
+
+ long start = Time.monotonicNow();
+
+ boolean success = true;
+ for (T record : records) {
+ String tableName = getAndValidateTableNameForClass(record.getClass());
+ String primaryKey = getPrimaryKey(record);
+ String data = serializeString(record);
+
+ if (recordExists(tableName, primaryKey)) {
+ if (allowUpdate) {
+ // Update the mod time stamp. Many backends will use their
+ // own timestamp for the mod time.
+ record.setDateModified(this.getTime());
+ if (!updateRecord(tableName, primaryKey, data)) {
+ LOG.error("Cannot write {} into table {}", primaryKey, tableName);
+ success = false;
+ }
+ } else {
+ if (errorIfExists) {
+ LOG.error("Attempted to insert record {} that already exists "
+ + "in table {} and updates are disallowed.", primaryKey, tableName);
+ if (metrics != null) {
+ metrics.addFailure(Time.monotonicNow() - start);
+ }
+ return false;
+ } else {
+ LOG.debug("Not updating {} as updates are not allowed", record);
+ }
+ }
+ } else {
+ if (!insertRecord(tableName, primaryKey, data)) {
+ LOG.error("Cannot write {} in table {}", primaryKey, tableName);
+ success = false;
+ }
+ }
+ }
+
+ long end = Time.monotonicNow();
+ if (metrics != null) {
+ if (success) {
+ metrics.addWrite(end - start);
+ } else {
+ metrics.addFailure(end - start);
+ }
+ }
+ return success;
+ }
+
+ @Override
+  public <T extends BaseRecord> boolean removeAll(Class<T> clazz) throws IOException {
+ verifyDriverReady();
+ long startTimeMs = Time.monotonicNow();
+ StateStoreMetrics metrics = getMetrics();
+ boolean success = true;
+ String tableName = getAndValidateTableNameForClass(clazz);
+ try (Connection connection = connectionFactory.getConnection(true);
+ PreparedStatement truncateTable = connection.prepareStatement(
+ String.format("TRUNCATE TABLE %s", tableName))){
+ truncateTable.execute();
+ } catch (SQLException e) {
+ LOG.error("Could not remove all records in table {}", tableName, e);
+ success = false;
+ }
+
+ if (metrics != null) {
+ long durationMs = Time.monotonicNow() - startTimeMs;
+ if (success) {
+ metrics.addRemove(durationMs);
+ } else {
+ metrics.addFailure(durationMs);
+ }
+ }
+ return success;
+ }
+
+ @Override
+  public <T extends BaseRecord> int remove(Class<T> clazz, Query<T> query) throws IOException {
+ verifyDriverReady();
+
+ if (query == null) {
+ return 0;
+ }
+
+ long startTimeMs = Time.monotonicNow();
+ StateStoreMetrics metrics = getMetrics();
+ int removed = 0;
+ // Get the current records
+ try {
+      final QueryResult<T> result = get(clazz);
+      final List<T> existingRecords = result.getRecords();
+      // Remove the existing records that match the query
+      final List<T> recordsToRemove = filterMultiple(query, existingRecords);
+ boolean success = true;
+ for (T recordToRemove : recordsToRemove) {
+ String tableName = getAndValidateTableNameForClass(clazz);
+ String primaryKey = getPrimaryKey(recordToRemove);
+ if (removeRecord(tableName, primaryKey)) {
+ removed++;
+ } else {
+ LOG.error("Cannot remove record {} from table {}", primaryKey, tableName);
+ success = false;
+ }
+ }
+ if (!success) {
+ LOG.error("Cannot remove records {} query {}", clazz, query);
+ if (metrics != null) {
+ metrics.addFailure(Time.monotonicNow() - startTimeMs);
+ }
+ }
+ } catch (IOException e) {
+ LOG.error("Cannot remove records {} query {}", clazz, query, e);
+ if (metrics != null) {
+ metrics.addFailure(Time.monotonicNow() - startTimeMs);
+ }
+ }
+
+ if (removed > 0 && metrics != null) {
+ metrics.addRemove(Time.monotonicNow() - startTimeMs);
+ }
+ return removed;
+ }
+
+ /**
+ * Insert a record with a given key into the specified table.
+   * @param tableName Name of table to modify.
+   * @param key Primary key for the record.
+   * @param data Serialized record value to insert.
+   * @return True if the operation is successful, false otherwise.
+ */
+ protected boolean insertRecord(String tableName, String key, String data) {
+ try (Connection connection = connectionFactory.getConnection(true);
+ PreparedStatement statement = connection.prepareStatement(
+ String.format("INSERT INTO %s (recordKey, recordValue) VALUES (?, ?)", tableName))) {
+ statement.setString(1, key);
+ statement.setString(2, data);
+ statement.execute();
+ } catch (SQLException e) {
+ LOG.error("Failed to insert record {} into table {}", key, tableName, e);
+ return false;
+ }
+ return true;
+ }
+
+ /**
+   * Updates the record with a given key in the specified table.
+   * @param tableName Name of table to modify.
+   * @param key Primary key for the record.
+   * @param data New serialized record value.
+   * @return True if the operation is successful, false otherwise.
+ */
+ protected boolean updateRecord(String tableName, String key, String data) {
+ try (Connection connection = connectionFactory.getConnection(true);
+ PreparedStatement statement = connection.prepareStatement(
+ String.format("UPDATE %s SET recordValue = ? WHERE recordKey = ?", tableName))) {
+ statement.setString(1, data);
+ statement.setString(2, key);
+ statement.execute();
+ } catch (SQLException e){
+ LOG.error("Failed to update record {} in table {}", key, tableName, e);
+ return false;
+ }
+ return true;
+ }
+
+ /**
+   * Checks if a record with a given key exists in the specified table.
+   * @param tableName Name of table to query.
+   * @param key Primary key for the record.
+   * @return True if the record exists, false otherwise.
+ */
+ protected boolean recordExists(String tableName, String key) {
+ try (Connection connection = connectionFactory.getConnection(true);
+ PreparedStatement statement = connection.prepareStatement(
+ String.format("SELECT * FROM %s WHERE recordKey = ?", tableName))) {
+ statement.setString(1, key);
+ try (ResultSet result = statement.executeQuery()) {
+ return result.next();
+ }
+ } catch (SQLException e) {
+ LOG.error("Failed to check existence of record {} in table {}", key, tableName, e);
+ return false;
+ }
+ }
+
+ /**
+ * Removes the record with a given key from the specified table.
+ * @param tableName Name of table to modify
+ * @param key Primary key for the record.
+   * @return True if the operation is successful, false otherwise.
+ */
+ protected boolean removeRecord(String tableName, String key) {
+ try (Connection connection = connectionFactory.getConnection(true);
+ PreparedStatement statement = connection.prepareStatement(
+ String.format("DELETE FROM %s WHERE recordKey = ?", tableName))) {
+ statement.setString(1, key);
+ statement.execute();
+ return true;
+ } catch (SQLException e) {
+ LOG.error("Failed to remove record {} in table {}", key, tableName, e);
+ return false;
+ }
+ }
+
+ /**
+   * Get the table name for a record class and validate that it is one of the
+   * supported record types.
+ * @param clazz Class of the record.
+ * @return Table name for this record class.
+ */
+  private <T extends BaseRecord> String getAndValidateTableNameForClass(final Class<T> clazz) {
+ String tableName = StateStoreUtils.getRecordName(clazz);
+ if (VALID_TABLES.contains(tableName)) {
+ return tableName;
+ } else {
+ throw new IllegalArgumentException(tableName + " is not a valid table name");
+ }
+ }
+
+
+ /**
+ * Class that relies on a HikariDataSource to provide SQL connections.
+ */
+ static class MySQLStateStoreHikariDataSourceConnectionFactory
+ implements SQLConnectionFactory {
+ protected final static String HIKARI_PROPS = SQL_STATE_STORE_CONF_PREFIX
+ + "connection.hikari.";
+ private final HikariDataSource dataSource;
+
+ MySQLStateStoreHikariDataSourceConnectionFactory(Configuration conf) {
+ Properties properties = new Properties();
+ properties.setProperty("jdbcUrl", conf.get(StateStoreMySQLImpl.CONNECTION_URL));
+ properties.setProperty("username", conf.get(StateStoreMySQLImpl.CONNECTION_USERNAME));
+ properties.setProperty("password", conf.get(StateStoreMySQLImpl.CONNECTION_PASSWORD));
+ properties.setProperty("driverClassName", conf.get(StateStoreMySQLImpl.CONNECTION_DRIVER));
+
+ // Include hikari connection properties
+ properties.putAll(conf.getPropsWithPrefix(HIKARI_PROPS));
+
+ HikariConfig hikariConfig = new HikariConfig(properties);
+ this.dataSource = new HikariDataSource(hikariConfig);
+ }
+
+ @Override
+ public Connection getConnection() throws SQLException {
+ return dataSource.getConnection();
+ }
+
+ @Override
+ public void shutdown() {
+ // Close database connections
+ dataSource.close();
+ }
+ }
+
+}
\ No newline at end of file
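Wiring the new driver in only needs the Router's driver class plus the state-store-mysql.connection.* keys defined above; additional HikariCP pool settings can be passed through the state-store-mysql.connection.hikari. prefix. A hedged configuration sketch (URL, credentials and the MySQL JDBC driver class are placeholders; the test later in this patch does the same thing against an embedded Derby database):

    import org.apache.hadoop.conf.Configuration;

    public class MySqlStateStoreConfigExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("dfs.federation.router.store.driver.class",
            "org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreMySQLImpl");
        // Connection settings read by StateStoreMySQLImpl (values are placeholders).
        conf.set("state-store-mysql.connection.url", "jdbc:mysql://db-host:3306/routerstore");
        conf.set("state-store-mysql.connection.username", "router");
        conf.set("state-store-mysql.connection.password", "secret");
        conf.set("state-store-mysql.connection.driver", "com.mysql.cj.jdbc.Driver");
        // Extra HikariCP settings can be passed with the "connection.hikari." prefix.
        conf.set("state-store-mysql.connection.hikari.maximumPoolSize", "10");
        System.out.println(conf.get("dfs.federation.router.store.driver.class"));
      }
    }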
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreSerializableImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreSerializableImpl.java
index 7bc93de84bc..8f766c65c5b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreSerializableImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreSerializableImpl.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.server.federation.store.driver.impl;
import java.io.IOException;
import java.util.Collection;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreSerializer;
@@ -29,6 +31,8 @@ import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
* State Store driver that stores a serialization of the records. The serializer
* is pluggable.
*/
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
public abstract class StateStoreSerializableImpl extends StateStoreBaseImpl {
/** Mark for slashes in path names. */
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml
index 780fb76a2da..c7b403ce634 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml
@@ -362,7 +362,8 @@
Class to implement the State Store. There are four implementation classes currently
being supported:
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl,
- org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl and
+ org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl,
+ org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreMySQLImpl and
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.
These implementation classes use the local file, filesystem, MySQL database and ZooKeeper as a backend respectively.
By default it uses the ZooKeeper as the default State Store.
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreMySQL.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreMySQL.java
new file mode 100644
index 00000000000..ebac2c0b93b
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreMySQL.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.store.driver;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreMySQLImpl;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.*;
+
+/**
+ * Test the MySQL implementation of the State Store driver.
+ */
+public class TestStateStoreMySQL extends TestStateStoreDriverBase {
+ private static final String CONNECTION_URL = "jdbc:derby:memory:StateStore";
+
+ @BeforeClass
+ public static void initDatabase() throws Exception {
+ Connection connection = DriverManager.getConnection(CONNECTION_URL + ";create=true");
+ Statement s = connection.createStatement();
+ s.execute("CREATE SCHEMA TESTUSER");
+
+ Configuration conf =
+ getStateStoreConfiguration(StateStoreMySQLImpl.class);
+ conf.set(StateStoreMySQLImpl.CONNECTION_URL, CONNECTION_URL);
+ conf.set(StateStoreMySQLImpl.CONNECTION_USERNAME, "testuser");
+ conf.set(StateStoreMySQLImpl.CONNECTION_PASSWORD, "testpassword");
+ conf.set(StateStoreMySQLImpl.CONNECTION_DRIVER, "org.apache.derby.jdbc.EmbeddedDriver");
+ getStateStore(conf);
+ }
+
+ @Before
+ public void startup() throws IOException {
+ removeAll(getStateStoreDriver());
+ }
+
+ @AfterClass
+ public static void cleanupDatabase() {
+ try {
+ DriverManager.getConnection(CONNECTION_URL + ";drop=true");
+ } catch (SQLException e) {
+ // SQLException expected when database is dropped
+ if (!e.getMessage().contains("dropped")) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ @Test
+ public void testInsert()
+ throws IllegalArgumentException, IllegalAccessException, IOException {
+ testInsert(getStateStoreDriver());
+ }
+
+ @Test
+ public void testUpdate()
+ throws IllegalArgumentException, ReflectiveOperationException,
+ IOException, SecurityException {
+ testPut(getStateStoreDriver());
+ }
+
+ @Test
+ public void testDelete()
+ throws IllegalArgumentException, IllegalAccessException, IOException {
+ testRemove(getStateStoreDriver());
+ }
+
+ @Test
+ public void testFetchErrors()
+ throws IllegalArgumentException, IllegalAccessException, IOException {
+ testFetchErrors(getStateStoreDriver());
+ }
+
+ @Test
+ public void testMetrics()
+ throws IllegalArgumentException, IllegalAccessException, IOException {
+ testMetrics(getStateStoreDriver());
+ }
+}
\ No newline at end of file
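Because the driver only needs a JDBC connection, the test above can run against an in-memory Derby database instead of a real MySQL server. A self-contained sketch of that trick (requires Derby on the classpath; the table and values are illustrative):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class EmbeddedDerbySketch {
      public static void main(String[] args) throws Exception {
        // ";create=true" materializes an in-memory database on first connect.
        try (Connection c = DriverManager.getConnection(
                 "jdbc:derby:memory:StateStoreSketch;create=true");
             Statement s = c.createStatement()) {
          s.execute("CREATE TABLE MembershipState ("
              + "recordKey VARCHAR(255) NOT NULL, recordValue VARCHAR(2047) NOT NULL, "
              + "PRIMARY KEY(recordKey))");
          s.execute("INSERT INTO MembershipState VALUES ('ns0-nn0', 'serialized-record')");
          try (ResultSet rs = s.executeQuery("SELECT COUNT(*) FROM MembershipState")) {
            rs.next();
            System.out.println(rs.getInt(1)); // 1
          }
        }
      }
    }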
From 523ff816246322e7376c52b3c8df92c0d6e4f6ef Mon Sep 17 00:00:00 2001
From: zhtttylz
Date: Thu, 6 Apr 2023 19:44:47 +0800
Subject: [PATCH 45/78] HDFS-16952. Support getLinkTarget API in WebHDFS
(#5517)
Co-authored-by: Zhtttylz
Reviewed-by: Shilun Fan
Signed-off-by: Shilun Fan
---
.../hadoop/hdfs/web/WebHdfsFileSystem.java | 13 +++++++++
.../hadoop/hdfs/web/resources/GetOpParam.java | 1 +
.../router/RouterWebHdfsMethods.java | 1 +
.../web/resources/NamenodeWebHdfsMethods.java | 5 ++++
.../hadoop-hdfs/src/site/markdown/WebHDFS.md | 17 +++++++++++
.../apache/hadoop/hdfs/web/TestWebHDFS.java | 28 +++++++++++++++++++
6 files changed, 65 insertions(+)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
index f0774e98d1f..615cf3bd7c2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
@@ -2147,6 +2147,19 @@ public class WebHdfsFileSystem extends FileSystem
}.run();
}
+ @Override
+ public Path getLinkTarget(Path f) throws IOException {
+ statistics.incrementReadOps(1);
+ storageStatistics.incrementOpCounter(OpType.GET_LINK_TARGET);
+ final HttpOpParam.Op op = GetOpParam.Op.GETLINKTARGET;
+    return new FsPathResponseRunner<Path>(op, f) {
+      @Override
+      Path decodeResponse(Map<?, ?> json) {
+ return new Path((String) json.get(Path.class.getSimpleName()));
+ }
+ }.run();
+ }
+
@VisibleForTesting
InetSocketAddress[] getResolvedNNAddr() {
return nnAddrs;
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java
index 14938c3c45b..89979295c79 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java
@@ -64,6 +64,7 @@ public class GetOpParam extends HttpOpParam {
GETSNAPSHOTDIFF(false, HttpURLConnection.HTTP_OK),
GETSNAPSHOTDIFFLISTING(false, HttpURLConnection.HTTP_OK),
GETSNAPSHOTTABLEDIRECTORYLIST(false, HttpURLConnection.HTTP_OK),
+ GETLINKTARGET(false, HttpURLConnection.HTTP_OK),
GETSNAPSHOTLIST(false, HttpURLConnection.HTTP_OK);
final boolean redirect;
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java
index a66953b1bd7..477a59941fe 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java
@@ -385,6 +385,7 @@ public class RouterWebHdfsMethods extends NamenodeWebHdfsMethods {
case GETXATTRS:
case LISTXATTRS:
case CHECKACCESS:
+ case GETLINKTARGET:
{
return super.get(ugi, delegation, username, doAsUser, fullpath, op,
offset, length, renewer, bufferSize, xattrNames, xattrEncoding,
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
index a3250c213ca..4b3b53731ee 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
@@ -1383,6 +1383,11 @@ public class NamenodeWebHdfsMethods {
final String js = JsonUtil.toJsonString(snapshotList);
return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
}
+ case GETLINKTARGET: {
+ String target = cp.getLinkTarget(fullpath);
+ final String js = JsonUtil.toJsonString("Path", target);
+ return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
+ }
default:
throw new UnsupportedOperationException(op + " is not supported");
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
index 5e5924ad36e..f84018ae821 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
@@ -58,6 +58,7 @@ The HTTP REST API supports the complete [FileSystem](../../api/org/apache/hadoop
* [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileBlockLocations)
* [`GETECPOLICY`](#Get_EC_Policy) (see [HDFSErasureCoding](./HDFSErasureCoding.html#Administrative_commands).getErasureCodingPolicy)
* [`GETSERVERDEFAULTS`](#Get_Server_Defaults) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServerDefaults)
+ * [`GETLINKTARGET`](#Get_Link_Target) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getLinkTarget)
* HTTP PUT
* [`CREATE`](#Create_and_Write_to_a_File) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).create)
* [`MKDIRS`](#Make_a_Directory) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).mkdirs)
@@ -1139,6 +1140,22 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).access
See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServerDefaults
+### Get Link Target
+
+* Submit a HTTP GET request.
+
+ curl -i "http://:/webhdfs/v1/?op=GETLINKTARGET"
+
+ The client receives a response with a [`Path` JSON object](#Path_JSON_Schema):
+
+ HTTP/1.1 200 OK
+ Content-Type: application/json
+ Transfer-Encoding: chunked
+
+ {"Path": "/user/username/targetFile"}
+
+See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getLinkTarget
+
Storage Policy Operations
-------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
index c4f53b05615..8f4759d8e30 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
@@ -2202,6 +2202,34 @@ public class TestWebHDFS {
cluster.shutdown();
}
}
+
+ @Test
+ public void testLinkTarget() throws Exception {
+ final Configuration conf = WebHdfsTestUtil.createConf();
+ try {
+ cluster = new MiniDFSCluster.Builder(conf)
+ .numDataNodes(3)
+ .build();
+ cluster.waitActive();
+
+ final WebHdfsFileSystem webHdfs =
+ WebHdfsTestUtil.getWebHdfsFileSystem(conf,
+ WebHdfsConstants.WEBHDFS_SCHEME);
+
+ // Symbolic link
+ Path root = new Path("/webHdfsTest/");
+ Path targetFile = new Path(root, "debug.log");
+ FileSystemTestHelper.createFile(webHdfs, targetFile);
+
+ Path symLink = new Path(root, "debug.link");
+
+ webHdfs.createSymlink(targetFile, symLink, false);
+ assertEquals(webHdfs.getLinkTarget(symLink), targetFile);
+ } finally {
+ cluster.shutdown();
+ }
+ }
+
/**
* Get FileStatus JSONObject from ListStatus response.
*/
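
The patch above adds the server-side GETLINKTARGET handler, the WebHDFS documentation and a test; the sketch below shows how a client could exercise the new operation through the generic FileSystem API. It is a minimal illustration only, assuming a webhdfs:// URI on the default NameNode HTTP port and hypothetical paths; it is not code from the patch.

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class GetLinkTargetExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical NameNode address; the link path below is also only an example.
        try (FileSystem fs = FileSystem.get(URI.create("webhdfs://namenode:9870"), conf)) {
          Path link = new Path("/user/username/debug.link");
          // Issues an HTTP GET with op=GETLINKTARGET and parses the {"Path": ...} reply.
          Path target = fs.getLinkTarget(link);
          System.out.println(link + " -> " + target);
        }
      }
    }
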
From e45451f9c715d067bfd6ea02d266e4aef782fcfe Mon Sep 17 00:00:00 2001
From: mjwiq <80327153+mjwiq@users.noreply.github.com>
Date: Thu, 6 Apr 2023 17:00:33 +0200
Subject: [PATCH 46/78] HADOOP-18687. hadoop-auth: remove unnecessary
dependency on json-smart (#5524)
Contributed by Michiel de Jong
---
hadoop-common-project/hadoop-auth/pom.xml | 12 ------------
hadoop-project/pom.xml | 4 ----
2 files changed, 16 deletions(-)
diff --git a/hadoop-common-project/hadoop-auth/pom.xml b/hadoop-common-project/hadoop-auth/pom.xml
index 6eaa4fdfce5..433a615c606 100644
--- a/hadoop-common-project/hadoop-auth/pom.xml
+++ b/hadoop-common-project/hadoop-auth/pom.xml
@@ -110,20 +110,8 @@
org.bouncycastlebcprov-jdk15on
-
-
- net.minidev
- json-smart
-
-
- net.minidev
- json-smart
- org.apache.zookeeperzookeeper
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index d8114afb58f..7a57f05011d 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -1730,10 +1730,6 @@
-
net.minidevjson-smart${json-smart.version}
From 3e2ae1da00e055211914c90cca89d62432096530 Mon Sep 17 00:00:00 2001
From: rdingankar
Date: Mon, 10 Apr 2023 08:56:00 -0700
Subject: [PATCH 47/78] =?UTF-8?q?HDFS-16949=20Introduce=20inverse=20quanti?=
=?UTF-8?q?les=20for=20metrics=20where=20higher=20numer=E2=80=A6=20(#5495)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../hadoop/metrics2/lib/MetricsRegistry.java | 25 ++++-
.../metrics2/lib/MutableInverseQuantiles.java | 89 +++++++++++++++++
.../hadoop/metrics2/lib/MutableQuantiles.java | 99 +++++++++++++++----
.../metrics2/util/TestSampleQuantiles.java | 68 ++++++++++---
.../apache/hadoop/test/MetricsAsserts.java | 25 ++++-
.../datanode/metrics/DataNodeMetrics.java | 2 +-
.../server/datanode/TestDataNodeMetrics.java | 3 +-
7 files changed, 273 insertions(+), 38 deletions(-)
create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java
index b71f7f8cc5e..31031b808ea 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java
@@ -227,6 +227,29 @@ public class MetricsRegistry {
return ret;
}
+ /**
+ * Create a mutable inverse metric that estimates inverse quantiles of a stream of values
+ * @param name of the metric
+ * @param desc metric description
+ * @param sampleName of the metric (e.g., "Ops")
+ * @param valueName of the metric (e.g., "Rate")
+ * @param interval rollover interval of estimator in seconds
+ * @return a new inverse quantile estimator object
+ * @throws MetricsException if interval is not a positive integer
+ */
+ public synchronized MutableQuantiles newInverseQuantiles(String name, String desc,
+ String sampleName, String valueName, int interval) {
+ checkMetricName(name);
+ if (interval <= 0) {
+ throw new MetricsException("Interval should be positive. Value passed" +
+ " is: " + interval);
+ }
+ MutableQuantiles ret =
+ new MutableInverseQuantiles(name, desc, sampleName, valueName, interval);
+ metricsMap.put(name, ret);
+ return ret;
+ }
+
/**
* Create a mutable metric with stats
* @param name of the metric
@@ -278,7 +301,7 @@ public class MetricsRegistry {
}
/**
- * Create a mutable rate metric (for throughput measurement)
+ * Create a mutable rate metric (for throughput measurement).
* @param name of the metric
* @param desc description
* @param extended produce extended stat (stdev/min/max etc.) if true
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java
new file mode 100644
index 00000000000..a3d579cb9e7
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.metrics2.lib;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.metrics2.util.Quantile;
+import org.apache.hadoop.metrics2.util.SampleQuantiles;
+import java.text.DecimalFormat;
+import static org.apache.hadoop.metrics2.lib.Interns.info;
+
+/**
+ * Watches a stream of long values, maintaining online estimates of specific
+ * quantiles with provably low error bounds. Inverse quantiles are meant for
+ * highly accurate low-percentile (e.g. 1st, 5th) metrics.
+ * InverseQuantiles are used for metrics where a higher value is better
+ * (e.g. data transfer rate).
+ * The 1st percentile here corresponds to the 99th inverse percentile metric,
+ * 5th percentile to 95th and so on.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class MutableInverseQuantiles extends MutableQuantiles{
+
+ static class InversePercentile extends Quantile {
+ InversePercentile(double inversePercentile) {
+ super(inversePercentile/100, inversePercentile/1000);
+ }
+ }
+
+ @VisibleForTesting
+ public static final Quantile[] INVERSE_QUANTILES = {new InversePercentile(50),
+ new InversePercentile(25), new InversePercentile(10),
+ new InversePercentile(5), new InversePercentile(1)};
+
+ /**
+ * Instantiates a new {@link MutableInverseQuantiles} for a metric that rolls itself
+ * over on the specified time interval.
+ *
+ * @param name of the metric
+ * @param description long-form textual description of the metric
+ * @param sampleName type of items in the stream (e.g., "Ops")
+ * @param valueName type of the values
+ * @param intervalSecs rollover interval (in seconds) of the estimator
+ */
+ public MutableInverseQuantiles(String name, String description, String sampleName,
+ String valueName, int intervalSecs) {
+ super(name, description, sampleName, valueName, intervalSecs);
+ }
+
+ /**
+ * Sets quantileInfo and estimator.
+ *
+ * @param ucName capitalized name of the metric
+ * @param uvName capitalized type of the values
+ * @param desc uncapitalized long-form textual description of the metric
+ * @param lvName uncapitalized type of the values
+ * @param df Number formatter for inverse percentile value
+ */
+ void setQuantiles(String ucName, String uvName, String desc, String lvName, DecimalFormat df) {
+ // Construct the MetricsInfos for inverse quantiles, converting to inverse percentiles
+ setQuantileInfos(INVERSE_QUANTILES.length);
+ for (int i = 0; i < INVERSE_QUANTILES.length; i++) {
+ double inversePercentile = 100 * (1 - INVERSE_QUANTILES[i].quantile);
+ String nameTemplate = ucName + df.format(inversePercentile) + "thInversePercentile" + uvName;
+ String descTemplate = df.format(inversePercentile) + " inverse percentile " + lvName
+ + " with " + getInterval() + " second interval for " + desc;
+ addQuantileInfo(i, info(nameTemplate, descTemplate));
+ }
+
+ setEstimator(new SampleQuantiles(INVERSE_QUANTILES));
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java
index f7dfaffb3f9..edb2159f17b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.metrics2.lib;
import static org.apache.hadoop.metrics2.lib.Interns.info;
+import java.text.DecimalFormat;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
@@ -52,9 +53,10 @@ public class MutableQuantiles extends MutableMetric {
new Quantile(0.75, 0.025), new Quantile(0.90, 0.010),
new Quantile(0.95, 0.005), new Quantile(0.99, 0.001) };
- private final MetricsInfo numInfo;
- private final MetricsInfo[] quantileInfos;
- private final int interval;
+ private MetricsInfo numInfo;
+ private MetricsInfo[] quantileInfos;
+ private int intervalSecs;
+ private static DecimalFormat decimalFormat = new DecimalFormat("###.####");
private QuantileEstimator estimator;
private long previousCount = 0;
@@ -91,26 +93,39 @@ public class MutableQuantiles extends MutableMetric {
String lsName = StringUtils.uncapitalize(sampleName);
String lvName = StringUtils.uncapitalize(valueName);
- numInfo = info(ucName + "Num" + usName, String.format(
- "Number of %s for %s with %ds interval", lsName, desc, interval));
- // Construct the MetricsInfos for the quantiles, converting to percentiles
- quantileInfos = new MetricsInfo[quantiles.length];
- String nameTemplate = ucName + "%dthPercentile" + uvName;
- String descTemplate = "%d percentile " + lvName + " with " + interval
- + " second interval for " + desc;
- for (int i = 0; i < quantiles.length; i++) {
- int percentile = (int) (100 * quantiles[i].quantile);
- quantileInfos[i] = info(String.format(nameTemplate, percentile),
- String.format(descTemplate, percentile));
- }
-
- estimator = new SampleQuantiles(quantiles);
-
- this.interval = interval;
+ setInterval(interval);
+ setNumInfo(info(ucName + "Num" + usName, String.format(
+ "Number of %s for %s with %ds interval", lsName, desc, interval)));
scheduledTask = scheduler.scheduleWithFixedDelay(new RolloverSample(this),
interval, interval, TimeUnit.SECONDS);
+ setQuantiles(ucName, uvName, desc, lvName, decimalFormat);
}
+ /**
+ * Sets quantileInfo and estimator.
+ *
+ * @param ucName capitalized name of the metric
+ * @param uvName capitalized type of the values
+ * @param desc uncapitalized long-form textual description of the metric
+ * @param lvName uncapitalized type of the values
+ * @param pDecimalFormat Number formatter for percentile value
+ */
+ void setQuantiles(String ucName, String uvName, String desc, String lvName, DecimalFormat pDecimalFormat) {
+ // Construct the MetricsInfos for the quantiles, converting to percentiles
+ setQuantileInfos(quantiles.length);
+ for (int i = 0; i < quantiles.length; i++) {
+ double percentile = 100 * quantiles[i].quantile;
+ String nameTemplate = ucName + pDecimalFormat.format(percentile) + "thPercentile" + uvName;
+ String descTemplate = pDecimalFormat.format(percentile) + " percentile " + lvName
+ + " with " + getInterval() + " second interval for " + desc;
+ addQuantileInfo(i, info(nameTemplate, descTemplate));
+ }
+
+ setEstimator(new SampleQuantiles(quantiles));
+ }
+
+ public MutableQuantiles() {}
+
@Override
public synchronized void snapshot(MetricsRecordBuilder builder, boolean all) {
if (all || changed()) {
@@ -133,8 +148,50 @@ public class MutableQuantiles extends MutableMetric {
estimator.insert(value);
}
- public int getInterval() {
- return interval;
+ /**
+ * Set info about the metrics.
+ *
+ * @param pNumInfo info about the metrics.
+ */
+ public synchronized void setNumInfo(MetricsInfo pNumInfo) {
+ this.numInfo = pNumInfo;
+ }
+
+ /**
+ * Initialize quantileInfos array.
+ *
+ * @param length of the quantileInfos array.
+ */
+ public synchronized void setQuantileInfos(int length) {
+ this.quantileInfos = new MetricsInfo[length];
+ }
+
+ /**
+ * Add entry to quantileInfos array.
+ *
+ * @param i array index.
+ * @param info info to be added to quantileInfos array.
+ */
+ public synchronized void addQuantileInfo(int i, MetricsInfo info) {
+ this.quantileInfos[i] = info;
+ }
+
+ /**
+ * Set the rollover interval (in seconds) of the estimator.
+ *
+ * @param pIntervalSecs of the estimator.
+ */
+ public synchronized void setInterval(int pIntervalSecs) {
+ this.intervalSecs = pIntervalSecs;
+ }
+
+ /**
+ * Get the rollover interval (in seconds) of the estimator.
+ *
+ * @return intervalSecs of the estimator.
+ */
+ public synchronized int getInterval() {
+ return intervalSecs;
}
public void stop() {
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/util/TestSampleQuantiles.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/util/TestSampleQuantiles.java
index c7d8f60b181..aefd7a264b0 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/util/TestSampleQuantiles.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/util/TestSampleQuantiles.java
@@ -24,6 +24,7 @@ import java.util.Collections;
import java.util.Map;
import java.util.Random;
+import org.apache.hadoop.metrics2.lib.MutableInverseQuantiles;
import org.junit.Before;
import org.junit.Test;
@@ -36,6 +37,7 @@ public class TestSampleQuantiles {
new Quantile(0.95, 0.005), new Quantile(0.99, 0.001) };
SampleQuantiles estimator;
+ final static int NUM_REPEATS = 10;
@Before
public void init() {
@@ -91,28 +93,70 @@ public class TestSampleQuantiles {
@Test
public void testQuantileError() throws IOException {
final int count = 100000;
- Random r = new Random(0xDEADDEAD);
- Long[] values = new Long[count];
+ Random rnd = new Random(0xDEADDEAD);
+ int[] values = new int[count];
for (int i = 0; i < count; i++) {
- values[i] = (long) (i + 1);
+ values[i] = i + 1;
}
- // Do 10 shuffle/insert/check cycles
- for (int i = 0; i < 10; i++) {
- System.out.println("Starting run " + i);
- Collections.shuffle(Arrays.asList(values), r);
+
+ // Repeat shuffle/insert/check cycles 10 times
+ for (int i = 0; i < NUM_REPEATS; i++) {
+
+ // Shuffle
+ Collections.shuffle(Arrays.asList(values), rnd);
estimator.clear();
- for (int j = 0; j < count; j++) {
- estimator.insert(values[j]);
+
+ // Insert
+ for (int value : values) {
+ estimator.insert(value);
}
Map<Quantile, Long> snapshot;
snapshot = estimator.snapshot();
+
+ // Check
for (Quantile q : quantiles) {
long actual = (long) (q.quantile * count);
long error = (long) (q.error * count);
long estimate = snapshot.get(q);
- System.out
- .println(String.format("Expected %d with error %d, estimated %d",
- actual, error, estimate));
+ assertThat(estimate <= actual + error).isTrue();
+ assertThat(estimate >= actual - error).isTrue();
+ }
+ }
+ }
+
+ /**
+ * Correctness test that checks that absolute error of the estimate for inverse quantiles
+ * is within specified error bounds for some randomly permuted streams of items.
+ */
+ @Test
+ public void testInverseQuantiles() throws IOException {
+ SampleQuantiles inverseQuantilesEstimator =
+ new SampleQuantiles(MutableInverseQuantiles.INVERSE_QUANTILES);
+ final int count = 100000;
+ Random rnd = new Random(0xDEADDEAD);
+ int[] values = new int[count];
+ for (int i = 0; i < count; i++) {
+ values[i] = i + 1;
+ }
+
+ // Repeat shuffle/insert/check cycles 10 times
+ for (int i = 0; i < NUM_REPEATS; i++) {
+ // Shuffle
+ Collections.shuffle(Arrays.asList(values), rnd);
+ inverseQuantilesEstimator.clear();
+
+ // Insert
+ for (int value : values) {
+ inverseQuantilesEstimator.insert(value);
+ }
+ Map<Quantile, Long> snapshot;
+ snapshot = inverseQuantilesEstimator.snapshot();
+
+ // Check
+ for (Quantile q : MutableInverseQuantiles.INVERSE_QUANTILES) {
+ long actual = (long) (q.quantile * count);
+ long error = (long) (q.error * count);
+ long estimate = snapshot.get(q);
assertThat(estimate <= actual + error).isTrue();
assertThat(estimate >= actual - error).isTrue();
}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java
index 9132e20210a..8210322f8f4 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java
@@ -392,13 +392,34 @@ public class MetricsAsserts {
*/
public static void assertQuantileGauges(String prefix,
MetricsRecordBuilder rb, String valueName) {
- verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0l));
+ verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0L));
for (Quantile q : MutableQuantiles.quantiles) {
String nameTemplate = prefix + "%dthPercentile" + valueName;
int percentile = (int) (100 * q.quantile);
verify(rb).addGauge(
eqName(info(String.format(nameTemplate, percentile), "")),
- geq(0l));
+ geq(0L));
+ }
+ }
+
+ /**
+ * Asserts that the NumOps and inverse quantiles for a metric have been changed at
+ * some point to a non-zero value, for the specified value name of the
+ * metrics (e.g., "Rate").
+ *
+ * @param prefix of the metric
+ * @param rb MetricsRecordBuilder with the metric
+ * @param valueName the value name for the metric
+ */
+ public static void assertInverseQuantileGauges(String prefix,
+ MetricsRecordBuilder rb, String valueName) {
+ verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0L));
+ for (Quantile q : MutableQuantiles.quantiles) {
+ String nameTemplate = prefix + "%dthInversePercentile" + valueName;
+ int percentile = (int) (100 * q.quantile);
+ verify(rb).addGauge(
+ eqName(info(String.format(nameTemplate, percentile), "")),
+ geq(0L));
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
index 675dbbff4c3..c3aa3c3a454 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
@@ -258,7 +258,7 @@ public class DataNodeMetrics {
"ramDiskBlocksLazyPersistWindows" + interval + "s",
"Time between the RamDisk block write and disk persist in ms",
"ops", "latency", interval);
- readTransferRateQuantiles[i] = registry.newQuantiles(
+ readTransferRateQuantiles[i] = registry.newInverseQuantiles(
"readTransferRate" + interval + "s",
"Rate at which bytes are read from datanode calculated in bytes per second",
"ops", "rate", interval);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
index de5c985a4f0..35f7924be11 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
+import static org.apache.hadoop.test.MetricsAsserts.assertInverseQuantileGauges;
import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges;
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
@@ -413,7 +414,7 @@ public class TestDataNodeMetrics {
final long endWriteValue = getLongCounter("TotalWriteTime", rbNew);
final long endReadValue = getLongCounter("TotalReadTime", rbNew);
assertCounter("ReadTransferRateNumOps", 1L, rbNew);
- assertQuantileGauges("ReadTransferRate" + "60s", rbNew, "Rate");
+ assertInverseQuantileGauges("ReadTransferRate60s", rbNew, "Rate");
return endWriteValue > startWriteValue
&& endReadValue > startReadValue;
}
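
HDFS-16949 adds MutableInverseQuantiles so that higher-is-better metrics report their low tail: the 1st percentile of the sample stream is published as the 99th inverse percentile gauge, the 5th as the 95th, and so on. Below is a minimal sketch of how a component could register and feed such a metric through the new MetricsRegistry.newInverseQuantiles method; the registry name, metric name and rate computation are illustrative assumptions, not part of the patch.

    import org.apache.hadoop.metrics2.lib.MetricsRegistry;
    import org.apache.hadoop.metrics2.lib.MutableQuantiles;

    public class TransferRateMetrics {
      // Hypothetical registry and metric names, chosen for illustration only.
      private final MetricsRegistry registry = new MetricsRegistry("ExampleSource");
      private final MutableQuantiles readTransferRate =
          registry.newInverseQuantiles("readTransferRate60s",
              "Rate at which bytes are read", "ops", "rate", 60);

      public void recordRead(long bytes, long durationMs) {
        if (durationMs > 0) {
          // Higher is better, so the gauge of interest is e.g.
          // ReadTransferRate60s99thInversePercentileRate (the slowest 1% of reads).
          readTransferRate.add(bytes * 1000 / durationMs);
        }
      }
    }
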
From 74ddf69f808b9fbf94bdad802b4745613152dbe5 Mon Sep 17 00:00:00 2001
From: Sadanand Shenoy
Date: Tue, 11 Apr 2023 02:33:16 +0530
Subject: [PATCH 48/78] HDFS-16911. Distcp with snapshot diff to support Ozone
filesystem. (#5364)
---
.../org/apache/hadoop/tools/DistCpSync.java | 110 ++++++++++++------
.../apache/hadoop/tools/TestDistCpSync.java | 67 +++++++++++
2 files changed, 140 insertions(+), 37 deletions(-)
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
index 1cf2d97ec1f..dbc86fd0b47 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
@@ -20,18 +20,19 @@ package org.apache.hadoop.tools;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonPathCapabilities;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSUtilClient;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
-import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.tools.CopyListing.InvalidInputException;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
@@ -106,20 +107,7 @@ class DistCpSync {
final FileSystem snapshotDiffFs = isRdiff() ? tgtFs : srcFs;
final Path snapshotDiffDir = isRdiff() ? targetDir : sourceDir;
- // currently we require both the source and the target file system are
- // DistributedFileSystem or (S)WebHdfsFileSystem.
- if (!(srcFs instanceof DistributedFileSystem
- || srcFs instanceof WebHdfsFileSystem)) {
- throw new IllegalArgumentException("Unsupported source file system: "
- + srcFs.getScheme() + "://. " +
- "Supported file systems: hdfs://, webhdfs:// and swebhdfs://.");
- }
- if (!(tgtFs instanceof DistributedFileSystem
- || tgtFs instanceof WebHdfsFileSystem)) {
- throw new IllegalArgumentException("Unsupported target file system: "
- + tgtFs.getScheme() + "://. " +
- "Supported file systems: hdfs://, webhdfs:// and swebhdfs://.");
- }
+ checkFilesystemSupport(sourceDir, targetDir, srcFs, tgtFs);
// make sure targetFS has no change between from and the current states
if (!checkNoChange(tgtFs, targetDir)) {
@@ -165,6 +153,42 @@ class DistCpSync {
return true;
}
+ /**
+ * Check if the source and target filesystems support snapshots.
+ */
+ private void checkFilesystemSupport(Path sourceDir, Path targetDir,
+ FileSystem srcFs, FileSystem tgtFs) throws IOException {
+ if (!srcFs.hasPathCapability(sourceDir,
+ CommonPathCapabilities.FS_SNAPSHOTS)) {
+ throw new UnsupportedOperationException(
+ "The source file system " + srcFs.getScheme()
+ + " does not support snapshot.");
+ }
+ if (!tgtFs.hasPathCapability(targetDir,
+ CommonPathCapabilities.FS_SNAPSHOTS)) {
+ throw new UnsupportedOperationException(
+ "The target file system " + tgtFs.getScheme()
+ + " does not support snapshot.");
+ }
+ try {
+ getSnapshotDiffReportMethod(srcFs);
+ } catch (NoSuchMethodException e) {
+ throw new UnsupportedOperationException(
+ "The source file system " + srcFs.getScheme()
+ + " does not support getSnapshotDiffReport",
+ e);
+ }
+ try {
+ getSnapshotDiffReportMethod(tgtFs);
+ } catch (NoSuchMethodException e) {
+ throw new UnsupportedOperationException(
+ "The target file system " + tgtFs.getScheme()
+ + " does not support getSnapshotDiffReport",
+ e);
+ }
+
+ }
+
public boolean sync() throws IOException {
if (!preSyncCheck()) {
return false;
@@ -211,21 +235,10 @@ class DistCpSync {
context.getTargetPath() : context.getSourcePaths().get(0);
try {
- SnapshotDiffReport report = null;
- FileSystem fs = ssDir.getFileSystem(conf);
final String from = getSnapshotName(context.getFromSnapshot());
final String to = getSnapshotName(context.getToSnapshot());
- if (fs instanceof DistributedFileSystem) {
- DistributedFileSystem dfs = (DistributedFileSystem)fs;
- report = dfs.getSnapshotDiffReport(ssDir, from, to);
- } else if (fs instanceof WebHdfsFileSystem) {
- WebHdfsFileSystem webHdfs = (WebHdfsFileSystem)fs;
- report = webHdfs.getSnapshotDiffReport(ssDir, from, to);
- } else {
- throw new IllegalArgumentException("Unsupported file system: " +
- fs.getScheme() + "://. " +
- "Supported file systems: hdfs://, webhdfs:// and swebhdfs://.");
- }
+ SnapshotDiffReport report =
+ getSnapshotDiffReport(ssDir.getFileSystem(conf), ssDir, from, to);
this.diffMap = new EnumMap<>(SnapshotDiffReport.DiffType.class);
for (SnapshotDiffReport.DiffType type :
@@ -286,6 +299,36 @@ class DistCpSync {
return false;
}
+ /**
+ * Check if the filesystem implementation has a method named
+ * getSnapshotDiffReport.
+ */
+ private static Method getSnapshotDiffReportMethod(FileSystem fs)
+ throws NoSuchMethodException {
+ return fs.getClass().getMethod(
+ "getSnapshotDiffReport", Path.class, String.class, String.class);
+ }
+
+ /**
+ * Get the snapshot diff between the fromSnapshot and the toSnapshot for the given
+ * filesystem.
+ */
+ private static SnapshotDiffReport getSnapshotDiffReport(
+ final FileSystem fs,
+ final Path snapshotDir,
+ final String fromSnapshot,
+ final String toSnapshot) throws IOException {
+ try {
+ return (SnapshotDiffReport) getSnapshotDiffReportMethod(fs).invoke(
+ fs, snapshotDir, fromSnapshot, toSnapshot);
+ } catch (InvocationTargetException e) {
+ throw new IOException(e.getCause());
+ } catch (NoSuchMethodException|IllegalAccessException e) {
+ throw new IllegalArgumentException(
+ "Failed to invoke getSnapshotDiffReport.", e);
+ }
+ }
+
private String getSnapshotName(String name) {
return Path.CUR_DIR.equals(name) ? "" : name;
}
@@ -327,14 +370,7 @@ class DistCpSync {
private boolean checkNoChange(FileSystem fs, Path path) {
try {
final String from = getSnapshotName(context.getFromSnapshot());
- SnapshotDiffReport targetDiff = null;
- if (fs instanceof DistributedFileSystem) {
- DistributedFileSystem dfs = (DistributedFileSystem)fs;
- targetDiff = dfs.getSnapshotDiffReport(path, from, "");
- } else {
- WebHdfsFileSystem webHdfs = (WebHdfsFileSystem)fs;
- targetDiff = webHdfs.getSnapshotDiffReport(path, from, "");
- }
+ SnapshotDiffReport targetDiff = getSnapshotDiffReport(fs, path, from, "");
if (!targetDiff.getDiffList().isEmpty()) {
DistCp.LOG.warn("The target has been modified since snapshot "
+ context.getFromSnapshot());
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
index 93796e752eb..0fbcd6571c6 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
@@ -23,6 +23,8 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.fs.CommonPathCapabilities;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -38,6 +40,7 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
import org.apache.hadoop.tools.mapred.CopyMapper;
import org.junit.After;
import org.junit.Assert;
@@ -47,6 +50,7 @@ import org.junit.Test;
import java.io.IOException;
import java.io.FileWriter;
import java.io.BufferedWriter;
+import java.net.URI;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.ArrayList;
@@ -56,6 +60,9 @@ import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
+import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
+import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
+
public class TestDistCpSync {
private MiniDFSCluster cluster;
private final Configuration conf = new HdfsConfiguration();
@@ -89,6 +96,7 @@ public class TestDistCpSync {
conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, target.toString());
conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, target.toString());
+ conf.setClass("fs.dummy.impl", DummyFs.class, FileSystem.class);
}
@After
@@ -1276,4 +1284,63 @@ public class TestDistCpSync {
verifyCopyByFs(sourceFS, targetFS, sourceFS.getFileStatus(sourceFSPath),
targetFS.getFileStatus(targetFSPath), false);
}
+
+ @Test
+ public void testSyncSnapshotDiffWithLocalFileSystem() throws Exception {
+ String[] args = new String[]{"-update", "-diff", "s1", "s2",
+ "file:///source", "file:///target"};
+ LambdaTestUtils.intercept(
+ UnsupportedOperationException.class,
+ "The source file system file does not support snapshot",
+ () -> new DistCp(conf, OptionsParser.parse(args)).execute());
+ }
+
+ @Test
+ public void testSyncSnapshotDiffWithDummyFileSystem() {
+ String[] args =
+ new String[] { "-update", "-diff", "s1", "s2", "dummy:///source",
+ "dummy:///target" };
+ try {
+ FileSystem dummyFs = FileSystem.get(URI.create("dummy:///"), conf);
+ assertThat(dummyFs).isInstanceOf(DummyFs.class);
+ new DistCp(conf, OptionsParser.parse(args)).execute();
+ } catch (UnsupportedOperationException e) {
+ throw e;
+ } catch (Exception e) {
+ // can expect other exceptions as source and target paths
+ // are not created.
+ }
+ }
+
+ public static class DummyFs extends RawLocalFileSystem {
+ public DummyFs() {
+ super();
+ }
+
+ public URI getUri() {
+ return URI.create("dummy:///");
+ }
+
+ @Override
+ public boolean hasPathCapability(Path path, String capability)
+ throws IOException {
+ switch (validatePathCapabilityArgs(makeQualified(path), capability)) {
+ case CommonPathCapabilities.FS_SNAPSHOTS:
+ return true;
+ default:
+ return super.hasPathCapability(path, capability);
+ }
+ }
+
+ @Override
+ public FileStatus getFileStatus(Path f) throws IOException {
+ return new FileStatus();
+ }
+
+ public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir,
+ final String fromSnapshot, final String toSnapshot) {
+ return new SnapshotDiffReport(snapshotDir.getName(), fromSnapshot,
+ toSnapshot, new ArrayList<>());
+ }
+ }
}
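
With this change DistCpSync no longer hard-codes DistributedFileSystem and WebHdfsFileSystem: it accepts any FileSystem that advertises the FS_SNAPSHOTS path capability and exposes a public getSnapshotDiffReport(Path, String, String) method, which it locates via reflection. The sketch below reproduces that probe as a standalone helper; the class and method names are hypothetical, and only the capability check and the reflective lookup mirror the patch.

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.CommonPathCapabilities;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public final class SnapshotDiffProbe {
      private SnapshotDiffProbe() {}

      /** Returns true if "distcp -diff" could use snapshot diffs rooted at dir. */
      public static boolean supportsSnapshotDiff(Path dir, Configuration conf) {
        try {
          FileSystem fs = dir.getFileSystem(conf);
          if (!fs.hasPathCapability(dir, CommonPathCapabilities.FS_SNAPSHOTS)) {
            return false;
          }
          // Same reflective lookup as DistCpSync#getSnapshotDiffReportMethod.
          fs.getClass().getMethod(
              "getSnapshotDiffReport", Path.class, String.class, String.class);
          return true;
        } catch (NoSuchMethodException | IOException e) {
          return false;
        }
      }
    }
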
From bffa49a64f93c3860b984d137df68d383b3a79f1 Mon Sep 17 00:00:00 2001
From: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Date: Wed, 12 Apr 2023 00:47:58 +0800
Subject: [PATCH 49/78] =?UTF-8?q?YARN-11377.=20[Federation]=20Support=20ad?=
=?UTF-8?q?dToClusterNodeLabels=E3=80=81removeFromClusterNodeLabels?=
=?UTF-8?q?=E3=80=81replaceLabelsOnNode=20API's=20for=20Federation.=20(#55?=
=?UTF-8?q?25)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../AddToClusterNodeLabelsRequest.java | 30 ++++
.../RemoveFromClusterNodeLabelsRequest.java | 28 ++++
.../ReplaceLabelsOnNodeRequest.java | 28 ++++
...erver_resourcemanager_service_protos.proto | 3 +
.../AddToClusterNodeLabelsRequestPBImpl.java | 16 +++
...oveFromClusterNodeLabelsRequestPBImpl.java | 19 ++-
.../pb/ReplaceLabelsOnNodeRequestPBImpl.java | 19 ++-
.../rmadmin/FederationRMAdminInterceptor.java | 101 ++++++++++++-
.../TestFederationRMAdminInterceptor.java | 133 ++++++++++++++++++
9 files changed, 368 insertions(+), 9 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java
index f2ac395dc3d..1f61268515a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.api.protocolrecords;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.util.Records;
@@ -37,6 +38,17 @@ public abstract class AddToClusterNodeLabelsRequest {
return request;
}
+ @Public
+ @Unstable
+ public static AddToClusterNodeLabelsRequest newInstance(String subClusterId,
+ List<NodeLabel> nodeLabels) {
+ AddToClusterNodeLabelsRequest request = Records
+ .newRecord(AddToClusterNodeLabelsRequest.class);
+ request.setNodeLabels(nodeLabels);
+ request.setSubClusterId(subClusterId);
+ return request;
+ }
+
@Public
@Unstable
public abstract void setNodeLabels(List<NodeLabel> nodeLabels);
@@ -44,4 +56,22 @@ public abstract class AddToClusterNodeLabelsRequest {
@Public
@Unstable
public abstract List<NodeLabel> getNodeLabels();
+
+ /**
+ * Get the subClusterId.
+ *
+ * @return subClusterId.
+ */
+ @Public
+ @InterfaceStability.Evolving
+ public abstract String getSubClusterId();
+
+ /**
+ * Set the subClusterId.
+ *
+ * @param subClusterId subCluster Id.
+ */
+ @Public
+ @InterfaceStability.Evolving
+ public abstract void setSubClusterId(String subClusterId);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoveFromClusterNodeLabelsRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoveFromClusterNodeLabelsRequest.java
index fd45f91e457..11baea04475 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoveFromClusterNodeLabelsRequest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoveFromClusterNodeLabelsRequest.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.api.protocolrecords;
import java.util.Set;
import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.yarn.util.Records;
@@ -35,6 +36,15 @@ public abstract class RemoveFromClusterNodeLabelsRequest {
return request;
}
+ public static RemoveFromClusterNodeLabelsRequest newInstance(String subClusterId,
+ Set<String> labels) {
+ RemoveFromClusterNodeLabelsRequest request =
+ Records.newRecord(RemoveFromClusterNodeLabelsRequest.class);
+ request.setNodeLabels(labels);
+ request.setSubClusterId(subClusterId);
+ return request;
+ }
+
@Public
@Evolving
public abstract void setNodeLabels(Set<String> labels);
@@ -42,4 +52,22 @@ public abstract class RemoveFromClusterNodeLabelsRequest {
@Public
@Evolving
public abstract Set<String> getNodeLabels();
+
+ /**
+ * Get the subClusterId.
+ *
+ * @return subClusterId.
+ */
+ @Public
+ @InterfaceStability.Evolving
+ public abstract String getSubClusterId();
+
+ /**
+ * Set the subClusterId.
+ *
+ * @param subClusterId subCluster Id.
+ */
+ @Public
+ @InterfaceStability.Evolving
+ public abstract void setSubClusterId(String subClusterId);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/ReplaceLabelsOnNodeRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/ReplaceLabelsOnNodeRequest.java
index 1b8e687b3dc..ab67e95f7d4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/ReplaceLabelsOnNodeRequest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/ReplaceLabelsOnNodeRequest.java
@@ -22,6 +22,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.util.Records;
@@ -37,6 +38,15 @@ public abstract class ReplaceLabelsOnNodeRequest {
return request;
}
+ public static ReplaceLabelsOnNodeRequest newInstance(Map<NodeId, Set<String>> map,
+ String subClusterId) {
+ ReplaceLabelsOnNodeRequest request =
+ Records.newRecord(ReplaceLabelsOnNodeRequest.class);
+ request.setNodeToLabels(map);
+ request.setSubClusterId(subClusterId);
+ return request;
+ }
+
@Public
@Evolving
public abstract void setNodeToLabels(Map<NodeId, Set<String>> map);
@@ -52,4 +62,22 @@ public abstract class ReplaceLabelsOnNodeRequest {
@Public
@Evolving
public abstract boolean getFailOnUnknownNodes();
+
+ /**
+ * Get the subClusterId.
+ *
+ * @return subClusterId.
+ */
+ @Public
+ @InterfaceStability.Evolving
+ public abstract String getSubClusterId();
+
+ /**
+ * Set the subClusterId.
+ *
+ * @param subClusterId subCluster Id.
+ */
+ @Public
+ @InterfaceStability.Evolving
+ public abstract void setSubClusterId(String subClusterId);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto
index 132f937e150..f2145ca73d0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto
@@ -95,6 +95,7 @@ message RefreshNodesResourcesResponseProto {
message AddToClusterNodeLabelsRequestProto {
repeated string deprecatedNodeLabels = 1;
repeated NodeLabelProto nodeLabels = 2;
+ optional string sub_cluster_id = 3;
}
message AddToClusterNodeLabelsResponseProto {
@@ -102,6 +103,7 @@ message AddToClusterNodeLabelsResponseProto {
message RemoveFromClusterNodeLabelsRequestProto {
repeated string nodeLabels = 1;
+ optional string sub_cluster_id = 2;
}
message RemoveFromClusterNodeLabelsResponseProto {
@@ -110,6 +112,7 @@ message RemoveFromClusterNodeLabelsResponseProto {
message ReplaceLabelsOnNodeRequestProto {
repeated NodeIdToLabelsProto nodeToLabels = 1;
optional bool failOnUnknownNodes = 2;
+ optional string sub_cluster_id = 3;
}
message ReplaceLabelsOnNodeResponseProto {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java
index 3bf22fb1b51..2012b9f3030 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java
@@ -152,4 +152,20 @@ public class AddToClusterNodeLabelsRequestPBImpl extends
initLocalNodeLabels();
return this.updatedNodeLabels;
}
+
+ @Override
+ public String getSubClusterId() {
+ AddToClusterNodeLabelsRequestProtoOrBuilder p = viaProto ? proto : builder;
+ return (p.hasSubClusterId()) ? p.getSubClusterId() : null;
+ }
+
+ @Override
+ public void setSubClusterId(String subClusterId) {
+ maybeInitBuilder();
+ if (subClusterId == null) {
+ builder.clearSubClusterId();
+ return;
+ }
+ builder.setSubClusterId(subClusterId);
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java
index afabcd919fe..d420bda5d7b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java
@@ -103,10 +103,25 @@ public class RemoveFromClusterNodeLabelsRequestPBImpl
return this.labels;
}
+ @Override
+ public String getSubClusterId() {
+ RemoveFromClusterNodeLabelsRequestProtoOrBuilder p = viaProto ? proto : builder;
+ return (p.hasSubClusterId()) ? p.getSubClusterId() : null;
+ }
+
+ @Override
+ public void setSubClusterId(String subClusterId) {
+ maybeInitBuilder();
+ if (subClusterId == null) {
+ builder.clearSubClusterId();
+ return;
+ }
+ builder.setSubClusterId(subClusterId);
+ }
+
@Override
public int hashCode() {
- assert false : "hashCode not designed";
- return 0;
+ return getProto().hashCode();
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java
index b2f491950d0..e7f2fa658e1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java
@@ -151,6 +151,22 @@ public class ReplaceLabelsOnNodeRequestPBImpl extends
return p.getFailOnUnknownNodes();
}
+ @Override
+ public String getSubClusterId() {
+ ReplaceLabelsOnNodeRequestProtoOrBuilder p = viaProto ? proto : builder;
+ return (p.hasSubClusterId()) ? p.getSubClusterId() : null;
+ }
+
+ @Override
+ public void setSubClusterId(String subClusterId) {
+ maybeInitBuilder();
+ if (subClusterId == null) {
+ builder.clearSubClusterId();
+ return;
+ }
+ builder.setSubClusterId(subClusterId);
+ }
+
@Override
public void setFailOnUnknownNodes(boolean failOnUnknownNodes) {
maybeInitBuilder();
@@ -163,8 +179,7 @@ public class ReplaceLabelsOnNodeRequestPBImpl extends
@Override
public int hashCode() {
- assert false : "hashCode not designed";
- return 0;
+ return getProto().hashCode();
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java
index c930459559f..c3cac82e38c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java
@@ -512,22 +512,113 @@ public class FederationRMAdminInterceptor extends AbstractRMAdminRequestIntercep
@Override
public AddToClusterNodeLabelsResponse addToClusterNodeLabels(
- AddToClusterNodeLabelsRequest request)
- throws YarnException, IOException {
- throw new NotImplementedException();
+ AddToClusterNodeLabelsRequest request) throws YarnException, IOException {
+ // parameter verification.
+ if (request == null) {
+ routerMetrics.incrAddToClusterNodeLabelsFailedRetrieved();
+ RouterServerUtil.logAndThrowException("Missing AddToClusterNodeLabels request.", null);
+ }
+
+ String subClusterId = request.getSubClusterId();
+ if (StringUtils.isBlank(subClusterId)) {
+ routerMetrics.incrAddToClusterNodeLabelsFailedRetrieved();
+ RouterServerUtil.logAndThrowException("Missing AddToClusterNodeLabels SubClusterId.", null);
+ }
+
+ try {
+ long startTime = clock.getTime();
+ RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod(
+ new Class[]{AddToClusterNodeLabelsRequest.class}, new Object[]{request});
+ Collection<AddToClusterNodeLabelsResponse> addToClusterNodeLabelsResps =
+ remoteMethod.invokeConcurrent(this, AddToClusterNodeLabelsResponse.class, subClusterId);
+ if (CollectionUtils.isNotEmpty(addToClusterNodeLabelsResps)) {
+ long stopTime = clock.getTime();
+ routerMetrics.succeededAddToClusterNodeLabelsRetrieved(stopTime - startTime);
+ return AddToClusterNodeLabelsResponse.newInstance();
+ }
+ } catch (YarnException e) {
+ routerMetrics.incrAddToClusterNodeLabelsFailedRetrieved();
+ RouterServerUtil.logAndThrowException(e,
+ "Unable to addToClusterNodeLabels due to exception. " + e.getMessage());
+ }
+
+ routerMetrics.incrAddToClusterNodeLabelsFailedRetrieved();
+ throw new YarnException("Unable to addToClusterNodeLabels.");
}
@Override
public RemoveFromClusterNodeLabelsResponse removeFromClusterNodeLabels(
RemoveFromClusterNodeLabelsRequest request)
throws YarnException, IOException {
- throw new NotImplementedException();
+ // parameter verification.
+ if (request == null) {
+ routerMetrics.incrRemoveFromClusterNodeLabelsFailedRetrieved();
+ RouterServerUtil.logAndThrowException("Missing RemoveFromClusterNodeLabels request.", null);
+ }
+
+ String subClusterId = request.getSubClusterId();
+ if (StringUtils.isBlank(subClusterId)) {
+ routerMetrics.incrRemoveFromClusterNodeLabelsFailedRetrieved();
+ RouterServerUtil.logAndThrowException("Missing RemoveFromClusterNodeLabels SubClusterId.",
+ null);
+ }
+
+ try {
+ long startTime = clock.getTime();
+ RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod(
+ new Class[]{RemoveFromClusterNodeLabelsRequest.class}, new Object[]{request});
+ Collection<RemoveFromClusterNodeLabelsResponse> refreshNodesResourcesResps =
+ remoteMethod.invokeConcurrent(this, RemoveFromClusterNodeLabelsResponse.class,
+ subClusterId);
+ if (CollectionUtils.isNotEmpty(refreshNodesResourcesResps)) {
+ long stopTime = clock.getTime();
+ routerMetrics.succeededRemoveFromClusterNodeLabelsRetrieved(stopTime - startTime);
+ return RemoveFromClusterNodeLabelsResponse.newInstance();
+ }
+ } catch (YarnException e) {
+ routerMetrics.incrRemoveFromClusterNodeLabelsFailedRetrieved();
+ RouterServerUtil.logAndThrowException(e,
+ "Unable to removeFromClusterNodeLabels due to exception. " + e.getMessage());
+ }
+
+ routerMetrics.incrRemoveFromClusterNodeLabelsFailedRetrieved();
+ throw new YarnException("Unable to removeFromClusterNodeLabels.");
}
@Override
public ReplaceLabelsOnNodeResponse replaceLabelsOnNode(ReplaceLabelsOnNodeRequest request)
throws YarnException, IOException {
- throw new NotImplementedException();
+ // parameter verification.
+ if (request == null) {
+ routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved();
+ RouterServerUtil.logAndThrowException("Missing ReplaceLabelsOnNode request.", null);
+ }
+
+ String subClusterId = request.getSubClusterId();
+ if (StringUtils.isBlank(subClusterId)) {
+ routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved();
+ RouterServerUtil.logAndThrowException("Missing ReplaceLabelsOnNode SubClusterId.", null);
+ }
+
+ try {
+ long startTime = clock.getTime();
+ RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod(
+ new Class[]{ReplaceLabelsOnNodeRequest.class}, new Object[]{request});
+ Collection<ReplaceLabelsOnNodeResponse> replaceLabelsOnNodeResps =
+ remoteMethod.invokeConcurrent(this, ReplaceLabelsOnNodeResponse.class, subClusterId);
+ if (CollectionUtils.isNotEmpty(replaceLabelsOnNodeResps)) {
+ long stopTime = clock.getTime();
+ routerMetrics.succeededRemoveFromClusterNodeLabelsRetrieved(stopTime - startTime);
+ return ReplaceLabelsOnNodeResponse.newInstance();
+ }
+ } catch (YarnException e) {
+ routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved();
+ RouterServerUtil.logAndThrowException(e,
+ "Unable to replaceLabelsOnNode due to exception. " + e.getMessage());
+ }
+
+ routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved();
+ throw new YarnException("Unable to replaceLabelsOnNode.");
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java
index 7449c8474d5..fa38bd6f4ce 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.yarn.api.records.DecommissionType;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceOption;
+import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest;
@@ -42,6 +43,12 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceReque
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesResponse;
+import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsRequest;
+import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsResponse;
+import org.apache.hadoop.yarn.server.api.protocolrecords.RemoveFromClusterNodeLabelsRequest;
+import org.apache.hadoop.yarn.server.api.protocolrecords.RemoveFromClusterNodeLabelsResponse;
+import org.apache.hadoop.yarn.server.api.protocolrecords.ReplaceLabelsOnNodeRequest;
+import org.apache.hadoop.yarn.server.api.protocolrecords.ReplaceLabelsOnNodeResponse;
import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
@@ -55,6 +62,8 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
import static org.junit.Assert.assertNotNull;
@@ -388,4 +397,128 @@ public class TestFederationRMAdminInterceptor extends BaseRouterRMAdminTest {
LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.",
() -> interceptor.refreshNodesResources(request1));
}
+
+ @Test
+ public void testAddToClusterNodeLabelsEmptyRequest() throws Exception {
+ // null request1.
+ LambdaTestUtils.intercept(YarnException.class, "Missing AddToClusterNodeLabels request.",
+ () -> interceptor.addToClusterNodeLabels(null));
+
+ // null request2.
+ AddToClusterNodeLabelsRequest request = AddToClusterNodeLabelsRequest.newInstance(null, null);
+ LambdaTestUtils.intercept(YarnException.class, "Missing AddToClusterNodeLabels SubClusterId.",
+ () -> interceptor.addToClusterNodeLabels(request));
+ }
+
+ @Test
+ public void testAddToClusterNodeLabelsNormalRequest() throws Exception {
+ // case1, We add NodeLabel to subCluster SC-1
+ NodeLabel nodeLabelA = NodeLabel.newInstance("a");
+ NodeLabel nodeLabelB = NodeLabel.newInstance("b");
+ List<NodeLabel> labels = new ArrayList<>();
+ labels.add(nodeLabelA);
+ labels.add(nodeLabelB);
+
+ AddToClusterNodeLabelsRequest request =
+ AddToClusterNodeLabelsRequest.newInstance("SC-1", labels);
+ AddToClusterNodeLabelsResponse response = interceptor.addToClusterNodeLabels(request);
+ assertNotNull(response);
+
+ // case2, test the non-exist subCluster.
+ AddToClusterNodeLabelsRequest request1 =
+ AddToClusterNodeLabelsRequest.newInstance("SC-NON", labels);
+ LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.",
+ () -> interceptor.addToClusterNodeLabels(request1));
+ }
+
+ @Test
+ public void testRemoveFromClusterNodeLabelsEmptyRequest() throws Exception {
+ // null request1.
+ LambdaTestUtils.intercept(YarnException.class, "Missing RemoveFromClusterNodeLabels request.",
+ () -> interceptor.removeFromClusterNodeLabels(null));
+
+ // null request2.
+ RemoveFromClusterNodeLabelsRequest request =
+ RemoveFromClusterNodeLabelsRequest.newInstance(null, null);
+ LambdaTestUtils.intercept(YarnException.class,
+ "Missing RemoveFromClusterNodeLabels SubClusterId.",
+ () -> interceptor.removeFromClusterNodeLabels(request));
+ }
+
+ @Test
+ public void testRemoveFromClusterNodeLabelsNormalRequest() throws Exception {
+ // case1, We add nodelabel a for SC-1, and then remove nodelabel a
+
+ // Step1. Add NodeLabel for subCluster SC-1
+ NodeLabel nodeLabelA = NodeLabel.newInstance("a");
+ NodeLabel nodeLabelB = NodeLabel.newInstance("b");
+ List<NodeLabel> nodeLabels = new ArrayList<>();
+ nodeLabels.add(nodeLabelA);
+ nodeLabels.add(nodeLabelB);
+
+ AddToClusterNodeLabelsRequest request =
+ AddToClusterNodeLabelsRequest.newInstance("SC-1", nodeLabels);
+ interceptor.addToClusterNodeLabels(request);
+
+ // Step2. We delete the label a of subCluster SC-1
+ Set<String> labels = new HashSet<>();
+ labels.add("a");
+
+ RemoveFromClusterNodeLabelsRequest request1 =
+ RemoveFromClusterNodeLabelsRequest.newInstance("SC-1", labels);
+ RemoveFromClusterNodeLabelsResponse response =
+ interceptor.removeFromClusterNodeLabels(request1);
+ assertNotNull(response);
+
+ // case2, test the non-exist subCluster.
+ RemoveFromClusterNodeLabelsRequest request2 =
+ RemoveFromClusterNodeLabelsRequest.newInstance("SC-NON", labels);
+ LambdaTestUtils.intercept(YarnException.class,
+ "subClusterId = SC-NON is not an active subCluster.",
+ () -> interceptor.removeFromClusterNodeLabels(request2));
+ }
+
+ @Test
+ public void testReplaceLabelsOnNodeEmptyRequest() throws Exception {
+ // case 1: the request itself is null.
+ LambdaTestUtils.intercept(YarnException.class, "Missing ReplaceLabelsOnNode request.",
+ () -> interceptor.replaceLabelsOnNode(null));
+
+ // case 2: the request has a null SubClusterId.
+ Map<NodeId, Set<String>> labelMap = new HashMap<>();
+ ReplaceLabelsOnNodeRequest request = ReplaceLabelsOnNodeRequest.newInstance(labelMap, null);
+ LambdaTestUtils.intercept(YarnException.class, "Missing ReplaceLabelsOnNode SubClusterId.",
+ () -> interceptor.replaceLabelsOnNode(request));
+ }
+
+ @Test
+ public void testReplaceLabelsOnNodeNormalRequest() throws Exception {
+ // case 1: add node labels to SC-1, then replace the labels on a specific node.
+ NodeLabel nodeLabelA = NodeLabel.newInstance("a");
+ NodeLabel nodeLabelB = NodeLabel.newInstance("b");
+ List<NodeLabel> nodeLabels = new ArrayList<>();
+ nodeLabels.add(nodeLabelA);
+ nodeLabels.add(nodeLabelB);
+
+ AddToClusterNodeLabelsRequest request =
+ AddToClusterNodeLabelsRequest.newInstance("SC-1", nodeLabels);
+ interceptor.addToClusterNodeLabels(request);
+
+ Map<NodeId, Set<String>> pMap = new HashMap<>();
+ NodeId nodeId = NodeId.newInstance("127.0.0.1", 0);
+ Set<String> labels = new HashSet<>();
+ labels.add("a");
+ pMap.put(nodeId, labels);
+
+ ReplaceLabelsOnNodeRequest request1 = ReplaceLabelsOnNodeRequest.newInstance(pMap, "SC-1");
+ ReplaceLabelsOnNodeResponse response = interceptor.replaceLabelsOnNode(request1);
+ assertNotNull(response);
+
+ // case 2: a non-existent subCluster should be rejected.
+ ReplaceLabelsOnNodeRequest request2 =
+ ReplaceLabelsOnNodeRequest.newInstance(pMap, "SC-NON");
+ LambdaTestUtils.intercept(YarnException.class,
+ "subClusterId = SC-NON is not an active subCluster.",
+ () -> interceptor.replaceLabelsOnNode(request2));
+ }
}
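The tests above exercise the router-side `newInstance` overloads that carry an explicit sub-cluster id. A minimal sketch of building those requests outside the test harness, assuming only the signatures shown in the diff (the interceptor invocation itself is omitted):

```java
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RemoveFromClusterNodeLabelsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.ReplaceLabelsOnNodeRequest;

public class NodeLabelRequestSketch {
  public static void main(String[] args) {
    // Add labels "a" and "b" to sub-cluster SC-1.
    List<NodeLabel> labels =
        Arrays.asList(NodeLabel.newInstance("a"), NodeLabel.newInstance("b"));
    AddToClusterNodeLabelsRequest add =
        AddToClusterNodeLabelsRequest.newInstance("SC-1", labels);

    // Remove label "a" from sub-cluster SC-1.
    RemoveFromClusterNodeLabelsRequest remove =
        RemoveFromClusterNodeLabelsRequest.newInstance("SC-1", Collections.singleton("a"));

    // Replace the labels on one node of sub-cluster SC-1.
    Map<NodeId, Set<String>> mapping = Collections.singletonMap(
        NodeId.newInstance("127.0.0.1", 0), Collections.singleton("a"));
    ReplaceLabelsOnNodeRequest replace =
        ReplaceLabelsOnNodeRequest.newInstance(mapping, "SC-1");

    // In the tests these are passed to the router's RMAdmin interceptor,
    // e.g. interceptor.addToClusterNodeLabels(add).
    System.out.println(add + "\n" + remove + "\n" + replace);
  }
}
```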
From 7c3d94a032ba0bfafb2d1ff35d4675cb6b5618d9 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Wed, 12 Apr 2023 00:47:45 +0100
Subject: [PATCH 50/78] HADOOP-18637. S3A to support upload of files greater
than 2 GB using DiskBlocks (#5543)
Contributed By: HarshitGupta and Steve Loughran
---
hadoop-tools/hadoop-aws/pom.xml | 2 +
.../org/apache/hadoop/fs/s3a/Constants.java | 21 +++++
.../hadoop/fs/s3a/S3ABlockOutputStream.java | 72 +++++++++++----
.../apache/hadoop/fs/s3a/S3ADataBlocks.java | 76 ++++++++++------
.../apache/hadoop/fs/s3a/S3AFileSystem.java | 26 ++++--
.../hadoop/fs/s3a/S3AInstrumentation.java | 8 +-
.../org/apache/hadoop/fs/s3a/S3AUtils.java | 33 +++++++
.../hadoop/fs/s3a/WriteOperationHelper.java | 4 +-
.../apache/hadoop/fs/s3a/WriteOperations.java | 2 +-
.../hadoop/fs/s3a/api/RequestFactory.java | 5 +-
.../fs/s3a/commit/AbstractS3ACommitter.java | 4 +
.../fs/s3a/impl/RequestFactoryImpl.java | 30 ++++++-
.../BlockOutputStreamStatistics.java | 8 +-
.../impl/EmptyS3AStatisticsContext.java | 8 +-
.../site/markdown/tools/hadoop-aws/index.md | 4 +-
.../hadoop/fs/s3a/MockS3AFileSystem.java | 5 ++
.../ITestMagicCommitProtocolFailure.java | 69 ++++++++++++++
.../ITestStagingCommitProtocolFailure.java | 69 ++++++++++++++
.../fs/s3a/impl/TestRequestFactory.java | 3 +-
.../ITestS3AHugeFileUploadSinglePut.java | 89 +++++++++++++++++++
20 files changed, 465 insertions(+), 73 deletions(-)
create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocolFailure.java
create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocolFailure.java
create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFileUploadSinglePut.java
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index 6ebf1c71f0d..ae8db93329d 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -108,6 +108,7 @@
${testsThreadCount}false
+ false${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true${testsThreadCount}
@@ -272,6 +273,7 @@
verify
+ false${fs.s3a.scale.test.enabled}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 16472a75fd2..a59a07c8437 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -1255,4 +1255,25 @@ public final class Constants {
*/
public static final String PREFETCH_BLOCK_COUNT_KEY = "fs.s3a.prefetch.block.count";
public static final int PREFETCH_BLOCK_DEFAULT_COUNT = 8;
+
+ /**
+ * Option to enable or disable the multipart uploads.
+ * Value: {@value}.
+ *
+ * Default is {@link #DEFAULT_MULTIPART_UPLOAD_ENABLED}.
+ */
+ public static final String MULTIPART_UPLOADS_ENABLED = "fs.s3a.multipart.uploads.enabled";
+
+ /**
+ * Default value for multipart uploads.
+ * {@value}
+ */
+ public static final boolean DEFAULT_MULTIPART_UPLOAD_ENABLED = true;
+
+ /**
+ * Stream supports multipart uploads to the given path.
+ */
+ public static final String STORE_CAPABILITY_DIRECTORY_MARKER_MULTIPART_UPLOAD_ENABLED =
+ "fs.s3a.capability.multipart.uploads.enabled";
+
}
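A minimal sketch of driving the new constants from client code. It assumes only `hadoop-common` and `hadoop-aws` on the classpath and never contacts a bucket:

```java
import org.apache.hadoop.conf.Configuration;

import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MULTIPART_UPLOAD_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER;
import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_DISK;
import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_UPLOADS_ENABLED;

public class MultipartToggleSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Disable multipart uploads: every file is uploaded as a single PUT.
    conf.setBoolean(MULTIPART_UPLOADS_ENABLED, false);
    // With multipart off, only disk buffering is consistent (see checkDiskBuffer later in this patch).
    conf.set(FAST_UPLOAD_BUFFER, FAST_UPLOAD_BUFFER_DISK);

    boolean enabled =
        conf.getBoolean(MULTIPART_UPLOADS_ENABLED, DEFAULT_MULTIPART_UPLOAD_ENABLED);
    System.out.println("fs.s3a.multipart.uploads.enabled = " + enabled);
  }
}
```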
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java
index 19943ff2f70..df3c9315ba8 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java
@@ -101,7 +101,7 @@ class S3ABlockOutputStream extends OutputStream implements
private final String key;
/** Size of all blocks. */
- private final int blockSize;
+ private final long blockSize;
/** IO Statistics. */
private final IOStatistics iostatistics;
@@ -169,6 +169,9 @@ class S3ABlockOutputStream extends OutputStream implements
/** Thread level IOStatistics Aggregator. */
private final IOStatisticsAggregator threadIOStatisticsAggregator;
+ /** Is multipart upload enabled? */
+ private final boolean isMultipartUploadEnabled;
+
/**
* An S3A output stream which uploads partitions in a separate pool of
* threads; different {@link S3ADataBlocks.BlockFactory}
@@ -181,7 +184,6 @@ class S3ABlockOutputStream extends OutputStream implements
this.builder = builder;
this.key = builder.key;
this.blockFactory = builder.blockFactory;
- this.blockSize = (int) builder.blockSize;
this.statistics = builder.statistics;
// test instantiations may not provide statistics;
this.iostatistics = statistics.getIOStatistics();
@@ -195,17 +197,26 @@ class S3ABlockOutputStream extends OutputStream implements
(ProgressListener) progress
: new ProgressableListener(progress);
downgradeSyncableExceptions = builder.downgradeSyncableExceptions;
- // create that first block. This guarantees that an open + close sequence
- // writes a 0-byte entry.
- createBlockIfNeeded();
- LOG.debug("Initialized S3ABlockOutputStream for {}" +
- " output to {}", key, activeBlock);
+
+ // look for multipart support.
+ this.isMultipartUploadEnabled = builder.isMultipartUploadEnabled;
+ // block size is infinite if multipart is disabled, so ignore
+ // what was passed in from the builder.
+ this.blockSize = isMultipartUploadEnabled
+ ? builder.blockSize
+ : -1;
+
if (putTracker.initialize()) {
LOG.debug("Put tracker requests multipart upload");
initMultipartUpload();
}
this.isCSEEnabled = builder.isCSEEnabled;
this.threadIOStatisticsAggregator = builder.ioStatisticsAggregator;
+ // create that first block. This guarantees that an open + close sequence
+ // writes a 0-byte entry.
+ createBlockIfNeeded();
+ LOG.debug("Initialized S3ABlockOutputStream for {}" +
+ " output to {}", key, activeBlock);
}
/**
@@ -318,7 +329,15 @@ class S3ABlockOutputStream extends OutputStream implements
statistics.writeBytes(len);
S3ADataBlocks.DataBlock block = createBlockIfNeeded();
int written = block.write(source, offset, len);
- int remainingCapacity = block.remainingCapacity();
+ if (!isMultipartUploadEnabled) {
+ // no need to check for space as multipart uploads
+ // are not available...everything is saved to a single
+ // (disk) block.
+ return;
+ }
+ // look to see if another block is needed to complete
+ // the upload or exactly a block was written.
+ int remainingCapacity = (int) block.remainingCapacity();
if (written < len) {
// not everything was written —the block has run out
// of capacity
@@ -369,6 +388,8 @@ class S3ABlockOutputStream extends OutputStream implements
*/
@Retries.RetryTranslated
private void initMultipartUpload() throws IOException {
+ Preconditions.checkState(isMultipartUploadEnabled,
+ "multipart upload is disabled");
if (multiPartUpload == null) {
LOG.debug("Initiating Multipart upload");
multiPartUpload = new MultiPartUpload(key);
@@ -558,19 +579,20 @@ class S3ABlockOutputStream extends OutputStream implements
}
/**
- * Upload the current block as a single PUT request; if the buffer
- * is empty a 0-byte PUT will be invoked, as it is needed to create an
- * entry at the far end.
- * @throws IOException any problem.
- * @return number of bytes uploaded. If thread was interrupted while
- * waiting for upload to complete, returns zero with interrupted flag set
- * on this thread.
+ * Upload the current block as a single PUT request; if the buffer is empty a
+ * 0-byte PUT will be invoked, as it is needed to create an entry at the far
+ * end.
+ * @return number of bytes uploaded. If thread was interrupted while waiting
+ * for upload to complete, returns zero with interrupted flag set on this
+ * thread.
+ * @throws IOException
+ * any problem.
*/
- private int putObject() throws IOException {
+ private long putObject() throws IOException {
LOG.debug("Executing regular upload for {}", writeOperationHelper);
final S3ADataBlocks.DataBlock block = getActiveBlock();
- int size = block.dataSize();
+ long size = block.dataSize();
final S3ADataBlocks.BlockUploadData uploadData = block.startUpload();
final PutObjectRequest putObjectRequest = uploadData.hasFile() ?
writeOperationHelper.createPutObjectRequest(
@@ -617,6 +639,7 @@ class S3ABlockOutputStream extends OutputStream implements
"S3ABlockOutputStream{");
sb.append(writeOperationHelper.toString());
sb.append(", blockSize=").append(blockSize);
+ sb.append(", isMultipartUploadEnabled=").append(isMultipartUploadEnabled);
// unsynced access; risks consistency in exchange for no risk of deadlock.
S3ADataBlocks.DataBlock block = activeBlock;
if (block != null) {
@@ -835,7 +858,7 @@ class S3ABlockOutputStream extends OutputStream implements
Preconditions.checkNotNull(uploadId, "Null uploadId");
maybeRethrowUploadFailure();
partsSubmitted++;
- final int size = block.dataSize();
+ final long size = block.dataSize();
bytesSubmitted += size;
final int currentPartNumber = partETagsFutures.size() + 1;
final UploadPartRequest request;
@@ -1011,7 +1034,7 @@ class S3ABlockOutputStream extends OutputStream implements
ProgressEventType eventType = progressEvent.getEventType();
long bytesTransferred = progressEvent.getBytesTransferred();
- int size = block.dataSize();
+ long size = block.dataSize();
switch (eventType) {
case REQUEST_BYTE_TRANSFER_EVENT:
@@ -1126,6 +1149,11 @@ class S3ABlockOutputStream extends OutputStream implements
*/
private IOStatisticsAggregator ioStatisticsAggregator;
+ /**
+ * Is multipart upload enabled for the given upload?
+ */
+ private boolean isMultipartUploadEnabled;
+
private BlockOutputStreamBuilder() {
}
@@ -1276,5 +1304,11 @@ class S3ABlockOutputStream extends OutputStream implements
ioStatisticsAggregator = value;
return this;
}
+
+ public BlockOutputStreamBuilder withMultipartEnabled(
+ final boolean value) {
+ isMultipartUploadEnabled = value;
+ return this;
+ }
}
}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java
index 03b5bd96162..b20d8e859aa 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java
@@ -180,7 +180,7 @@ final class S3ADataBlocks {
* @param statistics stats to work with
* @return a new block.
*/
- abstract DataBlock create(long index, int limit,
+ abstract DataBlock create(long index, long limit,
BlockOutputStreamStatistics statistics)
throws IOException;
@@ -258,7 +258,7 @@ final class S3ADataBlocks {
* Return the current data size.
* @return the size of the data
*/
- abstract int dataSize();
+ abstract long dataSize();
/**
* Predicate to verify that the block has the capacity to write
@@ -280,7 +280,7 @@ final class S3ADataBlocks {
* The remaining capacity in the block before it is full.
* @return the number of bytes remaining.
*/
- abstract int remainingCapacity();
+ abstract long remainingCapacity();
/**
* Write a series of bytes from the buffer, from the offset.
@@ -391,9 +391,11 @@ final class S3ADataBlocks {
}
@Override
- DataBlock create(long index, int limit,
+ DataBlock create(long index, long limit,
BlockOutputStreamStatistics statistics)
throws IOException {
+ Preconditions.checkArgument(limit > 0,
+ "Invalid block size: %d", limit);
return new ByteArrayBlock(0, limit, statistics);
}
@@ -436,11 +438,11 @@ final class S3ADataBlocks {
private Integer dataSize;
ByteArrayBlock(long index,
- int limit,
+ long limit,
BlockOutputStreamStatistics statistics) {
super(index, statistics);
- this.limit = limit;
- buffer = new S3AByteArrayOutputStream(limit);
+ this.limit = (limit > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int) limit;
+ buffer = new S3AByteArrayOutputStream(this.limit);
blockAllocated();
}
@@ -449,7 +451,7 @@ final class S3ADataBlocks {
* @return the amount of data available to upload.
*/
@Override
- int dataSize() {
+ long dataSize() {
return dataSize != null ? dataSize : buffer.size();
}
@@ -468,14 +470,14 @@ final class S3ADataBlocks {
}
@Override
- int remainingCapacity() {
+ long remainingCapacity() {
return limit - dataSize();
}
@Override
int write(byte[] b, int offset, int len) throws IOException {
super.write(b, offset, len);
- int written = Math.min(remainingCapacity(), len);
+ int written = (int) Math.min(remainingCapacity(), len);
buffer.write(b, offset, written);
return written;
}
@@ -514,9 +516,11 @@ final class S3ADataBlocks {
}
@Override
- ByteBufferBlock create(long index, int limit,
+ ByteBufferBlock create(long index, long limit,
BlockOutputStreamStatistics statistics)
throws IOException {
+ Preconditions.checkArgument(limit > 0,
+ "Invalid block size: %d", limit);
return new ByteBufferBlock(index, limit, statistics);
}
@@ -564,11 +568,12 @@ final class S3ADataBlocks {
* @param statistics statistics to update
*/
ByteBufferBlock(long index,
- int bufferSize,
+ long bufferSize,
BlockOutputStreamStatistics statistics) {
super(index, statistics);
- this.bufferSize = bufferSize;
- blockBuffer = requestBuffer(bufferSize);
+ this.bufferSize = bufferSize > Integer.MAX_VALUE ?
+ Integer.MAX_VALUE : (int) bufferSize;
+ blockBuffer = requestBuffer(this.bufferSize);
blockAllocated();
}
@@ -577,7 +582,7 @@ final class S3ADataBlocks {
* @return the amount of data available to upload.
*/
@Override
- int dataSize() {
+ long dataSize() {
return dataSize != null ? dataSize : bufferCapacityUsed();
}
@@ -598,7 +603,7 @@ final class S3ADataBlocks {
}
@Override
- public int remainingCapacity() {
+ public long remainingCapacity() {
return blockBuffer != null ? blockBuffer.remaining() : 0;
}
@@ -609,7 +614,7 @@ final class S3ADataBlocks {
@Override
int write(byte[] b, int offset, int len) throws IOException {
super.write(b, offset, len);
- int written = Math.min(remainingCapacity(), len);
+ int written = (int) Math.min(remainingCapacity(), len);
blockBuffer.put(b, offset, written);
return written;
}
@@ -802,16 +807,18 @@ final class S3ADataBlocks {
* Create a temp file and a {@link DiskBlock} instance to manage it.
*
* @param index block index
- * @param limit limit of the block.
+ * @param limit limit of the block. -1 means "no limit"
* @param statistics statistics to update
* @return the new block
* @throws IOException IO problems
*/
@Override
DataBlock create(long index,
- int limit,
+ long limit,
BlockOutputStreamStatistics statistics)
throws IOException {
+ Preconditions.checkArgument(limit != 0,
+ "Invalid block size: %d", limit);
File destFile = getOwner()
.createTmpFileForWrite(String.format("s3ablock-%04d-", index),
limit, getOwner().getConf());
@@ -825,14 +832,14 @@ final class S3ADataBlocks {
*/
static class DiskBlock extends DataBlock {
- private int bytesWritten;
+ private long bytesWritten;
private final File bufferFile;
- private final int limit;
+ private final long limit;
private BufferedOutputStream out;
private final AtomicBoolean closed = new AtomicBoolean(false);
DiskBlock(File bufferFile,
- int limit,
+ long limit,
long index,
BlockOutputStreamStatistics statistics)
throws FileNotFoundException {
@@ -844,24 +851,39 @@ final class S3ADataBlocks {
}
@Override
- int dataSize() {
+ long dataSize() {
return bytesWritten;
}
+ /**
+ * Does this block have unlimited space?
+ * @return true if a block with no size limit was created.
+ */
+ private boolean unlimited() {
+ return limit < 0;
+ }
+
@Override
boolean hasCapacity(long bytes) {
- return dataSize() + bytes <= limit;
+ return unlimited() || dataSize() + bytes <= limit;
}
+ /**
+ * {@inheritDoc}.
+ * If there is no limit to capacity, return MAX_VALUE.
+ * @return capacity in the block.
+ */
@Override
- int remainingCapacity() {
- return limit - bytesWritten;
+ long remainingCapacity() {
+ return unlimited()
+ ? Integer.MAX_VALUE
+ : limit - bytesWritten;
}
@Override
int write(byte[] b, int offset, int len) throws IOException {
super.write(b, offset, len);
- int written = Math.min(remainingCapacity(), len);
+ int written = (int) Math.min(remainingCapacity(), len);
out.write(b, offset, written);
bytesWritten += written;
return written;
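A standalone illustration of the capacity arithmetic the `DiskBlock` changes introduce. `CapacitySketch` is a hypothetical name; it only mirrors the `limit < 0` convention shown in the hunk above:

```java
/** Illustrative only: mirrors the unlimited-capacity convention of DiskBlock. */
final class CapacitySketch {
  private final long limit;     // -1 means "no limit", i.e. multipart uploads disabled
  private long bytesWritten;

  CapacitySketch(long limit) {
    this.limit = limit;
  }

  private boolean unlimited() {
    return limit < 0;
  }

  boolean hasCapacity(long bytes) {
    return unlimited() || bytesWritten + bytes <= limit;
  }

  long remainingCapacity() {
    // With no limit, report Integer.MAX_VALUE so callers can still do (int) Math.min(...).
    return unlimited() ? Integer.MAX_VALUE : limit - bytesWritten;
  }

  int write(int len) {
    int written = (int) Math.min(remainingCapacity(), len);
    bytesWritten += written;
    return written;
  }

  public static void main(String[] args) {
    CapacitySketch unbounded = new CapacitySketch(-1);
    System.out.println(unbounded.write(1 << 20));   // 1048576: never truncated
    CapacitySketch bounded = new CapacitySketch(512);
    System.out.println(bounded.write(1 << 20));     // 512: capped at the block limit
  }
}
```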
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index e96feb0243a..a73bd55b55e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -413,6 +413,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
*/
private ArnResource accessPoint;
+ /**
+ * Does this S3A FS instance have multipart upload enabled?
+ */
+ private boolean isMultipartUploadEnabled = DEFAULT_MULTIPART_UPLOAD_ENABLED;
+
/**
* A cache of files that should be deleted when the FileSystem is closed
* or the JVM is exited.
@@ -543,7 +548,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
this.prefetchBlockSize = (int) prefetchBlockSizeLong;
this.prefetchBlockCount =
intOption(conf, PREFETCH_BLOCK_COUNT_KEY, PREFETCH_BLOCK_DEFAULT_COUNT, 1);
-
+ this.isMultipartUploadEnabled = conf.getBoolean(MULTIPART_UPLOADS_ENABLED,
+ DEFAULT_MULTIPART_UPLOAD_ENABLED);
initThreadPools(conf);
int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION);
@@ -605,7 +611,6 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
}
blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER,
DEFAULT_FAST_UPLOAD_BUFFER);
- partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);
blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer);
blockOutputActiveBlocks = intOption(conf,
FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1);
@@ -614,8 +619,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
blockOutputActiveBlocks = 1;
}
LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" +
- " queue limit={}",
- blockOutputBuffer, partSize, blockOutputActiveBlocks);
+ " queue limit={}; multipart={}",
+ blockOutputBuffer, partSize, blockOutputActiveBlocks, isMultipartUploadEnabled);
// verify there's no S3Guard in the store config.
checkNoS3Guard(this.getUri(), getConf());
@@ -1092,6 +1097,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
.withRequestPreparer(getAuditManager()::requestCreated)
.withContentEncoding(contentEncoding)
.withStorageClass(storageClass)
+ .withMultipartUploadEnabled(isMultipartUploadEnabled)
.build();
}
@@ -1842,6 +1848,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
final PutObjectOptions putOptions =
new PutObjectOptions(keep, null, options.getHeaders());
+ validateOutputStreamConfiguration(path, getConf());
+
final S3ABlockOutputStream.BlockOutputStreamBuilder builder =
S3ABlockOutputStream.builder()
.withKey(destKey)
@@ -1865,7 +1873,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
.withCSEEnabled(isCSEEnabled)
.withPutOptions(putOptions)
.withIOStatisticsAggregator(
- IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator());
+ IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator())
+ .withMultipartEnabled(isMultipartUploadEnabled);
return new FSDataOutputStream(
new S3ABlockOutputStream(builder),
null);
@@ -5103,6 +5112,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
case STORE_CAPABILITY_DIRECTORY_MARKER_ACTION_DELETE:
return !keepDirectoryMarkers(path);
+ case STORE_CAPABILITY_DIRECTORY_MARKER_MULTIPART_UPLOAD_ENABLED:
+ return isMultipartUploadEnabled();
+
// create file options
case FS_S3A_CREATE_PERFORMANCE:
case FS_S3A_CREATE_HEADER:
@@ -5419,4 +5431,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
public boolean isCSEEnabled() {
return isCSEEnabled;
}
+
+ public boolean isMultipartUploadEnabled() {
+ return isMultipartUploadEnabled;
+ }
}
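The new path capability can be probed from client code without reading the configuration directly. A hedged sketch, assuming a reachable S3A URI (the bucket name is a placeholder):

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MultipartCapabilityProbe {
  public static void main(String[] args) throws Exception {
    // Placeholder bucket; substitute one you can actually reach.
    Path path = new Path("s3a://example-bucket/");
    try (FileSystem fs =
             FileSystem.newInstance(new URI(path.toString()), new Configuration())) {
      boolean multipart = fs.hasPathCapability(path,
          "fs.s3a.capability.multipart.uploads.enabled");
      System.out.println("multipart uploads enabled: " + multipart);
    }
  }
}
```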
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
index 9d33efa9d01..da12223570e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
@@ -1547,7 +1547,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource,
* of block uploads pending (1) and the bytes pending (blockSize).
*/
@Override
- public void blockUploadQueued(int blockSize) {
+ public void blockUploadQueued(long blockSize) {
incCounter(StreamStatisticNames.STREAM_WRITE_BLOCK_UPLOADS);
incAllGauges(STREAM_WRITE_BLOCK_UPLOADS_PENDING, 1);
incAllGauges(STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING, blockSize);
@@ -1560,7 +1560,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource,
* {@code STREAM_WRITE_BLOCK_UPLOADS_ACTIVE}.
*/
@Override
- public void blockUploadStarted(Duration timeInQueue, int blockSize) {
+ public void blockUploadStarted(Duration timeInQueue, long blockSize) {
// the local counter is used in toString reporting.
queueDuration.addAndGet(timeInQueue.toMillis());
// update the duration fields in the IOStatistics.
@@ -1588,7 +1588,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource,
@Override
public void blockUploadCompleted(
Duration timeSinceUploadStarted,
- int blockSize) {
+ long blockSize) {
transferDuration.addAndGet(timeSinceUploadStarted.toMillis());
incAllGauges(STREAM_WRITE_BLOCK_UPLOADS_ACTIVE, -1);
blockUploadsCompleted.incrementAndGet();
@@ -1602,7 +1602,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource,
@Override
public void blockUploadFailed(
Duration timeSinceUploadStarted,
- int blockSize) {
+ long blockSize) {
incCounter(StreamStatisticNames.STREAM_WRITE_EXCEPTIONS);
}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index 8a1947f3e42..274bc96fb99 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.util.functional.RemoteIterators;
import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets;
@@ -1031,6 +1032,38 @@ public final class S3AUtils {
return partSize;
}
+ /**
+ * Validates the output stream configuration.
+ * @param path path: for error messages
+ * @param conf : configuration object for the given context
+ * @throws PathIOException Unsupported configuration.
+ */
+ public static void validateOutputStreamConfiguration(final Path path,
+ Configuration conf) throws PathIOException {
+ if (!checkDiskBuffer(conf)) {
+ throw new PathIOException(path.toString(),
+ "Unable to create OutputStream with the given"
+ + " multipart upload and buffer configuration.");
+ }
+ }
+
+ /**
+ * Check whether the configuration for S3ABlockOutputStream is
+ * consistent. When multipart uploads are enabled, any of the fast upload
+ * buffers may be used. When they are disabled, only disk buffering is
+ * allowed, because the file being written may be larger than any buffer
+ * that could be allocated in memory.
+ * @param conf : configuration object for the given context
+ * @return true if the disk buffer and the multipart settings are supported
+ */
+ public static boolean checkDiskBuffer(Configuration conf) {
+ boolean isMultipartUploadEnabled = conf.getBoolean(MULTIPART_UPLOADS_ENABLED,
+ DEFAULT_MULTIPART_UPLOAD_ENABLED);
+ return isMultipartUploadEnabled
+ || FAST_UPLOAD_BUFFER_DISK.equals(
+ conf.get(FAST_UPLOAD_BUFFER, DEFAULT_FAST_UPLOAD_BUFFER));
+ }
+
/**
* Ensure that the long value is in the range of an integer.
* @param name property name for error messages
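A small sketch of the consistency rule `checkDiskBuffer` enforces, assuming the method is public static as added above; the expected results are noted in comments:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.S3AUtils;

import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER;
import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_DISK;
import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_UPLOADS_ENABLED;

public class BufferConsistencySketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setBoolean(MULTIPART_UPLOADS_ENABLED, false);

    conf.set(FAST_UPLOAD_BUFFER, "array");
    // Multipart off + heap buffering: rejected, the whole file may not fit in memory.
    System.out.println(S3AUtils.checkDiskBuffer(conf));   // false

    conf.set(FAST_UPLOAD_BUFFER, FAST_UPLOAD_BUFFER_DISK);
    // Multipart off + disk buffering: allowed.
    System.out.println(S3AUtils.checkDiskBuffer(conf));   // true

    conf.setBoolean(MULTIPART_UPLOADS_ENABLED, true);
    conf.set(FAST_UPLOAD_BUFFER, "array");
    // Multipart on: any buffer type is fine.
    System.out.println(S3AUtils.checkDiskBuffer(conf));   // true
  }
}
```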
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java
index 14ffeed4a55..7f9db33157f 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java
@@ -269,8 +269,6 @@ public class WriteOperationHelper implements WriteOperations {
String dest,
File sourceFile,
final PutObjectOptions options) {
- Preconditions.checkState(sourceFile.length() < Integer.MAX_VALUE,
- "File length is too big for a single PUT upload");
activateAuditSpan();
final ObjectMetadata objectMetadata =
newObjectMetadata((int) sourceFile.length());
@@ -532,7 +530,7 @@ public class WriteOperationHelper implements WriteOperations {
String destKey,
String uploadId,
int partNumber,
- int size,
+ long size,
InputStream uploadStream,
File sourceFile,
Long offset) throws IOException {
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java
index 321390446f7..32888314d88 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java
@@ -233,7 +233,7 @@ public interface WriteOperations extends AuditSpanSource, Closeable {
String destKey,
String uploadId,
int partNumber,
- int size,
+ long size,
InputStream uploadStream,
File sourceFile,
Long offset) throws IOException;
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java
index cae4d3ef034..2a4771925f0 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java
@@ -196,10 +196,11 @@ public interface RequestFactory {
* @param destKey destination object key
* @param options options for the request
* @return the request.
+ * @throws PathIOException if multipart uploads are disabled
*/
InitiateMultipartUploadRequest newMultipartUploadRequest(
String destKey,
- @Nullable PutObjectOptions options);
+ @Nullable PutObjectOptions options) throws PathIOException;
/**
* Complete a multipart upload.
@@ -248,7 +249,7 @@ public interface RequestFactory {
String destKey,
String uploadId,
int partNumber,
- int size,
+ long size,
InputStream uploadStream,
File sourceFile,
long offset) throws PathIOException;
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java
index d6044edde29..e53c690431e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java
@@ -217,6 +217,10 @@ public abstract class AbstractS3ACommitter extends PathOutputCommitter
LOG.debug("{} instantiated for job \"{}\" ID {} with destination {}",
role, jobName(context), jobIdString(context), outputPath);
S3AFileSystem fs = getDestS3AFS();
+ if (!fs.isMultipartUploadEnabled()) {
+ throw new PathCommitException(outputPath, "Multipart uploads are disabled for the FileSystem,"
+ + " the committer can't proceed.");
+ }
// set this thread's context with the job ID.
// audit spans created in this thread will pick
// up this value., including the commit operations instance
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java
index ce11df03839..7227941e344 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java
@@ -124,6 +124,11 @@ public class RequestFactoryImpl implements RequestFactory {
*/
private final StorageClass storageClass;
+ /**
+ * Is multipart upload enabled.
+ */
+ private final boolean isMultipartUploadEnabled;
+
/**
* Constructor.
* @param builder builder with all the configuration.
@@ -137,6 +142,7 @@ public class RequestFactoryImpl implements RequestFactory {
this.requestPreparer = builder.requestPreparer;
this.contentEncoding = builder.contentEncoding;
this.storageClass = builder.storageClass;
+ this.isMultipartUploadEnabled = builder.isMultipartUploadEnabled;
}
/**
@@ -460,7 +466,10 @@ public class RequestFactoryImpl implements RequestFactory {
@Override
public InitiateMultipartUploadRequest newMultipartUploadRequest(
final String destKey,
- @Nullable final PutObjectOptions options) {
+ @Nullable final PutObjectOptions options) throws PathIOException {
+ if (!isMultipartUploadEnabled) {
+ throw new PathIOException(destKey, "Multipart uploads are disabled.");
+ }
final ObjectMetadata objectMetadata = newObjectMetadata(-1);
maybeSetMetadata(options, objectMetadata);
final InitiateMultipartUploadRequest initiateMPURequest =
@@ -509,7 +518,7 @@ public class RequestFactoryImpl implements RequestFactory {
String destKey,
String uploadId,
int partNumber,
- int size,
+ long size,
InputStream uploadStream,
File sourceFile,
long offset) throws PathIOException {
@@ -682,6 +691,11 @@ public class RequestFactoryImpl implements RequestFactory {
*/
private PrepareRequest requestPreparer;
+ /**
+ * Is multipart upload enabled on the path?
+ */
+ private boolean isMultipartUploadEnabled = true;
+
private RequestFactoryBuilder() {
}
@@ -767,6 +781,18 @@ public class RequestFactoryImpl implements RequestFactory {
this.requestPreparer = value;
return this;
}
+
+ /**
+ * Multipart upload enabled.
+ *
+ * @param value new value
+ * @return the builder
+ */
+ public RequestFactoryBuilder withMultipartUploadEnabled(
+ final boolean value) {
+ this.isMultipartUploadEnabled = value;
+ return this;
+ }
}
/**
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/BlockOutputStreamStatistics.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/BlockOutputStreamStatistics.java
index bd1466b2a43..554b628d003 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/BlockOutputStreamStatistics.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/BlockOutputStreamStatistics.java
@@ -32,21 +32,21 @@ public interface BlockOutputStreamStatistics extends Closeable,
* Block is queued for upload.
* @param blockSize block size.
*/
- void blockUploadQueued(int blockSize);
+ void blockUploadQueued(long blockSize);
/**
* Queued block has been scheduled for upload.
* @param timeInQueue time in the queue.
* @param blockSize block size.
*/
- void blockUploadStarted(Duration timeInQueue, int blockSize);
+ void blockUploadStarted(Duration timeInQueue, long blockSize);
/**
* A block upload has completed. Duration excludes time in the queue.
* @param timeSinceUploadStarted time in since the transfer began.
* @param blockSize block size
*/
- void blockUploadCompleted(Duration timeSinceUploadStarted, int blockSize);
+ void blockUploadCompleted(Duration timeSinceUploadStarted, long blockSize);
/**
* A block upload has failed. Duration excludes time in the queue.
@@ -57,7 +57,7 @@ public interface BlockOutputStreamStatistics extends Closeable,
* @param timeSinceUploadStarted time in since the transfer began.
* @param blockSize block size
*/
- void blockUploadFailed(Duration timeSinceUploadStarted, int blockSize);
+ void blockUploadFailed(Duration timeSinceUploadStarted, long blockSize);
/**
* Intermediate report of bytes uploaded.
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/EmptyS3AStatisticsContext.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/EmptyS3AStatisticsContext.java
index d10b6484175..6454065b240 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/EmptyS3AStatisticsContext.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/EmptyS3AStatisticsContext.java
@@ -442,22 +442,22 @@ public final class EmptyS3AStatisticsContext implements S3AStatisticsContext {
implements BlockOutputStreamStatistics {
@Override
- public void blockUploadQueued(final int blockSize) {
+ public void blockUploadQueued(final long blockSize) {
}
@Override
public void blockUploadStarted(final Duration timeInQueue,
- final int blockSize) {
+ final long blockSize) {
}
@Override
public void blockUploadCompleted(final Duration timeSinceUploadStarted,
- final int blockSize) {
+ final long blockSize) {
}
@Override
public void blockUploadFailed(final Duration timeSinceUploadStarted,
- final int blockSize) {
+ final long blockSize) {
}
@Override
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index ae042b16199..7e2a1c2b120 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -1727,7 +1727,9 @@ The "fast" output stream
1. Uploads large files as blocks with the size set by
`fs.s3a.multipart.size`. That is: the threshold at which multipart uploads
- begin and the size of each upload are identical.
+ begin and the size of each upload are identical. This behavior can be enabled
+ or disabled with the flag `fs.s3a.multipart.uploads.enabled`, which is set to
+ true by default.
1. Buffers blocks to disk (default) or in on-heap or off-heap memory.
1. Uploads blocks in parallel in background threads.
1. Begins uploading blocks as soon as the buffered data exceeds this partition
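An end-to-end sketch matching the documentation text above, using the string keys exactly as a user would set them. The destination path is a placeholder, and the buffer is forced to disk because the whole file is staged locally before the single PUT:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SinglePutWriteSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean("fs.s3a.multipart.uploads.enabled", false); // one PUT per file
    conf.set("fs.s3a.fast.upload.buffer", "disk");              // required when multipart is off

    Path dest = new Path("s3a://example-bucket/big.bin");       // placeholder destination
    try (FileSystem fs = FileSystem.newInstance(dest.toUri(), conf);
         FSDataOutputStream out = fs.create(dest)) {
      out.write(new byte[8 * 1024 * 1024]);  // buffered to local disk
    }                                        // uploaded as a single PUT on close()
  }
}
```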
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java
index a859cd534bb..40857373fb8 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java
@@ -200,6 +200,11 @@ public class MockS3AFileSystem extends S3AFileSystem {
return true;
}
+ @Override
+ public boolean isMultipartUploadEnabled() {
+ return true;
+ }
+
/**
* Make operation to set the s3 client public.
* @param client client.
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocolFailure.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocolFailure.java
new file mode 100644
index 00000000000..41593c2b263
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocolFailure.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.commit.magic;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.commit.CommitConstants;
+import org.apache.hadoop.fs.s3a.commit.PathCommitException;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+
+import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_UPLOADS_ENABLED;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides;
+import static org.apache.hadoop.fs.s3a.commit.CommitConstants.FS_S3A_COMMITTER_NAME;
+import static org.apache.hadoop.fs.s3a.commit.CommitConstants.MAGIC_COMMITTER_ENABLED;
+import static org.apache.hadoop.fs.s3a.commit.CommitConstants.S3A_COMMITTER_FACTORY_KEY;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Verify that the magic committer cannot be created if the FS doesn't support multipart
+ * uploads.
+ */
+public class ITestMagicCommitProtocolFailure extends AbstractS3ATestBase {
+
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ removeBucketOverrides(getTestBucketName(conf), conf,
+ MAGIC_COMMITTER_ENABLED,
+ S3A_COMMITTER_FACTORY_KEY,
+ FS_S3A_COMMITTER_NAME,
+ MULTIPART_UPLOADS_ENABLED);
+ conf.setBoolean(MULTIPART_UPLOADS_ENABLED, false);
+ conf.set(S3A_COMMITTER_FACTORY_KEY, CommitConstants.S3A_COMMITTER_FACTORY);
+ conf.set(FS_S3A_COMMITTER_NAME, CommitConstants.COMMITTER_NAME_MAGIC);
+ return conf;
+ }
+
+ @Test
+ public void testCreateCommitter() throws Exception {
+ TaskAttemptContext tContext = new TaskAttemptContextImpl(getConfiguration(),
+ new TaskAttemptID());
+ Path commitPath = methodPath();
+ LOG.debug("Trying to create a committer on the path: {}", commitPath);
+ intercept(PathCommitException.class,
+ () -> new MagicS3GuardCommitter(commitPath, tContext));
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocolFailure.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocolFailure.java
new file mode 100644
index 00000000000..a6d2c57d1d2
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocolFailure.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.commit.staging.integration;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.commit.CommitConstants;
+import org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants;
+import org.apache.hadoop.fs.s3a.commit.PathCommitException;
+import org.apache.hadoop.fs.s3a.commit.staging.StagingCommitter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+
+import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_UPLOADS_ENABLED;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides;
+import static org.apache.hadoop.fs.s3a.commit.CommitConstants.FS_S3A_COMMITTER_NAME;
+import static org.apache.hadoop.fs.s3a.commit.CommitConstants.S3A_COMMITTER_FACTORY_KEY;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Verify that a staging committer cannot be created if the FS doesn't support multipart
+ * uploads.
+ */
+public class ITestStagingCommitProtocolFailure extends AbstractS3ATestBase {
+
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ removeBucketOverrides(getTestBucketName(conf), conf,
+ S3A_COMMITTER_FACTORY_KEY,
+ FS_S3A_COMMITTER_NAME,
+ MULTIPART_UPLOADS_ENABLED);
+ conf.setBoolean(MULTIPART_UPLOADS_ENABLED, false);
+ conf.set(S3A_COMMITTER_FACTORY_KEY, CommitConstants.S3A_COMMITTER_FACTORY);
+ conf.set(FS_S3A_COMMITTER_NAME, InternalCommitterConstants.COMMITTER_NAME_STAGING);
+ return conf;
+ }
+
+ @Test
+ public void testCreateCommitter() throws Exception {
+ TaskAttemptContext tContext = new TaskAttemptContextImpl(getConfiguration(),
+ new TaskAttemptID());
+ Path commitPath = methodPath();
+ LOG.debug("Trying to create a committer on the path: {}", commitPath);
+ intercept(PathCommitException.class,
+ () -> new StagingCommitter(commitPath, tContext));
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java
index 5c243bb820f..7c85142d437 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.fs.s3a.impl;
import java.io.ByteArrayInputStream;
import java.io.File;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.concurrent.atomic.AtomicLong;
@@ -155,7 +156,7 @@ public class TestRequestFactory extends AbstractHadoopTestBase {
* Create objects through the factory.
* @param factory factory
*/
- private void createFactoryObjects(RequestFactory factory) {
+ private void createFactoryObjects(RequestFactory factory) throws IOException {
String path = "path";
String path2 = "path2";
String id = "1";
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFileUploadSinglePut.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFileUploadSinglePut.java
new file mode 100644
index 00000000000..08192969e2d
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFileUploadSinglePut.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import java.io.IOException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.junit.Test;
+
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.s3a.Constants;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.IO_CHUNK_BUFFER_SIZE;
+import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER;
+import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_DISK;
+import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE;
+import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_UPLOADS_ENABLED;
+import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestPropertyBytes;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides;
+import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_PUT_REQUESTS;
+import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter;
+
+/**
+ * Test a file upload using a single PUT operation. Multipart uploads will
+ * be disabled in the test.
+ */
+public class ITestS3AHugeFileUploadSinglePut extends S3AScaleTestBase {
+
+ public static final Logger LOG = LoggerFactory.getLogger(
+ ITestS3AHugeFileUploadSinglePut.class);
+
+ private long fileSize;
+
+ @Override
+ protected Configuration createScaleConfiguration() {
+ Configuration conf = super.createScaleConfiguration();
+ removeBucketOverrides(getTestBucketName(conf), conf,
+ FAST_UPLOAD_BUFFER,
+ IO_CHUNK_BUFFER_SIZE,
+ KEY_HUGE_FILESIZE,
+ MULTIPART_UPLOADS_ENABLED,
+ MULTIPART_SIZE,
+ REQUEST_TIMEOUT);
+ conf.setBoolean(Constants.MULTIPART_UPLOADS_ENABLED, false);
+ fileSize = getTestPropertyBytes(conf, KEY_HUGE_FILESIZE,
+ DEFAULT_HUGE_FILESIZE);
+ // set a small part size to verify it does not impact block allocation size
+ conf.setLong(MULTIPART_SIZE, 10_000);
+ conf.set(FAST_UPLOAD_BUFFER, FAST_UPLOAD_BUFFER_DISK);
+ conf.setInt(IO_CHUNK_BUFFER_SIZE, 655360);
+ conf.set(REQUEST_TIMEOUT, "1h");
+ return conf;
+ }
+
+ @Test
+ public void uploadFileSinglePut() throws IOException {
+ LOG.info("Creating file with size : {}", fileSize);
+ S3AFileSystem fs = getFileSystem();
+ ContractTestUtils.createAndVerifyFile(fs,
+ methodPath(), fileSize);
+ // Exactly three put requests should be made during the upload of the file
+ // First one being the creation of the directory marker
+ // Second being the creation of the test file
+ // Third being the creation of directory marker on the file delete
+ assertThatStatisticCounter(fs.getIOStatistics(), OBJECT_PUT_REQUESTS.getSymbol())
+ .isEqualTo(3);
+ }
+}
From dd6d0ac5108ffa616241886d9e8d8f07dbc034cf Mon Sep 17 00:00:00 2001
From: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Date: Wed, 12 Apr 2023 11:08:23 +0800
Subject: [PATCH 51/78] YARN-11462. Fix Typo of hadoop-yarn-common. (#5539)
Co-authored-by: Shilun Fan
Reviewed-by: He Xiaoqiao
Signed-off-by: Shilun Fan
---
.../pb/client/ContainerManagementProtocolPBClientImpl.java | 4 ++--
.../hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java | 2 +-
.../apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java | 2 +-
.../yarn/security/client/ClientToAMTokenIdentifier.java | 2 +-
.../impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java | 2 +-
.../main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java | 2 +-
.../apache/hadoop/yarn/webapp/GenericExceptionHandler.java | 2 +-
.../java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java | 2 +-
.../org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java | 2 +-
.../api/records/impl/pb/TestSerializedExceptionPBImpl.java | 2 +-
.../hadoop/yarn/client/api/impl/TestTimelineClient.java | 2 +-
.../yarn/client/api/impl/TestTimelineClientForATS1_5.java | 2 +-
.../hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java | 4 ++--
.../org/apache/hadoop/yarn/conf/TestYarnConfiguration.java | 4 ++--
.../ifile/TestLogAggregationIndexedFileController.java | 2 +-
.../hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java | 2 +-
16 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java
index 86fc398f252..cdd2661f05a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java
@@ -109,11 +109,11 @@ public class ContainerManagementProtocolPBClientImpl implements ContainerManagem
ProtobufRpcEngine2.class);
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
- int expireIntvl = conf.getInt(NM_COMMAND_TIMEOUT, DEFAULT_COMMAND_TIMEOUT);
+ int expireInterval = conf.getInt(NM_COMMAND_TIMEOUT, DEFAULT_COMMAND_TIMEOUT);
proxy =
(ContainerManagementProtocolPB) RPC.getProxy(ContainerManagementProtocolPB.class,
clientVersion, addr, ugi, conf,
- NetUtils.getDefaultSocketFactory(conf), expireIntvl);
+ NetUtils.getDefaultSocketFactory(conf), expireInterval);
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java
index 83a4df451bb..ed74addd162 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java
@@ -531,7 +531,7 @@ public class TimelineV2ClientImpl extends TimelineV2Client {
count++;
if (count == numberOfAsyncsToMerge) {
// Flush the entities if the number of the async
- // putEntites merged reaches the desired limit. To avoid
+ // putEntities merged reaches the desired limit. To avoid
// collecting multiple entities and delaying for a long
// time.
entitiesHolder.run();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java
index 15d4efc03e6..fed6e46e497 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java
@@ -62,7 +62,7 @@ public class FSStoreOpHandler {
registerLog(NODE_LABEL_STORE, RemoveClusterLabelOp.OPCODE,
RemoveClusterLabelOp.class);
- //NodeAttibute operation
+ //NodeAttribute operation
registerLog(NODE_ATTRIBUTE, AddNodeToAttributeLogOp.OPCODE,
AddNodeToAttributeLogOp.class);
registerLog(NODE_ATTRIBUTE, RemoveNodeToAttributeLogOp.OPCODE,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/client/ClientToAMTokenIdentifier.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/client/ClientToAMTokenIdentifier.java
index 2085982e2fe..e18a1a2d472 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/client/ClientToAMTokenIdentifier.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/client/ClientToAMTokenIdentifier.java
@@ -44,7 +44,7 @@ public class ClientToAMTokenIdentifier extends TokenIdentifier {
private ClientToAMTokenIdentifierProto proto;
// TODO: Add more information in the tokenID such that it is not
- // transferrable, more secure etc.
+ // transferable, more secure etc.
public ClientToAMTokenIdentifier() {
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java
index d420bda5d7b..0ab81661c67 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java
@@ -29,7 +29,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords
.RemoveFromClusterNodeLabelsRequest;
/**
- * Proto class to handlde RemoveFromClusterNodeLabels request.
+ * Proto class to handle RemoveFromClusterNodeLabels request.
*/
public class RemoveFromClusterNodeLabelsRequestPBImpl
extends RemoveFromClusterNodeLabelsRequest {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java
index a859ffbc1f2..8a1686987b6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java
@@ -252,7 +252,7 @@ public class Dispatcher extends HttpServlet {
checkState(devMode, "only in dev mode");
new Timer("webapp exit", true).schedule(new TimerTask() {
@Override public void run() {
- LOG.info("WebAppp /{} exiting...", webApp.name());
+ LOG.info("WebApp /{} exiting...", webApp.name());
webApp.stop();
System.exit(0); // FINDBUG: this is intended in dev mode
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/GenericExceptionHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/GenericExceptionHandler.java
index b8fc9e00541..7cb6018e92a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/GenericExceptionHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/GenericExceptionHandler.java
@@ -53,7 +53,7 @@ public class GenericExceptionHandler implements ExceptionMapper<Exception> {
@Override
public Response toResponse(Exception e) {
if (LOG.isTraceEnabled()) {
- LOG.trace("GOT EXCEPITION", e);
+ LOG.trace("GOT EXCEPTION", e);
}
// Don't catch this as filter forward on 404
// (ServletContainer.FEATURE_FILTER_FORWARD_ON_404)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java
index 210cf0482a0..9c04e00e384 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java
@@ -88,7 +88,7 @@ public abstract class HtmlPage extends TextView {
}
/**
- * Render the the HTML page.
+ * Render the HTML page.
* @param html the page to render data to.
*/
protected abstract void render(Page.HTML<__> html);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java
index e9ac044affc..2fd6760cd97 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java
@@ -150,7 +150,7 @@ public class BasePBImplRecordsTest {
}
/**
- * this method generate record instance by calling newIntance
+ * this method generate record instance by calling newInstance
* using reflection, add register the generated value to typeValueCache
*/
@SuppressWarnings("rawtypes")
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/impl/pb/TestSerializedExceptionPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/impl/pb/TestSerializedExceptionPBImpl.java
index d4bfb318fed..73a8b85922f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/impl/pb/TestSerializedExceptionPBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/impl/pb/TestSerializedExceptionPBImpl.java
@@ -49,7 +49,7 @@ public class TestSerializedExceptionPBImpl {
try {
pb.deSerialize();
- fail("deSerialze should throw YarnRuntimeException");
+ fail("deSerialize should throw YarnRuntimeException");
} catch (YarnRuntimeException e) {
assertEquals(ClassNotFoundException.class,
e.getCause().getClass());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java
index 507cac61332..4b9b7c5f503 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java
@@ -439,7 +439,7 @@ public class TestTimelineClient {
public static TimelineDomain generateDomain() {
TimelineDomain domain = new TimelineDomain();
- domain.setId("namesapce id");
+ domain.setId("namespace id");
domain.setDescription("domain description");
domain.setOwner("domain owner");
domain.setReaders("domain_reader");
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientForATS1_5.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientForATS1_5.java
index 4d4e412e732..2fdff72a4f4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientForATS1_5.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientForATS1_5.java
@@ -238,7 +238,7 @@ public class TestTimelineClientForATS1_5 {
private static TimelineDomain generateDomain() {
TimelineDomain domain = new TimelineDomain();
- domain.setId("namesapce id");
+ domain.setId("namespace id");
domain.setDescription("domain description");
domain.setOwner("domain owner");
domain.setReaders("domain_reader");
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java
index a26b4bf0a67..659d6cd7517 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java
@@ -241,7 +241,7 @@ public class TestTimelineClientV2Impl {
@Test
void testSyncCall() throws Exception {
try {
- // sync entity should not be be merged with Async
+ // sync entity should not be merged with Async
client.putEntities(generateEntity("1"));
client.putEntitiesAsync(generateEntity("2"));
client.putEntitiesAsync(generateEntity("3"));
@@ -360,7 +360,7 @@ public class TestTimelineClientV2Impl {
new byte[0], "kind", new byte[0], "service");
client.setTimelineCollectorInfo(CollectorInfo.newInstance(null, token));
assertNull(client.currentTimelineToken,
- "Timeline token in v2 client should not be set as token kind " + "is unexepcted.");
+ "Timeline token in v2 client should not be set as token kind " + "is unexpected.");
assertEquals(0, ugi.getTokens().size());
token = Token.newInstance(new byte[0], TimelineDelegationTokenIdentifier.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java
index b17c1806de3..e4547a9163d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java
@@ -36,7 +36,7 @@ public class TestYarnConfiguration {
void testDefaultRMWebUrl() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
String rmWebUrl = WebAppUtils.getRMWebAppURLWithScheme(conf);
- // shouldn't have a "/" on the end of the url as all the other uri routinnes
+ // shouldn't have a "/" on the end of the url as all the other uri routines
// specifically add slashes and Jetty doesn't handle double slashes.
assertNotSame("http://0.0.0.0:8088",
rmWebUrl,
@@ -77,7 +77,7 @@ public class TestYarnConfiguration {
String[] parts = rmWebUrl.split(":");
assertEquals(24543,
Integer.parseInt(parts[parts.length - 1]),
- "RM Web URL Port is incrrect");
+ "RM Web URL Port is incorrect");
assertNotSame("http://rmtesting:24543", rmWebUrl,
"RM Web Url not resolved correctly. Should not be rmtesting");
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexedFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexedFileController.java
index cd178382b52..b7fcb18ff60 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexedFileController.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexedFileController.java
@@ -385,7 +385,7 @@ public class TestLogAggregationIndexedFileController
@Test
@Timeout(15000)
- void testFetchApplictionLogsHar() throws Exception {
+ void testFetchApplicationLogsHar() throws Exception {
List<String> newLogTypes = new ArrayList<>();
newLogTypes.add("syslog");
newLogTypes.add("stdout");
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java
index e769a21a750..099684318f4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java
@@ -143,7 +143,7 @@ public class TestFileSystemNodeLabelsStore extends NodeLabelTestBase {
"p4", toSet(toNodeId("n4")),
"p2", toSet(toNodeId("n2"))));
- // stutdown mgr and start a new mgr
+ // shutdown mgr and start a new mgr
mgr.stop();
mgr = new MockNodeLabelManager();
mgr.init(conf);
From 06f9bdffa6a717600e46f5d6b6efd7783121b546 Mon Sep 17 00:00:00 2001
From: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Date: Thu, 13 Apr 2023 00:53:20 +0800
Subject: [PATCH 52/78] YARN-10846. Add dispatcher metrics to NM. (#4687)
---
.../hadoop/yarn/conf/YarnConfiguration.java | 4 +
.../yarn/metrics/GenericEventTypeMetrics.java | 6 +-
.../src/main/resources/yarn-default.xml | 10 ++
.../GenericEventTypeMetricsManager.java | 43 +++++++++
.../yarn/server/nodemanager/NodeManager.java | 24 ++++-
.../ContainerManagerImpl.java | 70 +++++++++++++-
.../nodemanager/DummyContainerManager.java | 10 +-
.../BaseContainerSchedulerTest.java | 2 +-
.../TestContainerManagerRecovery.java | 4 +-
.../metrics/TestNodeManagerMetrics.java | 94 +++++++++++++++++++
.../GenericEventTypeMetricsManager.java | 6 +-
.../hadoop/yarn/server/MiniYARNCluster.java | 10 +-
12 files changed, 261 insertions(+), 22 deletions(-)
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/GenericEventTypeMetricsManager.java
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index eb7d3143ca7..6d77eb492dc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -3070,6 +3070,10 @@ public class YarnConfiguration extends Configuration {
+ "amrmproxy.ha.enable";
public static final boolean DEFAULT_AMRM_PROXY_HA_ENABLED = false;
+ // Enable NM Dispatcher Metric default False.
+ public static final String NM_DISPATCHER_METRIC_ENABLED = NM_PREFIX + "dispatcher.metric.enable";
+ public static final boolean DEFAULT_NM_DISPATCHER_METRIC_ENABLED = false;
+
/**
* Default platform-agnostic CLASSPATH for YARN applications. A
* comma-separated list of CLASSPATH entries. The parameter expansion marker
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/metrics/GenericEventTypeMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/metrics/GenericEventTypeMetrics.java
index 464edb27782..1809ad159d9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/metrics/GenericEventTypeMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/metrics/GenericEventTypeMetrics.java
@@ -57,10 +57,8 @@ public class GenericEventTypeMetrics<T extends Enum<T>>
//Initialize enum
for (final T type : enums) {
- String eventCountMetricsName =
- type.toString() + "_" + "event_count";
- String processingTimeMetricsName =
- type.toString() + "_" + "processing_time";
+ String eventCountMetricsName = type + "_" + "event_count";
+ String processingTimeMetricsName = type + "_" + "processing_time";
eventCountMetrics.put(type, this.registry.
newGauge(eventCountMetricsName, eventCountMetricsName, 0L));
processingTimeMetrics.put(type, this.registry.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index ab422330788..b9385d1c276 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -5065,6 +5065,16 @@
+
+  <property>
+    <name>yarn.nodemanager.dispatcher.metric.enable</name>
+    <value>false</value>
+    <description>
+      Yarn NodeManager enables Dispatcher Metric.
+      if true, will enable dispatcher metric; if false, will not enable dispatcher metric;
+      Default is false.
+    </description>
+  </property>
  <property>
    <name>yarn.router.interceptor.user-thread-pool.minimum-pool-size</name>
    <value>5</value>
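For reference, a minimal sketch (not part of this patch) of flipping the new switch programmatically instead of through yarn-site.xml; the class name EnableNmDispatcherMetricSketch is hypothetical, and only the YarnConfiguration constants introduced above are assumed.

    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class EnableNmDispatcherMetricSketch {
      public static void main(String[] args) {
        YarnConfiguration conf = new YarnConfiguration();
        // Equivalent to setting yarn.nodemanager.dispatcher.metric.enable=true
        // in yarn-site.xml; the shipped default above is false.
        conf.setBoolean(YarnConfiguration.NM_DISPATCHER_METRIC_ENABLED, true);
        System.out.println(conf.getBoolean(
            YarnConfiguration.NM_DISPATCHER_METRIC_ENABLED,
            YarnConfiguration.DEFAULT_NM_DISPATCHER_METRIC_ENABLED));
      }
    }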
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/GenericEventTypeMetricsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/GenericEventTypeMetricsManager.java
new file mode 100644
index 00000000000..88adf8a0d51
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/GenericEventTypeMetricsManager.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.nodemanager;
+
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics;
+
+import static org.apache.hadoop.metrics2.lib.Interns.info;
+
+public final class GenericEventTypeMetricsManager {
+
+ private GenericEventTypeMetricsManager() {
+ // nothing to do
+ }
+
+ // Construct a GenericEventTypeMetrics for dispatcher
+ @SuppressWarnings("unchecked")
+ public static <T extends Enum<T>> GenericEventTypeMetrics<T>
+ create(String dispatcherName, Class<T> eventTypeClass) {
+ return new GenericEventTypeMetrics.EventTypeMetricsBuilder<T>()
+ .setMs(DefaultMetricsSystem.instance())
+ .setInfo(info("GenericEventTypeMetrics for " + eventTypeClass.getName(),
+ "Metrics for " + dispatcherName))
+ .setEnumClass(eventTypeClass)
+ .setEnums(eventTypeClass.getEnumConstants())
+ .build().registerMetrics();
+ }
+}
\ No newline at end of file
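A hedged usage sketch of the helper above: build per-event-type metrics for an enum and attach them to an AsyncDispatcher, mirroring what NodeManager.createNMDispatcher() does later in this patch. The wrapper class DispatcherMetricsSketch and the dispatcher name are illustrative; only calls that appear in this patch (create, addMetrics, getEnumClass, getName) are used.

    import org.apache.hadoop.yarn.event.AsyncDispatcher;
    import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics;
    import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEventType;
    import org.apache.hadoop.yarn.server.nodemanager.GenericEventTypeMetricsManager;

    public class DispatcherMetricsSketch {
      @SuppressWarnings("unchecked")
      public static AsyncDispatcher newInstrumentedDispatcher() {
        AsyncDispatcher dispatcher = new AsyncDispatcher("NM Event dispatcher");
        // create() registers one <type>_event_count and one <type>_processing_time
        // gauge per enum constant with the default metrics system.
        GenericEventTypeMetrics<ContainerManagerEventType> metrics =
            GenericEventTypeMetricsManager.create(dispatcher.getName(),
                ContainerManagerEventType.class);
        dispatcher.addMetrics(metrics, metrics.getEnumClass());
        return dispatcher;
      }
    }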
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 81e60361dff..438a39b0973 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.AppCollectorData;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
@@ -144,8 +145,10 @@ public class NodeManager extends CompositeService
private AtomicBoolean isStopping = new AtomicBoolean(false);
private boolean rmWorkPreservingRestartEnabled;
private boolean shouldExitOnShutdownEvent = false;
+ private boolean nmDispatherMetricEnabled;
private NMLogAggregationStatusTracker nmLogAggregationStatusTracker;
+
/**
* Default Container State transition listener.
*/
@@ -366,6 +369,10 @@ public class NodeManager extends CompositeService
.RM_WORK_PRESERVING_RECOVERY_ENABLED,
YarnConfiguration.DEFAULT_RM_WORK_PRESERVING_RECOVERY_ENABLED);
+ nmDispatherMetricEnabled = conf.getBoolean(
+ YarnConfiguration.NM_DISPATCHER_METRIC_ENABLED,
+ YarnConfiguration.DEFAULT_NM_DISPATCHER_METRIC_ENABLED);
+
try {
initAndStartRecoveryStore(conf);
} catch (IOException e) {
@@ -1006,8 +1013,17 @@ public class NodeManager extends CompositeService
/**
* Unit test friendly.
*/
+ @SuppressWarnings("unchecked")
protected AsyncDispatcher createNMDispatcher() {
- return new AsyncDispatcher("NM Event dispatcher");
+ dispatcher = new AsyncDispatcher("NM Event dispatcher");
+ if (nmDispatherMetricEnabled) {
+ GenericEventTypeMetrics<ContainerManagerEventType> eventTypeMetrics =
+ GenericEventTypeMetricsManager.create(dispatcher.getName(),
+ ContainerManagerEventType.class);
+ dispatcher.addMetrics(eventTypeMetrics, eventTypeMetrics.getEnumClass());
+ LOG.info("NM Event dispatcher Metric Initialization Completed.");
+ }
+ return dispatcher;
}
//For testing
@@ -1052,4 +1068,10 @@ public class NodeManager extends CompositeService
Context ctxt) {
return new NMLogAggregationStatusTracker(ctxt);
}
+
+ @VisibleForTesting
+ @Private
+ public AsyncDispatcher getDispatcher() {
+ return dispatcher;
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index cc5f0d914b8..e07a0e1cc18 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.protocolrecords.GetLocalizationStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetLocalizationStatusesResponse;
import org.apache.hadoop.yarn.api.records.LocalizationStatus;
+import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.UpdateContainerTokenEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerTokenUpdatedEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent;
import org.apache.hadoop.yarn.server.nodemanager.recovery.RecoveryIterator;
@@ -105,6 +107,7 @@ import org.apache.hadoop.yarn.server.api.ContainerType;
import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit;
import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus;
import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent;
+import org.apache.hadoop.yarn.server.nodemanager.GenericEventTypeMetricsManager;
import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedContainersEvent;
import org.apache.hadoop.yarn.server.nodemanager.CMgrUpdateContainersEvent;
import org.apache.hadoop.yarn.server.nodemanager.CMgrSignalContainersEvent;
@@ -120,6 +123,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
import org.apache.hadoop.yarn.server.nodemanager.amrmproxy.AMRMProxyService;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerInitEvent;
+
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationFinishEvent;
@@ -217,7 +221,7 @@ public class ContainerManagerImpl extends CompositeService implements
protected final NodeStatusUpdater nodeStatusUpdater;
protected LocalDirsHandlerService dirsHandler;
- protected final AsyncDispatcher dispatcher;
+ private AsyncDispatcher dispatcher;
private final DeletionService deletionService;
private LogHandler logHandler;
@@ -233,6 +237,7 @@ public class ContainerManagerImpl extends CompositeService implements
// NM metrics publisher is set only if the timeline service v.2 is enabled
private NMTimelinePublisher nmMetricsPublisher;
private boolean timelineServiceV2Enabled;
+ private boolean nmDispatherMetricEnabled;
public ContainerManagerImpl(Context context, ContainerExecutor exec,
DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
@@ -242,7 +247,7 @@ public class ContainerManagerImpl extends CompositeService implements
this.dirsHandler = dirsHandler;
// ContainerManager level dispatcher.
- dispatcher = new AsyncDispatcher("NM ContainerManager dispatcher");
+ dispatcher = createContainerManagerDispatcher();
this.deletionService = deletionContext;
this.metrics = metrics;
@@ -324,10 +329,67 @@ public class ContainerManagerImpl extends CompositeService implements
YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) +
SHUTDOWN_CLEANUP_SLOP_MS;
+ nmDispatherMetricEnabled = conf.getBoolean(
+ YarnConfiguration.NM_DISPATCHER_METRIC_ENABLED,
+ YarnConfiguration.DEFAULT_NM_DISPATCHER_METRIC_ENABLED);
+
super.serviceInit(conf);
recover();
}
+ @SuppressWarnings("unchecked")
+ protected AsyncDispatcher createContainerManagerDispatcher() {
+ dispatcher = new AsyncDispatcher("NM ContainerManager dispatcher");
+
+ if (!nmDispatherMetricEnabled) {
+ return dispatcher;
+ }
+
+ GenericEventTypeMetrics<ContainerEventType> containerEventTypeMetrics =
+ GenericEventTypeMetricsManager.create(dispatcher.getName(), ContainerEventType.class);
+ dispatcher.addMetrics(containerEventTypeMetrics, containerEventTypeMetrics.getEnumClass());
+
+ GenericEventTypeMetrics<LocalizationEventType> localizationEventTypeMetrics =
+ GenericEventTypeMetricsManager.create(dispatcher.getName(), LocalizationEventType.class);
+ dispatcher.addMetrics(localizationEventTypeMetrics,
+ localizationEventTypeMetrics.getEnumClass());
+
+ GenericEventTypeMetrics<ApplicationEventType> applicationEventTypeMetrics =
+ GenericEventTypeMetricsManager.create(dispatcher.getName(), ApplicationEventType.class);
+ dispatcher.addMetrics(applicationEventTypeMetrics,
+ applicationEventTypeMetrics.getEnumClass());
+
+ GenericEventTypeMetrics<ContainersLauncherEventType> containersLauncherEventTypeMetrics =
+ GenericEventTypeMetricsManager.create(dispatcher.getName(),
+ ContainersLauncherEventType.class);
+ dispatcher.addMetrics(containersLauncherEventTypeMetrics,
+ containersLauncherEventTypeMetrics.getEnumClass());
+
+ GenericEventTypeMetrics<ContainerSchedulerEventType> containerSchedulerEventTypeMetrics =
+ GenericEventTypeMetricsManager.create(dispatcher.getName(),
+ ContainerSchedulerEventType.class);
+ dispatcher.addMetrics(containerSchedulerEventTypeMetrics,
+ containerSchedulerEventTypeMetrics.getEnumClass());
+
+ GenericEventTypeMetrics<ContainersMonitorEventType> containersMonitorEventTypeMetrics =
+ GenericEventTypeMetricsManager.create(dispatcher.getName(),
+ ContainersMonitorEventType.class);
+ dispatcher.addMetrics(containersMonitorEventTypeMetrics,
+ containersMonitorEventTypeMetrics.getEnumClass());
+
+ GenericEventTypeMetrics<AuxServicesEventType> auxServicesEventTypeTypeMetrics =
+ GenericEventTypeMetricsManager.create(dispatcher.getName(), AuxServicesEventType.class);
+ dispatcher.addMetrics(auxServicesEventTypeTypeMetrics,
+ auxServicesEventTypeTypeMetrics.getEnumClass());
+
+ GenericEventTypeMetrics<LocalizerEventType> localizerEventTypeMetrics =
+ GenericEventTypeMetricsManager.create(dispatcher.getName(), LocalizerEventType.class);
+ dispatcher.addMetrics(localizerEventTypeMetrics, localizerEventTypeMetrics.getEnumClass());
+ LOG.info("NM ContainerManager dispatcher Metric Initialization Completed.");
+
+ return dispatcher;
+ }
+
protected void createAMRMProxyService(Configuration conf) {
this.amrmProxyEnabled =
conf.getBoolean(YarnConfiguration.AMRM_PROXY_ENABLED,
@@ -2034,4 +2096,8 @@ public class ContainerManagerImpl extends CompositeService implements
public ResourceLocalizationService getResourceLocalizationService() {
return rsrcLocalizationSrvc;
}
+
+ public AsyncDispatcher getDispatcher() {
+ return dispatcher;
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
index 1acf3e9a378..fa6d04c044f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
@@ -70,7 +70,7 @@ public class DummyContainerManager extends ContainerManagerImpl {
NodeManagerMetrics metrics, LocalDirsHandlerService dirsHandler) {
super(context, exec, deletionContext, nodeStatusUpdater, metrics,
dirsHandler);
- dispatcher.disableExitOnDispatchException();
+ getDispatcher().disableExitOnDispatchException();
}
@Override
@@ -78,7 +78,7 @@ public class DummyContainerManager extends ContainerManagerImpl {
protected ResourceLocalizationService createResourceLocalizationService(
ContainerExecutor exec, DeletionService deletionContext, Context context,
NodeManagerMetrics metrics) {
- return new ResourceLocalizationService(super.dispatcher, exec,
+ return new ResourceLocalizationService(getDispatcher(), exec,
deletionContext, super.dirsHandler, context, metrics) {
@Override
public void handle(LocalizationEvent event) {
@@ -148,7 +148,7 @@ public class DummyContainerManager extends ContainerManagerImpl {
@SuppressWarnings("unchecked")
protected ContainersLauncher createContainersLauncher(Context context,
ContainerExecutor exec) {
- return new ContainersLauncher(context, super.dispatcher, exec,
+ return new ContainersLauncher(context, getDispatcher(), exec,
super.dirsHandler, this) {
@Override
public void handle(ContainersLauncherEvent event) {
@@ -156,12 +156,12 @@ public class DummyContainerManager extends ContainerManagerImpl {
ContainerId containerId = container.getContainerId();
switch (event.getType()) {
case LAUNCH_CONTAINER:
- dispatcher.getEventHandler().handle(
+ getDispatcher().getEventHandler().handle(
new ContainerEvent(containerId,
ContainerEventType.CONTAINER_LAUNCHED));
break;
case CLEANUP_CONTAINER:
- dispatcher.getEventHandler().handle(
+ getDispatcher().getEventHandler().handle(
new ContainerExitEvent(containerId,
ContainerEventType.CONTAINER_KILLED_ON_REQUEST, 0,
"Container exited with exit code 0."));
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerSchedulerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerSchedulerTest.java
index 5a495d74137..f3661a68e6c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerSchedulerTest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerSchedulerTest.java
@@ -135,7 +135,7 @@ public class BaseContainerSchedulerTest extends BaseContainerManagerTest {
@Override
protected ContainersMonitor createContainersMonitor(
ContainerExecutor exec) {
- return new ContainersMonitorImpl(exec, dispatcher, this.context) {
+ return new ContainersMonitorImpl(exec, getDispatcher(), this.context) {
// Define resources available for containers to be executed.
@Override
public long getPmemAllocatedForContainers() {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
index b960f5dea1f..868fb39ed15 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
@@ -797,7 +797,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
}
@Override
protected ContainerScheduler createContainerScheduler(Context context) {
- return new ContainerScheduler(context, dispatcher, metrics){
+ return new ContainerScheduler(context, getDispatcher(), metrics){
@Override
public ContainersMonitor getContainersMonitor() {
return new ContainersMonitorImpl(null, null, null) {
@@ -1001,7 +1001,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
return null;
}
};
- containerManager.dispatcher.disableExitOnDispatchException();
+ containerManager.getDispatcher().disableExitOnDispatchException();
return containerManager;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
index 33a3ae12f10..84216665156 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
@@ -17,11 +17,24 @@
*/
package org.apache.hadoop.yarn.server.nodemanager.metrics;
+import org.apache.hadoop.metrics2.MetricsInfo;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.source.JvmMetrics;
+
+import static org.apache.hadoop.metrics2.lib.Interns.info;
import static org.apache.hadoop.test.MetricsAsserts.*;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.event.Event;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
import org.apache.hadoop.yarn.util.Records;
import org.junit.After;
@@ -37,6 +50,7 @@ public class TestNodeManagerMetrics {
@Before
public void setup() {
DefaultMetricsSystem.initialize("NodeManager");
+ DefaultMetricsSystem.setMiniClusterMode(true);
metrics = NodeManagerMetrics.create();
}
@@ -140,4 +154,84 @@ public class TestNodeManagerMetrics {
assertGauge("NodeGpuUtilization", nodeGpuUtilization, rb);
assertGauge("ApplicationsRunning", applicationsRunning, rb);
}
+
+ private enum TestEnum {
+ TestEventType
+ }
+
+ private static class TestHandler implements EventHandler<Event> {
+
+ private long sleepTime = 1500;
+
+ TestHandler() {
+ }
+
+ TestHandler(long sleepTime) {
+ this.sleepTime = sleepTime;
+ }
+
+ @Override
+ public void handle(Event event) {
+ try {
+ // As long as 10000 events queued
+ Thread.sleep(this.sleepTime);
+ } catch (InterruptedException e) {
+ }
+ }
+ }
+
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testNMDispatcherMetricsHistogram() throws Exception {
+ YarnConfiguration conf = new YarnConfiguration();
+
+ NodeManager nm = new NodeManager();
+ nm.init(conf);
+ AsyncDispatcher dispatcher = nm.getDispatcher();
+
+ MetricsInfo metricsInfo = info(
+ "GenericEventTypeMetrics for " + TestEnum.class.getName(),
+ "Metrics for " + dispatcher.getName());
+
+ GenericEventTypeMetrics<TestEnum> genericEventTypeMetrics =
+ new GenericEventTypeMetrics.EventTypeMetricsBuilder<TestEnum>()
+ .setMs(DefaultMetricsSystem.instance())
+ .setInfo(metricsInfo)
+ .setEnumClass(TestEnum.class)
+ .setEnums(TestEnum.class.getEnumConstants())
+ .build().registerMetrics();
+
+ dispatcher.addMetrics(genericEventTypeMetrics, genericEventTypeMetrics.getEnumClass());
+ dispatcher.init(conf);
+
+ // Register handler
+ dispatcher.register(TestEnum.class, new TestHandler());
+ dispatcher.start();
+
+ for (int i = 0; i < 3; ++i) {
+ Event event = mock(Event.class);
+ when(event.getType()).thenReturn(TestEnum.TestEventType);
+ dispatcher.getEventHandler().handle(event);
+ }
+
+ // Check event type count.
+ GenericTestUtils.waitFor(() -> genericEventTypeMetrics.
+ get(TestEnum.TestEventType) == 3, 1000, 10000);
+
+ String testEventTypeCountExpect =
+ Long.toString(genericEventTypeMetrics.get(TestEnum.TestEventType));
+ Assert.assertNotNull(testEventTypeCountExpect);
+ String testEventTypeCountMetric =
+ genericEventTypeMetrics.getRegistry().get("TestEventType_event_count").toString();
+ Assert.assertNotNull(testEventTypeCountMetric);
+ Assert.assertEquals(testEventTypeCountExpect, testEventTypeCountMetric);
+
+ String testEventTypeProcessingTimeExpect =
+ Long.toString(genericEventTypeMetrics.getTotalProcessingTime(TestEnum.TestEventType));
+ Assert.assertNotNull(testEventTypeProcessingTimeExpect);
+ String testEventTypeProcessingTimeMetric =
+ genericEventTypeMetrics.getRegistry().get("TestEventType_processing_time").toString();
+ Assert.assertNotNull(testEventTypeProcessingTimeMetric);
+ Assert.assertEquals(testEventTypeProcessingTimeExpect, testEventTypeProcessingTimeMetric);
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/GenericEventTypeMetricsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/GenericEventTypeMetricsManager.java
index 8fda9b7f38a..8da793972e2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/GenericEventTypeMetricsManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/GenericEventTypeMetricsManager.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager;
+import org.apache.hadoop.metrics2.MetricsInfo;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics;
@@ -31,10 +32,11 @@ public final class GenericEventTypeMetricsManager {
// Construct a GenericEventTypeMetrics for dispatcher
public static <T extends Enum<T>> GenericEventTypeMetrics<T>
create(String dispatcherName, Class<T> eventTypeClass) {
+ MetricsInfo metricsInfo = info("GenericEventTypeMetrics for " + eventTypeClass.getName(),
+ "Metrics for " + dispatcherName);
return new GenericEventTypeMetrics.EventTypeMetricsBuilder<T>()
.setMs(DefaultMetricsSystem.instance())
- .setInfo(info("GenericEventTypeMetrics for " + eventTypeClass.getName(),
- "Metrics for " + dispatcherName))
+ .setInfo(metricsInfo)
.setEnumClass(eventTypeClass)
.setEnums(eventTypeClass.getEnumConstants())
.build().registerMetrics();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
index ceea3c28213..026495fa202 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
@@ -902,8 +902,8 @@ public class MiniYARNCluster extends CompositeService {
LOG.info("CustomAMRMProxyService is enabled. "
+ "All the AM->RM requests will be intercepted by the proxy");
AMRMProxyService amrmProxyService =
- useRpc ? new AMRMProxyService(getContext(), dispatcher)
- : new ShortCircuitedAMRMProxy(getContext(), dispatcher);
+ useRpc ? new AMRMProxyService(getContext(), getDispatcher())
+ : new ShortCircuitedAMRMProxy(getContext(), getDispatcher());
this.setAMRMProxyService(amrmProxyService);
addService(this.getAMRMProxyService());
} else {
@@ -934,8 +934,8 @@ public class MiniYARNCluster extends CompositeService {
LOG.info("CustomAMRMProxyService is enabled. "
+ "All the AM->RM requests will be intercepted by the proxy");
AMRMProxyService amrmProxyService =
- useRpc ? new AMRMProxyService(getContext(), dispatcher)
- : new ShortCircuitedAMRMProxy(getContext(), dispatcher);
+ useRpc ? new AMRMProxyService(getContext(), getDispatcher())
+ : new ShortCircuitedAMRMProxy(getContext(), getDispatcher());
this.setAMRMProxyService(amrmProxyService);
addService(this.getAMRMProxyService());
} else {
@@ -946,7 +946,7 @@ public class MiniYARNCluster extends CompositeService {
@Override
protected ContainersMonitor createContainersMonitor(ContainerExecutor
exec) {
- return new ContainersMonitorImpl(exec, dispatcher, this.context) {
+ return new ContainersMonitorImpl(exec, getDispatcher(), this.context) {
@Override
public float getVmemRatio() {
return 2.0f;
From 2b60d0c1f440e61b57085abd2d72a30db7c013cf Mon Sep 17 00:00:00 2001
From: Melissa You <31492618+melissayou@users.noreply.github.com>
Date: Thu, 13 Apr 2023 09:07:42 -0700
Subject: [PATCH 53/78] [HDFS-16971] Add read metrics for remote reads in
FileSystem Statistics #5534 (#5536)
---
.../java/org/apache/hadoop/fs/FileSystem.java | 34 +++++++++++++++++++
.../fs/FileSystemStorageStatistics.java | 5 ++-
.../fs/TestFileSystemStorageStatistics.java | 6 +++-
.../org/apache/hadoop/hdfs/DFSClient.java | 10 ++++--
.../apache/hadoop/hdfs/DFSInputStream.java | 9 +++--
.../hadoop/hdfs/DFSStripedInputStream.java | 6 ++--
.../org/apache/hadoop/hdfs/StripeReader.java | 5 ++-
7 files changed, 64 insertions(+), 11 deletions(-)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
index 763af197a1f..5d8f0e575f2 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
@@ -3942,6 +3942,7 @@ public abstract class FileSystem extends Configured
private volatile long bytesReadDistanceOfThreeOrFour;
private volatile long bytesReadDistanceOfFiveOrLarger;
private volatile long bytesReadErasureCoded;
+ private volatile long remoteReadTimeMS;
/**
* Add another StatisticsData object to this one.
@@ -3959,6 +3960,7 @@ public abstract class FileSystem extends Configured
this.bytesReadDistanceOfFiveOrLarger +=
other.bytesReadDistanceOfFiveOrLarger;
this.bytesReadErasureCoded += other.bytesReadErasureCoded;
+ this.remoteReadTimeMS += other.remoteReadTimeMS;
}
/**
@@ -3977,6 +3979,7 @@ public abstract class FileSystem extends Configured
this.bytesReadDistanceOfFiveOrLarger =
-this.bytesReadDistanceOfFiveOrLarger;
this.bytesReadErasureCoded = -this.bytesReadErasureCoded;
+ this.remoteReadTimeMS = -this.remoteReadTimeMS;
}
@Override
@@ -4025,6 +4028,10 @@ public abstract class FileSystem extends Configured
public long getBytesReadErasureCoded() {
return bytesReadErasureCoded;
}
+
+ public long getRemoteReadTimeMS() {
+ return remoteReadTimeMS;
+ }
}
private interface StatisticsAggregator<T> {
@@ -4252,6 +4259,14 @@ public abstract class FileSystem extends Configured
}
}
+ /**
+ * Increment the time taken to read bytes from remote in the statistics.
+ * @param durationMS time taken in ms to read bytes from remote
+ */
+ public void increaseRemoteReadTime(final long durationMS) {
+ getThreadStatistics().remoteReadTimeMS += durationMS;
+ }
+
/**
* Apply the given aggregator to all StatisticsData objects associated with
* this Statistics object.
@@ -4399,6 +4414,25 @@ public abstract class FileSystem extends Configured
return bytesRead;
}
+ /**
+ * Get total time taken in ms for bytes read from remote.
+ * @return time taken in ms for remote bytes read.
+ */
+ public long getRemoteReadTime() {
+ return visitAll(new StatisticsAggregator<Long>() {
+ private long remoteReadTimeMS = 0;
+
+ @Override
+ public void accept(StatisticsData data) {
+ remoteReadTimeMS += data.remoteReadTimeMS;
+ }
+
+ public Long aggregate() {
+ return remoteReadTimeMS;
+ }
+ });
+ }
+
/**
* Get all statistics data.
* MR or other frameworks can use the method to get all statistics at once.
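A hedged sketch of reading the new counter back from the per-scheme statistics after some I/O; the class name and the hdfs://nn:8020 URI are placeholders, and only the getRemoteReadTime() aggregator added above plus the long-standing FileSystem.getStatistics(scheme, class) lookup are assumed.

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    public class RemoteReadTimeSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://nn:8020/"), conf)) {
          // ... perform some reads against remote DataNodes here ...
          FileSystem.Statistics stats =
              FileSystem.getStatistics(fs.getUri().getScheme(), fs.getClass());
          // Milliseconds spent reading bytes whose network distance was > 0.
          System.out.println("remote read time (ms): " + stats.getRemoteReadTime());
        }
      }
    }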
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java
index 62806d61b54..9e62e63775a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java
@@ -47,7 +47,8 @@ public class FileSystemStorageStatistics extends StorageStatistics {
"bytesReadDistanceOfOneOrTwo",
"bytesReadDistanceOfThreeOrFour",
"bytesReadDistanceOfFiveOrLarger",
- "bytesReadErasureCoded"
+ "bytesReadErasureCoded",
+ "remoteReadTimeMS"
};
private static class LongStatisticIterator
@@ -107,6 +108,8 @@ public class FileSystemStorageStatistics extends StorageStatistics {
return data.getBytesReadDistanceOfFiveOrLarger();
case "bytesReadErasureCoded":
return data.getBytesReadErasureCoded();
+ case "remoteReadTimeMS":
+ return data.getRemoteReadTimeMS();
default:
return null;
}
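The same figure is also reachable through the storage-statistics view under the new "remoteReadTimeMS" key; a minimal sketch, assuming a FileSystem instance and the existing getStorageStatistics()/getLong() API, with a hypothetical helper class name:

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.StorageStatistics;

    public final class RemoteReadTimeKeySketch {
      private RemoteReadTimeKeySketch() {
      }

      // Returns the remote read time in ms recorded for this FileSystem,
      // or 0 when the statistic is not present.
      public static long remoteReadTimeMs(FileSystem fs) {
        StorageStatistics ss = fs.getStorageStatistics();
        Long value = ss.getLong("remoteReadTimeMS");
        return value == null ? 0L : value;
      }
    }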
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java
index 2b4e686e592..e99f0f2348b 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java
@@ -52,7 +52,8 @@ public class TestFileSystemStorageStatistics {
"bytesReadDistanceOfOneOrTwo",
"bytesReadDistanceOfThreeOrFour",
"bytesReadDistanceOfFiveOrLarger",
- "bytesReadErasureCoded"
+ "bytesReadErasureCoded",
+ "remoteReadTimeMS"
};
private FileSystem.Statistics statistics =
@@ -74,6 +75,7 @@ public class TestFileSystemStorageStatistics {
statistics.incrementBytesReadByDistance(1, RandomUtils.nextInt(0, 100));
statistics.incrementBytesReadByDistance(3, RandomUtils.nextInt(0, 100));
statistics.incrementBytesReadErasureCoded(RandomUtils.nextInt(0, 100));
+ statistics.increaseRemoteReadTime(RandomUtils.nextInt(0, 100));
}
@Test
@@ -128,6 +130,8 @@ public class TestFileSystemStorageStatistics {
return statistics.getBytesReadByDistance(5);
case "bytesReadErasureCoded":
return statistics.getBytesReadErasureCoded();
+ case "remoteReadTimeMS":
+ return statistics.getRemoteReadTime();
default:
return 0;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index acfca6799f4..8faeebe8e85 100755
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -3090,10 +3090,14 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
}
}
- void updateFileSystemReadStats(int distance, int nRead) {
+ void updateFileSystemReadStats(int distance, int readBytes, long readTimeMS) {
if (stats != null) {
- stats.incrementBytesRead(nRead);
- stats.incrementBytesReadByDistance(distance, nRead);
+ stats.incrementBytesRead(readBytes);
+ stats.incrementBytesReadByDistance(distance, readBytes);
+ if (distance > 0) {
+ //remote read
+ stats.increaseRemoteReadTime(readTimeMS);
+ }
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
index a8d80016072..b5be33206e7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
@@ -851,8 +851,9 @@ public class DFSInputStream extends FSInputStream
locatedBlocks.getFileLength() - pos);
}
}
+ long beginReadMS = Time.monotonicNow();
int result = readBuffer(strategy, realLen, corruptedBlocks);
-
+ long readTimeMS = Time.monotonicNow() - beginReadMS;
if (result >= 0) {
pos += result;
} else {
@@ -861,7 +862,7 @@ public class DFSInputStream extends FSInputStream
}
updateReadStatistics(readStatistics, result, blockReader);
dfsClient.updateFileSystemReadStats(blockReader.getNetworkDistance(),
- result);
+ result, readTimeMS);
if (readStatistics.getBlockType() == BlockType.STRIPED) {
dfsClient.updateFileSystemECReadStats(result);
}
@@ -1184,6 +1185,7 @@ public class DFSInputStream extends FSInputStream
ByteBuffer tmp = buf.duplicate();
tmp.limit(tmp.position() + len);
tmp = tmp.slice();
+ long beginReadMS = Time.monotonicNow();
int nread = 0;
int ret;
while (true) {
@@ -1193,11 +1195,12 @@ public class DFSInputStream extends FSInputStream
}
nread += ret;
}
+ long readTimeMS = Time.monotonicNow() - beginReadMS;
buf.position(buf.position() + nread);
IOUtilsClient.updateReadStatistics(readStatistics, nread, reader);
dfsClient.updateFileSystemReadStats(
- reader.getNetworkDistance(), nread);
+ reader.getNetworkDistance(), nread, readTimeMS);
if (readStatistics.getBlockType() == BlockType.STRIPED) {
dfsClient.updateFileSystemECReadStats(nread);
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java
index 5ae51709593..6c1bafbef9d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java
@@ -331,15 +331,17 @@ public class DFSStripedInputStream extends DFSInputStream {
* its ThreadLocal.
*
* @param stats striped read stats
+ * @param readTimeMS read time metrics in ms
+ *
*/
- void updateReadStats(final StripedBlockUtil.BlockReadStats stats) {
+ void updateReadStats(final StripedBlockUtil.BlockReadStats stats, long readTimeMS) {
if (stats == null) {
return;
}
updateReadStatistics(readStatistics, stats.getBytesRead(),
stats.isShortCircuit(), stats.getNetworkDistance());
dfsClient.updateFileSystemReadStats(stats.getNetworkDistance(),
- stats.getBytesRead());
+ stats.getBytesRead(), readTimeMS);
assert readStatistics.getBlockType() == BlockType.STRIPED;
dfsClient.updateFileSystemECReadStats(stats.getBytesRead());
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java
index 3fc87c7952a..f2d6732a459 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java
@@ -351,9 +351,12 @@ abstract class StripeReader {
// first read failure
while (!futures.isEmpty()) {
try {
+ long beginReadMS = Time.monotonicNow();
StripingChunkReadResult r = StripedBlockUtil
.getNextCompletedStripedRead(service, futures, 0);
- dfsStripedInputStream.updateReadStats(r.getReadStats());
+ long readTimeMS = Time.monotonicNow() - beginReadMS;
+
+ dfsStripedInputStream.updateReadStats(r.getReadStats(), readTimeMS);
DFSClient.LOG.debug("Read task returned: {}, for stripe {}",
r, alignedStripe);
StripingChunk returnedChunk = alignedStripe.chunks[r.index];
From f1936d29f1f28bc0ce833147ff016a07aeb163b6 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 13 Apr 2023 10:25:17 -0700
Subject: [PATCH 54/78] HADOOP-18693. Bump derby from 10.10.2.0 to 10.14.2.0 in
/hadoop-project (#5427)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
hadoop-project/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index 7a57f05011d..4b80849af0a 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -129,7 +129,7 @@
     1.0-alpha-1
     3.3.1
     4.0.3
-    <derby.version>10.10.2.0</derby.version>
+    <derby.version>10.14.2.0</derby.version>
     8.0.29
     6.2.1.jre7
     4.10.0
From 0185afafeac26a447b6138b2d74a6f5ed0051d0b Mon Sep 17 00:00:00 2001
From: zhangshuyan <81411509+zhangshuyan0@users.noreply.github.com>
Date: Fri, 14 Apr 2023 10:33:30 +0800
Subject: [PATCH 55/78] HDFS-16974. Consider volumes average load of each
DataNode when choosing target. (#5541). Contributed by Shuyan Zhang.
Signed-off-by: He Xiaoqiao
---
.../org/apache/hadoop/hdfs/DFSConfigKeys.java | 5 +
.../BlockPlacementPolicyDefault.java | 16 ++
.../blockmanagement/DatanodeDescriptor.java | 16 ++
.../blockmanagement/DatanodeManager.java | 11 ++
.../blockmanagement/DatanodeStatistics.java | 4 +-
.../server/blockmanagement/DatanodeStats.java | 7 +
.../blockmanagement/FSClusterStats.java | 14 +-
.../blockmanagement/HeartbeatManager.java | 5 +
.../src/main/resources/hdfs-default.xml | 8 +
.../BaseReplicationPolicyTest.java | 4 +-
...ionPolicyRatioConsiderLoadWithStorage.java | 169 ++++++++++++++++++
11 files changed, 254 insertions(+), 5 deletions(-)
create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyRatioConsiderLoadWithStorage.java
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 3286ffb4f09..1729106ad14 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -271,6 +271,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.namenode.redundancy.considerLoad.factor";
public static final double
DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR_DEFAULT = 2.0;
+ public static final String DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY =
+ "dfs.namenode.redundancy.considerLoadByVolume";
+ public static final boolean
+ DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_DEFAULT
+ = false;
public static final String DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY =
HdfsClientConfigKeys.DeprecatedKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY;
public static final int DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_DEFAULT = 3;
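A minimal sketch of enabling the new behaviour from code, equivalent to setting dfs.namenode.redundancy.considerLoadByVolume=true in hdfs-site.xml; the helper class is hypothetical and only the DFSConfigKeys constant added above is assumed.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public final class ConsiderLoadByVolumeSketch {
      private ConsiderLoadByVolumeSketch() {
      }

      public static Configuration enableVolumeLoadCheck(Configuration conf) {
        // Ships disabled (DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_DEFAULT = false).
        conf.setBoolean(
            DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY, true);
        return conf;
      }
    }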
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
index 1fef3db69d0..3d5ecf9b575 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
@@ -82,6 +82,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
NOT_IN_SERVICE("the node is not in service"),
NODE_STALE("the node is stale"),
NODE_TOO_BUSY("the node is too busy"),
+ NODE_TOO_BUSY_BY_VOLUME("the node is too busy based on volume load"),
TOO_MANY_NODES_ON_RACK("the rack has too many chosen nodes"),
NOT_ENOUGH_STORAGE_SPACE("not enough storage space to place the block"),
NO_REQUIRED_STORAGE_TYPE("required storage types are unavailable"),
@@ -101,6 +102,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
protected boolean considerLoad;
private boolean considerLoadByStorageType;
protected double considerLoadFactor;
+ private boolean considerLoadByVolume = false;
private boolean preferLocalNode;
private boolean dataNodePeerStatsEnabled;
private volatile boolean excludeSlowNodesEnabled;
@@ -131,6 +133,10 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
this.considerLoadFactor = conf.getDouble(
DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR,
DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR_DEFAULT);
+ this.considerLoadByVolume = conf.getBoolean(
+ DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY,
+ DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_DEFAULT
+ );
this.stats = stats;
this.clusterMap = clusterMap;
this.host2datanodeMap = host2datanodeMap;
@@ -1007,6 +1013,16 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
"(load: " + nodeLoad + " > " + maxLoad + ")");
return true;
}
+ if (considerLoadByVolume) {
+ final int numVolumesAvailable = node.getNumVolumesAvailable();
+ final double maxLoadForVolumes = considerLoadFactor * numVolumesAvailable *
+ stats.getInServiceXceiverAverageForVolume();
+ if (maxLoadForVolumes > 0.0 && nodeLoad > maxLoadForVolumes) {
+ logNodeIsNotChosen(node, NodeNotChosenReason.NODE_TOO_BUSY_BY_VOLUME,
+ "(load: " + nodeLoad + " > " + maxLoadForVolumes + ") ");
+ return true;
+ }
+ }
return false;
}
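To make the new rejection rule concrete, a worked sketch of the arithmetic in the hunk above with illustrative numbers (none of the values are taken from a real cluster):

    public class VolumeLoadThresholdSketch {
      public static void main(String[] args) {
        double considerLoadFactor = 2.0;   // dfs.namenode.redundancy.considerLoad.factor
        int numVolumesAvailable = 8;       // writable volumes on the candidate DataNode
        double avgXceiversPerVolume = 1.5; // stats.getInServiceXceiverAverageForVolume()
        int nodeLoad = 30;                 // xceiver count currently on the candidate

        double maxLoadForVolumes =
            considerLoadFactor * numVolumesAvailable * avgXceiversPerVolume; // = 24.0
        // 30 > 24.0, so this node would be skipped with NODE_TOO_BUSY_BY_VOLUME.
        System.out.println(maxLoadForVolumes > 0.0 && nodeLoad > maxLoadForVolumes);
      }
    }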
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
index c77d54591a9..352238b7f70 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@@ -233,6 +233,9 @@ public class DatanodeDescriptor extends DatanodeInfo {
// HB processing can use it to tell if it is the first HB since DN restarted
private boolean heartbeatedSinceRegistration = false;
+ /** The number of volumes that can be written.*/
+ private int numVolumesAvailable = 0;
+
/**
* DatanodeDescriptor constructor
* @param nodeID id of the data node
@@ -411,6 +414,7 @@ public class DatanodeDescriptor extends DatanodeInfo {
long totalNonDfsUsed = 0;
Set visitedMount = new HashSet<>();
Set failedStorageInfos = null;
+ int volumesAvailable = 0;
// Decide if we should check for any missing StorageReport and mark it as
// failed. There are different scenarios.
@@ -489,7 +493,11 @@ public class DatanodeDescriptor extends DatanodeInfo {
visitedMount.add(mount);
}
}
+ if (report.getRemaining() > 0 && storage.getState() != State.FAILED) {
+ volumesAvailable += 1;
+ }
}
+ this.numVolumesAvailable = volumesAvailable;
// Update total metrics for the node.
setCapacity(totalCapacity);
@@ -981,6 +989,14 @@ public class DatanodeDescriptor extends DatanodeInfo {
return volumeFailureSummary;
}
+ /**
+ * Return the number of volumes that can be written.
+ * @return the number of volumes that can be written.
+ */
+ public int getNumVolumesAvailable() {
+ return numVolumesAvailable;
+ }
+
/**
* @param nodeReg DatanodeID to update registration for.
*/
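
During heartbeat processing above, a storage counts toward numVolumesAvailable only if it still reports remaining capacity and is not in the FAILED state. The following is a minimal sketch of that counting rule; StorageView is a simplified, hypothetical stand-in for the StorageReport/DatanodeStorageInfo pair the real code iterates over.

// Sketch of the volume-counting rule applied while folding in storage reports.
import java.util.List;

final class VolumeCounter {

  static final class StorageView {
    final long remainingBytes;
    final boolean failed;

    StorageView(long remainingBytes, boolean failed) {
      this.remainingBytes = remainingBytes;
      this.failed = failed;
    }
  }

  /** Count storages that can still accept writes. */
  static int countWritableVolumes(List<StorageView> storages) {
    int available = 0;
    for (StorageView s : storages) {
      if (s.remainingBytes > 0 && !s.failed) {
        available++;
      }
    }
    return available;
  }
}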
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index 88f3ac4e7c4..ed60f388d3f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -2101,6 +2101,17 @@ public class DatanodeManager {
return avgLoad;
}
+ @Override
+ public double getInServiceXceiverAverageForVolume() {
+ double avgLoad = 0;
+ final int volumes = heartbeatManager.getInServiceAvailableVolumeCount();
+ if (volumes > 0) {
+ final long xceivers = heartbeatManager.getInServiceXceiverCount();
+ avgLoad = (double)xceivers/volumes;
+ }
+ return avgLoad;
+ }
+
@Override
public Map getStorageTypeStats() {
return heartbeatManager.getStorageTypeStats();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStatistics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStatistics.java
index 36a9c2bc095..fcf86195bdf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStatistics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStatistics.java
@@ -60,7 +60,9 @@ public interface DatanodeStatistics {
/** @return number of non-decommission(ing|ed) nodes */
public int getNumDatanodesInService();
-
+
+ /** @return the number of writable volumes on in-service nodes. */
+ int getInServiceAvailableVolumeCount();
/**
* @return the total used space by data nodes for non-DFS purposes
* such as storing temporary files on the local file system
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStats.java
index 912d4d236a6..5bd88b561ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStats.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStats.java
@@ -44,6 +44,7 @@ class DatanodeStats {
private int nodesInService = 0;
private int nodesInServiceXceiverCount = 0;
+ private int nodesInServiceAvailableVolumeCount = 0;
private int expiredHeartbeats = 0;
synchronized void add(final DatanodeDescriptor node) {
@@ -58,6 +59,7 @@ class DatanodeStats {
capacityRemaining += node.getRemaining();
cacheCapacity += node.getCacheCapacity();
cacheUsed += node.getCacheUsed();
+ nodesInServiceAvailableVolumeCount += node.getNumVolumesAvailable();
} else if (node.isDecommissionInProgress() ||
node.isEnteringMaintenance()) {
cacheCapacity += node.getCacheCapacity();
@@ -87,6 +89,7 @@ class DatanodeStats {
capacityRemaining -= node.getRemaining();
cacheCapacity -= node.getCacheCapacity();
cacheUsed -= node.getCacheUsed();
+ nodesInServiceAvailableVolumeCount -= node.getNumVolumesAvailable();
} else if (node.isDecommissionInProgress() ||
node.isEnteringMaintenance()) {
cacheCapacity -= node.getCacheCapacity();
@@ -149,6 +152,10 @@ class DatanodeStats {
return nodesInServiceXceiverCount;
}
+ synchronized int getNodesInServiceAvailableVolumeCount() {
+ return nodesInServiceAvailableVolumeCount;
+ }
+
synchronized int getExpiredHeartbeats() {
return expiredHeartbeats;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java
index 14122952bb1..217dd36e3ef 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java
@@ -53,14 +53,24 @@ public interface FSClusterStats {
public int getNumDatanodesInService();
/**
- * an indication of the average load of non-decommission(ing|ed) nodes
- * eligible for block placement
+ * An indication of the average load of non-decommission(ing|ed) nodes
+ * eligible for block placement.
*
* @return average of the in service number of block transfers and block
* writes that are currently occurring on the cluster.
*/
public double getInServiceXceiverAverage();
+ /**
+ * An indication of the average load of volumes at non-decommission(ing|ed)
+ * nodes eligible for block placement.
+ *
+ * @return average of in service number of block transfers and block
+ * writes that are currently occurring on the volumes of the
+ * cluster.
+ */
+ double getInServiceXceiverAverageForVolume();
+
/**
* Indicates the storage statistics per storage type.
* @return storage statistics per storage type.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
index 01e1b6392a0..429d40d9fbd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
@@ -183,6 +183,11 @@ class HeartbeatManager implements DatanodeStatistics {
public int getNumDatanodesInService() {
return stats.getNodesInService();
}
+
+ @Override
+ public int getInServiceAvailableVolumeCount() {
+ return stats.getNodesInServiceAvailableVolumeCount();
+ }
@Override
public long getCacheCapacity() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index bdd048004d3..8e6ef99040a 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -334,6 +334,14 @@
+<property>
+  <name>dfs.namenode.redundancy.considerLoadByVolume</name>
+  <value>false</value>
+  <description>Decide if chooseTarget considers the target's volume load or
+    not.
+  </description>
+</property>
+
<property>
  <name>dfs.namenode.read.considerLoad</name>
  <value>false</value>
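
The same switch can be set programmatically, as the new test does. Configuration and the DFSConfigKeys constants are the real Hadoop types; the wrapper class below is illustrative only.

// Illustrative helper for enabling the volume-aware load check in code.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class EnableVolumeLoad {
  public static Configuration withVolumeLoad() {
    Configuration conf = new Configuration();
    // Keep the existing xceiver-based check on...
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, true);
    // ...and additionally reject targets whose load exceeds
    // factor * availableVolumes * avgXceiversPerVolume.
    conf.setBoolean(
        DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY, true);
    return conf;
  }
}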
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java
index 1e75452d3d8..c9eb624e5c2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java
@@ -56,13 +56,13 @@ abstract public class BaseReplicationPolicyTest {
protected String blockPlacementPolicy;
protected NamenodeProtocols nameNodeRpc = null;
- static void updateHeartbeatWithUsage(DatanodeDescriptor dn,
+ void updateHeartbeatWithUsage(DatanodeDescriptor dn,
long capacity, long dfsUsed, long remaining, long blockPoolUsed,
long dnCacheCapacity, long dnCacheUsed, int xceiverCount,
int volFailures) {
dn.getStorageInfos()[0].setUtilizationForTesting(
capacity, dfsUsed, remaining, blockPoolUsed);
- dn.updateHeartbeat(
+ dnManager.getHeartbeatManager().updateHeartbeat(dn,
BlockManagerTestUtil.getStorageReportsForDatanode(dn),
dnCacheCapacity, dnCacheUsed, xceiverCount, volFailures, null);
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyRatioConsiderLoadWithStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyRatioConsiderLoadWithStorage.java
new file mode 100644
index 00000000000..d06af054699
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyRatioConsiderLoadWithStorage.java
@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.blockmanagement;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.TestBlockStoragePolicy;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Verify that chooseTarget can exclude nodes with high volume average load.
+ */
+public class TestReplicationPolicyRatioConsiderLoadWithStorage
+ extends BaseReplicationPolicyTest {
+
+ public TestReplicationPolicyRatioConsiderLoadWithStorage() {
+ this.blockPlacementPolicy = BlockPlacementPolicyDefault.class.getName();
+ }
+
+ @Override
+ DatanodeDescriptor[] getDatanodeDescriptors(Configuration conf) {
+ conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY,
+ true);
+ conf.setDouble(DFSConfigKeys
+ .DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR, 2);
+ conf.setBoolean(
+ DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY, true);
+
+ final String[] racks = {
+ "/rack1",
+ "/rack2",
+ "/rack3",
+ "/rack4",
+ "/rack5"};
+ storages = DFSTestUtil.createDatanodeStorageInfos(racks);
+ DatanodeDescriptor[] descriptors =
+ DFSTestUtil.toDatanodeDescriptor(storages);
+ long storageCapacity =
+ 2 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE;
+ // Each datanode has 6 storages, but the number of available storages
+ // varies.
+ for (int i = 0; i < descriptors.length; i++) {
+ for (int j = 0; j < 5; j++) {
+ DatanodeStorage s =
+ new DatanodeStorage("s" + i + j);
+ descriptors[i].updateStorage(s);
+
+ }
+ for (int j = 0; j < descriptors[i].getStorageInfos().length; j++) {
+ DatanodeStorageInfo dsInfo = descriptors[i].getStorageInfos()[j];
+ if (j > i + 1) {
+ dsInfo.setUtilizationForTesting(storageCapacity, storageCapacity, 0,
+ storageCapacity);
+ } else {
+ dsInfo.setUtilizationForTesting(storageCapacity, 0, storageCapacity,
+ 0);
+ }
+ }
+ }
+ return descriptors;
+ }
+
+ /**
+ * Tests that chooseTarget, with considerLoad and considerLoadByVolume set to
+ * true, correctly calculates the load.
+ */
+ @Test
+ public void testChooseTargetWithRatioConsiderLoad() {
+ namenode.getNamesystem().writeLock();
+ try {
+ // After heartbeat has been processed, the total load should be 200.
+ // And average load per node should be 40. The max load should be 2 * 40;
+ // And average load per storage should be 10. Considering available
+ // storages, the max load should be:
+ // 2*10*2, 3*10*2, 4*10*2, 5*10*2, 6*10*2.
+ // Considering the load of every node and number of storages:
+ // Index: 0, 1, 2, 3, 4
+ // Available Storage: 2, 3, 4, 5, 6
+ // Load: 50, 110, 28, 2, 10
+ // So dataNodes[1] should never be chosen because the node itself is
+ // overloaded, and dataNodes[0] should never be chosen because its
+ // per-storage load is too high.
+ dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[0],
+ BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[0]),
+ dataNodes[0].getCacheCapacity(),
+ dataNodes[0].getCacheUsed(),
+ 50, 0, null);
+ dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[1],
+ BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[1]),
+ dataNodes[0].getCacheCapacity(),
+ dataNodes[0].getCacheUsed(),
+ 110, 0, null);
+ dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[2],
+ BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[2]),
+ dataNodes[0].getCacheCapacity(),
+ dataNodes[0].getCacheUsed(),
+ 28, 0, null);
+ dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[3],
+ BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[3]),
+ dataNodes[0].getCacheCapacity(),
+ dataNodes[0].getCacheUsed(),
+ 2, 0, null);
+ dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[4],
+ BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[4]),
+ dataNodes[0].getCacheCapacity(),
+ dataNodes[0].getCacheUsed(),
+ 10, 0, null);
+
+ Set targetSet = new HashSet<>();
+
+ // Try to choose 3 datanode targets.
+ DatanodeDescriptor writerDn = dataNodes[2];
+ DatanodeStorageInfo[] targets = namenode.getNamesystem().getBlockManager()
+ .getBlockPlacementPolicy()
+ .chooseTarget("testFile.txt", 3, writerDn, new ArrayList<>(), false,
+ null, 1024, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY, null);
+ // The result contains 3 nodes(dataNodes[2],dataNodes[3],dataNodes[4]).
+ assertEquals(3, targets.length);
+ for (DatanodeStorageInfo dsi : targets) {
+ targetSet.add(dsi.getDatanodeDescriptor());
+ }
+ assertTrue(targetSet.contains(dataNodes[2]));
+ assertTrue(targetSet.contains(dataNodes[3]));
+ assertTrue(targetSet.contains(dataNodes[4]));
+
+ // Try to choose 4 datanode targets.
+ targets = namenode.getNamesystem().getBlockManager()
+ .getBlockPlacementPolicy()
+ .chooseTarget("testFile.txt", 4, writerDn, new ArrayList<>(), false,
+ null, 1024, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY, null);
+ // The result contains 3 nodes(dataNodes[2],dataNodes[3],dataNodes[4]).
+ assertEquals(3, targets.length);
+ targetSet.clear();
+ for (DatanodeStorageInfo dsi : targets) {
+ targetSet.add(dsi.getDatanodeDescriptor());
+ }
+ assertTrue(targetSet.contains(dataNodes[2]));
+ assertTrue(targetSet.contains(dataNodes[3]));
+ assertTrue(targetSet.contains(dataNodes[4]));
+ } finally {
+ namenode.getNamesystem().writeUnlock();
+ }
+ }
+}
\ No newline at end of file
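
The exclusions the test asserts follow from the threshold arithmetic spelled out in its comments (200 xceivers across 20 writable volumes, considerLoad factor 2). A small standalone walkthrough of those numbers, purely illustrative and not part of the test:

// Reproduces the per-node thresholds described in the test comments.
public class TestThresholds {
  public static void main(String[] args) {
    int[] load = {50, 110, 28, 2, 10};   // xceivers per datanode
    int[] volumes = {2, 3, 4, 5, 6};     // writable volumes per datanode
    double avgPerVolume = 200.0 / 20.0;  // 10 xceivers per volume cluster-wide
    double factor = 2.0;                 // considerLoad factor set by the test
    for (int i = 0; i < load.length; i++) {
      double max = factor * volumes[i] * avgPerVolume;  // 40, 60, 80, 100, 120
      System.out.printf("dataNodes[%d]: load=%d max=%.0f excluded=%b%n",
          i, load[i], max, load[i] > max);
    }
    // Only dataNodes[0] (50 > 40) and dataNodes[1] (110 > 60) are excluded,
    // which is why chooseTarget can return at most dataNodes[2..4].
  }
}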
From 0bcdea7912b18c1c9244a37b9ed3d66d6e748c95 Mon Sep 17 00:00:00 2001
From: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Date: Sat, 15 Apr 2023 04:09:18 +0800
Subject: [PATCH 56/78] YARN-11239. Optimize FederationClientInterceptor audit
log. (#5127)
---
.../yarn/server/router/RouterAuditLogger.java | 61 +-
.../yarn/server/router/RouterServerUtil.java | 13 +
.../clientrm/FederationClientInterceptor.java | 526 +++++++++++++-----
.../server/router/TestRouterAuditLogger.java | 17 +-
4 files changed, 484 insertions(+), 133 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterAuditLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterAuditLogger.java
index f3b428dab4a..bb814b65283 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterAuditLogger.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterAuditLogger.java
@@ -30,7 +30,7 @@ import java.net.InetAddress;
* Manages Router audit logs.
* Audit log format is written as key=value pairs. Tab separated.
*/
-public class RouterAuditLogger {
+public final class RouterAuditLogger {
private static final Logger LOG =
LoggerFactory.getLogger(RouterAuditLogger.class);
@@ -51,6 +51,43 @@ public class RouterAuditLogger {
public static final String GET_APP_REPORT = "Get Application Report";
public static final String TARGET_CLIENT_RM_SERVICE = "RouterClientRMService";
public static final String UNKNOWN = "UNKNOWN";
+ public static final String GET_APPLICATIONS = "Get Applications";
+ public static final String GET_CLUSTERMETRICS = "Get ClusterMetrics";
+ public static final String GET_CLUSTERNODES = "Get ClusterNodes";
+ public static final String GET_QUEUEINFO = "Get QueueInfo";
+ public static final String GET_QUEUE_USER_ACLS = "Get QueueUserAcls";
+ public static final String MOVE_APPLICATION_ACROSS_QUEUES = "Move ApplicationAcrossQueues";
+ public static final String GET_NEW_RESERVATION = "Get NewReservation";
+ public static final String SUBMIT_RESERVATION = "Submit Reservation";
+ public static final String LIST_RESERVATIONS = "List Reservations";
+ public static final String UPDATE_RESERVATION = "Update Reservation";
+ public static final String DELETE_RESERVATION = "Delete Reservation";
+ public static final String GET_NODETOLABELS = "Get NodeToLabels";
+ public static final String GET_LABELSTONODES = "Get LabelsToNodes";
+ public static final String GET_CLUSTERNODELABELS = "Get ClusterNodeLabels";
+ public static final String GET_APPLICATION_ATTEMPT_REPORT = "Get ApplicationAttemptReport";
+ public static final String GET_APPLICATION_ATTEMPTS = "Get ApplicationAttempts";
+ public static final String GET_CONTAINERREPORT = "Get ContainerReport";
+ public static final String GET_CONTAINERS = "Get Containers";
+ public static final String GET_DELEGATIONTOKEN = "Get DelegationToken";
+ public static final String RENEW_DELEGATIONTOKEN = "Renew DelegationToken";
+ public static final String CANCEL_DELEGATIONTOKEN = "Cancel DelegationToken";
+ public static final String FAIL_APPLICATIONATTEMPT = "Fail ApplicationAttempt";
+ public static final String UPDATE_APPLICATIONPRIORITY = "Update ApplicationPriority";
+ public static final String SIGNAL_TOCONTAINER = "Signal ToContainer";
+ public static final String UPDATE_APPLICATIONTIMEOUTS = "Update ApplicationTimeouts";
+ public static final String GET_RESOURCEPROFILES = "Get ResourceProfiles";
+ public static final String GET_RESOURCEPROFILE = "Get ResourceProfile";
+ public static final String GET_RESOURCETYPEINFO = "Get ResourceTypeInfo";
+ public static final String GET_ATTRIBUTESTONODES = "Get AttributesToNodes";
+ public static final String GET_CLUSTERNODEATTRIBUTES = "Get ClusterNodeAttributes";
+ public static final String GET_NODESTOATTRIBUTES = "Get NodesToAttributes";
+ }
+
+ public static void logSuccess(String user, String operation, String target) {
+ if (LOG.isInfoEnabled()) {
+ LOG.info(createSuccessLog(user, operation, target, null, null));
+ }
}
/**
@@ -146,6 +183,28 @@ public class RouterAuditLogger {
}
}
+ /**
+ * Create a readable and parseable audit log string for a failed event.
+ *
+ * @param user User who made the service request.
+ * @param operation Operation requested by the user.
+ * @param perm Target permissions.
+ * @param target The target on which the operation is being performed.
+ * @param descriptionFormat the description message format string.
+ * @param args format parameter.
+ *
+ * Note that the {@link RouterAuditLogger} uses tabs ('\t') as a key-val
+ * delimiter and hence the value fields should not contain tabs ('\t').
+ */
+ public static void logFailure(String user, String operation, String perm,
+ String target, String descriptionFormat, Object... args) {
+ if (LOG.isInfoEnabled()) {
+ String description = String.format(descriptionFormat, args);
+ LOG.info(createFailureLog(user, operation, perm, target, description, null, null));
+ }
+ }
+
/**
* Create a readable and parseable audit log string for a failed event.
*
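
The interceptor methods patched below call the new three-argument logSuccess on the success path and the format-string logFailure overload before throwing on the failure path. A hedged sketch of that call pattern; the enclosing class, method, and validation are illustrative, not actual Router code, while the RouterAuditLogger calls and AuditConstants come from the change above.

// Illustrative audit pattern for a Router client RM operation.
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.router.RouterAuditLogger;
import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_QUEUEINFO;
import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.TARGET_CLIENT_RM_SERVICE;
import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UNKNOWN;

public class AuditPatternSketch {

  void handleGetQueueInfo(String shortUserName, String queueName)
      throws YarnException {
    if (queueName == null || queueName.isEmpty()) {
      // Record the rejection in the audit log before failing the call.
      RouterAuditLogger.logFailure(shortUserName, GET_QUEUEINFO, UNKNOWN,
          TARGET_CLIENT_RM_SERVICE, "Missing or empty queue name [%s].", queueName);
      throw new YarnException("Missing or empty queue name.");
    }
    // ... fan the request out to the sub-cluster ResourceManagers ...
    RouterAuditLogger.logSuccess(shortUserName, GET_QUEUEINFO,
        TARGET_CLIENT_RM_SERVICE);
  }
}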
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java
index 0dbead33f02..dcd7777779b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java
@@ -131,6 +131,19 @@ public final class RouterServerUtil {
}
}
+ /**
+ * Throws an exception due to an error.
+ *
+ * @param errMsg the error message
+ * @throws YarnException on failure
+ */
+ @Public
+ @Unstable
+ public static void logAndThrowException(String errMsg) throws YarnException {
+ LOG.error(errMsg);
+ throw new YarnException(errMsg);
+ }
+
public static R createRequestInterceptorChain(Configuration conf, String pipeLineClassName,
String interceptorClassName, Class clazz) {
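
A minimal illustration of the new single-argument logAndThrowException overload added above; the validation helper itself is hypothetical.

import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.router.RouterServerUtil;

public class ValidationSketch {
  static void requireQueue(String queue) throws YarnException {
    if (queue == null || queue.isEmpty()) {
      // Logs the message at ERROR level and throws a YarnException carrying it.
      RouterServerUtil.logAndThrowException("Missing queue name.");
    }
  }
}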
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java
index a50ea5bc423..345c3b4ba2a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java
@@ -115,6 +115,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityReque
import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityResponse;
import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest;
import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ReservationId;
@@ -152,6 +153,37 @@ import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConsta
import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.FORCE_KILL_APP;
import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.TARGET_CLIENT_RM_SERVICE;
import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UNKNOWN;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CLUSTERNODES;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_QUEUE_USER_ACLS;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_APPLICATIONS;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CLUSTERMETRICS;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_QUEUEINFO;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.MOVE_APPLICATION_ACROSS_QUEUES;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_NEW_RESERVATION;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.SUBMIT_RESERVATION;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.LIST_RESERVATIONS;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UPDATE_RESERVATION;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.DELETE_RESERVATION;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_NODETOLABELS;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_LABELSTONODES;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CLUSTERNODELABELS;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_APPLICATION_ATTEMPT_REPORT;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_APPLICATION_ATTEMPTS;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CONTAINERREPORT;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CONTAINERS;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_DELEGATIONTOKEN;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.RENEW_DELEGATIONTOKEN;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.CANCEL_DELEGATIONTOKEN;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.FAIL_APPLICATIONATTEMPT;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UPDATE_APPLICATIONPRIORITY;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.SIGNAL_TOCONTAINER;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UPDATE_APPLICATIONTIMEOUTS;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_RESOURCEPROFILES;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_RESOURCEPROFILE;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_RESOURCETYPEINFO;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_ATTRIBUTESTONODES;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CLUSTERNODEATTRIBUTES;
+import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_NODESTOATTRIBUTES;
/**
* Extends the {@code AbstractRequestInterceptorClient} class and provides an
@@ -328,6 +360,8 @@ public class FederationClientInterceptor
}
} catch (Exception e) {
routerMetrics.incrAppsFailedCreated();
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_NEW_APP, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, e.getMessage());
RouterServerUtil.logAndThrowException(e.getMessage(), e);
}
@@ -485,6 +519,8 @@ public class FederationClientInterceptor
} catch (Exception e) {
routerMetrics.incrAppsFailedSubmitted();
+ RouterAuditLogger.logFailure(user.getShortUserName(), SUBMIT_NEW_APP, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, e.getMessage(), applicationId);
RouterServerUtil.logAndThrowException(e.getMessage(), e);
}
@@ -734,7 +770,10 @@ public class FederationClientInterceptor
throws YarnException, IOException {
if (request == null) {
routerMetrics.incrMultipleAppsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getApplications request.", null);
+ String msg = "Missing getApplications request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATIONS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getApplications",
@@ -744,10 +783,15 @@ public class FederationClientInterceptor
applications = invokeConcurrent(remoteMethod, GetApplicationsResponse.class);
} catch (Exception ex) {
routerMetrics.incrMultipleAppsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get applications due to exception.", ex);
+ String msg = "Unable to get applications due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATIONS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededMultipleAppsRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_APPLICATIONS,
+ TARGET_CLIENT_RM_SERVICE);
// Merge the Application Reports
return RouterYarnClientUtils.mergeApplications(applications, returnPartialReport);
}
@@ -757,7 +801,10 @@ public class FederationClientInterceptor
GetClusterMetricsRequest request) throws YarnException, IOException {
if (request == null) {
routerMetrics.incrGetClusterMetricsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getClusterMetrics request.", null);
+ String msg = "Missing getApplications request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERMETRICS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getClusterMetrics",
@@ -767,10 +814,15 @@ public class FederationClientInterceptor
clusterMetrics = invokeConcurrent(remoteMethod, GetClusterMetricsResponse.class);
} catch (Exception ex) {
routerMetrics.incrGetClusterMetricsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get cluster metrics due to exception.", ex);
+ String msg = "Unable to get cluster metrics due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERMETRICS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetClusterMetricsRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CLUSTERMETRICS,
+ TARGET_CLIENT_RM_SERVICE);
return RouterYarnClientUtils.merge(clusterMetrics);
}
@@ -811,7 +863,7 @@ public class FederationClientInterceptor
results.put(subClusterId, clazz.cast(result));
} catch (InterruptedException | ExecutionException e) {
Throwable cause = e.getCause();
- LOG.error("Cannot execute {} on {}: {}", request.getMethodName(),
+ LOG.error("Cannot execute {} on {} : {}", request.getMethodName(),
subClusterId.getId(), cause.getMessage());
exceptions.put(subClusterId, e);
}
@@ -837,7 +889,10 @@ public class FederationClientInterceptor
throws YarnException, IOException {
if (request == null) {
routerMetrics.incrClusterNodesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getClusterNodes request.", null);
+ String msg = "Missing getClusterNodes request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getClusterNodes",
@@ -847,10 +902,15 @@ public class FederationClientInterceptor
invokeConcurrent(remoteMethod, GetClusterNodesResponse.class);
long stopTime = clock.getTime();
routerMetrics.succeededGetClusterNodesRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CLUSTERNODES,
+ TARGET_CLIENT_RM_SERVICE);
return RouterYarnClientUtils.mergeClusterNodesResponse(clusterNodes);
} catch (Exception ex) {
routerMetrics.incrClusterNodesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get cluster nodes due to exception.", ex);
+ String msg = "Unable to get cluster nodes due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
throw new YarnException("Unable to get cluster nodes.");
}
@@ -860,7 +920,10 @@ public class FederationClientInterceptor
throws YarnException, IOException {
if (request == null || request.getQueueName() == null) {
routerMetrics.incrGetQueueInfoFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getQueueInfo request or queueName.", null);
+ String msg = "Missing getQueueInfo request or queueName.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_QUEUEINFO, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -871,11 +934,14 @@ public class FederationClientInterceptor
queues = invokeConcurrent(remoteMethod, GetQueueInfoResponse.class);
} catch (Exception ex) {
routerMetrics.incrGetQueueInfoFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get queue [" +
- request.getQueueName() + "] to exception.", ex);
+ String msg = "Unable to get queue [" + request.getQueueName() + "] to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_QUEUEINFO, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetQueueInfoRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_QUEUEINFO, TARGET_CLIENT_RM_SERVICE);
// Merge the GetQueueInfoResponse
return RouterYarnClientUtils.mergeQueues(queues);
}
@@ -885,7 +951,10 @@ public class FederationClientInterceptor
GetQueueUserAclsInfoRequest request) throws YarnException, IOException {
if(request == null){
routerMetrics.incrQueueUserAclsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getQueueUserAcls request.", null);
+ String msg = "Missing getQueueUserAcls request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_QUEUE_USER_ACLS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getQueueUserAcls",
@@ -895,10 +964,15 @@ public class FederationClientInterceptor
queueUserAcls = invokeConcurrent(remoteMethod, GetQueueUserAclsInfoResponse.class);
} catch (Exception ex) {
routerMetrics.incrQueueUserAclsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get queue user Acls due to exception.", ex);
+ String msg = "Unable to get queue user Acls due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_QUEUE_USER_ACLS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetQueueUserAclsRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_QUEUE_USER_ACLS,
+ TARGET_CLIENT_RM_SERVICE);
// Merge the QueueUserAclsInfoResponse
return RouterYarnClientUtils.mergeQueueUserAcls(queueUserAcls);
}
@@ -909,8 +983,11 @@ public class FederationClientInterceptor
throws YarnException, IOException {
if (request == null || request.getApplicationId() == null || request.getTargetQueue() == null) {
routerMetrics.incrMoveApplicationAcrossQueuesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing moveApplicationAcrossQueues request or " +
- "applicationId or target queue.", null);
+ String msg = "Missing moveApplicationAcrossQueues request or " +
+ "applicationId or target queue.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), MOVE_APPLICATION_ACROSS_QUEUES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg);
}
long startTime = clock.getTime();
@@ -922,8 +999,10 @@ public class FederationClientInterceptor
.getApplicationHomeSubCluster(applicationId);
} catch (YarnException e) {
routerMetrics.incrMoveApplicationAcrossQueuesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Application " +
- applicationId + " does not exist in FederationStateStore.", e);
+ String errMsgFormat = "Application %s does not exist in FederationStateStore.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), MOVE_APPLICATION_ACROSS_QUEUES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, String.format(errMsgFormat, applicationId));
+ RouterServerUtil.logAndThrowException(e, errMsgFormat, applicationId);
}
ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId);
@@ -932,8 +1011,9 @@ public class FederationClientInterceptor
response = clientRMProxy.moveApplicationAcrossQueues(request);
} catch (Exception e) {
routerMetrics.incrMoveApplicationAcrossQueuesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to moveApplicationAcrossQueues for " +
- applicationId + " to SubCluster " + subClusterId.getId(), e);
+ RouterServerUtil.logAndThrowException(e,
+ "Unable to moveApplicationAcrossQueues for %s to SubCluster %s.", applicationId,
+ subClusterId.getId());
}
if (response == null) {
@@ -943,6 +1023,8 @@ public class FederationClientInterceptor
}
long stopTime = clock.getTime();
+ RouterAuditLogger.logSuccess(user.getShortUserName(), MOVE_APPLICATION_ACROSS_QUEUES,
+ TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId);
routerMetrics.succeededMoveApplicationAcrossQueuesRetrieved(stopTime - startTime);
return response;
}
@@ -954,6 +1036,8 @@ public class FederationClientInterceptor
if (request == null) {
routerMetrics.incrGetNewReservationFailedRetrieved();
String errMsg = "Missing getNewReservation request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_NEW_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, errMsg);
RouterServerUtil.logAndThrowException(errMsg, null);
}
@@ -969,16 +1053,23 @@ public class FederationClientInterceptor
if (response != null) {
long stopTime = clock.getTime();
routerMetrics.succeededGetNewReservationRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_NEW_RESERVATION,
+ TARGET_CLIENT_RM_SERVICE);
return response;
}
} catch (Exception e) {
- LOG.warn("Unable to create a new Reservation in SubCluster {}.", subClusterId.getId(), e);
+ String logFormatted = "Unable to create a new Reservation in SubCluster {}.";
+ LOG.warn(logFormatted, subClusterId.getId(), e);
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_NEW_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, logFormatted, subClusterId.getId());
subClustersActive.remove(subClusterId);
}
}
routerMetrics.incrGetNewReservationFailedRetrieved();
String errMsg = "Failed to create a new reservation.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_NEW_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, errMsg);
throw new YarnException(errMsg);
}
@@ -989,9 +1080,11 @@ public class FederationClientInterceptor
if (request == null || request.getReservationId() == null
|| request.getReservationDefinition() == null || request.getQueue() == null) {
routerMetrics.incrSubmitReservationFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing submitReservation request or reservationId " +
- "or reservation definition or queue.", null);
+ String msg = "Missing submitReservation request or reservationId " +
+ "or reservation definition or queue.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), SUBMIT_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -1028,6 +1121,8 @@ public class FederationClientInterceptor
LOG.info("Reservation {} submitted on subCluster {}.", reservationId, subClusterId);
long stopTime = clock.getTime();
routerMetrics.succeededSubmitReservationRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), SUBMIT_RESERVATION,
+ TARGET_CLIENT_RM_SERVICE);
return response;
}
} catch (Exception e) {
@@ -1037,6 +1132,8 @@ public class FederationClientInterceptor
routerMetrics.incrSubmitReservationFailedRetrieved();
String msg = String.format("Reservation %s failed to be submitted.", reservationId);
+ RouterAuditLogger.logFailure(user.getShortUserName(), SUBMIT_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
throw new YarnException(msg);
}
@@ -1045,7 +1142,10 @@ public class FederationClientInterceptor
ReservationListRequest request) throws YarnException, IOException {
if (request == null || request.getReservationId() == null) {
routerMetrics.incrListReservationsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing listReservations request.", null);
+ String msg = "Missing listReservations request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), LIST_RESERVATIONS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("listReservations",
@@ -1054,12 +1154,16 @@ public class FederationClientInterceptor
try {
listResponses = invokeConcurrent(remoteMethod, ReservationListResponse.class);
} catch (Exception ex) {
+ String msg = "Unable to list reservations node due to exception.";
routerMetrics.incrListReservationsFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Unable to list reservations node due to exception.", ex);
+ RouterAuditLogger.logFailure(user.getShortUserName(), LIST_RESERVATIONS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededListReservationsRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), LIST_RESERVATIONS,
+ TARGET_CLIENT_RM_SERVICE);
// Merge the ReservationListResponse
return RouterYarnClientUtils.mergeReservationsList(listResponses);
}
@@ -1071,8 +1175,10 @@ public class FederationClientInterceptor
if (request == null || request.getReservationId() == null
|| request.getReservationDefinition() == null) {
routerMetrics.incrUpdateReservationFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing updateReservation request or reservationId or reservation definition.", null);
+ String msg = "Missing updateReservation request or reservationId or reservation definition.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -1085,16 +1191,22 @@ public class FederationClientInterceptor
if (response != null) {
long stopTime = clock.getTime();
routerMetrics.succeededUpdateReservationRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), UPDATE_RESERVATION,
+ TARGET_CLIENT_RM_SERVICE);
return response;
}
} catch (Exception ex) {
routerMetrics.incrUpdateReservationFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Unable to reservation update due to exception.", ex);
+ String msg = "Unable to reservation update due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
routerMetrics.incrUpdateReservationFailedRetrieved();
String msg = String.format("Reservation %s failed to be update.", reservationId);
+ RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
throw new YarnException(msg);
}
@@ -1103,8 +1215,10 @@ public class FederationClientInterceptor
ReservationDeleteRequest request) throws YarnException, IOException {
if (request == null || request.getReservationId() == null) {
routerMetrics.incrDeleteReservationFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing deleteReservation request or reservationId.", null);
+ String msg = "Missing deleteReservation request or reservationId.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), DELETE_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -1118,16 +1232,22 @@ public class FederationClientInterceptor
federationFacade.deleteReservationHomeSubCluster(reservationId);
long stopTime = clock.getTime();
routerMetrics.succeededDeleteReservationRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), DELETE_RESERVATION,
+ TARGET_CLIENT_RM_SERVICE);
return response;
}
} catch (Exception ex) {
routerMetrics.incrUpdateReservationFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Unable to reservation delete due to exception.", ex);
+ String msg = "Unable to reservation delete due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), DELETE_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
routerMetrics.incrDeleteReservationFailedRetrieved();
String msg = String.format("Reservation %s failed to be delete.", reservationId);
+ RouterAuditLogger.logFailure(user.getShortUserName(), DELETE_RESERVATION, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
throw new YarnException(msg);
}
@@ -1136,20 +1256,28 @@ public class FederationClientInterceptor
GetNodesToLabelsRequest request) throws YarnException, IOException {
if (request == null) {
routerMetrics.incrNodeToLabelsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getNodesToLabels request.", null);
+ String msg = "Missing getNodesToLabels request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_NODETOLABELS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getNodeToLabels",
- new Class[] {GetNodesToLabelsRequest.class}, new Object[] {request});
+ new Class[] {GetNodesToLabelsRequest.class}, new Object[] {request});
Collection clusterNodes = null;
try {
clusterNodes = invokeConcurrent(remoteMethod, GetNodesToLabelsResponse.class);
} catch (Exception ex) {
routerMetrics.incrNodeToLabelsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get node label due to exception.", ex);
+ String msg = "Unable to get node label due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_NODETOLABELS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetNodeToLabelsRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_NODETOLABELS,
+ TARGET_CLIENT_RM_SERVICE);
// Merge the NodesToLabelsResponse
return RouterYarnClientUtils.mergeNodesToLabelsResponse(clusterNodes);
}
@@ -1159,7 +1287,10 @@ public class FederationClientInterceptor
GetLabelsToNodesRequest request) throws YarnException, IOException {
if (request == null) {
routerMetrics.incrLabelsToNodesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getLabelsToNodes request.", null);
+ String msg = "Missing getNodesToLabels request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_LABELSTONODES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getLabelsToNodes",
@@ -1169,10 +1300,15 @@ public class FederationClientInterceptor
labelNodes = invokeConcurrent(remoteMethod, GetLabelsToNodesResponse.class);
} catch (Exception ex) {
routerMetrics.incrLabelsToNodesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get label node due to exception.", ex);
+ String msg = "Unable to get label node due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_LABELSTONODES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetLabelsToNodesRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_LABELSTONODES,
+ TARGET_CLIENT_RM_SERVICE);
// Merge the LabelsToNodesResponse
return RouterYarnClientUtils.mergeLabelsToNodes(labelNodes);
}
@@ -1182,7 +1318,10 @@ public class FederationClientInterceptor
GetClusterNodeLabelsRequest request) throws YarnException, IOException {
if (request == null) {
routerMetrics.incrClusterNodeLabelsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getClusterNodeLabels request.", null);
+ String msg = "Missing getClusterNodeLabels request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODELABELS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getClusterNodeLabels",
@@ -1192,11 +1331,15 @@ public class FederationClientInterceptor
nodeLabels = invokeConcurrent(remoteMethod, GetClusterNodeLabelsResponse.class);
} catch (Exception ex) {
routerMetrics.incrClusterNodeLabelsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get cluster nodeLabels due to exception.",
- ex);
+ String msg = "Unable to get cluster nodeLabels due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODELABELS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetClusterNodeLabelsRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CLUSTERNODELABELS,
+ TARGET_CLIENT_RM_SERVICE);
// Merge the ClusterNodeLabelsResponse
return RouterYarnClientUtils.mergeClusterNodeLabelsResponse(nodeLabels);
}
@@ -1225,9 +1368,11 @@ public class FederationClientInterceptor
if (request == null || request.getApplicationAttemptId() == null
|| request.getApplicationAttemptId().getApplicationId() == null) {
routerMetrics.incrAppAttemptReportFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing getApplicationAttemptReport request or applicationId " +
- "or applicationAttemptId information.", null);
+ String msg = "Missing getApplicationAttemptReport request or applicationId " +
+ "or applicationAttemptId information.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPT_REPORT, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -1237,10 +1382,12 @@ public class FederationClientInterceptor
subClusterId = getApplicationHomeSubCluster(applicationId);
} catch (YarnException e) {
routerMetrics.incrAppAttemptReportFailedRetrieved();
- RouterServerUtil.logAndThrowException("ApplicationAttempt " +
- request.getApplicationAttemptId() + " belongs to Application " +
- request.getApplicationAttemptId().getApplicationId() +
- " does not exist in FederationStateStore.", e);
+ String msgFormat = "ApplicationAttempt %s belongs to " +
+ "Application %s does not exist in FederationStateStore.";
+ ApplicationAttemptId applicationAttemptId = request.getApplicationAttemptId();
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPT_REPORT, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msgFormat, applicationAttemptId, applicationId);
+ RouterServerUtil.logAndThrowException(e, msgFormat, applicationAttemptId, applicationId);
}
ApplicationClientProtocol clientRMProxy =
@@ -1254,6 +1401,8 @@ public class FederationClientInterceptor
String msg = String.format(
"Unable to get the applicationAttempt report for %s to SubCluster %s.",
request.getApplicationAttemptId(), subClusterId.getId());
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPT_REPORT, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
RouterServerUtil.logAndThrowException(msg, e);
}
@@ -1265,6 +1414,8 @@ public class FederationClientInterceptor
long stopTime = clock.getTime();
routerMetrics.succeededAppAttemptReportRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_APPLICATION_ATTEMPT_REPORT,
+ TARGET_CLIENT_RM_SERVICE);
return response;
}
@@ -1273,8 +1424,10 @@ public class FederationClientInterceptor
GetApplicationAttemptsRequest request) throws YarnException, IOException {
if (request == null || request.getApplicationId() == null) {
routerMetrics.incrAppAttemptsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getApplicationAttempts " +
- "request or application id.", null);
+ String msg = "Missing getApplicationAttempts request or application id.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPTS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg);
}
long startTime = clock.getTime();
@@ -1284,8 +1437,10 @@ public class FederationClientInterceptor
subClusterId = getApplicationHomeSubCluster(applicationId);
} catch (YarnException ex) {
routerMetrics.incrAppAttemptsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Application " + applicationId +
- " does not exist in FederationStateStore.", ex);
+ String msg = "Application " + applicationId + " does not exist in FederationStateStore.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPTS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId);
@@ -1294,8 +1449,11 @@ public class FederationClientInterceptor
response = clientRMProxy.getApplicationAttempts(request);
} catch (Exception ex) {
routerMetrics.incrAppAttemptsFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get the application attempts for " +
- applicationId + " from SubCluster " + subClusterId.getId(), ex);
+ String msg = "Unable to get the application attempts for " +
+ applicationId + " from SubCluster " + subClusterId.getId();
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPTS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
if (response == null) {
@@ -1305,6 +1463,8 @@ public class FederationClientInterceptor
}
long stopTime = clock.getTime();
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_APPLICATION_ATTEMPTS,
+ TARGET_CLIENT_RM_SERVICE, applicationId);
routerMetrics.succeededAppAttemptsRetrieved(stopTime - startTime);
return response;
}
@@ -1314,8 +1474,10 @@ public class FederationClientInterceptor
GetContainerReportRequest request) throws YarnException, IOException {
if(request == null || request.getContainerId() == null){
routerMetrics.incrGetContainerReportFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getContainerReport request " +
- "or containerId", null);
+ String msg = "Missing getContainerReport request or containerId";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERREPORT, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -1326,8 +1488,10 @@ public class FederationClientInterceptor
subClusterId = getApplicationHomeSubCluster(applicationId);
} catch (YarnException ex) {
routerMetrics.incrGetContainerReportFailedRetrieved();
- RouterServerUtil.logAndThrowException("Application " + applicationId +
- " does not exist in FederationStateStore.", ex);
+ String msg = "Application " + applicationId + " does not exist in FederationStateStore.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERREPORT, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId);
@@ -1348,6 +1512,8 @@ public class FederationClientInterceptor
}
long stopTime = clock.getTime();
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CONTAINERREPORT,
+ TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId);
routerMetrics.succeededGetContainerReportRetrieved(stopTime - startTime);
return response;
}
@@ -1357,8 +1523,10 @@ public class FederationClientInterceptor
throws YarnException, IOException {
if (request == null || request.getApplicationAttemptId() == null) {
routerMetrics.incrGetContainersFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing getContainers request or ApplicationAttemptId.", null);
+ String msg = "Missing getContainers request or ApplicationAttemptId.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -1368,8 +1536,10 @@ public class FederationClientInterceptor
subClusterId = getApplicationHomeSubCluster(applicationId);
} catch (YarnException ex) {
routerMetrics.incrGetContainersFailedRetrieved();
- RouterServerUtil.logAndThrowException("Application " + applicationId +
- " does not exist in FederationStateStore.", ex);
+ String msg = "Application " + applicationId + " does not exist in FederationStateStore.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId);
@@ -1379,8 +1549,11 @@ public class FederationClientInterceptor
response = clientRMProxy.getContainers(request);
} catch (Exception ex) {
routerMetrics.incrGetContainersFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get the containers for " +
- applicationId + " from SubCluster " + subClusterId.getId(), ex);
+ String msg = "Unable to get the containers for " +
+ applicationId + " from SubCluster " + subClusterId.getId();
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
if (response == null) {
@@ -1390,6 +1563,8 @@ public class FederationClientInterceptor
}
long stopTime = clock.getTime();
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CONTAINERS,
+ TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId);
routerMetrics.succeededGetContainersRetrieved(stopTime - startTime);
return response;
}
@@ -1400,16 +1575,20 @@ public class FederationClientInterceptor
if (request == null || request.getRenewer() == null) {
routerMetrics.incrGetDelegationTokenFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing getDelegationToken request or Renewer.", null);
+ String msg = "Missing getDelegationToken request or Renewer.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_DELEGATIONTOKEN, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
try {
// Verify that the connection is kerberos authenticated
if (!RouterServerUtil.isAllowedDelegationTokenOp()) {
routerMetrics.incrGetDelegationTokenFailedRetrieved();
- throw new IOException(
- "Delegation Token can be issued only with kerberos authentication.");
+ String msg = "Delegation Token can be issued only with kerberos authentication.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_DELEGATIONTOKEN, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ throw new IOException(msg);
}
long startTime = clock.getTime();
@@ -1432,9 +1611,13 @@ public class FederationClientInterceptor
long stopTime = clock.getTime();
routerMetrics.succeededGetDelegationTokenRetrieved((stopTime - startTime));
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_DELEGATIONTOKEN,
+ TARGET_CLIENT_RM_SERVICE);
return GetDelegationTokenResponse.newInstance(routerRMDTToken);
} catch(IOException e) {
routerMetrics.incrGetDelegationTokenFailedRetrieved();
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_DELEGATIONTOKEN, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, "getDelegationToken error, errMsg = " + e.getMessage());
throw new YarnException(e);
}
}
@@ -1446,8 +1629,10 @@ public class FederationClientInterceptor
if (!RouterServerUtil.isAllowedDelegationTokenOp()) {
routerMetrics.incrRenewDelegationTokenFailedRetrieved();
- throw new IOException(
- "Delegation Token can be renewed only with kerberos authentication");
+ String msg = "Delegation Token can be renewed only with kerberos authentication";
+ RouterAuditLogger.logFailure(user.getShortUserName(), RENEW_DELEGATIONTOKEN, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ throw new IOException(msg);
}
long startTime = clock.getTime();
@@ -1455,17 +1640,21 @@ public class FederationClientInterceptor
Token<RMDelegationTokenIdentifier> token = new Token<>(
protoToken.getIdentifier().array(), protoToken.getPassword().array(),
new Text(protoToken.getKind()), new Text(protoToken.getService()));
- String user = RouterServerUtil.getRenewerForToken(token);
- long nextExpTime = this.getTokenSecretManager().renewToken(token, user);
+ String renewer = RouterServerUtil.getRenewerForToken(token);
+ long nextExpTime = this.getTokenSecretManager().renewToken(token, renewer);
RenewDelegationTokenResponse renewResponse =
Records.newRecord(RenewDelegationTokenResponse.class);
renewResponse.setNextExpirationTime(nextExpTime);
long stopTime = clock.getTime();
routerMetrics.succeededRenewDelegationTokenRetrieved((stopTime - startTime));
+ RouterAuditLogger.logSuccess(user.getShortUserName(), RENEW_DELEGATIONTOKEN,
+ TARGET_CLIENT_RM_SERVICE);
return renewResponse;
} catch (IOException e) {
routerMetrics.incrRenewDelegationTokenFailedRetrieved();
+ RouterAuditLogger.logFailure(user.getShortUserName(), RENEW_DELEGATIONTOKEN, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, "renewDelegationToken error, errMsg = " + e.getMessage());
throw new YarnException(e);
}
}
@@ -1476,8 +1665,10 @@ public class FederationClientInterceptor
try {
if (!RouterServerUtil.isAllowedDelegationTokenOp()) {
routerMetrics.incrCancelDelegationTokenFailedRetrieved();
- throw new IOException(
- "Delegation Token can be cancelled only with kerberos authentication");
+ String msg = "Delegation Token can be cancelled only with kerberos authentication";
+ RouterAuditLogger.logFailure(user.getShortUserName(), CANCEL_DELEGATIONTOKEN, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ throw new IOException(msg);
}
long startTime = clock.getTime();
@@ -1485,13 +1676,17 @@ public class FederationClientInterceptor
Token<RMDelegationTokenIdentifier> token = new Token<>(
protoToken.getIdentifier().array(), protoToken.getPassword().array(),
new Text(protoToken.getKind()), new Text(protoToken.getService()));
- String user = UserGroupInformation.getCurrentUser().getUserName();
- this.getTokenSecretManager().cancelToken(token, user);
+ String currentUser = UserGroupInformation.getCurrentUser().getUserName();
+ this.getTokenSecretManager().cancelToken(token, currentUser);
long stopTime = clock.getTime();
routerMetrics.succeededCancelDelegationTokenRetrieved((stopTime - startTime));
+ RouterAuditLogger.logSuccess(user.getShortUserName(), CANCEL_DELEGATIONTOKEN,
+ TARGET_CLIENT_RM_SERVICE);
return Records.newRecord(CancelDelegationTokenResponse.class);
} catch (IOException e) {
routerMetrics.incrCancelDelegationTokenFailedRetrieved();
+ RouterAuditLogger.logFailure(user.getShortUserName(), CANCEL_DELEGATIONTOKEN, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, "cancelDelegationToken error, errMsg = " + e.getMessage());
throw new YarnException(e);
}
}
@@ -1502,22 +1697,27 @@ public class FederationClientInterceptor
if (request == null || request.getApplicationAttemptId() == null
|| request.getApplicationAttemptId().getApplicationId() == null) {
routerMetrics.incrFailAppAttemptFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing failApplicationAttempt request or applicationId " +
- "or applicationAttemptId information.", null);
+ String msg = "Missing failApplicationAttempt request or applicationId " +
+ "or applicationAttemptId information.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), FAIL_APPLICATIONATTEMPT, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
SubClusterId subClusterId = null;
- ApplicationId applicationId = request.getApplicationAttemptId().getApplicationId();
+ ApplicationAttemptId applicationAttemptId = request.getApplicationAttemptId();
+ ApplicationId applicationId = applicationAttemptId.getApplicationId();
try {
subClusterId = getApplicationHomeSubCluster(applicationId);
} catch (YarnException e) {
routerMetrics.incrFailAppAttemptFailedRetrieved();
- RouterServerUtil.logAndThrowException("ApplicationAttempt " +
- request.getApplicationAttemptId() + " belongs to Application " +
- request.getApplicationAttemptId().getApplicationId() +
- " does not exist in FederationStateStore.", e);
+ String msg = "ApplicationAttempt " +
+ applicationAttemptId + " belongs to Application " + applicationId +
+ " does not exist in FederationStateStore.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), FAIL_APPLICATIONATTEMPT, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, e);
}
ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId);
@@ -1526,8 +1726,11 @@ public class FederationClientInterceptor
response = clientRMProxy.failApplicationAttempt(request);
} catch (Exception e) {
routerMetrics.incrFailAppAttemptFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get the applicationAttempt report for " +
- request.getApplicationAttemptId() + " to SubCluster " + subClusterId.getId(), e);
+ String msg = "Unable to get the applicationAttempt report for " +
+ applicationAttemptId + " to SubCluster " + subClusterId;
+ RouterAuditLogger.logFailure(user.getShortUserName(), FAIL_APPLICATIONATTEMPT, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, e);
}
if (response == null) {
@@ -1538,6 +1741,8 @@ public class FederationClientInterceptor
long stopTime = clock.getTime();
routerMetrics.succeededFailAppAttemptRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), FAIL_APPLICATIONATTEMPT,
+ TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId);
return response;
}
@@ -1548,9 +1753,11 @@ public class FederationClientInterceptor
if (request == null || request.getApplicationId() == null
|| request.getApplicationPriority() == null) {
routerMetrics.incrUpdateAppPriorityFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing updateApplicationPriority request or applicationId " +
- "or applicationPriority information.", null);
+ String msg = "Missing updateApplicationPriority request or applicationId " +
+ "or applicationPriority information.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONPRIORITY, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -1561,8 +1768,11 @@ public class FederationClientInterceptor
subClusterId = getApplicationHomeSubCluster(applicationId);
} catch (YarnException e) {
routerMetrics.incrUpdateAppPriorityFailedRetrieved();
- RouterServerUtil.logAndThrowException("Application " +
- request.getApplicationId() + " does not exist in FederationStateStore.", e);
+ String msg = "Application " +
+ applicationId + " does not exist in FederationStateStore.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONPRIORITY, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, e);
}
ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId);
@@ -1571,8 +1781,11 @@ public class FederationClientInterceptor
response = clientRMProxy.updateApplicationPriority(request);
} catch (Exception e) {
routerMetrics.incrFailAppAttemptFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to update application priority for " +
- request.getApplicationId() + " to SubCluster " + subClusterId.getId(), e);
+ String msg = "Unable to update application priority for " +
+ applicationId + " to SubCluster " + subClusterId;
+ RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONPRIORITY, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, e);
}
if (response == null) {
@@ -1583,6 +1796,8 @@ public class FederationClientInterceptor
long stopTime = clock.getTime();
routerMetrics.succeededUpdateAppPriorityRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), UPDATE_APPLICATIONPRIORITY,
+ TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId);
return response;
}
@@ -1592,9 +1807,10 @@ public class FederationClientInterceptor
if (request == null || request.getContainerId() == null
|| request.getCommand() == null) {
routerMetrics.incrSignalToContainerFailedRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing signalToContainer request or containerId " +
- "or command information.", null);
+ String msg = "Missing signalToContainer request or containerId or command information.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), SIGNAL_TOCONTAINER, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -1605,8 +1821,10 @@ public class FederationClientInterceptor
subClusterId = getApplicationHomeSubCluster(applicationId);
} catch (YarnException ex) {
routerMetrics.incrSignalToContainerFailedRetrieved();
- RouterServerUtil.logAndThrowException("Application " + applicationId +
- " does not exist in FederationStateStore.", ex);
+ String msg = "Application " + applicationId + " does not exist in FederationStateStore.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), SIGNAL_TOCONTAINER, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId);
@@ -1614,17 +1832,22 @@ public class FederationClientInterceptor
try {
response = clientRMProxy.signalToContainer(request);
} catch (Exception ex) {
- RouterServerUtil.logAndThrowException("Unable to signal to container for " +
- applicationId + " from SubCluster " + subClusterId.getId(), ex);
+ String msg = "Unable to signal to container for " + applicationId +
+ " from SubCluster " + subClusterId;
+ RouterAuditLogger.logFailure(user.getShortUserName(), SIGNAL_TOCONTAINER, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
if (response == null) {
LOG.error("No response when signal to container of " +
- "the applicationId {} to SubCluster {}.", applicationId, subClusterId.getId());
+ "the applicationId {} to SubCluster {}.", applicationId, subClusterId);
}
long stopTime = clock.getTime();
routerMetrics.succeededSignalToContainerRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), SIGNAL_TOCONTAINER,
+ TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId);
return response;
}
@@ -1635,9 +1858,11 @@ public class FederationClientInterceptor
if (request == null || request.getApplicationId() == null
|| request.getApplicationTimeouts() == null) {
routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
- RouterServerUtil.logAndThrowException(
- "Missing updateApplicationTimeouts request or applicationId " +
- "or applicationTimeouts information.", null);
+ String msg = "Missing updateApplicationTimeouts request or applicationId or " +
+ "applicationTimeouts information.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONTIMEOUTS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
@@ -1647,9 +1872,10 @@ public class FederationClientInterceptor
subClusterId = getApplicationHomeSubCluster(applicationId);
} catch (YarnException e) {
routerMetrics.incrFailAppAttemptFailedRetrieved();
- RouterServerUtil.logAndThrowException("Application " +
- request.getApplicationId() +
- " does not exist in FederationStateStore.", e);
+ String msg = "Application " + applicationId + " does not exist in FederationStateStore.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONTIMEOUTS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, e);
}
ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId);
@@ -1658,8 +1884,11 @@ public class FederationClientInterceptor
response = clientRMProxy.updateApplicationTimeouts(request);
} catch (Exception e) {
routerMetrics.incrFailAppAttemptFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to update application timeout for " +
- request.getApplicationId() + " to SubCluster " + subClusterId.getId(), e);
+ String msg = "Unable to update application timeout for " + applicationId +
+ " to SubCluster " + subClusterId;
+ RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONTIMEOUTS, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, e);
}
if (response == null) {
@@ -1670,6 +1899,8 @@ public class FederationClientInterceptor
long stopTime = clock.getTime();
routerMetrics.succeededUpdateAppTimeoutsRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), UPDATE_APPLICATIONTIMEOUTS,
+ TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId);
return response;
}
@@ -1678,7 +1909,10 @@ public class FederationClientInterceptor
GetAllResourceProfilesRequest request) throws YarnException, IOException {
if (request == null) {
routerMetrics.incrGetResourceProfilesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getResourceProfiles request.", null);
+ String msg = "Missing getResourceProfiles request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCEPROFILES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getResourceProfiles",
@@ -1688,11 +1922,16 @@ public class FederationClientInterceptor
resourceProfiles = invokeConcurrent(remoteMethod, GetAllResourceProfilesResponse.class);
} catch (Exception ex) {
routerMetrics.incrGetResourceProfilesFailedRetrieved();
+ String msg = "Unable to get resource profiles due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCEPROFILES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
RouterServerUtil.logAndThrowException("Unable to get resource profiles due to exception.",
ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetResourceProfilesRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_RESOURCEPROFILES,
+ TARGET_CLIENT_RM_SERVICE);
return RouterYarnClientUtils.mergeClusterResourceProfilesResponse(resourceProfiles);
}
@@ -1701,8 +1940,10 @@ public class FederationClientInterceptor
GetResourceProfileRequest request) throws YarnException, IOException {
if (request == null || request.getProfileName() == null) {
routerMetrics.incrGetResourceProfileFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getResourceProfile request or profileName.",
- null);
+ String msg = "Missing getResourceProfile request or profileName.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCEPROFILE, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getResourceProfile",
@@ -1712,11 +1953,15 @@ public class FederationClientInterceptor
resourceProfile = invokeConcurrent(remoteMethod, GetResourceProfileResponse.class);
} catch (Exception ex) {
routerMetrics.incrGetResourceProfileFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get resource profile due to exception.",
- ex);
+ String msg = "Unable to get resource profile due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCEPROFILE, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetResourceProfileRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_RESOURCEPROFILE,
+ TARGET_CLIENT_RM_SERVICE);
return RouterYarnClientUtils.mergeClusterResourceProfileResponse(resourceProfile);
}
@@ -1725,7 +1970,10 @@ public class FederationClientInterceptor
GetAllResourceTypeInfoRequest request) throws YarnException, IOException {
if (request == null) {
routerMetrics.incrResourceTypeInfoFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getResourceTypeInfo request.", null);
+ String msg = "Missing getResourceTypeInfo request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCETYPEINFO, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getResourceTypeInfo",
@@ -1735,11 +1983,16 @@ public class FederationClientInterceptor
listResourceTypeInfo = invokeConcurrent(remoteMethod, GetAllResourceTypeInfoResponse.class);
} catch (Exception ex) {
routerMetrics.incrResourceTypeInfoFailedRetrieved();
- LOG.error("Unable to get all resource type info node due to exception.", ex);
+ String msg = "Unable to get all resource type info node due to exception.";
+ LOG.error(msg, ex);
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCETYPEINFO, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
throw ex;
}
long stopTime = clock.getTime();
routerMetrics.succeededGetResourceTypeInfoRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_RESOURCETYPEINFO,
+ TARGET_CLIENT_RM_SERVICE);
// Merge the GetAllResourceTypeInfoResponse
return RouterYarnClientUtils.mergeResourceTypes(listResourceTypeInfo);
}
@@ -1755,8 +2008,10 @@ public class FederationClientInterceptor
GetAttributesToNodesRequest request) throws YarnException, IOException {
if (request == null || request.getNodeAttributes() == null) {
routerMetrics.incrGetAttributesToNodesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getAttributesToNodes request " +
- "or nodeAttributes.", null);
+ String msg = "Missing getAttributesToNodes request or nodeAttributes.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_ATTRIBUTESTONODES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getAttributesToNodes",
@@ -1767,11 +2022,15 @@ public class FederationClientInterceptor
invokeConcurrent(remoteMethod, GetAttributesToNodesResponse.class);
} catch (Exception ex) {
routerMetrics.incrGetAttributesToNodesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get attributes to nodes due to exception.",
- ex);
+ String msg = "Unable to get attributes to nodes due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_ATTRIBUTESTONODES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetAttributesToNodesRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_ATTRIBUTESTONODES,
+ TARGET_CLIENT_RM_SERVICE);
return RouterYarnClientUtils.mergeAttributesToNodesResponse(attributesToNodesResponses);
}
@@ -1780,7 +2039,10 @@ public class FederationClientInterceptor
GetClusterNodeAttributesRequest request) throws YarnException, IOException {
if (request == null) {
routerMetrics.incrGetClusterNodeAttributesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getClusterNodeAttributes request.", null);
+ String msg = "Missing getClusterNodeAttributes request.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODEATTRIBUTES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getClusterNodeAttributes",
@@ -1791,11 +2053,15 @@ public class FederationClientInterceptor
GetClusterNodeAttributesResponse.class);
} catch (Exception ex) {
routerMetrics.incrGetClusterNodeAttributesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get cluster node attributes due " +
- " to exception.", ex);
+ String msg = "Unable to get cluster node attributes due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODEATTRIBUTES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetClusterNodeAttributesRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CLUSTERNODEATTRIBUTES,
+ TARGET_CLIENT_RM_SERVICE);
return RouterYarnClientUtils.mergeClusterNodeAttributesResponse(clusterNodeAttributesResponses);
}
@@ -1804,8 +2070,10 @@ public class FederationClientInterceptor
GetNodesToAttributesRequest request) throws YarnException, IOException {
if (request == null || request.getHostNames() == null) {
routerMetrics.incrGetNodesToAttributesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Missing getNodesToAttributes request or " +
- "hostNames.", null);
+ String msg = "Missing getNodesToAttributes request or hostNames.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_NODESTOATTRIBUTES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, null);
}
long startTime = clock.getTime();
ClientMethod remoteMethod = new ClientMethod("getNodesToAttributes",
@@ -1816,11 +2084,15 @@ public class FederationClientInterceptor
GetNodesToAttributesResponse.class);
} catch (Exception ex) {
routerMetrics.incrGetNodesToAttributesFailedRetrieved();
- RouterServerUtil.logAndThrowException("Unable to get nodes to attributes due " +
- " to exception.", ex);
+ String msg = "Unable to get nodes to attributes due to exception.";
+ RouterAuditLogger.logFailure(user.getShortUserName(), GET_NODESTOATTRIBUTES, UNKNOWN,
+ TARGET_CLIENT_RM_SERVICE, msg);
+ RouterServerUtil.logAndThrowException(msg, ex);
}
long stopTime = clock.getTime();
routerMetrics.succeededGetNodesToAttributesRetrieved(stopTime - startTime);
+ RouterAuditLogger.logSuccess(user.getShortUserName(), GET_NODESTOATTRIBUTES,
+ TARGET_CLIENT_RM_SERVICE);
return RouterYarnClientUtils.mergeNodesToAttributesResponse(nodesToAttributesResponses);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterAuditLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterAuditLogger.java
index 48d3ef6c0fe..287048237ee 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterAuditLogger.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterAuditLogger.java
@@ -65,7 +65,7 @@ public class TestRouterAuditLogger {
* Test the AuditLog format with key-val pair.
*/
@Test
- public void testKeyValLogFormat() throws Exception {
+ public void testKeyValLogFormat() {
StringBuilder actLog = new StringBuilder();
StringBuilder expLog = new StringBuilder();
@@ -80,7 +80,7 @@ public class TestRouterAuditLogger {
assertEquals(expLog.toString(), actLog.toString());
// append another k1=null pair and test
- RouterAuditLogger.add(RouterAuditLogger.Keys.APPID, (String) null, actLog);
+ RouterAuditLogger.add(RouterAuditLogger.Keys.APPID, null, actLog);
expLog.append("\tAPPID=null");
assertEquals(expLog.toString(), actLog.toString());
@@ -102,7 +102,10 @@ public class TestRouterAuditLogger {
expLog.append("USER=test\t");
if (checkIP) {
InetAddress ip = Server.getRemoteIp();
- expLog.append(RouterAuditLogger.Keys.IP.name() + "=" + ip.getHostAddress() + "\t");
+ if (ip != null && ip.getHostAddress() != null) {
+ expLog.append(RouterAuditLogger.Keys.IP.name())
+ .append("=").append(ip.getHostAddress()).append("\t");
+ }
}
expLog.append("OPERATION=oper\tTARGET=tgt\tRESULT=SUCCESS");
if (appId != null) {
@@ -149,7 +152,11 @@ public class TestRouterAuditLogger {
expLog.append("USER=test\t");
if (checkIP) {
InetAddress ip = Server.getRemoteIp();
- expLog.append(RouterAuditLogger.Keys.IP.name() + "=" + ip.getHostAddress() + "\t");
+ if (ip != null && ip.getHostAddress() != null) {
+ expLog.append(RouterAuditLogger.Keys.IP.name())
+ .append("=")
+ .append(ip.getHostAddress()).append("\t");
+ }
}
expLog.append("OPERATION=oper\tTARGET=tgt\tRESULT=FAILURE\t");
expLog.append("DESCRIPTION=description of an audit log");
@@ -179,7 +186,7 @@ public class TestRouterAuditLogger {
* Test {@link RouterAuditLogger}.
*/
@Test
- public void testRouterAuditLoggerWithOutIP() throws Exception {
+ public void testRouterAuditLoggerWithOutIP() {
testSuccessLogFormat(false);
testFailureLogFormat(false);
}
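Editorial note on the patch above: the FederationClientInterceptor hunks all follow one shape — when validation or the sub-cluster call fails, the message is built once, passed to RouterAuditLogger.logFailure and then to the existing logAndThrowException; when the call succeeds, logSuccess is recorded alongside the metrics update. A stand-alone sketch of that shape follows (AuditLog, Metrics and RpcException are simplified stand-ins, not the real Router classes).

// Stand-alone sketch, not part of the patch: condenses the audit pattern used by
// the interceptor methods above. AuditLog, Metrics and RpcException are simplified
// stand-ins for RouterAuditLogger, RouterMetrics and YarnException.
public final class AuditPatternSketch {

  interface AuditLog {
    void logFailure(String user, String op, String target, String msg);
    void logSuccess(String user, String op, String target);
  }

  interface Metrics {
    void incrFailed();
    void succeeded(long durationMs);
  }

  static final class RpcException extends Exception {
    RpcException(String msg) {
      super(msg);
    }
  }

  static String handle(String user, String op, String target, String request,
      AuditLog audit, Metrics metrics) throws RpcException {
    if (request == null) {
      metrics.incrFailed();
      String msg = "Missing " + op + " request.";
      audit.logFailure(user, op, target, msg);   // audit the failure before throwing
      throw new RpcException(msg);
    }
    long start = System.currentTimeMillis();
    String response = "response-for-" + request; // stands in for the sub-cluster RPC
    long stop = System.currentTimeMillis();
    audit.logSuccess(user, op, target);          // success is audited next to the metric
    metrics.succeeded(stop - start);
    return response;
  }

  private AuditPatternSketch() {
  }
}

The ordering is the point: the audit entry is written before the exception propagates, so a failed RPC leaves a trace even if the caller swallows the error.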
From 0d1b4a3556d24641c14bbfc7ae1b985d4a998649 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Sat, 15 Apr 2023 09:05:43 -0700
Subject: [PATCH 57/78] HADOOP-18590. Publish SBOM artifacts (#5555).
Contributed by Dongjoon Hyun.
---
pom.xml | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/pom.xml b/pom.xml
index fa768296e37..42a11795274 100644
--- a/pom.xml
+++ b/pom.xml
@@ -118,6 +118,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
+    <cyclonedx.version>2.7.6</cyclonedx.version>
     <shell-executable>bash</shell-executable>
@@ -607,6 +608,10 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
         <plugin>
           <groupId>com.github.spotbugs</groupId>
           <artifactId>spotbugs-maven-plugin</artifactId>
         </plugin>
+        <plugin>
+          <groupId>org.cyclonedx</groupId>
+          <artifactId>cyclonedx-maven-plugin</artifactId>
+        </plugin>
@@ -748,6 +753,26 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
+    <profile>
+      <id>dist</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.cyclonedx</groupId>
+            <artifactId>cyclonedx-maven-plugin</artifactId>
+            <version>${cyclonedx.version}</version>
+            <executions>
+              <execution>
+                <phase>package</phase>
+                <goals>
+                  <goal>makeBom</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
     <profile>
       <id>sign</id>
From 2c4d6bf33da56a0538ca95047daeefc918e26c41 Mon Sep 17 00:00:00 2001
From: yl09099 <33595968+yl09099@users.noreply.github.com>
Date: Mon, 17 Apr 2023 09:27:52 +0800
Subject: [PATCH 58/78] YARN-11465. Improved YarnClient Log Format (#5550)
Co-authored-by: yl09099
Reviewed-by: Shilun Fan
Signed-off-by: Shilun Fan
---
.../client/api/ContainerShellWebSocket.java | 9 ++-
.../api/async/impl/NMClientAsyncImpl.java | 50 +++++++--------
.../yarn/client/api/impl/AMRMClientImpl.java | 62 +++++++++----------
.../yarn/client/api/impl/NMClientImpl.java | 10 ++-
.../yarn/client/api/impl/YarnClientImpl.java | 31 +++++-----
.../hadoop/yarn/client/TestGetGroups.java | 2 +-
.../client/TestRMFailoverProxyProvider.java | 1 +
...gerAdministrationProtocolPBClientImpl.java | 4 +-
.../api/impl/TestSharedCacheClientImpl.java | 2 +-
9 files changed, 82 insertions(+), 89 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/ContainerShellWebSocket.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/ContainerShellWebSocket.java
index 66a901fc36a..5656484fca1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/ContainerShellWebSocket.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/ContainerShellWebSocket.java
@@ -69,17 +69,16 @@ public class ContainerShellWebSocket {
@OnWebSocketConnect
public void onConnect(Session s) {
initTerminal(s);
- LOG.info(s.getRemoteAddress().getHostString() + " connected!");
+ LOG.info("{} connected!", s.getRemoteAddress().getHostString());
}
@OnWebSocketClose
public void onClose(Session session, int status, String reason) {
if (status==1000) {
- LOG.info(session.getRemoteAddress().getHostString() +
- " closed, status: " + status);
+ LOG.info("{} closed, status: {}", session.getRemoteAddress().getHostString(), status);
} else {
- LOG.warn(session.getRemoteAddress().getHostString() +
- " closed, status: " + status + " Reason: " + reason);
+ LOG.warn("{} closed, status:" +
+ " {} Reason: {}.", session.getRemoteAddress().getHostString(), status, reason);
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/NMClientAsyncImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/NMClientAsyncImpl.java
index eb5b9b227fb..4a4c50607da 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/NMClientAsyncImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/NMClientAsyncImpl.java
@@ -133,7 +133,7 @@ public class NMClientAsyncImpl extends NMClientAsync {
this.maxThreadPoolSize = conf.getInt(
YarnConfiguration.NM_CLIENT_ASYNC_THREAD_POOL_MAX_SIZE,
YarnConfiguration.DEFAULT_NM_CLIENT_ASYNC_THREAD_POOL_MAX_SIZE);
- LOG.info("Upper bound of the thread pool size is " + maxThreadPoolSize);
+ LOG.info("Upper bound of the thread pool size is {}.", maxThreadPoolSize);
client.init(conf);
super.serviceInit(conf);
@@ -186,9 +186,8 @@ public class NMClientAsyncImpl extends NMClientAsync {
// always increasing the pool-size
int newThreadPoolSize = Math.min(maxThreadPoolSize,
idealThreadPoolSize + INITIAL_THREAD_POOL_SIZE);
- LOG.info("Set NMClientAsync thread pool size to " +
- newThreadPoolSize + " as the number of nodes to talk to is "
- + nodeNum);
+ LOG.info("Set NMClientAsync thread pool size to {} " +
+ "as the number of nodes to talk to is {}.", newThreadPoolSize, nodeNum);
threadPool.setCorePoolSize(newThreadPoolSize);
}
}
@@ -252,8 +251,7 @@ public class NMClientAsyncImpl extends NMClientAsync {
try {
events.put(new StartContainerEvent(container, containerLaunchContext));
} catch (InterruptedException e) {
- LOG.warn("Exception when scheduling the event of starting Container " +
- container.getId());
+ LOG.warn("Exception when scheduling the event of starting Container {}", container.getId());
callbackHandler.onStartContainerError(container.getId(), e);
}
}
@@ -276,8 +274,8 @@ public class NMClientAsyncImpl extends NMClientAsync {
try {
events.put(new UpdateContainerResourceEvent(container, true));
} catch (InterruptedException e) {
- LOG.warn("Exception when scheduling the event of increasing resource of "
- + "Container " + container.getId());
+ LOG.warn("Exception when scheduling the event of increasing " +
+ "resource of Container {}", container.getId());
handler.onIncreaseContainerResourceError(container.getId(), e);
}
}
@@ -300,8 +298,8 @@ public class NMClientAsyncImpl extends NMClientAsync {
try {
events.put(new UpdateContainerResourceEvent(container, false));
} catch (InterruptedException e) {
- LOG.warn("Exception when scheduling the event of increasing resource of "
- + "Container " + container.getId());
+ LOG.warn("Exception when scheduling the event of " +
+ "increasing resource of Container {}.", container.getId());
handler.onUpdateContainerResourceError(container.getId(), e);
}
}
@@ -325,8 +323,8 @@ public class NMClientAsyncImpl extends NMClientAsync {
client.getNodeIdOfStartedContainer(containerId),
containerLaunchContex, autoCommit));
} catch (InterruptedException e) {
- LOG.warn("Exception when scheduling the event of re-initializing of "
- + "Container " + containerId);
+ LOG.warn("Exception when scheduling the event of " +
+ "re-initializing of Container {}", containerId);
handler.onContainerReInitializeError(containerId, e);
}
}
@@ -349,8 +347,7 @@ public class NMClientAsyncImpl extends NMClientAsync {
client.getNodeIdOfStartedContainer(containerId),
null, ContainerEventType.RESTART_CONTAINER));
} catch (InterruptedException e) {
- LOG.warn("Exception when scheduling the event of restart of "
- + "Container " + containerId);
+ LOG.warn("Exception when scheduling the event of restart of Container {}", containerId);
handler.onContainerRestartError(containerId, e);
}
}
@@ -373,8 +370,8 @@ public class NMClientAsyncImpl extends NMClientAsync {
client.getNodeIdOfStartedContainer(containerId),
null, ContainerEventType.ROLLBACK_LAST_REINIT));
} catch (InterruptedException e) {
- LOG.warn("Exception when scheduling the event Rollback re-initialization"
- + " of Container " + containerId);
+ LOG.warn("Exception when scheduling the event Rollback " +
+ "re-initialization of Container {}", containerId);
handler.onRollbackLastReInitializationError(containerId, e);
}
}
@@ -397,8 +394,8 @@ public class NMClientAsyncImpl extends NMClientAsync {
client.getNodeIdOfStartedContainer(containerId),
null, ContainerEventType.COMMIT_LAST_REINT));
} catch (InterruptedException e) {
- LOG.warn("Exception when scheduling the event Commit re-initialization"
- + " of Container " + containerId);
+ LOG.warn("Exception when scheduling the event " +
+ "Commit re-initialization of Container {}", containerId);
handler.onCommitLastReInitializationError(containerId, e);
}
}
@@ -413,8 +410,7 @@ public class NMClientAsyncImpl extends NMClientAsync {
events.put(new ContainerEvent(containerId, nodeId, null,
ContainerEventType.STOP_CONTAINER));
} catch (InterruptedException e) {
- LOG.warn("Exception when scheduling the event of stopping Container " +
- containerId);
+ LOG.warn("Exception when scheduling the event of stopping Container {}", containerId);
callbackHandler.onStopContainerError(containerId, e);
}
}
@@ -424,8 +420,8 @@ public class NMClientAsyncImpl extends NMClientAsync {
events.put(new ContainerEvent(containerId, nodeId, null,
ContainerEventType.QUERY_CONTAINER));
} catch (InterruptedException e) {
- LOG.warn("Exception when scheduling the event of querying the status" +
- " of Container " + containerId);
+ LOG.warn("Exception when scheduling the event of querying " +
+ "the status of Container {}", containerId);
callbackHandler.onGetContainerStatusError(containerId, e);
}
}
@@ -730,7 +726,7 @@ public class NMClientAsyncImpl extends NMClientAsync {
switch(containerEvent.getType()) {
case REINITIALIZE_CONTAINER:
if (!(containerEvent instanceof ReInitializeContainerEvevnt)) {
- LOG.error("Unexpected Event.. [" +containerEvent.getType() + "]");
+ LOG.error("Unexpected Event.. [{}]", containerEvent.getType());
return ContainerState.FAILED;
}
ReInitializeContainerEvevnt rEvent =
@@ -771,8 +767,8 @@ public class NMClientAsyncImpl extends NMClientAsync {
}
break;
default:
- LOG.warn("Event of type [" + containerEvent.getType() + "] not" +
- " expected here..");
+ LOG.warn("Event of type [{}] not" +
+ " expected here..", containerEvent.getType());
break;
}
if (handlerError != null) {
@@ -942,7 +938,7 @@ public class NMClientAsyncImpl extends NMClientAsync {
@Override
public void run() {
ContainerId containerId = event.getContainerId();
- LOG.info("Processing Event " + event + " for Container " + containerId);
+ LOG.info("Processing Event {} for Container {}", event, containerId);
if (event.getType() == ContainerEventType.QUERY_CONTAINER) {
try {
ContainerStatus containerStatus = client.getContainerStatus(
@@ -962,7 +958,7 @@ public class NMClientAsyncImpl extends NMClientAsync {
} else {
StatefulContainer container = containers.get(containerId);
if (container == null) {
- LOG.info("Container " + containerId + " is already stopped or failed");
+ LOG.info("Container {} is already stopped or failed", containerId);
} else {
container.handle(event);
if (isCompletelyDone(container)) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
index 274920f7e1b..0a450b532af 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
@@ -478,11 +478,12 @@ public class AMRMClientImpl extends AMRMClient {
continue;
}
if (LOG.isDebugEnabled()) {
- LOG.debug("RM has confirmed changed resource allocation for "
- + "container " + containerId + ". Current resource allocation:"
- + changedContainer.getContainer().getResource()
- + ". Remove pending change request:"
- + pendingChange.get(containerId).getValue());
+ LOG.debug("RM has confirmed changed resource allocation for container {}. " +
+ "Current resource allocation:{}. " +
+ "Remove pending change request:{}",
+ containerId,
+ changedContainer.getContainer().getResource(),
+ pendingChange.get(containerId).getValue());
}
pendingChange.remove(containerId);
}
@@ -495,9 +496,9 @@ public class AMRMClientImpl extends AMRMClient {
String nodeId = token.getNodeId().toString();
if (LOG.isDebugEnabled()) {
if (getNMTokenCache().containsToken(nodeId)) {
- LOG.debug("Replacing token for : " + nodeId);
+ LOG.debug("Replacing token for : {}", nodeId);
} else {
- LOG.debug("Received new token for : " + nodeId);
+ LOG.debug("Received new token for : {}", nodeId);
}
}
getNMTokenCache().setToken(nodeId, token.getToken());
@@ -544,8 +545,7 @@ public class AMRMClientImpl extends AMRMClient {
dedupedRacks.addAll(req.getRacks());
if(req.getRacks().size() != dedupedRacks.size()) {
Joiner joiner = Joiner.on(',');
- LOG.warn("ContainerRequest has duplicate racks: "
- + joiner.join(req.getRacks()));
+ LOG.warn("ContainerRequest has duplicate racks: {}", joiner.join(req.getRacks()));
}
}
Set<String> inferredRacks = resolveRacks(req.getNodes());
@@ -573,8 +573,7 @@ public class AMRMClientImpl extends AMRMClient {
HashSet<String> dedupedNodes = new HashSet<String>(req.getNodes());
if(dedupedNodes.size() != req.getNodes().size()) {
Joiner joiner = Joiner.on(',');
- LOG.warn("ContainerRequest has duplicate nodes: "
- + joiner.join(req.getNodes()));
+ LOG.warn("ContainerRequest has duplicate nodes: {}", joiner.join(req.getNodes()));
}
for (String node : dedupedNodes) {
addResourceRequest(req.getPriority(), node,
@@ -636,11 +635,12 @@ public class AMRMClientImpl extends AMRMClient {
Preconditions.checkNotNull(container, "Container cannot be null!!");
Preconditions.checkNotNull(updateContainerRequest,
"UpdateContainerRequest cannot be null!!");
- LOG.info("Requesting Container update : " +
- "container=" + container + ", " +
- "updateType=" + updateContainerRequest.getContainerUpdateType() + ", " +
- "targetCapability=" + updateContainerRequest.getCapability() + ", " +
- "targetExecType=" + updateContainerRequest.getExecutionType());
+ LOG.info("Requesting Container update : container={}, updateType={}," +
+ " targetCapability={}, targetExecType={}",
+ container,
+ updateContainerRequest.getContainerUpdateType(),
+ updateContainerRequest.getCapability(),
+ updateContainerRequest.getExecutionType());
if (updateContainerRequest.getCapability() != null &&
updateContainerRequest.getExecutionType() == null) {
validateContainerResourceChangeRequest(
@@ -770,7 +770,7 @@ public class AMRMClientImpl extends AMRMClient {
// Ensure node requests are accompanied by requests for
// corresponding rack
if (rack == null) {
- LOG.warn("Failed to resolve rack for node " + node + ".");
+ LOG.warn("Failed to resolve rack for node {}.", node);
} else {
racks.add(rack);
}
@@ -941,12 +941,13 @@ public class AMRMClientImpl extends AMRMClient {
addResourceRequestToAsk(resourceRequestInfo.remoteRequest);
if (LOG.isDebugEnabled()) {
- LOG.debug("Adding request to ask " + resourceRequestInfo.remoteRequest);
- LOG.debug("addResourceRequest:" + " applicationId="
- + " priority=" + priority.getPriority()
- + " resourceName=" + resourceName + " numContainers="
- + resourceRequestInfo.remoteRequest.getNumContainers()
- + " #asks=" + ask.size());
+ LOG.debug("Adding request to ask {}", resourceRequestInfo.remoteRequest);
+ LOG.debug("addResourceRequest: applicationId= priority={}" +
+ " resourceName={} numContainers={} #asks={}",
+ priority.getPriority(),
+ resourceName,
+ resourceRequestInfo.remoteRequest.getNumContainers(),
+ ask.size());
}
}
@@ -972,17 +973,16 @@ public class AMRMClientImpl extends AMRMClient {
}
if (LOG.isDebugEnabled()) {
- LOG.debug("AFTER decResourceRequest:"
- + " allocationRequestId=" + req.getAllocationRequestId()
- + " priority=" + priority.getPriority()
- + " resourceName=" + resourceName + " numContainers="
- + resourceRequestInfo.remoteRequest.getNumContainers()
- + " #asks=" + ask.size());
+ LOG.debug("AFTER decResourceRequest: allocationRequestId={} " +
+ "priority={} resourceName={} numContainers={} #asks={}",
+ req.getAllocationRequestId(), priority.getPriority(),
+ resourceName,
+ resourceRequestInfo.remoteRequest.getNumContainers(), ask.size());
}
}
} else {
- LOG.info("No remoteRequestTable found with allocationRequestId="
- + req.getAllocationRequestId());
+ LOG.info("No remoteRequestTable found with allocationRequestId={}",
+ req.getAllocationRequestId());
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java
index 6b2cf46bfa9..e2d7b9f28ea 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java
@@ -128,13 +128,11 @@ public class NMClientImpl extends NMClient {
stopContainer(startedContainer.getContainerId(),
startedContainer.getNodeId());
} catch (YarnException e) {
- LOG.error("Failed to stop Container " +
- startedContainer.getContainerId() +
- " when stopping NMClientImpl");
+ LOG.error("Failed to stop Container {} when stopping NMClientImpl",
+ startedContainer.getContainerId());
} catch (IOException e) {
- LOG.error("Failed to stop Container " +
- startedContainer.getContainerId() +
- " when stopping NMClientImpl");
+ LOG.error("Failed to stop Container {} when stopping NMClientImpl",
+ startedContainer.getContainerId());
}
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
index 36a5b04ad11..19d03a7da73 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
@@ -353,7 +353,7 @@ public class YarnClientImpl extends YarnClient {
throw new YarnException("Failed to submit " + applicationId +
" to YARN : " + appReport.getDiagnostics());
}
- LOG.info("Submitted application " + applicationId);
+ LOG.info("Submitted application {}", applicationId);
break;
}
@@ -368,8 +368,9 @@ public class YarnClientImpl extends YarnClient {
// is blocked here too long.
if (++pollCount % 10 == 0) {
LOG.info("Application submission is not finished, " +
- "submitted application " + applicationId +
- " is still in " + state);
+ "submitted application {} is still in {}",
+ applicationId,
+ state);
}
try {
Thread.sleep(submitPollIntervalMillis);
@@ -382,8 +383,8 @@ public class YarnClientImpl extends YarnClient {
} catch (ApplicationNotFoundException ex) {
// FailOver or RM restart happens before RMStateStore saves
// ApplicationState
- LOG.info("Re-submit application " + applicationId + "with the " +
- "same ApplicationSubmissionContext");
+ LOG.info("Re-submit application {} with the" +
+ " same ApplicationSubmissionContext", applicationId);
rmClient.submitApplication(request);
}
}
@@ -408,7 +409,7 @@ public class YarnClientImpl extends YarnClient {
throw new IOException(
"Can't get Master Kerberos principal for use as renewer");
}
- LOG.debug("Delegation Token Renewer: " + masterPrincipal);
+ LOG.debug("Delegation Token Renewer: {}", masterPrincipal);
LogAggregationFileControllerFactory factory =
new LogAggregationFileControllerFactory(conf);
@@ -421,8 +422,7 @@ public class YarnClientImpl extends YarnClient {
fs.addDelegationTokens(masterPrincipal, credentials);
if (finalTokens != null) {
for (org.apache.hadoop.security.token.Token<?> token : finalTokens) {
- LOG.info("Added delegation token for log aggregation path "
- + remoteRootLogDir + "; "+token);
+ LOG.info("Added delegation token for log aggregation path {}; {}", remoteRootLogDir, token);
}
}
@@ -485,8 +485,7 @@ public class YarnClientImpl extends YarnClient {
return timelineClient.getDelegationToken(timelineDTRenewer);
} catch (Exception e) {
if (timelineServiceBestEffort) {
- LOG.warn("Failed to get delegation token from the timeline server: "
- + e.getMessage());
+ LOG.warn("Failed to get delegation token from the timeline server: {}", e.getMessage());
return null;
}
throw new IOException(e);
@@ -527,7 +526,7 @@ public class YarnClientImpl extends YarnClient {
@Override
public void failApplicationAttempt(ApplicationAttemptId attemptId)
throws YarnException, IOException {
- LOG.info("Failing application attempt " + attemptId);
+ LOG.info("Failing application attempt {}.", attemptId);
FailApplicationAttemptRequest request =
Records.newRecord(FailApplicationAttemptRequest.class);
request.setApplicationAttemptId(attemptId);
@@ -560,7 +559,7 @@ public class YarnClientImpl extends YarnClient {
KillApplicationResponse response =
rmClient.forceKillApplication(request);
if (response.getIsKillCompleted()) {
- LOG.info("Killed application " + applicationId);
+ LOG.info("Killed application {}", applicationId);
break;
}
@@ -573,7 +572,7 @@ public class YarnClientImpl extends YarnClient {
if (++pollCount % 10 == 0) {
LOG.info(
- "Waiting for application " + applicationId + " to be killed.");
+ "Waiting for application {} to be killed.", applicationId);
}
Thread.sleep(asyncApiPollIntervalMillis);
}
@@ -1080,7 +1079,7 @@ public class YarnClientImpl extends YarnClient {
public void signalToContainer(ContainerId containerId,
SignalContainerCommand command)
throws YarnException, IOException {
- LOG.info("Signalling container " + containerId + " with command " + command);
+ LOG.info("Signalling container {} with command {}", containerId, command);
SignalContainerRequest request =
SignalContainerRequest.newInstance(containerId, command);
rmClient.signalToContainer(request);
@@ -1186,9 +1185,9 @@ public class YarnClientImpl extends YarnClient {
client.stop();
}
} catch (WebSocketException e) {
- LOG.debug("Websocket exception: " + e.getMessage());
+ LOG.debug("Websocket exception: {}", e.getMessage());
} catch (Throwable t) {
- LOG.error("Fail to shell to container: " + t.getMessage());
+ LOG.error("Fail to shell to container: {}", t.getMessage());
}
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java
index 51b522ae392..d11ea78607c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java
@@ -79,7 +79,7 @@ public class TestGetGroups extends GetGroupsTestBase {
boolean rmStarted = rmStartedSignal.await(60000L, TimeUnit.MILLISECONDS);
Assert.assertTrue("ResourceManager failed to start up.", rmStarted);
- LOG.info("ResourceManager RMAdmin address: " +
+ LOG.info("ResourceManager RMAdmin address: {}.",
conf.get(YarnConfiguration.RM_ADMIN_ADDRESS));
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailoverProxyProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailoverProxyProvider.java
index ce9af23744f..b4fd175fae8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailoverProxyProvider.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailoverProxyProvider.java
@@ -189,6 +189,7 @@ public class TestRMFailoverProxyProvider {
* and {@link AutoRefreshRMFailoverProxyProvider#performFailover(Object)}
* gets called.
*/
+ @SuppressWarnings("unchecked")
@Test
public void testAutoRefreshFailoverChange() throws Exception {
conf.setClass(YarnConfiguration.CLIENT_FAILOVER_NO_HA_PROXY_PROVIDER,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java
index dfc2a0fc4b7..08a6e0c78ec 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java
@@ -106,8 +106,8 @@ public class TestResourceManagerAdministrationProtocolPBClientImpl {
boolean rmStarted = rmStartedSignal.await(60000L, TimeUnit.MILLISECONDS);
Assert.assertTrue("ResourceManager failed to start up.", rmStarted);
- LOG.info("ResourceManager RMAdmin address: "
- + configuration.get(YarnConfiguration.RM_ADMIN_ADDRESS));
+ LOG.info("ResourceManager RMAdmin address: {}.",
+ configuration.get(YarnConfiguration.RM_ADMIN_ADDRESS));
client = new ResourceManagerAdministrationProtocolPBClientImpl(1L,
getProtocolAddress(configuration), configuration);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestSharedCacheClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestSharedCacheClientImpl.java
index b297d926c05..1b179b138a9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestSharedCacheClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestSharedCacheClientImpl.java
@@ -76,7 +76,7 @@ public class TestSharedCacheClientImpl {
localFs.close();
}
} catch (IOException ioe) {
- LOG.info("IO exception in closing file system)");
+ LOG.info("IO exception in closing file system");
ioe.printStackTrace();
}
}
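Editorial note on the patch above: every hunk makes the same substitution — string concatenation inside log statements is replaced with SLF4J parameterized messages, so message assembly is deferred until the logger has confirmed the level is enabled. A stand-alone illustration of the two styles (class and values are hypothetical, not taken from the patch):

// Stand-alone sketch, not part of the patch: contrasts concatenated and
// parameterized SLF4J calls. The parameterized form skips message assembly
// entirely when the log level is disabled.
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class LogFormatSketch {
  private static final Logger LOG = LoggerFactory.getLogger(LogFormatSketch.class);

  public static void main(String[] args) {
    String event = "QUERY_CONTAINER";
    String containerId = "container_1681700000000_0001_01_000001";
    // Old style: the String is built even if INFO is turned off.
    LOG.info("Processing Event " + event + " for Container " + containerId);
    // New style adopted by the patch: placeholders are filled lazily.
    LOG.info("Processing Event {} for Container {}", event, containerId);
  }
}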
From 6ea10cf41b78bcc54a2b205b2a9f6231f6f574b7 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Mon, 17 Apr 2023 10:18:33 +0100
Subject: [PATCH 59/78] HADOOP-18696. ITestS3ABucketExistence arn test
failures. (#5557)
Explicitly sets the fs.s3a.endpoint.region to eu-west-1 so
the ARN-referenced fs creation fails with unknown store
rather than IllegalArgumentException.
Contributed by Steve Loughran.
---
.../hadoop/fs/s3a/ITestS3ABucketExistence.java | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
index fb295f3f09f..9485202f64c 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.test.LambdaTestUtils;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESSPOINT_REQUIRED;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
@@ -47,7 +48,7 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase {
private FileSystem fs;
private final String randomBucket =
- "random-bucket-" + UUID.randomUUID().toString();
+ "random-bucket-" + UUID.randomUUID();
private final URI uri = URI.create(FS_S3A + "://" + randomBucket + "/");
@@ -163,7 +164,7 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase {
@Test
public void testAccessPointProbingV2() throws Exception {
describe("Test V2 bucket probing using an AccessPoint ARN");
- Configuration configuration = createConfigurationWithProbe(2);
+ Configuration configuration = createArnConfiguration();
String accessPointArn = "arn:aws:s3:eu-west-1:123456789012:accesspoint/" + randomBucket;
configuration.set(String.format(InternalConstants.ARN_BUCKET_OPTION, randomBucket),
accessPointArn);
@@ -175,7 +176,7 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase {
@Test
public void testAccessPointRequired() throws Exception {
describe("Test V2 bucket probing with 'fs.s3a.accesspoint.required' property.");
- Configuration configuration = createConfigurationWithProbe(2);
+ Configuration configuration = createArnConfiguration();
configuration.set(AWS_S3_ACCESSPOINT_REQUIRED, "true");
intercept(PathIOException.class,
InternalConstants.AP_REQUIRED_EXCEPTION,
@@ -189,6 +190,17 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase {
() -> FileSystem.get(uri, configuration));
}
+ /**
+ * Create a configuration which has bucket probe 2 and the endpoint.region
+ * option set to "eu-west-1" to match that of the ARNs generated.
+ * @return a configuration for tests which are expected to fail in specific ways.
+ */
+ private Configuration createArnConfiguration() {
+ Configuration configuration = createConfigurationWithProbe(2);
+ configuration.set(AWS_REGION, "eu-west-1");
+ return configuration;
+ }
+
@Override
protected Configuration getConfiguration() {
Configuration configuration = super.getConfiguration();
From 405ed1dde6bcccca1e07e45a356a89c1b583e236 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Tue, 18 Apr 2023 10:12:07 +0100
Subject: [PATCH 60/78] HADOOP-18470. Hadoop 3.3.5 release wrap-up (#5558)
Post-release updates of the branches
* Add jdiff xml files from 3.3.5 release.
* Declare 3.3.5 as the latest stable release.
* Copy release notes.
---
.../jdiff/Apache_Hadoop_Common_3.3.5.xml | 40640 ++++++++++++++++
.../markdown/release/3.3.5/CHANGELOG.3.3.5.md | 359 +
.../release/3.3.5/RELEASENOTES.3.3.5.md | 89 +
.../jdiff/Apache_Hadoop_HDFS_3.3.5.xml | 835 +
.../Apache_Hadoop_MapReduce_Common_3.3.5.xml | 113 +
.../Apache_Hadoop_MapReduce_Core_3.3.5.xml | 28963 +++++++++++
...pache_Hadoop_MapReduce_JobClient_3.3.5.xml | 16 +
hadoop-project-dist/pom.xml | 2 +-
.../jdiff/Apache_Hadoop_YARN_API_3.3.5.xml | 26420 ++++++++++
.../jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml | 3067 ++
.../jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml | 3982 ++
...Apache_Hadoop_YARN_Server_Common_3.3.5.xml | 1456 +
12 files changed, 105941 insertions(+), 1 deletion(-)
create mode 100644 hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml
create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md
create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md
create mode 100644 hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml
create mode 100644 hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.3.5.xml
create mode 100644 hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.3.5.xml
create mode 100644 hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.3.5.xml
create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_API_3.3.5.xml
create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml
create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml
create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.3.5.xml
diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml
new file mode 100644
index 00000000000..b788b4497fe
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml
@@ -0,0 +1,40640 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @param customMessage depcrication message
+ @deprecated use {@link #addDeprecation(String key, String newKey,
+ String customMessage)} instead]]>
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKey key that take up the values of deprecated key
+ @param customMessage deprecation message]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKey key that takes up the value of deprecated key]]>
+
+
+
+
+
+ key is deprecated.
+
+ @param key the parameter which is to be checked for deprecation
+ @return true if the key is deprecated and
+ false otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param name resource to be added, the classpath is examined for a file
+ with that name.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param url url of the resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param file file-path of resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ WARNING: The contents of the InputStream will be cached, by this method.
+ So use this sparingly because it does increase the memory consumption.
+
+ @param in InputStream to deserialize the object from. In will be read from
+ when a get or set is called next. After it is read the stream will be
+ closed.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param in InputStream to deserialize the object from.
+ @param name the name of the resource because InputStream.toString is not
+ very descriptive some times.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param conf Configuration object from which to load properties]]>
+
+
+
+
+
+
+
+
+
+
+ name property, null if
+ no such property exists. If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null.
+
+ Values are processed for variable expansion
+ before being returned.
+
+ As a side effect get loads the properties from the sources if called for
+ the first time as a lazy init.
+
+ @param name the property name, will be trimmed before get value.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property, but only for
+ names which have no valid value, usually non-existent or commented
+ out in XML.
+
+ @param name the property name
+ @return true if the property name exists without value]]>
+
+
+
+
+
+ name property as a trimmed String,
+ null if no such property exists.
+ If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+ name property as a trimmed String,
+ defaultValue if no such property exists.
+ See @{Configuration#getTrimmed} for more details.
+
+ @param name the property name.
+ @param defaultValue the property default value.
+ @return the value of the name or defaultValue
+ if it is not set.]]>
+
+
+
+
+
+ name property, without doing
+ variable expansion.If the key is
+ deprecated, it returns the value of the first key which replaces
+ the deprecated key and is not null.
+
+ @param name the property name.
+ @return the value of the name property or
+ its replacing property and null if no such property exists.]]>
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated or there is a deprecated name associated to it,
+ it sets the value to both names. Name will be trimmed before put into
+ configuration.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated, it also sets the value to
+ the keys that replace the deprecated key. Name will be trimmed before put
+ into configuration.
+
+ @param name property name.
+ @param value property value.
+ @param source the place that this configuration value came from
+ (For debugging).
+ @throws IllegalArgumentException when the value or name is null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name. If the key is deprecated,
+ it returns the value of the first key which replaces the deprecated key
+ and is not null.
+ If no such property exists,
+ then defaultValue is returned.
+
+ @param name property name, will be trimmed before get value.
+ @param defaultValue default value.
+ @return property value, or defaultValue if the property
+ doesn't exist.]]>
+
+
+
+
+
+
+ name property as an int.
+
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid int,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as an int,
+ or defaultValue.]]>
+
+
+
+
+
+ name property as a set of comma-delimited
+ int values.
+
+ If no such property exists, an empty array is returned.
+
+ @param name property name
+ @return property value interpreted as an array of comma-delimited
+ int values]]>
+
+
+
+
+
+
+ name property to an int.
+
+ @param name property name.
+ @param value int value of the property.]]>
+
+
+
+
+
+
+ name property as a long.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid long,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a long or
+ human readable format. If no such property exists, the provided default
+ value is returned, or if the specified value is not a valid
+ long or human readable format, then an error is thrown. You
+ can use the following suffix (case insensitive): k(kilo), m(mega), g(giga),
+ t(tera), p(peta), e(exa)
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a long.
+
+ @param name property name.
+ @param value long value of the property.]]>
+
+
+
+
+
+
+ name property as a float.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid float,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a float,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a float.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a double.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid double,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a double,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a double.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a boolean.
+ If no such property is specified, or if the specified value is not a valid
+ boolean, then defaultValue is returned.
+
+ @param name property name.
+ @param defaultValue default value.
+ @return property value as a boolean,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a boolean.
+
+ @param name property name.
+ @param value boolean value of the property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property to the given type. This
+ is equivalent to set(<name>, value.toString()).
+ @param name property name
+ @param value new value
+ @param enumeration type]]>
+
+
+
+
+
+
+ enumeration type
+ @throws IllegalArgumentException If mapping is illegal for the type
+ provided
+ @return enumeration type]]>
+
+
+
+
+
+
+
+ name to the given time duration. This
+ is equivalent to set(<name>, value + <time suffix>).
+ @param name Property name
+ @param value Time duration
+ @param unit Unit of time]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as a Pattern.
+ If no such property is specified, or if the specified value is not a valid
+ Pattern, then DefaultValue is returned.
+ Note that the returned value is NOT trimmed by this method.
+
+ @param name property name
+ @param defaultValue default value
+ @return property value as a compiled Pattern, or defaultValue]]>
+
+
+
+
+
+
+ Pattern.
+ If the pattern is passed as null, sets the empty pattern which results in
+ further calls to getPattern(...) returning the default value.
+
+ @param name property name
+ @param pattern new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as
+ a collection of Strings.
+ If no such property is specified then empty collection is returned.
+
+ This is an optimized version of {@link #getStrings(String)}
+
+ @param name property name.
+ @return property value as a collection of Strings.]]>
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then null is returned.
+
+ @param name property name.
+ @return property value as an array of Strings,
+ or null.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of Strings,
+ or default value.]]>
+
+
+
+
+
+ name property as
+ a collection of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then empty Collection is returned.
+
+ @param name property name.
+ @return property value as a collection of Strings, or empty Collection]]>
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then an empty array is returned.
+
+ @param name property name.
+ @return property value as an array of trimmed Strings,
+ or empty array.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of trimmed Strings,
+ or default value.]]>
+
+
+
+
+
+
+ name property as
+ as comma delimited values.
+
+ @param name property name.
+ @param values The values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostProperty as a
+ InetSocketAddress. If hostProperty is
+ null, addressProperty will be used. This
+ is useful for cases where we want to differentiate between host
+ bind address and address clients should use to establish connection.
+
+ @param hostProperty bind host property name.
+ @param addressProperty address property name.
+ @param defaultAddressValue the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+
+ name property as a
+ InetSocketAddress.
+ @param name property name.
+ @param defaultAddress the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+ name property as
+ a host:port.
+ @param name property name.
+ @param addr inetSocketAddress addr.]]>
+
+
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address. If the host and address
+ properties are configured the host component of the address will be combined
+ with the port component of the addr to generate the address. This is to allow
+ optional control over which host name is used in multi-home bind-host
+ cases where a host can have multiple names
+ @param hostProperty the bind-host configuration name
+ @param addressProperty the service address configuration name
+ @param defaultAddressValue the service default address configuration value
+ @param addr InetSocketAddress of the service listener
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address.
+ @param name property name.
+ @param addr InetSocketAddress of a listener to store in the given property
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property
+ as an array of Class.
+ The value of the property specifies a list of comma separated class names.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the property name.
+ @param defaultValue default value.
+ @return property value as a Class[],
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a Class.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+
+ name property as a Class
+ implementing the interface specified by xface.
+
+ If no such property is specified, then defaultValue is
+ returned.
+
+ An exception is thrown if the returned class does not implement the named
+ interface.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @param xface the interface implemented by the named class.
+ @param Interface class type.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a List
+ of objects implementing the interface specified by xface.
+
+ An exception is thrown if any of the classes does not exist, or if it does
+ not implement the named interface.
+
+ @param name the property name.
+ @param xface the interface implemented by the classes named by
+ name.
+ @param Interface class type.
+ @return a List of objects implementing xface.]]>
+
+
+
+
+
+
+
+ name property to the name of a
+ theClass implementing the given interface xface.
+
+ An exception is thrown if theClass does not implement the
+ interface xface.
+
+ @param name property name.
+ @param theClass property value.
+ @param xface the interface implemented by the named class.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return an input stream attached to the resource.]]>
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return a reader attached to the resource.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ String
+ key-value pairs in the configuration.
+
+ @return an iterator over the entries.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When property name is not empty and the property exists in the
+ configuration, this method writes the property and its attributes
+ to the {@link Writer}.
+
+
+
+ When property name is null or empty, this method writes all the
+ configuration properties and their attributes to the {@link Writer}.
+
+
+
+ When property name is not empty but the property doesn't exist in
+ the configuration, this method throws an {@link IllegalArgumentException}.
+
+
+ @param propertyName xml property name.
+ @param out the writer to write to.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+ When propertyName is not empty, and the property exists
+ in the configuration, the format of the output would be,
+
+ When propertyName is not empty, and the property is not
+ found in the configuration, this method will throw an
+ {@link IllegalArgumentException}.
+
+
+
+ @param config the configuration
+ @param propertyName property name
+ @param out the Writer to write to
+ @throws IOException raised on errors performing I/O.
+ @throws IllegalArgumentException when property name is not
+ empty and the property is not found in configuration]]>
+
+
+
+
+
+
+
+
+ { "properties" :
+ [ { key : "key1",
+ value : "value1",
+ isFinal : "key1.isFinal",
+ resource : "key1.resource" },
+ { key : "key2",
+ value : "value2",
+ isFinal : "ke2.isFinal",
+ resource : "key2.resource" }
+ ]
+ }
+
+
+ It does not output the properties of the configuration object which
+ is loaded from an input stream.
+
+
+ @param config the configuration
+ @param out the Writer to write to
+ @throws IOException raised on errors performing I/O.]]>
+
Configurations are specified by resources. A resource contains a set of
+ name/value pairs as XML data. Each resource is named by either a
+ String or by a {@link Path}. If named by a String,
+ then the classpath is examined for a file with that name. If named by a
+ Path, then the local filesystem is examined directly, without
+ referring to the classpath.
+
+
Unless explicitly turned off, Hadoop by default specifies two
+ resources, loaded in-order from the classpath:
core-site.xml: Site-specific configuration for a given hadoop
+ installation.
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.
+
+
Final Parameters
+
+
Configuration parameters may be declared final.
+ Once a resource declares a value final, no subsequently-loaded
+ resource can alter that value.
+ For example, one might define a final parameter with:
+
When conf.get("tempdir") is called, then ${basedir}
+ will be resolved to another property in this Configuration, while
+ ${user.name} would then ordinarily be resolved to the value
+ of the System property with that name.
+
When conf.get("otherdir") is called, then ${env.BASE_DIR}
+ will be resolved to the value of the ${BASE_DIR} environment variable.
+ It supports ${env.NAME:-default} and ${env.NAME-default} notations.
+ The former is resolved to "default" if ${NAME} environment variable is undefined
+ or its value is empty.
+ The latter behaves the same way only if ${NAME} is undefined.
+
By default, warnings will be given to any deprecated configuration
+ parameters and these are suppressible by configuring
+ log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in
+ log4j.properties file.
+
+
Tags
+
+
Optionally we can tag related properties together by using tag
+ attributes. System tags are defined by hadoop.tags.system property. Users
+ can define there own custom tags in hadoop.tags.custom property.
+
+
Properties marked with tags can be retrieved with conf
+ .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags
+ (Arrays.asList("YARN","SECURITY")).
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #createKey(String, byte[], Options)} method.
+
+ @param name the base name of the key
+ @param options the options for the new key.
+ @return the version name of the first version of the key.
+ @throws IOException raised on errors performing I/O.
+ @throws NoSuchAlgorithmException no such algorithm exception.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #rollNewVersion(String, byte[])} method.
+
+ @param name the basename of the key
+ @return the name of the new version of the key
+ @throws IOException raised on errors performing I/O.
+ @throws NoSuchAlgorithmException This exception is thrown when a particular
+ cryptographic algorithm is requested
+ but is not available in the environment.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProvider implementations must be thread safe.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NULL if
+ a provider for the specified URI scheme could not be found.
+ @throws IOException thrown if the provider failed to initialize.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri has syntax error]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri is
+ not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri
+ determines a configuration property name,
+ fs.AbstractFileSystem.scheme.impl whose value names the
+ AbstractFileSystem class.
+
+ The entire URI and conf is passed to the AbstractFileSystem factory method.
+
+ @param uri for the file system to be created.
+ @param conf which is passed to the file system impl.
+
+ @return file system for the given URI.
+
+ @throws UnsupportedFileSystemException if the file system for
+ uri is not supported.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations such as HDFS metadata
+ synchronization is essential to guarantee consistency of read requests
+ particularly in HA setting.
+ @throws IOException raised on errors performing I/O.
+ @throws UnsupportedOperationException Unsupported Operation Exception.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications, must
+ include entries for user, group, and others for compatibility with
+ permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ BlockLocation(offset: 0, length: BLOCK_SIZE,
+ hosts: {"host1:9866", "host2:9866, host3:9866"})
+
+
+ And if the file is erasure-coded, each BlockLocation represents a logical
+ block groups. Value offset is the offset of a block group in the file and
+ value length is the total length of a block group. Hosts of a BlockLocation
+ are the datanodes that holding all the data blocks and parity blocks of a
+ block group.
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ A BlockLocation example will be like:
+
+
+ Please refer to
+ {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or
+ {@link FileContext#getFileBlockLocations(Path, long, long)}
+ for more examples.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ This does not change the current offset of a file, and is thread-safe.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if reached
+ end-of-stream
+ @throws IOException if there is some error performing the read]]>
+
+
+
+
+
+
+
+
+ This operation provides similar semantics to
+ {@link #read(long, ByteBuffer)}, the difference is that this method is
+ guaranteed to read data until the {@link ByteBuffer} is full, or until
+ the end of the data stream is reached.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @throws IOException if there is some error performing the read
+ @throws EOFException the end of the data was reached before
+ the read operation completed
+ @see #read(long, ByteBuffer)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ @param buf
+ the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if
+ reach end-of-stream
+ @throws IOException
+ if there is some error performing the read]]>
+
CREATE - to create a file if it does not exist,
+ else throw FileAlreadyExists.
+
APPEND - to append to a file if it exists,
+ else throw FileNotFoundException.
+
OVERWRITE - to truncate a file if it exists,
+ else throw FileNotFoundException.
+
CREATE|APPEND - to create a file if it does not exist,
+ else append to an existing file.
+
CREATE|OVERWRITE - to create a file if it does not exist,
+ else overwrite an existing file.
+
SYNC_BLOCK - to force closed blocks to the disk device.
+ In addition {@link Syncable#hsync()} should be called after each write,
+ if true synchronous behavior is required.
+
LAZY_PERSIST - Create the block on transient storage (RAM) if
+ available.
+
APPEND_NEWBLOCK - Append data to a new block instead of end of the last
+ partial block.
+
+
+ Following combinations are not valid and will result in
+ {@link HadoopIllegalArgumentException}:
+
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws AccessControlException if access denied
+ @throws IOException If an IO Error occurred
+ @throws UnresolvedLinkException If unresolved link occurred.
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Progress - to report progress on the operation - default null
+
Permission - umask is applied against permission: default is
+ FsPermissions:getDefault()
+
+
CreateParent - create missing parent path; default is to not
+ to create parents
+
The defaults for the following are SS defaults of the file
+ server implementing the target path. Not all parameters make sense
+ for all kinds of file system - eg. localFS ignores Blocksize,
+ replication, checksum
+
+
BufferSize - buffersize used in FSDataOutputStream
+
Blocksize - block size for file blocks
+
ReplicationFactor - replication for blocks
+
ChecksumParam - Checksum parameters. server default is used
+ if not specified.
+
+
+
+ @return {@link FSDataOutputStream} for created file
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file f already exists
+ @throws FileNotFoundException If parent of f does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of f is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dir already
+ exists
+ @throws FileNotFoundException If parent of dir does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of dir is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for dir
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dir is not valid]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is invalid
+
+ @return if delete success true, not false.]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+ @return input stream.]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+ @return output stream.]]>
+
+
+
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+
+ @throws AccessControlException If access is denied
+ @throws FileNotFoundException If file f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Fails if src is a file and dst is a directory.
+
Fails if src is a directory and dst is a file.
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails if the dst
+ already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites the dst if
+ it is a file or an empty directory. Rename fails if dst is a non-empty
+ directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for details
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @param options rename options.
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If dst already exists and
+ options has {@link Options.Rename#OVERWRITE}
+ option false.
+ @throws FileNotFoundException If src does not exist
+ @throws ParentNotDirectoryException If parent of dst is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for src
+ and dst is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws HadoopIllegalArgumentException If username or
+ groupname is invalid.]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+ f is
+ not supported.]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If the given path does not refer to a symlink
+ or an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Given a path referring to a symlink of form:
+
+ {@literal <---}X{@literal --->}
+ fs://host/A/B/link
+ {@literal <-----}Y{@literal ----->}
+
+ In this path X is the scheme and authority that identify the file system,
+ and Y is the path leading up to the final path component "link". If Y is
+ a symlink itself then let Y' be the target of Y and X' be the scheme and
+ authority of Y'. Symlink targets may:
+
+ 1. Fully qualified URIs
+
+ fs://hostX/A/B/file Resolved according to the target file system.
+
+ 2. Partially qualified URIs (eg scheme but no host)
+
+ fs:///A/B/file Resolved according to the target file system. Eg resolving
+ a symlink to hdfs:///A results in an exception because
+ HDFS URIs must be fully qualified, while a symlink to
+ file:///A will not since Hadoop's local file systems
+ require partially qualified URIs.
+
+ 3. Relative paths
+
+ path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
+ is "../B/file" then [Y'][path] is hdfs://host/B/file
+
+ 4. Absolute paths
+
+ path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path
+ is "/file" then [X][path] is hdfs://host/file
+
+
+ @param target the target of the symbolic link
+ @param link the path to be created that points to target
+ @param createParent if true then missing parent dirs are created if
+ false then parent must exist
+
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file link already exists
+ @throws FileNotFoundException If target does not exist
+ @throws ParentNotDirectoryException If parent of link is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for
+ target or link is not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist.
+ @throws UnresolvedLinkException If unresolved link occurred.
+ @throws AccessControlException If access is denied.
+ @throws IOException If an I/O error occurred.
+ @return resolve path.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries
+ to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications, must include entries for user, group, and others for
+ compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns
+ each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal <}String{@literal >} of the XAttr names of the
+ file or directory
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Path Names
+
+ The Hadoop file system supports a URI namespace and URI names. This enables
+ multiple types of file systems to be referenced using fully-qualified URIs.
+ Two common Hadoop file system implementations are
+
+
the local file system: file:///path
+
the HDFS file system: hdfs://nnAddress:nnPort/path
+
+
+ The Hadoop file system also supports additional naming schemes besides URIs.
+ Hadoop has the concept of a default file system, which implies a
+ default URI scheme and authority. This enables slash-relative names
+ relative to the default FS, which are more convenient for users and
+ application writers. The default FS is typically set by the user's
+ environment, though it can also be manually specified.
+
+
+ Hadoop also supports working-directory-relative names, which are paths
+ relative to the current working directory (similar to Unix). The working
+ directory can be in a different file system than the default FS.
+
+ Thus, Hadoop path names can be specified as one of the following:
+
+
a fully-qualified URI: scheme://authority/path (e.g.
+ hdfs://nnAddress:nnPort/foo/bar)
+
a slash-relative name: path relative to the default file system (e.g.
+ /foo/bar)
+
a working-directory-relative name: path relative to the working dir (e.g.
+ foo/bar)
+
+ Relative paths with scheme (scheme:foo/bar) are illegal.
+
+
Role of FileContext and Configuration Defaults
+
+ The FileContext is the analogue of per-process file-related state in Unix. It
+ contains two properties:
+
+
+
the default file system (for resolving slash-relative names)
+
the umask (for file permissions)
+
+ In general, these properties are obtained from the default configuration file
+ in the user's environment (see {@link Configuration}).
+
+ Further file system properties are specified on the server-side. File system
+ operations default to using these server-side defaults unless otherwise
+ specified.
+
+ The file system related server-side defaults are:
+
+
the home directory (default is "/user/userName")
+
the initial wd (only for local fs)
+
replication factor
+
block size
+
buffer size
+
encryptDataTransfer
+
checksum option. (checksumType and bytesPerChecksum)
+
+
+
Example Usage
+
+ Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
+ Unspecified values come from core-defaults.xml in the release jar.
+
+
myFContext = FileContext.getFileContext(); // uses the default config
+ // which has your default FS
+
myFContext.create(path, ...);
+
myFContext.setWorkingDir(path);
+
myFContext.open (path, ...);
+
...
+
+ Example 2: Get a FileContext with a specific URI as the default FS
+
+
myFContext = FileContext.getFileContext(URI);
+
myFContext.create(path, ...);
+
...
+
+ Example 3: FileContext with local file system as the default
+
+ If the configuration has the property
+ {@code "fs.$SCHEME.impl.disable.cache"} set to true,
+ a new instance will be created, initialized with the supplied URI and
+ configuration, then returned without being cached.
+
+
+ If there is a cached FS instance matching the same URI, it will
+ be returned.
+
+
+ Otherwise: a new FS instance will be created, initialized with the
+ configuration and URI, cached and returned to the caller.
+
+
+ @param uri uri of the filesystem.
+ @param conf configuration.
+ @return filesystem instance.
+ @throws IOException if the FileSystem cannot be instantiated.]]>
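+
+ The snippet below is a hedged, compilable sketch of the three usage
+ patterns above; the URI and paths are illustrative assumptions, not
+ values taken from the original examples.
+
+ import java.net.URI;
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileContext;
+ import org.apache.hadoop.fs.Path;
+
+ public class FileContextExamples {
+   public static void main(String[] args) throws Exception {
+     // Example 1: default config, which supplies the default FS.
+     FileContext fc = FileContext.getFileContext();
+     fc.setWorkingDirectory(new Path("/user/alice"));   // hypothetical path
+
+     // Example 2: a specific URI as the default FS.
+     FileContext hdfsFc =
+         FileContext.getFileContext(URI.create("hdfs://nn:8020"), new Configuration());
+
+     // Example 3: the local file system as the default FS.
+     FileContext localFc = FileContext.getLocalFSFileContext();
+     System.out.println(localFc.getWorkingDirectory());
+   }
+ }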
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ if f == null :
+ result = null
+ elif f.getLen() {@literal <=} start:
+ result = []
+ else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)]
+
+ This call is most helpful with a distributed filesystem
+ where the hostnames of machines that contain blocks of the given file
+ can be determined.
+
+ The default implementation returns an array containing one element:
+
+
+ If a file is erasure-coded, the returned BlockLocations are logical
+ block groups.
+
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ there will be one BlockLocation returned, with 0 offset, actual file size
+ and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+ 2. If the file size is less than one group size but greater than one
+ stripe size, then there will be one BlockLocation returned, with 0 offset,
+ actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting
+ the actual blocks.
+ 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ for example, then the result will be like:
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails
+ if the dst already exists.
+
+
+ If OVERWRITE option is passed as an argument, rename overwrites
+ the dst if it is a file or an empty directory. Rename fails if dst is
+ a non-empty directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for
+ details. This default implementation is non-atomic.
+
+ This method is deprecated since it is a temporary method added to
+ support the transition from FileSystem to FileContext for user
+ applications.
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @param options rename options.
+ @throws FileNotFoundException src path does not exist, or the parent
+ path of dst does not exist.
+ @throws FileAlreadyExistsException dest path exists and is a file
+ @throws ParentNotDirectoryException if the parent path of dest is not
+ a directory
+ @throws IOException on failure]]>
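+
+ A minimal sketch of a rename with the OVERWRITE option, shown here through
+ FileContext, whose public rename method accepts the same options; the paths
+ are illustrative assumptions.
+
+ import org.apache.hadoop.fs.FileContext;
+ import org.apache.hadoop.fs.Options;
+ import org.apache.hadoop.fs.Path;
+
+ FileContext fc = FileContext.getFileContext();
+ // Overwrites dst only if it is a file or an empty directory;
+ // the rename still fails if dst is a non-empty directory.
+ fc.rename(new Path("/data/in.tmp"), new Path("/data/in"),
+     Options.Rename.OVERWRITE);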
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default).]]>
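+
+ A short usage sketch of truncate; the path and length are illustrative and
+ the call assumes a filesystem that supports the operation.
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+
+ FileSystem fs = FileSystem.get(new Configuration());
+ boolean done = fs.truncate(new Path("/logs/app.log"), 1024);
+ if (!done) {
+   // The last block is still being adjusted in the background;
+   // wait for completion before appending to the file.
+ }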
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Clean shutdown of the JVM cannot be guaranteed.
+
The time to shut down a FileSystem will depend on the number of
+ files to delete. For filesystems where the cost of checking
+ for the existence of a file/directory and the actual delete operation
+ (for example: object stores) is high, the time to shut down the JVM can be
+ significantly extended by over-use of this feature.
+
Connectivity problems with a remote filesystem may delay shutdown
+ further, and may cause the files to not be deleted.
+
+ @param f the path to delete.
+ @return true if deleteOnExit is successful, otherwise false.
+ @throws IOException IO failure]]>
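+
+ A hedged sketch of deleteOnExit; the scratch path is an assumption.
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+
+ FileSystem fs = FileSystem.get(new Configuration());
+ Path scratch = new Path("/tmp/job-scratch");
+ fs.mkdirs(scratch);
+ // Marked paths are deleted when the FileSystem is closed or the JVM exits;
+ // as noted above, use sparingly against object stores, where deletes are slow.
+ fs.deleteOnExit(scratch);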
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ Will not return null. Expect IOException upon access error.
+ @param f given path
+ @return the statuses of the files/directories in the given path
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
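+
+ A minimal listStatus sketch; the directory is illustrative.
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileStatus;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+
+ FileSystem fs = FileSystem.get(new Configuration());
+ // Order of the returned entries is not guaranteed.
+ for (FileStatus st : fs.listStatus(new Path("/user/alice"))) {
+   System.out.println(st.getPath() + " " + st.getLen());
+ }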
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param f
+ a path name
+ @param filter
+ the user-supplied path filter
+ @return an array of FileStatus objects for the files under the given path
+ after applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @return a list of statuses for the files under the given paths after
+ applying the default Path filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @param filter
+ the user-supplied path filter
+ @return a list of statuses for the files under the given paths after
+ applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+ Return all the files that match filePattern and are not checksum
+ files. Results are sorted by their names.
+
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+
+
+
?
+
Matches any single character.
+
+
*
+
Matches zero or more characters.
+
+
[abc]
+
Matches a single character from character set
+ {a,b,c}.
+
+
[a-b]
+
Matches a single character from the character range
+ {a...b}. Note that character a must be
+ lexicographically less than or equal to character b.
+
+
[^a]
+
Matches a single character that is not from character set or range
+ {a}. Note that the ^ character must occur
+ immediately to the right of the opening bracket.
+
+
\c
+
Removes (escapes) any special meaning of character c.
+
+
{ab,cd}
+
Matches a string from the string set {ab, cd}
+
+
{ab,c{de,fh}}
+
Matches a string from the string set {ab, cde, cfh}
+
+
+
+
+
+ @param pathPattern a glob specifying a path pattern
+
+ @return an array of paths that match the path pattern
+ @throws IOException IO failure]]>
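+
+ A small sketch of the glob syntax above applied through globStatus,
+ continuing with the fs handle from the earlier sketches; the pattern and
+ directory layout are assumptions.
+
+ import org.apache.hadoop.fs.FileStatus;
+ import org.apache.hadoop.fs.Path;
+
+ // Matches e.g. /data/2024-01/part-0.gz and /data/2024-02/part-3.gz,
+ // using the '{ab,cd}' set and '*' wildcard rules described above.
+ FileStatus[] matches =
+     fs.globStatus(new Path("/data/2024-{01,02}/part-*.gz"));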
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+ p does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ If the path is a directory,
+ if recursive is false, returns files in the directory;
+ if recursive is true, returns files in the subtree rooted at the path.
+ If the path is a file, return the file's status and block locations.
+
+ @param f is the path
+ @param recursive if the subdirectories need to be traversed recursively
+
+ @return an iterator that traverses statuses of the files
+
+ @throws FileNotFoundException when the path does not exist;
+ @throws IOException see specific implementation]]>
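+
+ A sketch of recursive listing with the returned RemoteIterator, reusing the
+ fs handle from the earlier sketches; the root path is illustrative.
+
+ import org.apache.hadoop.fs.LocatedFileStatus;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.fs.RemoteIterator;
+
+ RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/data"), true);
+ while (it.hasNext()) {
+   LocatedFileStatus status = it.next();
+   System.out.println(status.getPath()
+       + " blocks=" + status.getBlockLocations().length);
+ }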
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ undefined.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations, such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests,
+ particularly in an HA setting.
+ @throws IOException If an I/O error occurred.
+ @throws UnsupportedOperationException if the operation is unsupported.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal } of the XAttr names of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
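+
+ A hedged sketch covering the set/get/remove xattr calls documented above;
+ the path and attribute name are assumptions, and the target filesystem must
+ support extended attributes (for example HDFS). fs is an existing
+ FileSystem handle.
+
+ import java.nio.charset.StandardCharsets;
+ import org.apache.hadoop.fs.Path;
+
+ Path p = new Path("/data/report.csv");
+ fs.setXAttr(p, "user.origin", "ingest-job".getBytes(StandardCharsets.UTF_8));
+ byte[] origin = fs.getXAttr(p, "user.origin");
+ System.out.println(new String(origin, StandardCharsets.UTF_8));
+ fs.removeXAttr(p, "user.origin");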
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is a default method which is intended to be overridden by
+ subclasses. The default implementation returns an empty storage statistics
+ object.
+
+ @return The StorageStatistics for this FileSystem instance.
+ Will never be null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ All user code that may potentially use the Hadoop Distributed
+ File System should be written to use a FileSystem object or its
+ successor, {@link FileContext}.
+
+
+ The local implementation is {@link LocalFileSystem} and distributed
+ implementation is DistributedFileSystem. There are other implementations
+ for object stores and (outside the Apache Hadoop codebase),
+ third party filesystems.
+
+ Notes
+
+
The behaviour of the filesystem is
+
+ specified in the Hadoop documentation.
+ However, the normative specification of the behavior of this class is
+ actually HDFS: if HDFS does not behave the way these Javadocs or
+ the specification in the Hadoop documentation defines, assume that
+ the documentation is incorrect.
+
+
The term {@code FileSystem} refers to an instance of this class.
+
The acronym "FS" is used as an abbreviation of FileSystem.
+
The term {@code filesystem} refers to the distributed/local filesystem
+ itself, rather than the class used to interact with it.
+
The term "file" refers to a file in the remote filesystem,
+ rather than instances of {@code java.io.File}.
+
+
+ This is a carefully evolving class.
+ New methods may be marked as Unstable or Evolving for their initial release,
+ as a warning that they are new and may change based on the
+ experience of use in applications.
+
+ Important note for developers
+
+ If you are making changes here to the public API or protected methods,
+ you must review the following subclasses and make sure that
+ they are filtering/passing through new methods as appropriate.
+
+ {@link FilterFileSystem}: methods are passed through. If not,
+ then {@code TestFilterFileSystem.MustNotImplement} must be
+ updated with the unsupported interface.
+ Furthermore, if the new API's support is probed for via
+ {@link #hasPathCapability(Path, String)} then
+ {@link FilterFileSystem#hasPathCapability(Path, String)}
+ must return false, always.
+
+ {@link ChecksumFileSystem}: checksums are created and
+ verified.
+
+ {@code TestHarFileSystem} will need its {@code MustNotImplement}
+ interface updated.
+
+
+ There are some external places your changes will break things.
+ Do co-ordinate changes here.
+
+
+ HBase: HBoss
+
+ Hive: HiveShim23
+
+ {@code shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ returns true if the operation succeeded. When deleteSource is true,
+ this means "after the copy, delete(source) returned true".
+ If the destination is a directory, and mkdirs(dest) fails,
+ the operation will return false rather than raise any exception.
+
+ The overwrite flag is about overwriting files; it has no effect on
+ handling an attempt to copy a file atop a directory (expect an IOException),
+ or a directory over a path which contains a file (mkdir will fail, so
+ "false").
+
+ The operation is recursive, and the deleteSource operation takes place
+ as each subdirectory is copied. Therefore, if an operation fails partway
+ through, the source tree may be partially deleted.
+
+ If a file is changed while the readVectored() operation is in progress, the output is
+ undefined. Some ranges may have old data, some may have new and some may have both.
+
+
+ While a readVectored() operation is in progress, normal read api calls may block.
+
+ Consult the filesystem specification document for the requirements
+ of an implementation of this interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Exceptions are caught and downgraded to debug logging.
+ @param source source of statistics.
+ @return a string for logging.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is designed to be affordable to use in log statements.
+ @param source source of statistics - may be null.
+ @return an object whose toString() operation returns the current values.]]>
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is for use in log statements where the cost of creation
+ of this entry is low; it is affordable to use in log statements.
+ @param statistics statistics to stringify - may be null.
+ @return an object whose toString() operation returns the current values.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It is serializable so that frameworks which can use java serialization
+ to propagate data (Spark, Flink...) can send the statistics
+ back. For this reason, TreeMaps are explicitly used as field types,
+ even though IDEs can recommend use of Map instead.
+ For security reasons, untrusted java object streams should never be
+ deserialized. If for some reason this is required, use
+ {@link #requiredSerializationClasses()} to get the list of classes
+ used when deserializing instances of this object.
+
+
+ It is annotated for correct serializations with jackson2.
+
]]>
+
+
+
+
+
+
+
+
+
+ This is not an atomic operation.
+
+ The instance can be serialized, and its
+ {@code toString()} method lists all the values.
+ @param statistics statistics
+ @return a snapshot of the current values.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It can be used to accrue values so as to dynamically update
+ the mean. If so, know that there is no synchronization
+ on the methods.
+
+
+ If a statistic has 0 samples then it is considered to be empty.
+
+
+ All 'empty' statistics are equivalent, independent of the sum value.
+
+
+ For non-empty statistics, sum and sample values must match
+ for equality.
+
+
+ It is serializable and annotated for correct serializations with jackson2.
+
+
+ Thread safety. The operations to add/copy sample data, are thread safe.
+
+
+
{@link #add(MeanStatistic)}
+
{@link #addSample(long)}
+
{@link #clear()}
+
{@link #setSamplesAndSum(long, long)}
+
{@link #set(MeanStatistic)}
+
{@link #setSamples(long)} and {@link #setSum(long)}
+
+
+ So is the {@link #mean()} method. This ensures that when
+ used to aggregate statistics, the aggregate value and sample
+ count are set and evaluated consistently.
+
+
+ Other methods are marked as synchronized because Findbugs overreacts
+ to the idea that some operations to update sum and sample count
+ are synchronized, but that things like equals are not.
+
+ The names of the constants are uppercase, with words separated by
+ underscores.
+
+
+ The values of the constants are the lowercase forms of the constant names.
+
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Since these methods are often vendor- or device-specific, operators
+ may implement this interface in order to achieve fencing.
+
+ Fencing is configured by the operator as an ordered list of methods to
+ attempt. Each method will be tried in turn, and the next in the list
+ will only be attempted if the previous one fails. See {@link NodeFencer}
+ for more information.
+
+ If an implementation also implements {@link Configurable} then its
+ setConf method will be called upon instantiation.]]>
+
StaticUserWebFilter - An authorization plugin that makes all
+users a statically configured user.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ public class IntArrayWritable extends ArrayWritable {
+ public IntArrayWritable() {
+ super(IntWritable.class);
+ }
+ }
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ByteWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to store
+ @param item the object to be stored
+ @param keyName the name of the key to use
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param items the objects to be stored
+ @param keyName the name of the key to use
+ @throws IndexOutOfBoundsException if the items array is empty
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+ DefaultStringifier offers convenience methods to store/load objects to/from
+ the configuration.
+
+ @param the class of the objects to stringify]]>
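+
+ A minimal store/load sketch with DefaultStringifier; the key name is an
+ assumption, and the default io.serializations setting (which includes
+ WritableSerialization) is assumed to be in effect.
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.io.DefaultStringifier;
+ import org.apache.hadoop.io.Text;
+
+ Configuration conf = new Configuration();
+ DefaultStringifier.store(conf, new Text("hello"), "my.app.greeting");
+ Text restored = DefaultStringifier.load(conf, "my.app.greeting", Text.class);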
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a DoubleWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value argument is null or
+ its size is zero, the elementType argument must not be null. If
+ the argument value's size is bigger than zero, the argument
+ elementType is not used.
+
+ @param value enumSet value.
+ @param elementType elementType.]]>
+
+
+
+
+ value should not be null
+ or empty.
+
+ @param value enumSet value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value and elementType. If the value argument
+ is null or its size is zero, the elementType argument must not be
+ null. If the argument value's size is bigger than zero, the
+ argument elementType is not used.
+
+ @param value enumSet Value.
+ @param elementType elementType.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an EnumSetWritable with the same value,
+ or both are null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a FloatWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When two sequence files, which have the same Key type but different Value
+ types, are mapped out to reduce, multiple Value types are not allowed.
+ In this case, this class can help you wrap instances with different types.
+
+
+
+ Compared with ObjectWritable, this class is much more efficient,
+ because ObjectWritable will append the class declaration as a String
+ into the output file in every Key-Value pair.
+
+
+
+ Generic Writable implements {@link Configurable} interface, so that it will be
+ configured by the framework. The configuration is passed to the wrapped objects
+ implementing {@link Configurable} interface before deserialization.
+
+
+ How to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines
+ the classes that will be wrapped in GenericObject in the application.
+ Note: the classes defined in the getTypes() method must
+ implement the Writable interface; a minimal sketch follows below.
+
+
+ @since Nov 8, 2006]]>
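+
+ A minimal sketch of the two steps above; the class name and wrapped types
+ are illustrative choices.
+
+ import org.apache.hadoop.io.GenericWritable;
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.io.Writable;
+
+ public class GenericObject extends GenericWritable {
+   @SuppressWarnings("unchecked")
+   private static final Class<? extends Writable>[] TYPES =
+       new Class[] { IntWritable.class, Text.class };
+
+   @Override
+   protected Class<? extends Writable>[] getTypes() {
+     return TYPES;
+   }
+ }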
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a IntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ closes the input and output streams
+ at the end.
+
+ @param in InputStream to read from
+ @param out OutputStream to write to
+ @param conf the Configuration object.
+ @throws IOException raised on errors performing I/O.]]>
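+
+ A hedged sketch of the overload described above, which closes both streams
+ when the copy completes; fs, conf and the paths are assumed to exist.
+
+ import java.io.InputStream;
+ import java.io.OutputStream;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.io.IOUtils;
+
+ InputStream in = fs.open(new Path("/data/in"));
+ OutputStream out = fs.create(new Path("/data/out"));
+ // Buffer size is taken from io.file.buffer.size in conf;
+ // both streams are closed by copyBytes, even on failure.
+ IOUtils.copyBytes(in, out, conf);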
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param log the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close
+ @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)}
+ instead]]>
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param logger the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is better than File#listDir because it does not ignore IOExceptions.
+
+ @param dir The directory to list.
+ @param filter If non-null, the filter to use when listing
+ this directory.
+ @return The list of files in the directory.
+
+ @throws IOException On I/O error]]>
+
+
+
+
+
+
+
+ Borrowed from Uwe Schindler in LUCENE-5588
+ @param fileToSync the file to fsync
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a LongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A map is a directory containing two files, the data file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+
The index file is read entirely into memory. Thus key implementations
+ should try to keep themselves small.
+
+
Map files are created by adding entries in-order. To maintain a large
+ database, perform updates by copying the previous version of a database and
+ merging in a sorted change list, to create a new version of the database in
+ a new file. Sorting large change lists can be done with {@link
+ SequenceFile.Sorter}.]]>
+
SequenceFile provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
+
+ There are three SequenceFileWriters based on the
+ {@link CompressionType} used to compress key/value pairs:
+
+
+ Writer : Uncompressed records.
+
+
+ RecordCompressWriter : Record-compressed files, only compress
+ values.
+
+
+ BlockCompressWriter : Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+
+
+
The actual compression algorithm used to compress key and/or values can be
+ specified by using the appropriate {@link CompressionCodec}.
+
+
The recommended way is to use the static createWriter methods
+ provided by the SequenceFile to choose the preferred format.
+
+
The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile formats.
+
+
SequenceFile Formats
+
+
Essentially there are 3 different formats for SequenceFiles
+ depending on the CompressionType specified. All of them share a
+ common header described below.
+
+
SequenceFile Header
+
+
+ version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ version number (e.g. SEQ4 or SEQ6)
+
+
+ keyClassName -key class
+
+
+ valueClassName - value class
+
+
+ compression - A boolean which specifies if compression is turned on for
+ keys/values in this file.
+
+
+ blockCompression - A boolean which specifies if block-compression is
+ turned on for keys/values in this file.
+
+
+ compression codec - CompressionCodec class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+
+
+ metadata - {@link Metadata} for this file.
+
+
+ sync - A sync marker to denote end of the header.
+
The compressed blocks of key lengths and value lengths consist of the
+ actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ format.
+
+ @see CompressionCodec]]>
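+
+ A hedged write/read sketch using the option-based createWriter and Reader
+ constructors; conf, the file path and the record values are illustrative,
+ and compression options (a CompressionType plus a codec) could be passed as
+ further Writer options.
+
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.SequenceFile;
+ import org.apache.hadoop.io.Text;
+
+ Path file = new Path("/data/events.seq");
+ try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+     SequenceFile.Writer.file(file),
+     SequenceFile.Writer.keyClass(IntWritable.class),
+     SequenceFile.Writer.valueClass(Text.class))) {
+   writer.append(new IntWritable(1), new Text("first record"));
+ }
+
+ try (SequenceFile.Reader reader =
+          new SequenceFile.Reader(conf, SequenceFile.Reader.file(file))) {
+   IntWritable key = new IntWritable();
+   Text value = new Text();
+   while (reader.next(key, value)) {
+     System.out.println(key + "\t" + value);
+   }
+ }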
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ShortWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ position. Note that this
+ method avoids using the converter or doing String instantiation.
+
+ @param position input position.
+ @return the Unicode scalar value at position or -1
+ if the position is invalid or points to a
+ trailing byte.]]>
+
+
+
+
+
+
+
+
+
+ what in the backing
+ buffer, starting as position start. The starting
+ position is measured in bytes and the return value is in
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+
+ @param what input what.
+ @param start input start.
+ @return byte position of the first occurrence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: For performance reasons, this call does not clear the
+ underlying byte array that is retrievable via {@link #getBytes()}.
+ In order to free the byte-array memory, call {@link #set(byte[])}
+ with an empty byte array (For example, new byte[0]).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a Text with the same contents.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+
+ @param utf8 input utf8.
+ @param start input start.
+ @param length input length.
+ @param replace input replace.
+ @throws CharacterCodingException a character encoding or
+ decoding error occurs.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+
+ @param string input string.
+ @param replace input replace.
+ @return ByteBuffer: bytes stores at ByteBuffer.array()
+ and length is ByteBuffer.limit()
+ @throws CharacterCodingException a character encoding or decoding error occurs.]]>
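+
+ A small sketch of the encode/decode round trip described above; it assumes
+ the CharacterCodingException is handled or declared by the caller.
+
+ import java.nio.ByteBuffer;
+ import org.apache.hadoop.io.Text;
+
+ ByteBuffer encoded = Text.encode("café", true);   // replace=true maps malformed input to U+FFFD
+ String decoded = Text.decode(encoded.array(), 0, encoded.limit());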
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In
+ addition, it provides methods for string traversal without converting the
+ byte array to a string.
Also includes utilities for
+ serializing/deserializing a string, coding/decoding a string, checking if a
+ byte array contains valid UTF8 code, calculating the length of an encoded
+ string.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is useful when a class may evolve, so that instances written by the
+ old version of the class may still be processed by the new version. To
+ handle this situation, {@link #readFields(DataInput)}
+ implementations should catch {@link VersionMismatchException}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VIntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VLongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ out.
+
+ @param out DataOuput to serialize this object into.
+ @throws IOException any other problem for write.]]>
+
+
+
+
+
+
+ in.
+
+
For efficiency, implementations should attempt to re-use storage in the
+ existing object where possible.
+
+ @param in DataInput to deserialize this object from.
+ @throws IOException any other problem for readFields.]]>
+
+
+
+ Any key or value type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+
Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
+
+
Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ // Default constructor to allow (de)serialization
+ MyWritable() { }
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+
]]>
+
+
+
+
+
+
+
+
+ WritableComparables can be compared to each other, typically
+ via Comparators. Any type which is to be used as a
+ key in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+
Note that hashCode() is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode() implementation in Object does not
+ satisfy this property.
+
+
Example:
+
+ public class MyWritableComparable implements
+ WritableComparable{@literal } {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public int compareTo(MyWritableComparable o) {
+ int thisValue = this.counter;
+ int thatValue = o.counter;
+ return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ }
+
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + counter;
+ result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+ return result;
+ }
+ }
+
One may optimize compare-intensive operations by overriding
+ {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are
+ provided to assist in optimized implementations of this method.]]>
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec object]]>
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec class]]>
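+
+ A short sketch of alias lookup through CompressionCodecFactory; both calls
+ below are expected to resolve to the same gzip codec.
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.io.compress.CompressionCodec;
+ import org.apache.hadoop.io.compress.CompressionCodecFactory;
+
+ CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
+ CompressionCodec byAlias = factory.getCodecByName("gzip");       // case-insensitive alias
+ CompressionCodec byShortName = factory.getCodecByName("GzipCodec");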
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations are assumed to be buffered. This permits clients to
+ reposition the underlying input stream then call {@link #resetState()},
+ without having to also synchronize client buffers.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ #setInput() should be called in order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the end of the compressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+ (Both native and non-native versions of various Decompressors require
+ that the data passed in via b[] remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called to
+ provide more input.
+
+ @return true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if a preset dictionary is needed for decompression.
+ @return true if a preset dictionary is needed for decompression]]>
+
+
+
+
+ true if the end of the decompressed
+ data output stream has been reached. Indicates a concatenated data stream
+ when finished() returns true and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true if the end of the decompressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true and getRemaining() returns a positive value. If
+ {@link #finished()} returns true and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+
+
+
+
+ true and {@link #getRemaining()} returns a positive value,
+ reset() is called before processing of the next data stream in the
+ concatenated data stream. {@link #finished()} will be reset and will
+ return false when reset() is called.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec
+ io.compress.passthrough.extension = .gz
+
+
+ Note: this is not a Splittable codec: it doesn't know the
+ capabilities of the passed in stream. It should be possible to
+ extend this in a subclass: the inner classes are marked as protected
+ to enable this. Do not retrofit splitting to this class.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Seek by key or by file offset.
+
+ The memory footprint of a TFile includes the following:
+
+
Some constant overhead of reading or writing a compressed block.
+
+
Each compressed block requires one compression/decompression codec for
+ I/O.
+
Temporary space to buffer the key.
+
Temporary space to buffer the value (for TFile.Writer only). Values are
+ chunk encoded, so that we buffer at most one chunk of user data. By default,
+ the chunk buffer is 1MB. Reading chunked value does not require additional
+ memory.
+
+
TFile index, which is proportional to the total number of Data Blocks.
+ The total amount of memory needed to hold the index can be estimated as
+ (56+AvgKeySize)*NumBlocks.
+
MetaBlock index, which is proportional to the total number of Meta
+ Blocks.The total amount of memory needed to hold the index for Meta Blocks
+ can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
+
+
+ The behavior of TFile can be customized by the following variables through
+ Configuration:
+
+
tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default
+ to 1MB. Values whose length is less than the chunk size are guaranteed to have
+ a known value length at read time (See
+ {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
+
tfile.fs.output.buffer.size: Buffer size used for
+ FSDataOutputStream. Integer (in bytes). Default to 256KB.
+
tfile.fs.input.buffer.size: Buffer size used for
+ FSDataInputStream. Integer (in bytes). Default to 256KB.
+
+
+ Suggestions on performance optimization.
+
+
Minimum block size. We recommend a setting of minimum block size between
+ 256KB to 1MB for general usage. Larger block size is preferred if files are
+ primarily for sequential access. However, it would lead to inefficient random
+ access (because there are more data to decompress). Smaller blocks are good
+ for random access, but require more memory to hold the block index, and may
+ be slower to create (because we must flush the compressor stream at the
+ conclusion of each data block, which leads to an FS I/O flush). Further, due
+ to the internal caching in Compression codec, the smallest possible block
+ size would be around 20KB-30KB.
+
The current implementation does not offer true multi-threading for
+ reading. The implementation uses FSDataInputStream seek()+read(), which is
+ shown to be much faster than positioned-read call in single thread mode.
+ However, it also means that if multiple threads attempt to access the same
+ TFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ sequentially even if they access different DFS blocks.
+
Compression codec. Use "none" if the data is not very compressable (by
+ compressable, I mean a compression ratio at least 2:1). Generally, use "lzo"
+ as the starting point for experimenting. "gz" overs slightly better
+ compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
+ decompress, comparing to "lzo".
+
File system buffering, if the underlying FSDataInputStream and
+ FSDataOutputStream are already adequately buffered; or if applications
+ reads/writes keys and values in large buffers, we can reduce the sizes of
+ input/output buffering in TFile layer by setting the configuration parameters
+ "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
+
+
+ Some design rationale behind TFile can be found at Hadoop-3315.]]>
+
+
+
+
+
+
+
+
+
+
+ Utils#writeVLong(out, n).
+
+ @param out
+ output stream
+ @param n
+ The integer to be encoded
+ @throws IOException raised on errors performing I/O.
+ @see Utils#writeVLong(DataOutput, long)]]>
+
+
+
+
+
+
+
+
+
if n in [-32, 127): encode in one byte with the actual value.
+ Otherwise,
+
if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ byte[1]=n&0xff. Otherwise,
+
if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+
if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff;
+ byte[3]=n&0xff.
+ Otherwise:
+
if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ (n>>24)&0xff; byte[2]=(n>>16)&0xff;
+ byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+
if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ (n>>32)&0xff; byte[2]=(n>>24)&0xff;
+ byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff;
+ byte[5]=n&0xff
+
if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ (n>>40)&0xff; byte[2]=(n>>32)&0xff;
+ byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff;
+ byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+
if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ (n>>48)&0xff; byte[2] = (n>>40)&0xff;
+ byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]=
+ (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff;
+
if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ (n>>54)&0xff; byte[2] = (n>>48)&0xff;
+ byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff;
+ byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]=
+ (n>>8)&0xff; byte[8]=n&0xff;
+
+
+ @param out
+ output stream
+ @param n
+ the integer number
+ @throws IOException raised on errors performing I/O.]]>
+
if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+
if (FB in [-104, -73]), return (FB+88)<<16 +
+ (NB[0]&0xff)<<8 + NB[1]&0xff;
+
if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)
+ <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff;
+
if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
+ big-endian integer.
+
+ @param in
+ input stream
+ @return the decoded long integer.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An experimental {@link Serialization} for Java {@link Serializable} classes.
+
+ @see JavaSerializationComparator]]>
+
+
+
+
+
+
+
+
+
+
+ A {@link RawComparator} that uses a {@link JavaSerialization}
+ {@link Deserializer} to deserialize objects that are then compared via
+ their {@link Comparable} interfaces.
+
+ @param generic type.
+ @see JavaSerialization]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides a mechanism for using different serialization frameworks
+in Hadoop. The property "io.serializations" defines a list of
+{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create
+{@link org.apache.hadoop.io.serializer.Serializer}s and
+{@link org.apache.hadoop.io.serializer.Deserializer}s.
+
+
+
+To add a new serialization framework write an implementation of
+{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the
+"io.serializations" property.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ avro.reflect.pkgs or implement
+ {@link AvroReflectSerializable} interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides Avro serialization in Hadoop. This can be used to
+serialize/deserialize Avro types in Hadoop.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for
+serialization of classes generated by Avro's 'specific' compiler.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for
+other classes.
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for
+any class which is either in the package list configured via
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES}
+or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable}
+interface.
+
{@link MetricsSource} generate and update metrics information.
+
{@link MetricsSink} consume the metrics information
+
+
+ {@link MetricsSource} and {@link MetricsSink} register with the metrics
+ system. Implementations of {@link MetricsSystem} poll the
+ {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to
+ {@link MetricsSink}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } (aggregate).
+ Filter out entries that don't have at least minSamples.
+
+ @param minSamples input minSamples.
+ @return a map of peer DataNode Id to the average latency to that
+ node seen over the measurement period.]]>
+
+
+
+
+
+
+
+
+
+
+ This class maintains a group of rolling average metrics. It implements the
+ algorithm of rolling average, i.e. a number of sliding windows are kept to
+ roll over and evict old subsets of samples. Each window has a subset of
+ samples in a stream, where sub-sum and sub-total are collected. All sub-sums
+ and sub-totals in all windows will be aggregated to final-sum and final-total
+ used to compute final average, which is called rolling average.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class is a metrics sink that uses
+ {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every
+ roll interval a new directory will be created under the path specified by the
+ basepath property. All metrics will be logged to a file in the
+ current interval's directory in a file named <hostname>.log, where
+ <hostname> is the name of the host on which the metrics logging
+ process is running. The base path is set by the
+ <prefix>.sink.<instance>.basepath property. The
+ time zone used to create the current interval's directory name is GMT. If
+ the basepath property isn't specified, it will default to
+ "/tmp", which is the temp directory on whatever default file
+ system is configured for the cluster.
+
+
The <prefix>.sink.<instance>.ignore-error
+ property controls whether an exception is thrown when an error is encountered
+ writing a log file. The default value is true. When set to
+ false, file errors are quietly swallowed.
+
+
The roll-interval property sets the amount of time before
+ rolling the directory. The default value is 1 hour. The roll interval may
+ not be less than 1 minute. The property's value should be given as
+ number unit, where number is an integer value, and
+ unit is a valid unit. Valid units are minute, hour,
+ and day. The units are case insensitive and may be abbreviated or
+ plural. If no units are specified, hours are assumed. For example,
+ "2", "2h", "2 hour", and
+ "2 hours" are all valid ways to specify two hours.
+
+
The roll-offset-interval-millis property sets the upper
+ bound on a random time interval (in milliseconds) that is used to delay
+ before the initial roll. All subsequent rolls will happen an integer
+ number of roll intervals after the initial roll, hence retaining the original
+ offset. The purpose of this property is to insert some variance in the roll
+ times so that large clusters using this sink on every node don't cause a
+ performance impact on HDFS by rolling simultaneously. The default value is
+ 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in
+ millis should be no less than the number of sink instances times 5.
+
+
The primary use of this class is for logging to HDFS. As it uses
+ {@link org.apache.hadoop.fs.FileSystem} to access the target file system,
+ however, it can be used to write to the local file system, Amazon S3, or any
+ other supported file system. The base path for the sink will determine the
+ file system used. An unqualified path will write to the default file system
+ set by the configuration.
+
+
Not all file systems support the ability to append to files. In file
+ systems without the ability to append to files, only one writer can write to
+ a file at a time. To allow for concurrent writes from multiple daemons on a
+ single host, the source property is used to set unique headers
+ for the log files. The property should be set to the name of
+ the source daemon, e.g. namenode. The value of the
+ source property should typically be the same as the property's
+ prefix. If this property is not set, the source is taken to be
+ unknown.
+
+
Instead of appending to an existing file, by default the sink
+ will create a new file with a suffix of ".<n>", where
+ n is the next lowest integer that isn't already used in a file name,
+ similar to the Hadoop daemon logs. NOTE: the file with the highest
+ sequence number is the newest file, unlike the Hadoop daemon logs.
+
+
For file systems that allow append, the sink supports appending to the
+ existing file instead. If the allow-append property is set to
+ true, the sink will instead append to the existing file on file systems that
+ support appends. By default, the allow-append property is
+ false.
+
+
Note that when writing to HDFS with allow-append set to true,
+ there is a minimum acceptable number of data nodes. If the number of data
+ nodes drops below that minimum, the append will succeed, but reading the
+ data will fail with an IOException in the DataStreamer class. The minimum
+ number of data nodes required for a successful append is generally 2 or
+ 3.
+
+
Note also that when writing to HDFS, the file size information is not
+ updated until the file is closed (at the end of the interval) even though
+ the data is being written successfully. This is a known HDFS limitation that
+ exists because of the performance cost of updating the metadata. See
+ HDFS-5478.
+
+
When using this sink in a secure (Kerberos) environment, two additional
+ properties must be set: keytab-key and
+ principal-key. keytab-key should contain the key by
+ which the keytab file can be found in the configuration, for example,
+ yarn.nodemanager.keytab. principal-key should
+ contain the key by which the principal can be found in the configuration,
+ for example, yarn.nodemanager.principal.]]>
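+
+ A hedged hadoop-metrics2.properties sketch wiring this sink to a NameNode
+ daemon; the HDFS URI and instance name are assumptions.
+
+ # hadoop-metrics2.properties (illustrative values)
+ namenode.sink.rfs.class=org.apache.hadoop.metrics2.sink.RollingFileSystemSink
+ namenode.sink.rfs.basepath=hdfs://nn:8020/metrics
+ namenode.sink.rfs.source=namenode
+ namenode.sink.rfs.roll-interval=1 hour
+ namenode.sink.rfs.allow-append=true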
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CollectD StatsD plugin).
+
+ To configure this plugin, you will need to add the following
+ entries to your hadoop-metrics2.properties file:
+
+
+ *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
+ [prefix].sink.statsd.server.host=
+ [prefix].sink.statsd.server.port=
+ [prefix].sink.statsd.skip.hostname=true|false (optional)
+ [prefix].sink.statsd.service.name=NameNode (name you want for service)
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,name=}"
+ Where the {@literal <serviceName>} and {@literal <nameName>} are the supplied
+ parameters.
+
+ @param serviceName serviceName.
+ @param nameName nameName.
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+ "hadoop:service=<serviceName>,name=<nameName>"
+ Where <serviceName> and <nameName> are the supplied
+ parameters.
+
+ @param serviceName serviceName.
+ @param nameName nameName.
+ @param properties - Key value pairs to define additional JMX ObjectName
+ properties.
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @param specs server specs (see description)
+ @param defaultPort the default port if not specified
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is used when parts of Hadoop need to know whether to apply
+ single rack vs multi-rack policies, such as during block placement.
+ Such algorithms behave differently if they are on multi-switch systems.
+
+
+ @return true if the mapping thinks that it is on a single switch]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This predicate simply assumes that all mappings not derived from
+ this class are multi-switch.
+ @param mapping the mapping to query
+ @return true if the base class says it is single switch, or the mapping
+ is not derived from this class.]]>
+
+
+
+ It is not mandatory to
+ derive {@link DNSToSwitchMapping} implementations from it, but it is strongly
+ recommended, as it makes it easy for the Hadoop developers to add new methods
+ to this base class that are automatically picked up by all implementations.
+
+
+ This class does not extend the Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If a name cannot be resolved to a rack, the implementation
+ should return {@link NetworkTopology#DEFAULT_RACK}. This
+ is what the bundled implementations do, though it is not a formal requirement
+
+ @param names the list of hosts to resolve (can be empty)
+ @return list of resolved network paths.
+ If names is empty, the returned list is also empty]]>
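+
+ A minimal sketch of an implementation honouring the contract above; the
+ class name is invented, and every host is simply placed in the default
+ rack.
+
+     import java.util.ArrayList;
+     import java.util.List;
+     import org.apache.hadoop.net.DNSToSwitchMapping;
+     import org.apache.hadoop.net.NetworkTopology;
+
+     public class FlatMapping implements DNSToSwitchMapping {
+
+       @Override
+       public List<String> resolve(List<String> names) {
+         // One entry per input name, in the same order.
+         List<String> racks = new ArrayList<>(names.size());
+         for (int i = 0; i < names.size(); i++) {
+           racks.add(NetworkTopology.DEFAULT_RACK);
+         }
+         return racks;
+       }
+
+       @Override
+       public void reloadCachedMappings() {
+         // nothing is cached in this sketch
+       }
+
+       @Override
+       public void reloadCachedMappings(List<String> names) {
+         // nothing is cached in this sketch
+       }
+     }
+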
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Calling {@link #setConf(Configuration)} will trigger a
+ re-evaluation of the configuration settings and so can be used to
+ set up the mapping script.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will get called in the superclass constructor, so a check is needed
+ to ensure that the raw mapping is defined before trying to relay a null
+ configuration.
+
+ @param conf input Configuration.]]>
+
+
+
+
+
+
+
+
+
+ It contains a static class RawScriptBasedMapping that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text
+ file. The columns are separated by whitespace. The first column is a DNS or
+ IP address and the second column specifies the rack to which the address maps.
+
+
+ This class uses the configuration parameter {@code
+ net.topology.table.file.name} to locate the mapping file.
+
+
+ Calls to {@link #resolve(List)} will look up the address as defined in the
+ mapping file. If no entry corresponding to the address is found, the value
+ {@code /default-rack} is returned.
+
+ An instance of the default {@link DelegationTokenAuthenticator} will be
+ used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL using the default
+ {@link DelegationTokenAuthenticator} class.
+
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+
+
+
+
+
+
+
+ The default class is {@link KerberosDelegationTokenAuthenticator}
+
+ @return the delegation token authenticator class to use as default.]]>
+
+
+
+
+
+
+ This method is provided to enable WebHDFS backwards compatibility.
+
+ @param useQueryString TRUE if the token is transmitted in the
+ URL query string, FALSE if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+
+
+
+
+ TRUE if the token is transmitted in the URL query
+ string, FALSE if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator. If the doAs parameter is not NULL,
+ the request will be done on behalf of the specified doAs user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return delegation token long value.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return delegation token long value.]]>
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+ DelegationTokenAuthenticatedURL is a
+ {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token
+ functionality.
+
+ The authentication mechanisms supported by default are Hadoop Simple
+ authentication (also known as pseudo authentication) and Kerberos SPNEGO
+ authentication.
+
+ Additional authentication mechanisms can be supported via {@link
+ DelegationTokenAuthenticator} implementations.
+
+ The default {@link DelegationTokenAuthenticator} is the {@link
+ KerberosDelegationTokenAuthenticator} class which supports
+ automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via
+ the {@link PseudoDelegationTokenAuthenticator} class.
+
+ AuthenticatedURL instances are not thread-safe.]]>
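+
+ A minimal usage sketch, assuming a reachable HTTP endpoint; the URL and
+ renewer name are placeholders, not values taken from this patch.
+
+     import java.net.HttpURLConnection;
+     import java.net.URL;
+     import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL;
+
+     public class DelegationTokenUrlExample {
+       public static void main(String[] args) throws Exception {
+         URL url = new URL("http://localhost:14000/webhdfs/v1/?op=GETHOMEDIRECTORY");
+         DelegationTokenAuthenticatedURL.Token token =
+             new DelegationTokenAuthenticatedURL.Token();
+         DelegationTokenAuthenticatedURL aUrl = new DelegationTokenAuthenticatedURL();
+
+         // Fetch a delegation token and cache it in the token object.
+         aUrl.getDelegationToken(url, token, "renewer-user");
+
+         // Later requests reuse the same token object.
+         HttpURLConnection conn = aUrl.openConnection(url, token);
+         System.out.println("HTTP status: " + conn.getResponseCode());
+       }
+     }
+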
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return abstract delegation token identifier.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return abstract delegation token identifier.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param dToken abstract delegation token identifier.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return delegation token long value.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @param dToken abstract delegation token identifier.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return delegation token long value.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param dToken abstract delegation token identifier.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param dToken abstract delegation token identifier.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KerberosDelegationTokenAuthenticator provides support for
+ Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation
+ Token operations.
+
+ It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP
+ endpoint does not trigger a SPNEGO authentication.]]>
+
+
+
+
+
+
+
+
+ PseudoDelegationTokenAuthenticator provides support for
+ Hadoop's pseudo authentication mechanism, which accepts
+ the user name specified as a query string parameter, as well as support for
+ Hadoop Delegation Token operations.
+
+ This mimics the model of Hadoop Simple authentication trusting the
+ {@link UserGroupInformation#getCurrentUser()} value.]]>
+
Any long-lived operation here will prevent the service state
+ change from completing in a timely manner.
+
If another thread is somehow invoked from the listener, and
+ that thread invokes the methods of the service (including
+ subclass-specific methods), there is a risk of a deadlock.
+
+
+
+ @param service the service that has changed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The base implementation logs all arguments at the debug level,
+ then returns the passed in config unchanged.]]>
+
+
+
+
+
+
+ The action is to signal success by returning the exit code 0.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is called before {@link #init(Configuration)};
+ Any non-null configuration that is returned from this operation
+ becomes the one that is passed on to that {@link #init(Configuration)}
+ operation.
+
+ This permits implementations to change the configuration before
+ the init operation. As the ServiceLauncher only creates
+ an instance of the base {@link Configuration} class, it is
+ recommended to instantiate any subclass (such as YarnConfiguration)
+ that injects new resources.
+
+ @param config the initial configuration build up by the
+ service launcher.
+ @param args list of arguments passed to the command line
+ after any launcher-specific commands have been stripped.
+ @return the configuration to init the service with.
+ Recommended: pass down the config parameter with any changes
+ @throws Exception any problem]]>
+
+
+
+
+
+
+ The return value becomes the exit code of the launched process.
+
+ If an exception is raised, the policy is:
+
+
Any subclass of {@link org.apache.hadoop.util.ExitUtil.ExitException}:
+ the exception is passed up unmodified.
+
+
Any exception which implements
+ {@link org.apache.hadoop.util.ExitCodeProvider}:
+ A new {@link ServiceLaunchException} is created with the exit code
+ and message of the thrown exception; the thrown exception becomes the
+ cause.
+
Any other exception: a new {@link ServiceLaunchException} is created
+ with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and
+ the message of the original exception (which becomes the cause).
+
+ @return the exit code
+ @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed
+ up as the exit code and error text.
+ @throws Exception any exception to report. If it provides an exit code
+ this is used in a wrapping exception.]]>
+
+
+
+
+ The command line options will be passed down before the
+ {@link Service#init(Configuration)} operation is invoked via an
+ invocation of {@link LaunchableService#bindArgs(Configuration, List)}
+ After the service has been successfully started via {@link Service#start()}
+ the {@link LaunchableService#execute()} method is called to execute the
+ service. When this method returns, the service launcher will exit, using
+ the return code from the method as its exit option.]]>
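+
+ A minimal sketch of a launchable service following the lifecycle described
+ above; the class name and the no-op behaviour are invented for illustration.
+
+     import java.util.List;
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.service.AbstractService;
+     import org.apache.hadoop.service.launcher.LaunchableService;
+
+     public class EchoService extends AbstractService implements LaunchableService {
+
+       public EchoService() {
+         super("EchoService");
+       }
+
+       @Override
+       public Configuration bindArgs(Configuration config, List<String> args)
+           throws Exception {
+         // Inspect command-line arguments here; the returned configuration
+         // is the one later passed to init().
+         return config;
+       }
+
+       @Override
+       public int execute() throws Exception {
+         // Do the real work; the return value becomes the process exit code.
+         return 0;
+       }
+     }
+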
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 400 Bad Request}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 401 Unauthorized}]]>
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 403: Forbidden}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 404: Not Found}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 405: Not allowed}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 406: Not Acceptable}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 408: Request Timeout}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 409: Conflict}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 500 Internal Server Error}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 501: Not Implemented}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 503 Service Unavailable}]]>
+
+
+
+
+
+ If raised, this is expected to be raised server-side and likely due
+ to client/server version incompatibilities.
+
+ Approximate HTTP equivalent: {@code 505: Version Not Supported}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codes with a YARN prefix are YARN-related.
+
+ Many of the exit codes are designed to resemble HTTP error codes,
+ squashed into a single byte. e.g. 44, "not found" is the equivalent
+ of 404. The various 2XX HTTP error codes aren't followed;
+ the Unix standard of "0" for success is used.
+
+ 0-10: general command issues
+ 30-39: equivalent to the 3XX responses, where those responses are
+ considered errors by the application.
+ 40-49: client-side/CLI/config problems
+ 50-59: service-side problems.
+ 60+ : application specific error codes
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+
+ If the last argument is a throwable, it becomes the cause of the exception.
+ It will also be used as a parameter for the format.
+ @param exitCode exit code
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+ @param exitCode exit code
+ @param cause inner cause
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+ When caught by the ServiceLauncher, it will convert that
+ into a process exit code.
+
+ The {@link #ServiceLaunchException(int, String, Object...)} constructor
+ generates formatted exceptions.]]>
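+
+ A minimal sketch of the formatted-exception constructor; the helper class
+ and the use of the EXIT_NOT_FOUND constant are illustrative assumptions.
+
+     import java.io.File;
+     import org.apache.hadoop.service.launcher.LauncherExitCodes;
+     import org.apache.hadoop.service.launcher.ServiceLaunchException;
+
+     public final class LaunchChecks {
+       private LaunchChecks() {
+       }
+
+       /** Fail the launch if a required file is missing. */
+       public static void requireFile(File file) throws ServiceLaunchException {
+         if (!file.exists()) {
+           // Format string and arguments follow String.format() semantics.
+           throw new ServiceLaunchException(LauncherExitCodes.EXIT_NOT_FOUND,
+               "Required file not found: %s", file);
+         }
+       }
+     }
+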
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will be 0 until a call
+ to {@link #finished()} has been made.
+
+ @return the currently recorded duration.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take a significant amount of time since,
+ in lieu of the reported progress, the framework has to assume that an error
+ has occurred and time out the operation.]]>
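+
+ A minimal sketch of reporting progress from a long-running loop; the class
+ and the record-copying scenario are invented for illustration.
+
+     import org.apache.hadoop.util.Progressable;
+
+     public final class ProgressDemo {
+       private ProgressDemo() {
+       }
+
+       /** Reports liveness every 1000 records while doing slow work. */
+       public static void copyAll(Iterable<byte[]> records, Progressable progress) {
+         long copied = 0;
+         for (byte[] rec : records) {
+           // ... write the record somewhere (omitted) ...
+           if (++copied % 1000 == 0) {
+             progress.progress();  // tell the framework the operation is alive
+           }
+         }
+       }
+     }
+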
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Generics Type.
+ @param theClass class of which an object is created
+ @param conf Configuration
+ @return a new object]]>
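+
+ A minimal sketch: instantiate a configured implementation class. The
+ "example.codec.class" key and the factory class name are invented; the
+ codec types are standard Hadoop classes.
+
+     import org.apache.hadoop.conf.Configuration;
+     import org.apache.hadoop.io.compress.CompressionCodec;
+     import org.apache.hadoop.io.compress.DefaultCodec;
+     import org.apache.hadoop.util.ReflectionUtils;
+
+     public final class CodecFactory {
+       private CodecFactory() {
+       }
+
+       public static CompressionCodec createCodec(Configuration conf) {
+         Class<? extends CompressionCodec> clazz = conf.getClass(
+             "example.codec.class", DefaultCodec.class, CompressionCodec.class);
+         // newInstance() also calls setConf(conf) if the class is Configurable.
+         return ReflectionUtils.newInstance(clazz, conf);
+       }
+     }
+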
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Generics Type T
+ @param o object whose correctly-typed Class is to be obtained
+ @return the correctly typed Class of the given object.]]>
+
+
+
+
+
+
+
+
+ Generics Type.
+ @param conf input Configuration.
+ @param src the object to copy from
+ @param dst the object to copy into, which is destroyed
+ @return dst param (the copy)
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ kill -0 command or equivalent]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param parent File parent directory
+ @param basename String script file basename
+ @return File referencing the script in the directory]]>
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param basename String script file basename
+ @return String script file name]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ IOException.
+ @return the path to {@link #WINUTILS_EXE}
+ @throws RuntimeException if the path is not resolvable]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell.
+ @return the thread that ran runCommand() that spawned this shell
+ or null if no thread is waiting for this shell to complete]]>
+
+
+
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @param timeout time in milliseconds after which the command is considered to have timed out
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+ Shell processes.
+ Iterates through a map of all currently running Shell
+ processes and destroys them one by one. This method is thread safe]]>
+
+
+
+
+ Shell objects.
+
+ @return all shells set.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CreateProcess synchronization object.]]>
+
+
+
+
+ os.name property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Important: caller must check for this value being null.
+ The lack of such checks has led to many support issues being raised.
+
+ @deprecated use one of the exception-raising getter methods,
+ specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell can be used to run shell commands like du or
+ df. It also offers facilities to gate commands by
+ time-intervals.]]>
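+
+ A minimal sketch of running a command through Shell; it assumes a
+ Unix-like host where "df" is available.
+
+     import java.io.IOException;
+     import org.apache.hadoop.util.Shell;
+
+     public final class DiskReport {
+       private DiskReport() {
+       }
+
+       /** Runs "df -h" and returns its output. */
+       public static String diskUsage() throws IOException {
+         // Throws Shell.ExitCodeException if the command exits non-zero.
+         return Shell.execCommand("df", "-h");
+       }
+     }
+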
+
+
+
+
+
+
+
+ ShutdownHookManager singleton.
+
+ @return ShutdownHookManager singleton.]]>
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook.]]>
+
+
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook
+ @param timeout timeout of the shutdownHook
+ @param unit unit of the timeout TimeUnit]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShutdownHookManager enables running shutdownHooks
+ in a deterministic order, higher priority first.
+
+ The JVM runs ShutdownHooks in a non-deterministic order or in parallel.
+ This class registers a single JVM shutdownHook and runs all the
+ shutdownHooks registered to it (to this class) in order based on their
+ priority.
+
+ Unless a hook was registered with a shutdown timeout explicitly set through
+ {@link #addShutdownHook(Runnable, int, long, TimeUnit)},
+ the shutdown time allocated to it is set by the configuration option
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in
+ {@code core-site.xml}, with a default value of
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT}
+ seconds.]]>
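+
+ A minimal sketch of registering a hook with an explicit timeout; the
+ priority and timeout values are arbitrary examples.
+
+     import java.util.concurrent.TimeUnit;
+     import org.apache.hadoop.util.ShutdownHookManager;
+
+     public final class CleanupRegistration {
+       private CleanupRegistration() {
+       }
+
+       public static void registerCleanup(Runnable cleanup) {
+         ShutdownHookManager.get().addShutdownHook(
+             cleanup,
+             30,                     // higher-priority hooks run first
+             30, TimeUnit.SECONDS);  // per-hook shutdown timeout
+       }
+     }
+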
+
+ public class MyApp extends Configured implements Tool {
+
+ public int run(String[] args) throws Exception {
+ // Configuration processed by ToolRunner
+ Configuration conf = getConf();
+
+ // Create a JobConf using the processed conf
+ JobConf job = new JobConf(conf, MyApp.class);
+
+ // Process custom command-line options
+ Path in = new Path(args[1]);
+ Path out = new Path(args[2]);
+
+ // Specify various job-specific parameters
+ job.setJobName("my-app");
+ job.setInputPath(in);
+ job.setOutputPath(out);
+ job.setMapperClass(MyMapper.class);
+ job.setReducerClass(MyReducer.class);
+
+ // Submit the job, then poll for progress until the job is complete
+ RunningJob runningJob = JobClient.runJob(job);
+ if (runningJob.isSuccessful()) {
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ // Let ToolRunner handle generic command-line options
+ int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+
+ System.exit(res);
+ }
+ }
+
+
+ @see GenericOptionsParser
+ @see ToolRunner]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool by {@link Tool#run(String[])}, after
+ parsing with the given generic arguments. Uses the given
+ Configuration, or builds one if null.
+
+ Sets the Tool's configuration with the possibly modified
+ version of the conf.
+
+ @param conf Configuration for the Tool.
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.
+ @throws Exception Exception.]]>
+
+
+
+
+
+
+
+ Tool with its Configuration.
+
+ Equivalent to run(tool.getConf(), tool, args).
+
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.
+ @throws Exception exception.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ToolRunner can be used to run classes implementing
+ Tool interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+
+ generic hadoop command line arguments and modifies the
+ Configuration of the Tool. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Bloom filter, as defined by Bloom in 1970.
+
+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
+ the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ offers for the transmission of set membership information between networked hosts. A sender encodes
+ the information into a bit vector, the Bloom filter, that is more compact than a conventional
+ representation. Computation and space costs for construction are linear in the number of elements.
+ The receiver uses the filter to test whether various elements are members of the set. Though the
+ filter will occasionally return a false positive, it will never return a false negative. When creating
+ the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
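+
+ A minimal sketch of the filter in code; the vector size and hash count are
+ arbitrary example values, not tuning recommendations.
+
+     import java.nio.charset.StandardCharsets;
+     import org.apache.hadoop.util.bloom.BloomFilter;
+     import org.apache.hadoop.util.bloom.Key;
+     import org.apache.hadoop.util.hash.Hash;
+
+     public final class BloomFilterDemo {
+       public static void main(String[] args) {
+         BloomFilter filter = new BloomFilter(8 * 1024, 5, Hash.MURMUR_HASH);
+         filter.add(new Key("alice".getBytes(StandardCharsets.UTF_8)));
+
+         // True for added keys; may also be true for other keys (a false
+         // positive), but never false for a key that was added.
+         boolean maybe = filter.membershipTest(
+             new Key("alice".getBytes(StandardCharsets.UTF_8)));
+         System.out.println("membership: " + maybe);
+       }
+     }
+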
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this counting Bloom filter.
+
+ Invariant: nothing happens if the specified key does not belong to this counting Bloom filter.
+ @param key The key to remove.]]>
+
+
+
+
+
+
+
+
+
+
+
+ key -> count map.
+
NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count with probability equal to the
+ error rate of this filter, and v > count otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count with the probability of the false negative rate of such
+ filter.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counting Bloom filter, as defined by Fan et al. in a ToN
+ 2000 paper.
+
+ A counting Bloom filter is an improvement to a standard Bloom filter as it
+ allows dynamic additions and deletions of set membership information. This
+ is achieved through the use of a counting vector instead of a bit vector.
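+
+ A minimal sketch of the add/count/delete cycle; the sizing values are
+ arbitrary example figures.
+
+     import java.nio.charset.StandardCharsets;
+     import org.apache.hadoop.util.bloom.CountingBloomFilter;
+     import org.apache.hadoop.util.bloom.Key;
+     import org.apache.hadoop.util.hash.Hash;
+
+     public final class CountingBloomDemo {
+       public static void main(String[] args) {
+         CountingBloomFilter filter =
+             new CountingBloomFilter(8 * 1024, 5, Hash.MURMUR_HASH);
+         Key key = new Key("alice".getBytes(StandardCharsets.UTF_8));
+
+         filter.add(key);
+         filter.add(key);
+         System.out.println(filter.approximateCount(key)); // typically 2
+
+         filter.delete(key);                                // undo one insertion
+         System.out.println(filter.membershipTest(key));    // still true
+       }
+     }
+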
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Builds an empty Dynamic Bloom filter.
+ @param vectorSize The number of bits in the vector.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).
+ @param nr The threshold for the maximum number of keys to record in a
+ dynamic Bloom filter row.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
+
+ A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but
+ each of the s rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr elements are recorded in the
+ initial bit vector, where nr {@literal <=} n
+ (n is the cardinality of the set A to record in
+ the filter).
+
+ As the size of A grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr, is
+ strictly less than the current cardinality of A, n.
+ If an active Bloom filter is found, the key is inserted and
+ nr is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A and the element
+ is added in this new Bloom filter and the nr value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k positions are set to one in one of the matrix rows.
+
+
+
+
+
+
+
+
+ Builds a hash function that must obey a given maximum number of returned values and a highest value.
+ @param maxValue The maximum highest returned value.
+ @param nbHash The number of resulting hashed values.
+ @param hashType type of the hashing function (see {@link Hash}).]]>
+
+
+
+
+ this hash function. A NOOP]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The idea is to randomly select a bit to reset.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will generate the minimum
+ number of false negatives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will remove the maximum number
+ of false positives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will, at the same time, remove
+ the maximum number of false positives while minimizing the number of false
+ negatives generated.]]>
+
+
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this retouched Bloom filter.
+
+ Invariant: if the false positive is null, nothing happens.
+ @param key The false positive key to add.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param coll The collection of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The list of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The array of false positives.]]>
+
+
+
+
+
+
+ this retouched Bloom filter.
+ @param scheme The selective clearing scheme to apply.]]>
+
+
+
+
+
+
+
+
+
+
+
+ retouched Bloom filter, as defined in the CoNEXT 2006 paper.
+
+ It allows the removal of selected false positives at the cost of introducing
+ random false negatives, and with the benefit of eliminating some random false
+ positives at the same time.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param timeout timeout to wait
+ @param unit time unit.
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown
+ @throws TimeoutException the future timed out.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+
If it is an IOE: Return.
+
If it is a {@link UncheckedIOException}: return the cause
+
Completion/Execution Exceptions: extract and repeat
+
If it is an RTE or Error: throw.
+
Any other type: wrap in an IOE
+
+
+ Recursively handles wrapped Execution and Completion Exceptions in
+ case something very complicated has happened.
+ @param e exception.
+ @return an IOException extracted or built from the cause.
+ @throws RuntimeException if that is the inner cause.
+ @throws Error if that is the inner cause.]]>
+
+
+
+
+
+
+
+
+ type of result
+ @param type of builder
+ @return the builder passed in.]]>
+
+
+
+
+
+
+
+
+
+ fs.example.s3a.option becomes "s3a.option"
+ fs.example.fs.io.policy becomes "fs.io.policy"
+ fs.example.something becomes "something"
+
+ @param builder builder to modify
+ @param conf configuration to read
+ @param prefix prefix to scan/strip
+ @param mandatory are the options to be mandatory or optional?]]>
+
+
+
+
+
+ Return type.
+ @return the evaluated result.
+ @throws UnsupportedOperationException fail fast if unsupported
+ @throws IllegalArgumentException invalid argument]]>
+
+
+
+
+ Contains methods promoted from
+ {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they
+ are a key part of integrating async IO in application code.
+
+
+ One key feature is that the {@link #awaitFuture(Future)} and
+ {@link #awaitFuture(Future, long, TimeUnit)} calls will
+ extract and rethrow exceptions raised in the future's execution,
+ including extracting the inner IOException of any
+ {@code UncheckedIOException} raised in the future.
+ This makes it somewhat easier to execute IOException-raising
+ code inside futures.
+
]]>
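+
+ A minimal sketch of awaiting an asynchronous openFile() call; the helper
+ class and method names are invented for illustration.
+
+     import java.io.IOException;
+     import org.apache.hadoop.fs.FSDataInputStream;
+     import org.apache.hadoop.fs.FileSystem;
+     import org.apache.hadoop.fs.Path;
+     import org.apache.hadoop.util.functional.FutureIO;
+
+     public final class OpenFileDemo {
+       private OpenFileDemo() {
+       }
+
+       public static FSDataInputStream open(FileSystem fs, Path path)
+           throws IOException {
+         // awaitFuture() unwraps and rethrows IOExceptions raised in the future.
+         return FutureIO.awaitFuture(fs.openFile(path).build());
+       }
+     }
+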
+
+
+
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @param iterator iterator.
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @param iterable iterable.
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @param array array.
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @param mapper transformation
+ @return a remote iterator]]>
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @return a remote iterator]]>
+
+
+
+
+
+
+
+ Elements are filtered in the hasNext() method; if not used
+ the filtering will be done on demand in the {@code next()}
+ call.
+
+ @param type
+ @param iterator source
+ @param filter filter
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type.
+ @return a new iterator]]>
+
+
+
+
+
+
+ type
+ @return a list of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+ type
+ @return an array of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+ and this class's log is set to DEBUG,
+ then the statistics of the operation are evaluated and logged at
+ debug.
+
+ The number of entries processed is returned, as it is useful to
+ know this, especially during tests or when reporting values
+ to users.
+
+ This does not close the iterator afterwards.
+ @param source iterator source
+ @param consumer consumer of the values.
+ @return the number of elements processed
+ @param type of source
+ @throws IOException if the source RemoteIterator or the consumer raise one.]]>
+
+
+
+
+
+ type of source]]>
+
+
+
+
+ This aims to make it straightforward to use lambda-expressions to
+ transform the results of an iterator, without losing the statistics
+ in the process, and to chain the operations together.
+
+ The closeable operation will be passed through RemoteIterators which
+ wrap other RemoteIterators. This is to support any iterator which
+ can be closed to release held connections, file handles etc.
+ Unless client code is written to assume that RemoteIterator instances
+ may be closed, this is not likely to be broadly used. It is added
+ to make it possible to adopt this feature in a managed way.
+
+ One notable feature is that the
+ {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will
+ LOG at debug any IOStatistics provided by the iterator, if such
+ statistics are provided. There's no attempt at retrieval and logging
+ if the LOG is not set to debug, so it is a zero cost feature unless
+ the logger {@code org.apache.hadoop.fs.functional.RemoteIterators}
+ is at DEBUG.
+
+ Based on the S3A Listing code, and some work on moving other code
+ to using iterative listings so as to pick up the statistics.]]>
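+
+ A minimal sketch using foreach() over a recursive listing; the helper
+ class and method names are invented for illustration.
+
+     import java.io.IOException;
+     import org.apache.hadoop.fs.FileSystem;
+     import org.apache.hadoop.fs.Path;
+     import org.apache.hadoop.util.functional.RemoteIterators;
+
+     public final class ListingDemo {
+       private ListingDemo() {
+       }
+
+       /** Counts the files under a directory. */
+       public static long countFiles(FileSystem fs, Path dir) throws IOException {
+         // foreach() returns the number of entries processed and logs any
+         // IOStatistics exposed by the iterator when debug logging is enabled.
+         return RemoteIterators.foreach(
+             fs.listFiles(dir, true),
+             status -> {
+               // each LocatedFileStatus could be inspected here
+             });
+       }
+     }
+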
+
+
+
+
+
+
+
+
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md
new file mode 100644
index 00000000000..0bdd1844b6e
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md
@@ -0,0 +1,359 @@
+
+
+# Apache Hadoop Changelog
+
+## Release 3.3.5 - 2023-03-14
+
+
+
+### IMPORTANT ISSUES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | Replace all default Charset usage with UTF-8 | Major | . | Viraj Jasani | Viraj Jasani |
+| [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | CryptoOutputStream::close leak when encrypted zones + quota exceptions | Critical | fs | Colm Dougan | Colm Dougan |
+
+
+### NEW FEATURES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-18003](https://issues.apache.org/jira/browse/HADOOP-18003) | Add a method appendIfAbsent for CallerContext | Minor | . | Tao Li | Tao Li |
+| [HDFS-16331](https://issues.apache.org/jira/browse/HDFS-16331) | Make dfs.blockreport.intervalMsec reconfigurable | Major | . | Tao Li | Tao Li |
+| [HDFS-16371](https://issues.apache.org/jira/browse/HDFS-16371) | Exclude slow disks when choosing volume | Major | . | Tao Li | Tao Li |
+| [HDFS-16400](https://issues.apache.org/jira/browse/HDFS-16400) | Reconfig DataXceiver parameters for datanode | Major | . | Tao Li | Tao Li |
+| [HDFS-16399](https://issues.apache.org/jira/browse/HDFS-16399) | Reconfig cache report parameters for datanode | Major | . | Tao Li | Tao Li |
+| [HDFS-16398](https://issues.apache.org/jira/browse/HDFS-16398) | Reconfig block report parameters for datanode | Major | . | Tao Li | Tao Li |
+| [HDFS-16396](https://issues.apache.org/jira/browse/HDFS-16396) | Reconfig slow peer parameters for datanode | Major | . | Tao Li | Tao Li |
+| [HDFS-16397](https://issues.apache.org/jira/browse/HDFS-16397) | Reconfig slow disk parameters for datanode | Major | . | Tao Li | Tao Li |
+| [MAPREDUCE-7341](https://issues.apache.org/jira/browse/MAPREDUCE-7341) | Add a task-manifest output committer for Azure and GCS | Major | client | Steve Loughran | Steve Loughran |
+| [HADOOP-18163](https://issues.apache.org/jira/browse/HADOOP-18163) | hadoop-azure support for the Manifest Committer of MAPREDUCE-7341 | Major | fs/azure | Steve Loughran | Steve Loughran |
+| [HDFS-16413](https://issues.apache.org/jira/browse/HDFS-16413) | Reconfig dfs usage parameters for datanode | Major | . | Tao Li | Tao Li |
+| [HDFS-16521](https://issues.apache.org/jira/browse/HDFS-16521) | DFS API to retrieve slow datanodes | Major | . | Viraj Jasani | Viraj Jasani |
+| [HDFS-16568](https://issues.apache.org/jira/browse/HDFS-16568) | dfsadmin -reconfig option to start/query reconfig on all live datanodes | Major | . | Viraj Jasani | Viraj Jasani |
+| [HDFS-16582](https://issues.apache.org/jira/browse/HDFS-16582) | Expose aggregate latency of slow node as perceived by the reporting node | Major | . | Viraj Jasani | Viraj Jasani |
+| [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | Slow peer metrics - add median, mad and upper latency limits | Major | . | Viraj Jasani | Viraj Jasani |
+| [YARN-11241](https://issues.apache.org/jira/browse/YARN-11241) | Add uncleaning option for local app log file with log-aggregation enabled | Major | log-aggregation | Ashutosh Gupta | Ashutosh Gupta |
+| [HADOOP-18103](https://issues.apache.org/jira/browse/HADOOP-18103) | High performance vectored read API in Hadoop | Major | common, fs, fs/adl, fs/s3 | Mukund Thakur | Mukund Thakur |
+
+
+### IMPROVEMENTS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-17276](https://issues.apache.org/jira/browse/HADOOP-17276) | Extend CallerContext to make it include many items | Major | . | Hui Fei | Hui Fei |
+| [HDFS-15745](https://issues.apache.org/jira/browse/HDFS-15745) | Make DataNodePeerMetrics#LOW\_THRESHOLD\_MS and MIN\_OUTLIER\_DETECTION\_NODES configurable | Major | . | Haibin Huang | Haibin Huang |
+| [HDFS-16266](https://issues.apache.org/jira/browse/HDFS-16266) | Add remote port information to HDFS audit log | Major | . | Tao Li | Tao Li |
+| [YARN-10997](https://issues.apache.org/jira/browse/YARN-10997) | Revisit allocation and reservation logging | Major | . | Andras Gyori | Andras Gyori |
+| [HDFS-16310](https://issues.apache.org/jira/browse/HDFS-16310) | RBF: Add client port to CallerContext for Router | Major | . | Tao Li | Tao Li |
+| [HDFS-16352](https://issues.apache.org/jira/browse/HDFS-16352) | return the real datanode numBlocks in #getDatanodeStorageReport | Major | . | qinyuren | qinyuren |
+| [HDFS-16426](https://issues.apache.org/jira/browse/HDFS-16426) | fix nextBlockReportTime when trigger full block report force | Major | . | qinyuren | qinyuren |
+| [HDFS-16430](https://issues.apache.org/jira/browse/HDFS-16430) | Validate maximum blocks in EC group when adding an EC policy | Minor | ec, erasure-coding | daimin | daimin |
+| [HDFS-16403](https://issues.apache.org/jira/browse/HDFS-16403) | Improve FUSE IO performance by supporting FUSE parameter max\_background | Minor | fuse-dfs | daimin | daimin |
+| [HDFS-16262](https://issues.apache.org/jira/browse/HDFS-16262) | Async refresh of cached locations in DFSInputStream | Major | . | Bryan Beaudreault | Bryan Beaudreault |
+| [HADOOP-18093](https://issues.apache.org/jira/browse/HADOOP-18093) | Better exception handling for testFileStatusOnMountLink() in ViewFsBaseTest.java | Trivial | . | Xing Lin | Xing Lin |
+| [HDFS-16423](https://issues.apache.org/jira/browse/HDFS-16423) | balancer should not get blocks on stale storages | Major | balancer & mover | qinyuren | qinyuren |
+| [HADOOP-18139](https://issues.apache.org/jira/browse/HADOOP-18139) | Allow configuration of zookeeper server principal | Major | auth | Owen O'Malley | Owen O'Malley |
+| [YARN-11076](https://issues.apache.org/jira/browse/YARN-11076) | Upgrade jQuery version in Yarn UI2 | Major | yarn-ui-v2 | Tamas Domok | Tamas Domok |
+| [HDFS-16495](https://issues.apache.org/jira/browse/HDFS-16495) | RBF should prepend the client ip rather than append it. | Major | . | Owen O'Malley | Owen O'Malley |
+| [HADOOP-18144](https://issues.apache.org/jira/browse/HADOOP-18144) | getTrashRoot/s in ViewFileSystem should return viewFS path, not targetFS path | Major | common | Xing Lin | Xing Lin |
+| [HADOOP-18162](https://issues.apache.org/jira/browse/HADOOP-18162) | hadoop-common enhancements for the Manifest Committer of MAPREDUCE-7341 | Major | fs | Steve Loughran | Steve Loughran |
+| [HDFS-16529](https://issues.apache.org/jira/browse/HDFS-16529) | Remove unnecessary setObserverRead in TestConsistentReadsObserver | Trivial | test | Zhaohui Wang | Zhaohui Wang |
+| [HDFS-16530](https://issues.apache.org/jira/browse/HDFS-16530) | setReplication debug log creates a new string even if debug is disabled | Major | namenode | Stephen O'Donnell | Stephen O'Donnell |
+| [HDFS-16457](https://issues.apache.org/jira/browse/HDFS-16457) | Make fs.getspaceused.classname reconfigurable | Major | namenode | yanbin.zhang | yanbin.zhang |
+| [HDFS-16427](https://issues.apache.org/jira/browse/HDFS-16427) | Add debug log for BlockManager#chooseExcessRedundancyStriped | Minor | erasure-coding | Tao Li | Tao Li |
+| [HDFS-16497](https://issues.apache.org/jira/browse/HDFS-16497) | EC: Add param comment for liveBusyBlockIndices with HDFS-14768 | Minor | erasure-coding, namenode | caozhiqiang | caozhiqiang |
+| [HDFS-16389](https://issues.apache.org/jira/browse/HDFS-16389) | Improve NNThroughputBenchmark test mkdirs | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu |
+| [HADOOP-17551](https://issues.apache.org/jira/browse/HADOOP-17551) | Upgrade maven-site-plugin to 3.11.0 | Major | . | Akira Ajisaka | Ashutosh Gupta |
+| [HDFS-16519](https://issues.apache.org/jira/browse/HDFS-16519) | Add throttler to EC reconstruction | Minor | datanode, ec | daimin | daimin |
+| [HDFS-14478](https://issues.apache.org/jira/browse/HDFS-14478) | Add libhdfs APIs for openFile | Major | hdfs-client, libhdfs, native | Sahil Takiar | Sahil Takiar |
+| [HADOOP-16202](https://issues.apache.org/jira/browse/HADOOP-16202) | Enhance openFile() for better read performance against object stores | Major | fs, fs/s3, tools/distcp | Steve Loughran | Steve Loughran |
+| [YARN-11116](https://issues.apache.org/jira/browse/YARN-11116) | Migrate Times util from SimpleDateFormat to thread-safe DateTimeFormatter class | Minor | . | Jonathan Turner Eagles | Jonathan Turner Eagles |
+| [HDFS-16520](https://issues.apache.org/jira/browse/HDFS-16520) | Improve EC pread: avoid potential reading whole block | Major | dfsclient, ec, erasure-coding | daimin | daimin |
+| [HADOOP-18167](https://issues.apache.org/jira/browse/HADOOP-18167) | Add metrics to track delegation token secret manager operations | Major | . | Hector Sandoval Chaverri | Hector Sandoval Chaverri |
+| [YARN-10080](https://issues.apache.org/jira/browse/YARN-10080) | Support show app id on localizer thread pool | Major | nodemanager | zhoukang | Ashutosh Gupta |
+| [HADOOP-18172](https://issues.apache.org/jira/browse/HADOOP-18172) | Change scope of getRootFallbackLink for InodeTree to make them accessible from outside package | Minor | . | Xing Lin | Xing Lin |
+| [HDFS-16588](https://issues.apache.org/jira/browse/HDFS-16588) | Backport HDFS-16584 to branch-3.3. | Major | balancer & mover, namenode | JiangHua Zhu | JiangHua Zhu |
+| [HADOOP-18240](https://issues.apache.org/jira/browse/HADOOP-18240) | Upgrade Yetus to 0.14.0 | Major | build | Akira Ajisaka | Ashutosh Gupta |
+| [HDFS-16585](https://issues.apache.org/jira/browse/HDFS-16585) | Add @VisibleForTesting in Dispatcher.java after HDFS-16268 | Trivial | . | Wei-Chiu Chuang | Ashutosh Gupta |
+| [HADOOP-18244](https://issues.apache.org/jira/browse/HADOOP-18244) | Fix Hadoop-Common JavaDoc Error on branch-3.3 | Major | common | Shilun Fan | Shilun Fan |
+| [HADOOP-18269](https://issues.apache.org/jira/browse/HADOOP-18269) | Misleading method name in DistCpOptions | Minor | tools/distcp | guophilipse | guophilipse |
+| [HADOOP-18275](https://issues.apache.org/jira/browse/HADOOP-18275) | update os-maven-plugin to 1.7.0 | Minor | build | Steve Loughran | Steve Loughran |
+| [HDFS-16610](https://issues.apache.org/jira/browse/HDFS-16610) | Make fsck read timeout configurable | Major | hdfs-client | Stephen O'Donnell | Stephen O'Donnell |
+| [HDFS-16576](https://issues.apache.org/jira/browse/HDFS-16576) | Remove unused imports in HDFS project | Minor | . | Ashutosh Gupta | Ashutosh Gupta |
+| [HDFS-16629](https://issues.apache.org/jira/browse/HDFS-16629) | [JDK 11] Fix javadoc warnings in hadoop-hdfs module | Minor | hdfs | Shilun Fan | Shilun Fan |
+| [YARN-11172](https://issues.apache.org/jira/browse/YARN-11172) | Fix testDelegationToken | Major | test | zhengchenyu | zhengchenyu |
+| [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | Improve Magic Committer Performance | Minor | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18288](https://issues.apache.org/jira/browse/HADOOP-18288) | Total requests and total requests per sec served by RPC servers | Major | . | Viraj Jasani | Viraj Jasani |
+| [HADOOP-18336](https://issues.apache.org/jira/browse/HADOOP-18336) | tag FSDataInputStream.getWrappedStream() @Public/@Stable | Minor | fs | Steve Loughran | Ashutosh Gupta |
+| [HADOOP-13144](https://issues.apache.org/jira/browse/HADOOP-13144) | Enhancing IPC client throughput via multiple connections per user | Minor | ipc | Jason Kace | Íñigo Goiri |
+| [HDFS-16712](https://issues.apache.org/jira/browse/HDFS-16712) | Fix incorrect placeholder in DataNode.java | Major | . | ZanderXu | ZanderXu |
+| [HDFS-16702](https://issues.apache.org/jira/browse/HDFS-16702) | MiniDFSCluster should report cause of exception in assertion error | Minor | hdfs | Steve Vaughan | Steve Vaughan |
+| [HADOOP-18365](https://issues.apache.org/jira/browse/HADOOP-18365) | Updated addresses are still accessed using the old IP address | Major | common | Steve Vaughan | Steve Vaughan |
+| [HDFS-16687](https://issues.apache.org/jira/browse/HDFS-16687) | RouterFsckServlet replicates code from DfsServlet base class | Major | federation | Steve Vaughan | Steve Vaughan |
+| [HADOOP-18333](https://issues.apache.org/jira/browse/HADOOP-18333) | hadoop-client-runtime impact by CVE-2022-2047 CVE-2022-2048 due to shaded jetty | Major | build | phoebe chen | Ashutosh Gupta |
+| [HADOOP-18406](https://issues.apache.org/jira/browse/HADOOP-18406) | Adds alignment context to call path for creating RPC proxy with multiple connections per user. | Major | ipc | Simbarashe Dzinamarira | Simbarashe Dzinamarira |
+| [HDFS-16684](https://issues.apache.org/jira/browse/HDFS-16684) | Exclude self from JournalNodeSyncer when using a bind host | Major | journal-node | Steve Vaughan | Steve Vaughan |
+| [HDFS-16686](https://issues.apache.org/jira/browse/HDFS-16686) | GetJournalEditServlet fails to authorize valid Kerberos request | Major | journal-node | Steve Vaughan | Steve Vaughan |
+| [YARN-11303](https://issues.apache.org/jira/browse/YARN-11303) | Upgrade jquery ui to 1.13.2 | Major | security | D M Murali Krishna Reddy | Ashutosh Gupta |
+| [HADOOP-16769](https://issues.apache.org/jira/browse/HADOOP-16769) | LocalDirAllocator to provide diagnostics when file creation fails | Minor | util | Ramesh Kumar Thangarajan | Ashutosh Gupta |
+| [HADOOP-18341](https://issues.apache.org/jira/browse/HADOOP-18341) | upgrade commons-configuration2 to 2.8.0 and commons-text to 1.9 | Major | . | PJ Fanning | PJ Fanning |
+| [HDFS-16776](https://issues.apache.org/jira/browse/HDFS-16776) | Erasure Coding: The length of targets should be checked when DN gets a reconstruction task | Major | . | Kidd5368 | Kidd5368 |
+| [HADOOP-18469](https://issues.apache.org/jira/browse/HADOOP-18469) | Add XMLUtils methods to centralise code that creates secure XML parsers | Major | . | PJ Fanning | PJ Fanning |
+| [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | Remove the hadoop-openstack module | Major | build, fs, fs/swift | Steve Loughran | Steve Loughran |
+| [HADOOP-18468](https://issues.apache.org/jira/browse/HADOOP-18468) | upgrade jettison json jar due to fix CVE-2022-40149 | Major | build | PJ Fanning | PJ Fanning |
+| [HADOOP-17779](https://issues.apache.org/jira/browse/HADOOP-17779) | Lock File System Creator Semaphore Uninterruptibly | Minor | fs | David Mollitor | David Mollitor |
+| [HADOOP-18360](https://issues.apache.org/jira/browse/HADOOP-18360) | Update commons-csv from 1.0 to 1.9.0. | Minor | common | Shilun Fan | Shilun Fan |
+| [HADOOP-18493](https://issues.apache.org/jira/browse/HADOOP-18493) | update jackson-databind 2.12.7.1 due to CVE fixes | Major | . | PJ Fanning | PJ Fanning |
+| [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | Update Bouncy Castle to 1.68 or later | Major | build | Takanobu Asanuma | PJ Fanning |
+| [HADOOP-18497](https://issues.apache.org/jira/browse/HADOOP-18497) | Upgrade commons-text version to fix CVE-2022-42889 | Major | build | Xiaoqiao He | PJ Fanning |
+| [HDFS-16795](https://issues.apache.org/jira/browse/HDFS-16795) | Use secure XML parser utils in hdfs classes | Major | . | PJ Fanning | PJ Fanning |
+| [YARN-11330](https://issues.apache.org/jira/browse/YARN-11330) | Use secure XML parser utils in YARN | Major | . | PJ Fanning | PJ Fanning |
+| [MAPREDUCE-7411](https://issues.apache.org/jira/browse/MAPREDUCE-7411) | Use secure XML parser utils in MapReduce | Major | . | PJ Fanning | PJ Fanning |
+| [HADOOP-18512](https://issues.apache.org/jira/browse/HADOOP-18512) | upgrade woodstox-core to 5.4.0 for security fix | Major | common | phoebe chen | PJ Fanning |
+| [YARN-11363](https://issues.apache.org/jira/browse/YARN-11363) | Remove unused TimelineVersionWatcher and TimelineVersion from hadoop-yarn-server-tests | Major | test, yarn | Ashutosh Gupta | Ashutosh Gupta |
+| [YARN-11364](https://issues.apache.org/jira/browse/YARN-11364) | Docker Container to accept docker Image name with sha256 digest | Major | yarn | Ashutosh Gupta | Ashutosh Gupta |
+| [HADOOP-18517](https://issues.apache.org/jira/browse/HADOOP-18517) | ABFS: Add fs.azure.enable.readahead option to disable readahead | Major | fs/azure | Steve Loughran | Steve Loughran |
+| [HADOOP-18484](https://issues.apache.org/jira/browse/HADOOP-18484) | upgrade hsqldb to v2.7.1 due to CVE | Major | . | PJ Fanning | Ashutosh Gupta |
+| [HDFS-16844](https://issues.apache.org/jira/browse/HDFS-16844) | [RBF] The routers should be resiliant against exceptions from StateStore | Major | rbf | Owen O'Malley | Owen O'Malley |
+| [HADOOP-18573](https://issues.apache.org/jira/browse/HADOOP-18573) | Improve error reporting on non-standard kerberos names | Blocker | security | Steve Loughran | Steve Loughran |
+| [HADOOP-18561](https://issues.apache.org/jira/browse/HADOOP-18561) | CVE-2021-37533 on commons-net is included in hadoop common and hadoop-client-runtime | Blocker | build | phoebe chen | Steve Loughran |
+| [HADOOP-18067](https://issues.apache.org/jira/browse/HADOOP-18067) | Über-jira: S3A Hadoop 3.3.5 features | Major | fs/s3 | Steve Loughran | Mukund Thakur |
+| [YARN-10444](https://issues.apache.org/jira/browse/YARN-10444) | Node Manager to use openFile() with whole-file read policy for localizing files. | Minor | nodemanager | Steve Loughran | Steve Loughran |
+| [HADOOP-18661](https://issues.apache.org/jira/browse/HADOOP-18661) | Fix bin/hadoop usage script terminology | Blocker | scripts | Steve Loughran | Steve Loughran |
+
+
+### BUG FIXES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-17116](https://issues.apache.org/jira/browse/HADOOP-17116) | Skip Retry INFO logging on first failover from a proxy | Major | ha | Hanisha Koneru | Hanisha Koneru |
+| [YARN-10553](https://issues.apache.org/jira/browse/YARN-10553) | Refactor TestDistributedShell | Major | distributed-shell, test | Ahmed Hussein | Ahmed Hussein |
+| [HDFS-15839](https://issues.apache.org/jira/browse/HDFS-15839) | RBF: Cannot get method setBalancerBandwidth on Router Client | Major | rbf | Yang Yun | Yang Yun |
+| [HADOOP-17588](https://issues.apache.org/jira/browse/HADOOP-17588) | CryptoInputStream#close() should be synchronized | Major | . | Renukaprasad C | Renukaprasad C |
+| [HADOOP-17836](https://issues.apache.org/jira/browse/HADOOP-17836) | Improve logging on ABFS error reporting | Minor | fs/azure | Steve Loughran | Steve Loughran |
+| [HADOOP-17989](https://issues.apache.org/jira/browse/HADOOP-17989) | ITestAzureBlobFileSystemDelete failing "Operations has null HTTP response" | Major | fs/azure, test | Steve Loughran | Steve Loughran |
+| [YARN-11055](https://issues.apache.org/jira/browse/YARN-11055) | In cgroups-operations.c some fprintf format strings don't end with "\\n" | Minor | nodemanager | Gera Shegalov | Gera Shegalov |
+| [YARN-11065](https://issues.apache.org/jira/browse/YARN-11065) | Bump follow-redirects from 1.13.3 to 1.14.7 in hadoop-yarn-ui | Major | yarn-ui-v2 | Akira Ajisaka | |
+| [HDFS-16303](https://issues.apache.org/jira/browse/HDFS-16303) | Losing over 100 datanodes in state decommissioning results in full blockage of all datanode decommissioning | Major | . | Kevin Wikant | Kevin Wikant |
+| [HDFS-16443](https://issues.apache.org/jira/browse/HDFS-16443) | Fix edge case where DatanodeAdminDefaultMonitor doubly enqueues a DatanodeDescriptor on exception | Major | hdfs | Kevin Wikant | Kevin Wikant |
+| [HDFS-16449](https://issues.apache.org/jira/browse/HDFS-16449) | Fix hadoop web site release notes and changelog not available | Minor | documentation | guophilipse | guophilipse |
+| [YARN-10788](https://issues.apache.org/jira/browse/YARN-10788) | TestCsiClient fails | Major | test | Akira Ajisaka | Akira Ajisaka |
+| [HADOOP-18126](https://issues.apache.org/jira/browse/HADOOP-18126) | Update junit 5 version due to build issues | Major | build | PJ Fanning | PJ Fanning |
+| [YARN-11033](https://issues.apache.org/jira/browse/YARN-11033) | isAbsoluteResource is not correct for dynamically created queues | Minor | yarn | Tamas Domok | Tamas Domok |
+| [YARN-10894](https://issues.apache.org/jira/browse/YARN-10894) | Follow up YARN-10237: fix the new test case in TestRMWebServicesCapacitySched | Major | . | Tamas Domok | Tamas Domok |
+| [YARN-11022](https://issues.apache.org/jira/browse/YARN-11022) | Fix the documentation for max-parallel-apps in CS | Major | capacity scheduler | Tamas Domok | Tamas Domok |
+| [HADOOP-18150](https://issues.apache.org/jira/browse/HADOOP-18150) | Fix ITestAuditManagerDisabled after S3A audit logging was enabled in HADOOP-18091 | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh |
+| [HADOOP-17976](https://issues.apache.org/jira/browse/HADOOP-17976) | abfs etag extraction inconsistent between LIST and HEAD calls | Minor | fs/azure | Steve Loughran | Steve Loughran |
+| [HADOOP-18129](https://issues.apache.org/jira/browse/HADOOP-18129) | Change URI[] in INodeLink to String[] to reduce memory footprint of ViewFileSystem | Major | . | Abhishek Das | Abhishek Das |
+| [HADOOP-18145](https://issues.apache.org/jira/browse/HADOOP-18145) | Fileutil's unzip method causes unzipped files to lose their original permissions | Major | common | jingxiong zhong | jingxiong zhong |
+| [HDFS-16518](https://issues.apache.org/jira/browse/HDFS-16518) | KeyProviderCache close cached KeyProvider with Hadoop ShutdownHookManager | Major | hdfs | Lei Yang | Lei Yang |
+| [HADOOP-18169](https://issues.apache.org/jira/browse/HADOOP-18169) | getDelegationTokens in ViewFs should also fetch the token from the fallback FS | Major | . | Xing Lin | Xing Lin |
+| [HDFS-16479](https://issues.apache.org/jira/browse/HDFS-16479) | EC: NameNode should not send a reconstruction work when the source datanodes are insufficient | Critical | ec, erasure-coding | Yuanbo Liu | Takanobu Asanuma |
+| [HDFS-16509](https://issues.apache.org/jira/browse/HDFS-16509) | Fix decommission UnsupportedOperationException: Remove unsupported | Major | namenode | daimin | daimin |
+| [HDFS-16456](https://issues.apache.org/jira/browse/HDFS-16456) | EC: Decommission a rack with only one dn will fail when the rack number is equal with replication | Critical | ec, namenode | caozhiqiang | caozhiqiang |
+| [HADOOP-18201](https://issues.apache.org/jira/browse/HADOOP-18201) | Remove base and bucket overrides for endpoint in ITestS3ARequesterPays.java | Major | fs/s3 | Mehakmeet Singh | Daniel Carl Jones |
+| [HDFS-16536](https://issues.apache.org/jira/browse/HDFS-16536) | TestOfflineImageViewer fails on branch-3.3 | Major | test | Akira Ajisaka | Ashutosh Gupta |
+| [HDFS-16538](https://issues.apache.org/jira/browse/HDFS-16538) | EC decoding failed due to not enough valid inputs | Major | erasure-coding | qinyuren | qinyuren |
+| [HDFS-16544](https://issues.apache.org/jira/browse/HDFS-16544) | EC decoding failed due to invalid buffer | Major | erasure-coding | qinyuren | qinyuren |
+| [HADOOP-17564](https://issues.apache.org/jira/browse/HADOOP-17564) | Fix typo in UnixShellGuide.html | Trivial | . | Takanobu Asanuma | Ashutosh Gupta |
+| [HDFS-16552](https://issues.apache.org/jira/browse/HDFS-16552) | Fix NPE for TestBlockManager | Major | . | Tao Li | Tao Li |
+| [MAPREDUCE-7246](https://issues.apache.org/jira/browse/MAPREDUCE-7246) | In MapredAppMasterRest#Mapreduce\_Application\_Master\_Info\_API, the datatype of appId should be "string". | Major | documentation | jenny | Ashutosh Gupta |
+| [YARN-10187](https://issues.apache.org/jira/browse/YARN-10187) | Removing hadoop-yarn-project/hadoop-yarn/README as it is no longer maintained. | Minor | documentation | N Sanketh Reddy | Ashutosh Gupta |
+| [HADOOP-16515](https://issues.apache.org/jira/browse/HADOOP-16515) | Update the link to compatibility guide | Minor | documentation | Akira Ajisaka | Ashutosh Gupta |
+| [HDFS-16185](https://issues.apache.org/jira/browse/HDFS-16185) | Fix comment in LowRedundancyBlocks.java | Minor | documentation | Akira Ajisaka | Ashutosh Gupta |
+| [HADOOP-17479](https://issues.apache.org/jira/browse/HADOOP-17479) | Fix the examples of hadoop config prefix | Minor | documentation | Akira Ajisaka | Ashutosh Gupta |
+| [HADOOP-18222](https://issues.apache.org/jira/browse/HADOOP-18222) | Prevent DelegationTokenSecretManagerMetrics from registering multiple times | Major | . | Hector Sandoval Chaverri | Hector Sandoval Chaverri |
+| [HDFS-16540](https://issues.apache.org/jira/browse/HDFS-16540) | Data locality is lost when DataNode pod restarts in kubernetes | Major | namenode | Huaxiang Sun | Huaxiang Sun |
+| [YARN-11133](https://issues.apache.org/jira/browse/YARN-11133) | YarnClient gets the wrong EffectiveMinCapacity value | Major | api | Zilong Zhu | Zilong Zhu |
+| [YARN-10850](https://issues.apache.org/jira/browse/YARN-10850) | TimelineService v2 lists containers for all attempts when filtering for one | Major | timelinereader | Benjamin Teke | Benjamin Teke |
+| [YARN-11141](https://issues.apache.org/jira/browse/YARN-11141) | Capacity Scheduler does not support ambiguous queue names when moving application across queues | Major | capacity scheduler | András Győri | András Győri |
+| [HDFS-16586](https://issues.apache.org/jira/browse/HDFS-16586) | Purge FsDatasetAsyncDiskService threadgroup; it causes BPServiceActor$CommandProcessingThread IllegalThreadStateException 'fatal exception and exit' | Major | datanode | Michael Stack | Michael Stack |
+| [HADOOP-18251](https://issues.apache.org/jira/browse/HADOOP-18251) | Fix failure of extracting JIRA id from commit message in git\_jira\_fix\_version\_check.py | Minor | build | Masatake Iwasaki | Masatake Iwasaki |
+| [YARN-11128](https://issues.apache.org/jira/browse/YARN-11128) | Fix comments in TestProportionalCapacityPreemptionPolicy\* | Minor | capacityscheduler, documentation | Ashutosh Gupta | Ashutosh Gupta |
+| [HADOOP-18234](https://issues.apache.org/jira/browse/HADOOP-18234) | s3a access point xml examples are wrong | Minor | documentation, fs/s3 | Steve Loughran | Ashutosh Gupta |
+| [HADOOP-18238](https://issues.apache.org/jira/browse/HADOOP-18238) | Fix reentrancy check in SFTPFileSystem.close() | Major | common | yi liu | Ashutosh Gupta |
+| [HDFS-16583](https://issues.apache.org/jira/browse/HDFS-16583) | DatanodeAdminDefaultMonitor can get stuck in an infinite loop | Major | . | Stephen O'Donnell | Stephen O'Donnell |
+| [HDFS-16608](https://issues.apache.org/jira/browse/HDFS-16608) | Fix the link in TestClientProtocolForPipelineRecovery | Minor | documentation | Samrat Deb | Samrat Deb |
+| [HDFS-16563](https://issues.apache.org/jira/browse/HDFS-16563) | Namenode WebUI prints sensitive information on Token Expiry | Major | namenode, security, webhdfs | Renukaprasad C | Renukaprasad C |
+| [HDFS-16623](https://issues.apache.org/jira/browse/HDFS-16623) | IllegalArgumentException in LifelineSender | Major | . | ZanderXu | ZanderXu |
+| [HDFS-16064](https://issues.apache.org/jira/browse/HDFS-16064) | Determine when to invalidate corrupt replicas based on number of usable replicas | Major | datanode, namenode | Kevin Wikant | Kevin Wikant |
+| [HADOOP-18255](https://issues.apache.org/jira/browse/HADOOP-18255) | fsdatainputstreambuilder.md refers to hadoop 3.3.3, when it shouldn't | Minor | documentation | Steve Loughran | Ashutosh Gupta |
+| [MAPREDUCE-7387](https://issues.apache.org/jira/browse/MAPREDUCE-7387) | Fix TestJHSSecurity#testDelegationToken AssertionError due to HDFS-16563 | Major | . | Shilun Fan | Shilun Fan |
+| [MAPREDUCE-7369](https://issues.apache.org/jira/browse/MAPREDUCE-7369) | MapReduce tasks timing out when spends more time on MultipleOutputs#close | Major | . | Prabhu Joseph | Ashutosh Gupta |
+| [MAPREDUCE-7391](https://issues.apache.org/jira/browse/MAPREDUCE-7391) | TestLocalDistributedCacheManager failing after HADOOP-16202 | Major | test | Steve Loughran | Steve Loughran |
+| [HDFS-16591](https://issues.apache.org/jira/browse/HDFS-16591) | StateStoreZooKeeper fails to initialize | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri |
+| [HADOOP-18321](https://issues.apache.org/jira/browse/HADOOP-18321) | Fix when to read an additional record from a BZip2 text file split | Critical | io | Ashutosh Gupta | Ashutosh Gupta |
+| [HADOOP-18100](https://issues.apache.org/jira/browse/HADOOP-18100) | Change scope of inner classes in InodeTree to make them accessible outside package | Major | . | Abhishek Das | Abhishek Das |
+| [HADOOP-18217](https://issues.apache.org/jira/browse/HADOOP-18217) | shutdownhookmanager should not be multithreaded (deadlock possible) | Minor | util | Catherinot Remi | |
+| [MAPREDUCE-7372](https://issues.apache.org/jira/browse/MAPREDUCE-7372) | MapReduce set permission too late in copyJar method | Major | mrv2 | Zhang Dongsheng | |
+| [HADOOP-18330](https://issues.apache.org/jira/browse/HADOOP-18330) | S3AFileSystem removes Path when calling createS3Client | Minor | fs/s3 | Ashutosh Pant | Ashutosh Pant |
+| [HADOOP-18390](https://issues.apache.org/jira/browse/HADOOP-18390) | Fix out of sync import for HADOOP-18321 | Minor | . | Ashutosh Gupta | Ashutosh Gupta |
+| [HADOOP-18340](https://issues.apache.org/jira/browse/HADOOP-18340) | deleteOnExit does not work with S3AFileSystem | Minor | fs/s3 | Huaxiang Sun | Huaxiang Sun |
+| [HADOOP-18383](https://issues.apache.org/jira/browse/HADOOP-18383) | Codecs with @DoNotPool annotation are not closed causing memory leak | Major | common | Kevin Sewell | Kevin Sewell |
+| [HDFS-16729](https://issues.apache.org/jira/browse/HDFS-16729) | RBF: fix some unreasonably annotated docs | Major | documentation, rbf | JiangHua Zhu | JiangHua Zhu |
+| [HADOOP-18398](https://issues.apache.org/jira/browse/HADOOP-18398) | Prevent AvroRecord\*.class from being included non-test jar | Major | common | YUBI LEE | YUBI LEE |
+| [HDFS-4043](https://issues.apache.org/jira/browse/HDFS-4043) | Namenode Kerberos Login does not use proper hostname for host qualified hdfs principal name. | Major | security | Ahad Rana | Steve Vaughan |
+| [MAPREDUCE-7403](https://issues.apache.org/jira/browse/MAPREDUCE-7403) | Support spark dynamic partitioning in the Manifest Committer | Major | mrv2 | Steve Loughran | Steve Loughran |
+| [HDFS-16732](https://issues.apache.org/jira/browse/HDFS-16732) | [SBN READ] Avoid get location from observer when the block report is delayed. | Critical | hdfs | zhengchenyu | zhengchenyu |
+| [HADOOP-18375](https://issues.apache.org/jira/browse/HADOOP-18375) | Fix failure of shelltest for hadoop\_add\_ldlibpath | Minor | test | Masatake Iwasaki | Masatake Iwasaki |
+| [HDFS-16755](https://issues.apache.org/jira/browse/HDFS-16755) | TestQJMWithFaults.testUnresolvableHostName() can fail due to unexpected host resolution | Minor | test | Steve Vaughan | Steve Vaughan |
+| [HADOOP-18400](https://issues.apache.org/jira/browse/HADOOP-18400) | Fix file split duplicating records from a succeeding split when reading BZip2 text files | Critical | . | Ashutosh Gupta | Ashutosh Gupta |
+| [HADOOP-18242](https://issues.apache.org/jira/browse/HADOOP-18242) | ABFS Rename Failure when tracking metadata is in incomplete state | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh |
+| [HADOOP-18456](https://issues.apache.org/jira/browse/HADOOP-18456) | NullPointerException in ObjectListingIterator's constructor | Blocker | fs/s3 | Quanlong Huang | Steve Loughran |
+| [HADOOP-18444](https://issues.apache.org/jira/browse/HADOOP-18444) | Add Support for localized trash for ViewFileSystem in Trash.moveToAppropriateTrash | Major | . | Xing Lin | Xing Lin |
+| [HADOOP-18443](https://issues.apache.org/jira/browse/HADOOP-18443) | Upgrade snakeyaml to 1.32 | Major | security | Ashutosh Gupta | Ashutosh Gupta |
+| [HDFS-16766](https://issues.apache.org/jira/browse/HDFS-16766) | hdfs ec command loads (administrator provided) erasure code policy files without disabling xml entity expansion | Major | security | Jing | Ashutosh Gupta |
+| [HDFS-13369](https://issues.apache.org/jira/browse/HDFS-13369) | FSCK Report broken with RequestHedgingProxyProvider | Major | hdfs | Harshakiran Reddy | Ranith Sardar |
+| [YARN-11039](https://issues.apache.org/jira/browse/YARN-11039) | LogAggregationFileControllerFactory::getFileControllerForRead can leak threads | Blocker | log-aggregation | Rajesh Balamohan | Steve Loughran |
+| [HADOOP-18499](https://issues.apache.org/jira/browse/HADOOP-18499) | S3A to support HTTPS web proxies | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh |
+| [HADOOP-18233](https://issues.apache.org/jira/browse/HADOOP-18233) | Possible race condition with TemporaryAWSCredentialsProvider | Major | auth, fs/s3 | Jason Sleight | Jimmy Wong |
+| [MAPREDUCE-7425](https://issues.apache.org/jira/browse/MAPREDUCE-7425) | Document Fix for yarn.app.mapreduce.client-am.ipc.max-retries | Major | yarn | teng wang | teng wang |
+| [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | Disable abfs prefetching by default | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh |
+| [HDFS-16836](https://issues.apache.org/jira/browse/HDFS-16836) | StandbyCheckpointer can still trigger rollback fs image after RU is finalized | Major | hdfs | Lei Yang | Lei Yang |
+| [HADOOP-18324](https://issues.apache.org/jira/browse/HADOOP-18324) | Interrupting RPC Client calls can lead to thread exhaustion | Critical | ipc | Owen O'Malley | Owen O'Malley |
+| [HDFS-16832](https://issues.apache.org/jira/browse/HDFS-16832) | [SBN READ] Fix NPE when check the block location of empty directory | Major | . | zhengchenyu | zhengchenyu |
+| [HADOOP-18498](https://issues.apache.org/jira/browse/HADOOP-18498) | [ABFS]: Error introduced when SAS Token containing '?' prefix is passed | Minor | fs/azure | Sree Bhattacharyya | Sree Bhattacharyya |
+| [HDFS-16847](https://issues.apache.org/jira/browse/HDFS-16847) | RBF: StateStore writer should not commit tmp fail if there was an error in writing the file. | Critical | hdfs, rbf | Simbarashe Dzinamarira | Simbarashe Dzinamarira |
+| [HADOOP-18401](https://issues.apache.org/jira/browse/HADOOP-18401) | No ARM binaries in branch-3.3.x releases | Minor | build | Ling Xu | |
+| [HADOOP-18408](https://issues.apache.org/jira/browse/HADOOP-18408) | [ABFS]: ITestAbfsManifestCommitProtocol fails on nonHNS configuration | Minor | fs/azure, test | Pranav Saxena | Sree Bhattacharyya |
+| [HADOOP-18402](https://issues.apache.org/jira/browse/HADOOP-18402) | S3A committer NPE in spark job abort | Blocker | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18569](https://issues.apache.org/jira/browse/HADOOP-18569) | NFS Gateway may release buffer too early | Blocker | nfs | Attila Doroszlai | Attila Doroszlai |
+| [HADOOP-18574](https://issues.apache.org/jira/browse/HADOOP-18574) | Changing log level of IOStatistics increment to make the DEBUG logs less noisy | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh |
+| [HADOOP-18521](https://issues.apache.org/jira/browse/HADOOP-18521) | ABFS ReadBufferManager buffer sharing across concurrent HTTP requests | Critical | fs/azure | Steve Loughran | Steve Loughran |
+| [MAPREDUCE-7375](https://issues.apache.org/jira/browse/MAPREDUCE-7375) | JobSubmissionFiles don't set right permission after mkdirs | Major | mrv2 | Zhang Dongsheng | |
+| [HADOOP-17717](https://issues.apache.org/jira/browse/HADOOP-17717) | Update wildfly openssl to 1.1.3.Final | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang |
+| [HADOOP-18598](https://issues.apache.org/jira/browse/HADOOP-18598) | maven site generation doesn't include javadocs | Blocker | site | Steve Loughran | Steve Loughran |
+| [HDFS-16895](https://issues.apache.org/jira/browse/HDFS-16895) | NamenodeHeartbeatService should use credentials of logged in user | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri |
+| [HDFS-16853](https://issues.apache.org/jira/browse/HDFS-16853) | The UT TestLeaseRecovery2#testHardLeaseRecoveryAfterNameNodeRestart failed because HADOOP-18324 | Blocker | . | ZanderXu | ZanderXu |
+| [HADOOP-18641](https://issues.apache.org/jira/browse/HADOOP-18641) | cyclonedx maven plugin breaks builds on recent maven releases (3.9.0) | Major | build | Steve Loughran | Steve Loughran |
+| [HDFS-16923](https://issues.apache.org/jira/browse/HDFS-16923) | The getListing RPC will throw NPE if the path does not exist | Critical | . | ZanderXu | ZanderXu |
+| [HDFS-16896](https://issues.apache.org/jira/browse/HDFS-16896) | HDFS Client hedged read has increased failure rate than without hedged read | Major | hdfs-client | Tom McCormick | Tom McCormick |
+| [YARN-11383](https://issues.apache.org/jira/browse/YARN-11383) | Workflow priority mappings is case sensitive | Major | yarn | Aparajita Choudhary | Aparajita Choudhary |
+| [HDFS-16939](https://issues.apache.org/jira/browse/HDFS-16939) | Fix the thread safety bug in LowRedundancyBlocks | Major | namenode | Shuyan Zhang | Shuyan Zhang |
+| [HDFS-16934](https://issues.apache.org/jira/browse/HDFS-16934) | org.apache.hadoop.hdfs.tools.TestDFSAdmin#testAllDatanodesReconfig regression | Minor | dfsadmin, test | Steve Loughran | Shilun Fan |
+
+
+### TESTS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-16573](https://issues.apache.org/jira/browse/HDFS-16573) | Fix test TestDFSStripedInputStreamWithRandomECPolicy | Minor | test | daimin | daimin |
+| [HDFS-16637](https://issues.apache.org/jira/browse/HDFS-16637) | TestHDFSCLI#testAll consistently failing | Major | . | Viraj Jasani | Viraj Jasani |
+| [YARN-11248](https://issues.apache.org/jira/browse/YARN-11248) | Add unit test for FINISHED\_CONTAINERS\_PULLED\_BY\_AM event on DECOMMISSIONING | Major | test | Ashutosh Gupta | Ashutosh Gupta |
+| [HDFS-16625](https://issues.apache.org/jira/browse/HDFS-16625) | Unit tests aren't checking for PMDK availability | Major | test | Steve Vaughan | Steve Vaughan |
+
+
+### SUB-TASKS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-13293](https://issues.apache.org/jira/browse/HDFS-13293) | RBF: The RouterRPCServer should transfer client IP via CallerContext to NamenodeRpcServer | Major | rbf | Baolong Mao | Hui Fei |
+| [HDFS-15630](https://issues.apache.org/jira/browse/HDFS-15630) | RBF: Fix wrong client IP info in CallerContext when requests mount points with multi-destinations. | Major | rbf | Chengwei Wang | Chengwei Wang |
+| [HADOOP-17152](https://issues.apache.org/jira/browse/HADOOP-17152) | Implement wrapper for guava newArrayList and newLinkedList | Major | common | Ahmed Hussein | Viraj Jasani |
+| [HADOOP-17851](https://issues.apache.org/jira/browse/HADOOP-17851) | S3A to support user-specified content encoding | Minor | fs/s3 | Holden Karau | Holden Karau |
+| [HADOOP-17492](https://issues.apache.org/jira/browse/HADOOP-17492) | abfs listLocatedStatus to support incremental/async page fetching | Major | fs/azure | Steve Loughran | Steve Loughran |
+| [HADOOP-17409](https://issues.apache.org/jira/browse/HADOOP-17409) | Remove S3Guard - no longer needed | Major | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18084](https://issues.apache.org/jira/browse/HADOOP-18084) | ABFS: Add testfilePath while verifying test contents are read correctly | Minor | fs/azure, test | Anmol Asrani | Anmol Asrani |
+| [HDFS-16169](https://issues.apache.org/jira/browse/HDFS-16169) | Fix TestBlockTokenWithDFSStriped#testEnd2End failure | Major | test | Hui Fei | secfree |
+| [HADOOP-18091](https://issues.apache.org/jira/browse/HADOOP-18091) | S3A auditing leaks memory through ThreadLocal references | Major | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18071](https://issues.apache.org/jira/browse/HADOOP-18071) | ABFS: Set driver global timeout for ITestAzureBlobFileSystemBasics | Major | fs/azure | Sumangala Patki | Sumangala Patki |
+| [HADOOP-17765](https://issues.apache.org/jira/browse/HADOOP-17765) | ABFS: Use Unique File Paths in Tests | Major | fs/azure | Sumangala Patki | Sumangala Patki |
+| [HADOOP-17862](https://issues.apache.org/jira/browse/HADOOP-17862) | ABFS: Fix unchecked cast compiler warning for AbfsListStatusRemoteIterator | Major | fs/azure | Sumangala Patki | Sumangala Patki |
+| [HADOOP-18075](https://issues.apache.org/jira/browse/HADOOP-18075) | ABFS: Fix failure caused by listFiles() in ITestAbfsRestOperationException | Major | fs/azure | Sumangala Patki | Sumangala Patki |
+| [HADOOP-18112](https://issues.apache.org/jira/browse/HADOOP-18112) | Implement paging during S3 multi object delete. | Critical | fs/s3 | Mukund Thakur | Mukund Thakur |
+| [HADOOP-16204](https://issues.apache.org/jira/browse/HADOOP-16204) | ABFS tests to include terasort | Minor | fs/azure, test | Steve Loughran | Steve Loughran |
+| [HDFS-13248](https://issues.apache.org/jira/browse/HDFS-13248) | RBF: Namenode need to choose block location for the client | Major | . | Wu Weiwei | Owen O'Malley |
+| [HADOOP-13704](https://issues.apache.org/jira/browse/HADOOP-13704) | S3A getContentSummary() to move to listFiles(recursive) to count children; instrument use | Minor | fs/s3 | Steve Loughran | Ahmar Suhail |
+| [HADOOP-14661](https://issues.apache.org/jira/browse/HADOOP-14661) | S3A to support Requester Pays Buckets | Minor | common, util | Mandus Momberg | Daniel Carl Jones |
+| [HDFS-16484](https://issues.apache.org/jira/browse/HDFS-16484) | [SPS]: Fix an infinite loop bug in SPSPathIdProcessor thread | Major | . | qinyuren | qinyuren |
+| [HADOOP-17682](https://issues.apache.org/jira/browse/HADOOP-17682) | ABFS: Support FileStatus input to OpenFileWithOptions() via OpenFileParameters | Major | fs/azure | Sumangala Patki | Sumangala Patki |
+| [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | Use jersey-json that is built to use jackson2 | Major | build | Akira Ajisaka | PJ Fanning |
+| [HADOOP-18104](https://issues.apache.org/jira/browse/HADOOP-18104) | Add configs to configure minSeekForVectorReads and maxReadSizeForVectorReads | Major | common, fs | Mukund Thakur | Mukund Thakur |
+| [HADOOP-18168](https://issues.apache.org/jira/browse/HADOOP-18168) | ITestMarkerTool.testRunLimitedLandsatAudit failing due to most of bucket content purged | Minor | fs/s3, test | Steve Loughran | Daniel Carl Jones |
+| [HADOOP-12020](https://issues.apache.org/jira/browse/HADOOP-12020) | Support configuration of different S3 storage classes | Major | fs/s3 | Yann Landrin-Schweitzer | Monthon Klongklaew |
+| [HADOOP-18105](https://issues.apache.org/jira/browse/HADOOP-18105) | Implement a variant of ElasticByteBufferPool which uses weak references for garbage collection. | Major | common, fs | Mukund Thakur | Mukund Thakur |
+| [HADOOP-18107](https://issues.apache.org/jira/browse/HADOOP-18107) | Vectored IO support for large S3 files. | Major | fs/s3 | Mukund Thakur | Mukund Thakur |
+| [HADOOP-18106](https://issues.apache.org/jira/browse/HADOOP-18106) | Handle memory fragmentation in S3 Vectored IO implementation. | Major | fs/s3 | Mukund Thakur | Mukund Thakur |
+| [HADOOP-17461](https://issues.apache.org/jira/browse/HADOOP-17461) | Add thread-level IOStatistics Context | Major | fs, fs/azure, fs/s3 | Steve Loughran | Mehakmeet Singh |
+| [HADOOP-18372](https://issues.apache.org/jira/browse/HADOOP-18372) | ILoadTestS3ABulkDeleteThrottling failing | Minor | fs/s3, test | Steve Loughran | Ahmar Suhail |
+| [HADOOP-18368](https://issues.apache.org/jira/browse/HADOOP-18368) | ITestCustomSigner fails when access point name has '-' | Minor | . | Ahmar Suhail | Ahmar Suhail |
+| [HADOOP-15964](https://issues.apache.org/jira/browse/HADOOP-15964) | Add S3A support for Async Scatter/Gather IO | Major | fs/s3 | Steve Loughran | Mukund Thakur |
+| [HADOOP-18366](https://issues.apache.org/jira/browse/HADOOP-18366) | ITestS3Select.testSelectSeekFullLandsat is timing out | Minor | . | Ahmar Suhail | Ahmar Suhail |
+| [HADOOP-18373](https://issues.apache.org/jira/browse/HADOOP-18373) | IOStatisticsContext tuning | Minor | fs/s3, test | Steve Loughran | Viraj Jasani |
+| [HADOOP-18227](https://issues.apache.org/jira/browse/HADOOP-18227) | Add input stream IOstats for vectored IO api in S3A. | Major | fs/s3 | Mukund Thakur | Mukund Thakur |
+| [HADOOP-18392](https://issues.apache.org/jira/browse/HADOOP-18392) | Propagate vectored s3a input stream stats to file system stats. | Major | fs/s3 | Mukund Thakur | Mukund Thakur |
+| [HADOOP-18355](https://issues.apache.org/jira/browse/HADOOP-18355) | Update previous index properly while validating overlapping ranges. | Major | common, fs/s3 | Mukund Thakur | Mukund Thakur |
+| [HADOOP-18371](https://issues.apache.org/jira/browse/HADOOP-18371) | s3a FS init logs at warn if fs.s3a.create.storage.class is unset | Blocker | fs/s3 | Steve Loughran | Viraj Jasani |
+| [HADOOP-18385](https://issues.apache.org/jira/browse/HADOOP-18385) | ITestS3ACannedACLs failure; not in a span | Major | fs/s3, test | Steve Loughran | Ashutosh Gupta |
+| [HADOOP-18403](https://issues.apache.org/jira/browse/HADOOP-18403) | Fix FileSystem leak in ITestS3AAWSCredentialsProvider | Minor | fs/s3 | Viraj Jasani | Viraj Jasani |
+| [HADOOP-17882](https://issues.apache.org/jira/browse/HADOOP-17882) | distcp to use openFile() with sequential IO; ranges of reads | Major | tools/distcp | Steve Loughran | Steve Loughran |
+| [HADOOP-18391](https://issues.apache.org/jira/browse/HADOOP-18391) | Improve VectoredReadUtils#readVectored() for direct buffers | Major | fs | Steve Loughran | Mukund Thakur |
+| [HADOOP-18407](https://issues.apache.org/jira/browse/HADOOP-18407) | Improve vectored IO api spec. | Minor | fs, fs/s3 | Mukund Thakur | Mukund Thakur |
+| [HADOOP-18339](https://issues.apache.org/jira/browse/HADOOP-18339) | S3A storage class option only picked up when buffering writes to disk | Major | fs/s3 | Steve Loughran | Monthon Klongklaew |
+| [HADOOP-18410](https://issues.apache.org/jira/browse/HADOOP-18410) | S3AInputStream.unbuffer() async drain not releasing http connections | Blocker | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18439](https://issues.apache.org/jira/browse/HADOOP-18439) | Fix VectoredIO for LocalFileSystem when checksum is enabled. | Major | common | Mukund Thakur | Mukund Thakur |
+| [HADOOP-18416](https://issues.apache.org/jira/browse/HADOOP-18416) | ITestS3AIOStatisticsContext failure | Major | fs/s3, test | Steve Loughran | Mehakmeet Singh |
+| [HADOOP-18347](https://issues.apache.org/jira/browse/HADOOP-18347) | Restrict vectoredIO threadpool to reduce memory pressure | Major | common, fs, fs/adl, fs/s3 | Rajesh Balamohan | Mukund Thakur |
+| [HADOOP-18463](https://issues.apache.org/jira/browse/HADOOP-18463) | Add an integration test to process data asynchronously during vectored read. | Major | . | Mukund Thakur | Mukund Thakur |
+| [HADOOP-15460](https://issues.apache.org/jira/browse/HADOOP-15460) | S3A FS to add "fs.s3a.create.performance" to the builder file creation option set | Major | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | Upgrade AWS SDK to V2 - Prerequisites | Minor | . | Ahmar Suhail | Ahmar Suhail |
+| [HADOOP-18480](https://issues.apache.org/jira/browse/HADOOP-18480) | upgrade AWS SDK to 1.12.316 | Major | build, fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18460](https://issues.apache.org/jira/browse/HADOOP-18460) | ITestS3AContractVectoredRead.testStopVectoredIoOperationsUnbuffer failing | Minor | fs/s3, test | Steve Loughran | Mukund Thakur |
+| [HADOOP-18488](https://issues.apache.org/jira/browse/HADOOP-18488) | Cherrypick HADOOP-11245 to branch-3.3 | Major | . | Wei-Chiu Chuang | Ashutosh Gupta |
+| [HADOOP-18481](https://issues.apache.org/jira/browse/HADOOP-18481) | AWS v2 SDK upgrade log to not warn of use standard AWS Credential Providers | Major | fs/s3 | Steve Loughran | Ahmar Suhail |
+| [HADOOP-18476](https://issues.apache.org/jira/browse/HADOOP-18476) | Abfs and S3A FileContext bindings to close wrapped filesystems in finalizer | Blocker | fs/azure, fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18304](https://issues.apache.org/jira/browse/HADOOP-18304) | Improve S3A committers documentation clarity | Trivial | documentation | Daniel Carl Jones | Daniel Carl Jones |
+| [HADOOP-18465](https://issues.apache.org/jira/browse/HADOOP-18465) | S3A server-side encryption tests fail before checking encryption tests should skip | Minor | fs/s3, test | Daniel Carl Jones | Daniel Carl Jones |
+| [HADOOP-18530](https://issues.apache.org/jira/browse/HADOOP-18530) | ChecksumFileSystem::readVectored might return byte buffers not positioned at 0 | Blocker | fs | Harshit Gupta | Harshit Gupta |
+| [HADOOP-18457](https://issues.apache.org/jira/browse/HADOOP-18457) | ABFS: Support for account level throttling | Major | . | Anmol Asrani | Anmol Asrani |
+| [HADOOP-18560](https://issues.apache.org/jira/browse/HADOOP-18560) | AvroFSInput opens a stream twice and discards the second one without closing | Blocker | fs | Steve Loughran | Steve Loughran |
+| [HADOOP-18526](https://issues.apache.org/jira/browse/HADOOP-18526) | Leak of S3AInstrumentation instances via hadoop Metrics references | Blocker | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18546](https://issues.apache.org/jira/browse/HADOOP-18546) | disable purging list of in progress reads in abfs stream closed | Blocker | fs/azure | Steve Loughran | Pranav Saxena |
+| [HADOOP-18577](https://issues.apache.org/jira/browse/HADOOP-18577) | ABFS: add probes of readahead fix | Major | fs/azure | Steve Loughran | Steve Loughran |
+| [HADOOP-11867](https://issues.apache.org/jira/browse/HADOOP-11867) | Add a high-performance vectored read API. | Major | fs, fs/azure, fs/s3, hdfs-client | Gopal Vijayaraghavan | Mukund Thakur |
+| [HADOOP-18507](https://issues.apache.org/jira/browse/HADOOP-18507) | VectorIO FileRange type to support a "reference" field | Major | fs | Steve Loughran | Steve Loughran |
+| [HADOOP-18627](https://issues.apache.org/jira/browse/HADOOP-18627) | site intro docs to make clear Kerberos is mandatory for secure clusters | Major | site | Steve Loughran | Arnout Engelen |
+| [HADOOP-17584](https://issues.apache.org/jira/browse/HADOOP-17584) | s3a magic committer may commit more data | Major | fs/s3 | yinan zhan | Steve Loughran |
+| [HADOOP-18642](https://issues.apache.org/jira/browse/HADOOP-18642) | Cut excess dependencies from hadoop-azure, hadoop-aliyun transitive imports; fix LICENSE-binary | Blocker | build, fs/azure, fs/oss | Steve Loughran | Steve Loughran |
+
+
+### OTHER:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-15854](https://issues.apache.org/jira/browse/HDFS-15854) | Make some parameters configurable for SlowDiskTracker and SlowPeerTracker | Major | . | Tao Li | Tao Li |
+| [YARN-10747](https://issues.apache.org/jira/browse/YARN-10747) | Bump YARN CSI protobuf version to 3.7.1 | Major | . | Siyao Meng | Siyao Meng |
+| [HDFS-16139](https://issues.apache.org/jira/browse/HDFS-16139) | Update BPServiceActor Scheduler's nextBlockReportTime atomically | Major | . | Viraj Jasani | Viraj Jasani |
+| [HADOOP-18014](https://issues.apache.org/jira/browse/HADOOP-18014) | CallerContext should not include some characters | Major | . | Takanobu Asanuma | Takanobu Asanuma |
+| [MAPREDUCE-7371](https://issues.apache.org/jira/browse/MAPREDUCE-7371) | DistributedCache alternative APIs should not use DistributedCache APIs internally | Major | . | Viraj Jasani | Viraj Jasani |
+| [HADOOP-18114](https://issues.apache.org/jira/browse/HADOOP-18114) | Documentation Syntax Error Fix \> AWS Assumed Roles | Trivial | documentation, fs/s3 | Joey Krabacher | Joey Krabacher |
+| [HDFS-16481](https://issues.apache.org/jira/browse/HDFS-16481) | Provide support to set Http and Rpc ports in MiniJournalCluster | Major | . | Viraj Jasani | Viraj Jasani |
+| [HDFS-16502](https://issues.apache.org/jira/browse/HDFS-16502) | Reconfigure Block Invalidate limit | Major | . | Viraj Jasani | Viraj Jasani |
+| [HDFS-16522](https://issues.apache.org/jira/browse/HDFS-16522) | Set Http and Ipc ports for Datanodes in MiniDFSCluster | Major | . | Viraj Jasani | Viraj Jasani |
+| [HADOOP-18191](https://issues.apache.org/jira/browse/HADOOP-18191) | Log retry count while handling exceptions in RetryInvocationHandler | Minor | . | Viraj Jasani | Viraj Jasani |
+| [HDFS-16551](https://issues.apache.org/jira/browse/HDFS-16551) | Backport HADOOP-17588 to 3.3 and other active old branches. | Major | . | Renukaprasad C | Renukaprasad C |
+| [HDFS-16618](https://issues.apache.org/jira/browse/HDFS-16618) | sync\_file\_range error should include more volume and file info | Minor | . | Viraj Jasani | Viraj Jasani |
+| [HADOOP-18300](https://issues.apache.org/jira/browse/HADOOP-18300) | Update google-gson to 2.9.0 | Minor | build | Igor Dvorzhak | Igor Dvorzhak |
+| [HADOOP-18397](https://issues.apache.org/jira/browse/HADOOP-18397) | Shutdown AWSSecurityTokenService when its resources are no longer in use | Major | fs/s3 | Viraj Jasani | Viraj Jasani |
+| [HADOOP-18575](https://issues.apache.org/jira/browse/HADOOP-18575) | Make XML transformer factory more lenient | Major | common | PJ Fanning | PJ Fanning |
+| [HADOOP-18586](https://issues.apache.org/jira/browse/HADOOP-18586) | Update the year to 2023 | Major | . | Ayush Saxena | Ayush Saxena |
+| [HADOOP-18587](https://issues.apache.org/jira/browse/HADOOP-18587) | upgrade to jettison 1.5.3 to fix CVE-2022-40150 | Major | common | PJ Fanning | PJ Fanning |
+
+
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md
new file mode 100644
index 00000000000..b2357e827d2
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md
@@ -0,0 +1,89 @@
+
+
+# Apache Hadoop 3.3.5 Release Notes
+
+These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements.
+
+
+---
+
+* [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | *Major* | **Replace all default Charset usage with UTF-8**
+
+All default charset usages have been replaced with UTF-8. If the default charset of your environment is not UTF-8, behavior may differ from previous releases.
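+
+As an illustrative, hypothetical sketch (not taken from the Hadoop code base), the pattern this change standardises on is to pass the charset explicitly rather than rely on the platform default:
+
+```java
+import java.nio.charset.StandardCharsets;
+
+public class ExplicitUtf8 {
+  public static void main(String[] args) {
+    // Encode and decode with an explicit charset instead of the platform default.
+    byte[] bytes = "héllo".getBytes(StandardCharsets.UTF_8);
+    System.out.println(new String(bytes, StandardCharsets.UTF_8));
+  }
+}
+```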
+
+
+---
+
+* [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | *Major* | **Use jersey-json that is built to use jackson2**
+
+Hadoop now uses a modified jersey-json 1.20 (https://github.com/pjfanning/jersey-1.x/tree/v1.20) that is built against Jackson 2.x; with this change, the Jackson 1.x dependency has been removed from Hadoop.
+Downstream applications which explicitly exclude jersey from their transitive dependencies must now also exclude com.github.pjfanning:jersey-json.
+
+
+---
+
+* [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | *Major* | **Slow peer metrics - add median, mad and upper latency limits**
+
+Namenode metrics that represent the slownode JSON now include three important factors (median, median absolute deviation, upper latency limit) that can help users determine how urgently a given slownode requires manual intervention.
+
+
+---
+
+* [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | *Minor* | **Improve Magic Committer Performance**
+
+The S3A filesystem's createFile() operation supports an option to disable all safety checks when creating a file. Consult the documentation and use it with care.
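+
+A minimal, hedged sketch of that builder option; it assumes the fs.s3a.create.performance option listed under HADOOP-15460 above, the bucket and path are placeholders, and exact semantics may vary by release:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class S3ACreatePerformanceSketch {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    Path dest = new Path("s3a://example-bucket/tmp/part-0000");  // hypothetical destination
+    FileSystem fs = dest.getFileSystem(conf);
+    // Opt in to the relaxed-safety create path; only do this when overwrite races
+    // and existence checks are known not to matter for this particular write.
+    try (FSDataOutputStream out = fs.createFile(dest)
+        .overwrite(true)
+        .opt("fs.s3a.create.performance", true)
+        .build()) {
+      out.write("example".getBytes(java.nio.charset.StandardCharsets.UTF_8));
+    }
+  }
+}
+```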
+
+
+---
+
+* [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | *Minor* | **Upgrade AWS SDK to V2 - Prerequisites**
+
+In preparation for an (incompatible but necessary) move to the AWS SDK v2, some internal/deprecated uses of AWS classes/interfaces are logged as warnings, though only once during the life of a JVM. Set the logger "org.apache.hadoop.fs.s3a.SDKV2Upgrade" to only log at INFO to hide these warnings.
+
+
+---
+
+* [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | *Major* | **Remove the hadoop-openstack module**
+
+The swift:// connector for OpenStack support has been removed. It had fundamental problems (swift's handling of files \> 4GB). A subset of the S3 protocol is now exported by almost all object store services, so please use that through the s3a connector instead. The hadoop-openstack jar remains, only now it is empty of code. This is to ensure that projects which declare the JAR as a dependency will still have successful builds.
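+
+As a hypothetical sketch of the migration path (the bucket name and object key are placeholders; credentials and endpoint come from the usual fs.s3a.* settings), reading through the s3a connector looks like any other Hadoop FileSystem access:
+
+```java
+import java.io.InputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class S3AReadSketch {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    Path src = new Path("s3a://example-bucket/data/input.csv");  // hypothetical object
+    FileSystem fs = src.getFileSystem(conf);
+    try (InputStream in = fs.open(src)) {
+      System.out.println("first byte: " + in.read());
+    }
+  }
+}
+```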
+
+
+---
+
+* [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | *Major* | **Update Bouncy Castle to 1.68 or later**
+
+Bouncy Castle 1.68+ is a multi-release JAR containing Java classes compiled for different target JREs. Older versions of asm.jar and the Maven shade plugin may have problems with these; the fix is to upgrade those dependencies.
+
+
+---
+
+* [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | *Major* | **Disable abfs prefetching by default**
+
+ABFS block prefetching has been disabled to avoid HADOOP-18521 and buffer sharing on multithreaded processes (Hive, Spark, etc.). This will have little or no performance impact on queries against Parquet or ORC data, but can slow down sequential stream processing, including of CSV files; however, the data read will be correct.
+It may slow down distcp downloads, where the race condition does not arise. For maximum distcp performance, re-enable the readahead by setting fs.azure.enable.readahead to true.
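+
+A minimal sketch of re-enabling it programmatically (the same key can equally be set in core-site.xml); this assumes the fs.azure.enable.readahead option added by HADOOP-18517 above:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+
+public class EnableAbfsReadahead {
+  public static void main(String[] args) {
+    Configuration conf = new Configuration();
+    // Restore read-ahead for sequential-read-heavy workloads such as distcp downloads.
+    conf.setBoolean("fs.azure.enable.readahead", true);
+    System.out.println("fs.azure.enable.readahead = "
+        + conf.getBoolean("fs.azure.enable.readahead", false));
+  }
+}
+```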
+
+
+---
+
+* [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | *Critical* | **CryptoOutputStream::close leak when encrypted zones + quota exceptions**
+
+**WARNING: No release note provided for this change.**
+
+
+
diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml
new file mode 100644
index 00000000000..399b62b3010
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml
@@ -0,0 +1,835 @@
+
+ A distributed implementation of {@link
+org.apache.hadoop.fs.FileSystem}. This is loosely modelled after
+Google's GFS.
+
+
The most important difference is that unlike GFS, Hadoop DFS files
+have strictly one writer at any one time. Bytes are always appended
+to the end of the writer's stream. There is no notion of "record appends"
+or "mutations" that are then checked or reordered. Writers simply emit
+a byte stream. That byte stream is guaranteed to be stored in the
+order written.
Applications specify the files, via urls (hdfs:// or http://) to be cached
+ via the {@link org.apache.hadoop.mapred.JobConf}. The
+ DistributedCache assumes that the files specified via urls are
+ already present on the {@link FileSystem} at the path specified by the url
+ and are accessible by every machine in the cluster.
+
+
The framework will copy the necessary files on to the worker node before
+ any tasks for the job are executed on that node. Its efficiency stems from
+ the fact that the files are only copied once per job and the ability to
+ cache archives which are un-archived on the workers.
+
+
DistributedCache can be used to distribute simple, read-only
+ data/text files and/or more complex types such as archives, jars etc.
+ Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes.
+ Jars may be optionally added to the classpath of the tasks, a rudimentary
+ software distribution mechanism. Files have execution permissions.
+ In older version of Hadoop Map/Reduce users could optionally ask for symlinks
+ to be created in the working directory of the child task. In the current
+ version symlinks are always created. If the URL does not have a fragment
+ the name of the file or directory will be used. If multiple files or
+ directories map to the same link name, the last one added, will be used. All
+ others will not even be downloaded.
+
+
DistributedCache tracks modification timestamps of the cache
+ files. Clearly the cache files should not be modified by the application
+ or externally while the job is executing.
+
+
Here is an illustrative example on how to use the
+ DistributedCache:
+
+ // Setting up the cache for the application
+
+ 1. Copy the requisite files to the FileSystem:
+
+ $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
+ $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
+ $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
+ $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
+ $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
+ $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
+
+ 2. Setup the application's JobConf:
+
+ JobConf job = new JobConf();
+ DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
+ job);
+ DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
+ DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+ DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
+ DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
+ DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
+
+ 3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
+ or {@link org.apache.hadoop.mapred.Reducer}:
+
+ public static class MapClass extends MapReduceBase
+ implements Mapper<K, V, K, V> {
+
+ private Path[] localArchives;
+ private Path[] localFiles;
+
+ public void configure(JobConf job) {
+ // Get the cached archives/files
+ File f = new File("./map.zip/some/file/in/zip.txt");
+ }
+
+ public void map(K key, V value,
+ OutputCollector<K, V> output, Reporter reporter)
+ throws IOException {
+ // Use data from the cached archives/files here
+ // ...
+ // ...
+ output.collect(k, v);
+ }
+ }
+
+
+
+ It is also very common to use the DistributedCache by using
+ {@link org.apache.hadoop.util.GenericOptionsParser}.
+
+ This class includes methods that should be used by users
+ (specifically those mentioned in the example above, as well
+ as {@link DistributedCache#addArchiveToClassPath(Path, Configuration)}),
+ as well as methods intended for use by the MapReduce framework
+ (e.g., {@link org.apache.hadoop.mapred.JobClient}).
+
+ @see org.apache.hadoop.mapred.JobConf
+ @see org.apache.hadoop.mapred.JobClient
+ @see org.apache.hadoop.mapreduce.Job]]>
+
+ JobTracker,
+ as {@link JobTracker.State}
+
+ {@link JobTracker.State} should no longer be used on M/R 2.x. The function
+ is kept to be compatible with M/R 1.x applications.
+
+ @return the invalid state of the JobTracker.]]>
+
+ ClusterStatus provides clients with information such as:
+
+
+ Size of the cluster.
+
+
+ Name of the trackers.
+
+
+ Task capacity of the cluster.
+
+
+ The number of currently running map and reduce tasks.
+
+
+ State of the JobTracker.
+
+
+ Details regarding black listed trackers.
+
+
+
+
Clients can query for the latest ClusterStatus, via
+ {@link JobClient#getClusterStatus()}.
Grouphandles localization of the class name and the
+ counter names.
]]>
+
+ FileInputFormat always returns
+ true. Implementations that may deal with non-splittable files must
+ override this method.
+
+ FileInputFormat implementations can override this and return
+ false to ensure that individual input files are never split-up
+ so that {@link Mapper}s process entire files.
+
+ @param fs the file system that the file is on
+ @param filename the file name to check
+ @return is this file splitable?]]>
+
+ FileInputFormat is the base class for all file-based
+ InputFormats. This provides a generic implementation of
+ {@link #getSplits(JobConf, int)}.
+
+ Implementations of FileInputFormat can also override the
+ {@link #isSplitable(FileSystem, Path)} method to prevent input files
+ from being split-up in certain situations. Implementations that may
+ deal with non-splittable files must override this method, since
+ the default implementation assumes splitting is always possible.]]>
+
+ true if the job output should be compressed,
+ false otherwise]]>
+
+
+ Tasks' Side-Effect Files
+
+
Note: The following is valid only if the {@link OutputCommitter}
+ is {@link FileOutputCommitter}. If OutputCommitter is not
+ a FileOutputCommitter, the task's temporary output
+ directory is same as {@link #getOutputPath(JobConf)} i.e.
+ ${mapreduce.output.fileoutputformat.outputdir}$
+
+
Some applications need to create/write-to side-files, which differ from
+ the actual job-outputs.
+
+
In such cases there could be issues with 2 instances of the same TIP
+ (running simultaneously e.g. speculative tasks) trying to open/write-to the
+ same file (path) on HDFS. Hence the application-writer will have to pick
+ unique names per task-attempt (e.g. using the attemptid, say
+ attempt_200709221812_0001_m_000000_0), not just per TIP.
+
+
To get around this the Map-Reduce framework helps the application-writer
+ out by maintaining a special
+ ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}
+ sub-directory for each task-attempt on HDFS where the output of the
+ task-attempt goes. On successful completion of the task-attempt the files
+ in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only)
+ are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the
+ framework discards the sub-directory of unsuccessful task-attempts. This
+ is completely transparent to the application.
+
+
The application-writer can take advantage of this by creating any
+ side-files required in ${mapreduce.task.output.dir} during execution
+ of his reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the
+ framework will move them out similarly - thus she doesn't have to pick
+ unique paths per task-attempt.
+
+
Note: the value of ${mapreduce.task.output.dir} during
+ execution of a particular task-attempt is actually
+ ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_{$taskid}, and this value is
+ set by the map-reduce framework. So, just create any side-files in the
+ path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce
+ task to take advantage of this feature.
+
+
The entire discussion holds true for maps of jobs with
+ reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
+ goes directly to HDFS.
+
+ @return the {@link Path} to the task's temporary output directory
+ for the map-reduce job.]]>
+
+
+ The generated name can be used to create custom files from within the
+ different tasks for the job, the names for different tasks will not collide
+ with each other.
+
+
The given name is postfixed with the task type, 'm' for maps, 'r' for
+ reduces and the task partition number. For example, given the name 'test'
+ running on the first map of the job, the generated name will be
+ 'test-m-00000'.
+
+ @param conf the configuration for the job.
+ @param name the name to make unique.
+ @return a unique name across all tasks of the job.]]>
+
+
+
+
+
+
+ The path can be used to create custom files from within the map and
+ reduce tasks. The path name will be unique for each task. The path parent
+ will be the job output directory.
+
+
This method uses the {@link #getUniqueName} method to make the file name
+ unique for the task.
+ @see FixedLengthRecordReader]]>
+
+ Each {@link InputSplit} is then assigned to an individual {@link Mapper}
+ for processing.
+
+
Note: The split is a logical split of the inputs and the
+ input files are not physically split into chunks. For e.g. a split could
+ be <input-file-path, start, offset> tuple.
+
+ @param job job configuration.
+ @param numSplits the desired number of splits, a hint.
+ @return an array of {@link InputSplit}s for the job.]]>
+
+ It is the responsibility of the RecordReader to respect
+ record boundaries while processing the logical split to present a
+ record-oriented view to the individual task.
+
+ @param split the {@link InputSplit}
+ @param job the job that this split belongs to
+ @return a {@link RecordReader}]]>
+
+
+
+ InputFormat describes the input-specification for a
+ Map-Reduce job.
+
+
The Map-Reduce framework relies on the InputFormat of the
+ job to:
+
+
+ Validate the input-specification of the job.
+
+ Split-up the input file(s) into logical {@link InputSplit}s, each of
+ which is then assigned to an individual {@link Mapper}.
+
+
+ Provide the {@link RecordReader} implementation to be used to glean
+ input records from the logical InputSplit for processing by
+ the {@link Mapper}.
+
+
+
+
The default behavior of file-based {@link InputFormat}s, typically
+ sub-classes of {@link FileInputFormat}, is to split the
+ input into logical {@link InputSplit}s based on the total size, in
+ bytes, of the input files. However, the {@link FileSystem} blocksize of
+ the input files is treated as an upper bound for input splits. A lower bound
+ on the split size can be set via
+
+ mapreduce.input.fileinputformat.split.minsize.
+
+
Clearly, logical splits based on input-size is insufficient for many
+ applications since record boundaries are to be respected. In such cases, the
+ application has to also implement a {@link RecordReader} on whom lies the
+ responsibility to respect record-boundaries and present a record-oriented
+ view of the logical InputSplit to the individual task.
+
+ @see InputSplit
+ @see RecordReader
+ @see JobClient
+ @see FileInputFormat]]>
+
+ InputSplit.
+
+ @return the number of bytes in the input split.
+ @throws IOException]]>
+
+
+
+
+
+ InputSplit is
+ located as an array of Strings.
+ @throws IOException]]>
+
+
+
+ InputSplit represents the data to be processed by an
+ individual {@link Mapper}.
+
+
Typically, it presents a byte-oriented view on the input and is the
+ responsibility of {@link RecordReader} of the job to process this and present
+ a record-oriented view.
+
+ @see InputFormat
+ @see RecordReader]]>
+
+ Checking the input and output specifications of the job.
+
+
+ Computing the {@link InputSplit}s for the job.
+
+
+ Setup the requisite accounting information for the {@link DistributedCache}
+ of the job, if necessary.
+
+
+ Copying the job's jar and configuration to the map-reduce system directory
+ on the distributed file-system.
+
+
+ Submitting the job to the cluster and optionally monitoring
+ its status.
+
+
+
+ Normally the user creates the application, describes various facets of the
+ job via {@link JobConf} and then uses the JobClient to submit
+ the job and monitor its progress.
+
+
Here is an example on how to use JobClient:
+
+ // Create a new JobConf
+ JobConf job = new JobConf(new Configuration(), MyJob.class);
+
+ // Specify various job-specific parameters
+ job.setJobName("myjob");
+
+ job.setInputPath(new Path("in"));
+ job.setOutputPath(new Path("out"));
+
+ job.setMapperClass(MyJob.MyMapper.class);
+ job.setReducerClass(MyJob.MyReducer.class);
+
+ // Submit the job, then poll for progress until the job is complete
+ JobClient.runJob(job);
+
+
+ Job Control
+
+
At times clients would chain map-reduce jobs to accomplish complex tasks
+ which cannot be done via a single map-reduce job. This is fairly easy since
+ the output of the job, typically, goes to distributed file-system and that
+ can be used as the input for the next job.
+
+
However, this also means that the onus on ensuring jobs are complete
+ (success/failure) lies squarely on the clients. In such situations the
+ various job-control options are:
+
+
+ {@link #runJob(JobConf)} : submits the job and returns only after
+ the job has completed.
+
+
+ {@link #submitJob(JobConf)} : only submits the job, then poll the
+ returned handle to the {@link RunningJob} to query status and make
+ scheduling decisions.
+
+
+ {@link JobConf#setJobEndNotificationURI(String)} : setup a notification
+ on job-completion, thus avoiding polling.
+
+
+
+ @see JobConf
+ @see ClusterStatus
+ @see Tool
+ @see DistributedCache]]>
+
+ If the parameter {@code loadDefaults} is false, the new instance
+ will not load resources from the default files.
+
+ @param loadDefaults specifies whether to load from the default files]]>
+
+ true if framework should keep the intermediate files
+ for failed tasks, false otherwise.]]>
+
+ true if the outputs of the maps are to be compressed,
+ false otherwise.]]>
+
+ This comparator should be provided if the equivalence rules for keys
+ for sorting the intermediates are different from those for grouping keys
+ before each call to
+ {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.
+
+
For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
+ in a single call to the reduce function if K1 and K2 compare as equal.
+
+
Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
+ how keys are sorted, this can be used in conjunction to simulate
+ secondary sort on values.
+
+
Note: This is not a guarantee of the combiner sort being
+ stable in any sense. (In any case, with the order of available
+ map-outputs to the combiner being non-deterministic, it wouldn't make
+ that much sense.)
+
+ @param theClass the comparator class to be used for grouping keys for the
+ combiner. It should implement RawComparator.
+ @see #setOutputKeyComparatorClass(Class)]]>
+
+
+
+
+
+ This comparator should be provided if the equivalence rules for keys
+ for sorting the intermediates are different from those for grouping keys
+ before each call to
+ {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.
+
+
For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
+ in a single call to the reduce function if K1 and K2 compare as equal.
+
+
Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
+ how keys are sorted, this can be used in conjunction to simulate
+ secondary sort on values.
+
+
Note: This is not a guarantee of the reduce sort being
+ stable in any sense. (In any case, with the order of available
+ map-outputs to the reduce being non-deterministic, it wouldn't make
+ that much sense.)
+
+ @param theClass the comparator class to be used for grouping keys.
+ It should implement RawComparator.
+ @see #setOutputKeyComparatorClass(Class)
+ @see #setCombinerKeyGroupingComparator(Class)]]>
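+
+ A hedged sketch of the secondary-sort wiring described above; CompositeKey,
+ FullKeyComparator and the getChecksum() accessor are hypothetical user classes,
+ not part of the framework.
+
+     // Group reduce input by checksum only, while the full key comparator
+     // (checksum, then decreasing pagerank) still controls the sort order.
+     public static class ChecksumGroupingComparator extends WritableComparator {
+       protected ChecksumGroupingComparator() {
+         super(CompositeKey.class, true);
+       }
+       @Override
+       public int compare(WritableComparable a, WritableComparable b) {
+         return ((CompositeKey) a).getChecksum()
+             .compareTo(((CompositeKey) b).getChecksum());
+       }
+     }
+
+     job.setOutputKeyComparatorClass(FullKeyComparator.class);
+     job.setOutputValueGroupingComparator(ChecksumGroupingComparator.class);
+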
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ combiner class used to combine map-outputs
+ before being sent to the reducers. Typically the combiner is the same as
+ the {@link Reducer} for the job, i.e. {@link #getReducerClass()}.
+
+ @return the user-defined combiner class used to combine map-outputs.]]>
+
+
+
+
+
+ combiner class used to combine map-outputs
+ before being sent to the reducers.
+
+
The combiner is an application-specified aggregation operation, which
+ can help cut down the amount of data transferred between the
+ {@link Mapper} and the {@link Reducer}, leading to better performance.
+
+
The framework may invoke the combiner 0, 1, or multiple times, in both
+ the mapper and reducer tasks. In general, the combiner is called as the
+ sort/merge result is written to disk. The combiner must:
+
+
be side-effect free
+
have the same input and output key types and the same input and
+ output value types
+
+
+
Typically the combiner is the same as the Reducer for the
+ job, i.e. {@link #setReducerClass(Class)}.
+
+ @param theClass the user-defined combiner class used to combine
+ map-outputs.]]>
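+
+ For instance, a job whose reduce is an associative, commutative aggregation
+ (such as summing counts) can typically reuse its Reducer as the combiner; this
+ is a sketch, not a requirement:
+
+     job.setCombinerClass(MyJob.MyReducer.class);   // safe only because the reduce is a pure sum
+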
+
+
+
+
+ true.
+
+ @return true if speculative execution should be used for this job,
+ false otherwise.]]>
+
+
+
+
+
+ true if speculative execution
+ should be turned on, else false.]]>
+
+
+
+
+ true.
+
+ @return true if speculative execution should be
+ used for map tasks for this job,
+ false otherwise.]]>
+
+
+
+
+
+ true if speculative execution
+ should be turned on for map tasks,
+ else false.]]>
+
+
+
+
+ true.
+
+ @return true if speculative execution should be used
+ for reduce tasks for this job,
+ false otherwise.]]>
+
+
+
+
+
+ true if speculative execution
+ should be turned on for reduce tasks,
+ else false.]]>
+
+
+
+
+ 1.
+
+ @return the number of map tasks for this job.]]>
+
+
+
+
+
+ Note: This is only a hint to the framework. The actual
+ number of spawned map tasks depends on the number of {@link InputSplit}s
+ generated by the job's {@link InputFormat#getSplits(JobConf, int)}.
+
+ A custom {@link InputFormat} is typically used to accurately control
+ the number of map tasks for the job.
+
+ How many maps?
+
+
The number of maps is usually driven by the total size of the inputs
+ i.e. total number of blocks of the input files.
+
+
The right level of parallelism for maps seems to be around 10-100 maps
+ per node, although it has been set as high as 300 or so for very CPU-light map
+ tasks. Task setup takes a while, so it is best if the maps take at least a
+ minute to execute.
+
+
The default behavior of file-based {@link InputFormat}s is to split the
+ input into logical {@link InputSplit}s based on the total size, in
+ bytes, of input files. However, the {@link FileSystem} blocksize of the
+ input files is treated as an upper bound for input splits. A lower bound
+ on the split size can be set via
+
+ mapreduce.input.fileinputformat.split.minsize.
+
+
Thus, if you expect 10TB of input data and have a blocksize of 128MB,
+ you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is
+ used to set it even higher.
+
+ @param n the number of map tasks for this job.
+ @see InputFormat#getSplits(JobConf, int)
+ @see FileInputFormat
+ @see FileSystem#getDefaultBlockSize()
+ @see FileStatus#getBlockSize()]]>
+
+
+
+
+ 1.
+
+ @return the number of reduce tasks for this job.]]>
+
+
+
+
+
+ How many reduces?
+
+
The right number of reduces seems to be 0.95 or
+ 1.75 multiplied by (
+ available memory for reduce tasks
+ (The value of this should be smaller than
+ numNodes * yarn.nodemanager.resource.memory-mb
+ since the resource of memory is shared by map tasks and other
+ applications) /
+
+ mapreduce.reduce.memory.mb).
+
+
+
With 0.95 all of the reduces can launch immediately and
+ start transferring map outputs as the maps finish. With 1.75
+ the faster nodes will finish their first round of reduces and launch a
+ second wave of reduces doing a much better job of load balancing.
+
+
Increasing the number of reduces increases the framework overhead, but
+ increases load balancing and lowers the cost of failures.
+
+
The scaling factors above are slightly less than whole numbers to
+ reserve a few reduce slots in the framework for speculative-tasks, failures
+ etc.
+
+ Reducer NONE
+
+
It is legal to set the number of reduce-tasks to zero.
+
+
In this case the output of the map-tasks goes directly to the distributed
+ file-system, to the path set by
+ {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the
+ framework doesn't sort the map-outputs before writing them out to HDFS.
+
+ @param n the number of reduce tasks for this job.]]>
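+
+ A sketch of the 0.95 heuristic above, assuming job is the JobConf;
+ the node count and memory figures are illustrative assumptions.
+
+     int  numNodes       = 10;
+     long nodeMemoryMb   = 8192;   // yarn.nodemanager.resource.memory-mb (assumed)
+     long reduceMemoryMb = 2048;   // mapreduce.reduce.memory.mb (assumed)
+     int  reduces = (int) (0.95 * numNodes * (nodeMemoryMb / reduceMemoryMb));
+     job.setNumReduceTasks(reduces);   // 0.95 * 10 * 4 = 38 reduce tasks
+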
+
+
+
+
+ mapreduce.map.maxattempts
+ property. If this property is not already set, the default is 4 attempts.
+
+ @return the max number of attempts per map task.]]>
+
+
+
+
+
+
+
+
+
+
+ mapreduce.reduce.maxattempts
+ property. If this property is not already set, the default is 4 attempts.
+
+ @return the max number of attempts per reduce task.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ noFailures, the
+ tasktracker is blacklisted for this job.
+
+ @param noFailures maximum no. of failures of a given job per tasktracker.]]>
+
+
+
+
+ blacklisted for this job.
+
+ @return the maximum no. of failures of a given job per tasktracker.]]>
+
+
+
+
+ failed.
+
+ Defaults to zero, i.e. any failed map-task results in
+ the job being declared as {@link JobStatus#FAILED}.
+
+ @return the maximum percentage of map tasks that can fail without
+ the job being aborted.]]>
+
+
+
+
+
+ failed.
+
+ @param percent the maximum percentage of map tasks that can fail without
+ the job being aborted.]]>
+
+
+
+
+ failed.
+
+ Defaults to zero, i.e. any failed reduce-task results
+ in the job being declared as {@link JobStatus#FAILED}.
+
+ @return the maximum percentage of reduce tasks that can fail without
+ the job being aborted.]]>
+
+
+
+
+
+ failed.
+
+ @param percent the maximum percentage of reduce tasks that can fail without
+ the job being aborted.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The debug script can aid debugging of failed map tasks. The script is
+ given the task's stdout, stderr, syslog and jobconf files as arguments.
+
+
The debug command, run on the node where the map failed, is:
+
+ $script $stdout $stderr $syslog $jobconf.
+
+
+
The script file is distributed through {@link DistributedCache}
+ APIs. The script needs to be symlinked.
+
+ @param mDbgScript the script name]]>
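+
+ A hedged sketch of wiring up a map debug script; the HDFS path and script name
+ are placeholders, and the enclosing method is assumed to handle
+ URISyntaxException.
+
+     DistributedCache.createSymlink(job);
+     DistributedCache.addCacheFile(
+         new URI("hdfs:///scripts/map-debug.sh#map-debug.sh"), job);
+     job.setMapDebugScript("./map-debug.sh");   // framework supplies $stdout $stderr $syslog $jobconf
+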
+
+
+
+
+
+
+
+
+
+
+ The debug script can aid debugging of failed reduce tasks. The script
+ is given the task's stdout, stderr, syslog and jobconf files as arguments.
+
+
The debug command, run on the node where the reduce failed, is:
+
+ $script $stdout $stderr $syslog $jobconf.
+
+
+
The script file is distributed through {@link DistributedCache}
+ APIs. The script file needs to be symlinked.
+
+ @param rDbgScript the script name]]>
+
+
+
+
+
+
+
+
+
+ null if it hasn't
+ been set.
+ @see #setJobEndNotificationURI(String)]]>
+
+
+
+
+
+ The uri can contain 2 special parameters: $jobId and
+ $jobStatus. Those, if present, are replaced by the job's
+ identifier and completion-status respectively.
+
+
This is typically used by application-writers to implement chaining of
+ Map-Reduce jobs in an asynchronous manner.
+
+ @param uri the job end notification uri
+ @see JobStatus]]>
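+
+ A sketch of registering such a notification, assuming job is the JobConf;
+ the endpoint URL is a hypothetical placeholder.
+
+     job.setJobEndNotificationURI(
+         "http://workflow.example.com/notify?jobid=$jobId&status=$jobStatus");
+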
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When a job starts, a shared directory is created at location
+
+ ${mapreduce.cluster.local.dir}/taskTracker/$user/jobcache/$jobid/work/ .
+ This directory is exposed to the users through
+ mapreduce.job.local.dir .
+ So, the tasks can use this space
+ as scratch space and share files among them.
+ This value is also available as a system property.
+
+ @return The localized job specific shared directory]]>
+
+
+
+
+
+ For backward compatibility, if the job configuration sets the
+ key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
+ from {@link #DISABLED_MEMORY_LIMIT}, that value will be used
+ after converting it from bytes to MB.
+ @return memory required to run a map task of the job, in MB.]]>
+
+
+
+
+
+
+
+
+ For backward compatibility, if the job configuration sets the
+ key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
+ from {@link #DISABLED_MEMORY_LIMIT}, that value will be used
+ after converting it from bytes to MB.
+ @return memory required to run a reduce task of the job, in MB.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is deprecated. Now, different memory limits can be
+ set for map and reduce tasks of a job, in MB.
+
+ For backward compatibility, if the job configuration sets the
+ key {@link #MAPRED_TASK_MAXVMEM_PROPERTY}, that value is returned.
+ Otherwise, this method will return the larger of the values returned by
+ {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()}
+ after converting them into bytes.
+
+ @return Memory required to run a task of this job, in bytes.
+ @see #setMaxVirtualMemoryForTask(long)
+ @deprecated Use {@link #getMemoryForMapTask()} and
+ {@link #getMemoryForReduceTask()}]]>
+
+
+
+
+
+
+ mapred.task.maxvmem is split into
+ mapreduce.map.memory.mb
+ and mapreduce.reduce.memory.mb;
+ each of the new keys is set
+ to mapred.task.maxvmem converted
+ from bytes to MB, as the new values are in MB
+
+ @param vmem Maximum amount of virtual memory in bytes any task of this job
+ can use.
+ @see #getMaxVirtualMemoryForTask()
+ @deprecated
+ Use {@link #setMemoryForMapTask(long mem)} and
+ Use {@link #setMemoryForReduceTask(long mem)}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ k1=v1,k2=v2. Further it can
+ reference existing environment variables via $key on
+ Linux or %key% on Windows.
+
+ Example:
+
+
A=foo - This will set the env variable A to foo.
+
+
+ @deprecated Use {@link #MAPRED_MAP_TASK_ENV} or
+ {@link #MAPRED_REDUCE_TASK_ENV}]]>
+
+
+
+
+ k1=v1,k2=v2. Further it can
+ reference existing environment variables via $key on
+ Linux or %key% on Windows.
+
+ Example:
+
+
A=foo - This will set the env variable A to foo.
+
+
+ You can also add environment variables individually by appending
+ .VARNAME to this configuration key, where VARNAME is
+ the name of the environment variable.
+
+ Example:
+
+
mapreduce.map.env.VARNAME=value
+
]]>
+
+
+
+
+ k1=v1,k2=v2. Further it can
+ reference existing environment variables via $key on
+ Linux or %key% on Windows.
+
+ Example:
+
+
A=foo - This will set the env variable A to foo.
+
+
+ You can also add environment variables individually by appending
+ .VARNAME to this configuration key, where VARNAME is
+ the name of the environment variable.
+
+ Example:
+
+
mapreduce.reduce.env.VARNAME=value
+
]]>
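+
+ A sketch of the per-variable form described above, assuming conf is the job's
+ Configuration; the variable names and values are illustrative.
+
+     conf.set("mapreduce.map.env.LD_LIBRARY_PATH", "/opt/native/lib");
+     conf.set("mapreduce.reduce.env.JAVA_TOOL_OPTIONS", "-Xss2m");
+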
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ JobConf is the primary interface for a user to describe a
+ map-reduce job to the Hadoop framework for execution. The framework tries to
+ faithfully execute the job as-is described by JobConf, however:
+
+
+ Some configuration parameters might have been marked as
+
+ final by administrators and hence cannot be altered.
+
+
+ While some job parameters are straightforward to set
+ (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly
+ with the rest of the framework and/or job-configuration and are relatively
+ more complex for the user to control finely
+ (e.g. {@link #setNumMapTasks(int)}).
+
+
+
+
JobConf typically specifies the {@link Mapper}, combiner
+ (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and
+ {@link OutputFormat} implementations to be used etc.
+
+
Optionally JobConf is used to specify other advanced facets
+ of the job such as Comparators to be used, files to be put in
+ the {@link DistributedCache}, whether or not intermediate and/or job outputs
+ are to be compressed (and how), debuggability via user-provided scripts
+ ({@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)})
+ for doing post-processing on task logs, the task's stdout, stderr, syslog,
+ and so on.
+
+
Here is an example on how to configure a job via JobConf:
+
+ // Create a new JobConf
+ JobConf job = new JobConf(new Configuration(), MyJob.class);
+
+ // Specify various job-specific parameters
+ job.setJobName("myjob");
+
+ FileInputFormat.setInputPaths(job, new Path("in"));
+ FileOutputFormat.setOutputPath(job, new Path("out"));
+
+ job.setMapperClass(MyJob.MyMapper.class);
+ job.setCombinerClass(MyJob.MyReducer.class);
+ job.setReducerClass(MyJob.MyReducer.class);
+
+ job.setInputFormat(SequenceFileInputFormat.class);
+ job.setOutputFormat(SequenceFileOutputFormat.class);
+
+
+ @see JobClient
+ @see ClusterStatus
+ @see Tool
+ @see DistributedCache]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ any job
+ run on the jobtracker started at 200707121733, we would use :
+
+ @param jtIdentifier jobTracker identifier, or null
+ @param jobId job number, or null
+ @return a regex pattern matching JobIDs]]>
+
+
+
+
+ An example JobID is :
+ job_200707121733_0003 , which represents the third job
+ running at the jobtracker started at 200707121733.
+
+ Applications should never construct or parse JobID strings, but rather
+ use appropriate constructors or {@link #forName(String)} method.
+
+ @see TaskID
+ @see TaskAttemptID]]>
+
Applications can use the {@link Reporter} provided to report progress
+ or just indicate that they are alive. In scenarios where the application
+ takes a significant amount of time to process individual key/value
+ pairs, this is crucial since the framework might assume that the task has
+ timed-out and kill that task. The other way of avoiding this is to set
+
+ mapreduce.task.timeout to a high-enough value (or even zero for no
+ time-outs).
+
+ @param key the input key.
+ @param value the input value.
+ @param output collects mapped keys and values.
+ @param reporter facility to report progress.]]>
+
+
+
Maps are the individual tasks which transform input records into
+ intermediate records. The transformed intermediate records need not be of
+ the same type as the input records. A given input pair may map to zero or
+ many output pairs.
+
+
The Hadoop Map-Reduce framework spawns one map task for each
+ {@link InputSplit} generated by the {@link InputFormat} for the job.
+ Mapper implementations can access the {@link JobConf} for the
+ job via the {@link JobConfigurable#configure(JobConf)} and initialize
+ themselves. Similarly they can use the {@link Closeable#close()} method for
+ de-initialization.
+
+
The framework then calls
+ {@link #map(Object, Object, OutputCollector, Reporter)}
+ for each key/value pair in the InputSplit for that task.
+
+
All intermediate values associated with a given output key are
+ subsequently grouped by the framework, and passed to a {@link Reducer} to
+ determine the final output. Users can control the grouping by specifying
+ a Comparator via
+ {@link JobConf#setOutputKeyComparatorClass(Class)}.
+
+
The grouped Mapper outputs are partitioned per
+ Reducer. Users can control which keys (and hence records) go to
+ which Reducer by implementing a custom {@link Partitioner}.
+
+
Users can optionally specify a combiner, via
+ {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the
+ intermediate outputs, which helps to cut down the amount of data transferred
+ from the Mapper to the Reducer.
+
+
The intermediate, grouped outputs are always stored in
+ {@link SequenceFile}s. Applications can specify if and how the intermediate
+ outputs are to be compressed and which {@link CompressionCodec}s are to be
+ used via the JobConf.
+
+
If the job has
+ zero
+ reduces then the output of the Mapper is directly written
+ to the {@link FileSystem} without grouping by keys.
+
+
Example:
+
+ public class MyMapper<K extends WritableComparable, V extends Writable>
+ extends MapReduceBase implements Mapper<K, V, K, V> {
+
+ static enum MyCounters { NUM_RECORDS }
+
+ private String mapTaskId;
+ private String inputFile;
+ private int noRecords = 0;
+
+ public void configure(JobConf job) {
+ mapTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
+ inputFile = job.get(JobContext.MAP_INPUT_FILE);
+ }
+
+ public void map(K key, V val,
+ OutputCollector<K, V> output, Reporter reporter)
+ throws IOException {
+ // Process the <key, value> pair (assume this takes a while)
+ // ...
+ // ...
+
+ // Let the framework know that we are alive, and kicking!
+ // reporter.progress();
+
+ // Process some more
+ // ...
+ // ...
+
+ // Increment the no. of <key, value> pairs processed
+ ++noRecords;
+
+ // Increment counters
+ reporter.incrCounter(NUM_RECORDS, 1);
+
+ // Every 100 records update application-level status
+ if ((noRecords%100) == 0) {
+ reporter.setStatus(mapTaskId + " processed " + noRecords +
+ " from input-file: " + inputFile);
+ }
+
+ // Output the result
+ output.collect(key, val);
+ }
+ }
+
+
+
Applications may write a custom {@link MapRunnable} to exert greater
+ control on map processing e.g. multi-threaded Mappers etc.
Mapping of input records to output records is complete when this method
+ returns.
+
+ @param input the {@link RecordReader} to read the input records.
+ @param output the {@link OutputCollector} to collect the output records.
+ @param reporter {@link Reporter} to report progress, status-updates etc.
+ @throws IOException]]>
+
+
+
+ Custom implementations of MapRunnable can exert greater
+ control on map processing e.g. multi-threaded, asynchronous mappers etc.
+
+ @see Mapper]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ nearly
+ equal content length.
+ Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)}
+ to construct RecordReader's for MultiFileSplit's.
+ @see MultiFileSplit]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ MultiFileSplit can be used to implement {@link RecordReader}s that
+ read one record per file.
+ @see FileSplit
+ @see MultiFileInputFormat]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <key, value> pairs output by {@link Mapper}s
+ and {@link Reducer}s.
+
+
OutputCollector is the generalization of the facility
+ provided by the Map-Reduce framework to collect data output by either the
+ Mapper or the Reducer i.e. intermediate outputs
+ or the output of the job.
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if task output recovery is supported,
+ false otherwise
+ @throws IOException
+ @see #recoverTask(TaskAttemptContext)]]>
+
+
+
+
+
+
+ true repeatable job commit is supported,
+ false otherwise
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ OutputCommitter. This is called from the application master
+ process, but it is called individually for each task.
+
+ If an exception is thrown the task will be attempted again.
+
+ @param taskContext Context of the task whose output is being recovered
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ OutputCommitter describes the commit of task output for a
+ Map-Reduce job.
+
+
The Map-Reduce framework relies on the OutputCommitter of
+ the job to:
+
+
+ Setup the job during initialization. For example, create the temporary
+ output directory for the job during the initialization of the job.
+
+
+ Cleanup the job after the job completion. For example, remove the
+ temporary output directory after the job completion.
+
+
+ Setup the task temporary output.
+
+
+ Check whether a task needs a commit. This is to avoid the commit
+ procedure if a task does not need commit.
+
+
+ Commit of the task output.
+
+
+ Discard the task commit.
+
+
+ The methods in this class can be called from several different processes and
+ from several different contexts. It is important to know which process and
+ which context each is called from. Each method should be marked accordingly
+ in its documentation. It is also important to note that not all methods are
+ guaranteed to be called once and only once. If a method is not guaranteed to
+ have this property the output committer needs to handle this appropriately.
+ Also note that it is only in rare situations that they may be called
+ multiple times for the same task.
+
+ @see FileOutputCommitter
+ @see JobContext
+ @see TaskAttemptContext]]>
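+
+ A hedged sketch of a no-op committer for output written directly to an external
+ system that handles its own atomicity; this is not the FileOutputCommitter and
+ the class name is hypothetical.
+
+     public class NoOpOutputCommitter extends OutputCommitter {
+       public void setupJob(JobContext jobContext) throws IOException { }
+       public void setupTask(TaskAttemptContext taskContext) throws IOException { }
+       public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
+         return false;   // nothing to promote, so the commit step is skipped
+       }
+       public void commitTask(TaskAttemptContext taskContext) throws IOException { }
+       public void abortTask(TaskAttemptContext taskContext) throws IOException { }
+     }
+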
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is to validate the output specification for the job when it
+ is submitted. Typically this checks that the output does not already exist,
+ throwing an exception when it already exists, so that output is not
+ overwritten.
+
+ Implementations which write to filesystems which support delegation
+ tokens usually collect the tokens for the destination path(s)
+ and attach them to the job configuration.
+ @param ignored
+ @param job job configuration.
+ @throws IOException when output should not be attempted]]>
+
+
+
+ OutputFormat describes the output-specification for a
+ Map-Reduce job.
+
+
The Map-Reduce framework relies on the OutputFormat of the
+ job to:
+
+
+ Validate the output-specification of the job. For e.g. check that the
+ output directory doesn't already exist.
+
+ Provide the {@link RecordWriter} implementation to be used to write out
+ the output files of the job. Output files are stored in a
+ {@link FileSystem}.
+
+
+
+ @see RecordWriter
+ @see JobConf]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Typically a hash function on all or a subset of the key.
+
+ @param key the key to be partitioned.
+ @param value the entry value.
+ @param numPartitions the total number of partitions.
+ @return the partition number for the key.]]>
+
+
+
+ Partitioner controls the partitioning of the keys of the
+ intermediate map-outputs. The key (or a subset of the key) is used to derive
+ the partition, typically by a hash function. The total number of partitions
+ is the same as the number of reduce tasks for the job. Hence this controls
+ which of the m reduce tasks the intermediate key (and hence the
+ record) is sent for reduction.
+
+
Note: A Partitioner is created only when there are multiple
+ reducers.
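+
+ A sketch of a custom Partitioner that routes on part of the key; UrlKey and its
+ getHost() accessor are hypothetical user types.
+
+     public class HostPartitioner implements Partitioner<UrlKey, Text> {
+       public void configure(JobConf job) { }
+       public int getPartition(UrlKey key, Text value, int numPartitions) {
+         // hash only the host so all records for one host reach the same reducer
+         return (key.getHost().hashCode() & Integer.MAX_VALUE) % numPartitions;
+       }
+     }
+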
RecordReader, typically, converts the byte-oriented view of
+ the input, provided by the InputSplit, and presents a
+ record-oriented view for the {@link Mapper} and {@link Reducer} tasks for
+ processing. It thus assumes the responsibility of processing record
+ boundaries and presenting the tasks with keys and values.
RecordWriter implementations write the job outputs to the
+ {@link FileSystem}.
+
+ @see OutputFormat]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Reduces values for a given key.
+
+
The framework calls this method for each
+ <key, (list of values)> pair in the grouped inputs.
+ Output values must be of the same type as input values. Input keys must
+ not be altered. The framework will reuse the key and value objects
+ that are passed into the reduce, therefore the application should clone
+ the objects they want to keep a copy of. In many cases, all values are
+ combined into zero or one value.
+
+
+
Output pairs are collected with calls to
+ {@link OutputCollector#collect(Object,Object)}.
+
+
Applications can use the {@link Reporter} provided to report progress
+ or just indicate that they are alive. In scenarios where the application
+ takes a significant amount of time to process individual key/value
+ pairs, this is crucial since the framework might assume that the task has
+ timed-out and kill that task. The other way of avoiding this is to set
+
+ mapreduce.task.timeout to a high-enough value (or even zero for no
+ time-outs).
+
+ @param key the key.
+ @param values the list of values to reduce.
+ @param output to collect keys and combined values.
+ @param reporter facility to report progress.]]>
+
+
+
+ The number of Reducers for the job is set by the user via
+ {@link JobConf#setNumReduceTasks(int)}. Reducer implementations
+ can access the {@link JobConf} for the job via the
+ {@link JobConfigurable#configure(JobConf)} method and initialize themselves.
+ Similarly they can use the {@link Closeable#close()} method for
+ de-initialization.
+
+
Reducer has 3 primary phases:
+
+
+
+ Shuffle
+
+
Reducer is input the grouped output of a {@link Mapper}.
+ In the phase the framework, for each Reducer, fetches the
+ relevant partition of the output of all the Mappers, via HTTP.
+
+
+
+
+ Sort
+
+
The framework groups Reducer inputs by keys
+ (since different Mappers may have output the same key) in this
+ stage.
+
+
The shuffle and sort phases occur simultaneously i.e. while outputs are
+ being fetched they are merged.
+
+ SecondarySort
+
+
If equivalence rules for keys while grouping the intermediates are
+ different from those for grouping keys before reduction, then one may
+ specify a Comparator via
+ {@link JobConf#setOutputValueGroupingComparator(Class)}. Since
+ {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to
+ control how intermediate keys are grouped, these can be used in conjunction
+ to simulate secondary sort on values.
+
+
+ For example, say that you want to find duplicate web pages and tag them
+ all with the url of the "best" known example. You would set up the job
+ like:
+
+
Map Input Key: url
+
Map Input Value: document
+
Map Output Key: document checksum, url pagerank
+
Map Output Value: url
+
Partitioner: by checksum
+
OutputKeyComparator: by checksum and then decreasing pagerank
+
OutputValueGroupingComparator: by checksum
+
+
+
+
+ Reduce
+
+
In this phase the
+ {@link #reduce(Object, Iterator, OutputCollector, Reporter)}
+ method is called for each <key, (list of values)> pair in
+ the grouped inputs.
+
The output of the reduce task is typically written to the
+ {@link FileSystem} via
+ {@link OutputCollector#collect(Object, Object)}.
+
+
+
+
The output of the Reducer is not re-sorted.
+
+
Example:
+
+ public class MyReducer<K extends WritableComparable, V extends Writable>
+ extends MapReduceBase implements Reducer<K, V, K, V> {
+
+ static enum MyCounters { NUM_RECORDS }
+
+ private String reduceTaskId;
+ private int noKeys = 0;
+
+ public void configure(JobConf job) {
+ reduceTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
+ }
+
+ public void reduce(K key, Iterator<V> values,
+ OutputCollector<K, V> output,
+ Reporter reporter)
+ throws IOException {
+
+ // Process
+ int noValues = 0;
+ while (values.hasNext()) {
+ V value = values.next();
+
+ // Increment the no. of values for this key
+ ++noValues;
+
+ // Process the <key, value> pair (assume this takes a while)
+ // ...
+ // ...
+
+ // Let the framework know that we are alive, and kicking!
+ if ((noValues%10) == 0) {
+ reporter.progress();
+ }
+
+ // Process some more
+ // ...
+ // ...
+
+ // Output the <key, value>
+ output.collect(key, value);
+ }
+
+ // Increment the no. of <key, list of values> pairs processed
+ ++noKeys;
+
+ // Increment counters
+ reporter.incrCounter(NUM_RECORDS, 1);
+
+ // Every 100 keys update application-level status
+ if ((noKeys%100) == 0) {
+ reporter.setStatus(reduceTaskId + " processed " + noKeys);
+ }
+ }
+ }
+
+
+ @see Mapper
+ @see Partitioner
+ @see Reporter
+ @see MapReduceBase]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Counter of the given group/name.]]>
+
+
+
+
+
+
+ Counter of the given group/name.]]>
+
+
+
+
+
+
+ Enum.
+ @param amount A non-negative amount by which the counter is to
+ be incremented.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ InputSplit that the map is reading from.
+ @throws UnsupportedOperationException if called outside a mapper]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {@link Mapper} and {@link Reducer} can use the Reporter
+ provided to report progress or just indicate that they are alive. In
+ scenarios where the application takes significant amount of time to
+ process individual key/value pairs, this is crucial since the framework
+ might assume that the task has timed-out and kill that task.
+
+
Applications can also update {@link Counters} via the provided
+ Reporter .
+
+ @see Progressable
+ @see Counters]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ progress of the job's map-tasks, as a float between 0.0
+ and 1.0. When all map tasks have completed, the function returns 1.0.
+
+ @return the progress of the job's map-tasks.
+ @throws IOException]]>
+
+
+
+
+
+ progress of the job's reduce-tasks, as a float between 0.0
+ and 1.0. When all reduce tasks have completed, the function returns 1.0.
+
+ @return the progress of the job's reduce-tasks.
+ @throws IOException]]>
+
+
+
+
+
+ progress of the job's cleanup-tasks, as a float between 0.0
+ and 1.0. When all cleanup tasks have completed, the function returns 1.0.
+
+ @return the progress of the job's cleanup-tasks.
+ @throws IOException]]>
+
+
+
+
+
+ progress of the job's setup-tasks, as a float between 0.0
+ and 1.0. When all setup tasks have completed, the function returns 1.0.
+
+ @return the progress of the job's setup-tasks.
+ @throws IOException]]>
+
+
+
+
+
+ true if the job is complete, else false.
+ @throws IOException]]>
+
+
+
+
+
+ true if the job succeeded, else false.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the job retired, else false.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+ RunningJob is the user-interface to query for details on a
+ running Map-Reduce job.
+
+
Clients can get hold of RunningJob via the {@link JobClient}
+ and then query the running-job for details such as name, configuration,
+ progress etc.
+
+ @see JobClient]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This allows the user to specify the key class to be different
+ from the actual class ({@link BytesWritable}) used for writing
+
+ @param conf the {@link JobConf} to modify
+ @param theClass the SequenceFile output key class.]]>
+
+
+
+
+
+
+ This allows the user to specify the value class to be different
+ from the actual class ({@link BytesWritable}) used for writing
+
+ @param conf the {@link JobConf} to modify
+ @param theClass the SequenceFile output value class.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if auto increment
+ {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.
+ false otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if auto increment
+ {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.
+ false otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Hadoop provides an optional mode of execution in which the bad records
+ are detected and skipped in further attempts.
+
+
This feature can be used when map/reduce tasks crash deterministically on
+ certain input. This happens due to bugs in the map/reduce function. The usual
+ course would be to fix these bugs. But sometimes this is not possible;
+ perhaps the bug is in third party libraries for which the source code is
+ not available. Due to this, the task never reaches completion even with
+ multiple attempts, and the complete data for that task is lost.
+
+
With this feature, only a small portion of data is lost surrounding
+ the bad record, which may be acceptable for some user applications.
+ see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}
+
+
The skipping mode gets kicked off after a certain number of failures;
+ see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}
+
+
In the skipping mode, the map/reduce task maintains the record range which
+ is getting processed at all times. Before giving the input to the
+ map/reduce function, it sends this record range to the Task tracker.
+ If task crashes, the Task tracker knows which one was the last reported
+ range. On further attempts that range gets skipped.
]]>
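+
+ A sketch of the knobs mentioned above, assuming conf is the job's JobConf;
+ the thresholds and output path are illustrative.
+
+     SkipBadRecords.setAttemptsToStartSkipping(conf, 2);   // start skipping after 2 failed attempts
+     SkipBadRecords.setMapperMaxSkipRecords(conf, 1);      // narrow the skipped range down to 1 record
+     SkipBadRecords.setSkipOutputPath(conf, new Path("_skipped"));  // where skipped records are written
+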
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ all task attempt IDs
+ of any jobtracker, in any job, of the first
+ map task, we would use :
+
+ @param jtIdentifier jobTracker identifier, or null
+ @param jobId job number, or null
+ @param isMap whether the tip is a map, or null
+ @param taskId taskId number, or null
+ @param attemptId the task attempt number, or null
+ @return a regex pattern matching TaskAttemptIDs]]>
+
+
+
+
+
+
+
+
+
+ all task attempt IDs
+ of any jobtracker, in any job, of the first
+ map task, we would use :
+
+ @param jtIdentifier jobTracker identifier, or null
+ @param jobId job number, or null
+ @param type the {@link TaskType}
+ @param taskId taskId number, or null
+ @param attemptId the task attempt number, or null
+ @return a regex pattern matching TaskAttemptIDs]]>
+
+
+
+
+ An example TaskAttemptID is :
+ attempt_200707121733_0003_m_000005_0 , which represents the
+ zeroth task attempt for the fifth map task in the third job
+ running at the jobtracker started at 200707121733.
+
+ Applications should never construct or parse TaskAttemptID strings
+ , but rather use appropriate constructors or {@link #forName(String)}
+ method.
+
+ @see JobID
+ @see TaskID]]>
+
+ @param jtIdentifier jobTracker identifier, or null
+ @param jobId job number, or null
+ @param isMap whether the tip is a map, or null
+ @param taskId taskId number, or null
+ @return a regex pattern matching TaskIDs
+ @deprecated Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType,
+ Integer)}]]>
+
+
+
+
+
+
+
+
+ the first map task
+ of any jobtracker, of any job, we would use :
+
+ @param jtIdentifier jobTracker identifier, or null
+ @param jobId job number, or null
+ @param type the {@link TaskType}, or null
+ @param taskId taskId number, or null
+ @return a regex pattern matching TaskIDs]]>
+
+
+
+
+
+
+
+
+ An example TaskID is :
+ task_200707121733_0003_m_000005 , which represents the
+ fifth map task in the third job running at the jobtracker
+ started at 200707121733.
+
+ Applications should never construct or parse TaskID strings
+ , but rather use appropriate constructors or {@link #forName(String)}
+ method.
+
+ @see JobID
+ @see TaskAttemptID]]>
+
+
+
+
+
+
+
+ (tbl(,),tbl(,),...,tbl(,)) }]]>
+
+
+
+
+
+
+
+ (tbl(,),tbl(,),...,tbl(,)) }]]>
+
+
+
+ mapred.join.define.<ident> to a classname. In the expression
+ mapred.join.expr, the identifier will be assumed to be a
+ ComposableRecordReader.
+ mapred.join.keycomparator can be a classname used to compare keys
+ in the join.
+ @see #setFormat
+ @see JoinRecordReader
+ @see MultiFilterRecordReader]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ......
+ }]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ capacity children to position
+ id in the parent reader.
+ The id of a root CompositeRecordReader is -1 by convention, but relying
+ on this is not recommended.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ override(S1,S2,S3) will prefer values
+ from S3 over S2, and values from S2 over S1 for all keys
+ emitted from all sources.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It has to be specified how key and values are passed from one element of
+ the chain to the next, by value or by reference. If a Mapper leverages the
+ assumed semantics that the key and values are not modified by the collector
+ 'by value' must be used. If the Mapper does not expect this semantics, as
+ an optimization to avoid serialization and deserialization 'by reference'
+ can be used.
+
+ For the added Mapper the configuration given for it,
+ mapperConf, has precedence over the job's JobConf. This
+ precedence is in effect when the task is running.
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainMapper, this is done by the addMapper for the last mapper in the chain
+
+
+ @param job job's JobConf to add the Mapper class.
+ @param klass the Mapper class to add.
+ @param inputKeyClass mapper input key class.
+ @param inputValueClass mapper input value class.
+ @param outputKeyClass mapper output key class.
+ @param outputValueClass mapper output value class.
+ @param byValue indicates if key/values should be passed by value
+ to the next Mapper in the chain, if any.
+ @param mapperConf a JobConf with the configuration for the Mapper
+ class. It is recommended to use a JobConf without default values using the
+ JobConf(boolean loadDefaults) constructor with FALSE.]]>
+
+
+
+
+
+
+ If this method is overridden, super.configure(...) should be
+ invoked at the beginning of the overriding method.]]>
+
+
+
+
+
+
+
+
+
+ map(...) methods of the Mappers in the chain.]]>
+
+
+
+
+
+
+ If this method is overridden, super.close() should be
+ invoked at the end of the overriding method.]]>
+
+
+
+
+ The Mapper classes are invoked in a chained (or piped) fashion, the output of
+ the first becomes the input of the second, and so on until the last Mapper,
+ the output of the last Mapper will be written to the task's output.
+
+ The key functionality of this feature is that the Mappers in the chain do not
+ need to be aware that they are executed in a chain. This enables having
+ reusable specialized Mappers that can be combined to perform composite
+ operations within a single task.
+
+ Special care has to be taken when creating chains that the key/values output
+ by a Mapper are valid for the following Mapper in the chain. It is assumed
+ all Mappers and the Reduce in the chain use matching output and input key and
+ value classes as no conversion is done by the chaining code.
+
+ Using the ChainMapper and the ChainReducer classes it is possible to compose
+ Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An
+ immediate benefit of this pattern is a dramatic reduction in disk IO.
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainMapper, this is done by the addMapper for the last mapper in the chain.
+
]]>
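+
+ A hedged sketch of composing such a [MAP+ / REDUCE MAP*] pipeline; AMap, BMap,
+ XReduce, CMap, ChainExample and conf are hypothetical, and the key/value types
+ are illustrative.
+
+     JobConf job = new JobConf(conf, ChainExample.class);
+
+     JobConf aConf = new JobConf(false);   // per-mapper conf without defaults
+     ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class,
+         Text.class, Text.class, true, aConf);
+
+     JobConf bConf = new JobConf(false);
+     ChainMapper.addMapper(job, BMap.class, Text.class, Text.class,
+         LongWritable.class, Text.class, true, bConf);
+
+     JobConf rConf = new JobConf(false);
+     ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class,
+         Text.class, Text.class, true, rConf);
+
+     JobConf cConf = new JobConf(false);
+     ChainReducer.addMapper(job, CMap.class, Text.class, Text.class,
+         LongWritable.class, Text.class, false, cConf);
+
+     JobClient.runJob(job);
+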
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It has to be specified how key and values are passed from one element of
+ the chain to the next, by value or by reference. If a Reducer leverages the
+ assumed semantics that the key and values are not modified by the collector
+ 'by value' must be used. If the Reducer does not expect this semantics, as
+ an optimization to avoid serialization and deserialization 'by reference'
+ can be used.
+
+ For the added Reducer the configuration given for it,
+ reducerConf, has precedence over the job's JobConf. This
+ precedence is in effect when the task is running.
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainReducer, this is done by the setReducer or the addMapper for the last
+ element in the chain.
+
+ @param job job's JobConf to add the Reducer class.
+ @param klass the Reducer class to add.
+ @param inputKeyClass reducer input key class.
+ @param inputValueClass reducer input value class.
+ @param outputKeyClass reducer output key class.
+ @param outputValueClass reducer output value class.
+ @param byValue indicates if key/values should be passed by value
+ to the next Mapper in the chain, if any.
+ @param reducerConf a JobConf with the configuration for the Reducer
+ class. It is recommended to use a JobConf without default values using the
+ JobConf(boolean loadDefaults) constructor with FALSE.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It has to be specified how key and values are passed from one element of
+ the chain to the next, by value or by reference. If a Mapper leverages the
+ assumed semantics that the key and values are not modified by the collector
+ 'by value' must be used. If the Mapper does not expect this semantics, as
+ an optimization to avoid serialization and deserialization 'by reference'
+ can be used.
+
+ For the added Mapper the configuration given for it,
+ mapperConf, has precedence over the job's JobConf. This
+ precedence is in effect when the task is running.
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainMapper, this is done by the addMapper for the last mapper in the chain
+ .
+
+ @param job chain job's JobConf to add the Mapper class.
+ @param klass the Mapper class to add.
+ @param inputKeyClass mapper input key class.
+ @param inputValueClass mapper input value class.
+ @param outputKeyClass mapper output key class.
+ @param outputValueClass mapper output value class.
+ @param byValue indicates if key/values should be passed by value
+ to the next Mapper in the chain, if any.
+ @param mapperConf a JobConf with the configuration for the Mapper
+ class. It is recommended to use a JobConf without default values using the
+ JobConf(boolean loadDefaults) constructor with FALSE.]]>
+
+
+
+
+
+
+ If this method is overridden, super.configure(...) should be
+ invoked at the beginning of the overriding method.]]>
+
+
+
+
+
+
+
+
+
+ reduce(...) method of the Reducer with the
+ map(...) methods of the Mappers in the chain.]]>
+
+
+
+
+
+
+ If this method is overridden, super.close() should be
+ invoked at the end of the overriding method.]]>
+
+
+
+
+ For each record output by the Reducer, the Mapper classes are invoked in a
+ chained (or piped) fashion, the output of the first becomes the input of the
+ second, and so on until the last Mapper, the output of the last Mapper will
+ be written to the task's output.
+
+ The key functionality of this feature is that the Mappers in the chain do not
+ need to be aware that they are executed after the Reducer or in a chain.
+ This enables having reusable specialized Mappers that can be combined to
+ perform composite operations within a single task.
+
+ Special care has to be taken when creating chains that the key/values output
+ by a Mapper are valid for the following Mapper in the chain. It is assumed
+ all Mappers and the Reduce in the chain use matching output and input key and
+ value classes as no conversion is done by the chaining code.
+
+ Using the ChainMapper and the ChainReducer classes it is possible to compose
+ Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An
+ immediate benefit of this pattern is a dramatic reduction in disk IO.
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainReducer, this is done by the setReducer or the addMapper for the last
+ element in the chain.
+
+ MultipleOutputs supports counters, by default they are disabled.
+ The counters group is the {@link MultipleOutputs} class name.
+
+ The names of the counters are the same as the named outputs. For multi
+ named outputs the name of the counter is the concatenation of the named
+ output, and underscore '_' and the multiname.
+
+ @param conf job conf in which to enable or disable the counters.
+ @param enabled indicates if the counters will be enabled or not.]]>
+
+
+
+
+
+
+ By default these counters are disabled.
+
+ MultipleOutputs supports counters, by default they are disabled.
+ The counters group is the {@link MultipleOutputs} class name.
+
+ The names of the counters are the same as the named outputs. For multi
+ named outputs the name of the counter is the concatenation of the named
+ output, and underscore '_' and the multiname.
+
+
+ @param conf job conf to check.
+ @return TRUE if the counters are enabled, FALSE if they are disabled.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If overridden, subclasses must invoke super.close() at the
+ end of their close()
+
+ @throws java.io.IOException thrown if any of the MultipleOutput files
+ could not be closed properly.]]>
+
+
+
+ OutputCollector passed to
+ the map() and reduce() methods of the
+ Mapper and Reducer implementations.
+
+ Each additional output, or named output, may be configured with its own
+ OutputFormat, with its own key class and with its own value
+ class.
+
+ A named output can be a single file or a multi file. The latter is referred to as
+ a multi named output.
+
+ A multi named output is an unbound set of files all sharing the same
+ OutputFormat, key class and value class configuration.
+
+ When named outputs are used within a Mapper implementation,
+ key/values written to a name output are not part of the reduce phase, only
+ key/values written to the job OutputCollector are part of the
+ reduce phase.
+
+ MultipleOutputs supports counters, by default they are disabled. The counters
+ group is the {@link MultipleOutputs} class name.
+
+ The names of the counters are the same as the named outputs. For multi
+ named outputs the name of the counter is the concatenation of the named
+ output, and underscore '_' and the multiname.
+
+ Job configuration usage pattern is:
+
+
+ JobConf conf = new JobConf();
+
+ conf.setInputPath(inDir);
+ FileOutputFormat.setOutputPath(conf, outDir);
+
+ conf.setMapperClass(MOMap.class);
+ conf.setReducerClass(MOReduce.class);
+ ...
+
+ // Defines additional single text based output 'text' for the job
+ MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
+ LongWritable.class, Text.class);
+
+ // Defines additional multi sequencefile based output 'sequence' for the
+ // job
+ MultipleOutputs.addMultiNamedOutput(conf, "seq",
+ SequenceFileOutputFormat.class,
+ LongWritable.class, Text.class);
+ ...
+
+ JobClient jc = new JobClient();
+ RunningJob job = jc.submitJob(conf);
+
+ ...
+
+
+ Usage pattern in a Reducer is:
+
+
+ public class MOReduce implements
+ Reducer<WritableComparable, Writable> {
+ private MultipleOutputs mos;
+
+ public void configure(JobConf conf) {
+ ...
+ mos = new MultipleOutputs(conf);
+ }
+
+ public void reduce(WritableComparable key, Iterator<Writable> values,
+ OutputCollector output, Reporter reporter)
+ throws IOException {
+ ...
+ mos.getCollector("text", reporter).collect(key, new Text("Hello"));
+ mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
+ mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
+ ...
+ }
+
+ public void close() throws IOException {
+ mos.close();
+ ...
+ }
+
+ }
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It can be used instead of the default implementation,
+ {@link org.apache.hadoop.mapred.MapRunner}, when the Map
+ operation is not CPU bound, in order to improve throughput.
+
+ Map implementations using this MapRunnable must be thread-safe.
+
+ The Map-Reduce job has to be configured to use this MapRunnable class (using
+ the JobConf.setMapRunnerClass method) and
+ the number of threads the thread-pool can use with the
+ mapred.map.multithreadedrunner.threads property; its default
+ value is 10 threads.
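+
+ A sketch of enabling the multithreaded runner for an I/O-bound map, assuming
+ job is the JobConf:
+
+     job.setMapRunnerClass(MultithreadedMapRunner.class);
+     job.setInt("mapred.map.multithreadedrunner.threads", 20);   // default is 10
+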
+
+ Number of blacklisted and decommissioned trackers.
+
+
+ Slot capacity of the cluster.
+
+
+ The number of currently occupied/reserved map and reduce slots.
+
+
+ The number of currently running map and reduce tasks.
+
+
+ The number of job submissions.
+
+
+
+
Clients can query for the latest ClusterMetrics, via
+ {@link Cluster#getClusterStatus()}.
+
+ @see Cluster]]>
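+
+ A sketch of querying these metrics; the enclosing method is assumed to declare
+ the checked exceptions thrown by the Cluster calls.
+
+     Cluster cluster = new Cluster(new Configuration());
+     ClusterMetrics metrics = cluster.getClusterStatus();
+     System.out.println("trackers: " + metrics.getTaskTrackerCount()
+         + ", map slot capacity: " + metrics.getMapSlotCapacity()
+         + ", running maps: " + metrics.getRunningMaps());
+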
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Counters represent global counters, defined either by the
+ Map-Reduce framework or applications. Each Counter is named by
+ an {@link Enum} and has a long for the value.
+
+
Counters are bunched into Groups, each comprising
+ counters from a particular Enum class.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the type of counter
+ @param the type of counter group
+ @param counters the old counters object]]>
+
+
+
+ Counters holds per job/task counters, defined either by the
+ Map-Reduce framework or applications. Each Counter can be of
+ any {@link Enum} type.
+
+
Counters are bunched into {@link CounterGroup}s, each
+ comprising counters from a particular Enum class.]]>
+
Note: The split is a logical split of the inputs and the
+ input files are not physically split into chunks. For e.g. a split could
+ be <input-file-path, start, offset> tuple. The InputFormat
+ also creates the {@link RecordReader} to read the {@link InputSplit}.
+
+ @param context job configuration.
+ @return an array of {@link InputSplit}s for the job.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ InputFormat describes the input-specification for a
+ Map-Reduce job.
+
+
The Map-Reduce framework relies on the InputFormat of the
+ job to:
+
+
+ Validate the input-specification of the job.
+
+ Split-up the input file(s) into logical {@link InputSplit}s, each of
+ which is then assigned to an individual {@link Mapper}.
+
+
+ Provide the {@link RecordReader} implementation to be used to glean
+ input records from the logical InputSplit for processing by
+ the {@link Mapper}.
+
+
+
+
The default behavior of file-based {@link InputFormat}s, typically
+ sub-classes of {@link FileInputFormat}, is to split the
+ input into logical {@link InputSplit}s based on the total size, in
+ bytes, of the input files. However, the {@link FileSystem} blocksize of
+ the input files is treated as an upper bound for input splits. A lower bound
+ on the split size can be set via
+
+ mapreduce.input.fileinputformat.split.minsize.
+
+
Clearly, logical splits based on input-size are insufficient for many
+ applications since record boundaries are to be respected. In such cases, the
+ application has to also implement a {@link RecordReader} on whom lies the
+ responsibility to respect record-boundaries and present a record-oriented
+ view of the logical InputSplit to the individual task.
+
+ @see InputSplit
+ @see RecordReader
+ @see FileInputFormat]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ SplitLocationInfos describing how the split
+ data is stored at each location. A null value indicates that all the
+ locations have the data stored on disk.
+ @throws IOException]]>
+
+
+
+ InputSplit represents the data to be processed by an
+ individual {@link Mapper}.
+
+
Typically, it presents a byte-oriented view on the input and is the
+ responsibility of {@link RecordReader} of the job to process this and present
+ a record-oriented view.
+
+ @see InputFormat
+ @see RecordReader]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Job makes a copy of the Configuration so
+ that any necessary internal modifications do not reflect on the incoming
+ parameter.
+
+ A Cluster will be created from the conf parameter only when it's needed.
+
+ @param conf the configuration
+ @return the {@link Job} , with no connection to a cluster yet.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Job makes a copy of the Configuration so
+ that any necessary internal modifications do not reflect on the incoming
+ parameter.
+
+ @param conf the configuration
+ @return the {@link Job} , with no connection to a cluster yet.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Job makes a copy of the Configuration so
+ that any necessary internal modifications do not reflect on the incoming
+ parameter.
+
+ @param status job status
+ @param conf job configuration
+ @return the {@link Job} , with no connection to a cluster yet.
+ @throws IOException]]>
+
+
+
+
+
+
+ Job makes a copy of the Configuration so
+ that any necessary internal modifications do not reflect on the incoming
+ parameter.
+
+ @param ignored
+ @return the {@link Job} , with no connection to a cluster yet.
+ @throws IOException
+ @deprecated Use {@link #getInstance()}]]>
+
+
+
+
+
+
+
+ Job makes a copy of the Configuration so
+ that any necessary internal modifications do not reflect on the incoming
+ parameter.
+
+ @param ignored
+ @param conf job configuration
+ @return the {@link Job} , with no connection to a cluster yet.
+ @throws IOException
+ @deprecated Use {@link #getInstance(Configuration)}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ progress of the job's map-tasks, as a float between 0.0
+ and 1.0. When all map tasks have completed, the function returns 1.0.
+
+ @return the progress of the job's map-tasks.
+ @throws IOException]]>
+
+
+
+
+
+ progress of the job's reduce-tasks, as a float between 0.0
+ and 1.0. When all reduce tasks have completed, the function returns 1.0.
+
+ @return the progress of the job's reduce-tasks.
+ @throws IOException]]>
+
+
+
+
+
+
+ progress of the job's cleanup-tasks, as a float between 0.0
+ and 1.0. When all cleanup tasks have completed, the function returns 1.0.
+
+ @return the progress of the job's cleanup-tasks.
+ @throws IOException]]>
+
+
+
+
+
+ progress of the job's setup-tasks, as a float between 0.0
+ and 1.0. When all setup tasks have completed, the function returns 1.0.
+
+ @return the progress of the job's setup-tasks.
+ @throws IOException]]>
+
+
+
+
+
+ true if the job is complete, else false.
+ @throws IOException]]>
+
+
+
+
+
+ true if the job succeeded, else false.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ InputFormat to use
+ @throws IllegalStateException if the job is submitted]]>
+
+
+
+
+
+
+ OutputFormat to use
+ @throws IllegalStateException if the job is submitted]]>
+
+
+
+
+
+
+ Mapper to use
+ @throws IllegalStateException if the job is submitted]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Reducer to use
+ @throws IllegalStateException if the job is submitted]]>
+
+
+
+
+
+
+ Partitioner to use
+ @throws IllegalStateException if the job is submitted]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if speculative execution
+ should be turned on, else false.]]>
+
+
+
+
+
+ true if speculative execution
+ should be turned on for map tasks,
+ else false.]]>
+
+
+
+
+
+ true if speculative execution
+ should be turned on for reduce tasks,
+ else false.]]>
+
+
+
+
+
+ true, job-setup and job-cleanup will be
+ considered from {@link OutputCommitter}
+ else ignored.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The access permissions of the file will determine whether the localized
+ file will be shared across jobs. If the file is not readable by other or
+ if any of its parent directories is not executable by other, then the
+ file will not be shared. In the case of a path that ends in "/*",
+ sharing of the localized files will be determined solely from the
+ access permissions of the parent directories. The access permissions of
+ the individual files will be ignored.
+
+ @param uri The uri of the cache to be localized.
+ @param conf Configuration to add the cache to.]]>
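+
+ For illustration, the commonly used Job wrapper around this call can be
+ invoked as follows (the HDFS path is a placeholder; the "#dictionary.txt"
+ fragment sets the link name in the task's working directory):
+
+   Job job = Job.getInstance(conf);
+   // Localize a read-only lookup file onto every node that runs a task of this job.
+   job.addCacheFile(new URI("/apps/lookup/dictionary.txt#dictionary.txt"));
+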
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ JobTracker is lost]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Job.
+ @throws IOException if fail to close.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It allows the user to configure the
+ job, submit it, control its execution, and query the state. The set methods
+ only work until the job is submitted, afterwards they will throw an
+ IllegalStateException.
+
+
+ Normally the user creates the application, describes various facets of the
+ job via {@link Job} and then submits the job and monitors its progress.
+
+
Here is an example of how to submit a job:
+
+ // Create a new Job
+ Job job = Job.getInstance();
+ job.setJarByClass(MyJob.class);
+
+ // Specify various job-specific parameters
+ job.setJobName("myjob");
+
+ job.setInputPath(new Path("in"));
+ job.setOutputPath(new Path("out"));
+
+ job.setMapperClass(MyJob.MyMapper.class);
+ job.setReducerClass(MyJob.MyReducer.class);
+
+ // Submit the job, then poll for progress until the job is complete
+ job.waitForCompletion(true);
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.
+ @return the number of reduce tasks for this job.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mapred.map.max.attempts
+ property. If this property is not already set, the default is 4 attempts.
+
+ @return the max number of attempts per map task.]]>
+
+
+
+
+ mapred.reduce.max.attempts
+ property. If this property is not already set, the default is 4 attempts.
+
+ @return the max number of attempts per reduce task.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An example JobID is :
+ job_200707121733_0003 , which represents the third job
+ running at the jobtracker started at 200707121733.
+
+ Applications should never construct or parse JobID strings, but rather
+ use appropriate constructors or {@link #forName(String)} method.
+
+ @see TaskID
+ @see TaskAttemptID]]>
+
The Hadoop Map-Reduce framework spawns one map task for each
+ {@link InputSplit} generated by the {@link InputFormat} for the job.
+ Mapper implementations can access the {@link Configuration} for
+ the job via the {@link JobContext#getConfiguration()}.
+
+
The framework first calls
+ {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by
+ {@link #map(Object, Object, org.apache.hadoop.mapreduce.Mapper.Context)}
+ for each key/value pair in the InputSplit. Finally
+ {@link #cleanup(org.apache.hadoop.mapreduce.Mapper.Context)} is called.
+
+
All intermediate values associated with a given output key are
+ subsequently grouped by the framework, and passed to a {@link Reducer} to
+ determine the final output. Users can control the sorting and grouping by
+ specifying two key {@link RawComparator} classes.
+
+
The Mapper outputs are partitioned per
+ Reducer. Users can control which keys (and hence records) go to
+ which Reducer by implementing a custom {@link Partitioner}.
+
+
Users can optionally specify a combiner, via
+ {@link Job#setCombinerClass(Class)}, to perform local aggregation of the
+ intermediate outputs, which helps to cut down the amount of data transferred
+ from the Mapper to the Reducer.
+
+
Applications can specify if and how the intermediate
+ outputs are to be compressed and which {@link CompressionCodec}s are to be
+ used via the Configuration.
+
+
If the job has zero
+ reduces then the output of the Mapper is directly written
+ to the {@link OutputFormat} without sorting by keys.
+
+
Example:
+
+ public class TokenCounterMapper
+ extends Mapper<Object, Text, Text, IntWritable>{
+
+ private final static IntWritable one = new IntWritable(1);
+ private Text word = new Text();
+
+ public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
+ StringTokenizer itr = new StringTokenizer(value.toString());
+ while (itr.hasMoreTokens()) {
+ word.set(itr.nextToken());
+ context.write(word, one);
+ }
+ }
+ }
+
+
+
Applications may override the
+ {@link #run(org.apache.hadoop.mapreduce.Mapper.Context)} method to exert
+ greater control on map processing e.g. multi-threaded Mappers
+ etc.
+
+ @see InputFormat
+ @see JobContext
+ @see Partitioner
+ @see Reducer]]>
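+
+ For illustration, a minimal sketch of overriding run(Context) as mentioned
+ above; the counter group/name are placeholders and the default per-record
+ loop is kept:
+
+ public class CountingMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
+
+   @Override
+   public void run(Context context) throws IOException, InterruptedException {
+     setup(context);
+     try {
+       while (context.nextKeyValue()) {
+         // Count every record seen before delegating to map().
+         context.getCounter("debug", "records-seen").increment(1);
+         map(context.getCurrentKey(), context.getCurrentValue(), context);
+       }
+     } finally {
+       cleanup(context);
+     }
+   }
+ }
+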
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ MarkableIterator is a wrapper iterator class that
+ implements the {@link MarkableIteratorInterface}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if task output recovery is supported,
+ false otherwise
+ @see #recoverTask(TaskAttemptContext)
+ @deprecated Use {@link #isRecoverySupported(JobContext)} instead.]]>
+
+
+
+
+
+
+ true repeatable job commit is supported,
+ false otherwise
+ @throws IOException]]>
+
+
+
+
+
+
+ true if task output recovery is supported,
+ false otherwise
+ @throws IOException
+ @see #recoverTask(TaskAttemptContext)]]>
+
+
+
+
+
+
+ OutputCommitter. This is called from the application master
+ process, but it is called individually for each task.
+
+ If an exception is thrown the task will be attempted again.
+
+ This may be called multiple times for the same task. But from different
+ application attempts.
+
+ @param taskContext Context of the task whose output is being recovered
+ @throws IOException]]>
+
+
+
+ OutputCommitter describes the commit of task output for a
+ Map-Reduce job.
+
+
The Map-Reduce framework relies on the OutputCommitter of
+ the job to:
+
+
+ Setup the job during initialization. For example, create the temporary
+ output directory for the job during the initialization of the job.
+
+
+ Cleanup the job after the job completion. For example, remove the
+ temporary output directory after the job completion.
+
+
+ Setup the task temporary output.
+
+
+ Check whether a task needs a commit. This is to avoid the commit
+ procedure if a task does not need commit.
+
+
+ Commit of the task output.
+
+
+ Discard the task commit.
+
+
+ The methods in this class can be called from several different processes and
+ from several different contexts. It is important to know which process and
+ which context each is called from. Each method should be marked accordingly
+ in its documentation. It is also important to note that not all methods are
+ guaranteed to be called once and only once. If a method is not guaranteed to
+ have this property the output committer needs to handle this appropriately.
+ Also note it will only be in rare situations where they may be called
+ multiple times for the same task.
+
+ @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
+ @see JobContext
+ @see TaskAttemptContext]]>
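+
+ For illustration, a minimal committer that performs no work might be
+ sketched as follows; a real committer would promote task output to the
+ final location in commitTask/commitJob:
+
+ public class NoOpOutputCommitter extends OutputCommitter {
+   public void setupJob(JobContext jobContext) { /* e.g. create a temporary output dir */ }
+   public void setupTask(TaskAttemptContext taskContext) { /* per-task initialization */ }
+   public boolean needsTaskCommit(TaskAttemptContext taskContext) { return false; }
+   public void commitTask(TaskAttemptContext taskContext) { /* promote the task's output */ }
+   public void abortTask(TaskAttemptContext taskContext) { /* discard the task's output */ }
+ }
+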
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is to validate the output specification for the job when the
+ job is submitted. Typically it checks that the output does not already exist,
+ throwing an exception when it already exists, so that output is not
+ overwritten.
+
+ Implementations which write to filesystems which support delegation
+ tokens usually collect the tokens for the destination path(s)
+ and attach them to the job context's JobConf.
+ @param context information about the job
+ @throws IOException when output should not be attempted]]>
+
+
+
+
+
+
+
+
+
+
+
+ OutputFormat describes the output-specification for a
+ Map-Reduce job.
+
+
The Map-Reduce framework relies on the OutputFormat of the
+ job to:
+
+
+ Validate the output-specification of the job. For e.g. check that the
+ output directory doesn't already exist.
+
+ Provide the {@link RecordWriter} implementation to be used to write out
+ the output files of the job. Output files are stored in a
+ {@link FileSystem}.
+
+
+
+ @see RecordWriter]]>
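+
+ For illustration, a minimal OutputFormat that simply discards all records
+ (similar in spirit to NullOutputFormat) could be sketched as:
+
+ public class DiscardingOutputFormat<K, V> extends OutputFormat<K, V> {
+   public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) {
+     return new RecordWriter<K, V>() {
+       public void write(K key, V value) { /* drop the record */ }
+       public void close(TaskAttemptContext c) { }
+     };
+   }
+   public void checkOutputSpecs(JobContext context) { /* nothing to validate */ }
+   public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
+     return new NoOpOutputCommitter(); // any committer will do; see the sketch above
+   }
+ }
+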
+
+
+
+
+
+
+
+
+
+
+
+
+ Typically a hash function on all or a subset of the key.
+
+ @param key the key to be partitioned.
+ @param value the entry value.
+ @param numPartitions the total number of partitions.
+ @return the partition number for the key.]]>
+
+
+
+ Partitioner controls the partitioning of the keys of the
+ intermediate map-outputs. The key (or a subset of the key) is used to derive
+ the partition, typically by a hash function. The total number of partitions
+ is the same as the number of reduce tasks for the job. Hence this controls
+ which of the m reduce tasks the intermediate key (and hence the
+ record) is sent for reduction.
+
+
Note: A Partitioner is created only when there are multiple
+ reducers.
+
+
Note: If you require your Partitioner class to obtain the Job's
+ configuration object, implement the {@link Configurable} interface.
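+
+ For illustration, a hash-based partitioner (essentially what the default
+ HashPartitioner does) can be sketched as:
+
+ public class SimpleHashPartitioner extends Partitioner<Text, Text> {
+   @Override
+   public int getPartition(Text key, Text value, int numPartitions) {
+     // Mask the sign bit so the result is non-negative, then bucket by modulo.
+     return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
+   }
+ }
+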
RecordWriter implementations write the job outputs to the
+ {@link FileSystem}.
+
+ @see OutputFormat]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the input keys
+ @param the class of the input values
+ @param the class of the output keys
+ @param the class of the output values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Reducer implementations
+ can access the {@link Configuration} for the job via the
+ {@link JobContext#getConfiguration()} method.
+
+
Reducer has 3 primary phases:
+
+
+
+ Shuffle
+
+
The Reducer copies the sorted output from each
+ {@link Mapper} using HTTP across the network.
+
+
+
+ Sort
+
+
The framework merge sorts Reducer inputs by
+ keys
+ (since different Mappers may have output the same key).
+
+
The shuffle and sort phases occur simultaneously i.e. while outputs are
+ being fetched they are merged.
+
+ SecondarySort
+
+
To achieve a secondary sort on the values returned by the value
+ iterator, the application should extend the key with the secondary
+ key and define a grouping comparator. The keys will be sorted using the
+ entire key, but will be grouped using the grouping comparator to decide
+ which keys and values are sent in the same call to reduce. The grouping
+ comparator is specified via
+ {@link Job#setGroupingComparatorClass(Class)}. The sort order is
+ controlled by
+ {@link Job#setSortComparatorClass(Class)}.
+
+
+ For example, say that you want to find duplicate web pages and tag them
+ all with the url of the "best" known example. You would set up the job
+ like:
+
+
Map Input Key: url
+
Map Input Value: document
+
Map Output Key: document checksum, url pagerank
+
Map Output Value: url
+
Partitioner: by checksum
+
OutputKeyComparator: by checksum and then decreasing pagerank
+
OutputValueGroupingComparator: by checksum
+
+
+
+
+ Reduce
+
+
In this phase the
+ {@link #reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}
+ method is called for each <key, (collection of values)> in
+ the sorted inputs.
+
The output of the reduce task is typically written to a
+ {@link RecordWriter} via
+ {@link Context#write(Object, Object)}.
+
+
+
+
The output of the Reducer is not re-sorted.
+
+
Example:
+
+ public class IntSumReducer<Key> extends Reducer<Key,IntWritable,
+ Key,IntWritable> {
+ private IntWritable result = new IntWritable();
+
+ public void reduce(Key key, Iterable<IntWritable> values,
+ Context context) throws IOException, InterruptedException {
+ int sum = 0;
+ for (IntWritable val : values) {
+ sum += val.get();
+ }
+ result.set(sum);
+ context.write(key, result);
+ }
+ }
+
+
+ @see Mapper
+ @see Partitioner]]>
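+
+ For illustration, the secondary-sort wiring described above is configured on
+ the Job; in the duplicate-web-page example the key and comparator classes
+ (ChecksumUrlPair, ChecksumPartitioner, ChecksumPageRankComparator,
+ ChecksumGroupingComparator) are placeholders the application would supply:
+
+   Job job = Job.getInstance(conf, "dedup-pages");
+   job.setMapOutputKeyClass(ChecksumUrlPair.class);                   // composite key
+   job.setPartitionerClass(ChecksumPartitioner.class);                // partition by checksum only
+   job.setSortComparatorClass(ChecksumPageRankComparator.class);      // checksum, then decreasing pagerank
+   job.setGroupingComparatorClass(ChecksumGroupingComparator.class);  // group reduce calls by checksum
+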
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counterName.
+ @param counterName counter name
+ @return the Counter for the given counterName]]>
+
+
+
+
+
+
+ groupName and
+ counterName.
+ @param counterName counter name
+ @return the Counter for the given groupName and
+ counterName]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An example TaskAttemptID is :
+ attempt_200707121733_0003_m_000005_0 , which represents the
+ zeroth task attempt for the fifth map task in the third job
+ running at the jobtracker started at 200707121733.
+
+ Applications should never construct or parse TaskAttemptID strings
+ , but rather use appropriate constructors or {@link #forName(String)}
+ method.
+
+ @see JobID
+ @see TaskID]]>
+
+ Applications should never construct or parse TaskID strings
+ , but rather use appropriate constructors or {@link #forName(String)}
+ method.
+
+ @see JobID
+ @see TaskAttemptID]]>
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainMapper, this is done by the addMapper for the last mapper in the chain
+
+
+ @param job
+ The job.
+ @param klass
+ the Mapper class to add.
+ @param inputKeyClass
+ mapper input key class.
+ @param inputValueClass
+ mapper input value class.
+ @param outputKeyClass
+ mapper output key class.
+ @param outputValueClass
+ mapper output value class.
+ @param mapperConf
+ a configuration for the Mapper class. It is recommended to use a
+ Configuration without default values using the
+ Configuration(boolean loadDefaults) constructor with
+ FALSE.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ The Mapper classes are invoked in a chained (or piped) fashion, the output of
+ the first becomes the input of the second, and so on until the last Mapper,
+ the output of the last Mapper will be written to the task's output.
+
+
+ The key functionality of this feature is that the Mappers in the chain do not
+ need to be aware that they are executed in a chain. This enables having
+ reusable specialized Mappers that can be combined to perform composite
+ operations within a single task.
+
+
+ Special care has to be taken when creating chains that the key/values output
+ by a Mapper are valid for the following Mapper in the chain. It is assumed
+ all Mappers and the Reduce in the chain use matching output and input key and
+ value classes as no conversion is done by the chaining code.
+
+
+ Using the ChainMapper and the ChainReducer classes it is possible to compose
+ Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An
+ immediate benefit of this pattern is a dramatic reduction in disk IO.
+
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainMapper, this is done by the addMapper for the last mapper in the chain.
+
]]>
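+
+ For illustration, a chained job might be wired as follows; AMap, BMap,
+ XReduce and CMap are placeholder Mapper/Reducer classes:
+
+   Job job = Job.getInstance(conf, "chain");
+
+   ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class,
+       Text.class, Text.class, new Configuration(false));
+   ChainMapper.addMapper(job, BMap.class, Text.class, Text.class,
+       LongWritable.class, Text.class, new Configuration(false));
+
+   ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class,
+       Text.class, Text.class, new Configuration(false));
+   ChainReducer.addMapper(job, CMap.class, Text.class, Text.class,
+       LongWritable.class, Text.class, new Configuration(false));
+
+   job.waitForCompletion(true);
+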
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The key and values are passed from one element of the chain to the next, by
+ value. For the added Reducer the configuration given for it,
+ reducerConf, has precedence over the job's Configuration.
+ This precedence is in effect when the task is running.
+
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainReducer, this is done by the setReducer or the addMapper for the last
+ element in the chain.
+
+
+ @param job
+ the job
+ @param klass
+ the Reducer class to add.
+ @param inputKeyClass
+ reducer input key class.
+ @param inputValueClass
+ reducer input value class.
+ @param outputKeyClass
+ reducer output key class.
+ @param outputValueClass
+ reducer output value class.
+ @param reducerConf
+ a configuration for the Reducer class. It is recommended to use a
+ Configuration without default values using the
+ Configuration(boolean loadDefaults) constructor with
+ FALSE.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The key and values are passed from one element of the chain to the next, by
+ value. For the added Mapper the configuration given for it,
+ mapperConf, has precedence over the job's Configuration. This
+ precedence is in effect when the task is running.
+
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainMapper, this is done by the addMapper for the last mapper in the
+ chain.
+
+
+ @param job
+ The job.
+ @param klass
+ the Mapper class to add.
+ @param inputKeyClass
+ mapper input key class.
+ @param inputValueClass
+ mapper input value class.
+ @param outputKeyClass
+ mapper output key class.
+ @param outputValueClass
+ mapper output value class.
+ @param mapperConf
+ a configuration for the Mapper class. It is recommended to use a
+ Configuration without default values using the
+ Configuration(boolean loadDefaults) constructor with
+ FALSE.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ For each record output by the Reducer, the Mapper classes are invoked in a
+ chained (or piped) fashion. The output of the reducer becomes the input of
+ the first mapper and output of first becomes the input of the second, and so
+ on until the last Mapper, the output of the last Mapper will be written to
+ the task's output.
+
+
+ The key functionality of this feature is that the Mappers in the chain do not
+ need to be aware that they are executed after the Reducer or in a chain. This
+ enables having reusable specialized Mappers that can be combined to perform
+ composite operations within a single task.
+
+
+ Special care has to be taken when creating chains that the key/values output
+ by a Mapper are valid for the following Mapper in the chain. It is assumed
+ all Mappers and the Reduce in the chain use matching output and input key and
+ value classes as no conversion is done by the chaining code.
+
+
Using the ChainMapper and the ChainReducer classes it is possible to
+ compose Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An
+ immediate benefit of this pattern is a dramatic reduction in disk IO.
+
+ IMPORTANT: There is no need to specify the output key/value classes for the
+ ChainReducer, this is done by the setReducer or the addMapper for the last
+ element in the chain.
+
+ Implementations are responsible for writing the fields of the object
+ to PreparedStatement, and reading the fields of the object from the
+ ResultSet.
+
+
Example:
+ If we have the following table in the database :
+
+ CREATE TABLE MyTable (
+ counter INTEGER NOT NULL,
+ timestamp BIGINT NOT NULL
+ );
+
+ then we can read/write the tuples from/to the table with :
+
+ public class MyWritable implements Writable, DBWritable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ //Writable#write() implementation
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ //Writable#readFields() implementation
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public void write(PreparedStatement statement) throws SQLException {
+ statement.setInt(1, counter);
+ statement.setLong(2, timestamp);
+ }
+
+ public void readFields(ResultSet resultSet) throws SQLException {
+ counter = resultSet.getInt(1);
+ timestamp = resultSet.getLong(2);
+ }
+ }
+
+ Mapper implementations using this MapRunnable must be thread-safe.
+
+ The Map-Reduce job has to be configured with the mapper to use via
+ {@link #setMapperClass(Job, Class)} and
+ the number of threads the thread-pool can use with the
+ {@link #getNumberOfThreads(JobContext)} method. The default
+ value is 10 threads.
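+
+ For illustration, a job can enable the multithreaded runner like this;
+ WebFetchMapper is a placeholder for a thread-safe, typically IO-bound Mapper:
+
+   job.setMapperClass(MultithreadedMapper.class);
+   MultithreadedMapper.setMapperClass(job, WebFetchMapper.class);
+   MultithreadedMapper.setNumberOfThreads(job, 16);
+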
+
+ In applications which take a classname of committer in
+ a configuration option, set it to the canonical name of this class
+ (see {@link #NAME}). When this class is instantiated, it will
+ use the factory mechanism to locate the configured committer for the
+ destination.
+
+
+ In code, explicitly create an instance of this committer through
+ its constructor, then invoke commit lifecycle operations on it.
+ The dynamically configured committer will be created in the constructor
+ and have the lifecycle operations relayed to it.
+
Some applications need to create/write-to side-files, which differ from
+ the actual job-outputs.
+
+
In such cases there could be issues with 2 instances of the same TIP
+ (running simultaneously e.g. speculative tasks) trying to open/write-to the
+ same file (path) on HDFS. Hence the application-writer will have to pick
+ unique names per task-attempt (e.g. using the attemptid, say
+ attempt_200709221812_0001_m_000000_0), not just per TIP.
+
+
To get around this the Map-Reduce framework helps the application-writer
+ out by maintaining a special
+ ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}
+ sub-directory for each task-attempt on HDFS where the output of the
+ task-attempt goes. On successful completion of the task-attempt the files
+ in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only)
+ are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the
+ framework discards the sub-directory of unsuccessful task-attempts. This
+ is completely transparent to the application.
+
+
The application-writer can take advantage of this by creating any
+ side-files required in a work directory during execution
+ of the task, i.e. via
+ {@link #getWorkOutputPath(TaskInputOutputContext)}, and
+ the framework will move them out similarly - thus they don't have to pick
+ unique paths per task-attempt.
+
+
The entire discussion holds true for maps of jobs with
+ reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
+ goes directly to HDFS.
+
+ @return the {@link Path} to the task's temporary output directory
+ for the map-reduce job.]]>
+
+
+
+
+
+
+
+
+
+ The path can be used to create custom files from within the map and
+ reduce tasks. The path name will be unique for each task. The path parent
+ will be the job output directory.
+
+
This method uses the {@link #getUniqueFile} method to make the file name
+ unique for the task.
+
+ @param context the context for the task.
+ @param name the name for the file.
+ @param extension the extension for the file
+ @return a unique path across all tasks of the job.]]>
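+
+ For illustration, a task can create a side-file in its attempt-scoped work
+ directory through the helpers described above; the name "histogram" and
+ extension ".bin" are placeholders:
+
+   // Inside a Mapper/Reducer, using the task's context:
+   Path sideFile = FileOutputFormat.getPathForWorkFile(context, "histogram", ".bin");
+   FileSystem fs = sideFile.getFileSystem(context.getConfiguration());
+   try (FSDataOutputStream out = fs.create(sideFile, false)) {
+     // write side data; it is promoted together with the task's output on commit
+   }
+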
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Warning: when the baseOutputPath is a path that resolves
+ outside of the final job output directory, the directory is created
+ immediately and then persists through subsequent task retries, breaking
+ the concept of output committing.]]>
+
+
+
+
+
+
+
+
+
+ Warning: when the baseOutputPath is a path that resolves
+ outside of the final job output directory, the directory is created
+ immediately and then persists through subsequent task retries, breaking
+ the concept of output committing.]]>
+
+
+
+
+
+
+ super.close() at the
+ end of their close()]]>
+
+
+
+
+ Case one: writing to additional outputs other than the job default output.
+
+ Each additional output, or named output, may be configured with its own
+ OutputFormat, with its own key class and with its own value
+ class.
+
+
+
+ Case two: to write data to different files provided by user
+
+
+
+ MultipleOutputs supports counters, by default they are disabled. The
+ counters group is the {@link MultipleOutputs} class name. The names of the
+ counters are the same as the output name. These count the number records
+ written to each output name.
+
+
+ Usage pattern for job submission:
+
+
+ Job job = new Job();
+
+ FileInputFormat.setInputPath(job, inDir);
+ FileOutputFormat.setOutputPath(job, outDir);
+
+ job.setMapperClass(MOMap.class);
+ job.setReducerClass(MOReduce.class);
+ ...
+
+ // Defines additional single text based output 'text' for the job
+ MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class,
+ LongWritable.class, Text.class);
+
+ // Defines additional sequence-file based output 'sequence' for the job
+ MultipleOutputs.addNamedOutput(job, "seq",
+ SequenceFileOutputFormat.class,
+ LongWritable.class, Text.class);
+ ...
+
+ job.waitForCompletion(true);
+ ...
+
+
+ Usage in Reducer:
+
+ <K, V> String generateFileName(K k, V v) {
+ return k.toString() + "_" + v.toString();
+ }
+
+ public class MOReduce extends
+ Reducer<WritableComparable, Writable,WritableComparable, Writable> {
+ private MultipleOutputs mos;
+ public void setup(Context context) {
+ ...
+ mos = new MultipleOutputs(context);
+ }
+
+ public void reduce(WritableComparable key, Iterator<Writable> values,
+ Context context)
+ throws IOException {
+ ...
+ mos.write("text", , key, new Text("Hello"));
+ mos.write("seq", LongWritable(1), new Text("Bye"), "seq_a");
+ mos.write("seq", LongWritable(2), key, new Text("Chau"), "seq_b");
+ mos.write(key, new Text("value"), generateFileName(key, new Text("value")));
+ ...
+ }
+
+ public void cleanup(Context) throws IOException {
+ mos.close();
+ ...
+ }
+
+ }
+
+
+
+ When used in conjunction with org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat,
+ MultipleOutputs can mimic the behaviour of MultipleTextOutputFormat and MultipleSequenceFileOutputFormat
+ from the old Hadoop API - i.e., output can be written from the Reducer to more than one location.
+
+
+
+ Use MultipleOutputs.write(KEYOUT key, VALUEOUT value, String baseOutputPath) to write key and
+ value to a path specified by baseOutputPath, with no need to specify a named output.
+ Warning: when the baseOutputPath passed to MultipleOutputs.write
+ is a path that resolves outside of the final job output directory, the
+ directory is created immediately and then persists through subsequent
+ task retries, breaking the concept of output committing:
+
+ Use your own code in generateFileName() to create a custom path to your results.
+ '/' characters in baseOutputPath will be translated into directory levels in your file system.
+ Also, append your custom-generated path with "part" or similar, otherwise your output will be -00000, -00001 etc.
+ No call to context.write() is necessary. See example generateFileName() code below.
+
+
+
+ private String generateFileName(Text k) {
+ // expect Text k in format "Surname|Forename"
+ String[] kStr = k.toString().split("\\|");
+
+ String sName = kStr[0];
+ String fName = kStr[1];
+
+ // example for k = Smith|John
+ // output written to /user/hadoop/path/to/output/Smith/John-r-00000 (etc)
+ return sName + "/" + fName;
+ }
+
+
+
+ Using MultipleOutputs in this way will still create zero-sized default output, eg part-00000.
+ To prevent this use LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
+ instead of job.setOutputFormatClass(TextOutputFormat.class); in your Hadoop job configuration.
+
The subarray to be used for the partitioning can be defined by means
+ of the following properties:
+
+
+ mapreduce.partition.binarypartitioner.left.offset:
+ left offset in array (0 by default)
+
+
+ mapreduce.partition.binarypartitioner.right.offset:
+ right offset in array (-1 by default)
+
+
+ Like in Python, both negative and positive offsets are allowed, but
+ the meaning is slightly different. In case of an array of length 5,
+ for instance, the possible offsets are:
+
+ +---+---+---+---+---+
+ | B | B | B | B | B |
+ +---+---+---+---+---+
+ 0 1 2 3 4
+ -5 -4 -3 -2 -1
+
+ The first row of numbers gives the position of the offsets 0...5 in
+ the array; the second row gives the corresponding negative offsets.
+ Contrary to Python, the specified subarray has byte i
+ and j as first and last element, respectively, when
+ i and j are the left and right offset.
+
+
For Hadoop programs written in Java, it is advisable to use one of
+ the following static convenience methods for setting the offsets:
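+
+ For illustration, a driver could restrict the partitioning to bytes 4..7 of
+ the key with the convenience setters (conf is the job's Configuration):
+
+   // Partition on the subarray key[4..7] (inclusive offsets, as described above).
+   BinaryPartitioner.setOffsets(conf, 4, 7);
+   // Or set the bounds individually:
+   // BinaryPartitioner.setLeftOffset(conf, 4);
+   // BinaryPartitioner.setRightOffset(conf, 7);
+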
+
The ResourceManager responds with a new, monotonically
+ increasing, {@link ApplicationId} which is used by the client to submit
+ a new application.
+
+
The ResourceManager also responds with details such
+ as maximum resource capabilities in the cluster as specified in
+ {@link GetNewApplicationResponse}.
+
+ @param request request to get a new ApplicationId
+ @return response containing the new ApplicationId to be used
+ to submit an application
+ @throws YarnException
+ @throws IOException
+ @see #submitApplication(SubmitApplicationRequest)]]>
+
+
+
+
+
+
+
+ The interface used by clients to submit a new application to the
+ ResourceManager.
+
+
The client is required to provide details such as queue,
+ {@link Resource} required to run the ApplicationMaster,
+ the equivalent of {@link ContainerLaunchContext} for launching
+ the ApplicationMaster etc. via the
+ {@link SubmitApplicationRequest}.
+
+
Currently the ResourceManager sends an immediate (empty)
+ {@link SubmitApplicationResponse} on accepting the submission and throws
+ an exception if it rejects the submission. However, this call needs to be
+ followed by {@link #getApplicationReport(GetApplicationReportRequest)}
+ to make sure that the application gets properly submitted - obtaining a
+ {@link SubmitApplicationResponse} from ResourceManager doesn't guarantee
+ that RM 'remembers' this application beyond failover or restart. If RM
+ failover or RM restart happens before ResourceManager saves the
+ application's state successfully, the subsequent
+ {@link #getApplicationReport(GetApplicationReportRequest)} will throw
+ a {@link ApplicationNotFoundException}. The Clients need to re-submit
+ the application with the same {@link ApplicationSubmissionContext} when
+ it encounters the {@link ApplicationNotFoundException} on the
+ {@link #getApplicationReport(GetApplicationReportRequest)} call.
+
+
During the submission process, it checks whether the application
+ already exists. If the application exists, it will simply return
+ SubmitApplicationResponse.
+
+
In secure mode, the ResourceManager verifies access to
+ queues etc. before accepting the application submission.
+
+ @param request request to submit a new application
+ @return (empty) response on accepting the submission
+ @throws YarnException
+ @throws IOException
+ @see #getNewApplication(GetNewApplicationRequest)]]>
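+
+ For illustration, most clients drive this protocol through the YarnClient
+ library rather than calling it directly; a minimal submission sketch, in
+ which the queue name, the resource size and the ContainerLaunchContext
+ amSpec are placeholders:
+
+   YarnClient yarnClient = YarnClient.createYarnClient();
+   yarnClient.init(conf);
+   yarnClient.start();
+
+   // getNewApplication: obtain a new ApplicationId plus cluster limits
+   YarnClientApplication app = yarnClient.createApplication();
+   ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
+   appContext.setApplicationName("demo-app");
+   appContext.setQueue("default");
+   appContext.setResource(Resource.newInstance(1024, 1));  // AM container: 1 GB, 1 vcore
+   appContext.setAMContainerSpec(amSpec);                  // launch context for the AM
+
+   // submitApplication: hand the context to the ResourceManager, then poll its report
+   ApplicationId appId = yarnClient.submitApplication(appContext);
+   ApplicationReport report = yarnClient.getApplicationReport(appId);
+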
+
+
+
+
+
+
+
+ The interface used by clients to request the
+ ResourceManager to fail an application attempt.
+
+
The client, via {@link FailApplicationAttemptRequest} provides the
+ {@link ApplicationAttemptId} of the attempt to be failed.
+
+
In secure mode, the ResourceManager verifies access to the
+ application, queue etc. before failing the attempt.
+
+
Currently, the ResourceManager returns an empty response
+ on success and throws an exception on rejecting the request.
+
+ @param request request to fail an attempt
+ @return ResourceManager returns an empty response
+ on success and throws an exception on rejecting the request
+ @throws YarnException
+ @throws IOException
+ @see #getQueueUserAcls(GetQueueUserAclsInfoRequest)]]>
+
+
+
+
+
+
+
+ The interface used by clients to request the
+ ResourceManager to abort submitted application.
+
+
The client, via {@link KillApplicationRequest} provides the
+ {@link ApplicationId} of the application to be aborted.
+
+
In secure mode, the ResourceManager verifies access to the
+ application, queue etc. before terminating the application.
+
+
Currently, the ResourceManager returns an empty response
+ on success and throws an exception on rejecting the request.
+
+ @param request request to abort a submitted application
+ @return ResourceManager returns an empty response
+ on success and throws an exception on rejecting the request
+ @throws YarnException
+ @throws IOException
+ @see #getQueueUserAcls(GetQueueUserAclsInfoRequest)]]>
+
+
+
+
+
+
+
+ The interface used by clients to get metrics about the cluster from
+ the ResourceManager.
+
+
The ResourceManager responds with a
+ {@link GetClusterMetricsResponse} which includes the
+ {@link YarnClusterMetrics} with details such as number of current
+ nodes in the cluster.
+
+ @param request request for cluster metrics
+ @return cluster metrics
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ The interface used by clients to get a report of all nodes
+ in the cluster from the ResourceManager.
+
+
The ResourceManager responds with a
+ {@link GetClusterNodesResponse} which includes the
+ {@link NodeReport} for all the nodes in the cluster.
+
+ @param request request for report on all nodes
+ @return report on all nodes
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ The interface used by clients to get information about queues
+ from the ResourceManager.
+
+
The client, via {@link GetQueueInfoRequest}, can ask for details such
+ as used/total resources, child queues, running applications etc.
+
+
In secure mode, the ResourceManager verifies access before
+ providing the information.
+
+ @param request request to get queue information
+ @return queue information
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ The interface used by clients to get information about queue
+ acls for current user from the ResourceManager.
+
+
+
The ResourceManager responds with queue acls for all
+ existing queues.
+
+ @param request request to get queue acls for current user
+ @return queue acls for current user
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The interface used by clients to obtain a new {@link ReservationId} for
+ submitting new reservations.
+
+
The ResourceManager responds with a new, unique,
+ {@link ReservationId} which is used by the client to submit
+ a new reservation.
+
+ @param request to get a new ReservationId
+ @return response containing the new ReservationId to be used
+ to submit a new reservation
+ @throws YarnException if the reservation system is not enabled.
+ @throws IOException on IO failures.
+ @see #submitReservation(ReservationSubmissionRequest)]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to submit a new reservation to the
+ {@code ResourceManager}.
+
+
+
+ The client packages all details of its request in a
+ {@link ReservationSubmissionRequest} object. This contains information
+ about the amount of capacity, temporal constraints, and concurrency needs.
+ Furthermore, the reservation might be composed of multiple stages, with
+ ordering dependencies among them.
+
+
+
+ In order to respond, a new admission control component in the
+ {@code ResourceManager} performs an analysis of the resources that have
+ been committed over the period of time the user is requesting, verifies that
+ the user requests can be fulfilled, and that they respect a sharing policy
+ (e.g., {@code CapacityOverTimePolicy}). Once it has positively determined
+ that the ReservationSubmissionRequest is satisfiable the
+ {@code ResourceManager} answers with a
+ {@link ReservationSubmissionResponse} that include a non-null
+ {@link ReservationId}. Upon failure to find a valid allocation the response
+ is an exception with the reason.
+
+ On application submission the client can use this {@link ReservationId} to
+ obtain access to the reserved resources.
+
+
+
+ The system guarantees that during the time-range specified by the user, the
+ reservationID will correspond to a valid reservation. The amount of
+ capacity dedicated to such a queue can vary over time, depending on the
+ allocation that has been determined. But it is guaranteed to satisfy all
+ the constraint expressed by the user in the
+ {@link ReservationSubmissionRequest}.
+
+
+ @param request the request to submit a new Reservation
+ @return response the {@link ReservationId} on accepting the submission
+ @throws YarnException if the request is invalid or reservation cannot be
+ created successfully
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to update an existing Reservation. This is
+ referred to as a re-negotiation process, in which a user that has
+ previously submitted a Reservation requests that its allocation be changed.
+
+
+
+ The allocation is attempted by virtually substituting all previous
+ allocations related to this Reservation with new ones, that satisfy the new
+ {@link ReservationUpdateRequest}. Upon success the previous allocation is
+ substituted by the new one, and on failure (i.e., if the system cannot find
+ a valid allocation for the updated request), the previous allocation
+ remains valid.
+
+ The {@link ReservationId} is not changed, and applications currently
+ running within this reservation will automatically receive the resources
+ based on the new allocation.
+
+
+ @param request to update an existing Reservation (the ReservationRequest
+ should refer to an existing valid {@link ReservationId})
+ @return response empty on successfully updating the existing reservation
+ @throws YarnException if the request is invalid or reservation cannot be
+ updated successfully
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to remove an existing Reservation.
+
+ Upon deletion of a reservation, applications running with this reservation
+ are automatically downgraded to normal jobs running without any dedicated
+ reservation.
+
+
+ @param request to remove an existing Reservation (the ReservationRequest
+ should refer to an existing valid {@link ReservationId})
+ @return response empty on successfully deleting the existing reservation
+ @throws YarnException if the request is invalid or reservation cannot be
+ deleted successfully
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to get the list of reservations in a plan.
+ The reservationId will be used to search for reservations to list if it is
+ provided. Otherwise, it will select active reservations within the
+ startTime and endTime (inclusive).
+
+
+ @param request to list reservations in a plan. Contains fields to select
+ String queue, ReservationId reservationId, long startTime,
+ long endTime, and a bool includeReservationAllocations.
+
+ queue: Required. Cannot be null or empty. Refers to the
+ reservable queue in the scheduler that was selected when
+ creating a reservation submission
+ {@link ReservationSubmissionRequest}.
+
+ reservationId: Optional. If provided, other fields will
+ be ignored.
+
+ startTime: Optional. If provided, only reservations that
+ end after the startTime will be selected. This defaults
+ to 0 if an invalid number is used.
+
+ endTime: Optional. If provided, only reservations that
+ start on or before endTime will be selected. This defaults
+ to Long.MAX_VALUE if an invalid number is used.
+
+ includeReservationAllocations: Optional. Flag that
+ determines whether the entire reservation allocations are
+ to be returned. Reservation allocations are subject to
+ change in the event of re-planning as described by
+ {@code ReservationDefinition}.
+
+ @return response that contains information about reservations that are
+ being searched for.
+ @throws YarnException if the request is invalid
+ @throws IOException on IO failures]]>
+
+
+
+
+
+
+
+
+ The interface used by client to get node to labels mappings in existing cluster
+
+
+ @param request
+ @return node to labels mappings
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by client to get labels to nodes mappings
+ in existing cluster
+
+
+ @param request
+ @return labels to nodes mappings
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by client to get node labels in the cluster
+
+
+ @param request to get node labels collection of this cluster
+ @return node labels collection of this cluster
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by client to set priority of an application.
+
+ @param request to set priority of an application
+ @return an empty response
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ The interface used by clients to request the
+ ResourceManager to signal a container. For example,
+ the client can send command OUTPUT_THREAD_DUMP to dump threads of the
+ container.
+
+
The client, via {@link SignalContainerRequest} provides the
+ id of the container and the signal command.
+
+
In secure mode, the ResourceManager verifies access to the
+ application before signaling the container.
+ The user needs to have MODIFY_APP permission.
+
+
Currently, the ResourceManager returns an empty response
+ on success and throws an exception on rejecting the request.
+
+ @param request request to signal a container
+ @return ResourceManager returns an empty response
+ on success and throws an exception on rejecting the request
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by client to set ApplicationTimeouts of an application.
+ The UpdateApplicationTimeoutsRequest should have timeout value with
+ absolute time with ISO8601 format yyyy-MM-dd'T'HH:mm:ss.SSSZ.
+
+ Note: If application timeout value is less than or equal to current
+ time then update application throws YarnException.
+ @param request to set ApplicationTimeouts of an application
+ @return a response with updated timeouts.
+ @throws YarnException if update request has empty values or application is
+ in completing states.
+ @throws IOException on IO failures]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to get all the resource profiles that are
+ available on the ResourceManager.
+
+ @param request request to get all the resource profiles
+ @return Response containing a map of the profile name to Resource
+ capabilities
+ @throws YARNFeatureNotEnabledException if resource-profile is disabled
+ @throws YarnException if any error happens inside YARN
+ @throws IOException in case of other errors]]>
+
+
+
+
+
+
+
+
+ The interface to get the details for a specific resource profile.
+
+ @param request request to get the details of a resource profile
+ @return Response containing the details for a particular resource profile
+ @throws YARNFeatureNotEnabledException if resource-profile is disabled
+ @throws YarnException if any error happens inside YARN
+ @throws IOException in case of other errors]]>
+
+
+
+
+
+
+
+
+ The interface to get the details for a specific resource profile.
+
+ @param request request to get the details of a resource profile
+ @return Response containing the details for a particular resource profile
+ @throws YarnException if any error happens inside YARN
+ @throws IOException in case of other errors]]>
+
+
+
+
+
+
+
+
+ The interface used by client to get attributes to nodes mappings
+ available in ResourceManager.
+
+
+ @param request request to get details of attributes to nodes mapping.
+ @return Response containing the details of attributes to nodes mappings.
+ @throws YarnException if any error happens inside YARN
+ @throws IOException in case of other errors]]>
+
+
+
+
+
+
+
+
+ The interface used by client to get node attributes available in
+ ResourceManager.
+
+
+ @param request request to get node attributes collection of this cluster.
+ @return Response containing node attributes collection.
+ @throws YarnException if any error happens inside YARN.
+ @throws IOException in case of other errors.]]>
+
+
+
+
+
+
+
+
+ The interface used by client to get node to attributes mappings
+ in existing cluster.
+
+
+ @param request request to get nodes to attributes mapping.
+ @return nodes to attributes mappings.
+ @throws YarnException if any error happens inside YARN.
+ @throws IOException]]>
+
+
+
+ The protocol between clients and the ResourceManager
+ to submit/abort jobs and to get information on applications, cluster metrics,
+ nodes, queues and ACLs.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The protocol between clients and the ApplicationHistoryServer to
+ get the information of completed applications etc.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+ The interface used by a new ApplicationMaster to register with
+ the ResourceManager.
+
+
+
+ The ApplicationMaster needs to provide details such as RPC
+ Port, HTTP tracking url etc. as specified in
+ {@link RegisterApplicationMasterRequest}.
+
+
+
+ The ResourceManager responds with critical details such as
+ maximum resource capabilities in the cluster as specified in
+ {@link RegisterApplicationMasterResponse}.
+
+
+
+ Re-register is only allowed for Unmanaged Application Master
+ (UAM) HA, with
+ {@link org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext#getKeepContainersAcrossApplicationAttempts()}
+ set to true.
+
+
+ @param request registration request
+ @return registration response
+ @throws YarnException
+ @throws IOException
+ @throws InvalidApplicationMasterRequestException The exception is thrown
+ when an ApplicationMaster tries to register more than once.
+ @see RegisterApplicationMasterRequest
+ @see RegisterApplicationMasterResponse]]>
+
+
+
+
+
+
+
+ The interface used by an ApplicationMaster to notify the
+ ResourceManager about its completion (success or failed).
+
+
The ApplicationMaster has to provide details such as
+ final state, diagnostics (in case of failures) etc. as specified in
+ {@link FinishApplicationMasterRequest}.
+
+
The ResourceManager responds with
+ {@link FinishApplicationMasterResponse}.
+
+ @param request completion request
+ @return completion response
+ @throws YarnException
+ @throws IOException
+ @see FinishApplicationMasterRequest
+ @see FinishApplicationMasterResponse]]>
+
+
+
+
+
+
+
+
+ The main interface between an ApplicationMaster and the
+ ResourceManager.
+
+
+
+ The ApplicationMaster uses this interface to provide a list of
+ {@link ResourceRequest} and returns unused {@link Container} allocated to
+ it via {@link AllocateRequest}. Optionally, the
+ ApplicationMaster can also blacklist resources which
+ it doesn't want to use.
+
+
+
+ This also doubles up as a heartbeat to let the
+ ResourceManager know that the ApplicationMaster
+ is alive. Thus, applications should periodically make this call to be kept
+ alive. The frequency depends on
+ {@link YarnConfiguration#RM_AM_EXPIRY_INTERVAL_MS} which defaults to
+ {@link YarnConfiguration#DEFAULT_RM_AM_EXPIRY_INTERVAL_MS}.
+
+
+
+ The ResourceManager responds with list of allocated
+ {@link Container}, status of completed containers and headroom information
+ for the application.
+
+
+
+ The ApplicationMaster can use the available headroom
+ (resources) to decide how to utilize allocated resources and make informed
+ decisions about future resource requests.
+
+
+ @param request
+ allocation request
+ @return allocation response
+ @throws YarnException
+ @throws IOException
+ @throws InvalidApplicationMasterRequestException
+ This exception is thrown when an ApplicationMaster calls allocate
+ without registering first.
+ @throws InvalidResourceBlacklistRequestException
+ This exception is thrown when an application provides an invalid
+ specification for blacklist of resources.
+ @throws InvalidResourceRequestException
+ This exception is thrown when a {@link ResourceRequest} is out of
+ the range of the configured lower and upper limits on the
+ resources.
+ @see AllocateRequest
+ @see AllocateResponse]]>
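+
+ For illustration, an ApplicationMaster's use of this protocol follows a
+ register / allocate-heartbeat / finish pattern; a minimal sketch over a
+ proxy named amProtocol, where the ask and release lists, the progress value
+ and the done flag are maintained elsewhere:
+
+   amProtocol.registerApplicationMaster(
+       RegisterApplicationMasterRequest.newInstance("am-host", 0, ""));
+
+   int lastResponseId = 0;
+   while (!done) {
+     AllocateRequest heartbeat = AllocateRequest.newBuilder()
+         .responseId(lastResponseId)
+         .progress(progress)
+         .askList(ask)          // ResourceRequests
+         .releaseList(release)  // unused ContainerIds being returned
+         .build();
+     AllocateResponse response = amProtocol.allocate(heartbeat);
+     lastResponseId = response.getResponseId();
+     // launch response.getAllocatedContainers(), process completed containers ...
+   }
+
+   amProtocol.finishApplicationMaster(FinishApplicationMasterRequest.newInstance(
+       FinalApplicationStatus.SUCCEEDED, "", ""));
+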
+
+
+
+ The protocol between a live instance of ApplicationMaster
+ and the ResourceManager.
+
+
This is used by the ApplicationMaster to register/unregister
+ and to request and obtain resources in the cluster from the
+ ResourceManager.
]]>
+
+
+
+
+
+
+
+
+
+
+
+ The interface used by clients to claim a resource with the
+ SharedCacheManager. The client uses a checksum to identify the
+ resource and an {@link ApplicationId} to identify which application will be
+ using the resource.
+
+
+
+ The SharedCacheManager responds with whether or not the
+ resource exists in the cache. If the resource exists, a Path
+ to the resource in the shared cache is returned. If the resource does not
+ exist, the response is empty.
+
+
+ @param request request to claim a resource in the shared cache
+ @return response indicating if the resource is already in the cache
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to release a resource with the
+ SharedCacheManager. This method is called once an application
+ is no longer using a claimed resource in the shared cache. The client uses
+ a checksum to identify the resource and an {@link ApplicationId} to
+ identify which application is releasing the resource.
+
+
+
+ Note: This method is an optimization and the client is not required to call
+ it for correctness.
+
+
+
+ Currently the SharedCacheManager sends an empty response.
+
+
+ @param request request to release a resource in the shared cache
+ @return (empty) response on releasing the resource
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+ The protocol between clients and the SharedCacheManager to claim
+ and release resources in the shared cache.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+ The ApplicationMaster provides a list of
+ {@link StartContainerRequest}s to a NodeManager to
+ start {@link Container}s allocated to it using this interface.
+
+
+
+ The ApplicationMaster has to provide details such as allocated
+ resource capability, security tokens (if enabled), command to be executed
+ to start the container, environment for the process, necessary
+ binaries/jar/shared-objects etc. via the {@link ContainerLaunchContext} in
+ the {@link StartContainerRequest}.
+
+
+
+ The NodeManager sends a response via
+ {@link StartContainersResponse} which includes a list of
+ successfully launched {@link Container}s, a
+ containerId-to-exception map for each failed {@link StartContainerRequest} in
+ which the exception indicates per-container errors, and an
+ allServicesMetaData map between the names of auxiliary services and their
+ corresponding meta-data. Note: Non-container-specific exceptions will
+ still be thrown by the API method itself.
+
+
+ The ApplicationMaster can use
+ {@link #getContainerStatuses(GetContainerStatusesRequest)} to get updated
+ statuses of the to-be-launched or launched containers.
+
+
+ @param request
+ request to start a list of containers
+ @return response including containerIds of all successfully launched
+ containers, a containerId-to-exception map for failed requests and
+ an allServicesMetaData map.
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The ApplicationMaster requests a NodeManager to
+ stop a list of {@link Container}s allocated to it using this
+ interface.
+
+
+
+ The ApplicationMaster sends a {@link StopContainersRequest}
+ which includes the {@link ContainerId}s of the containers to be stopped.
+
+
+
+ The NodeManager sends a response via
+ {@link StopContainersResponse} which includes a list of {@link ContainerId}
+ s of successfully stopped containers, a containerId-to-exception map for
+ each failed request in which the exception indicates per-container
+ errors. Note: Non-container-specific exceptions will still be thrown by
+ the API method itself. ApplicationMaster can use
+ {@link #getContainerStatuses(GetContainerStatusesRequest)} to get updated
+ statuses of the containers.
+
+
+ @param request
+ request to stop a list of containers
+ @return response which includes a list of containerIds of successfully
+ stopped containers, a containerId-to-exception map for failed
+ requests.
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The API used by the ApplicationMaster to request for current
+ statuses of Containers from the NodeManager.
+
+
+
+ The ApplicationMaster sends a
+ {@link GetContainerStatusesRequest} which includes the {@link ContainerId}s
+ of all containers whose statuses are needed.
+
+
+
+ The NodeManager responds with
+ {@link GetContainerStatusesResponse} which includes a list of
+ {@link ContainerStatus} of the successfully queried containers and a
+ containerId-to-exception map for each failed request in which the exception
+ indicates per-container errors. Note: Non-container-specific
+ exceptions will still be thrown by the API method itself.
+
+
+ @param request
+ request to get ContainerStatuses of containers with
+ the specified ContainerIds
+ @return response containing the list of ContainerStatus of the
+ successfully queried containers and a containerId-to-exception map
+ for failed requests.
+
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The API used by the ApplicationMaster to request for
+ resource increase of running containers on the NodeManager.
+
+
+ @param request
+ request to increase resource of a list of containers
+ @return response which includes a list of containerIds of containers
+ whose resource has been successfully increased and a
+ containerId-to-exception map for failed requests.
+
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The API used by the ApplicationMaster to request for
+ resource update of running containers on the NodeManager.
+
+
+ @param request
+ request to update resource of a list of containers
+ @return response which includes a list of containerIds of containers
+ whose resource has been successfully updated and a
+ containerId-to-exception map for failed requests.
+
+ @throws YarnException Exception specific to YARN
+ @throws IOException IOException thrown from NodeManager]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The protocol between an ApplicationMaster and a
+ NodeManager to start/stop and increase resource of containers
+ and to get status of running containers.
+
+
If security is enabled the NodeManager verifies that the
+ ApplicationMaster has truly been allocated the container
+ by the ResourceManager and also verifies all interactions such
+ as stopping the container or obtaining status information for the container.
+
]]>
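+
+ For illustration, an ApplicationMaster typically drives this protocol
+ through a proxy (cm below) for the NodeManager hosting an allocated
+ Container; localResources, env, commands and tokens are placeholders built
+ by the application:
+
+   ContainerLaunchContext launchContext = ContainerLaunchContext.newInstance(
+       localResources, env, commands, null, tokens, null);
+
+   StartContainerRequest start =
+       StartContainerRequest.newInstance(launchContext, container.getContainerToken());
+   cm.startContainers(StartContainersRequest.newInstance(
+       Collections.singletonList(start)));
+
+   // Poll the container's status, then stop it when the work is done.
+   GetContainerStatusesResponse statuses = cm.getContainerStatuses(
+       GetContainerStatusesRequest.newInstance(
+           Collections.singletonList(container.getId())));
+   cm.stopContainers(StopContainersRequest.newInstance(
+       Collections.singletonList(container.getId())));
+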
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ response id used to track duplicate responses.
+ @return response id]]>
+
+
+
+
+
+ response id used to track duplicate responses.
+ @param id response id]]>
+
+
+
+
+ current progress of application.
+ @return current progress of application]]>
+
+
+
+
+
+ current progress of application
+ @param progress current progress of application]]>
+
+
+
+
+ ResourceRequest to update the
+ ResourceManager about the application's resource requirements.
+ @return the list of ResourceRequest
+ @see ResourceRequest]]>
+
+
+
+
+
+ ResourceRequest to update the
+ ResourceManager about the application's resource requirements.
+ @param resourceRequests list of ResourceRequest to update the
+ ResourceManager about the application's
+ resource requirements
+ @see ResourceRequest]]>
+
+
+
+
+ ContainerId of containers being
+ released by the ApplicationMaster.
+ @return list of ContainerId of containers being
+ released by the ApplicationMaster]]>
+
+
+
+
+
+ ContainerId of containers being
+ released by the ApplicationMaster
+ @param releaseContainers list of ContainerId of
+ containers being released by the
+ ApplicationMaster]]>
+
+
+
+
+ ResourceBlacklistRequest being sent by the
+ ApplicationMaster.
+ @return the ResourceBlacklistRequest being sent by the
+ ApplicationMaster
+ @see ResourceBlacklistRequest]]>
+
+
+
+
+
+ ResourceBlacklistRequest to inform the
+ ResourceManager about the blacklist additions and removals
+ per the ApplicationMaster.
+
+ @param resourceBlacklistRequest the ResourceBlacklistRequest
+ to inform the ResourceManager about
+ the blacklist additions and removals
+ per the ApplicationMaster
+ @see ResourceBlacklistRequest]]>
+
+
+
+
+ ApplicationMaster.
+ @return list of {@link UpdateContainerRequest}
+ being sent by the
+ ApplicationMaster.]]>
+
+
+
+
+
+ ResourceManager about the containers that need to be
+ updated.
+ @param updateRequests list of UpdateContainerRequest for
+ containers to be updated]]>
+
+
+
+
+ ApplicationMaster.
+ @return list of {@link SchedulingRequest} being sent by the
+ ApplicationMaster.]]>
+
+
+
+
+
+ ResourceManager about the application's resource requirements
+ (potentially including allocation tags and placement constraints).
+ @param schedulingRequests list of {@link SchedulingRequest} to update
+ the ResourceManager about the application's resource
+ requirements.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The core request sent by the ApplicationMaster to the
+ ResourceManager to obtain resources in the cluster.
+
+
The request includes:
+
+
A response id to track duplicate responses.
+
Progress information.
+
+ A list of {@link ResourceRequest} to inform the
+ ResourceManager about the application's
+ resource requirements.
+
+
+ A list of unused {@link Container} which are being returned.
+
+
+ A list of {@link UpdateContainerRequest} to inform
+ the ResourceManager about the change in
+ requirements of running containers.
+
+
+
+ @see ApplicationMasterProtocol#allocate(AllocateRequest)]]>
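A minimal sketch of how the AllocateRequest builder documented in this file might be used to assemble a heartbeat request; the priority, resource size, and container count are illustrative values, not defaults taken from this patch.

    import java.util.Collections;

    import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
    import org.apache.hadoop.yarn.api.records.Priority;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.api.records.ResourceRequest;

    public class AllocateRequestSketch {
      /** Builds a heartbeat request asking for one 1 GB / 1 vcore container anywhere. */
      public static AllocateRequest buildHeartbeat(int responseId, float progress) {
        ResourceRequest ask = ResourceRequest.newInstance(
            Priority.newInstance(0),        // illustrative priority
            ResourceRequest.ANY,            // no locality preference
            Resource.newInstance(1024, 1),  // 1024 MB, 1 vcore (illustrative)
            1);                             // one container
        return AllocateRequest.newBuilder()
            .responseId(responseId)         // tracks duplicate responses
            .progress(progress)             // current application progress
            .askList(Collections.singletonList(ask))
            .releaseList(Collections.emptyList())
            .build();
      }
    }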
+
+
+
+
+
+
+
+
+ responseId of the request.
+ @see AllocateRequest#setResponseId(int)
+ @param responseId responseId of the request
+ @return {@link AllocateRequestBuilder}]]>
+
+
+
+
+
+ progress of the request.
+ @see AllocateRequest#setProgress(float)
+ @param progress progress of the request
+ @return {@link AllocateRequestBuilder}]]>
+
+
+
+
+
+ askList of the request.
+ @see AllocateRequest#setAskList(List)
+ @param askList askList of the request
+ @return {@link AllocateRequestBuilder}]]>
+
+
+
+
+
+ releaseList of the request.
+ @see AllocateRequest#setReleaseList(List)
+ @param releaseList releaseList of the request
+ @return {@link AllocateRequestBuilder}]]>
+
+
+
+
+
+ resourceBlacklistRequest of the request.
+ @see AllocateRequest#setResourceBlacklistRequest(
+ ResourceBlacklistRequest)
+ @param resourceBlacklistRequest
+ resourceBlacklistRequest of the request
+ @return {@link AllocateRequestBuilder}]]>
+
+
+
+
+
+ updateRequests of the request.
+ @see AllocateRequest#setUpdateRequests(List)
+ @param updateRequests updateRequests of the request
+ @return {@link AllocateRequestBuilder}]]>
+
+
+
+
+
+ schedulingRequests of the request.
+ @see AllocateRequest#setSchedulingRequests(List)
+ @param schedulingRequests SchedulingRequest of the request
+ @return {@link AllocateRequestBuilder}]]>
+
+
+
+
+
+ trackingUrl of the request.
+ @see AllocateRequest#setTrackingUrl(String)
+ @param trackingUrl new tracking url
+ @return {@link AllocateRequestBuilder}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ResourceManager needs the
+ ApplicationMaster to take some action then it will send an
+ AMCommand to the ApplicationMaster. See AMCommand
+ for details on commands and actions for them.
+ @return AMCommand if the ApplicationMaster should
+ take action, null otherwise
+ @see AMCommand]]>
+
+
+
+
+ last response id.
+ @return last response id]]>
+
+
+
+
+ newly allocated Container by the
+ ResourceManager.
+ @return list of newly allocatedContainer]]>
+
+
+
+
+ available headroom for resources in the cluster for the
+ application.
+ @return limit of available headroom for resources in the cluster for the
+ application]]>
+
+
+
+
+ completed containers' statuses.
+ @return the list of completed containers' statuses]]>
+
+
+
+
+ updated NodeReports. Updates could
+ be changes in health, availability etc of the nodes.
+ @return The delta of updated nodes since the last response]]>
+
+
+
+
+
+
+
+
+
+
+ The message is a snapshot of the resources the RM wants back from the AM.
+ While demand persists, the RM will repeat its request; applications should
+ not interpret each message as a request for additional
+ resources on top of previous messages. Resources requested consistently
+ over some duration may be forcibly killed by the RM.
+
+ @return A specification of the resources to reclaim from this AM.]]>
+
+
+
+
+
+ 1) AM is receiving first container on underlying NodeManager.
+ OR
+ 2) NMToken master key rolled over in ResourceManager and AM is getting new
+ container on the same underlying NodeManager.
+
+ AM will receive one NMToken per NM irrespective of the number of containers
+ issued on same NM. AM is expected to store these tokens until issued a
+ new token for the same NM.
+ @return list of NMTokens required for communicating with NM]]>
+
+
+
+
+ ResourceManager.
+ @return list of newly increased containers]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UpdateContainerError for
+ containers updates requests that were in error]]>
+
+
+
+
+ ResourceManager from previous application attempts which
+ have not been reported to the Application Master yet.
+
+ These containers were recovered by the RM after the application master
+ had already registered. This may happen after RM restart when some NMs get
+ delayed in connecting to the RM and reporting the active containers.
+ Since they were not reported in the registration
+ response, they are reported in the response to the AM heartbeat.
+
+ @return the list of running containers as viewed by
+ ResourceManager from previous application attempts.]]>
+
+
+
+
+
+
+
+
+ ResourceManager to the
+ ApplicationMaster during resource negotiation.
+
+ The response includes:
+
+
Response ID to track duplicate responses.
+
+ An AMCommand sent by ResourceManager to let the
+ {@code ApplicationMaster} take some actions (resync, shutdown etc.).
+
+
A list of newly allocated {@link Container}.
+
A list of completed {@link Container}s' statuses.
+
+ The available headroom for resources in the cluster for the
+ application.
+
+
A list of nodes whose status has been updated.
+
The number of available nodes in a cluster.
+
A description of resources requested back by the cluster
+
AMRMToken, if AMRMToken has been rolled over
+
+ A list of {@link Container} representing the containers
+ whose resource has been increased.
+
+
+ A list of {@link Container} representing the containers
+ whose resource has been decreased.
+
+
+
+ @see ApplicationMasterProtocol#allocate(AllocateRequest)]]>
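A short, hedged sketch of consuming the response fields documented above inside an AM heartbeat loop; the println calls stand in for real container launch and cleanup logic.

    import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
    import org.apache.hadoop.yarn.api.records.AMCommand;
    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.ContainerStatus;

    public class AllocateResponseSketch {
      /** Inspects one heartbeat response; a real AM would launch/clean up containers here. */
      public static void handle(AllocateResponse response) {
        if (response.getAMCommand() == AMCommand.AM_RESYNC) {
          // The RM asked the AM to re-register before continuing.
          return;
        }
        for (Container allocated : response.getAllocatedContainers()) {
          System.out.println("Allocated " + allocated.getId()
              + " on " + allocated.getNodeId());
        }
        for (ContainerStatus finished : response.getCompletedContainersStatuses()) {
          System.out.println("Completed " + finished.getContainerId()
              + " exit=" + finished.getExitStatus());
        }
        System.out.println("Headroom left: " + response.getAvailableResources());
      }
    }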
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: {@link NMToken} will be used for authenticating communication with
+ {@code NodeManager}.
+ @return the list of container tokens to be used for authorization during
+ container resource update.
+ @see NMToken]]>
+
+
+
+
+
+ AllocateResponse.getUpdatedContainers.
+ The token contains the container id and resource capability required for
+ container resource update.
+ @param containersToUpdate the list of container tokens to be used
+ for container resource update.]]>
+
+
+
+ The request sent by Application Master to the
+ Node Manager to change the resource quota of a container.
+
+ @see ContainerManagementProtocol#updateContainer(ContainerUpdateRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The response sent by the NodeManager to the
+ ApplicationMaster when asked to update container resource.
+
+
+ @see ContainerManagementProtocol#updateContainer(ContainerUpdateRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationAttemptId of the attempt to be failed.
+ @return ApplicationAttemptId of the attempt.]]>
+
+
+
+
+
+
+ The request sent by the client to the ResourceManager
+ to fail an application attempt.
+
+
The request includes the {@link ApplicationAttemptId} of the attempt to
+ be failed.
+
+ @see ApplicationClientProtocol#failApplicationAttempt(FailApplicationAttemptRequest)]]>
+
+
+
+
+
+
+
+
+ The response sent by the ResourceManager to the client
+ failing an application attempt.
+
+
Currently it's empty.
+
+ @see ApplicationClientProtocol#failApplicationAttempt(FailApplicationAttemptRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ final state of the ApplicationMaster.
+ @return final state of the ApplicationMaster]]>
+
+
+
+
+
+ final state of the ApplicationMaster
+ @param finalState final state of the ApplicationMaster]]>
+
+
+
+
+ diagnostic information on application failure.
+ @return diagnostic information on application failure]]>
+
+
+
+
+
+ diagnostic information on application failure.
+ @param diagnostics diagnostic information on application failure]]>
+
+
+
+
+ tracking URL for the ApplicationMaster.
+ If this URL contains a scheme, it will be used by the resource manager
+ web application proxy; otherwise the scheme will default to http.
+ @return tracking URL for the ApplicationMaster]]>
+
+
+
+
+
+ final tracking URL for the ApplicationMaster.
+ This is the web-URL to which ResourceManager or web-application proxy will
+ redirect client/users once the application is finished and the
+ ApplicationMaster is gone.
+
+ If the passed url has a scheme then that will be used by the
+ ResourceManager and web-application proxy, otherwise the scheme will
+ default to http.
+
+
+ Empty, null, "N/A" strings are all valid besides a real URL. In case an url
+ isn't explicitly passed, it defaults to "N/A" on the ResourceManager.
+
+
+ @param url
+ tracking URL for the ApplicationMaster]]>
+
+
+
+
+ The final request includes details such as:
+
+
Final state of the {@code ApplicationMaster}
+
+ Diagnostic information in case of failure of the
+ {@code ApplicationMaster}
+
+
Tracking URL
+
+
+ @see ApplicationMasterProtocol#finishApplicationMaster(FinishApplicationMasterRequest)]]>
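A small sketch, assuming the standard newInstance factory, of building the unregister request described above; the diagnostics string and tracking URL are illustrative.

    import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
    import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;

    public class FinishRequestSketch {
      /** Builds the unregister request; diagnostics and URL are illustrative. */
      public static FinishApplicationMasterRequest successfulFinish() {
        return FinishApplicationMasterRequest.newInstance(
            FinalApplicationStatus.SUCCEEDED,
            "All work items completed",         // diagnostics (may be empty)
            "http://history.example.com/app");  // tracking URL, "N/A" is also accepted
      }
    }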
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ResourceManager to an
+ ApplicationMaster on its completion.
+
+ The response includes:
+
+
A flag which indicates that the application has successfully unregistered
+ with the RM and the application can safely stop.
+
+
+ Note: The flag indicates whether the application has successfully
+ unregistered and is safe to stop. The application may stop after the flag is
+ true. If the application stops before the flag is true then the RM may retry
+ the application.
+
+ @see ApplicationMasterProtocol#finishApplicationMaster(FinishApplicationMasterRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationAttemptId of an application attempt.
+
+ @return ApplicationAttemptId of an application attempt]]>
+
+
+
+
+
+ ApplicationAttemptId of an application attempt
+
+ @param applicationAttemptId
+ ApplicationAttemptId of an application attempt]]>
+
+
+
+
+ The request sent by a client to the ResourceManager to get an
+ {@link ApplicationAttemptReport} for an application attempt.
+
+
+
+ The request should include the {@link ApplicationAttemptId} of the
+ application attempt.
+
+
+ @see ApplicationAttemptReport
+ @see ApplicationHistoryProtocol#getApplicationAttemptReport(GetApplicationAttemptReportRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationAttemptReport for the application attempt.
+
+ @return ApplicationAttemptReport for the application attempt]]>
+
+
+
+
+
+ ApplicationAttemptReport for the application attempt.
+
+ @param applicationAttemptReport
+ ApplicationAttemptReport for the application attempt]]>
+
+
+
+
+ The response sent by the ResourceManager to a client requesting
+ an application attempt report.
+
+
+
+ The response includes an {@link ApplicationAttemptReport} which has the
+ details about the particular application attempt
+
+
+ @see ApplicationAttemptReport
+ @see ApplicationHistoryProtocol#getApplicationAttemptReport(GetApplicationAttemptReportRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId of an application
+
+ @return ApplicationId of an application]]>
+
+
+
+
+
+ ApplicationId of an application
+
+ @param applicationId
+ ApplicationId of an application]]>
+
+
+
+
+ The request from clients to get a list of application attempt reports of an
+ application from the ResourceManager.
+
+
+ @see ApplicationHistoryProtocol#getApplicationAttempts(GetApplicationAttemptsRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationReport of an application.
+
+ @return a list of ApplicationReport of an application]]>
+
+
+
+
+
+ ApplicationReport of an application.
+
+ @param applicationAttempts
+ a list of ApplicationReport of an application]]>
+
+
+
+
+ The response sent by the ResourceManager to a client requesting
+ a list of {@link ApplicationAttemptReport} for application attempts.
+
+
+
+ The ApplicationAttemptReport for each application includes the
+ details of an application attempt.
+
+
+ @see ApplicationAttemptReport
+ @see ApplicationHistoryProtocol#getApplicationAttempts(GetApplicationAttemptsRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId of the application.
+ @return ApplicationId of the application]]>
+
+
+
+
+
+ ApplicationId of the application
+ @param applicationId ApplicationId of the application]]>
+
+
+
+ The request sent by a client to the ResourceManager to
+ get an {@link ApplicationReport} for an application.
+
+
The request should include the {@link ApplicationId} of the
+ application.
+
+ @see ApplicationClientProtocol#getApplicationReport(GetApplicationReportRequest)
+ @see ApplicationReport]]>
+
+
+
+
+
+
+
+
+
+ ApplicationReport for the application.
+ @return ApplicationReport for the application]]>
+
+
+
+ The response sent by the ResourceManager to a client
+ requesting an application report.
+
+
The response includes an {@link ApplicationReport} which has details such
+ as user, queue, name, host on which the ApplicationMaster is
+ running, RPC port, tracking URL, diagnostics, start time etc.
+
+ @see ApplicationClientProtocol#getApplicationReport(GetApplicationReportRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The request from clients to get a report of Applications matching the
+ given application types in the cluster from the
+ ResourceManager.
+
+
+ @see ApplicationClientProtocol#getApplications(GetApplicationsRequest)
+
+
+ Setting any of the parameters to null would just disable that
+ filter
+
+ @param scope {@link ApplicationsRequestScope} to filter by
+ @param users list of users to filter by
+ @param queues list of scheduler queues to filter by
+ @param applicationTypes types of applications
+ @param applicationTags application tags to filter by
+ @param applicationStates application states to filter by
+ @param startRange range of application start times to filter by
+ @param finishRange range of application finish times to filter by
+ @param limit number of applications to limit to
+ @return {@link GetApplicationsRequest} to be used with
+ {@link ApplicationClientProtocol#getApplications(GetApplicationsRequest)}]]>
+
+
+
+
+
+
+ The request from clients to get a report of Applications matching the
+ given application types in the cluster from the
+ ResourceManager.
+
+
+ @param scope {@link ApplicationsRequestScope} to filter by
+ @see ApplicationClientProtocol#getApplications(GetApplicationsRequest)
+ @return a report of Applications in {@link GetApplicationsRequest}]]>
+
+
+
+
+
+
+ The request from clients to get a report of Applications matching the
+ given application types in the cluster from the
+ ResourceManager.
+
+
+
+ @see ApplicationClientProtocol#getApplications(GetApplicationsRequest)
+ @return a report of Applications in {@link GetApplicationsRequest}]]>
+
+
+
+
+
+
+ The request from clients to get a report of Applications matching the
+ given application states in the cluster from the
+ ResourceManager.
+
+
+
+ @see ApplicationClientProtocol#getApplications(GetApplicationsRequest)
+ @return a report of Applications in {@link GetApplicationsRequest}]]>
+
+
+
+
+
+
+
+ The request from clients to get a report of Applications matching the
+ given application states and application types in the cluster from the
+ ResourceManager.
+
+
+
+ @see ApplicationClientProtocol#getApplications(GetApplicationsRequest)
+ @return a report of Applications in GetApplicationsRequest]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The request from clients to get a report of Applications
+ in the cluster from the ResourceManager.
+
+ @see ApplicationClientProtocol#getApplications(GetApplicationsRequest)]]>
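As an illustration of the factories described above, a hedged sketch that asks only for applications in the ACCEPTED or RUNNING states.

    import java.util.EnumSet;

    import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
    import org.apache.hadoop.yarn.api.records.YarnApplicationState;

    public class GetApplicationsSketch {
      /** Requests reports for applications that are currently accepted or running. */
      public static GetApplicationsRequest activeApplications() {
        return GetApplicationsRequest.newInstance(
            EnumSet.of(YarnApplicationState.ACCEPTED, YarnApplicationState.RUNNING));
      }
    }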
+
+
+
+
+
+
+
+
+
+ ApplicationReport for applications.
+ @return ApplicationReport for applications]]>
+
+
+
+ The response sent by the ResourceManager to a client
+ requesting an {@link ApplicationReport} for applications.
+
+
The ApplicationReport for each application includes details
+ such as user, queue, name, host on which the ApplicationMaster
+ is running, RPC port, tracking URL, diagnostics, start time etc.
+
+ @see ApplicationReport
+ @see ApplicationClientProtocol#getApplications(GetApplicationsRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The request from clients to get the node to attribute value mapping for all
+ or a given set of Node AttributeKeys in the cluster from the
+ ResourceManager.
+
+
+ @see ApplicationClientProtocol#getAttributesToNodes
+ (GetAttributesToNodesRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The response sent by the ResourceManager to a client requesting
+ the node to attribute value mapping for all or a given set of Node AttributeKeys.
+
+
+ @see ApplicationClientProtocol#getAttributesToNodes
+ (GetAttributesToNodesRequest)]]>
+
+
+
+
+
+
+
+
+
+
+ The request sent by clients to get cluster metrics from the
+ ResourceManager.
+
+
Currently, this is empty.
+
+ @see ApplicationClientProtocol#getClusterMetrics(GetClusterMetricsRequest)]]>
+
+
+
+
+
+
+
+
+
+ YarnClusterMetrics for the cluster.
+ @return YarnClusterMetrics for the cluster]]>
+
+
+
+ ResourceManager to a client
+ requesting cluster metrics.
+
+ @see YarnClusterMetrics
+ @see ApplicationClientProtocol#getClusterMetrics(GetClusterMetricsRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The request from clients to get node attributes in the cluster from the
+ ResourceManager.
+
+
+ @see ApplicationClientProtocol#getClusterNodeAttributes
+ (GetClusterNodeAttributesRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The response sent by the ResourceManager to a client requesting
+ node attributes in the cluster.
+
+
+ @see ApplicationClientProtocol#getClusterNodeAttributes
+ (GetClusterNodeAttributesRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The request from clients to get a report of all nodes
+ in the cluster from the ResourceManager.
+
+ The request will ask for all nodes in the given {@link NodeState}s.
+
+ @see ApplicationClientProtocol#getClusterNodes(GetClusterNodesRequest)]]>
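A sketch, assuming the EnumSet-based newInstance factory, that restricts the node report described above to RUNNING nodes.

    import java.util.EnumSet;

    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest;
    import org.apache.hadoop.yarn.api.records.NodeState;

    public class ClusterNodesSketch {
      /** Asks only for nodes that are currently RUNNING; other states are filtered out. */
      public static GetClusterNodesRequest runningNodes() {
        return GetClusterNodesRequest.newInstance(EnumSet.of(NodeState.RUNNING));
      }
    }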
+
+
+
+
+
+
+
+
+
+ NodeReport for all nodes in the cluster.
+ @return NodeReport for all nodes in the cluster]]>
+
+
+
+ The response sent by the ResourceManager to a client
+ requesting a {@link NodeReport} for all nodes.
+
+
The NodeReport contains per-node information such as
+ available resources, number of containers, tracking url, rack name, health
+ status etc.
+
+ @see NodeReport
+ @see ApplicationClientProtocol#getClusterNodes(GetClusterNodesRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerId of the Container.
+
+ @return ContainerId of the Container]]>
+
+
+
+
+
+ ContainerId of the container
+
+ @param containerId
+ ContainerId of the container]]>
+
+
+
+
+ The request sent by a client to the ResourceManager to get a
+ {@link ContainerReport} for a container.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerReport for the container.
+
+ @return ContainerReport for the container]]>
+
+
+
+
+
+
+
+ The response sent by the ResourceManager to a client requesting
+ a container report.
+
+
+
+ The response includes a {@link ContainerReport} which has details of a
+ container.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationAttemptId of an application attempt.
+
+ @return ApplicationAttemptId of an application attempt]]>
+
+
+
+
+
+ ApplicationAttemptId of an application attempt
+
+ @param applicationAttemptId
+ ApplicationAttemptId of an application attempt]]>
+
+
+
+
+ The request from clients to get a list of container reports, which belong to
+ an application attempt from the ResourceManager.
+
+
+ @see ApplicationHistoryProtocol#getContainers(GetContainersRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerReport for all the containers of an
+ application attempt.
+
+ @return a list of ContainerReport for all the containers of an
+ application attempt]]>
+
+
+
+
+
+ ContainerReport for all the containers of an
+ application attempt.
+
+ @param containers
+ a list of ContainerReport for all the containers of
+ an application attempt]]>
+
+
+
+
+ The response sent by the ResourceManager to a client requesting
+ a list of {@link ContainerReport} for containers.
+
+
+
+ The ContainerReport for each container includes the container
+ details.
+
+
+ @see ContainerReport
+ @see ApplicationHistoryProtocol#getContainers(GetContainersRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerIds of containers for which to obtain
+ the ContainerStatus.
+
+ @return the list of ContainerIds of containers for which to
+ obtain the ContainerStatus.]]>
+
+
+
+
+
+ ContainerIds of containers for which to obtain
+ the ContainerStatus
+
+ @param containerIds
+ a list of ContainerIds of containers for which to
+ obtain the ContainerStatus]]>
+
+
+
+ ApplicationMaster to the
+ NodeManager to get {@link ContainerStatus} of requested
+ containers.
+
+ @see ContainerManagementProtocol#getContainerStatuses(GetContainerStatusesRequest)]]>
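A minimal sketch that wraps a list of container ids into the status request described above; the helper name is hypothetical.

    import java.util.List;

    import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
    import org.apache.hadoop.yarn.api.records.ContainerId;

    public class ContainerStatusSketch {
      /** Wraps the container ids the AM wants to poll on a given NodeManager. */
      public static GetContainerStatusesRequest statusesFor(List<ContainerId> ids) {
        return GetContainerStatusesRequest.newInstance(ids);
      }
    }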
+
+
+
+
+
+
+
+
+
+ ContainerStatuses of the requested containers.
+
+ @return ContainerStatuses of the requested containers.]]>
+
+
+
+
+
+
+
+
+ NodeManager to the
+ ApplicationMaster when asked to obtain the
+ ContainerStatus of requested containers.
+
+ @see ContainerManagementProtocol#getContainerStatuses(GetContainerStatusesRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The request sent by clients to get a new {@link ApplicationId} for
+ submitting an application.
+
+
Currently, this is empty.
+
+ @see ApplicationClientProtocol#getNewApplication(GetNewApplicationRequest)]]>
+
+
+
+
+
+
+
+
+
+ new ApplicationId allocated by the
+ ResourceManager.
+ @return new ApplicationId allocated by the
+ ResourceManager]]>
+
+
+
+
+ ResourceManager in the cluster.
+ @return maximum capability of allocated resources in the cluster]]>
+
+
+
+ The response sent by the ResourceManager to the client for
+ a request to get a new {@link ApplicationId} for submitting applications.
+
+
Clients can submit an application with the returned
+ {@link ApplicationId}.
+
+ @see ApplicationClientProtocol#getNewApplication(GetNewApplicationRequest)]]>
+
+
+
+
+
+
+
+
+
+
+ The request sent by clients to get a new {@code ReservationId} for
+ submitting a reservation.
+
+ {@code ApplicationClientProtocol#getNewReservation(GetNewReservationRequest)}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The response sent by the ResourceManager to the client for
+ a request to get a new {@link ReservationId} for submitting reservations.
+
+
+ Clients can submit a reservation with the returned
+ {@link ReservationId}.
+
+ {@code ApplicationClientProtocol#getNewReservation(GetNewReservationRequest)}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The request from clients to get nodes to attributes mapping
+ in the cluster from the ResourceManager.
+
+
+ @see ApplicationClientProtocol#getNodesToAttributes
+ (GetNodesToAttributesRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The response sent by the ResourceManager to a client requesting
+ nodes to attributes mapping.
+
+
+ @see ApplicationClientProtocol#getNodesToAttributes
+ (GetNodesToAttributesRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ queue name for which to get queue information.
+ @return queue name for which to get queue information]]>
+
+
+
+
+
+ queue name for which to get queue information
+ @param queueName queue name for which to get queue information]]>
+
+
+
+
+ active applications required?
+ @return true if applications' information is to be included,
+ else false]]>
+
+
+
+
+
+ active applications?
+ @param includeApplications fetch information about active
+ applications?]]>
+
+
+
+
+ child queues required?
+ @return true if information about child queues is required,
+ else false]]>
+
+
+
+
+
+ child queues?
+ @param includeChildQueues fetch information about child queues?]]>
+
+
+
+
+ child queue hierarchy required?
+ @return true if information about entire hierarchy is
+ required, false otherwise]]>
+
+
+
+
+
+ child queue hierarchy?
+ @param recursive fetch information on the entire child queue
+ hierarchy?]]>
+
+
+
+ The request sent by clients to get queue information
+ from the ResourceManager.
+
+ @see ApplicationClientProtocol#getQueueInfo(GetQueueInfoRequest)]]>
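A sketch, assuming the four-argument newInstance factory, that asks for the "root" queue together with its active applications and the full child-queue hierarchy.

    import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;

    public class QueueInfoSketch {
      /** Fetches "root" with its running applications and the whole child-queue hierarchy. */
      public static GetQueueInfoRequest fullHierarchy() {
        return GetQueueInfoRequest.newInstance(
            "root", // queue name
            true,   // include active applications
            true,   // include child queues
            true);  // recurse through the entire hierarchy
      }
    }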
+
+
+
+
+
+
+
+
+
+ QueueInfo for the specified queue.
+ @return QueueInfo for the specified queue]]>
+
+
+
+
+ The response includes a {@link QueueInfo} which has details such as
+ queue name, used/total capacities, running applications, child queues etc.
+
+ @see QueueInfo
+ @see ApplicationClientProtocol#getQueueInfo(GetQueueInfoRequest)]]>
+
+
+
+
+
+
+
+
+
+
+ The request sent by clients to the ResourceManager to
+ get queue acls for the current user.
+
+
Currently, this is empty.
+
+ @see ApplicationClientProtocol#getQueueUserAcls(GetQueueUserAclsInfoRequest)]]>
+
+
+
+
+
+
+
+
+
+ QueueUserACLInfo per queue for the user.
+ @return QueueUserACLInfo per queue for the user]]>
+
+
+
+ The response sent by the ResourceManager to clients
+ seeking queue acls for the user.
+
+
The response contains a list of {@link QueueUserACLInfo} which
+ provides information about {@link QueueACL} per queue.
+
+ @see QueueACL
+ @see QueueUserACLInfo
+ @see ApplicationClientProtocol#getQueueUserAcls(GetQueueUserAclsInfoRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: {@link NMToken} will be used for authenticating communication with
+ {@code NodeManager}.
+ @return the list of container tokens to be used for authorization during
+ container resource increase.
+ @see NMToken]]>
+
+
+
+
+
+ AllocateResponse.getIncreasedContainers.
+ The token contains the container id and resource capability required for
+ container resource increase.
+ @param containersToIncrease the list of container tokens to be used
+ for container resource increase.]]>
+
+
+
+ The request sent by Application Master to the
+ Node Manager to change the resource quota of a container.
+
+ @see ContainerManagementProtocol#increaseContainersResource(IncreaseContainersResourceRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The response sent by the NodeManager to the
+ ApplicationMaster when asked to increase container resource.
+
+
+ @see ContainerManagementProtocol#increaseContainersResource(IncreaseContainersResourceRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId of the application to be aborted.
+ @return ApplicationId of the application to be aborted]]>
+
+
+
+
+
+
+
+ diagnostics explaining why the application is being killed.
+ @return diagnostics explaining why the application is being killed]]>
+
+
+
+
+
+ diagnostics explaining why the application is being killed.
+ @param diagnostics diagnostics explaining why the application is being
+ killed]]>
+
+
+
+ The request sent by the client to the ResourceManager
+ to abort a submitted application.
+
+
The request includes the {@link ApplicationId} of the application to be
+ aborted.
+
+ @see ApplicationClientProtocol#forceKillApplication(KillApplicationRequest)]]>
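A hedged sketch of building the abort request described above; the diagnostics string is illustrative.

    import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
    import org.apache.hadoop.yarn.api.records.ApplicationId;

    public class KillRequestSketch {
      /** Builds an abort request with an explanatory diagnostics message. */
      public static KillApplicationRequest killWithReason(ApplicationId appId) {
        KillApplicationRequest request = KillApplicationRequest.newInstance(appId);
        request.setDiagnostics("Killed by operator: job superseded"); // illustrative reason
        return request;
      }
    }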
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ResourceManager to the client aborting
+ a submitted application.
+
+ The response, includes:
+
+
+ A flag which indicates that the process of killing the application is
+ completed or not.
+
+
+ Note: the user is recommended to wait until this flag becomes true; otherwise, if
+ the ResourceManager crashes before the process of killing the
+ application is completed, the ResourceManager may retry this
+ application on recovery.
+
+ @see ApplicationClientProtocol#forceKillApplication(KillApplicationRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId of the application to be moved.
+ @return ApplicationId of the application to be moved]]>
+
+
+
+
+
+ ApplicationId of the application to be moved.
+ @param appId ApplicationId of the application to be moved]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The request sent by the client to the ResourceManager
+ to move a submitted application to a different queue.
+
+
The request includes the {@link ApplicationId} of the application to be
+ moved and the queue to place it in.
+
+ @see ApplicationClientProtocol#moveApplicationAcrossQueues(MoveApplicationAcrossQueuesRequest)]]>
+
+
+
+
+
+
+
+
+
+ The response sent by the ResourceManager to the client moving
+ a submitted application to a different queue.
+
+
+ A response without exception means that the move has completed successfully.
+
+
+ @see ApplicationClientProtocol#moveApplicationAcrossQueues(MoveApplicationAcrossQueuesRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ RegisterApplicationMasterRequest.
+ If port or trackingUrl is not used, use the following default values:
+
+
port: -1
+
trackingUrl: null
+
+ The port is allowed to be any integer larger than or equal to -1.
+ @return the new instance of RegisterApplicationMasterRequest]]>
+
+
+
+
+ host on which the ApplicationMaster is
+ running.
+ @return host on which the ApplicationMaster is running]]>
+
+
+
+
+
+ host on which the ApplicationMaster is
+ running.
+ @param host host on which the ApplicationMaster
+ is running]]>
+
+
+
+
+ RPC port on which the {@code ApplicationMaster} is
+ responding.
+ @return the RPC port on which the {@code ApplicationMaster}
+ is responding]]>
+
+
+
+
+
+ RPC port on which the {@code ApplicationMaster} is
+ responding.
+ @param port RPC port on which the {@code ApplicationMaster}
+ is responding]]>
+
+
+
+
+ tracking URL for the ApplicationMaster.
+ If this URL contains a scheme, it will be used by the resource manager
+ web application proxy; otherwise the scheme will default to http.
+ @return tracking URL for the ApplicationMaster]]>
+
+
+
+
+
+ tracking URL for the ApplicationMaster while
+ it is running. This is the web-URL to which ResourceManager or
+ web-application proxy will redirect client/users while the application and
+ the ApplicationMaster are still running.
+
+ If the passed url has a scheme then that will be used by the
+ ResourceManager and web-application proxy, otherwise the scheme will
+ default to http.
+
+
+ Empty, null, "N/A" strings are all valid besides a real URL. In case an url
+ isn't explicitly passed, it defaults to "N/A" on the ResourceManager.
+
+
+ @param trackingUrl
+ tracking URL for the ApplicationMaster]]>
+
+
+
+
+ PlacementConstraint associated with the tags, i.e., each
+ {@link org.apache.hadoop.yarn.api.records.SchedulingRequest} that has those
+ tags will be placed taking into account the corresponding constraint.
+
+ @return A map of Placement Constraints.]]>
+
+
+
+
+
+ PlacementConstraint associated with the tags.
+ For example:
+ Map <
+ <hb_regionserver> -> node_anti_affinity,
+ <hb_regionserver, hb_master> -> rack_affinity,
+ ...
+ >
+ @param placementConstraints Placement Constraint Mapping.]]>
+
+
+
+
+ The registration includes details such as:
+
+
Hostname on which the AM is running.
+
RPC Port
+
Tracking URL
+
+
+ @see ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)]]>
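A sketch of the registration call described above; the tracking URL shown is illustrative, and -1/null would fall back to the documented defaults.

    import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;

    public class RegisterSketch {
      /** Registers with a real RPC endpoint; -1 / null would use the documented defaults. */
      public static RegisterApplicationMasterRequest register(String host, int rpcPort) {
        return RegisterApplicationMasterRequest.newInstance(
            host,                               // host the AM is running on
            rpcPort,                            // AM RPC port, -1 if unused
            "http://" + host + ":8080/status"); // illustrative tracking URL
      }
    }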
+
+
+
+
+
+
+
+
+
+ ResourceManager in the cluster.
+ @return maximum capability of allocated resources in the cluster]]>
+
+
+
+
+ ApplicationACLs for the application.
+ @return all the ApplicationACLs]]>
+
+
+
+
+ Get ClientToAMToken master key.
+
The ClientToAMToken master key is sent to ApplicationMaster
+ by ResourceManager via {@link RegisterApplicationMasterResponse}
+ , used to verify corresponding ClientToAMToken.
+ @return ClientToAMToken master key]]>
+
+
+
+
+
+
+
+
+
+
+ Get the queue that the application was placed in.
+ @return the queue that the application was placed in.]]>
+
+
+
+
+
+ Set the queue that the application was placed in.
]]>
+
+
+
+
+
+ Get the list of running containers as viewed by
+ ResourceManager from previous application attempts.
+
+
+ @return the list of running containers as viewed by
+ ResourceManager from previous application attempts
+ @see RegisterApplicationMasterResponse#getNMTokensFromPreviousAttempts()]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The response contains critical details such as:
+
+
Maximum capability for allocated resources in the cluster.
+
{@code ApplicationACL}s for the application.
+
ClientToAMToken master key.
+
+
+ @see ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerId of the container to re-initialize.
+
+ @return ContainerId of the container to re-initialize.]]>
+
+
+
+
+ ContainerLaunchContext to re-initialize the container
+ with.
+
+ @return ContainerLaunchContext to re-initialize the
+ container with.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId of the resource to be released.
+
+ @return ApplicationId]]>
+
+
+
+
+
+ ApplicationId of the resource to be released.
+
+ @param id ApplicationId]]>
+
+
+
+
+ key of the resource to be released.
+
+ @return key]]>
+
+
+
+
+
+ key of the resource to be released.
+
+ @param key unique identifier for the resource]]>
+
+
+
+ The request from clients to release a resource in the shared cache.]]>
+
+
+
+
+
+
+
+
+
+ The response to clients from the SharedCacheManager when
+ releasing a resource in the shared cache.
+
+
+
+
+ {@code ApplicationClientProtocol#submitReservation(
+ ReservationSubmissionRequest)}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerId of the container to localize resources.
+
+ @return ContainerId of the container to localize resources.]]>
+
+
+
+
+ LocalResource required by the container.
+
+ @return all LocalResource required by the container]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerId of the container to signal.
+ @return ContainerId of the container to signal.]]>
+
+
+
+
+
+ ContainerId of the container to signal.]]>
+
+
+
+
+ SignalContainerCommand of the signal request.
+ @return SignalContainerCommand of the signal request.]]>
+
+
+
+
+
+ SignalContainerCommand of the signal request.]]>
+
+
+
+ The request sent by the client to the ResourceManager
+ or by the ApplicationMaster to the NodeManager
+ to signal a container.
+ @see SignalContainerCommand ]]>
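A sketch, assuming the newInstance(ContainerId, SignalContainerCommand) factory, that asks a running container for a thread dump, e.g. to debug a hang.

    import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest;
    import org.apache.hadoop.yarn.api.records.ContainerId;
    import org.apache.hadoop.yarn.api.records.SignalContainerCommand;

    public class SignalSketch {
      /** Requests a thread dump from a running container. */
      public static SignalContainerRequest threadDump(ContainerId containerId) {
        return SignalContainerRequest.newInstance(
            containerId, SignalContainerCommand.OUTPUT_THREAD_DUMP);
      }
    }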
+
+
+
+
+
+
+
+
+ The response sent by the ResourceManager to the client
+ signalling a container.
+
+
Currently it's empty.
+
+ @see ApplicationClientProtocol#signalToContainer(SignalContainerRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerLaunchContext for the container to be started
+ by the NodeManager.
+
+ @return ContainerLaunchContext for the container to be started
+ by the NodeManager]]>
+
+
+
+
+
+ ContainerLaunchContext for the container to be started
+ by the NodeManager
+ @param context ContainerLaunchContext for the container to be
+ started by the NodeManager]]>
+
+
+
+
+
+ Note: {@link NMToken} will be used for authenticating communication with
+ {@code NodeManager}.
+ @return the container token to be used for authorization during starting
+ container.
+ @see NMToken
+ @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]>
+
+
+
+
+
+
+ The request sent by the ApplicationMaster to the
+ NodeManager to start a container.
+
+
The ApplicationMaster has to provide details such as
+ allocated resource capability, security tokens (if enabled), command
+ to be executed to start the container, environment for the process,
+ necessary binaries/jar/shared-objects etc. via the
+ {@link ContainerLaunchContext}.
+
+ @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The request which contains a list of {@link StartContainerRequest} sent by
+ the ApplicationMaster to the NodeManager to
+ start containers.
+
+
+
+ In each {@link StartContainerRequest}, the ApplicationMaster has
+ to provide details such as allocated resource capability, security tokens (if
+ enabled), command to be executed to start the container, environment for the
+ process, necessary binaries/jar/shared-objects etc. via the
+ {@link ContainerLaunchContext}.
+
+
+ @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]>
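A hedged sketch that builds a ContainerLaunchContext for a single shell command and wraps it into the batched request described above; the command and the null token/ACL fields are illustrative simplifications.

    import java.util.Arrays;
    import java.util.Collections;

    import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
    import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.api.records.Token;

    public class StartContainersSketch {
      /** Launches a single container running a shell command; the command is illustrative. */
      public static StartContainersRequest launchShell(Token containerToken) {
        ContainerLaunchContext context = ContainerLaunchContext.newInstance(
            Collections.emptyMap(),     // no local resources
            Collections.emptyMap(),     // no extra environment variables
            Arrays.asList("sleep 60"),  // command to run (illustrative)
            null,                       // no aux-service data
            null,                       // no security tokens in this simplified sketch
            null);                      // no application ACLs
        StartContainerRequest one =
            StartContainerRequest.newInstance(context, containerToken);
        return StartContainersRequest.newInstance(Collections.singletonList(one));
      }
    }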
+
+
+
+
+
+
+
+
+
+ ContainerId s of the containers that are
+ started successfully.
+
+ @return the list of ContainerId s of the containers that are
+ started successfully.
+ @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]>
+
+
+
+
+
+
+
+
+
+
+ Get the meta-data from all auxiliary services running on the
+ NodeManager.
+
+
+ The meta-data is returned as a Map between the auxiliary service names and
+ their corresponding per service meta-data as an opaque blob
+ ByteBuffer
+
+
+
+ To be able to interpret the per-service meta-data, you should consult the
+ documentation for the Auxiliary-service configured on the NodeManager
+
+
+ @return a Map between the names of auxiliary services and their
+ corresponding meta-data]]>
+
+
+
+
+ The response sent by the NodeManager to the
+ ApplicationMaster when asked to start an allocated
+ container.
+
+
+ @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerIds of the containers to be stopped.
+ @return ContainerIds of containers to be stopped]]>
+
+
+
+
+
+ ContainerIds of the containers to be stopped.
+ @param containerIds ContainerIds of the containers to be stopped]]>
+
+
+
+ The request sent by the ApplicationMaster to the
+ NodeManager to stop containers.
+
+ @see ContainerManagementProtocol#stopContainers(StopContainersRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The response sent by the NodeManager to the
+ ApplicationMaster when asked to stop allocated
+ containers.
+
+
+ @see ContainerManagementProtocol#stopContainers(StopContainersRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationSubmissionContext for the application.
+ @return ApplicationSubmissionContext for the application]]>
+
+
+
+
+
+ ApplicationSubmissionContext for the application.
+ @param context ApplicationSubmissionContext for the
+ application]]>
+
+
+
+ The request sent by a client to submit an application to the
+ ResourceManager.
+
+
The request, via {@link ApplicationSubmissionContext}, contains
+ details such as queue, {@link Resource} required to run the
+ ApplicationMaster, the equivalent of
+ {@link ContainerLaunchContext} for launching the
+ ApplicationMaster etc.
+
+ @see ApplicationClientProtocol#submitApplication(SubmitApplicationRequest)]]>
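A sketch of assembling a minimal submission context for the request described above; the application name, queue, and AM resource size are illustrative, and the appId is assumed to come from a prior GetNewApplicationResponse.

    import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.util.Records;

    public class SubmitSketch {
      /** Wraps a minimal submission context; appId comes from GetNewApplicationResponse. */
      public static SubmitApplicationRequest submit(ApplicationId appId,
          ContainerLaunchContext amContainer) {
        ApplicationSubmissionContext context =
            Records.newRecord(ApplicationSubmissionContext.class);
        context.setApplicationId(appId);
        context.setApplicationName("sketch-app");          // illustrative name
        context.setQueue("default");                       // illustrative queue
        context.setAMContainerSpec(amContainer);           // how to launch the AM
        context.setResource(Resource.newInstance(2048, 1)); // AM container size (illustrative)
        return SubmitApplicationRequest.newInstance(context);
      }
    }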
+
+
+
+
+
+
+
+
+ The response sent by the ResourceManager to a client on
+ application submission.
+
+
Currently, this is empty.
+
+ @see ApplicationClientProtocol#submitApplication(SubmitApplicationRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId of the application.
+
+ @return ApplicationId of the application]]>
+
+
+
+
+
+ ApplicationId of the application.
+
+ @param applicationId ApplicationId of the application]]>
+
+
+
+
+ Priority of the application to be set.
+
+ @return Priority of the application to be set.]]>
+
+
+
+
+
+ Priority of the application.
+
+ @param priority Priority of the application]]>
+
+
+
+
+ The request sent by the client to the ResourceManager to set or
+ update the application priority.
+
+
+ The request includes the {@link ApplicationId} of the application and
+ {@link Priority} to be set for an application
+
+
+ @see ApplicationClientProtocol#updateApplicationPriority(UpdateApplicationPriorityRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Priority of the application to be set.
+ @return Updated Priority of the application.]]>
+
+
+
+
+
+ Priority of the application.
+
+ @param priority Priority of the application]]>
+
+
+
+
+ The response sent by the ResourceManager to the client on updating
+ the application priority.
+
+
+ A response without exception means that the update has completed successfully.
+
+
+ @see ApplicationClientProtocol#updateApplicationPriority(UpdateApplicationPriorityRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId of the application.
+ @return ApplicationId of the application]]>
+
+
+
+
+
+ ApplicationId of the application.
+ @param applicationId ApplicationId of the application]]>
+
+
+
+
+ ApplicationTimeouts of the application. Timeout value is
+ in ISO8601 standard with format yyyy-MM-dd'T'HH:mm:ss.SSSZ.
+ @return all ApplicationTimeouts of the application.]]>
+
+
+
+
+
+ ApplicationTimeouts for the application. Timeout value
+ is absolute. Timeout values must be in ISO8601 format; the supported
+ format is yyyy-MM-dd'T'HH:mm:ss.SSSZ. All pre-existing Map entries
+ are cleared before adding the new Map.
+ @param applicationTimeouts ApplicationTimeouts for the
+ application]]>
+
+
+
+
+ The request sent by the client to the ResourceManager to set or
+ update the application timeout.
+
+
+ The request includes the {@link ApplicationId} of the application and timeout
+ to be set for an application
+
]]>
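A sketch, assuming the newInstance(ApplicationId, Map) factory, that sets a new absolute LIFETIME expiry in the ISO8601 format documented above; the timestamp is illustrative.

    import java.util.Collections;

    import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest;
    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;

    public class TimeoutUpdateSketch {
      /** Moves the LIFETIME expiry to an absolute ISO8601 instant (value is illustrative). */
      public static UpdateApplicationTimeoutsRequest extendLifetime(ApplicationId appId) {
        return UpdateApplicationTimeoutsRequest.newInstance(appId,
            Collections.singletonMap(ApplicationTimeoutType.LIFETIME,
                "2024-01-01T00:00:00.000+0000")); // yyyy-MM-dd'T'HH:mm:ss.SSSZ
      }
    }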
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationTimeouts of the application. Timeout value is
+ in ISO8601 standard with format yyyy-MM-dd'T'HH:mm:ss.SSSZ.
+ @return all ApplicationTimeouts of the application.]]>
+
+
+
+
+
+ ApplicationTimeouts for the application. Timeout value
+ is absolute. Timeout values must be in ISO8601 format; the supported
+ format is yyyy-MM-dd'T'HH:mm:ss.SSSZ. All pre-existing Map entries
+ are cleared before adding the new Map.
+ @param applicationTimeouts ApplicationTimeouts for the
+ application]]>
+
+
+
+
+ The response sent by the ResourceManager to the client on updating
+ the application timeout.
+
+
+ A response without exception means that the update has completed
+ successfully.
+
]]>
+
+
+
+
+
+
+
+
+
+ ApplicationId of the resource to be used.
+
+ @return ApplicationId]]>
+
+
+
+
+
+ ApplicationId of the resource to be used.
+
+ @param id ApplicationId]]>
+
+
+
+
+ key of the resource to be used.
+
+ @return key]]>
+
+
+
+
+
+ key of the resource to be used.
+
+ @param key unique identifier for the resource]]>
+
+
+
+
+ The request from clients to the SharedCacheManager that claims a
+ resource in the shared cache.
+ ]]>
+
+
+
+
+
+
+
+
+
+ Path corresponding to the requested resource in the
+ shared cache.
+
+ @return String A Path if the resource exists in the shared
+ cache, null otherwise]]>
+
+
+
+
+
+ Path corresponding to a resource in the shared cache.
+
+ @param p A Path corresponding to a resource in the shared
+ cache]]>
+
+
+
+
+ The response from the SharedCacheManager to the client that indicates whether
+ a requested resource exists in the cache.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId of the ApplicationAttempId.
+ @return ApplicationId of the ApplicationAttempId]]>
+
+
+
+
+ attempt id of the Application.
+ @return attempt id of the Application]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationAttemptId denotes the particular attempt
+ of an ApplicationMaster for a given {@link ApplicationId}.
+
+
Multiple attempts might be needed to run an application to completion due
+ to temporary failures of the ApplicationMaster such as hardware
+ failures, connectivity issues etc. on the node on which it was scheduled.
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ YarnApplicationAttemptState of the application attempt.
+
+ @return YarnApplicationAttemptState of the application attempt]]>
+
+
+
+
+ RPC port of this attempt ApplicationMaster.
+
+ @return RPC port of this attempt ApplicationMaster]]>
+
+
+
+
+ host on which this attempt of
+ ApplicationMaster is running.
+
+ @return host on which this attempt of
+ ApplicationMaster is running]]>
+
+
+
+
+ diagnostic information of the application attempt in case
+ of errors.
+
+ @return diagnostic information of the application attempt in case
+ of errors]]>
+
+
+
+
+ tracking url for the application attempt.
+
+ @return tracking url for the application attempt]]>
+
+
+
+
+ original tracking url for the application attempt.
+
+ @return original tracking url for the application attempt]]>
+
+
+
+
+ ApplicationAttemptId of this attempt of the
+ application
+
+ @return ApplicationAttemptId of the attempt]]>
+
+
+
+
+ ContainerId of AMContainer for this attempt
+
+ @return ContainerId of the attempt]]>
+
+
+
+
+
+
+ finish time of the application.
+
+ @return finish time of the application]]>
+
+
+
+
+ It includes details such as:
+
+
{@link ApplicationAttemptId} of the application.
+
Host on which the ApplicationMaster of this attempt is
+ running.
+
RPC port of the ApplicationMaster of this attempt.
+
Tracking URL.
+
Diagnostic information in case of errors.
+
{@link YarnApplicationAttemptState} of the application attempt.
+
{@link ContainerId} of the master Container.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId
+ which is unique for all applications started by a particular instance
+ of the ResourceManager.
+ @return short integer identifier of the ApplicationId]]>
+
+
+
+
+ start time of the ResourceManager which is
+ used to generate globally unique ApplicationId.
+ @return start time of the ResourceManager]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId represents the globally unique
+ identifier for an application.
+
+
The globally unique nature of the identifier is achieved by using the
+ cluster timestamp i.e. start-time of the
+ ResourceManager along with a monotonically increasing counter
+ for the application.
]]>
+
+
+
+
+
+
+
+
+
+ ApplicationId of the application.
+ @return ApplicationId of the application]]>
+
+
+
+
+ ApplicationAttemptId of the current
+ attempt of the application
+ @return ApplicationAttemptId of the attempt]]>
+
+
+
+
+ user who submitted the application.
+ @return user who submitted the application]]>
+
+
+
+
+ queue to which the application was submitted.
+ @return queue to which the application was submitted]]>
+
+
+
+
+ name of the application.
+ @return name of the application]]>
+
+
+
+
+ host on which the ApplicationMaster
+ is running.
+ @return host on which the ApplicationMaster
+ is running]]>
+
+
+
+
+ RPC port of the ApplicationMaster.
+ @return RPC port of the ApplicationMaster]]>
+
+
+
+
+ client token for communicating with the
+ ApplicationMaster.
+
+ ClientToAMToken is the security token used by the AMs to verify
+ authenticity of any client.
+
+
+
+ The ResourceManager provides a secure token (via
+ {@link ApplicationReport#getClientToAMToken()}) which is verified by the
+ ApplicationMaster when the client directly talks to an AM.
+
+ @return client token for communicating with the
+ ApplicationMaster]]>
+
+
+
+
+ YarnApplicationState of the application.
+ @return YarnApplicationState of the application]]>
+
+
+
+
+ diagnostic information of the application in case of
+ errors.
+ @return diagnostic information of the application in case
+ of errors]]>
+
+
+
+
+ tracking url for the application.
+ @return tracking url for the application]]>
+
+
+
+
+ start time of the application.
+ @return start time of the application]]>
+
+
+
+
+
+
+
+
+ finish time of the application.
+ @return finish time of the application]]>
+
+
+
+
+ final finish status of the application.
+ @return final finish status of the application]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The AMRM token is required for AM to RM scheduling operations. For
+ managed Application Masters YARN takes care of injecting it. For unmanaged
+ Application Masters, the token must be obtained via this method and set
+ in the {@link org.apache.hadoop.security.UserGroupInformation} of the
+ current user.
+
+ The AMRM token will be returned only if all the following conditions are
+ met:
+
+
the requester is the owner of the ApplicationMaster
+
the application master is an unmanaged ApplicationMaster
+
the application master is in ACCEPTED state
+
+ Else this method returns NULL.
+
+ @return the AM to RM token if available.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It includes details such as:
+
+
{@link ApplicationId} of the application.
+
+ Application user.
+
Application queue.
+
Application name.
+
Host on which the ApplicationMaster is running.
+
RPC port of the ApplicationMaster.
+
Tracking URL.
+
{@link YarnApplicationState} of the application.
+
Diagnostic information in case of errors.
+
Start time of the application.
+
Client {@link Token} of the application (if security is enabled).
+
+
+ @see ApplicationClientProtocol#getApplicationReport(org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Resource. -1 for invalid/inaccessible reports.
+ @return the used Resource]]>
+
+
+
+
+ Resource. -1 for invalid/inaccessible reports.
+ @return the reserved Resource]]>
+
+
+
+
+ Resource. -1 for invalid/inaccessible reports.
+ @return the needed Resource]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationId of the submitted application.
+ @return ApplicationId of the submitted application]]>
+
+
+
+
+
+ ApplicationId of the submitted application.
+ @param applicationId ApplicationId of the submitted
+ application]]>
+
+
+
+
+ name.
+ @return application name]]>
+
+
+
+
+
+ name.
+ @param applicationName application name]]>
+
+
+
+
+ queue to which the application is being submitted.
+ @return queue to which the application is being submitted]]>
+
+
+
+
+
+ queue to which the application is being submitted
+ @param queue queue to which the application is being submitted]]>
+
+
+
+
+ Priority of the application.
+ @return Priority of the application]]>
+
+
+
+
+ ContainerLaunchContext to describe the
+ Container with which the ApplicationMaster is
+ launched.
+ @return ContainerLaunchContext for the
+ ApplicationMaster container]]>
+
+
+
+
+
+ ContainerLaunchContext to describe the
+ Container with which the ApplicationMaster is
+ launched.
+ @param amContainer ContainerLaunchContext for the
+ ApplicationMaster container]]>
+
+
+
+
+ YarnApplicationState.
+ Such apps will not be retried by the RM on app attempt failure.
+ The default value is false.
+ @return true if the AM is not managed by the RM]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationMaster for this
+ application. Please note this will be DEPRECATED, use getResource
+ in getAMContainerResourceRequest instead.
+
+ @return the resource required by the ApplicationMaster for
+ this application.]]>
+
+
+
+
+
+ ApplicationMaster for this
+ application.
+
+ @param resource the resource required by the ApplicationMaster
+ for this application.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ For managed AM, if the flag is true, running containers will not be killed
+ when application attempt fails and these containers will be retrieved by
+ the new application attempt on registration via
+ {@link ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)}.
+
+
+ For unmanaged AM, if the flag is true, RM allows re-register and returns
+ the running containers in the same attempt back to the UAM for HA.
+
+
+ @param keepContainers the flag which indicates whether to keep containers
+ across application attempts.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ getResource and getPriority of
+ ApplicationSubmissionContext.
+
+ Number of containers and Priority will be ignored.
+
+ @return ResourceRequest of the AM container
+ @deprecated See {@link #getAMContainerResourceRequests()}]]>
+
+
+
+
+
+
+
+
+
+
+ getAMContainerResourceRequest and its behavior.
+
+ Number of containers and Priority will be ignored.
+
+ @return List of ResourceRequests of the AM container]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ LogAggregationContext of the application
+
+ @return LogAggregationContext of the application]]>
+
+
+
+
+
+ LogAggregationContext for the application
+
+ @param logAggregationContext
+ for the application]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationTimeouts of the application. Timeout value is
+ in seconds.
+ @return all ApplicationTimeouts of the application.]]>
+
+
+
+
+
+ ApplicationTimeouts for the application in seconds.
+ All pre-existing Map entries are cleared before adding the new Map.
+
+ Note: If application timeout value is less than or equal to zero
+ then application submission will throw an exception.
+
+ @param applicationTimeouts ApplicationTimeouts for the
+ application]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It includes details such as:
+
+
{@link ApplicationId} of the application.
+
Application user.
+
Application name.
+
{@link Priority} of the application.
+
+ {@link ContainerLaunchContext} of the container in which the
+ ApplicationMaster is executed.
+
+
+ maxAppAttempts. The maximum number of application attempts.
+ It should be no larger than the global number of max attempts in the
+ YARN configuration.
+
+
+ attemptFailuresValidityInterval. The default value is -1.
+ When attemptFailuresValidityInterval in milliseconds is set to
+ {@literal >} 0, failures that happen outside of the validityInterval will
+ not be counted towards the failure count. If the failure count
+ reaches maxAppAttempts, the application will be failed.
+
+
+ @see ContainerLaunchContext
+ @see ApplicationClientProtocol#submitApplication(org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ expiryTime for given timeout type.
+ @return expiryTime in ISO8601 standard with format
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ.]]>
+
+
+
+
+
+ expiryTime for given timeout type.
+ @param expiryTime in ISO8601 standard with format
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ.]]>
+
+
+
+
+ Remaining Time of an application for given timeout type.
+ @return Remaining Time in seconds.]]>
+
+
+
+
+
+ Remaining Time of an application for given timeout type.
+ @param remainingTime in seconds.]]>
+
+
+
+
+
{@link ApplicationTimeoutType} of the timeout type.
+
Expiry time in ISO8601 standard with format
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ or "UNLIMITED".
+
Remaining time in seconds.
+
+ The possible values for {ExpiryTime, RemainingTimeInSeconds} are
+
+
{UNLIMITED,-1} : Timeout is not configured for given timeout type
+ (LIFETIME).
+
{ISO8601 date string, 0} : Timeout is configured and application has
+ completed.
+
{ISO8601 date string, greater than zero} : Timeout is configured and
+ application is RUNNING. Application will be timed out after configured
+ value.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Resource allocated to the container.
+ @return Resource allocated to the container]]>
+
+
+
+
+ Priority at which the Container was
+ allocated.
+ @return Priority at which the Container was
+ allocated]]>
+
+
+
+
+ ContainerToken for the container.
+
ContainerToken is the security token used by the framework
+ to verify authenticity of any Container.
+
+
The ResourceManager, on container allocation provides a
+ secure token which is verified by the NodeManager on
+ container launch.
+
+
Applications do not need to care about ContainerToken, they
+ are transparently handled by the framework - the allocated
+ Container includes the ContainerToken.
+
+ @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)
+ @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)
+
+ @return ContainerToken for the container]]>
+
+
+
+
+ ID corresponding to the original {@code
+ ResourceRequest{@link #getAllocationRequestId()}}s which is satisfied by
+ this allocated {@code Container}.
+
+ The scheduler may return multiple {@code AllocateResponse}s corresponding
+ to the same ID as and when scheduler allocates {@code Container}s.
+ Applications can continue to completely ignore the returned ID in
+ the response and use the allocation for any of their outstanding requests.
+
+
+ @return the ID corresponding to the original allocation request
+ which is satisfied by this allocation.]]>
+
+
+
+
+ The {@code ResourceManager} is the sole authority to allocate any
+ {@code Container} to applications. The allocated {@code Container}
+ is always on a single node and has a unique {@link ContainerId}. It has
+ a specific amount of {@link Resource} allocated.
+
+ It includes details such as:
+
+
{@link ContainerId} for the container, which is globally unique.
+
+ {@link NodeId} of the node on which it is allocated.
+
+
HTTP uri of the node.
+
{@link Resource} allocated to the container.
+
{@link Priority} at which the container was allocated.
+
+ Container {@link Token} of the container, used to securely verify
+ authenticity of the allocation.
+
+
+
+ Typically, an {@code ApplicationMaster} receives the {@code Container}
+ from the {@code ResourceManager} during resource-negotiation and then
+ talks to the {@code NodeManager} to start/stop containers.
+
+ @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)
+ @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)
+ @see ContainerManagementProtocol#stopContainers(org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationAttemptId of the application to which the
+ Container was assigned.
+
+ Note: If containers are kept alive across application attempts via
+ {@link ApplicationSubmissionContext#setKeepContainersAcrossApplicationAttempts(boolean)}
+ the ContainerId does not necessarily contain the current
+ running application attempt's ApplicationAttemptId. This
+ container may have been allocated by a previously exited application attempt
+ and be managed by the current running attempt, thus carrying the previous
+ application attempt's ApplicationAttemptId.
+
+
+ @return ApplicationAttemptId of the application to which the
+ Container was assigned]]>
+
+
+
+
+ ContainerId,
+ which doesn't include epoch. Note that this method will be marked as
+ deprecated, so please use getContainerId instead.
+ @return lower 32 bits of identifier of the ContainerId]]>
+
+
+
+
+ ContainerId. Upper 24 bits are
+ reserved as epoch of cluster, and lower 40 bits are reserved as
+ sequential number of containers.
+ @return identifier of the ContainerId]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerId represents a globally unique identifier
+ for a {@link Container} in the cluster.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ LocalResource required by the container.
+ @return all LocalResource required by the container]]>
+
+
+
+
+
+ LocalResource required by the container. All pre-existing
+ Map entries are cleared before adding the new Map
+ @param localResources LocalResource required by the container]]>
+
+
+
+
+
+ Get application-specific binary service data. This is a map keyed
+ by the name of each {@link AuxiliaryService} that is configured on a
+ NodeManager and value correspond to the application specific data targeted
+ for the keyed {@link AuxiliaryService}.
+
+
+
+ This will be used to initialize this application on the specific
+ {@link AuxiliaryService} running on the NodeManager by calling
+ {@link AuxiliaryService#initializeApplication(ApplicationInitializationContext)}
+
+
+ @return application-specific binary service data]]>
+
+
+
+
+
+
+ Set application-specific binary service data. This is a map keyed
+ by the name of each {@link AuxiliaryService} that is configured on a
+ NodeManager and value correspond to the application specific data targeted
+ for the keyed {@link AuxiliaryService}. All pre-existing Map entries are
+ preserved.
+
+
+ @param serviceData
+ application-specific binary service data]]>
+
+
+
+
+ environment variables for the container.
+ @return environment variables for the container]]>
+
+
+
+
+
+ environment variables for the container. All pre-existing Map
+ entries are cleared before adding the new Map
+ @param environment environment variables for the container]]>
+
+
+
+
+ commands for launching the container.
+ @return the list of commands for launching the container]]>
+
+
+
+
+
+ commands for launching the container. All
+ pre-existing List entries are cleared before adding the new List
+ @param commands the list of commands for launching the container]]>
+
+
+
+
+ ApplicationACLs for the application.
+ @return all the ApplicationACLs]]>
+
+
+
+
+
+ ApplicationACLs for the application. All pre-existing
+ Map entries are cleared before adding the new Map
+ @param acls ApplicationACLs for the application]]>
+
+
+
+
+ ContainerRetryContext to relaunch container.
+ @return ContainerRetryContext to relaunch container.]]>
+
+
+
+
+
+ ContainerRetryContext to relaunch container.
+ @param containerRetryContext ContainerRetryContext to
+ relaunch container.]]>
+
+
+
+
+ It includes details such as:
+
+
{@link ContainerId} of the container.
+
{@link Resource} allocated to the container.
+
User to whom the container is allocated.
+
Security tokens (if security is enabled).
+
+ {@link LocalResource} necessary for running the container such
+ as binaries, jar, shared-objects, side-files etc.
+
+
Optional, application-specific binary service data.
+
Environment variables for the launched process.
+
Command to launch the container.
+
Retry strategy when container exits with failure.
+
+
+ @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]>
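The list above enumerates everything a ContainerLaunchContext carries. As a minimal, illustrative sketch (the command, environment variable and class name below are invented for the example), an ApplicationMaster might assemble one like this before handing it to the NodeManager:

    import java.nio.ByteBuffer;
    import java.util.Collections;
    import java.util.List;
    import java.util.Map;

    import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.api.records.LocalResource;

    public final class LaunchContextSketch {
      /** Build a launch context that simply runs a shell command. */
      static ContainerLaunchContext newShellContext() {
        Map<String, LocalResource> localResources = Collections.emptyMap();
        Map<String, String> environment =
            Collections.singletonMap("EXAMPLE_ENV", "1");
        List<String> commands = Collections.singletonList("echo hello");
        Map<String, ByteBuffer> serviceData = Collections.emptyMap();
        ByteBuffer tokens = null; // security tokens, when security is enabled
        Map<ApplicationAccessType, String> acls = Collections.emptyMap();
        return ContainerLaunchContext.newInstance(
            localResources, environment, commands, serviceData, tokens, acls);
      }
    }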
+
+
+
+
+
+
+
+
+
+ ContainerId of the container.
+
+ @return ContainerId of the container.]]>
+
+
+
+
+
+
+
+ Resource of the container.
+
+ @return allocated Resource of the container.]]>
+
+
+
+
+
+
+
+ NodeId where container is running.
+
+ @return allocated NodeId where container is running.]]>
+
+
+
+
+
+
+
+ Priority of the container.
+
+ @return allocated Priority of the container.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerState of the container.
+
+ @return final ContainerState of the container.]]>
+
+
+
+
+
+
+
+ exit status of the container.
+
+ @return final exit status of the container.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It includes details such as:
+
+ {@link ContainerRetryPolicy} :
+ - NEVER_RETRY (DEFAULT value): never retry, no matter which error code the
+ container fails with.
+ - RETRY_ON_ALL_ERRORS: always retry, no matter which error code the
+ container fails with.
+ - RETRY_ON_SPECIFIC_ERROR_CODES: retry only if the error code is one of
+ errorCodes, otherwise do not retry.
+
+ Note: if the error code is 137 (SIGKILL) or 143 (SIGTERM), the container
+ will not be retried, because it was usually killed on purpose.
+
+
+ maxRetries specifies how many times to retry.
+ A value of -1 means retry forever.
+
+
+ retryInterval specifies how long to wait, in milliseconds, before
+ relaunching the container.
+
+ failuresValidityInterval: default value is -1.
+ When failuresValidityInterval in milliseconds is set to {@literal >} 0,
+ failures that happen outside the failuresValidityInterval are not counted
+ towards the failure count. If the failure count reaches maxRetries,
+ the container is failed.
+
+
]]>
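To make the retry semantics above concrete, here is a small sketch. It assumes a ContainerRetryContext.newInstance(policy, errorCodes, maxRetries, retryInterval) factory and relies on the ContainerLaunchContext#setContainerRetryContext setter documented earlier in this file; the exit code and timings are arbitrary.

    import java.util.Collections;
    import java.util.Set;

    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.api.records.ContainerRetryContext;
    import org.apache.hadoop.yarn.api.records.ContainerRetryPolicy;

    public final class RetryContextSketch {
      /** Retry on exit code 1 only, at most 3 times, waiting 5 seconds in between. */
      static void addRetryPolicy(ContainerLaunchContext launchContext) {
        Set<Integer> retryOnCodes = Collections.singleton(1);
        ContainerRetryContext retry = ContainerRetryContext.newInstance(
            ContainerRetryPolicy.RETRY_ON_SPECIFIC_ERROR_CODES,
            retryOnCodes,
            3,      // maxRetries
            5000);  // retryInterval, in milliseconds
        launchContext.setContainerRetryContext(retry);
      }
    }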
+
+
+
+
+
+
+
+
+
+
+
+ Retry policy for relaunching a Container.]]>
+
+
+
+
+
+
+
+
+
+
+
+ State of a Container.]]>
+
+
+
+
+
+
+
+
+
+ ContainerId of the container.
+ @return ContainerId of the container]]>
+
+
+
+
+ ExecutionType of the container.
+ @return ExecutionType of the container]]>
+
+
+
+
+ ContainerState of the container.
+ @return ContainerState of the container]]>
+
+
+
+
+ Get the exit status for the container.
+
+
Note: This is valid only for completed containers i.e. containers
+ with state {@link ContainerState#COMPLETE}.
+ Otherwise, it returns a ContainerExitStatus.INVALID.
+
+
+
Containers killed by the framework, either due to being released by
+ the application or being 'lost' due to node failures etc. have a special
+ exit code of ContainerExitStatus.ABORTED.
+
+
When a threshold number of the nodemanager-local-directories or
+ nodemanager-log-directories become bad, the container is not launched
+ and exits with ContainerExitStatus.DISKS_FAILED.
+
+
+ @return exit status for the container]]>
+
+
+
+
+ diagnostic messages for failed containers.
+ @return diagnostic messages for failed containers]]>
+
+
+
+
+ Resource allocated to the container.
+ @return Resource allocated to the container]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It provides details such as:
+
{@link #GUARANTEED} - this container is guaranteed to start its
+ execution, once the corresponding start container request is received by
+ an NM.
+
{@link #OPPORTUNISTIC} - the execution of this container may not start
+ immediately at the NM that receives the corresponding start container
+ request (depending on the NM's available resources). Moreover, it may be
+ preempted if it blocks a GUARANTEED container from being executed.
+
]]>
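A short sketch tying ExecutionType to the ExecutionTypeRequest and ResourceRequest records described later in this file; the priority and container sizing are arbitrary example values.

    import org.apache.hadoop.yarn.api.records.ExecutionType;
    import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
    import org.apache.hadoop.yarn.api.records.Priority;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.api.records.ResourceRequest;

    public final class OpportunisticRequestSketch {
      /** Ask for one OPPORTUNISTIC container anywhere in the cluster. */
      static ResourceRequest newOpportunisticRequest() {
        ResourceRequest request = ResourceRequest.newInstance(
            Priority.newInstance(1), ResourceRequest.ANY,
            Resource.newInstance(1024, 1), 1);
        // enforceExecutionType = true: do not let the scheduler silently
        // hand back a GUARANTEED container instead.
        request.setExecutionTypeRequest(
            ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true));
        return request;
      }
    }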
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ExecutionType of the requested container.
+
+ @param execType
+ ExecutionType of the requested container]]>
+
+
+
+
+ ExecutionType.
+
+ @return ExecutionType.]]>
+
+
+
+
+
+
+
+
+
+
+ ResourceRequest.
+ Defaults to false.
+ @return whether ExecutionType request should be strictly honored]]>
+
+
+
+
+
+
+
+
+ ExecutionType as well as flag that explicitly asks the
+ configuredScheduler to return Containers of exactly the Execution Type
+ requested.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Application.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
resource key
+
{@link LocalizationState} of the resource
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ location of the resource to be localized.
+ @return location of the resource to be localized]]>
+
+
+
+
+
+ location of the resource to be localized.
+ @param resource location of the resource to be localized]]>
+
+
+
+
+ size of the resource to be localized.
+ @return size of the resource to be localized]]>
+
+
+
+
+
+ size of the resource to be localized.
+ @param size size of the resource to be localized]]>
+
+
+
+
+ timestamp of the resource to be localized, used
+ for verification.
+ @return timestamp of the resource to be localized]]>
+
+
+
+
+
+ timestamp of the resource to be localized, used
+ for verification.
+ @param timestamp timestamp of the resource to be localized]]>
+
+
+
+
+ LocalResourceType of the resource to be localized.
+ @return LocalResourceType of the resource to be localized]]>
+
+
+
+
+
+ LocalResourceType of the resource to be localized.
+ @param type LocalResourceType of the resource to be localized]]>
+
+
+
+
+ LocalResourceVisibility of the resource to be
+ localized.
+ @return LocalResourceVisibility of the resource to be
+ localized]]>
+
+
+
+
+
+ LocalResourceVisibility of the resource to be
+ localized.
+ @param visibility LocalResourceVisibility of the resource to be
+ localized]]>
+
+
+
+
+ pattern that should be used to extract entries from the
+ archive (only used when type is PATTERN).
+ @return pattern that should be used to extract entries from the
+ archive.]]>
+
+
+
+
+
+ pattern that should be used to extract entries from the
+ archive (only used when type is PATTERN).
+ @param pattern pattern that should be used to extract entries
+ from the archive.]]>
+
+
+
+
+
+
+
+
+
+
+ shouldBeUploadedToSharedCache
+ of this request]]>
+
+
+
+ LocalResource represents a local resource required to
+ run a container.
+
+
The NodeManager is responsible for localizing the resource
+ prior to launching the container.
+
+
Applications can specify {@link LocalResourceType} and
+ {@link LocalResourceVisibility}.
+
+ @see LocalResourceType
+ @see LocalResourceVisibility
+ @see ContainerLaunchContext
+ @see ApplicationSubmissionContext
+ @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]>
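A minimal sketch of describing a resource for localization, using the size and timestamp from the file's status for verification as the getters above describe. The HDFS path is illustrative, and URL.fromPath is assumed to be the available Path-to-URL converter.

    import java.io.IOException;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.yarn.api.records.LocalResource;
    import org.apache.hadoop.yarn.api.records.LocalResourceType;
    import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
    import org.apache.hadoop.yarn.api.records.URL;

    public final class LocalResourceSketch {
      /** Describe a jar already copied to HDFS so the NodeManager can localize it. */
      static LocalResource describeJar(FileSystem fs, Path hdfsJar) throws IOException {
        FileStatus status = fs.getFileStatus(hdfsJar);
        return LocalResource.newInstance(
            URL.fromPath(hdfsJar),               // where to fetch the resource from
            LocalResourceType.FILE,              // a plain file, not an archive
            LocalResourceVisibility.APPLICATION, // visible only to this application
            status.getLen(),                     // size, used for verification
            status.getModificationTime());       // timestamp, used for verification
      }
    }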
+
+
+
+
+
+
+
+
+
+
+
+ type
+ of a resource localized by the {@code NodeManager}.
+
+ The type can be one of:
+
+
+ {@link #FILE} - Regular file i.e. uninterpreted bytes.
+
+
+ {@link #ARCHIVE} - Archive, which is automatically unarchived by the
+ NodeManager.
+
+
+ {@link #PATTERN} - A hybrid between {@link #ARCHIVE} and {@link #FILE}.
+
+
+
+ @see LocalResource
+ @see ContainerLaunchContext
+ @see ApplicationSubmissionContext
+ @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+ visibility
+ of a resource localized by the {@code NodeManager}.
+
+ The visibility can be one of:
+
+
{@link #PUBLIC} - Shared by all users on the node.
+
+ {@link #PRIVATE} - Shared among all applications of the
+ same user on the node.
+
+
+ {@link #APPLICATION} - Shared only among containers of the
+ same application on the node.
+
+ includePattern. It uses Java Regex to filter the log files
+ which match the defined include pattern and those log files
+ will be uploaded when the application finishes.
+
+
+ excludePattern. It uses Java Regex to filter the log files
+ which match the defined exclude pattern and those log files
+ will not be uploaded when application finishes. If the log file
+ name matches both the include and the exclude pattern, this file
+ will be excluded eventually.
+
+
+ rolledLogsIncludePattern. It uses Java Regex to filter the log files
+ which match the defined include pattern and those log files
+ will be aggregated in a rolling fashion.
+
+
+ rolledLogsExcludePattern. It uses Java Regex to filter the log files
+ which match the defined exclude pattern and those log files
+ will not be aggregated in a rolling fashion. If the log file
+ name matches both the include and the exclude pattern, this file
+ will be excluded eventually.
+
+
+ policyClassName. The policy class name that implements
+ ContainerLogAggregationPolicy. At runtime, the nodemanager uses the policy
+ to decide whether a given container's log should be aggregated, based on the
+ ContainerType and other runtime state such as exit code, by calling
+ ContainerLogAggregationPolicy#shouldDoLogAggregation.
+ This is useful when the app only wants to aggregate logs of a subset of
+ containers. Here are the available policies. Please make sure to specify
+ the canonical name by prefixing org.apache.hadoop.yarn.server.
+ nodemanager.containermanager.logaggregation.
+ to the class simple name below.
+ NoneContainerLogAggregationPolicy: skip aggregation for all containers.
+ AllContainerLogAggregationPolicy: aggregate all containers.
+ AMOrFailedContainerLogAggregationPolicy: aggregate application master
+ or failed containers.
+ FailedOrKilledContainerLogAggregationPolicy: aggregate failed or killed
+ containers
+ FailedContainerLogAggregationPolicy: aggregate failed containers
+ AMOnlyLogAggregationPolicy: aggregate application master containers
+ SampleContainerLogAggregationPolicy: sample logs of successful worker
+ containers, in addition to application master and failed/killed
+ containers.
+ LimitSizeContainerLogAggregationPolicy: skip aggregation for killed
+ containers whose log size exceeds the limit of container log size.
+ If it isn't specified, it will use the cluster-wide default policy
+ defined by configuration yarn.nodemanager.log-aggregation.policy.class.
+ The default value of yarn.nodemanager.log-aggregation.policy.class is
+ AllContainerLogAggregationPolicy.
+
+
+ policyParameters. The parameters passed to the policy class via
+ ContainerLogAggregationPolicy#parseParameters during the policy object
+ initialization. This is optional. Some policy class might use parameters
+ to adjust its settings. It is up to policy class to define the scheme of
+ parameters.
+ For example, SampleContainerLogAggregationPolicy supports the format of
+ "SR:0.5,MIN:50", which means sample rate of 50% beyond the first 50
+ successful worker containers.
+
+
+
+ @see ApplicationSubmissionContext]]>
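A sketch of wiring the include/exclude patterns above into an application submission. It assumes a LogAggregationContext.newInstance(includePattern, excludePattern) factory and an ApplicationSubmissionContext#setLogAggregationContext setter; the regular expressions are only examples.

    import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
    import org.apache.hadoop.yarn.api.records.LogAggregationContext;

    public final class LogAggregationSketch {
      /** Aggregate only files ending in ".log", skipping anything under a tmp prefix. */
      static void configureLogAggregation(ApplicationSubmissionContext appContext) {
        LogAggregationContext logContext =
            LogAggregationContext.newInstance(".*\\.log", "tmp.*");
        appContext.setLogAggregationContext(logContext);
      }
    }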
+
+
+
+
+
+
+
+
+
+ NodeManager for which the NMToken
+ is used to authenticate.
+ @return the {@link NodeId} of the NodeManager for which the
+ NMToken is used to authenticate.]]>
+
+
+
+
+
+
+
+ NodeManager
+ @return the {@link Token} used for authenticating with NodeManager]]>
+
+
+
+
+
+
+
+
+
+
+
+ The NMToken is used for authenticating communication with
+ NodeManager
+
It is issued by ResourceMananger when ApplicationMaster
+ negotiates resource with ResourceManager and
+ validated on NodeManager side.
+ @see AllocateResponse#getNMTokens()]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Node Attribute is a kind of label which represents one of the
+ attributes/features of a Node. It is different from a node partition label,
+ as resource guarantees across the queues will not be maintained for this type
+ of label.
+
+
+ A given Node can be mapped with any kind of attribute; a few examples are
+ HAS_SSD=true, JAVA_VERSION=JDK1.8, OS_TYPE=WINDOWS.
+
+
+ It is not compulsory for all attributes to have a value; the empty string is
+ the default value of NodeAttributeType.STRING.
+
+
+ Node Attribute Prefix is used as a namespace to segregate the attributes.
+
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of a node Attribute.
+
+ Based on this type, attribute expressions and values will be evaluated.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname of the node.
+ @return hostname of the node]]>
+
+
+
+
+ port for communicating with the node.
+ @return port for communicating with the node]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NodeId is the unique identifier for a node.
+
+
It includes the hostname and port to uniquely
+ identify the node. Thus, it is unique across restarts of any
+ NodeManager.
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NodeId of the node.
+ @return NodeId of the node]]>
+
+
+
+
+ NodeState of the node.
+ @return NodeState of the node]]>
+
+
+
+
+ http address of the node.
+ @return http address of the node]]>
+
+
+
+
+ rack name for the node.
+ @return rack name for the node]]>
+
+
+
+
+ used Resource on the node.
+ @return usedResource on the node]]>
+
+
+
+
+ total Resource on the node.
+ @return totalResource on the node]]>
+
+
+
+
+ diagnostic health report of the node.
+ @return diagnostic health report of the node]]>
+
+
+
+
+ last timestamp at which the health report was received.
+ @return last timestamp at which the health report was received]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It includes details such as:
+
+
{@link NodeId} of the node.
+
HTTP Tracking URL of the node.
+
Rack name for the node.
+
Used {@link Resource} on the node.
+
Total available {@link Resource} of the node.
+
Number of running containers on the node.
+
+
+ @see ApplicationClientProtocol#getClusterNodes(org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest)]]>
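The NodeReport fields listed above can be retrieved through YarnClient#getNodeReports; a minimal sketch (printing only a few of the fields) follows.

    import java.io.IOException;
    import java.util.List;

    import org.apache.hadoop.yarn.api.records.NodeReport;
    import org.apache.hadoop.yarn.api.records.NodeState;
    import org.apache.hadoop.yarn.client.api.YarnClient;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.exceptions.YarnException;

    public final class NodeReportSketch {
      /** Print id, rack and resource usage for every RUNNING node. */
      static void printRunningNodes() throws IOException, YarnException {
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(new YarnConfiguration());
        yarnClient.start();
        try {
          List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
          for (NodeReport node : nodes) {
            System.out.println(node.getNodeId() + " rack=" + node.getRackName()
                + " used=" + node.getUsed() + " total=" + node.getCapability());
          }
        } finally {
          yarnClient.stop();
        }
      }
    }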
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ State of a Node.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Mapping of Attribute Value to a Node.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ResourceManager.
+ @see PreemptionContract
+ @see StrictPreemptionContract]]>
+
+
+
+
+
+
+
+
+
+ ApplicationMaster about resources requested back by the
+ ResourceManager.
+ @see AllocateRequest#setAskList(List)]]>
+
+
+
+
+ ApplicationMaster that may be reclaimed by the
+ ResourceManager. If the AM prefers a different set of
+ containers, then it may checkpoint or kill containers matching the
+ description in {@link #getResourceRequest}.
+ @return Set of containers at risk if the contract is not met.]]>
+
+
+
+ ResourceManager.
+ The ApplicationMaster (AM) can satisfy this request according
+ to its own priorities to prevent containers from being forcibly killed by
+ the platform.
+ @see PreemptionMessage]]>
+
+
+
+
+
+
+
+
+
+ ResourceManager]]>
+
+
+
+
+
+
+
+
+
+ The AM should decode both parts of the message. The {@link
+ StrictPreemptionContract} specifies particular allocations that the RM
+ requires back. The AM can checkpoint containers' state, adjust its execution
+ plan to move the computation, or take no action and hope that conditions that
+ caused the RM to ask for the container will change.
+
+ In contrast, the {@link PreemptionContract} also includes a description of
+ resources with a set of containers. If the AM releases containers matching
+ that profile, then the containers enumerated in {@link
+ PreemptionContract#getContainers()} may not be killed.
+
+ Each preemption message reflects the RM's current understanding of the
+ cluster state, so a request to return N containers may not
+ reflect containers the AM is releasing, recently exited containers the RM has
+ yet to learn about, or new containers allocated before the message was
+ generated. Conversely, an RM may request a different profile of containers in
+ subsequent requests.
+
+ The policy enforced by the RM is part of the scheduler. Generally, only
+ containers that have been requested consistently should be killed, but the
+ details are not specified.]]>
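A sketch of how an ApplicationMaster might inspect the two parts of the message described above; the reaction to each contract is deliberately left as comments, since it is application specific.

    import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
    import org.apache.hadoop.yarn.api.records.PreemptionContainer;
    import org.apache.hadoop.yarn.api.records.PreemptionMessage;

    public final class PreemptionSketch {
      /** Inspect the preemption message piggy-backed on an allocate response. */
      static void handlePreemption(AllocateResponse response) {
        PreemptionMessage message = response.getPreemptionMessage();
        if (message == null) {
          return; // nothing is being asked back
        }
        if (message.getStrictContract() != null) {
          for (PreemptionContainer c : message.getStrictContract().getContainers()) {
            // These containers will be reclaimed regardless; checkpoint or wind
            // down the work running in container c.getId().
          }
        }
        if (message.getContract() != null) {
          // The negotiable part: the AM may instead release containers matching
          // message.getContract().getResourceRequest() on its own terms.
        }
      }
    }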
+
+ {@link #SUBMIT_APPLICATIONS} - ACL to submit applications to the queue.
+
+
{@link #ADMINISTER_QUEUE} - ACL to administer the queue.
+
+
+ @see QueueInfo
+ @see ApplicationClientProtocol#getQueueUserAcls(org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest)]]>
+
+
+
+
+
+
+
+
+
+ name of the queue.
+ @return name of the queue]]>
+
+
+
+
+ configured capacity of the queue.
+ @return configured capacity of the queue]]>
+
+
+
+
+ maximum capacity of the queue.
+ @return maximum capacity of the queue]]>
+
+
+
+
+ current capacity of the queue.
+ @return current capacity of the queue]]>
+
+
+
+
+ child queues of the queue.
+ @return child queues of the queue]]>
+
+
+
+
+ running applications of the queue.
+ @return running applications of the queue]]>
+
+
+
+
+ QueueState of the queue.
+ @return QueueState of the queue]]>
+
+
+
+
+ accessible node labels of the queue.
+ @return accessible node labels of the queue]]>
+
+
+
+
+ default node label expression of the queue, this takes
+ effect only when the ApplicationSubmissionContext and
+ ResourceRequest don't specify their
+ NodeLabelExpression.
+
+ @return default node label expression of the queue]]>
+
+
+
+
+
+
+
+ queue stats for the queue
+
+ @return queue stats of the queue]]>
+
+
+
+
+
+
+
+
+
+
+ preemption status of the queue.
+ @return the preemption status of the queue, or null if the
+ property is not present in the proto]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It includes information such as:
+
+
Queue name.
+
Capacity of the queue.
+
Maximum capacity of the queue.
+
Current capacity of the queue.
+
Child queues.
+
Running applications.
+
{@link QueueState} of the queue.
+
{@link QueueConfigurations} of the queue.
+
+
+ @see QueueState
+ @see QueueConfigurations
+ @see ApplicationClientProtocol#getQueueInfo(org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ A queue is in one of:
+
+
{@link #RUNNING} - normal state.
+
{@link #STOPPED} - not accepting new application submissions.
+
+ {@link #DRAINING} - not accepting new application submissions
+ and waiting for applications to finish.
+
+ List of {@link ResourceAllocationRequest}, which includes the time
+ interval, and capability of the allocation.
+ {@code ResourceAllocationRequest} represents an allocation
+ made for a reservation for the current state of the queue. This can be
+ changed for reasons such as re-planning, but will always be subject to
+ the constraints of the user contract as described by
+ {@link ReservationDefinition}
+
+
{@link ReservationId} of the reservation.
+
{@link ReservationDefinition} used to make the reservation.
+ The globally unique nature of the identifier is achieved by using the
+ cluster timestamp i.e. start-time of the {@code ResourceManager}
+ along with a monotonically increasing counter for the reservation.
+
+ Number of containers, of above specifications, which are required by the
+ application.
+
+
Concurrency that indicates the gang size of the request.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ memory of the resource. Note - while memory has
+ never had a unit specified, all YARN configurations have specified memory
+ in MB. The assumption has been that the daemons and applications are always
+ using the same units. With the introduction of the ResourceInformation
+ class we have support for units - so this function will continue to return
+ memory but in the units of MB
+
+ @return memory(in MB) of the resource]]>
+
+
+
+
+ memory of the resource. Note - while memory has
+ never had a unit specified, all YARN configurations have specified memory
+ in MB. The assumption has been that the daemons and applications are always
+ using the same units. With the introduction of the ResourceInformation
+ class we have support for units - so this function will continue to return
+ memory but in the units of MB
+
+ @return memory of the resource]]>
+
+
+
+
+
+ memory of the resource. Note - while memory has
+ never had a unit specified, all YARN configurations have specified memory
+ in MB. The assumption has been that the daemons and applications are always
+ using the same units. With the introduction of the ResourceInformation
+ class we have support for units - so this function will continue to set
+ memory but the assumption is that the value passed is in units of MB.
+
+ @param memory memory(in MB) of the resource]]>
+
+
+
+
+
+ memory of the resource.
+ @param memory memory of the resource]]>
+
+
+
+
+ number of virtual cpu cores of the resource.
+
+ Virtual cores are a unit for expressing CPU parallelism. A node's capacity
+ should be configured with virtual cores equal to its number of physical
+ cores. A container should be requested with the number of cores it can
+ saturate, i.e. the average number of threads it expects to have runnable
+ at a time.
+
+ @return num of virtual cpu cores of the resource]]>
+
+
+
+
+
+ number of virtual cpu cores of the resource.
+
+ Virtual cores are a unit for expressing CPU parallelism. A node's capacity
+ should be configured with virtual cores equal to its number of physical
+ cores. A container should be requested with the number of cores it can
+ saturate, i.e. the average number of threads it expects to have runnable
+ at a time.
+
+ @param vCores number of virtual cpu cores of the resource]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Resource models a set of computer resources in the
+ cluster.
+
+
Currently it models both memory and CPU.
+
+
The unit for memory is megabytes. CPU is modeled with virtual cores
+ (vcores), a unit for expressing parallelism. A node's capacity should
+ be configured with virtual cores equal to its number of physical cores. A
+ container should be requested with the number of cores it can saturate, i.e.
+ the average number of threads it expects to have runnable at a time.
+
+
Virtual cores take integer values and thus currently CPU-scheduling is
+ very coarse. A complementary axis for CPU requests that represents
+ processing power will likely be added in the future to enable finer-grained
+ resource configuration.
+
+
Typically, applications request Resource of suitable
+ capability to run their component tasks.
+
+ @see Resource]]>
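As the description notes, memory is expressed in megabytes and CPU in virtual cores; for example:

    import org.apache.hadoop.yarn.api.records.Resource;

    // 2 GiB of memory (2048 MB) and 4 virtual cores.
    Resource capability = Resource.newInstance(2048, 4);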
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ blacklist of resources
+ for the application.
+
+ @see ResourceRequest
+ @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ host/rack string represents an arbitrary
+ host name.
+
+ @param hostName host/rack on which the allocation is desired
+ @return whether the given host/rack string represents an arbitrary
+ host name]]>
+
+
+
+
+ Priority of the request.
+ @return Priority of the request]]>
+
+
+
+
+
+ Priority of the request
+ @param priority Priority of the request]]>
+
+
+
+
+ host/rack) on which the allocation
+ is desired.
+
+ A special value of * signifies that any resource
+ (host/rack) is acceptable.
+
+ @return resource (e.g. host/rack) on which the allocation
+ is desired]]>
+
+
+
+
+
+ host/rack) on which the allocation
+ is desired.
+
+ A special value of * signifies that any resource name
+ (e.g. host/rack) is acceptable.
+
+ @param resourceName (e.g. host/rack) on which the
+ allocation is desired]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ResourceRequest. Defaults to true.
+
+ @return whether locality relaxation is enabled with this
+ ResourceRequest.]]>
+
+
+
+
+
+ ExecutionTypeRequest of the requested container.
+
+ @param execSpec
+ ExecutionTypeRequest of the requested container]]>
+
+
+
+
+ ResourceRequest. Defaults to true.
+
+ @return whether locality relaxation is enabled with this
+ ResourceRequest.]]>
+
+
+
+
+
+ For a request at a network hierarchy level, set whether locality can be relaxed
+ to that level and beyond.
+
+
If the flag is off on a rack-level ResourceRequest,
+ containers at that request's priority will not be assigned to nodes on that
+ request's rack unless requests specifically for those nodes have also been
+ submitted.
+
+
If the flag is off on an {@link ResourceRequest#ANY}-level
+ ResourceRequest, containers at that request's priority will
+ only be assigned on racks for which specific requests have also been
+ submitted.
+
+
For example, to request a container strictly on a specific node, the
+ corresponding rack-level and any-level requests should have locality
+ relaxation set to false. Similarly, to request a container strictly on a
+ specific rack, the corresponding any-level request should have locality
+ relaxation set to false.
+
+ @param relaxLocality whether locality relaxation is enabled with this
+ ResourceRequest.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ID corresponding to this allocation request. This
+ ID is an identifier for different {@code ResourceRequest}s from the same
+ application. The allocated {@code Container}(s) received as part of the
+ {@code AllocateResponse} response will have the ID corresponding to the
+ original {@code ResourceRequest} for which the RM made the allocation.
+
+ The scheduler may return multiple {@code AllocateResponse}s corresponding
+ to the same ID as and when scheduler allocates {@code Container}(s).
+ Applications can continue to completely ignore the returned ID in
+ the response and use the allocation for any of their outstanding requests.
+
+ If one wishes to replace an entire {@code ResourceRequest} corresponding to
+ a specific ID, they can simply cancel the corresponding {@code
+ ResourceRequest} and submit a new one afresh.
+
+ @return the ID corresponding to this allocation request.]]>
+
+
+
+
+
+ ID corresponding to this allocation request. This
+ ID is an identifier for different {@code ResourceRequest}s from the same
+ application. The allocated {@code Container}(s) received as part of the
+ {@code AllocateResponse} response will have the ID corresponding to the
+ original {@code ResourceRequest} for which the RM made the allocation.
+
+ The scheduler may return multiple {@code AllocateResponse}s corresponding
+ to the same ID as and when scheduler allocates {@code Container}(s).
+ Applications can continue to completely ignore the returned ID in
+ the response and use the allocation for any of their outstanding requests.
+
+ If one wishes to replace an entire {@code ResourceRequest} corresponding to
+ a specific ID, they can simply cancel the corresponding {@code
+ ResourceRequest} and submit a new one afresh.
+
+ If the ID is not set, scheduler will continue to work as previously and all
+ allocated {@code Container}(s) will have the default ID, -1.
+
+ @param allocationRequestID the ID corresponding to this allocation
+ request.]]>
+
+
+
+
+
+ Resource capability of the request.
+ @param capability Resource capability of the request]]>
+
+
+
+
+ Resource capability of the request.
+ @return Resource capability of the request]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It includes:
+
+
{@link Priority} of the request.
+
+ The name of the host or rack on which the allocation is
+ desired. A special value of * signifies that
+ any host/rack is acceptable to the application.
+
+
{@link Resource} required for each request.
+
+ Number of containers, of above specifications, which are required
+ by the application.
+
+
+ A boolean relaxLocality flag, defaulting to {@code true},
+ which tells the {@code ResourceManager} if the application wants
+ locality to be loose (i.e. allows fall-through to rack or any)
+ or strict (i.e. specify hard constraint on resource allocation).
+
+
+
+ @see Resource
+ @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]>
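A sketch of the node-strict pattern described in the relaxLocality discussion above: the node-level request stays relaxed (the default), while the rack-level and ANY-level requests switch locality relaxation off. The hostname and rack name are illustrative only.

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.yarn.api.records.Priority;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.api.records.ResourceRequest;

    public final class StrictNodeRequestSketch {
      /** Ask for one 1 GiB / 1 vcore container strictly on host "worker-7". */
      static List<ResourceRequest> newStrictNodeRequests() {
        Priority priority = Priority.newInstance(1);
        Resource capability = Resource.newInstance(1024, 1);

        ResourceRequest onNode =
            ResourceRequest.newInstance(priority, "worker-7", capability, 1);
        ResourceRequest onRack =
            ResourceRequest.newInstance(priority, "/default-rack", capability, 1);
        onRack.setRelaxLocality(false);
        ResourceRequest anywhere =
            ResourceRequest.newInstance(priority, ResourceRequest.ANY, capability, 1);
        anywhere.setRelaxLocality(false);

        return Arrays.asList(onNode, onRack, anywhere);
      }
    }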
+
+
+
+
+
+
+
+
+ priority of the request.
+ @see ResourceRequest#setPriority(Priority)
+ @param priority priority of the request
+ @return {@link ResourceRequestBuilder}]]>
+
+
+
+
+
+ resourceName of the request.
+ @see ResourceRequest#setResourceName(String)
+ @param resourceName resourceName of the request
+ @return {@link ResourceRequestBuilder}]]>
+
+
+
+
+
+ capability of the request.
+ @see ResourceRequest#setCapability(Resource)
+ @param capability capability of the request
+ @return {@link ResourceRequestBuilder}]]>
+
+
+
+
+
+ numContainers of the request.
+ @see ResourceRequest#setNumContainers(int)
+ @param numContainers numContainers of the request
+ @return {@link ResourceRequestBuilder}]]>
+
+
+
+
+
+ relaxLocality of the request.
+ @see ResourceRequest#setRelaxLocality(boolean)
+ @param relaxLocality relaxLocality of the request
+ @return {@link ResourceRequestBuilder}]]>
+
+
+
+
+
+ nodeLabelExpression of the request.
+ @see ResourceRequest#setNodeLabelExpression(String)
+ @param nodeLabelExpression
+ nodeLabelExpression of the request
+ @return {@link ResourceRequestBuilder}]]>
+
+
+
+
+
+ executionTypeRequest of the request.
+ @see ResourceRequest#setExecutionTypeRequest(
+ ExecutionTypeRequest)
+ @param executionTypeRequest
+ executionTypeRequest of the request
+ @return {@link ResourceRequestBuilder}]]>
+
+
+
+
+
+ executionTypeRequest of the request with 'ensure
+ execution type' flag set to true.
+ @see ResourceRequest#setExecutionTypeRequest(
+ ExecutionTypeRequest)
+ @param executionType executionType of the request.
+ @return {@link ResourceRequestBuilder}]]>
+
+
+
+
+
+ allocationRequestId of the request.
+ @see ResourceRequest#setAllocationRequestId(long)
+ @param allocationRequestId
+ allocationRequestId of the request
+ @return {@link ResourceRequestBuilder}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ virtual memory.
+
+ @return virtual memory in MB]]>
+
+
+
+
+
+ virtual memory.
+
+ @param vmem virtual memory in MB]]>
+
+
+
+
+ physical memory.
+
+ @return physical memory in MB]]>
+
+
+
+
+
+ physical memory.
+
+ @param pmem physical memory in MB]]>
+
+
+
+
+ CPU utilization (The amount of vcores used).
+
+ @return CPU utilization]]>
+
+
+
+
+
+ CPU utilization (The amount of vcores used).
+
+ @param cpu CPU utilization]]>
+
+
+
+
+
+ custom resource utilization
+ (The amount of custom resource used).
+
+ @param resourceName resourceName of custom resource
+ @return resourceName utilization]]>
+
+
+
+
+
+
+
+
+
+
+
+ custom resource utilization
+ (The amount of custom resource used).
+ @param resourceName resourceName
+ @param utilization utilization of custom resource]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ResourceUtilization models the utilization of a set of computer
+ resources in the cluster.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ allocationRequestId of the request.
+
+ @see SchedulingRequest#setAllocationRequestId(long)
+ @param allocationRequestId allocationRequestId of the
+ request
+ @return {@link SchedulingRequest.SchedulingRequestBuilder}]]>
+
+
+
+
+
+ priority of the request.
+
+ @param priority priority of the request
+ @return {@link SchedulingRequest.SchedulingRequestBuilder}
+ @see SchedulingRequest#setPriority(Priority)]]>
+
+
+
+
+
+ executionType of the request.
+
+ @see SchedulingRequest#setExecutionType(ExecutionTypeRequest)
+ @param executionType executionType of the request
+ @return {@link SchedulingRequest.SchedulingRequestBuilder}]]>
+
+
+
+
+
+ allocationTags of the request.
+
+ @see SchedulingRequest#setAllocationTags(Set)
+ @param allocationTags allocationsTags of the request
+ @return {@link SchedulingRequest.SchedulingRequestBuilder}]]>
+
+
+
+
+
+ executionType of the request.
+
+ @see SchedulingRequest#setResourceSizing(ResourceSizing)
+ @param resourceSizing resourceSizing of the request
+ @return {@link SchedulingRequest.SchedulingRequestBuilder}]]>
+
+
+
+
+
+ placementConstraintExpression of the request.
+
+ @see SchedulingRequest#setPlacementConstraint(
+ PlacementConstraint)
+ @param placementConstraintExpression placementConstraints of
+ the request
+ @return {@link SchedulingRequest.SchedulingRequestBuilder}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationMaster that may be reclaimed by the
+ ResourceManager.
+ @return the set of {@link ContainerId} to be preempted.]]>
+
+
+
+ ApplicationMaster (AM)
+ may attempt to checkpoint work or adjust its execution plan to accommodate
+ it. In contrast to {@link PreemptionContract}, the AM has no flexibility in
+ selecting which resources to return to the cluster.
+ @see PreemptionMessage]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Token is the security entity used by the framework
+ to verify authenticity of any resource.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerId of the container.
+ @return ContainerId of the container]]>
+
+
+
+
+
+
+
+
+
+
+ ContainerUpdateType of the container.
+ @return ContainerUpdateType of the container.]]>
+
+
+
+
+
+ ContainerUpdateType of the container.
+ @param updateType of the Container]]>
+
+
+
+
+ ContainerId of the container.
+ @return ContainerId of the container]]>
+
+
+
+
+
+ ContainerId of the container.
+ @param containerId ContainerId of the container]]>
+
+
+
+
+ ExecutionType of the container.
+ @return ExecutionType of the container]]>
+
+
+
+
+
+ ExecutionType of the container.
+ @param executionType ExecutionType of the container]]>
+
+
+
+
+
+ Resource capability of the request.
+ @param capability Resource capability of the request]]>
+
+
+
+
+ Resource capability of the request.
+ @return Resource capability of the request]]>
+
+
+
+
+
+
+
+
+
+
+
+ It includes:
+
+
version for the container.
+
{@link ContainerId} for the container.
+
+ {@link Resource} capability of the container after the update request
+ is completed.
+
+
+ {@link ExecutionType} of the container after the update request is
+ completed.
+
+
+
+ Update rules:
+
+
+ Currently only ONE aspect of the container can be updated per request
+ (the user can update either the Capability OR the ExecutionType in one
+ request, not both).
+
+
+ There must be only 1 update request per container in an allocate call.
+
+
+ If a new update request is sent for a container (in a subsequent allocate
+ call) before the first one is satisfied by the Scheduler, it will
+ overwrite the previous request.
+
+
+ @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]>
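A sketch of an increase-only update that follows the one-aspect-per-request rule above. It assumes the five-argument UpdateContainerRequest.newInstance(version, containerId, updateType, capability, executionType) factory; the target size is an arbitrary example.

    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.ContainerUpdateType;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.api.records.UpdateContainerRequest;

    public final class ContainerUpdateSketch {
      /**
       * Ask the scheduler to grow an already-allocated container to 4 GiB / 2 vcores.
       * Only the capability is changed; the execution type argument is left null.
       */
      static UpdateContainerRequest growContainer(Container container) {
        return UpdateContainerRequest.newInstance(
            container.getVersion(),
            container.getId(),
            ContainerUpdateType.INCREASE_RESOURCE,
            Resource.newInstance(4096, 2),
            null);
      }
    }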
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerUpdateType.
+ @return ContainerUpdateType]]>
+
+
+
+
+
+ ContainerUpdateType.
+ @param updateType ContainerUpdateType]]>
+
+
+
+
+ Container.
+ @return Container]]>
+
+
+
+
+
+ Container.
+ @param container Container]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ URL represents a serializable {@link java.net.URL}.]]>
+
+
+
+
+
+
+
+
+
+
+
+ RMAppAttempt.]]>
+
+
+
+
+
+
+
+
+
+
+
+ ApplicationMaster.]]>
+
+
+
+
+
+
+
+
+
+ NodeManagers in the cluster.
+ @return number of NodeManagers in the cluster]]>
+
+
+
+
+ DecommissionedNodeManagers in the cluster.
+
+ @return number of DecommissionedNodeManagers in the cluster]]>
+
+
+
+
+ ActiveNodeManagers in the cluster.
+
+ @return number of ActiveNodeManagers in the cluster]]>
+
+
+
+
+ LostNodeManagers in the cluster.
+
+ @return number of LostNodeManagers in the cluster]]>
+
+
+
+
+ UnhealthyNodeManagers in the cluster.
+
+ @return number of UnhealthyNodeManagers in the cluster]]>
+
+
+
+
+ RebootedNodeManagers in the cluster.
+
+ @return number of RebootedNodeManagers in the cluster]]>
+
+
+
+ YarnClusterMetrics represents cluster metrics.
+
+
Currently only number of NodeManagers is provided.
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class contains the information about a timeline domain, which is used
+ to a user to host a number of timeline entities, isolating them from others'.
+ The user can also define the reader and writer users/groups for the the
+ domain, which is used to control the access to its entities.
+
+
+
+ The reader and writer users/groups pattern that the user can supply is the
+ same as what AccessControlList takes.
+
+ Primary filters will be used to index the entities in
+ TimelineStore, such that users should carefully choose the
+ information they want to store as the primary filters. The remaining can be
+ stored as other information.
+
LENIENT - the RM will generate and provide a keystore and truststore
+ to the AM, which it is free to use for HTTPS in its tracking URL web
+ server. The RM proxy will still allow HTTP connections to AMs that opt
+ not to use HTTPS.
+
STRICT - this is the same as LENIENT, except that the RM proxy will
+ only allow HTTPS connections to AMs; HTTP connections will be blocked
+ and result in a warning page to the user.
+ Note: Use {@link #DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH} for
+ cross-platform practice i.e. submit an application from a Windows client to
+ a Linux/Unix server or vice versa.
+
+
+ Optional when using NodeAttribute Constraint.
+
+ and where Pn can be any form of a valid constraint expression,
+ such as:
+
+
+
in,node,foo,bar
+
notin,node,foo,bar,1,2
+
and(notin,node,foo:notin,node,bar)
+
+
+ and NodeAttribute Constraint such as
+
+
+
yarn.rm.io/foo=true
+
java=1.7,1.8
+
+ @param expression expression string.
+ @return a map of source tags to placement constraint mapping.
+ @throws PlacementConstraintParseException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml
new file mode 100644
index 00000000000..a2b0cd041fc
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml
@@ -0,0 +1,3067 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In secure mode, YARN verifies access to the application, queue
+ etc. before accepting the request.
+
+ If the user does not have VIEW_APP access then the following
+ fields in the report will be set to stubbed values:
+
+
host - set to "N/A"
+
RPC port - set to -1
+
client token - set to "N/A"
+
diagnostics - set to "N/A"
+
tracking URL - set to "N/A"
+
original tracking URL - set to "N/A"
+
resource usage report - all values are -1
+
+
+ @param appId
+ {@link ApplicationId} of the application that needs a report
+ @return application report
+ @throws YarnException
+ @throws IOException]]>
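A minimal sketch of fetching such a report with YarnClient; only a few of the report fields are printed, and the stubbed values listed above will appear if the caller lacks VIEW_APP access.

    import java.io.IOException;

    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.ApplicationReport;
    import org.apache.hadoop.yarn.client.api.YarnClient;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.exceptions.YarnException;

    public final class ApplicationReportSketch {
      /** Fetch and print the report of a single application. */
      static void printReport(ApplicationId appId) throws IOException, YarnException {
        YarnClient client = YarnClient.createYarnClient();
        client.init(new YarnConfiguration());
        client.start();
        try {
          ApplicationReport report = client.getApplicationReport(appId);
          System.out.println(report.getName()
              + " state=" + report.getYarnApplicationState()
              + " tracking=" + report.getTrackingUrl());
        } finally {
          client.stop();
        }
      }
    }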
+
+
+
+
+
+
+
+ Get a report (ApplicationReport) of all Applications in the cluster.
+
+
+
+ If the user does not have VIEW_APP access for an application
+ then the corresponding report will be filtered as described in
+ {@link #getApplicationReport(ApplicationId)}.
+
+
+ @return a list of reports for all applications
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report of the given ApplicationAttempt.
+
+
+
+ In secure mode, YARN verifies access to the application, queue
+ etc. before accepting the request.
+
+
+ @param applicationAttemptId
+ {@link ApplicationAttemptId} of the application attempt that needs
+ a report
+ @return application attempt report
+ @throws YarnException
+ @throws ApplicationAttemptNotFoundException if application attempt
+ not found
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report of all (ApplicationAttempts) of Application in the cluster.
+
+
+ @param applicationId
+ @return a list of reports for all application attempts for specified
+ application
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report of the given Container.
+
+
+
+ In secure mode, YARN verifies access to the application, queue
+ etc. before accepting the request.
+
+
+ @param containerId
+ {@link ContainerId} of the container that needs a report
+ @return container report
+ @throws YarnException
+ @throws ContainerNotFoundException if container not found
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report of all (Containers) of ApplicationAttempt in the cluster.
+
+
+ @param applicationAttemptId
+ @return a list of reports of all containers for specified application
+ attempt
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+ {@code
+ AMRMClient.createAMRMClientContainerRequest()
+ }
+ @return the newly create AMRMClient instance.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ RegisterApplicationMasterResponse
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ RegisterApplicationMasterResponse
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ addContainerRequest are sent to the
+ ResourceManager. New containers assigned to the master are
+ retrieved. Status of completed containers and node health updates are also
+ retrieved. This also doubles up as a heartbeat to the ResourceManager and
+ must be made periodically. The call may not always return any new
+ allocations of containers. App should not make concurrent allocate
+ requests. May cause request loss.
+
+
+ Note : If the user has not removed container requests that have already
+ been satisfied, then the re-register may end up sending the entire
+ container requests to the RM (including matched requests). Which would mean
+ the RM could end up giving it a lot of new allocated containers.
+
+
+ @param progressIndicator Indicates progress made by the master
+ @return the response of the allocate request
+ @throws YarnException
+ @throws IOException]]>
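A sketch of the register/request/heartbeat cycle the allocate description above implies. The hostname, priority, sizing and the completion condition are placeholders, and the AMRMClient.ContainerRequest constructor shape (capability, nodes, racks, priority) is an assumption.

    import java.io.IOException;

    import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
    import org.apache.hadoop.yarn.api.records.Priority;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.client.api.AMRMClient;
    import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.exceptions.YarnException;

    public final class HeartbeatLoopSketch {
      /** Register, ask for one container, and heartbeat until work is done. */
      static void run() throws IOException, YarnException, InterruptedException {
        AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
        rmClient.init(new YarnConfiguration());
        rmClient.start();
        rmClient.registerApplicationMaster("am-host", 0, ""); // hostname is illustrative

        rmClient.addContainerRequest(new ContainerRequest(
            Resource.newInstance(1024, 1), null, null, Priority.newInstance(1)));

        boolean done = false;
        while (!done) {
          // allocate() doubles as the periodic heartbeat; do not call it concurrently.
          AllocateResponse response = rmClient.allocate(0.1f);
          for (Container allocated : response.getAllocatedContainers()) {
            // hand each allocated container to an NMClient to launch work on it
          }
          // Simplification: finish once any container completes.
          done = !response.getCompletedContainersStatuses().isEmpty();
          Thread.sleep(1000);
        }
        rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", "");
        rmClient.stop();
      }
    }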
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ allocate
+ @param req Resource request]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ allocate.
+ Any previous pending resource change request of the same container will be
+ removed.
+
+ Application that calls this method is expected to maintain the
+ Containers that are returned from previous successful
+ allocations or resource changes. By passing in the existing container and a
+ target resource capability to this method, the application requests the
+ ResourceManager to change the existing resource allocation to the target
+ resource allocation.
+
+ @deprecated use
+ {@link #requestContainerUpdate(Container, UpdateContainerRequest)}
+
+ @param container The container returned from the last successful resource
+ allocation or resource change
+ @param capability The target resource capability of the container]]>
+
+
+
+
+
+
+ allocate.
+ Any previous pending update request of the same container will be
+ removed.
+
+ @param container The container returned from the last successful resource
+ allocation or update
+ @param updateContainerRequest The UpdateContainerRequest.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerRequests matching the given
+ parameters. These ContainerRequests should have been added via
+ addContainerRequest earlier in the lifecycle. For performance,
+ the AMRMClient may return its internal collection directly without creating
+ a copy. Users should not perform mutable operations on the return value.
+ Each collection in the list contains requests with identical
+ Resource size that fit in the given capability. In a
+ collection, requests will be returned in the same order as they were added.
+
+ NOTE: This API only matches Container requests that were created by the
+ client WITHOUT the allocationRequestId being set.
+
+ @return Collection of request matching the parameters]]>
+
+
+
+
+
+
+
+
+ ContainerRequests matching the given
+ parameters. These ContainerRequests should have been added via
+ addContainerRequest earlier in the lifecycle. For performance,
+ the AMRMClient may return its internal collection directly without creating
+ a copy. Users should not perform mutable operations on the return value.
+ Each collection in the list contains requests with identical
+ Resource size that fit in the given capability. In a
+ collection, requests will be returned in the same order as they were added.
+ The matched requests additionally specify an ExecutionType.
+
+ NOTE: This API only matches Container requests that were created by the
+ client WITHOUT the allocationRequestId being set.
+
+ @param priority Priority
+ @param resourceName Location
+ @param executionType ExecutionType
+ @param capability Capability
+ @return Collection of request matching the parameters]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ ContainerRequests matching the given
+ allocationRequestId. These ContainerRequests should have been added via
+ addContainerRequest earlier in the lifecycle. For performance,
+ the AMRMClient may return its internal collection directly without creating
+ a copy. Users should not perform mutable operations on the return value.
+
+ NOTE: This API only matches Container requests that were created by the
+ client WITH the allocationRequestId being set to a non-default value.
+
+ @param allocationRequestId Allocation Request Id
+ @return Collection of request matching the parameters]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ AMRMClient. This cache must
+ be shared with the {@link NMClient} used to manage containers for the
+ AMRMClient
+
+ If a NM token cache is not set, the {@link NMTokenCache#getSingleton()}
+ singleton instance will be used.
+
+ @param nmTokenCache the NM token cache to use.]]>
+
+
+
+
+ AMRMClient. This cache must be
+ shared with the {@link NMClient} used to manage containers for the
+ AMRMClient.
+
+ If a NM token cache is not set, the {@link NMTokenCache#getSingleton()}
+ singleton instance will be used.
+
+ @return the NM token cache.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ check to return true for each 1000 ms.
+ See also {@link #waitFor(java.util.function.Supplier, int)}
+ and {@link #waitFor(java.util.function.Supplier, int, int)}
+ @param check the condition for which it should wait]]>
+
+
+
+
+
+
+
+ check to return true for each
+ checkEveryMillis ms.
+ See also {@link #waitFor(java.util.function.Supplier, int, int)}
+ @param check user defined checker
+ @param checkEveryMillis interval to call check]]>
+
+
+
+
+
+
+
+
+ check to return true for each
+ checkEveryMillis ms. In the main loop, this method will log
+ the message "waiting in main loop" for each logInterval times
+ iteration to confirm the thread is alive.
+ @param check user defined checker
+ @param checkEveryMillis interval to call check
+ @param logInterval interval to log for each]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Start an allocated container.
+
+
The ApplicationMaster or other applications that use the
+ client must provide the details of the allocated container, including the
+ Id, the assigned node's Id and the token via {@link Container}. In
+ addition, the AM needs to provide the {@link ContainerLaunchContext} as
+ well.
+
+ @param container the allocated container
+ @param containerLaunchContext the context information needed by the
+ NodeManager to launch the
+ container
+ @return a map between the auxiliary service names and their outputs
+ @throws YarnException YarnException.
+ @throws IOException IOException.]]>
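A minimal sketch of the launch step described above: the Container handed back by AMRMClient#allocate carries the NodeId and ContainerToken the NodeManager needs, so only the launch context has to be supplied by the caller.

    import java.nio.ByteBuffer;
    import java.util.Map;

    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.client.api.NMClient;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public final class StartContainerSketch {
      /** Launch work on a container handed back by AMRMClient#allocate. */
      static void launch(Container allocated, ContainerLaunchContext launchContext)
          throws Exception {
        NMClient nmClient = NMClient.createNMClient();
        nmClient.init(new YarnConfiguration());
        nmClient.start();
        Map<String, ByteBuffer> auxServiceData =
            nmClient.startContainer(allocated, launchContext);
        // auxServiceData maps auxiliary service names to their responses.
      }
    }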
+
+
+
+
+
+
+
+ Increase the resource of a container.
+
+
The ApplicationMaster or other applications that use the
+ client must provide the details of the container, including the Id and
+ the target resource encapsulated in the updated container token via
+ {@link Container}.
+
+
+ @param container the container with updated token.
+
+ @throws YarnException YarnException.
+ @throws IOException IOException.]]>
+
+
+
+
+
+
+
+ Update the resources of a container.
+
+
The ApplicationMaster or other applications that use the
+ client must provide the details of the container, including the Id and
+ the target resource encapsulated in the updated container token via
+ {@link Container}.
+
+
+ @param container the container with updated token.
+
+ @throws YarnException YarnException.
+ @throws IOException IOException.]]>
+
+
+
+
+
+
+
+
Stop a started container.
+
+ @param containerId the Id of the started container
+ @param nodeId the Id of the NodeManager
+
+ @throws YarnException YarnException.
+ @throws IOException IOException.]]>
+
+
+
+
+
+
+
+
+ Query the status of a container.
+
+ @param containerId the Id of the started container
+ @param nodeId the Id of the NodeManager
+
+ @return the status of a container.
+
+ @throws YarnException YarnException.
+ @throws IOException IOException.]]>
+
+
+
+
+
+
+
+
+
+ Re-Initialize the Container.
+
+ @param containerId the Id of the container to Re-Initialize.
+ @param containerLaunchContex the updated ContainerLaunchContext.
+ @param autoCommit whether to commit the re-initialization automatically
+
+ @throws YarnException YarnException.
+ @throws IOException IOException.]]>
+
+
+
+
+
+
+
+ Restart the specified container.
+
+ @param containerId the Id of the container to restart.
+
+ @throws YarnException YarnException.
+ @throws IOException IOException.]]>
+
+
+
+
+
+
+
+ Rollback last reInitialization of the specified container.
+
+ @param containerId the Id of the container to restart.
+
+ @throws YarnException YarnException.
+ @throws IOException IOException.]]>
+
+
+
+
+
+
+
+ Commit last reInitialization of the specified container.
+
+ @param containerId the Id of the container to commit reInitialize.
+
+ @throws YarnException YarnException.
+ @throws IOException IOException.]]>
+
+
+
+
+
+ Set whether the containers that are started by this client and are
+ still running should be stopped when the client stops. By default, the
+ feature is enabled. However, containers will be stopped only
+ when the service is stopped, i.e. after {@link NMClient#stop()}.
+
+ @param enabled whether the feature is enabled or not]]>
+
+
+
+
+
+ NMClient. This cache must be
+ shared with the {@link AMRMClient} that requested the containers managed
+ by this NMClient
+
+ If a NM token cache is not set, the {@link NMTokenCache#getSingleton()}
+ singleton instance will be used.
+
+ @param nmTokenCache the NM token cache to use.]]>
+
+
+
+
+ NMClient. This cache must be
+ shared with the {@link AMRMClient} that requested the containers managed
+ by this NMClient
+
+ If a NM token cache is not set, the {@link NMTokenCache#getSingleton()}
+ singleton instance will be used.
+
+ @return the NM token cache]]>
+
+ Using the singleton instance of the cache is appropriate when running a
+ single ApplicationMaster in the same JVM.
+
+
+ When using the singleton, users don't need to do anything special,
+ {@link AMRMClient} and {@link NMClient} are already set up to use the
+ default singleton {@link NMTokenCache}
+
+
+ If running multiple Application Masters in the same JVM, a different cache
+ instance should be used for each Application Master.
+
+
+ If using the {@link AMRMClient} and the {@link NMClient}, setting up
+ and using an instance cache is as follows:
+
+ If using {@link ApplicationMasterProtocol} and
+ {@link ContainerManagementProtocol} directly, setting up and using an
+ instance cache is as follows:
+
+ It is also possible to mix the usage of a client ({@code AMRMClient} or
+ {@code NMClient}, or the async versions of them) with a protocol proxy
+ ({@code ContainerManagementProtocolProxy} or
+ {@code ApplicationMasterProtocol}).]]>
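A sketch of the non-singleton setup described above, for the case where several Application Masters share one JVM: one NMTokenCache instance per AM, shared by that AM's AMRMClient and NMClient via the setNMTokenCache setters documented in this file.

    import org.apache.hadoop.yarn.client.api.AMRMClient;
    import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
    import org.apache.hadoop.yarn.client.api.NMClient;
    import org.apache.hadoop.yarn.client.api.NMTokenCache;

    public final class SharedTokenCacheSketch {
      /** One cache instance per ApplicationMaster, shared by both clients. */
      static NMClient newClientsWithSharedCache(AMRMClient<ContainerRequest> rmClient) {
        NMTokenCache tokenCache = new NMTokenCache();
        rmClient.setNMTokenCache(tokenCache);

        NMClient nmClient = NMClient.createNMClient();
        nmClient.setNMTokenCache(tokenCache);
        // init()/start() both clients as usual; NM tokens received through
        // rmClient.allocate() are then visible to nmClient.
        return nmClient;
      }
    }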
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The method to claim a resource with the SharedCacheManager.
+ The client uses a checksum to identify the resource and an
+ {@link ApplicationId} to identify which application will be using the
+ resource.
+
+
+
+ The SharedCacheManager responds with whether or not the
+ resource exists in the cache. If the resource exists, a URL to
+ the resource in the shared cache is returned. If the resource does not
+ exist, null is returned instead.
+
+
+
+ Once a URL has been returned for a resource, that URL is safe to use for
+ the lifetime of the application that corresponds to the provided
+ ApplicationId.
+
+
+ @param applicationId ApplicationId of the application using the resource
+ @param resourceKey the key (i.e. checksum) that identifies the resource
+ @return URL to the resource, or null if it does not exist]]>
+
+
+
+
+
+
+
+
+ The method to release a resource with the SharedCacheManager.
+ This method is called once an application is no longer using a claimed
+ resource in the shared cache. The client uses a checksum to identify the
+ resource and an {@link ApplicationId} to identify which application is
+ releasing the resource.
+
+
+
+ Note: This method is an optimization and the client is not required to call
+ it for correctness.
+
+
+ @param applicationId ApplicationId of the application releasing the
+ resource
+ @param resourceKey the key (i.e. checksum) that identifies the resource]]>
+
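A minimal sketch of the claim/release cycle described above, assuming an existing Configuration conf, an ApplicationId appId, and a checksum string already computed for the resource; the URL return type follows the description above:

    // Sketch: claim a previously uploaded resource from the shared cache and
    // release it once the application no longer needs it.
    SharedCacheClient scClient = SharedCacheClient.createSharedCacheClient();
    scClient.init(conf);
    scClient.start();

    URL resourceUrl = scClient.use(appId, checksum);   // null if not cached
    if (resourceUrl != null) {
      // localize resourceUrl as an application resource ...
    }
    scClient.release(appId, checksum);                 // optional optimization
    scClient.stop();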
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Obtain a {@link YarnClientApplication} for a new application,
+ which in turn contains the {@link ApplicationSubmissionContext} and
+ {@link org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse}
+ objects.
+
+
+ @return {@link YarnClientApplication} built for a new application
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Submit a new application to YARN. It is a blocking call, which
+ will not return an {@link ApplicationId} until the application is
+ submitted successfully and accepted by the ResourceManager.
+
+
+
+ Users should provide an {@link ApplicationId} as part of the parameter
+ {@link ApplicationSubmissionContext} when submitting a new application,
+ otherwise it will throw the {@link ApplicationIdNotProvidedException}.
+
+
+
+ This internally calls {@link ApplicationClientProtocol#submitApplication
+ (SubmitApplicationRequest)}, and after that, it internally invokes
+ {@link ApplicationClientProtocol#getApplicationReport
+ (GetApplicationReportRequest)} and waits until it can confirm that the
+ application has been properly submitted. If an RM failover or RM restart
+ happens before the ResourceManager saves the application's state,
+ {@link ApplicationClientProtocol
+ #getApplicationReport(GetApplicationReportRequest)} will throw
+ the {@link ApplicationNotFoundException}. This API automatically resubmits
+ the application with the same {@link ApplicationSubmissionContext} when it
+ catches the {@link ApplicationNotFoundException}.
+
+ @param appContext
+ {@link ApplicationSubmissionContext} containing all the details
+ needed to submit a new application
+ @return {@link ApplicationId} of the accepted application
+ @throws YarnException
+ @throws IOException
+ @see #createApplication()]]>
+
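A minimal sketch of the create/submit flow described above, assuming an existing Configuration conf; the application name and the elided AM container setup are placeholders:

    // Sketch: create and submit a new application through YarnClient.
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    YarnClientApplication app = yarnClient.createApplication();
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName("example-app");
    // ... set the AM ContainerLaunchContext, Resource, queue, etc. ...

    // Blocks until the ResourceManager has accepted the submission.
    ApplicationId appId = yarnClient.submitApplication(appContext);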
+
+
+
+
+
+
+
+ Fail an application attempt identified by given ID.
+
+
+ @param applicationAttemptId
+ {@link ApplicationAttemptId} of the attempt to fail.
+ @throws YarnException
+ in case of errors or if YARN rejects the request due to
+ access-control restrictions.
+ @throws IOException
+ @see #getQueueAclsInfo()]]>
+
+
+
+
+
+
+
+
+ Kill an application identified by given ID.
+
+
+ @param applicationId
+ {@link ApplicationId} of the application that needs to be killed
+ @throws YarnException
+ in case of errors or if YARN rejects the request due to
+ access-control restrictions.
+ @throws IOException
+ @see #getQueueAclsInfo()]]>
+
+
+
+
+
+
+
+
+
+ Kill an application identified by given ID.
+
+ @param applicationId {@link ApplicationId} of the application that needs to
+ be killed
+ @param diagnostics for killing an application.
+ @throws YarnException in case of errors or if YARN rejects the request due
+ to access-control restrictions.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report of the given Application.
+
+
+
+ In secure mode, YARN verifies access to the application, queue
+ etc. before accepting the request.
+
+
+
+ If the user does not have VIEW_APP access then the following
+ fields in the report will be set to stubbed values:
+
+
+ host - set to "N/A"
+
+ RPC port - set to -1
+
+ client token - set to "N/A"
+
+ diagnostics - set to "N/A"
+
+ tracking URL - set to "N/A"
+
+ original tracking URL - set to "N/A"
+
+ resource usage report - all values are -1
+
+
+ @param appId
+ {@link ApplicationId} of the application that needs a report
+ @return application report
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The AMRM token is required for AM to RM scheduling operations. For
+ managed Application Masters, YARN takes care of injecting it. For unmanaged
+ Application Masters, the token must be obtained via this method and set
+ in the {@link org.apache.hadoop.security.UserGroupInformation} of the
+ current user.
+
+ The AMRM token will be returned only if all the following conditions are
+ met:
+
+
+ the requester is the owner of the ApplicationMaster
+
+ the application master is an unmanaged ApplicationMaster
+
+ the application master is in ACCEPTED state
+
+ Else this method returns NULL.
+
+ @param appId {@link ApplicationId} of the application to get the AMRM token
+ @return the AMRM token if available
+ @throws YarnException
+ @throws IOException]]>
+
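For an unmanaged Application Master, a minimal sketch of fetching the token and attaching it to the current user as described above (yarnClient and appId are assumed to exist; Token and UserGroupInformation come from org.apache.hadoop.security):

    // Sketch: fetch the AMRM token and add it to the current user's credentials
    // so that subsequent AM-RM scheduling calls are authenticated.
    Token<AMRMTokenIdentifier> amrmToken = yarnClient.getAMRMToken(appId);
    if (amrmToken != null) {
      UserGroupInformation.getCurrentUser().addToken(amrmToken);
    }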
+
+
+
+
+
+
+ Get a report (ApplicationReport) of all Applications in the cluster.
+
+
+
+ If the user does not have VIEW_APP access for an application
+ then the corresponding report will be filtered as described in
+ {@link #getApplicationReport(ApplicationId)}.
+
+
+ @return a list of reports of all running applications
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report (ApplicationReport) of Applications
+ matching the given application types in the cluster.
+
+
+
+ If the user does not have VIEW_APP access for an application
+ then the corresponding report will be filtered as described in
+ {@link #getApplicationReport(ApplicationId)}.
+
+
+ @param applicationTypes set of application types you are interested in
+ @return a list of reports of applications
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report (ApplicationReport) of Applications matching the given
+ application states in the cluster.
+
+
+
+ If the user does not have VIEW_APP access for an application
+ then the corresponding report will be filtered as described in
+ {@link #getApplicationReport(ApplicationId)}.
+
+
+ @param applicationStates set of application states you are interested in
+ @return a list of reports of applications
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+ Get a report (ApplicationReport) of Applications matching the given
+ application types and application states in the cluster.
+
+
+
+ If the user does not have VIEW_APP access for an application
+ then the corresponding report will be filtered as described in
+ {@link #getApplicationReport(ApplicationId)}.
+
+
+ @param applicationTypes set of application types you are interested in
+ @param applicationStates set of application states you are interested in
+ @return a list of reports of applications
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Get a report (ApplicationReport) of Applications matching the given
+ application types, application states and application tags in the cluster.
+
+
+
+ If the user does not have VIEW_APP access for an application
+ then the corresponding report will be filtered as described in
+ {@link #getApplicationReport(ApplicationId)}.
+
+
+ @param applicationTypes set of application types you are interested in
+ @param applicationStates set of application states you are interested in
+ @param applicationTags set of application tags you are interested in
+ @return a list of reports of applications
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+ Get a report (ApplicationReport) of Applications matching the given users,
+ queues, application types and application states in the cluster. If any of
+ the params is set to null, it is not used when filtering.
+
+
+
+ If the user does not have VIEW_APP access for an application
+ then the corresponding report will be filtered as described in
+ {@link #getApplicationReport(ApplicationId)}.
+
+
+ @param queues set of queues you are interested in
+ @param users set of users you are interested in
+ @param applicationTypes set of application types you are interested in
+ @param applicationStates set of application states you are interested in
+ @return a list of reports of applications
+ @throws YarnException
+ @throws IOException]]>
+
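A minimal sketch of the filtered listing described above (yarnClient is assumed to exist; the queue name and application type are illustrative values, and any filter may be null to skip it):

    // Sketch: list RUNNING and ACCEPTED MapReduce applications in the "default" queue.
    List<ApplicationReport> reports = yarnClient.getApplications(
        Collections.singleton("default"),              // queues
        null,                                          // users (no filter)
        Collections.singleton("MAPREDUCE"),            // application types
        EnumSet.of(YarnApplicationState.RUNNING,
                   YarnApplicationState.ACCEPTED));    // application states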
+
+
+
+
+
+
+
+ Get a list of ApplicationReports that match the given
+ {@link GetApplicationsRequest}.
+
+
+
+ If the user does not have VIEW_APP access for an application
+ then the corresponding report will be filtered as described in
+ {@link #getApplicationReport(ApplicationId)}.
+
+
+ @param request the request object to get the list of applications.
+ @return The list of ApplicationReports that match the request
+ @throws YarnException Exception specific to YARN.
+ @throws IOException Exception mostly related to connection errors.]]>
+
+
+
+
+
+
+
+ Get metrics ({@link YarnClusterMetrics}) about the cluster.
+
+
+ @return cluster metrics
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report of nodes ({@link NodeReport}) in the cluster.
+
+
+ @param states The {@link NodeState}s to filter on. If no filter states are
+ given, nodes in all states will be returned.
+ @return A list of node reports
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a delegation token so as to be able to talk to YARN using those tokens.
+
+ @param renewer
+ Address of the renewer who can renew these tokens when needed by
+ securely talking to YARN.
+ @return a delegation token ({@link Token}) that can be used to
+ talk to YARN
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get information ({@link QueueInfo}) about a given queue.
+
+
+ @param queueName
+ Name of the queue whose information is needed
+ @return queue information
+ @throws YarnException
+ in case of errors or if YARN rejects the request due to
+ access-control restrictions.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Get information ({@link QueueInfo}) about all queues, recursively if there
+ is a hierarchy
+
+
+ @return a list of queue-information for all queues
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Get information ({@link QueueInfo}) about top level queues.
+
+
+ @return a list of queue-information for all the top-level queues
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get information ({@link QueueInfo}) about all the immediate children queues
+ of the given queue
+
+
+ @param parent
+ Name of the queue whose child-queues' information is needed
+ @return a list of queue-information for all queues who are direct children
+ of the given parent queue.
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Get information about acls for current user on all the
+ existing queues.
+
+
+ @return a list of queue acls ({@link QueueUserACLInfo}) for
+ current user
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report of the given ApplicationAttempt.
+
+
+
+ In secure mode, YARN verifies access to the application, queue
+ etc. before accepting the request.
+
+
+ @param applicationAttemptId
+ {@link ApplicationAttemptId} of the application attempt that needs
+ a report
+ @return application attempt report
+ @throws YarnException
+ @throws ApplicationAttemptNotFoundException if application attempt
+ not found
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get reports of all (ApplicationAttempts) of the given Application in the cluster.
+
+
+ @param applicationId application id of the app
+ @return a list of reports for all application attempts for specified
+ application.
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get a report of the given Container.
+
+
+
+ In secure mode, YARN verifies access to the application, queue
+ etc. before accepting the request.
+
+
+ @param containerId
+ {@link ContainerId} of the container that needs a report
+ @return container report
+ @throws YarnException
+ @throws ContainerNotFoundException if container not found.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Get reports of all (Containers) of the given ApplicationAttempt in the cluster.
+
+
+ @param applicationAttemptId application attempt id
+ @return a list of reports of all containers for specified application
+ attempts
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+ Attempts to move the given application to the given queue.
+
+
+ @param appId
+ Application to move.
+ @param queue
+ Queue to place it into.
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Obtain a {@link GetNewReservationResponse} for a new reservation,
+ which contains the {@link ReservationId} object.
+
+
+ @return The {@link GetNewReservationResponse} containing a new
+ {@link ReservationId} object.
+ @throws YarnException if reservation cannot be created.
+ @throws IOException if reservation cannot be created.]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to submit a new reservation to the
+ {@code ResourceManager}.
+
+
+
+ The client packages all details of its request in a
+ {@link ReservationSubmissionRequest} object. This contains information
+ about the amount of capacity, temporal constraints, and gang needs.
+ Furthermore, the reservation might be composed of multiple stages, with
+ ordering dependencies among them.
+
+
+
+ In order to respond, a new admission control component in the
+ {@code ResourceManager} performs an analysis of the resources that have
+ been committed over the period of time the user is requesting, verifies
+ that the user's request can be fulfilled, and checks that it respects a
+ sharing policy (e.g., {@code CapacityOverTimePolicy}). Once it has
+ positively determined that the ReservationRequest is satisfiable, the
+ {@code ResourceManager} answers with a {@link ReservationSubmissionResponse}
+ that includes a {@link ReservationId}. Upon failure to find a valid
+ allocation, the response is an exception with a message detailing the
+ reason for the failure.
+
+
+
+ The semantics guarantee that the {@link ReservationId} returned
+ corresponds to a valid reservation existing in the time range requested by
+ the user. The amount of capacity dedicated to such a reservation can vary
+ over time, depending on the allocation that has been determined, but it is
+ guaranteed to satisfy all the constraints expressed by the user in the
+ {@link ReservationDefinition}.
+
+
+ @param request request to submit a new Reservation
+ @return response contains the {@link ReservationId} on accepting the
+ submission
+ @throws YarnException if the reservation cannot be created successfully
+ @throws IOException]]>
+
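A minimal sketch of the reservation submission flow described above, assuming yarnClient exists, arrivalMs and deadlineMs define the desired time window, and "reservable-queue" is a queue configured as reservable; the newInstance factories shown are believed to match the record classes in org.apache.hadoop.yarn.api.records, but treat the exact signatures as an assumption:

    // Sketch: obtain a ReservationId, then submit a reservation for 2 containers
    // of 1 GB / 1 vcore within the [arrivalMs, deadlineMs] window.
    GetNewReservationResponse newReservation = yarnClient.createReservation();
    ReservationId reservationId = newReservation.getReservationId();

    ReservationRequest askOne = ReservationRequest.newInstance(
        Resource.newInstance(1024, 1), 2);
    ReservationRequests asks = ReservationRequests.newInstance(
        Collections.singletonList(askOne), ReservationRequestInterpreter.R_ALL);
    ReservationDefinition definition = ReservationDefinition.newInstance(
        arrivalMs, deadlineMs, asks, "example-reservation");
    ReservationSubmissionRequest request = ReservationSubmissionRequest.newInstance(
        definition, "reservable-queue", reservationId);
    yarnClient.submitReservation(request);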
+
+
+
+
+
+
+
+ The interface used by clients to update an existing Reservation. This is
+ referred to as a re-negotiation process, in which a user re-submits an
+ updated definition for a previously submitted Reservation.
+
+
+
+ The allocation is attempted by virtually substituting all previous
+ allocations related to this Reservation with new ones, that satisfy the new
+ {@link ReservationDefinition}. Upon success the previous allocation is
+ atomically substituted by the new one, and on failure (i.e., if the system
+ cannot find a valid allocation for the updated request), the previous
+ allocation remains valid.
+
+
+ @param request to update an existing Reservation (the
+ {@link ReservationUpdateRequest} should refer to an existing valid
+ {@link ReservationId})
+ @return response empty on successfully updating the existing reservation
+ @throws YarnException if the request is invalid or reservation cannot be
+ updated successfully
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to remove an existing Reservation.
+
+
+ @param request to remove an existing Reservation (the
+ {@link ReservationDeleteRequest} should refer to an existing valid
+ {@link ReservationId})
+ @return response empty on successfully deleting the existing reservation
+ @throws YarnException if the request is invalid or reservation cannot be
+ deleted successfully
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to get the list of reservations in a plan.
+ The reservationId will be used to search for reservations to list if it is
+ provided. Otherwise, it will select active reservations within the
+ startTime and endTime (inclusive).
+
+
+ @param request to list reservations in a plan. Contains fields to select
+ String queue, ReservationId reservationId, long startTime,
+ long endTime, and a bool includeReservationAllocations.
+
+ queue: Required. Cannot be null or empty. Refers to the
+ reservable queue in the scheduler that was selected when
+ creating a reservation submission
+ {@link ReservationSubmissionRequest}.
+
+ reservationId: Optional. If provided, other fields will
+ be ignored.
+
+ startTime: Optional. If provided, only reservations that
+ end after the startTime will be selected. This defaults
+ to 0 if an invalid number is used.
+
+ endTime: Optional. If provided, only reservations that
+ start on or before endTime will be selected. This defaults
+ to Long.MAX_VALUE if an invalid number is used.
+
+ includeReservationAllocations: Optional. Flag that
+ determines whether the entire reservation allocations are
+ to be returned. Reservation allocations are subject to
+ change in the event of re-planning as described by
+ {@link ReservationDefinition}.
+
+ @return response that contains information about reservations that are
+ being searched for.
+ @throws YarnException if the request is invalid
+ @throws IOException if the request failed otherwise]]>
+
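A minimal sketch of listing reservations as described above (yarnClient exists and "reservable-queue" is an assumed queue name; the 5-argument newInstance mirrors the fields listed above but should be treated as an assumption):

    // Sketch: list active reservations in the reservable queue over [0, Long.MAX_VALUE),
    // without returning the full allocation details.
    ReservationListRequest listRequest = ReservationListRequest.newInstance(
        "reservable-queue",   // queue (required)
        null,                 // reservationId (optional; overrides other filters)
        0L,                   // startTime
        Long.MAX_VALUE,       // endTime
        false);               // includeReservationAllocations
    ReservationListResponse listResponse = yarnClient.listReservations(listRequest);
    List<ReservationAllocationState> reservations =
        listResponse.getReservationAllocationState();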
+
+
+
+
+
+
+ The interface used by clients to get the node-to-labels mappings in the existing cluster.
+
+
+ @return node to labels mappings
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ The interface used by clients to get the labels-to-nodes mapping
+ in the existing cluster.
+
+
+ @return node to labels mappings
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to get the labels-to-nodes mapping
+ for the specified labels in the existing cluster.
+
+
+ @param labels labels for which labels to nodes mapping has to be retrieved
+ @return labels to nodes mappings for specific labels
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+ The interface used by clients to get the node labels in the cluster.
+
+
+ @return cluster node labels collection
+ @throws YarnException when there is a failure in
+ {@link ApplicationClientProtocol}
+ @throws IOException when there is a failure in
+ {@link ApplicationClientProtocol}]]>
+
+
+
+
+
+
+
+
+
+ The interface used by clients to set the priority of an application.
+
+ @param applicationId
+ @param priority
+ @return updated priority of an application.
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+ Signal a container identified by given ID.
+
+
+ @param containerId
+ {@link ContainerId} of the container that needs to be signaled
+ @param command the signal container command
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Get the resource profiles available in the RM.
+
+ @return a Map of the resource profile names to their capabilities
+ @throws YARNFeatureNotEnabledException if resource-profile is disabled
+ @throws YarnException if any error happens inside YARN
+ @throws IOException in case of other errors]]>
+
+
+
+
+
+
+
+
+ Get the details of a specific resource profile from the RM.
+
+ @param profile the profile name
+ @return resource profile name with its capabilities
+ @throws YARNFeatureNotEnabledException if resource-profile is disabled
+ @throws YarnException if any error happens inside YARN
+ @throws IOException in case of other errors]]>
+
+
+
+
+
+
+
+ Get available resource types supported by RM.
+
+ @return list of supported resource types with detailed information
+ @throws YarnException if any issue happens inside YARN
+ @throws IOException in case of other errors]]>
+
+
+
+
+
+
+
+ The interface used by clients to get the node attributes in the cluster.
+
+
+ @return cluster node attributes collection
+ @throws YarnException when there is a failure in
+ {@link ApplicationClientProtocol}
+ @throws IOException when there is a failure in
+ {@link ApplicationClientProtocol}]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to get the mapping of each AttributeKey to its
+ associated list of NodeToAttributeValues for the specified node attribute
+ keys in the cluster.
+
+
+ @param attributes AttributeKeys for which the associated NodeToAttributeValue
+ mapping has to be retrieved. If empty or null, the mapping for
+ all attribute keys in the cluster is returned
+ @return mapping of AttributeKey to the list of associated
+ NodeToAttributeValues
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to get all node-to-attribute mappings in the
+ existing cluster.
+
+
+ @param hostNames host names for which the host-to-attributes mapping has to
+ be retrieved. If empty or null, the node-to-attributes
+ mapping for all nodes in the cluster is returned.
+ @return Node to attribute mappings
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ The interface used by clients to get a shell to a container.
+
+
+ @param containerId Container ID
+ @param command Shell type
+ @throws IOException if connection fails.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Create a new instance of AMRMClientAsync.
+
+ @param intervalMs heartbeat interval in milliseconds between AM and RM
+ @param callbackHandler callback handler that processes responses from
+ the ResourceManager]]>
+
+
+
+
+
+
+
+ Create a new instance of AMRMClientAsync.
+
+ @param client the AMRMClient instance
+ @param intervalMs heartbeat interval in milliseconds between AM and RM
+ @param callbackHandler callback handler that processes responses from
+ the ResourceManager]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ RegisterApplicationMasterResponse
+ @throws YarnException
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ allocate
+ @param req Resource request]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ allocate.
+ Any previous pending resource change request of the same container will be
+ removed.
+
+ An application that calls this method is expected to maintain the
+ Containers that are returned from previous successful
+ allocations or resource changes. By passing in the existing container and a
+ target resource capability to this method, the application requests the
+ ResourceManager to change the existing resource allocation to the target
+ resource allocation.
+
+ @deprecated use
+ {@link #requestContainerUpdate(Container, UpdateContainerRequest)}
+
+ @param container The container returned from the last successful resource
+ allocation or resource change
+ @param capability The target resource capability of the container]]>
+
+
+
+
+
+
+ allocate.
+ Any previous pending update request of the same container will be
+ removed.
+
+ @param container The container returned from the last successful resource
+ allocation or update
+ @param updateContainerRequest The UpdateContainerRequest.]]>
+
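A minimal sketch of the update-request path described above (container is a Container previously allocated to this AM, amrmClientAsync is the running async client; the UpdateContainerRequest.newInstance arguments mirror the record's fields but are stated here as an assumption):

    // Sketch: ask the ResourceManager to grow a previously allocated container
    // to 4 GB / 2 vcores; the container's current version must be passed along.
    UpdateContainerRequest update = UpdateContainerRequest.newInstance(
        container.getVersion(),
        container.getId(),
        ContainerUpdateType.INCREASE_RESOURCE,
        Resource.newInstance(4096, 2),
        null);                          // keep the current ExecutionType
    amrmClientAsync.requestContainerUpdate(container, update);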
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ check to return true for each 1000 ms.
+ See also {@link #waitFor(java.util.function.Supplier, int)}
+ and {@link #waitFor(java.util.function.Supplier, int, int)}
+ @param check the condition for which it should wait]]>
+
+
+
+
+
+
+
+ check to return true for each
+ checkEveryMillis ms.
+ See also {@link #waitFor(java.util.function.Supplier, int, int)}
+ @param check user defined checker
+ @param checkEveryMillis interval to call check]]>
+
+
+
+
+
+
+
+
+ check to return true for each
+ checkEveryMillis ms. In the main loop, this method logs
+ the message "waiting in main loop" every logInterval iterations
+ to confirm the thread is alive.
+ @param check user defined checker
+ @param checkEveryMillis interval to call check
+ @param logInterval number of iterations between log messages]]>
+
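A minimal sketch of the three-argument waitFor described above, assuming amrmClientAsync is the running client and completedContainers/totalContainers are counters maintained by the callback handler:

    // Sketch: block the AM main thread until all containers have completed,
    // re-evaluating the condition every 1000 ms and logging every 10 checks.
    amrmClientAsync.waitFor(
        () -> completedContainers.get() >= totalContainers,
        1000,    // checkEveryMillis
        10);     // logInterval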
+
+
+
+
+
+
+
+
+ AMRMClientAsync handles communication with the ResourceManager
+ and provides asynchronous updates on events such as container allocations and
+ completions. It contains a thread that sends periodic heartbeats to the
+ ResourceManager.
+
+ It should be used by implementing a CallbackHandler:
+
+ {@code
+ class MyCallbackHandler extends AMRMClientAsync.AbstractCallbackHandler {
+ public void onContainersAllocated(List containers) {
+ [run tasks on the containers]
+ }
+
+ public void onContainersUpdated(List containers) {
+ [determine if resource allocation of containers have been increased in
+ the ResourceManager, and if so, inform the NodeManagers to increase the
+ resource monitor/enforcement on the containers]
+ }
+
+ public void onContainersCompleted(List statuses) {
+ [update progress, check whether app is done]
+ }
+
+ public void onNodesUpdated(List updated) {}
+
+ public void onReboot() {}
+ }
+ }
+
+
+ The client's lifecycle should be managed similarly to the following:
+
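The lifecycle example itself does not survive in this excerpt; a hedged sketch of the usual sequence, assuming the MyCallbackHandler sketched above (extended with the remaining abstract callbacks) and placeholder values for the AM host, port, and tracking URL:

    AMRMClientAsync<AMRMClient.ContainerRequest> asyncClient =
        AMRMClientAsync.createAMRMClientAsync(1000, new MyCallbackHandler());
    asyncClient.init(conf);
    asyncClient.start();
    RegisterApplicationMasterResponse response = asyncClient
        .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
            appMasterTrackingUrl);
    // request containers and hand out work as they are allocated ...
    asyncClient.addContainerRequest(containerRequest);
    // ... once all work is done:
    asyncClient.unregisterApplicationMaster(
        FinalApplicationStatus.SUCCEEDED, "", "");
    asyncClient.stop();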
+
+ The ApplicationMaster or other applications that use the
+ client must provide the details of the container, including the Id and
+ the target resource encapsulated in the updated container token via
+ {@link Container}.
+
+
+ @param container the container with updated token.]]>
+
+
+
+
+
+
+
+ Re-Initialize the Container.
+
+ @param containerId the Id of the container to Re-Initialize.
+ @param containerLaunchContex the updated ContainerLaunchContext.
+ @param autoCommit commit re-initialization automatically ?]]>
+
+
+
+
+
+ Restart the specified container.
+
+ @param containerId the Id of the container to restart.]]>
+
+
+
+
+
+ Rollback last reInitialization of the specified container.
+
+ @param containerId the Id of the container to rollback.]]>
+
+
+
+
+
+ Commit last reInitialization of the specified container.
+
+ @param containerId the Id of the container to commit reInitialize.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NMClientAsync handles communication with all the NodeManagers
+ and provides asynchronous updates on getting responses from them. It
+ maintains a thread pool to communicate with individual NMs where a number of
+ worker threads process requests to NMs by using {@link NMClientImpl}. The max
+ size of the thread pool is configurable through
+ {@link YarnConfiguration#NM_CLIENT_ASYNC_THREAD_POOL_MAX_SIZE}.
+
+ It should be used in conjunction with a CallbackHandler. For example
+
+
+ {@code
+ class MyCallbackHandler extends NMClientAsync.AbstractCallbackHandler {
+ public void onContainerStarted(ContainerId containerId,
+ Map allServiceResponse) {
+ [post process after the container is started, process the response]
+ }
+
+ public void onContainerResourceIncreased(ContainerId containerId,
+ Resource resource) {
+ [post process after the container resource is increased]
+ }
+
+ public void onContainerStatusReceived(ContainerId containerId,
+ ContainerStatus containerStatus) {
+ [make use of the status of the container]
+ }
+
+ public void onContainerStopped(ContainerId containerId) {
+ [post process after the container is stopped]
+ }
+
+ public void onStartContainerError(
+ ContainerId containerId, Throwable t) {
+ [handle the raised exception]
+ }
+
+ public void onGetContainerStatusError(
+ ContainerId containerId, Throwable t) {
+ [handle the raised exception]
+ }
+
+ public void onStopContainerError(
+ ContainerId containerId, Throwable t) {
+ [handle the raised exception]
+ }
+ }
+ }
+
+
+ The client's life-cycle should be managed like the following:
+
+
+ {@code
+ NMClientAsync asyncClient =
+ NMClientAsync.createNMClientAsync(new MyCallbackhandler());
+ asyncClient.init(conf);
+ asyncClient.start();
+ asyncClient.startContainer(container, containerLaunchContext);
+ [... wait for container being started]
+ asyncClient.getContainerStatus(container.getId(), container.getNodeId(),
+ container.getContainerToken());
+ [... handle the status in the callback instance]
+ asyncClient.stopContainer(container.getId(), container.getNodeId(),
+ container.getContainerToken());
+ [... wait for container being stopped]
+ asyncClient.stop();
+ }
+
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml
new file mode 100644
index 00000000000..311a793df5a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml
@@ -0,0 +1,3982 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of proxy.
+ @return Proxy to the ResourceManager for the specified client protocol.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Create a new instance of AppAdminClient.
+
+
+ @param appType application type
+ @param conf configuration
+ @return app admin client]]>
+
+
+
+
+
+
+
+
+
+
+
+ Launch a new YARN application.
+
+
+ @param fileName specification of application
+ @param appName name of the application
+ @param lifetime lifetime of the application
+ @param queue queue of the application
+ @return exit code
+ @throws IOException IOException
+ @throws YarnException exception in client or server]]>
+
+
+
+
+
+
+
+
+ Stop a YARN application (attempt to stop gracefully before killing the
+ application). In the case of a long-running service, the service may be
+ restarted later.
+
+
+ @param appName the name of the application
+ @return exit code
+ @throws IOException IOException
+ @throws YarnException exception in client or server]]>
+
+
+
+
+
+
+
+
+ Start a YARN application from a previously saved specification. In the
+ case of a long-running service, the service must have been previously
+ launched/started and then stopped, or previously saved but not started.
+
+
+ @param appName the name of the application
+ @return exit code
+ @throws IOException IOException
+ @throws YarnException exception in client or server]]>
+
+
+
+
+
+
+
+
+
+
+
+ Save the specification for a YARN application / long-running service.
+ The application may be started later.
+
+
+ @param fileName specification of application to save
+ @param appName name of the application
+ @param lifetime lifetime of the application
+ @param queue queue of the application
+ @return exit code
+ @throws IOException IOException
+ @throws YarnException exception in client or server]]>
+
+
+
+
+
+
+
+
+ Remove the specification and all application data for a YARN application.
+ The application cannot be running.
+
+
+ @param appName the name of the application
+ @return exit code
+ @throws IOException IOException
+ @throws YarnException exception in client or server]]>
+
+
+
+
+
+
+
+
+
+ Change the number of running containers for a component of a YARN
+ application / long-running service.
+
+
+ @param appName the name of the application
+ @param componentCounts map of component name to new component count or
+ amount to change existing component count (e.g.
+ 5, +5, -5)
+ @return exit code
+ @throws IOException IOException
+ @throws YarnException exception in client or server]]>
+
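A minimal sketch of flexing a component count as described above; "worker" and "my-service" are hypothetical component and service names, conf is an existing Configuration, and the use of AppAdminClient.DEFAULT_TYPE and the actionFlex signature are assumptions:

    // Sketch: add two more instances of an assumed "worker" component to a
    // running YARN service named "my-service".
    AppAdminClient admin = AppAdminClient.createAppAdminClient(
        AppAdminClient.DEFAULT_TYPE, conf);
    Map<String, String> componentCounts = new HashMap<>();
    componentCounts.put("worker", "+2");
    int exitCode = admin.actionFlex("my-service", componentCounts);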
+
+
+
+
+
+
+
+ Upload AM dependencies to HDFS. This makes future application launches
+ faster since the dependencies do not have to be uploaded on each launch.
+
+
+ @param destinationFolder
+ an optional HDFS folder where dependency tarball will be uploaded
+ @return exit code
+ @throws IOException
+ IOException
+ @throws YarnException
+ exception in client or server]]>
+
+
+
+
+
+
+
+
+ Get detailed app specific status string for a YARN application.
+
+
+ @param appIdOrName appId or appName
+ @return status string
+ @throws IOException IOException
+ @throws YarnException exception in client or server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Send the information of a number of conceptual entities to the timeline
+ server. It is a blocking API. The method will not return until it gets the
+ response from the timeline server.
+
+
+ @param entities
+ the collection of {@link TimelineEntity}
+ @return the error information if the sent entities are not correctly stored
+ @throws IOException if there are I/O errors
+ @throws YarnException if entities are incomplete/invalid]]>
+
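A minimal sketch of the blocking putEntities call described above, assuming an existing Configuration conf; the entity type and id are illustrative values:

    // Sketch: publish a single entity to the v1 timeline server and inspect
    // the per-entity errors reported back.
    TimelineClient timelineClient = TimelineClient.createTimelineClient();
    timelineClient.init(conf);
    timelineClient.start();

    TimelineEntity entity = new TimelineEntity();
    entity.setEntityType("EXAMPLE_ENTITY");
    entity.setEntityId("entity-0001");
    entity.setStartTime(System.currentTimeMillis());

    TimelinePutResponse response = timelineClient.putEntities(entity);
    if (!response.getErrors().isEmpty()) {
      // handle entities the server could not store
    }
    timelineClient.stop();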
+
+
+
+
+
+
+
+
+
+ Send the information of a number of conceptual entities to the timeline
+ server. It is a blocking API. The method will not return until it gets the
+ response from the timeline server.
+
+ This API is only for timeline service v1.5
+
+
+ @param appAttemptId {@link ApplicationAttemptId}
+ @param groupId {@link TimelineEntityGroupId}
+ @param entities
+ the collection of {@link TimelineEntity}
+ @return the error information if the sent entities are not correctly stored
+ @throws IOException if there are I/O errors
+ @throws YarnException if entities are incomplete/invalid]]>
+
+
+
+
+
+
+
+
+ Send the information of a domain to the timeline server. It is a
+ blocking API. The method will not return until it gets the response from
+ the timeline server.
+
+
+ @param domain
+ an {@link TimelineDomain} object
+ @throws IOException
+ @throws YarnException]]>
+
+
+
+
+
+
+
+
+
+ Send the information of a domain to the timeline server. It is a
+ blocking API. The method will not return until it gets the response from
+ the timeline server.
+
+ This API is only for timeline service v1.5
+
+
+ @param domain
+ an {@link TimelineDomain} object
+ @param appAttemptId {@link ApplicationAttemptId}
+ @throws IOException
+ @throws YarnException]]>
+
+
+
+
+
+
+
+
+ Get a delegation token so as to be able to talk to the timeline server in a
+ secure way.
+
+
+ @param renewer
+ Address of the renewer who can renew these tokens when needed by
+ securely talking to the timeline server
+ @return a delegation token ({@link Token}) that can be used to talk to the
+ timeline server
+ @throws IOException
+ @throws YarnException]]>
+
+
+
+
+
+
+
+
+ Renew a timeline delegation token.
+
+
+ @param timelineDT
+ the delegation token to renew
+ @return the new expiration time
+ @throws IOException
+ @throws YarnException]]>
+
+
+
+
+
+
+
+
+ Cancel a timeline delegation token.
+
+
+ @param timelineDT
+ the delegation token to cancel
+ @throws IOException
+ @throws YarnException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ parameterized event of type T]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ InputStream to be checksummed
+ @return the message digest of the input stream
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+ SharedCacheChecksum object based on the configurable
+ algorithm implementation
+ (see yarn.sharedcache.checksum.algo.impl)
+
+ @return SharedCacheChecksum object]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @param <OPERAND> The object type on which this state machine operates.
+ @param <STATE> The state of the entity.
+ @param <EVENTTYPE> The external eventType to be handled.
+ @param <EVENT> The event object.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When {@link #limit} would be reached on append, past messages will be
+ truncated from head, and a header telling the user about truncation will be
+ prepended, with ellipses in between header and messages.
+
+ Note that header and ellipses are not counted against {@link #limit}.
+
+ An example:
+
+
+ {@code
+ // At the beginning it's an empty string
+ final Appendable shortAppender = new BoundedAppender(80);
+ // The whole message fits into limit
+ shortAppender.append(
+ "message1 this is a very long message but fitting into limit\n");
+ // The first message is truncated, the second not
+ shortAppender.append("message2 this is shorter than the previous one\n");
+ // The first message is deleted, the second truncated, the third
+ // preserved
+ shortAppender.append("message3 this is even shorter message, maybe.\n");
+ // The first two are deleted, the third one truncated, the last preserved
+ shortAppender.append("message4 the shortest one, yet the greatest :)");
+ // Current contents are like this:
+ // Diagnostic messages truncated, showing last 80 chars out of 199:
+ // ...s is even shorter message, maybe.
+ // message4 the shortest one, yet the greatest :)
+ }
+
+
+ Note that null values are {@link #append(CharSequence) append}ed
+ just like in {@link StringBuilder#append(CharSequence) original
+ implementation}.
+