From a90238c0b8b767a39b106b7a40f294e5f2bb2410 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Sat, 25 Feb 2023 09:47:44 -0800 Subject: [PATCH 01/97] HADOOP-18631. Migrate Async appenders to log4j properties (#5418) --- .../dev-support/findbugsExcludeFile.xml | 10 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 39 +++- .../hdfs/server/common/MetricsLoggerTask.java | 37 +-- .../hadoop/hdfs/server/datanode/DataNode.java | 2 - .../hdfs/server/namenode/FSNamesystem.java | 51 ++--- .../hadoop/hdfs/server/namenode/NameNode.java | 2 - .../hadoop/hdfs/util/AsyncRFAAppender.java | 146 ++++++++++++ .../datanode/TestDataNodeMetricsLogger.java | 43 +--- .../namenode/PatternMatchingAppender.java | 58 +++++ .../server/namenode/TestAuditLogAtDebug.java | 2 +- .../hdfs/server/namenode/TestAuditLogs.java | 214 +++++++----------- .../hadoop/hdfs/server/namenode/TestFsck.java | 79 ++----- .../namenode/TestNameNodeMetricsLogger.java | 47 +--- .../ha/TestDNFencingWithReplication.java | 2 +- .../src/test/resources/log4j.properties | 59 +++-- 15 files changed, 425 insertions(+), 366 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/AsyncRFAAppender.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/PatternMatchingAppender.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index 5c2df9acf4e..8632c567aa1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -310,4 +310,14 @@ + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 1ab7edd6adc..e5e21e4307a 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -733,12 +733,43 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME = "default"; public static final String DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY = "dfs.namenode.audit.log.token.tracking.id"; public static final boolean DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT = false; - public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY = "dfs.namenode.audit.log.async"; + /** + * Deprecated. Use log4j properties instead. + * Set system env variable HDFS_AUDIT_LOGGER, which in tern assigns the value to + * "hdfs.audit.logger" for log4j properties to determine log level and appender. + */ + @Deprecated + public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY = "dfs.namenode.audit.log.async"; + @Deprecated public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT = false; - public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_KEY = "dfs.namenode.audit.log.async.blocking"; + + /** + * Deprecated. Use log4j properties instead. + * Set value to Async appender "blocking" property as part of log4j properties configuration. + *

+ * For example, + * log4j.appender.ASYNCAPPENDER=org.apache.log4j.AsyncAppender + * log4j.appender.ASYNCAPPENDER.blocking=false + */ + @Deprecated + public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_KEY = + "dfs.namenode.audit.log.async.blocking"; + @Deprecated public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_DEFAULT = true; - public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_KEY = "dfs.namenode.audit.log.async.buffer.size"; - public static final int DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_DEFAULT = 128; + + /** + * Deprecated. Use log4j properties instead. + * Set value to Async appender "bufferSize" property as part of log4j properties configuration. + *

+ * For example, + * log4j.appender.ASYNCAPPENDER=org.apache.log4j.AsyncAppender + * log4j.appender.ASYNCAPPENDER.bufferSize=128 + */ + @Deprecated + public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_KEY = + "dfs.namenode.audit.log.async.buffer.size"; + @Deprecated + public static final int DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_DEFAULT = 128; public static final String DFS_NAMENODE_AUDIT_LOG_DEBUG_CMDLIST = "dfs.namenode.audit.log.debug.cmdlist"; public static final String DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY = "dfs.namenode.metrics.logger.period.seconds"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/MetricsLoggerTask.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/MetricsLoggerTask.java index 66685f6cc18..21c01cebd40 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/MetricsLoggerTask.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/MetricsLoggerTask.java @@ -18,9 +18,7 @@ package org.apache.hadoop.hdfs.server.common; import java.lang.management.ManagementFactory; -import java.util.Collections; import java.util.HashSet; -import java.util.List; import java.util.Set; import javax.management.Attribute; @@ -34,8 +32,6 @@ import javax.management.ObjectName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.metrics2.util.MBeans; -import org.apache.log4j.Appender; -import org.apache.log4j.AsyncAppender; /** * MetricsLoggerTask can be used as utility to dump metrics to log. @@ -56,12 +52,12 @@ public class MetricsLoggerTask implements Runnable { } } - private org.apache.log4j.Logger metricsLog; + private Logger metricsLog; private String nodeName; private short maxLogLineLength; public MetricsLoggerTask(String metricsLog, String nodeName, short maxLogLineLength) { - this.metricsLog = org.apache.log4j.Logger.getLogger(metricsLog); + this.metricsLog = LoggerFactory.getLogger(metricsLog); this.nodeName = nodeName; this.maxLogLineLength = maxLogLineLength; } @@ -115,8 +111,11 @@ public class MetricsLoggerTask implements Runnable { .substring(0, maxLogLineLength) + "..."); } - private static boolean hasAppenders(org.apache.log4j.Logger logger) { - return logger.getAllAppenders().hasMoreElements(); + // TODO : hadoop-logging module to hide log4j implementation details, this method + // can directly call utility from hadoop-logging. + private static boolean hasAppenders(Logger logger) { + return org.apache.log4j.Logger.getLogger(logger.getName()).getAllAppenders() + .hasMoreElements(); } /** @@ -138,26 +137,4 @@ public class MetricsLoggerTask implements Runnable { return attributeNames; } - /** - * Make the metrics logger async and add all pre-existing appenders to the - * async appender. 
- */ - public static void makeMetricsLoggerAsync(String metricsLog) { - org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(metricsLog); - logger.setAdditivity(false); // Don't pollute actual logs with metrics dump - - @SuppressWarnings("unchecked") - List appenders = Collections.list(logger.getAllAppenders()); - // failsafe against trying to async it more than once - if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) { - AsyncAppender asyncAppender = new AsyncAppender(); - // change logger to have an async appender containing all the - // previously configured appenders - for (Appender appender : appenders) { - logger.removeAppender(appender); - asyncAppender.addAppender(appender); - } - logger.addAppender(asyncAppender); - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index ce566885983..b781053a767 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -4058,8 +4058,6 @@ public class DataNode extends ReconfigurableBase return; } - MetricsLoggerTask.makeMetricsLoggerAsync(METRICS_LOG_NAME); - // Schedule the periodic logging. metricsLoggerTimer = new ScheduledThreadPoolExecutor(1); metricsLoggerTimer.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index a7ee3354977..e44a16f029e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -338,10 +338,9 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.util.Lists; -import org.apache.log4j.Logger; -import org.apache.log4j.Appender; -import org.apache.log4j.AsyncAppender; import org.eclipse.jetty.util.ajax.JSON; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; @@ -349,8 +348,6 @@ import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.slf4j.LoggerFactory; - /** * FSNamesystem is a container of both transient * and persisted name-space state, and does all the book-keeping @@ -384,8 +381,7 @@ import org.slf4j.LoggerFactory; public class FSNamesystem implements Namesystem, FSNamesystemMBean, NameNodeMXBean, ReplicatedBlocksMBean, ECBlockGroupsMBean { - public static final org.slf4j.Logger LOG = LoggerFactory - .getLogger(FSNamesystem.class.getName()); + public static final Logger LOG = LoggerFactory.getLogger(FSNamesystem.class); // The following are private configurations public static final String DFS_NAMENODE_SNAPSHOT_TRASHROOT_ENABLED = @@ -488,7 +484,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, * 
perm=<permissions (optional)> * */ - public static final Logger AUDIT_LOG = Logger.getLogger(FSNamesystem.class.getName() + ".audit"); + public static final Logger AUDIT_LOG = + LoggerFactory.getLogger(FSNamesystem.class.getName() + ".audit"); private final int maxCorruptFileBlocksReturn; private final boolean isPermissionEnabled; @@ -858,11 +855,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, throws IOException { provider = DFSUtil.createKeyProviderCryptoExtension(conf); LOG.info("KeyProvider: " + provider); - if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY, - DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) { - LOG.info("Enabling async auditlog"); - enableAsyncAuditLog(conf); - } + checkForAsyncLogEnabledByOldConfigs(conf); auditLogWithRemotePort = conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_WITH_REMOTE_PORT_KEY, DFS_NAMENODE_AUDIT_LOG_WITH_REMOTE_PORT_DEFAULT); @@ -1076,6 +1069,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, } } + @SuppressWarnings("deprecation") + private static void checkForAsyncLogEnabledByOldConfigs(Configuration conf) { + if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY, DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) { + LOG.warn("Use log4j properties to enable async log for audit logs. {} is deprecated", + DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY); + } + } + @VisibleForTesting public List getAuditLoggers() { return auditLoggers; @@ -8856,30 +8857,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, } } - private static void enableAsyncAuditLog(Configuration conf) { - Logger logger = AUDIT_LOG; - @SuppressWarnings("unchecked") - List appenders = Collections.list(logger.getAllAppenders()); - // failsafe against trying to async it more than once - if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) { - AsyncAppender asyncAppender = new AsyncAppender(); - asyncAppender.setBlocking(conf.getBoolean( - DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_KEY, - DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_DEFAULT - )); - asyncAppender.setBufferSize(conf.getInt( - DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_KEY, - DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_DEFAULT - )); - // change logger to have an async appender containing all the - // previously configured appenders - for (Appender appender : appenders) { - logger.removeAppender(appender); - asyncAppender.addAppender(appender); - } - logger.addAppender(asyncAppender); - } - } /** * Return total number of Sync Operations on FSEditLog. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index ddd9fd8087f..ff25eedea0e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -946,8 +946,6 @@ public class NameNode extends ReconfigurableBase implements return; } - MetricsLoggerTask.makeMetricsLoggerAsync(METRICS_LOG_NAME); - // Schedule the periodic logging. 
metricsLoggerTimer = new ScheduledThreadPoolExecutor(1); metricsLoggerTimer.setExecuteExistingDelayedTasksAfterShutdownPolicy( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/AsyncRFAAppender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/AsyncRFAAppender.java new file mode 100644 index 00000000000..276e5b0987a --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/AsyncRFAAppender.java @@ -0,0 +1,146 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.util; + +import java.io.IOException; + +import org.apache.log4j.AsyncAppender; +import org.apache.log4j.PatternLayout; +import org.apache.log4j.RollingFileAppender; +import org.apache.log4j.spi.LoggingEvent; + +/** + * Until we migrate to log4j2, use this appender for namenode audit logger as well as + * datanode and namenode metric loggers with log4j properties, if async logging is required with + * RFA. + * This appender will take parameters necessary to supply RollingFileAppender to AsyncAppender. + * While migrating to log4j2, we can directly wrap RFA appender to Async appender as part of + * log4j2 properties. However, same is not possible with log4j1 properties. + */ +public class AsyncRFAAppender extends AsyncAppender { + + /** + * The default maximum file size is 10MB. + */ + private String maxFileSize = String.valueOf(10*1024*1024); + + /** + * There is one backup file by default. + */ + private int maxBackupIndex = 1; + + /** + * The name of the log file. + */ + private String fileName = null; + + private String conversionPattern = null; + + /** + * Does appender block when buffer is full. + */ + private boolean blocking = true; + + /** + * Buffer size. 
+ */ + private int bufferSize = DEFAULT_BUFFER_SIZE; + + private RollingFileAppender rollingFileAppender = null; + + private volatile boolean isRollingFileAppenderAssigned = false; + + @Override + public void append(LoggingEvent event) { + if (rollingFileAppender == null) { + appendRFAToAsyncAppender(); + } + super.append(event); + } + + private synchronized void appendRFAToAsyncAppender() { + if (!isRollingFileAppenderAssigned) { + PatternLayout patternLayout; + if (conversionPattern != null) { + patternLayout = new PatternLayout(conversionPattern); + } else { + patternLayout = new PatternLayout(); + } + try { + rollingFileAppender = new RollingFileAppender(patternLayout, fileName, true); + } catch (IOException e) { + throw new RuntimeException(e); + } + rollingFileAppender.setMaxBackupIndex(maxBackupIndex); + rollingFileAppender.setMaxFileSize(maxFileSize); + this.addAppender(rollingFileAppender); + isRollingFileAppenderAssigned = true; + super.setBlocking(blocking); + super.setBufferSize(bufferSize); + } + } + + public String getMaxFileSize() { + return maxFileSize; + } + + public void setMaxFileSize(String maxFileSize) { + this.maxFileSize = maxFileSize; + } + + public int getMaxBackupIndex() { + return maxBackupIndex; + } + + public void setMaxBackupIndex(int maxBackupIndex) { + this.maxBackupIndex = maxBackupIndex; + } + + public String getFileName() { + return fileName; + } + + public void setFileName(String fileName) { + this.fileName = fileName; + } + + public String getConversionPattern() { + return conversionPattern; + } + + public void setConversionPattern(String conversionPattern) { + this.conversionPattern = conversionPattern; + } + + public boolean isBlocking() { + return blocking; + } + + public void setBlocking(boolean blocking) { + this.blocking = blocking; + } + + public int getBufferSize() { + return bufferSize; + } + + public void setBufferSize(int bufferSize) { + this.bufferSize = bufferSize; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetricsLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetricsLogger.java index 29619cc4e31..73201ba6054 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetricsLogger.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetricsLogger.java @@ -30,7 +30,6 @@ import java.util.Collections; import java.util.List; import java.util.Random; import java.util.concurrent.TimeoutException; -import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,12 +39,11 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.namenode.PatternMatchingAppender; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Appender; -import org.apache.log4j.AppenderSkeleton; import org.apache.log4j.AsyncAppender; -import org.apache.log4j.spi.LoggingEvent; import org.junit.After; import org.junit.Assert; import org.junit.Rule; @@ -151,9 +149,9 @@ public class TestDataNodeMetricsLogger { metricsProvider); startDNForTest(true); assertNotNull(dn); - final PatternMatchingAppender appender = new PatternMatchingAppender( - "^.*FakeMetric.*$"); - 
addAppender(org.apache.log4j.Logger.getLogger(DataNode.METRICS_LOG_NAME), appender); + final PatternMatchingAppender appender = + (PatternMatchingAppender) org.apache.log4j.Logger.getLogger(DataNode.METRICS_LOG_NAME) + .getAppender("PATTERNMATCHERAPPENDER"); // Ensure that the supplied pattern was matched. GenericTestUtils.waitFor(new Supplier() { @@ -186,37 +184,4 @@ public class TestDataNodeMetricsLogger { } } - /** - * An appender that matches logged messages against the given regular - * expression. - */ - public static class PatternMatchingAppender extends AppenderSkeleton { - private final Pattern pattern; - private volatile boolean matched; - - public PatternMatchingAppender(String pattern) { - this.pattern = Pattern.compile(pattern); - this.matched = false; - } - - public boolean isMatched() { - return matched; - } - - @Override - protected void append(LoggingEvent event) { - if (pattern.matcher(event.getMessage().toString()).matches()) { - matched = true; - } - } - - @Override - public void close() { - } - - @Override - public boolean requiresLayout() { - return false; - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/PatternMatchingAppender.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/PatternMatchingAppender.java new file mode 100644 index 00000000000..f099dfae733 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/PatternMatchingAppender.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.regex.Pattern; + +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.spi.LoggingEvent; + +/** + * An appender that matches logged messages against the given + * regular expression. 
+ */ +public class PatternMatchingAppender extends AppenderSkeleton { + private final Pattern pattern; + private volatile boolean matched; + + public PatternMatchingAppender() { + this.pattern = Pattern.compile("^.*FakeMetric.*$"); + this.matched = false; + } + + public boolean isMatched() { + return matched; + } + + @Override + protected void append(LoggingEvent event) { + if (pattern.matcher(event.getMessage().toString()).matches()) { + matched = true; + } + } + + @Override + public void close() { + } + + @Override + public boolean requiresLayout() { + return false; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java index dad4fa306c7..a6eba0ea051 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java @@ -26,11 +26,11 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.FSNamesystemAuditLogger; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; +import org.slf4j.event.Level; import java.net.Inet4Address; import java.util.Arrays; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java index 54fcc17cdcf..698178e4e96 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java @@ -25,10 +25,10 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Enumeration; import java.util.List; import java.util.regex.Pattern; @@ -46,15 +46,10 @@ import org.apache.hadoop.hdfs.web.WebHdfsTestUtil; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.test.GenericTestUtils; -import org.apache.hadoop.test.PathUtils; import org.apache.log4j.Appender; import org.apache.log4j.AsyncAppender; -import org.apache.log4j.Level; -import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import org.apache.log4j.PatternLayout; -import org.apache.log4j.RollingFileAppender; + import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -68,36 +63,39 @@ import org.slf4j.LoggerFactory; */ @RunWith(Parameterized.class) public class TestAuditLogs { - static final String auditLogFile = PathUtils.getTestDirName(TestAuditLogs.class) + "/TestAuditLogs-audit.log"; - final boolean useAsyncLog; + + private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(TestAuditLogs.class); + + private static final File AUDIT_LOG_FILE = + new File(System.getProperty("hadoop.log.dir"), "hdfs-audit.log"); + final boolean useAsyncEdits; @Parameters public static Collection data() { - Collection 
params = new ArrayList(); - params.add(new Object[]{Boolean.FALSE, Boolean.FALSE}); - params.add(new Object[]{Boolean.TRUE, Boolean.FALSE}); - params.add(new Object[]{Boolean.FALSE, Boolean.TRUE}); - params.add(new Object[]{Boolean.TRUE, Boolean.TRUE}); + Collection params = new ArrayList<>(); + params.add(new Object[]{Boolean.FALSE}); + params.add(new Object[]{Boolean.TRUE}); return params; } - public TestAuditLogs(boolean useAsyncLog, boolean useAsyncEdits) { - this.useAsyncLog = useAsyncLog; + public TestAuditLogs(boolean useAsyncEdits) { this.useAsyncEdits = useAsyncEdits; } // Pattern for: // allowed=(true|false) ugi=name ip=/address cmd={cmd} src={path} dst=null perm=null - static final Pattern auditPattern = Pattern.compile( + private static final Pattern AUDIT_PATTERN = Pattern.compile( "allowed=.*?\\s" + "ugi=.*?\\s" + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\s" + "cmd=.*?\\ssrc=.*?\\sdst=null\\s" + "perm=.*?"); - static final Pattern successPattern = Pattern.compile( + private static final Pattern SUCCESS_PATTERN = Pattern.compile( ".*allowed=true.*"); - static final Pattern webOpenPattern = Pattern.compile( + private static final Pattern FAILURE_PATTERN = Pattern.compile( + ".*allowed=false.*"); + private static final Pattern WEB_OPEN_PATTERN = Pattern.compile( ".*cmd=open.*proto=webhdfs.*"); static final String username = "bob"; @@ -113,14 +111,15 @@ public class TestAuditLogs { @Before public void setupCluster() throws Exception { + try (PrintWriter writer = new PrintWriter(AUDIT_LOG_FILE)) { + writer.print(""); + } // must configure prior to instantiating the namesystem because it // will reconfigure the logger if async is enabled - configureAuditLogs(); conf = new HdfsConfiguration(); final long precision = 1L; conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, precision); conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10000L); - conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY, useAsyncLog); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_EDITS_ASYNC_LOGGING, useAsyncEdits); util = new DFSTestUtil.Builder().setName("TestAuditAllowed"). 
setNumFiles(20).build(); @@ -129,19 +128,25 @@ public class TestAuditLogs { util.createFiles(fs, fileName); // make sure the appender is what it's supposed to be - Logger logger = FSNamesystem.AUDIT_LOG; + Logger logger = org.apache.log4j.Logger.getLogger( + "org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit"); @SuppressWarnings("unchecked") List appenders = Collections.list(logger.getAllAppenders()); assertEquals(1, appenders.size()); - assertEquals(useAsyncLog, appenders.get(0) instanceof AsyncAppender); + assertTrue(appenders.get(0) instanceof AsyncAppender); fnames = util.getFileNames(fileName); util.waitReplication(fs, fileName, (short)3); userGroupInfo = UserGroupInformation.createUserForTesting(username, groups); + LOG.info("Audit log file: {}, exists: {}, length: {}", AUDIT_LOG_FILE, AUDIT_LOG_FILE.exists(), + AUDIT_LOG_FILE.length()); } @After public void teardownCluster() throws Exception { + try (PrintWriter writer = new PrintWriter(AUDIT_LOG_FILE)) { + writer.print(""); + } util.cleanup(fs, "/srcdat"); if (fs != null) { fs.close(); @@ -159,11 +164,10 @@ public class TestAuditLogs { final Path file = new Path(fnames[0]); FileSystem userfs = DFSTestUtil.getFileSystemAs(userGroupInfo, conf); - setupAuditLogs(); InputStream istream = userfs.open(file); int val = istream.read(); istream.close(); - verifyAuditLogs(true); + verifySuccessCommandsAuditLogs(2, fnames[0], "cmd=open"); assertTrue("failed to read from file", val >= 0); } @@ -173,9 +177,8 @@ public class TestAuditLogs { final Path file = new Path(fnames[0]); FileSystem userfs = DFSTestUtil.getFileSystemAs(userGroupInfo, conf); - setupAuditLogs(); FileStatus st = userfs.getFileStatus(file); - verifyAuditLogs(true); + verifySuccessCommandsAuditLogs(2, fnames[0], "cmd=getfileinfo"); assertTrue("failed to stat file", st != null && st.isFile()); } @@ -188,15 +191,13 @@ public class TestAuditLogs { fs.setPermission(file, new FsPermission((short)0600)); fs.setOwner(file, "root", null); - setupAuditLogs(); - try { userfs.open(file); fail("open must not succeed"); } catch(AccessControlException e) { System.out.println("got access denied, as expected."); } - verifyAuditLogs(false); + verifyFailedCommandsAuditLogs(1, fnames[0], "cmd=open"); } /** test that access via webhdfs puts proper entry in audit log */ @@ -207,14 +208,12 @@ public class TestAuditLogs { fs.setPermission(file, new FsPermission((short)0644)); fs.setOwner(file, "root", null); - setupAuditLogs(); - WebHdfsFileSystem webfs = WebHdfsTestUtil.getWebHdfsFileSystemAs(userGroupInfo, conf, WebHdfsConstants.WEBHDFS_SCHEME); InputStream istream = webfs.open(file); int val = istream.read(); istream.close(); - verifyAuditLogsRepeat(true, 3); + verifySuccessCommandsAuditLogs(3, fnames[0], "cmd=open"); assertTrue("failed to read from file", val >= 0); } @@ -226,12 +225,10 @@ public class TestAuditLogs { fs.setPermission(file, new FsPermission((short)0644)); fs.setOwner(file, "root", null); - setupAuditLogs(); - WebHdfsFileSystem webfs = WebHdfsTestUtil.getWebHdfsFileSystemAs(userGroupInfo, conf, WebHdfsConstants.WEBHDFS_SCHEME); FileStatus st = webfs.getFileStatus(file); - verifyAuditLogs(true); + verifySuccessCommandsAuditLogs(2, fnames[0], "cmd=getfileinfo"); assertTrue("failed to stat file", st != null && st.isFile()); } @@ -243,7 +240,6 @@ public class TestAuditLogs { fs.setPermission(file, new FsPermission((short)0600)); fs.setOwner(file, "root", null); - setupAuditLogs(); try { WebHdfsFileSystem webfs = WebHdfsTestUtil.getWebHdfsFileSystemAs(userGroupInfo, conf, 
WebHdfsConstants.WEBHDFS_SCHEME); InputStream istream = webfs.open(file); @@ -252,7 +248,7 @@ public class TestAuditLogs { } catch(AccessControlException E) { System.out.println("got access denied, as expected."); } - verifyAuditLogsRepeat(false, 2); + verifyFailedCommandsAuditLogs(1, fnames[0], "cmd=open"); } /** test that open via webhdfs puts proper entry in audit log */ @@ -263,124 +259,68 @@ public class TestAuditLogs { fs.setPermission(file, new FsPermission((short)0644)); fs.setOwner(file, "root", null); - setupAuditLogs(); - WebHdfsFileSystem webfs = WebHdfsTestUtil.getWebHdfsFileSystemAs(userGroupInfo, conf, WebHdfsConstants.WEBHDFS_SCHEME); webfs.open(file).read(); - verifyAuditLogsCheckPattern(true, 3, webOpenPattern); + verifySuccessCommandsAuditLogs(3, fnames[0], "cmd=open"); } /** make sure that "\r\n" isn't made into a newline in audit log */ @Test public void testAuditCharacterEscape() throws Exception { final Path file = new Path("foo" + "\r\n" + "bar"); - setupAuditLogs(); fs.create(file); - verifyAuditLogsRepeat(true, 1); + verifySuccessCommandsAuditLogs(1, "foo", "cmd=create"); } - /** Sets up log4j logger for auditlogs */ - private void setupAuditLogs() throws IOException { - Logger logger = FSNamesystem.AUDIT_LOG; - // enable logging now that the test is ready to run - logger.setLevel(Level.INFO); - } - - private void configureAuditLogs() throws IOException { - // Shutdown the LogManager to release all logger open file handles. - // Unfortunately, Apache commons logging library does not provide - // means to release underlying loggers. For additional info look up - // commons library FAQ. - LogManager.shutdown(); - - File file = new File(auditLogFile); - if (file.exists()) { - assertTrue(file.delete()); - } - // disable logging while the cluster startup preps files - disableAuditLog(); - PatternLayout layout = new PatternLayout("%m%n"); - RollingFileAppender appender = new RollingFileAppender(layout, auditLogFile); - Logger logger = FSNamesystem.AUDIT_LOG; - logger.addAppender(appender); - } - - // Ensure audit log has only one entry - private void verifyAuditLogs(boolean expectSuccess) throws IOException { - verifyAuditLogsRepeat(expectSuccess, 1); - } - - // Ensure audit log has exactly N entries - private void verifyAuditLogsRepeat(boolean expectSuccess, int ndupe) + private void verifySuccessCommandsAuditLogs(int leastExpected, String file, String cmd) throws IOException { - // Turn off the logs - disableAuditLog(); - // Close the appenders and force all logs to be flushed - Logger logger = FSNamesystem.AUDIT_LOG; - Enumeration appenders = logger.getAllAppenders(); - while (appenders.hasMoreElements()) { - Appender appender = (Appender)appenders.nextElement(); - appender.close(); - } - - BufferedReader reader = new BufferedReader(new FileReader(auditLogFile)); - String line = null; - boolean ret = true; - - try { - for (int i = 0; i < ndupe; i++) { - line = reader.readLine(); + try (BufferedReader reader = new BufferedReader(new FileReader(AUDIT_LOG_FILE))) { + String line; + int success = 0; + while ((line = reader.readLine()) != null) { assertNotNull(line); - assertTrue("Expected audit event not found in audit log", - auditPattern.matcher(line).matches()); - ret &= successPattern.matcher(line).matches(); - } - assertNull("Unexpected event in audit log", reader.readLine()); - assertTrue("Expected success=" + expectSuccess, ret == expectSuccess); - } finally { - reader.close(); - } - } - - // Ensure audit log has exactly N entries - private void 
verifyAuditLogsCheckPattern(boolean expectSuccess, int ndupe, Pattern pattern) - throws IOException { - // Turn off the logs - disableAuditLog(); - - // Close the appenders and force all logs to be flushed - Logger logger = FSNamesystem.AUDIT_LOG; - Enumeration appenders = logger.getAllAppenders(); - while (appenders.hasMoreElements()) { - Appender appender = (Appender)appenders.nextElement(); - appender.close(); - } - - BufferedReader reader = new BufferedReader(new FileReader(auditLogFile)); - String line = null; - boolean ret = true; - boolean patternMatches = false; - - try { - for (int i = 0; i < ndupe; i++) { - line = reader.readLine(); - assertNotNull(line); - patternMatches |= pattern.matcher(line).matches(); - ret &= successPattern.matcher(line).matches(); + LOG.info("Line: {}", line); + if (SUCCESS_PATTERN.matcher(line).matches() && line.contains(file) && line.contains( + cmd)) { + assertTrue("Expected audit event not found in audit log", + AUDIT_PATTERN.matcher(line).matches()); + LOG.info("Successful verification. Log line: {}", line); + success++; } - assertNull("Unexpected event in audit log", reader.readLine()); - assertTrue("Expected audit event not found in audit log", patternMatches); - assertTrue("Expected success=" + expectSuccess, ret == expectSuccess); - } finally { - reader.close(); } + if (success < leastExpected) { + throw new AssertionError( + "Least expected: " + leastExpected + ". Actual success: " + success); + } + } } - private void disableAuditLog() { - GenericTestUtils.disableLog(LoggerFactory.getLogger( - FSNamesystem.class.getName() + ".audit")); + private void verifyFailedCommandsAuditLogs(int leastExpected, String file, String cmd) + throws IOException { + + try (BufferedReader reader = new BufferedReader(new FileReader(AUDIT_LOG_FILE))) { + String line; + int success = 0; + while ((line = reader.readLine()) != null) { + assertNotNull(line); + LOG.info("Line: {}", line); + if (FAILURE_PATTERN.matcher(line).matches() && line.contains(file) && line.contains( + cmd)) { + assertTrue("Expected audit event not found in audit log", + AUDIT_PATTERN.matcher(line).matches()); + LOG.info("Failure verification. Log line: {}", line); + success++; + } + } + assertEquals("Expected: " + leastExpected + ". Actual failure: " + success, leastExpected, + success); + if (success < leastExpected) { + throw new AssertionError( + "Least expected: " + leastExpected + ". 
Actual success: " + success); + } + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index 0f8ca10174a..8d8183e5ad1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java @@ -23,7 +23,6 @@ import static org.apache.hadoop.hdfs.MiniDFSCluster.HDFS_MINIDFS_BASEDIR; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; @@ -119,11 +118,8 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.ToolRunner; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.apache.log4j.PatternLayout; -import org.apache.log4j.RollingFileAppender; import org.junit.After; +import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -136,9 +132,9 @@ public class TestFsck { private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(TestFsck.class.getName()); - static final String AUDITLOG_FILE = - GenericTestUtils.getTempPath("TestFsck-audit.log"); - + private static final File AUDIT_LOG_FILE = + new File(System.getProperty("hadoop.log.dir"), "hdfs-audit.log"); + // Pattern for: // allowed=true ugi=name ip=/address cmd=FSCK src=/ dst=null perm=null static final Pattern FSCK_PATTERN = Pattern.compile( @@ -195,6 +191,11 @@ public class TestFsck { shutdownCluster(); } + @AfterClass + public static void afterClass() throws Exception { + assertTrue(AUDIT_LOG_FILE.delete()); + } + private void shutdownCluster() throws Exception { if (cluster != null) { cluster.shutdown(); @@ -221,7 +222,6 @@ public class TestFsck { final Path file = new Path(fileName); long aTime = fs.getFileStatus(file).getAccessTime(); Thread.sleep(precision); - setupAuditLogs(); String outStr = runFsck(conf, 0, true, "/"); verifyAuditLogs(); assertEquals(aTime, fs.getFileStatus(file).getAccessTime()); @@ -245,54 +245,27 @@ public class TestFsck { util.cleanup(fs, "/srcdat"); } - /** Sets up log4j logger for auditlogs. 
*/ - private void setupAuditLogs() throws IOException { - File file = new File(AUDITLOG_FILE); - if (file.exists()) { - file.delete(); - } - Logger logger = FSNamesystem.AUDIT_LOG; - logger.removeAllAppenders(); - logger.setLevel(Level.INFO); - PatternLayout layout = new PatternLayout("%m%n"); - RollingFileAppender appender = - new RollingFileAppender(layout, AUDITLOG_FILE); - logger.addAppender(appender); - } - private void verifyAuditLogs() throws IOException { - // Turn off the logs - GenericTestUtils.disableLog(LoggerFactory.getLogger( - FSNamesystem.class.getName() + ".audit")); - - BufferedReader reader = null; - try { + try (BufferedReader reader = new BufferedReader(new FileReader(AUDIT_LOG_FILE))) { // Audit log should contain one getfileinfo and one fsck - reader = new BufferedReader(new FileReader(AUDITLOG_FILE)); String line; - - // one extra getfileinfo stems from resolving the path - // - for (int i = 0; i < 2; i++) { - line = reader.readLine(); - assertNotNull(line); - assertTrue("Expected getfileinfo event not found in audit log", - GET_FILE_INFO_PATTERN.matcher(line).matches()); + int getFileStatusSuccess = 0; + int fsckCount = 0; + while ((line = reader.readLine()) != null) { + LOG.info("Line: {}", line); + if (line.contains("cmd=getfileinfo") && GET_FILE_INFO_PATTERN.matcher(line).matches()) { + getFileStatusSuccess++; + } else if (FSCK_PATTERN.matcher(line).matches()) { + fsckCount++; + } } - line = reader.readLine(); - assertNotNull(line); - assertTrue("Expected fsck event not found in audit log", FSCK_PATTERN - .matcher(line).matches()); - assertNull("Unexpected event in audit log", reader.readLine()); - } finally { - // Close the reader and remove the appender to release the audit log file - // handle after verifying the content of the file. - if (reader != null) { - reader.close(); + if (getFileStatusSuccess < 2) { + throw new AssertionError( + "getfileinfo cmd should occur at least 2 times. Actual count: " + getFileStatusSuccess); } - Logger logger = FSNamesystem.AUDIT_LOG; - if (logger != null) { - logger.removeAllAppenders(); + if (fsckCount < 1) { + throw new AssertionError( + "fsck should be present at least once. 
Actual count: " + fsckCount); } } } @@ -1411,7 +1384,6 @@ public class TestFsck { util.waitReplication(fs, fileName, (short)3); long aTime = fc.getFileStatus(symlink).getAccessTime(); Thread.sleep(precision); - setupAuditLogs(); String outStr = runFsck(conf, 0, true, "/"); verifyAuditLogs(); assertEquals(aTime, fc.getFileStatus(symlink).getAccessTime()); @@ -2055,7 +2027,6 @@ public class TestFsck { long replTime = fs.getFileStatus(replFilePath).getAccessTime(); long ecTime = fs.getFileStatus(largeFilePath).getAccessTime(); Thread.sleep(precision); - setupAuditLogs(); String outStr = runFsck(conf, 0, true, "/"); verifyAuditLogs(); assertEquals(replTime, fs.getFileStatus(replFilePath).getAccessTime()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetricsLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetricsLogger.java index 7548adbd5d2..464fdfcd6c4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetricsLogger.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetricsLogger.java @@ -26,9 +26,8 @@ import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Appender; -import org.apache.log4j.AppenderSkeleton; import org.apache.log4j.AsyncAppender; -import org.apache.log4j.spi.LoggingEvent; + import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; @@ -37,7 +36,6 @@ import java.io.IOException; import java.util.Collections; import java.util.List; import java.util.concurrent.TimeoutException; -import java.util.regex.Pattern; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.junit.Assert.*; @@ -86,8 +84,8 @@ public class TestNameNodeMetricsLogger { "DummyMetrics", metricsProvider); makeNameNode(true); // Log metrics early and often. final PatternMatchingAppender appender = - new PatternMatchingAppender("^.*FakeMetric42.*$"); - addAppender(org.apache.log4j.Logger.getLogger(NameNode.METRICS_LOG_NAME), appender); + (PatternMatchingAppender) org.apache.log4j.Logger.getLogger(NameNode.METRICS_LOG_NAME) + .getAppender("PATTERNMATCHERAPPENDER"); // Ensure that the supplied pattern was matched. GenericTestUtils.waitFor(new Supplier() { @@ -115,12 +113,6 @@ public class TestNameNodeMetricsLogger { return new TestNameNode(conf); } - private void addAppender(org.apache.log4j.Logger logger, Appender appender) { - @SuppressWarnings("unchecked") - List appenders = Collections.list(logger.getAllAppenders()); - ((AsyncAppender) appenders.get(0)).addAppender(appender); - } - /** * A NameNode that stubs out the NameSystem for testing. */ @@ -149,37 +141,4 @@ public class TestNameNodeMetricsLogger { } } - /** - * An appender that matches logged messages against the given - * regular expression. 
- */ - public static class PatternMatchingAppender extends AppenderSkeleton { - private final Pattern pattern; - private volatile boolean matched; - - public PatternMatchingAppender(String pattern) { - this.pattern = Pattern.compile(pattern); - this.matched = false; - } - - public boolean isMatched() { - return matched; - } - - @Override - protected void append(LoggingEvent event) { - if (pattern.matcher(event.getMessage().toString()).matches()) { - matched = true; - } - } - - @Override - public void close() { - } - - @Override - public boolean requiresLayout() { - return false; - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java index a243255cdad..78d227b0b62 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java @@ -45,7 +45,7 @@ import java.util.function.Supplier; */ public class TestDNFencingWithReplication { static { - GenericTestUtils.setLogLevel(FSNamesystem.AUDIT_LOG, org.apache.log4j.Level.WARN); + GenericTestUtils.setLogLevel(FSNamesystem.AUDIT_LOG, Level.WARN); GenericTestUtils.setLogLevel(Server.LOG, Level.ERROR); GenericTestUtils.setLogLevel(RetryInvocationHandler.LOG, Level.ERROR); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties index 997854dcce7..368deef4020 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties @@ -22,31 +22,60 @@ log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n +# Only to be used for testing +log4j.appender.PATTERNMATCHERAPPENDER=org.apache.hadoop.hdfs.server.namenode.PatternMatchingAppender + # # NameNode metrics logging. # The default is to retain two namenode-metrics.log files up to 64MB each. # -log4j.logger.NameNodeMetricsLog=INFO,NNMETRICSRFA + +# TODO : While migrating to log4j2, replace AsyncRFAAppender with AsyncAppender as +# log4j2 properties support wrapping of other appenders to AsyncAppender using appender ref +namenode.metrics.logger=INFO,ASYNCNNMETRICSRFA,PATTERNMATCHERAPPENDER +log4j.logger.NameNodeMetricsLog=${namenode.metrics.logger} log4j.additivity.NameNodeMetricsLog=false -log4j.appender.NNMETRICSRFA=org.apache.log4j.RollingFileAppender -log4j.appender.NNMETRICSRFA.File=${hadoop.log.dir}/namenode-metrics.log -log4j.appender.NNMETRICSRFA.layout=org.apache.log4j.PatternLayout -log4j.appender.NNMETRICSRFA.layout.ConversionPattern=%d{ISO8601} %m%n -log4j.appender.NNMETRICSRFA.MaxBackupIndex=1 -log4j.appender.NNMETRICSRFA.MaxFileSize=64MB +log4j.appender.ASYNCNNMETRICSRFA=org.apache.hadoop.hdfs.util.AsyncRFAAppender +log4j.appender.ASYNCNNMETRICSRFA.conversionPattern=%d{ISO8601} %m%n +log4j.appender.ASYNCNNMETRICSRFA.maxFileSize=64MB +log4j.appender.ASYNCNNMETRICSRFA.fileName=${hadoop.log.dir}/namenode-metrics.log +log4j.appender.ASYNCNNMETRICSRFA.maxBackupIndex=1 # # DataNode metrics logging. 
# The default is to retain two datanode-metrics.log files up to 64MB each. # -log4j.logger.DataNodeMetricsLog=INFO,DNMETRICSRFA + +# TODO : While migrating to log4j2, replace AsyncRFAAppender with AsyncAppender as +# log4j2 properties support wrapping of other appenders to AsyncAppender using appender ref +datanode.metrics.logger=INFO,ASYNCDNMETRICSRFA,PATTERNMATCHERAPPENDER +log4j.logger.DataNodeMetricsLog=${datanode.metrics.logger} log4j.additivity.DataNodeMetricsLog=false -log4j.appender.DNMETRICSRFA=org.apache.log4j.RollingFileAppender -log4j.appender.DNMETRICSRFA.File=${hadoop.log.dir}/datanode-metrics.log -log4j.appender.DNMETRICSRFA.layout=org.apache.log4j.PatternLayout -log4j.appender.DNMETRICSRFA.layout.ConversionPattern=%d{ISO8601} %m%n -log4j.appender.DNMETRICSRFA.MaxBackupIndex=1 -log4j.appender.DNMETRICSRFA.MaxFileSize=64MB +log4j.appender.ASYNCDNMETRICSRFA=org.apache.hadoop.hdfs.util.AsyncRFAAppender +log4j.appender.ASYNCDNMETRICSRFA.conversionPattern=%d{ISO8601} %m%n +log4j.appender.ASYNCDNMETRICSRFA.maxFileSize=64MB +log4j.appender.ASYNCDNMETRICSRFA.fileName=${hadoop.log.dir}/datanode-metrics.log +log4j.appender.ASYNCDNMETRICSRFA.maxBackupIndex=1 + # Supress KMS error log -log4j.logger.com.sun.jersey.server.wadl.generators.WadlGeneratorJAXBGrammarGenerator=OFF \ No newline at end of file +log4j.logger.com.sun.jersey.server.wadl.generators.WadlGeneratorJAXBGrammarGenerator=OFF + +# +# hdfs audit logging +# + +# TODO : While migrating to log4j2, replace AsyncRFAAppender with AsyncAppender as +# log4j2 properties support wrapping of other appenders to AsyncAppender using appender ref +hdfs.audit.logger=INFO,ASYNCAUDITAPPENDER +hdfs.audit.log.maxfilesize=256MB +hdfs.audit.log.maxbackupindex=20 +log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} +log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false +log4j.appender.ASYNCAUDITAPPENDER=org.apache.hadoop.hdfs.util.AsyncRFAAppender +log4j.appender.ASYNCAUDITAPPENDER.blocking=false +log4j.appender.ASYNCAUDITAPPENDER.bufferSize=256 +log4j.appender.ASYNCAUDITAPPENDER.conversionPattern=%m%n +log4j.appender.ASYNCAUDITAPPENDER.maxFileSize=${hdfs.audit.log.maxfilesize} +log4j.appender.ASYNCAUDITAPPENDER.fileName=${hadoop.log.dir}/hdfs-audit.log +log4j.appender.ASYNCAUDITAPPENDER.maxBackupIndex=${hdfs.audit.log.maxbackupindex} From 8798b94ee1b1c753f67bb279f5c1c0a90a7ada3d Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Tue, 28 Feb 2023 01:34:39 +0800 Subject: [PATCH 02/97] YARN-11221. [Federation] Add replaceLabelsOnNodes, replaceLabelsOnNode REST APIs for Router. 
(#5302) --- .../webapp/dao/NodeToLabelsEntry.java | 5 + .../yarn/server/router/RouterMetrics.java | 62 +++++++++ .../webapp/FederationInterceptorREST.java | 119 +++++++++++++++++- .../yarn/server/router/TestRouterMetrics.java | 66 ++++++++++ .../MockDefaultRequestInterceptorREST.java | 22 +++- .../webapp/TestFederationInterceptorREST.java | 98 +++++++++++++++ 6 files changed, 367 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeToLabelsEntry.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeToLabelsEntry.java index 702d6f0d1a4..905ceb64b79 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeToLabelsEntry.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeToLabelsEntry.java @@ -44,6 +44,11 @@ public class NodeToLabelsEntry { this.labels = labels; } + public NodeToLabelsEntry(String nodeId, Collection pLabels) { + this.nodeId = nodeId; + this.labels.addAll(pLabels); + } + public String getNodeId() { return nodeId; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java index 8806cbb9aea..d3dd7bab11f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java @@ -147,6 +147,10 @@ public final class RouterMetrics { private MutableGaugeInt numRefreshSuperUserGroupsConfigurationFailedRetrieved; @Metric("# of refreshUserToGroupsMappings failed to be retrieved") private MutableGaugeInt numRefreshUserToGroupsMappingsFailedRetrieved; + @Metric("# of replaceLabelsOnNodes failed to be retrieved") + private MutableGaugeInt numReplaceLabelsOnNodesFailedRetrieved; + @Metric("# of replaceLabelsOnNode failed to be retrieved") + private MutableGaugeInt numReplaceLabelsOnNodeFailedRetrieved; @Metric("# of addToClusterNodeLabels failed to be retrieved") private MutableGaugeInt numAddToClusterNodeLabelsFailedRetrieved; @Metric("# of removeFromClusterNodeLabels failed to be retrieved") @@ -257,6 +261,10 @@ public final class RouterMetrics { private MutableRate totalSucceededRefreshSuperUserGroupsConfigurationRetrieved; @Metric("Total number of successful Retrieved RefreshUserToGroupsMappings and latency(ms)") private MutableRate totalSucceededRefreshUserToGroupsMappingsRetrieved; + @Metric("Total number of successful Retrieved ReplaceLabelsOnNodes and latency(ms)") + private MutableRate totalSucceededReplaceLabelsOnNodesRetrieved; + @Metric("Total number of successful Retrieved ReplaceLabelsOnNode and latency(ms)") + private MutableRate totalSucceededReplaceLabelsOnNodeRetrieved; @Metric("Total number of successful Retrieved GetSchedulerInfo and latency(ms)") private MutableRate 
totalSucceededGetSchedulerInfoRetrieved; @Metric("Total number of successful Retrieved AddToClusterNodeLabels and latency(ms)") @@ -320,6 +328,8 @@ public final class RouterMetrics { private MutableQuantiles getSchedulerInfoRetrievedLatency; private MutableQuantiles refreshSuperUserGroupsConfLatency; private MutableQuantiles refreshUserToGroupsMappingsLatency; + private MutableQuantiles replaceLabelsOnNodesLatency; + private MutableQuantiles replaceLabelsOnNodeLatency; private MutableQuantiles addToClusterNodeLabelsLatency; private MutableQuantiles removeFromClusterNodeLabelsLatency; @@ -514,6 +524,12 @@ public final class RouterMetrics { refreshUserToGroupsMappingsLatency = registry.newQuantiles("refreshUserToGroupsMappingsLatency", "latency of refresh user to groups mappings timeouts", "ops", "latency", 10); + replaceLabelsOnNodesLatency = registry.newQuantiles("replaceLabelsOnNodesLatency", + "latency of replace labels on nodes timeouts", "ops", "latency", 10); + + replaceLabelsOnNodeLatency = registry.newQuantiles("replaceLabelsOnNodeLatency", + "latency of replace labels on node timeouts", "ops", "latency", 10); + addToClusterNodeLabelsLatency = registry.newQuantiles("addToClusterNodeLabelsLatency", "latency of add cluster nodelabels timeouts", "ops", "latency", 10); @@ -810,6 +826,16 @@ public final class RouterMetrics { return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().numSamples(); } + @VisibleForTesting + public long getNumSucceededReplaceLabelsOnNodesRetrieved() { + return totalSucceededReplaceLabelsOnNodesRetrieved.lastStat().numSamples(); + } + + @VisibleForTesting + public long getNumSucceededReplaceLabelsOnNodeRetrieved() { + return totalSucceededReplaceLabelsOnNodeRetrieved.lastStat().numSamples(); + } + @VisibleForTesting public double getLatencySucceededAppsCreated() { return totalSucceededAppsCreated.lastStat().mean(); @@ -1080,6 +1106,16 @@ public final class RouterMetrics { return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().mean(); } + @VisibleForTesting + public double getLatencySucceededReplaceLabelsOnNodesRetrieved() { + return totalSucceededReplaceLabelsOnNodesRetrieved.lastStat().mean(); + } + + @VisibleForTesting + public double getLatencySucceededReplaceLabelsOnNodeRetrieved() { + return totalSucceededReplaceLabelsOnNodeRetrieved.lastStat().mean(); + } + @VisibleForTesting public int getAppsFailedCreated() { return numAppsFailedCreated.value(); @@ -1286,6 +1322,14 @@ public final class RouterMetrics { return numRefreshUserToGroupsMappingsFailedRetrieved.value(); } + public int getNumReplaceLabelsOnNodesFailedRetrieved() { + return numReplaceLabelsOnNodesFailedRetrieved.value(); + } + + public int getNumReplaceLabelsOnNodeFailedRetrieved() { + return numReplaceLabelsOnNodeFailedRetrieved.value(); + } + public int getNumAddToClusterNodeLabelsFailedRetrieved() { return numAddToClusterNodeLabelsFailedRetrieved.value(); } @@ -1597,6 +1641,16 @@ public final class RouterMetrics { refreshUserToGroupsMappingsLatency.add(duration); } + public void succeededReplaceLabelsOnNodesRetrieved(long duration) { + totalSucceededReplaceLabelsOnNodesRetrieved.add(duration); + replaceLabelsOnNodesLatency.add(duration); + } + + public void succeededReplaceLabelsOnNodeRetrieved(long duration) { + totalSucceededReplaceLabelsOnNodeRetrieved.add(duration); + replaceLabelsOnNodeLatency.add(duration); + } + public void incrAppsFailedCreated() { numAppsFailedCreated.incr(); } @@ -1801,6 +1855,14 @@ public final class RouterMetrics { 
numCancelDelegationTokenFailedRetrieved.incr(); } + public void incrReplaceLabelsOnNodesFailedRetrieved() { + numReplaceLabelsOnNodesFailedRetrieved.incr(); + } + + public void incrReplaceLabelsOnNodeFailedRetrieved() { + numReplaceLabelsOnNodeFailedRetrieved.incr(); + } + public void incrDumpSchedulerLogsFailedRetrieved() { numDumpSchedulerLogsFailedRetrieved.incr(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java index 1c7af645855..94b4b1ca251 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java @@ -118,6 +118,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.BulkActivitiesIn import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerTypeInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationDefinitionInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsEntry; import org.apache.hadoop.yarn.server.router.RouterMetrics; import org.apache.hadoop.yarn.server.router.RouterServerUtil; import org.apache.hadoop.yarn.server.router.clientrm.ClientMethod; @@ -1539,16 +1540,130 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { "getLabelsToNodes by labels = %s Failed.", StringUtils.join(labels, ",")); } + /** + * This method replaces all the node labels for specific nodes, and it is + * reachable by using {@link RMWSConsts#REPLACE_NODE_TO_LABELS}. + * + * @see ResourceManagerAdministrationProtocol#replaceLabelsOnNode + * @param newNodeToLabels the list of new labels. It is a content param. + * @param hsr the servlet request + * @return Response containing the status code + * @throws IOException if an exception happened + */ @Override public Response replaceLabelsOnNodes(NodeToLabelsEntryList newNodeToLabels, HttpServletRequest hsr) throws IOException { - throw new NotImplementedException("Code is not implemented"); + + // Step1. Check the parameters to ensure that the parameters are not empty. + if (newNodeToLabels == null) { + routerMetrics.incrReplaceLabelsOnNodesFailedRetrieved(); + throw new IllegalArgumentException("Parameter error, newNodeToLabels must not be empty."); + } + List nodeToLabelsEntries = newNodeToLabels.getNodeToLabels(); + if (CollectionUtils.isEmpty(nodeToLabelsEntries)) { + routerMetrics.incrReplaceLabelsOnNodesFailedRetrieved(); + throw new IllegalArgumentException("Parameter error, " + + "nodeToLabelsEntries must not be empty."); + } + + try { + + // Step2. We map the NodeId and NodeToLabelsEntry in the request. + Map nodeIdToLabels = new HashMap<>(); + newNodeToLabels.getNodeToLabels().stream().forEach(nodeIdToLabel -> { + String nodeId = nodeIdToLabel.getNodeId(); + nodeIdToLabels.put(nodeId, nodeIdToLabel); + }); + + // Step3. 
We map SubCluster with NodeToLabelsEntryList + Map subClusterToNodeToLabelsEntryList = + new HashMap<>(); + nodeIdToLabels.forEach((nodeId, nodeToLabelsEntry) -> { + SubClusterInfo subClusterInfo = getNodeSubcluster(nodeId); + NodeToLabelsEntryList nodeToLabelsEntryList = subClusterToNodeToLabelsEntryList. + getOrDefault(subClusterInfo, new NodeToLabelsEntryList()); + nodeToLabelsEntryList.getNodeToLabels().add(nodeToLabelsEntry); + subClusterToNodeToLabelsEntryList.put(subClusterInfo, nodeToLabelsEntryList); + }); + + // Step4. Traverse the subCluster and call the replaceLabelsOnNodes interface. + long startTime = clock.getTime(); + final HttpServletRequest hsrCopy = clone(hsr); + StringBuilder builder = new StringBuilder(); + subClusterToNodeToLabelsEntryList.forEach((subCluster, nodeToLabelsEntryList) -> { + SubClusterId subClusterId = subCluster.getSubClusterId(); + try { + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( + subCluster.getSubClusterId(), subCluster.getRMWebServiceAddress()); + interceptor.replaceLabelsOnNodes(nodeToLabelsEntryList, hsrCopy); + builder.append("subCluster-").append(subClusterId.getId()).append(":Success,"); + } catch (Exception e) { + LOG.error("replaceLabelsOnNodes Failed. subClusterId = {}.", subClusterId, e); + builder.append("subCluster-").append(subClusterId.getId()).append(":Failed,"); + } + }); + long stopTime = clock.getTime(); + routerMetrics.succeededReplaceLabelsOnNodesRetrieved(stopTime - startTime); + + // Step5. return call result. + return Response.status(Status.OK).entity(builder.toString()).build(); + } catch (NotFoundException e) { + routerMetrics.incrReplaceLabelsOnNodesFailedRetrieved(); + throw e; + } catch (Exception e) { + routerMetrics.incrReplaceLabelsOnNodesFailedRetrieved(); + throw e; + } } + /** + * This method replaces all the node labels for specific node, and it is + * reachable by using {@link RMWSConsts#NODES_NODEID_REPLACE_LABELS}. + * + * @see ResourceManagerAdministrationProtocol#replaceLabelsOnNode + * @param newNodeLabelsName the list of new labels. It is a QueryParam. + * @param hsr the servlet request + * @param nodeId the node we want to replace the node labels. It is a + * PathParam. + * @return Response containing the status code + * @throws Exception if an exception happened + */ @Override public Response replaceLabelsOnNode(Set newNodeLabelsName, HttpServletRequest hsr, String nodeId) throws Exception { - throw new NotImplementedException("Code is not implemented"); + + // Step1. Check the parameters to ensure that the parameters are not empty. + if (StringUtils.isBlank(nodeId)) { + routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); + throw new IllegalArgumentException("Parameter error, nodeId must not be null or empty."); + } + if (CollectionUtils.isEmpty(newNodeLabelsName)) { + routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); + throw new IllegalArgumentException("Parameter error, newNodeLabelsName must not be empty."); + } + + try { + // Step2. We find the subCluster according to the nodeId, + // and then call the replaceLabelsOnNode of the subCluster. + long startTime = clock.getTime(); + SubClusterInfo subClusterInfo = getNodeSubcluster(nodeId); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( + subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + final HttpServletRequest hsrCopy = clone(hsr); + interceptor.replaceLabelsOnNode(newNodeLabelsName, hsrCopy, nodeId); + + // Step3. Return the response result. 
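// Note: the single-node response assembled below is one "subCluster#<id>:Success;" status
// string, whereas the multi-node replaceLabelsOnNodes above aggregates one
// "subCluster-<id>:Success," (or ":Failed,") token per target subCluster; both paths
// return HTTP 200 with the status text as the entity.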
+ long stopTime = clock.getTime(); + routerMetrics.succeededReplaceLabelsOnNodeRetrieved(stopTime - startTime); + String msg = "subCluster#" + subClusterInfo.getSubClusterId().getId() + ":Success;"; + return Response.status(Status.OK).entity(msg).build(); + } catch (NotFoundException e) { + routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); + throw e; + } catch (Exception e){ + routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); + throw e; + } } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java index 3e451627968..db0b6837603 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java @@ -534,6 +534,16 @@ public class TestRouterMetrics { metrics.incrRenewDelegationTokenFailedRetrieved(); } + public void getReplaceLabelsOnNodesFailed() { + LOG.info("Mocked: failed replaceLabelsOnNodes call"); + metrics.incrReplaceLabelsOnNodesFailedRetrieved(); + } + + public void getReplaceLabelsOnNodeFailed() { + LOG.info("Mocked: failed ReplaceLabelOnNode call"); + metrics.incrReplaceLabelsOnNodeFailedRetrieved(); + } + public void getDumpSchedulerLogsFailed() { LOG.info("Mocked: failed DumpSchedulerLogs call"); metrics.incrDumpSchedulerLogsFailedRetrieved(); @@ -779,6 +789,16 @@ public class TestRouterMetrics { metrics.succeededRenewDelegationTokenRetrieved(duration); } + public void getNumSucceededReplaceLabelsOnNodesRetrieved(long duration) { + LOG.info("Mocked: successful ReplaceLabelsOnNodes call with duration {}", duration); + metrics.succeededReplaceLabelsOnNodesRetrieved(duration); + } + + public void getNumSucceededReplaceLabelsOnNodeRetrieved(long duration) { + LOG.info("Mocked: successful ReplaceLabelOnNode call with duration {}", duration); + metrics.succeededReplaceLabelsOnNodeRetrieved(duration); + } + public void getDumpSchedulerLogsRetrieved(long duration) { LOG.info("Mocked: successful DumpSchedulerLogs call with duration {}", duration); metrics.succeededDumpSchedulerLogsRetrieved(duration); @@ -1633,6 +1653,52 @@ public class TestRouterMetrics { metrics.getRenewDelegationTokenFailedRetrieved()); } + @Test + public void testReplaceLabelsOnNodesRetrieved() { + long totalGoodBefore = metrics.getNumSucceededReplaceLabelsOnNodesRetrieved(); + goodSubCluster.getNumSucceededReplaceLabelsOnNodesRetrieved(150); + Assert.assertEquals(totalGoodBefore + 1, + metrics.getNumSucceededReplaceLabelsOnNodesRetrieved()); + Assert.assertEquals(150, + metrics.getLatencySucceededReplaceLabelsOnNodesRetrieved(), ASSERT_DOUBLE_DELTA); + goodSubCluster.getNumSucceededReplaceLabelsOnNodesRetrieved(300); + Assert.assertEquals(totalGoodBefore + 2, + metrics.getNumSucceededReplaceLabelsOnNodesRetrieved()); + Assert.assertEquals(225, + metrics.getLatencySucceededReplaceLabelsOnNodesRetrieved(), ASSERT_DOUBLE_DELTA); + } + + @Test + public void testReplaceLabelsOnNodesRetrievedFailed() { + long totalBadBefore = metrics.getNumReplaceLabelsOnNodesFailedRetrieved(); + badSubCluster.getReplaceLabelsOnNodesFailed(); + Assert.assertEquals(totalBadBefore + 1, + 
metrics.getNumReplaceLabelsOnNodesFailedRetrieved()); + } + + @Test + public void testReplaceLabelsOnNodeRetrieved() { + long totalGoodBefore = metrics.getNumSucceededReplaceLabelsOnNodeRetrieved(); + goodSubCluster.getNumSucceededReplaceLabelsOnNodeRetrieved(150); + Assert.assertEquals(totalGoodBefore + 1, + metrics.getNumSucceededReplaceLabelsOnNodeRetrieved()); + Assert.assertEquals(150, + metrics.getLatencySucceededReplaceLabelsOnNodeRetrieved(), ASSERT_DOUBLE_DELTA); + goodSubCluster.getNumSucceededReplaceLabelsOnNodeRetrieved(300); + Assert.assertEquals(totalGoodBefore + 2, + metrics.getNumSucceededReplaceLabelsOnNodeRetrieved()); + Assert.assertEquals(225, + metrics.getLatencySucceededReplaceLabelsOnNodeRetrieved(), ASSERT_DOUBLE_DELTA); + } + + @Test + public void testReplaceLabelOnNodeRetrievedFailed() { + long totalBadBefore = metrics.getNumReplaceLabelsOnNodeFailedRetrieved(); + badSubCluster.getReplaceLabelsOnNodeFailed(); + Assert.assertEquals(totalBadBefore + 1, + metrics.getNumReplaceLabelsOnNodeFailedRetrieved()); + } + @Test public void testDumpSchedulerLogsRetrieved() { long totalGoodBefore = metrics.getNumSucceededDumpSchedulerLogsRetrieved(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java index 9d3223f9095..653224a7d37 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java @@ -137,6 +137,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationReque import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationRequestsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationUpdateResponseInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationDeleteResponseInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsEntryList; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ActivitiesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.BulkActivitiesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWSConsts; @@ -305,9 +306,14 @@ public class MockDefaultRequestInterceptorREST if (!isRunning) { throw new RuntimeException("RM is stopped"); } - NodeInfo node = new NodeInfo(); - node.setId(nodeId); - node.setLastHealthUpdate(Integer.valueOf(getSubClusterId().getId())); + NodeInfo node = null; + SubClusterId subCluster = getSubClusterId(); + String subClusterId = subCluster.getId(); + if (nodeId.contains(subClusterId) || nodeId.contains("test")) { + node = new NodeInfo(); + node.setId(nodeId); + node.setLastHealthUpdate(Integer.valueOf(getSubClusterId().getId())); + } return node; } @@ -1236,7 +1242,17 @@ public class MockDefaultRequestInterceptorREST return webSvc.dumpSchedulerLogs(time, hsr); } + public Response replaceLabelsOnNodes(NodeToLabelsEntryList newNodeToLabels, + HttpServletRequest hsr) throws IOException { + return super.replaceLabelsOnNodes(newNodeToLabels, hsr); + } + @Override + public 
Response replaceLabelsOnNode(Set newNodeLabelsName, + HttpServletRequest hsr, String nodeId) throws Exception { + return super.replaceLabelsOnNode(newNodeLabelsName, hsr, nodeId); + } + public ActivitiesInfo getActivities(HttpServletRequest hsr, String nodeId, String groupBy) { if (!EnumUtils.isValidEnum(RMWSConsts.ActivitiesGroupBy.class, groupBy.toUpperCase())) { String errMessage = "Got invalid groupBy: " + groupBy + ", valid groupBy types: " diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java index 5ec53a63e20..a2831657dc8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java @@ -99,6 +99,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationSubmissionRequestInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.RMQueueAclInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsEntry; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsEntryList; import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodeIDsInfo; import org.apache.hadoop.yarn.server.router.clientrm.RouterClientRMService; import org.apache.hadoop.yarn.server.router.clientrm.RouterClientRMService.RequestInterceptorChainWrapper; @@ -1786,6 +1788,102 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Assert.assertEquals(response.getStatus(), Status.OK.getStatusCode()); } + @Test + public void testReplaceLabelsOnNodes() throws Exception { + // subCluster0 -> node0:0 -> label:NodeLabel0 + // subCluster1 -> node1:1 -> label:NodeLabel1 + // subCluster2 -> node2:2 -> label:NodeLabel2 + // subCluster3 -> node3:3 -> label:NodeLabel3 + NodeToLabelsEntryList nodeToLabelsEntryList = new NodeToLabelsEntryList(); + for (int i = 0; i < NUM_SUBCLUSTER; i++) { + // labels + List labels = new ArrayList<>(); + labels.add("NodeLabel" + i); + // nodes + String nodeId = "node" + i + ":" + i; + NodeToLabelsEntry nodeToLabelsEntry = new NodeToLabelsEntry(nodeId, labels); + List nodeToLabelsEntries = nodeToLabelsEntryList.getNodeToLabels(); + nodeToLabelsEntries.add(nodeToLabelsEntry); + } + + // one of the results: + // subCluster#0:Success;subCluster#1:Success;subCluster#3:Success;subCluster#2:Success; + // We can't confirm the complete return order. 
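// The ordering varies because replaceLabelsOnNodes groups the entries in a HashMap keyed
// by subCluster and appends one "subCluster-<id>:Success," token per entry while iterating
// it, so the assertions below split the returned entity and verify each token individually
// instead of comparing against a single fixed string.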
+ Response response = interceptor.replaceLabelsOnNodes(nodeToLabelsEntryList, null); + Assert.assertNotNull(response); + Assert.assertEquals(200, response.getStatus()); + + Object entityObject = response.getEntity(); + Assert.assertNotNull(entityObject); + + String entityValue = String.valueOf(entityObject); + String[] entities = entityValue.split(","); + Assert.assertNotNull(entities); + Assert.assertEquals(4, entities.length); + String expectValue = + "subCluster-0:Success,subCluster-1:Success,subCluster-2:Success,subCluster-3:Success,"; + for (String entity : entities) { + Assert.assertTrue(expectValue.contains(entity)); + } + } + + @Test + public void testReplaceLabelsOnNodesError() throws Exception { + // newNodeToLabels is null + LambdaTestUtils.intercept(IllegalArgumentException.class, + "Parameter error, newNodeToLabels must not be empty.", + () -> interceptor.replaceLabelsOnNodes(null, null)); + + // nodeToLabelsEntryList is Empty + NodeToLabelsEntryList nodeToLabelsEntryList = new NodeToLabelsEntryList(); + LambdaTestUtils.intercept(IllegalArgumentException.class, + "Parameter error, nodeToLabelsEntries must not be empty.", + () -> interceptor.replaceLabelsOnNodes(nodeToLabelsEntryList, null)); + } + + @Test + public void testReplaceLabelsOnNode() throws Exception { + // subCluster3 -> node3:3 -> label:NodeLabel3 + String nodeId = "node3:3"; + Set labels = Collections.singleton("NodeLabel3"); + + // We expect the following result: subCluster#3:Success; + String expectValue = "subCluster#3:Success;"; + Response response = interceptor.replaceLabelsOnNode(labels, null, nodeId); + Assert.assertNotNull(response); + Assert.assertEquals(200, response.getStatus()); + + Object entityObject = response.getEntity(); + Assert.assertNotNull(entityObject); + + String entityValue = String.valueOf(entityObject); + Assert.assertNotNull(entityValue); + Assert.assertEquals(expectValue, entityValue); + } + + @Test + public void testReplaceLabelsOnNodeError() throws Exception { + // newNodeToLabels is null + String nodeId = "node3:3"; + Set labels = Collections.singleton("NodeLabel3"); + Set labelsEmpty = new HashSet<>(); + + // nodeId is null + LambdaTestUtils.intercept(IllegalArgumentException.class, + "Parameter error, nodeId must not be null or empty.", + () -> interceptor.replaceLabelsOnNode(labels, null, null)); + + // labels is null + LambdaTestUtils.intercept(IllegalArgumentException.class, + "Parameter error, newNodeLabelsName must not be empty.", + () -> interceptor.replaceLabelsOnNode(null, null, nodeId)); + + // labels is empty + LambdaTestUtils.intercept(IllegalArgumentException.class, + "Parameter error, newNodeLabelsName must not be empty.", + () -> interceptor.replaceLabelsOnNode(labelsEmpty, null, nodeId)); + } + @Test public void testDumpSchedulerLogs() throws Exception { HttpServletRequest mockHsr = mockHttpServletRequestByUserName("admin"); From 61f369c43e254796f997ec034a35ca764d723e38 Mon Sep 17 00:00:00 2001 From: Simbarashe Dzinamarira Date: Mon, 27 Feb 2023 09:56:24 -0800 Subject: [PATCH 03/97] HDFS-16890: RBF: Ensures router periodically refreshes its record of a namespace's state. 
(#5298) --- .../federation/router/RBFConfigKeys.java | 4 ++ .../federation/router/RouterRpcClient.java | 58 +++++++++++++++++-- .../src/main/resources/hdfs-rbf-default.xml | 10 ++++ .../router/TestObserverWithRouter.java | 45 +++++++++++++- 4 files changed, 110 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java index 7e07d7b6549..c0ee9504597 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java @@ -201,6 +201,10 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic { FEDERATION_ROUTER_PREFIX + "observer.federated.state.propagation.maxsize"; public static final int DFS_ROUTER_OBSERVER_FEDERATED_STATE_PROPAGATION_MAXSIZE_DEFAULT = 5; + public static final String DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY = + FEDERATION_ROUTER_PREFIX + "observer.state.id.refresh.period"; + public static final String DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_DEFAULT = "15s"; + public static final String FEDERATION_STORE_SERIALIZER_CLASS = FEDERATION_STORE_PREFIX + "serializer"; public static final Class diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java index 06e64439011..92f1fc06a81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java @@ -57,6 +57,7 @@ import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.LongAccumulator; import java.util.concurrent.atomic.LongAdder; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -86,6 +87,7 @@ import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Time; import org.eclipse.jetty.util.ajax.JSON; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -136,6 +138,14 @@ public class RouterRpcClient { private final boolean observerReadEnabledDefault; /** Nameservice specific overrides of the default setting for enabling observer reads. */ private HashSet observerReadEnabledOverrides = new HashSet<>(); + /** + * Period to refresh namespace stateID using active namenode. + * This ensures the namespace stateID is fresh even when an + * observer is trailing behind. + */ + private long activeNNStateIdRefreshPeriodMs; + /** Last msync times for each namespace. */ + private final ConcurrentHashMap lastActiveNNRefreshTimes; /** Pattern to parse a stack trace line. 
*/ private static final Pattern STACK_TRACE_PATTERN = @@ -211,13 +221,25 @@ public class RouterRpcClient { this.observerReadEnabledDefault = conf.getBoolean( RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_DEFAULT_KEY, RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_DEFAULT_VALUE); - String[] observerReadOverrides = conf.getStrings(RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_OVERRIDES); + String[] observerReadOverrides = + conf.getStrings(RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_OVERRIDES); if (observerReadOverrides != null) { observerReadEnabledOverrides.addAll(Arrays.asList(observerReadOverrides)); } if (this.observerReadEnabledDefault) { LOG.info("Observer read is enabled for router."); } + this.activeNNStateIdRefreshPeriodMs = conf.getTimeDuration( + RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY, + RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_DEFAULT, + TimeUnit.SECONDS, TimeUnit.MILLISECONDS); + if (activeNNStateIdRefreshPeriodMs < 0) { + LOG.info("Periodic stateId freshness check is disabled" + + " since '{}' is {}ms, which is less than 0.", + RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY, + activeNNStateIdRefreshPeriodMs); + } + this.lastActiveNNRefreshTimes = new ConcurrentHashMap<>(); } /** @@ -1707,10 +1729,13 @@ public class RouterRpcClient { boolean isObserverRead) throws IOException { final List namenodes; - if (RouterStateIdContext.getClientStateIdFromCurrentCall(nsId) > Long.MIN_VALUE) { - namenodes = namenodeResolver.getNamenodesForNameserviceId(nsId, isObserverRead); - } else { - namenodes = namenodeResolver.getNamenodesForNameserviceId(nsId, false); + boolean listObserverNamenodesFirst = isObserverRead + && isNamespaceStateIdFresh(nsId) + && (RouterStateIdContext.getClientStateIdFromCurrentCall(nsId) > Long.MIN_VALUE); + namenodes = namenodeResolver.getNamenodesForNameserviceId(nsId, listObserverNamenodesFirst); + if (!listObserverNamenodesFirst) { + // Refresh time of last call to active NameNode. + getTimeOfLastCallToActive(nsId).accumulate(Time.monotonicNow()); } if (namenodes == null || namenodes.isEmpty()) { @@ -1721,7 +1746,8 @@ public class RouterRpcClient { } private boolean isObserverReadEligible(String nsId, Method method) { - boolean isReadEnabledForNamespace = observerReadEnabledDefault != observerReadEnabledOverrides.contains(nsId); + boolean isReadEnabledForNamespace = + observerReadEnabledDefault != observerReadEnabledOverrides.contains(nsId); return isReadEnabledForNamespace && isReadCall(method); } @@ -1735,4 +1761,24 @@ public class RouterRpcClient { } return !method.getAnnotationsByType(ReadOnly.class)[0].activeOnly(); } + + /** + * Checks and sets last refresh time for a namespace's stateId. + * Returns true if refresh time is newer than threshold. + * Otherwise, return false and call should be handled by active namenode. 
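* A negative refresh period disables this check entirely, so every namespace is always
* treated as fresh and the periodic forced refresh through the active namenode never
* triggers.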
+ * @param nsId namespaceID + */ + @VisibleForTesting + boolean isNamespaceStateIdFresh(String nsId) { + if (activeNNStateIdRefreshPeriodMs < 0) { + return true; + } + long timeSinceRefreshMs = Time.monotonicNow() - getTimeOfLastCallToActive(nsId).get(); + return (timeSinceRefreshMs <= activeNNStateIdRefreshPeriodMs); + } + + private LongAccumulator getTimeOfLastCallToActive(String namespaceId) { + return lastActiveNNRefreshTimes + .computeIfAbsent(namespaceId, key -> new LongAccumulator(Math::max, 0)); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml index b5096cd253d..79a16cc2022 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml @@ -884,4 +884,14 @@ of namespaces in use and the latency of the msync requests. + + + dfs.federation.router.observer.state.id.refresh.period + 15s + + Period to refresh namespace stateID using active namenode. This ensures the + namespace stateID is refresh even when an observer is trailing behind. + If this is below 0, the auto-refresh is disabled. + + diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestObserverWithRouter.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestObserverWithRouter.java index 45001b461ba..72e8f8f66d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestObserverWithRouter.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestObserverWithRouter.java @@ -34,9 +34,11 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.LongAccumulator; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.ClientGSIContext; +import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RouterFederatedStateProto; @@ -50,6 +52,7 @@ import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeServi import org.apache.hadoop.hdfs.server.federation.resolver.MembershipNamenodeResolver; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos; +import org.apache.hadoop.test.GenericTestUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.AfterEach; @@ -95,7 +98,9 @@ public class TestObserverWithRouter { conf.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "0ms"); conf.setBoolean(DFS_NAMENODE_STATE_CONTEXT_ENABLED_KEY, true); if (confOverrides != null) { - conf.addResource(confOverrides); + confOverrides + .iterator() + .forEachRemaining(entry -> conf.set(entry.getKey(), entry.getValue())); } cluster = new MiniRouterDFSCluster(true, 2, numberOfNamenode); cluster.addNamenodeOverrides(conf); @@ -639,4 +644,42 @@ public class TestObserverWithRouter { assertEquals("ns0", namespace1.get(0)); assertTrue(namespace2.isEmpty()); } + + @Test + @Tag(SKIP_BEFORE_EACH_CLUSTER_STARTUP) + public void testPeriodicStateRefreshUsingActiveNamenode() throws Exception { + Path rootPath = new 
Path("/"); + + Configuration confOverride = new Configuration(false); + confOverride.set(RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY, "500ms"); + confOverride.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "3s"); + startUpCluster(1, confOverride); + + fileSystem = routerContext.getFileSystem(getConfToEnableObserverReads()); + fileSystem.listStatus(rootPath); + int initialLengthOfRootListing = fileSystem.listStatus(rootPath).length; + + DFSClient activeClient = cluster.getNamenodes("ns0") + .stream() + .filter(nnContext -> nnContext.getNamenode().isActiveState()) + .findFirst().orElseThrow(() -> new IllegalStateException("No active namenode.")) + .getClient(); + + for (int i = 0; i < 10; i++) { + activeClient.mkdirs("/dir" + i, null, false); + } + activeClient.close(); + + // Wait long enough for state in router to be considered stale. + GenericTestUtils.waitFor( + () -> !routerContext + .getRouterRpcClient() + .isNamespaceStateIdFresh("ns0"), + 100, + 10000, + "Timeout: Namespace state was never considered stale."); + FileStatus[] rootFolderAfterMkdir = fileSystem.listStatus(rootPath); + assertEquals("List-status should show newly created directories.", + initialLengthOfRootListing + 10, rootFolderAfterMkdir.length); + } } From 0ca56860341af2a489cea7cf01cb8746e57d7b6f Mon Sep 17 00:00:00 2001 From: rdingankar Date: Mon, 27 Feb 2023 10:26:32 -0800 Subject: [PATCH 04/97] HDFS-16917 Add transfer rate quantile metrics for DataNode reads (#5397) Co-authored-by: Ravindra Dingankar --- .../hadoop-common/src/site/markdown/Metrics.md | 3 +++ .../java/org/apache/hadoop/hdfs/DFSUtil.java | 15 +++++++++++++++ .../hadoop/hdfs/server/datanode/DataXceiver.java | 3 +++ .../server/datanode/metrics/DataNodeMetrics.java | 14 ++++++++++++++ .../java/org/apache/hadoop/hdfs/TestDFSUtil.java | 16 ++++++++++++++++ .../server/datanode/TestDataNodeMetrics.java | 4 ++++ 6 files changed, 55 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index e7d387b1131..a551e3ae15f 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -370,6 +370,9 @@ Each metrics record contains tags such as SessionId and Hostname as additional i |:---- |:---- | | `BytesWritten` | Total number of bytes written to DataNode | | `BytesRead` | Total number of bytes read from DataNode | +| `ReadTransferRateNumOps` | Total number of data read transfers | +| `ReadTransferRateAvgTime` | Average transfer rate of bytes read from DataNode, measured in bytes per second. | +| `ReadTransferRate`*num*`s(50/75/90/95/99)thPercentileRate` | The 50/75/90/95/99th percentile of the transfer rate of bytes read from DataNode, measured in bytes per second. 
| | `BlocksWritten` | Total number of blocks written to DataNode | | `BlocksRead` | Total number of blocks read from DataNode | | `BlocksReplicated` | Total number of blocks replicated | diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 7237489e7bf..25726cee510 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -69,6 +69,7 @@ import org.apache.commons.cli.PosixParser; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.UnresolvedLinkException; +import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.FSDirectory; import org.apache.hadoop.hdfs.server.namenode.INodesInPath; import org.apache.hadoop.ipc.ProtobufRpcEngine; @@ -1936,4 +1937,18 @@ public class DFSUtil { return path.charAt(parent.length()) == Path.SEPARATOR_CHAR || parent.equals(Path.SEPARATOR); } + + /** + * Add transfer rate metrics for valid data read and duration values. + * @param metrics metrics for datanodes + * @param read bytes read + * @param duration read duration + */ + public static void addTransferRateMetric(final DataNodeMetrics metrics, final long read, final long duration) { + if (read >= 0 && duration > 0) { + metrics.addReadTransferRate(read * 1000 / duration); + } else { + LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index 3cf4bde3d45..ab706fb1731 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.protobuf.ByteString; import javax.crypto.SecretKey; @@ -632,6 +633,7 @@ class DataXceiver extends Receiver implements Runnable { datanode.metrics.incrBytesRead((int) read); datanode.metrics.incrBlocksRead(); datanode.metrics.incrTotalReadTime(duration); + DFSUtil.addTransferRateMetric(datanode.metrics, read, duration); } catch ( SocketException ignored ) { LOG.trace("{}:Ignoring exception while serving {} to {}", dnR, block, remoteAddress, ignored); @@ -1122,6 +1124,7 @@ class DataXceiver extends Receiver implements Runnable { datanode.metrics.incrBytesRead((int) read); datanode.metrics.incrBlocksRead(); datanode.metrics.incrTotalReadTime(duration); + DFSUtil.addTransferRateMetric(datanode.metrics, read, duration); LOG.info("Copied {} to {}", block, peer.getRemoteAddressString()); } catch (IOException ioe) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java index 649d30e91e0..675dbbff4c3 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java @@ -61,6 +61,8 @@ public class DataNodeMetrics { @Metric MutableCounterLong bytesRead; @Metric("Milliseconds spent reading") MutableCounterLong totalReadTime; + @Metric private MutableRate readTransferRate; + final private MutableQuantiles[] readTransferRateQuantiles; @Metric MutableCounterLong blocksWritten; @Metric MutableCounterLong blocksRead; @Metric MutableCounterLong blocksReplicated; @@ -227,6 +229,7 @@ public class DataNodeMetrics { sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len]; ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len]; ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len]; + readTransferRateQuantiles = new MutableQuantiles[len]; for (int i = 0; i < len; i++) { int interval = intervals[i]; @@ -255,6 +258,10 @@ public class DataNodeMetrics { "ramDiskBlocksLazyPersistWindows" + interval + "s", "Time between the RamDisk block write and disk persist in ms", "ops", "latency", interval); + readTransferRateQuantiles[i] = registry.newQuantiles( + "readTransferRate" + interval + "s", + "Rate at which bytes are read from datanode calculated in bytes per second", + "ops", "rate", interval); } } @@ -316,6 +323,13 @@ public class DataNodeMetrics { } } + public void addReadTransferRate(long readTransferRate) { + this.readTransferRate.add(readTransferRate); + for (MutableQuantiles q : readTransferRateQuantiles) { + q.add(readTransferRate); + } + } + public void addCacheReport(long latency) { cacheReports.add(latency); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index e6ce29316c5..f8e8e4120c4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -45,6 +45,7 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.*; import java.io.File; import java.io.IOException; @@ -71,6 +72,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider; import org.apache.hadoop.http.HttpConfig; @@ -1108,4 +1110,18 @@ public class TestDFSUtil { LambdaTestUtils.intercept(IOException.class, expectedErrorMessage, ()->DFSUtil.getNNServiceRpcAddressesForCluster(conf)); } + + @Test + public void testAddTransferRateMetricForValidValues() { + DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class); + DFSUtil.addTransferRateMetric(mockMetrics, 100, 10); + verify(mockMetrics).addReadTransferRate(10000); + } + + @Test + public void testAddTransferRateMetricForInvalidValue() { + DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class); + DFSUtil.addTransferRateMetric(mockMetrics, 100, 0); + verify(mockMetrics, 
times(0)).addReadTransferRate(anyLong()); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java index 2bf7861287a..de5c985a4f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java @@ -380,6 +380,7 @@ public class TestDataNodeMetrics { @Test(timeout=120000) public void testDataNodeTimeSpend() throws Exception { Configuration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "" + 60); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); try { final FileSystem fs = cluster.getFileSystem(); @@ -391,6 +392,7 @@ public class TestDataNodeMetrics { final long startWriteValue = getLongCounter("TotalWriteTime", rb); final long startReadValue = getLongCounter("TotalReadTime", rb); + assertCounter("ReadTransferRateNumOps", 0L, rb); final AtomicInteger x = new AtomicInteger(0); // Lets Metric system update latest metrics @@ -410,6 +412,8 @@ public class TestDataNodeMetrics { MetricsRecordBuilder rbNew = getMetrics(datanode.getMetrics().name()); final long endWriteValue = getLongCounter("TotalWriteTime", rbNew); final long endReadValue = getLongCounter("TotalReadTime", rbNew); + assertCounter("ReadTransferRateNumOps", 1L, rbNew); + assertQuantileGauges("ReadTransferRate" + "60s", rbNew, "Rate"); return endWriteValue > startWriteValue && endReadValue > startReadValue; } From dcd9dc6983434ad20b1c6158889fe627701e65b2 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 28 Feb 2023 10:48:54 +0000 Subject: [PATCH 05/97] HADOOP-18641. Cloud connector dependency and LICENSE fixup. (#5429) POM and LICENSE fixup of transient dependencies * Exclude hadoop-cloud-storage imports which come in with hadoop-common * Add explicit import of hadoop's org.codehaus.jettison declaration to hadoop-aliyun * Tune aliyun jars imports * Update LICENSE-binary for the current set of libraries. 
Contributed by Steve Loughran --- LICENSE-binary | 19 ++++++++++---- hadoop-project/pom.xml | 30 ++++++++++++++++++++++ hadoop-tools/hadoop-aliyun/pom.xml | 6 +++++ hadoop-tools/hadoop-azure-datalake/pom.xml | 16 ++++++++++++ hadoop-tools/hadoop-azure/pom.xml | 13 ++++++++++ 5 files changed, 79 insertions(+), 5 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 60fdcb45e84..aa7f9a42e96 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -210,9 +210,9 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/nvd3-1.8.5.* (css and js hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java -com.aliyun:aliyun-java-sdk-core:3.4.0 -com.aliyun:aliyun-java-sdk-ecs:4.2.0 -com.aliyun:aliyun-java-sdk-ram:3.0.0 +com.aliyun:aliyun-java-sdk-core:4.5.10 +com.aliyun:aliyun-java-sdk-kms:2.11.0 +com.aliyun:aliyun-java-sdk-ram:3.1.0 com.aliyun:aliyun-java-sdk-sts:3.0.0 com.aliyun.oss:aliyun-sdk-oss:3.13.2 com.amazonaws:aws-java-sdk-bundle:1.12.316 @@ -289,8 +289,12 @@ io.netty:netty-resolver-dns-classes-macos:4.1.77.Final io.netty:netty-transport-native-epoll:4.1.77.Final io.netty:netty-transport-native-kqueue:4.1.77.Final io.netty:netty-resolver-dns-native-macos:4.1.77.Final -io.opencensus:opencensus-api:0.12.3 -io.opencensus:opencensus-contrib-grpc-metrics:0.12.3 +io.opencensus:opencensus-api:0.24.0 +io.opencensus:opencensus-contrib-grpc-metrics:0.24.0 +io.opentracing:opentracing-api:0.33.0 +io.opentracing:opentracing-noop:0.33.0 +io.opentracing:opentracing-util:0.33.0 +io.perfmark:perfmark-api:0.19.0 io.reactivex:rxjava:1.3.8 io.reactivex:rxjava-string:1.1.1 io.reactivex:rxnetty:0.4.20 @@ -357,6 +361,9 @@ org.eclipse.jetty:jetty-xml:9.4.48.v20220622 org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.48.v20220622 org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.48.v20220622 org.ehcache:ehcache:3.3.1 +org.ini4j:ini4j:0.5.4 +org.jetbrains.kotlin:kotlin-stdlib:1.4.10 +org.jetbrains.kotlin:kotlin-stdlib-common:1.4.10 org.lz4:lz4-java:1.7.1 org.objenesis:objenesis:2.6 org.xerial.snappy:snappy-java:1.0.5 @@ -516,6 +523,8 @@ Eclipse Public License 1.0 -------------------------- junit:junit:4.13.2 +org.jacoco:org.jacoco.agent:0.8.5 + HSQL License diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 550c716d485..062abb3f1db 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -1578,6 +1578,36 @@ commons-beanutils commons-beanutils + + org.apache.commons + commons-lang3 + + + + javax.xml.bind + jaxb-api + + + org.slf4j + slf4j-api + + + com.google.code.gson + gson + + + org.apache.httpcomponents + httpcore + + + commons-logging + commons-logging + + + + org.codehaus.jettison + jettison + diff --git a/hadoop-tools/hadoop-aliyun/pom.xml b/hadoop-tools/hadoop-aliyun/pom.xml index 570032f370b..7605b18b538 100644 --- a/hadoop-tools/hadoop-aliyun/pom.xml +++ b/hadoop-tools/hadoop-aliyun/pom.xml @@ -125,6 +125,12 @@ + + + org.codehaus.jettison + jettison + + org.apache.hadoop hadoop-common diff --git a/hadoop-tools/hadoop-azure-datalake/pom.xml b/hadoop-tools/hadoop-azure-datalake/pom.xml index cec050d2c1b..14ffa3798aa 100644 --- a/hadoop-tools/hadoop-azure-datalake/pom.xml +++ b/hadoop-tools/hadoop-azure-datalake/pom.xml @@ -110,6 +110,22 @@ com.microsoft.azure azure-data-lake-store-sdk ${azure.data.lake.store.sdk.version} + + + + com.fasterxml.jackson.core + jackson-core + + + org.slf4j + 
slf4j-api + + + + org.wildfly.openssl + wildfly-openssl + + diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml index c313fa28a3a..e8c5fb78efd 100644 --- a/hadoop-tools/hadoop-azure/pom.xml +++ b/hadoop-tools/hadoop-azure/pom.xml @@ -164,6 +164,19 @@ org.apache.commons commons-lang3 + + + com.fasterxml.jackson.core + jackson-core + + + org.slf4j + slf4j-api + + + com.google.guava + guava + From bcc51ce2c58f4fc7df9372f437ddf5c49813b51a Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Wed, 1 Mar 2023 06:44:00 +0800 Subject: [PATCH 06/97] =?UTF-8?q?YARN-11375.=20=20[Federation]=20Support?= =?UTF-8?q?=20refreshAdminAcls=E3=80=81refreshServiceAcls=20API's=20for=20?= =?UTF-8?q?Federation.=20(#5312)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../RefreshAdminAclsRequest.java | 23 +++++++ .../RefreshServiceAclsRequest.java | 23 +++++++ ...erver_resourcemanager_service_protos.proto | 2 + .../pb/RefreshAdminAclsRequestPBImpl.java | 49 +++++++++++--- .../pb/RefreshServiceAclsRequestPBImpl.java | 46 +++++++++++-- .../yarn/server/resourcemanager/MockRM.java | 20 ++++++ .../yarn/server/router/RouterMetrics.java | 62 +++++++++++++++++ .../rmadmin/FederationRMAdminInterceptor.java | 58 +++++++++++++++- .../yarn/server/router/TestRouterMetrics.java | 66 +++++++++++++++++++ .../TestFederationRMAdminInterceptor.java | 61 +++++++++++++++++ .../TestableFederationRMAdminInterceptor.java | 9 ++- 11 files changed, 399 insertions(+), 20 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshAdminAclsRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshAdminAclsRequest.java index 71c4a2c46d7..5371741331e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshAdminAclsRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshAdminAclsRequest.java @@ -33,4 +33,27 @@ public abstract class RefreshAdminAclsRequest { Records.newRecord(RefreshAdminAclsRequest.class); return request; } + + @Public + @Stable + public static RefreshAdminAclsRequest newInstance(String subClusterId) { + RefreshAdminAclsRequest request = + Records.newRecord(RefreshAdminAclsRequest.class); + request.setSubClusterId(subClusterId); + return request; + } + + /** + * Get the subClusterId. + * + * @return subClusterId. + */ + public abstract String getSubClusterId(); + + /** + * Set the subClusterId. + * + * @param subClusterId subCluster Id. 
+ */ + public abstract void setSubClusterId(String subClusterId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshServiceAclsRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshServiceAclsRequest.java index 789f54fe29a..e382ebccba1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshServiceAclsRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshServiceAclsRequest.java @@ -33,4 +33,27 @@ public abstract class RefreshServiceAclsRequest { Records.newRecord(RefreshServiceAclsRequest.class); return request; } + + @Public + @Stable + public static RefreshServiceAclsRequest newInstance(String subClusterId) { + RefreshServiceAclsRequest request = + Records.newRecord(RefreshServiceAclsRequest.class); + request.setSubClusterId(subClusterId); + return request; + } + + /** + * Get the subClusterId. + * + * @return subClusterId. + */ + public abstract String getSubClusterId(); + + /** + * Set the subClusterId. + * + * @param subClusterId subCluster Id. + */ + public abstract void setSubClusterId(String subClusterId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto index 97e29f954cd..4050a5b356f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto @@ -58,11 +58,13 @@ message RefreshUserToGroupsMappingsResponseProto { } message RefreshAdminAclsRequestProto { + optional string sub_cluster_id = 1; } message RefreshAdminAclsResponseProto { } message RefreshServiceAclsRequestProto { + optional string sub_cluster_id = 1; } message RefreshServiceAclsResponseProto { } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshAdminAclsRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshAdminAclsRequestPBImpl.java index 47eadc111bb..0738e8a1b0c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshAdminAclsRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshAdminAclsRequestPBImpl.java @@ -18,21 +18,22 @@ package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb; +import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshAdminAclsRequestProto; +import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshAdminAclsRequestProtoOrBuilder; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest; import org.apache.hadoop.thirdparty.protobuf.TextFormat; 
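// The PBImpl changes below follow the usual YARN proto-record pattern: the record is backed
// either by an immutable proto (viaProto == true) or by a mutable builder. maybeInitBuilder()
// copies the current proto into a fresh builder before a setter such as setSubClusterId()
// mutates it (a null argument clears the optional field), and getProto() rebuilds the
// immutable proto on read; getSubClusterId() returns null when the field was never set.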
@Private @Unstable -public class RefreshAdminAclsRequestPBImpl -extends RefreshAdminAclsRequest { +public class RefreshAdminAclsRequestPBImpl extends RefreshAdminAclsRequest { - RefreshAdminAclsRequestProto proto = RefreshAdminAclsRequestProto.getDefaultInstance(); - RefreshAdminAclsRequestProto.Builder builder = null; - boolean viaProto = false; + private RefreshAdminAclsRequestProto proto = RefreshAdminAclsRequestProto.getDefaultInstance(); + private RefreshAdminAclsRequestProto.Builder builder = null; + private boolean viaProto = false; public RefreshAdminAclsRequestPBImpl() { builder = RefreshAdminAclsRequestProto.newBuilder(); @@ -48,6 +49,13 @@ extends RefreshAdminAclsRequest { viaProto = true; return proto; } + + private synchronized void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = RefreshAdminAclsRequestProto.newBuilder(proto); + } + viaProto = false; + } @Override public int hashCode() { @@ -56,16 +64,39 @@ extends RefreshAdminAclsRequest { @Override public boolean equals(Object other) { - if (other == null) + + if (!(other instanceof RefreshAdminAclsRequest)) { return false; - if (other.getClass().isAssignableFrom(this.getClass())) { - return this.getProto().equals(this.getClass().cast(other).getProto()); } - return false; + + RefreshAdminAclsRequestPBImpl otherImpl = this.getClass().cast(other); + return new EqualsBuilder() + .append(this.getProto(), otherImpl.getProto()) + .isEquals(); } @Override public String toString() { return TextFormat.shortDebugString(getProto()); } + + @Override + public String getSubClusterId() { + RefreshAdminAclsRequestProtoOrBuilder p = viaProto ? proto : builder; + boolean hasSubClusterId = p.hasSubClusterId(); + if (hasSubClusterId) { + return p.getSubClusterId(); + } + return null; + } + + @Override + public void setSubClusterId(String subClusterId) { + maybeInitBuilder(); + if (subClusterId == null) { + builder.clearSubClusterId(); + return; + } + builder.setSubClusterId(subClusterId); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshServiceAclsRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshServiceAclsRequestPBImpl.java index d4529f43e65..4c30d0f2a54 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshServiceAclsRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshServiceAclsRequestPBImpl.java @@ -18,9 +18,11 @@ package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb; +import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshServiceAclsRequestProto; +import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshServiceAclsRequestProtoOrBuilder; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsRequest; import org.apache.hadoop.thirdparty.protobuf.TextFormat; @@ -29,10 +31,10 @@ import org.apache.hadoop.thirdparty.protobuf.TextFormat; @Unstable public class RefreshServiceAclsRequestPBImpl extends RefreshServiceAclsRequest { - RefreshServiceAclsRequestProto proto = + 
private RefreshServiceAclsRequestProto proto = RefreshServiceAclsRequestProto.getDefaultInstance(); - RefreshServiceAclsRequestProto.Builder builder = null; - boolean viaProto = false; + private RefreshServiceAclsRequestProto.Builder builder = null; + private boolean viaProto = false; public RefreshServiceAclsRequestPBImpl() { builder = RefreshServiceAclsRequestProto.newBuilder(); @@ -50,6 +52,13 @@ public class RefreshServiceAclsRequestPBImpl extends RefreshServiceAclsRequest { return proto; } + private synchronized void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = RefreshServiceAclsRequestProto.newBuilder(proto); + } + viaProto = false; + } + @Override public int hashCode() { return getProto().hashCode(); @@ -57,16 +66,39 @@ public class RefreshServiceAclsRequestPBImpl extends RefreshServiceAclsRequest { @Override public boolean equals(Object other) { - if (other == null) + + if (!(other instanceof RefreshServiceAclsRequest)) { return false; - if (other.getClass().isAssignableFrom(this.getClass())) { - return this.getProto().equals(this.getClass().cast(other).getProto()); } - return false; + + RefreshServiceAclsRequestPBImpl otherImpl = this.getClass().cast(other); + return new EqualsBuilder() + .append(this.getProto(), otherImpl.getProto()) + .isEquals(); } @Override public String toString() { return TextFormat.shortDebugString(getProto()); } + + @Override + public String getSubClusterId() { + RefreshServiceAclsRequestProtoOrBuilder p = viaProto ? proto : builder; + boolean hasSubClusterId = p.hasSubClusterId(); + if (hasSubClusterId) { + return p.getSubClusterId(); + } + return null; + } + + @Override + public void setSubClusterId(String subClusterId) { + maybeInitBuilder(); + if (subClusterId == null) { + builder.clearSubClusterId(); + return; + } + builder.setSubClusterId(subClusterId); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 316f8e06cb5..faa5ddb7186 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.yarn.server.resourcemanager.MockNM.createMockNod import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; @@ -55,8 +56,13 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; +import 
org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsResponse; import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; @@ -789,6 +795,7 @@ public class MockRM extends ResourceManager { @Override protected AdminService createAdminService() { + RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); return new AdminService(this) { @Override protected void startServer() { @@ -799,6 +806,19 @@ public class MockRM extends ResourceManager { protected void stopServer() { // don't do anything } + + @Override + public RefreshServiceAclsResponse refreshServiceAcls(RefreshServiceAclsRequest request) + throws YarnException, IOException { + Configuration config = this.getConfig(); + boolean authorization = + config.getBoolean(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false); + if (!authorization) { + throw RPCUtil.getRemoteException(new IOException("Service Authorization (" + + CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION + ") not enabled.")); + } + return recordFactory.newRecordInstance(RefreshServiceAclsResponse.class); + } }; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java index d3dd7bab11f..fdcd890ea6e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java @@ -147,6 +147,10 @@ public final class RouterMetrics { private MutableGaugeInt numRefreshSuperUserGroupsConfigurationFailedRetrieved; @Metric("# of refreshUserToGroupsMappings failed to be retrieved") private MutableGaugeInt numRefreshUserToGroupsMappingsFailedRetrieved; + @Metric("# of refreshAdminAcls failed to be retrieved") + private MutableGaugeInt numRefreshAdminAclsFailedRetrieved; + @Metric("# of refreshServiceAcls failed to be retrieved") + private MutableGaugeInt numRefreshServiceAclsFailedRetrieved; @Metric("# of replaceLabelsOnNodes failed to be retrieved") private MutableGaugeInt numReplaceLabelsOnNodesFailedRetrieved; @Metric("# of replaceLabelsOnNode failed to be retrieved") @@ -267,6 +271,10 @@ public final class RouterMetrics { private MutableRate totalSucceededReplaceLabelsOnNodeRetrieved; @Metric("Total number of successful Retrieved GetSchedulerInfo and latency(ms)") private MutableRate totalSucceededGetSchedulerInfoRetrieved; + @Metric("Total number of successful Retrieved RefreshAdminAcls and latency(ms)") + private MutableRate totalSucceededRefreshAdminAclsRetrieved; + @Metric("Total number of successful Retrieved RefreshServiceAcls and latency(ms)") + private MutableRate totalSucceededRefreshServiceAclsRetrieved; @Metric("Total number of successful Retrieved AddToClusterNodeLabels and latency(ms)") private MutableRate totalSucceededAddToClusterNodeLabelsRetrieved; @Metric("Total number of successful Retrieved RemoveFromClusterNodeLabels and latency(ms)") @@ -328,6 +336,8 @@ public final 
class RouterMetrics { private MutableQuantiles getSchedulerInfoRetrievedLatency; private MutableQuantiles refreshSuperUserGroupsConfLatency; private MutableQuantiles refreshUserToGroupsMappingsLatency; + private MutableQuantiles refreshAdminAclsLatency; + private MutableQuantiles refreshServiceAclsLatency; private MutableQuantiles replaceLabelsOnNodesLatency; private MutableQuantiles replaceLabelsOnNodeLatency; private MutableQuantiles addToClusterNodeLabelsLatency; @@ -524,6 +534,12 @@ public final class RouterMetrics { refreshUserToGroupsMappingsLatency = registry.newQuantiles("refreshUserToGroupsMappingsLatency", "latency of refresh user to groups mappings timeouts", "ops", "latency", 10); + refreshAdminAclsLatency = registry.newQuantiles("refreshAdminAclsLatency", + "latency of refresh admin acls timeouts", "ops", "latency", 10); + + refreshServiceAclsLatency = registry.newQuantiles("refreshServiceAclsLatency", + "latency of refresh service acls timeouts", "ops", "latency", 10); + replaceLabelsOnNodesLatency = registry.newQuantiles("replaceLabelsOnNodesLatency", "latency of replace labels on nodes timeouts", "ops", "latency", 10); @@ -811,6 +827,16 @@ public final class RouterMetrics { return totalSucceededGetSchedulerInfoRetrieved.lastStat().numSamples(); } + @VisibleForTesting + public long getNumSucceededRefreshAdminAclsRetrieved() { + return totalSucceededRefreshAdminAclsRetrieved.lastStat().numSamples(); + } + + @VisibleForTesting + public long getNumSucceededRefreshServiceAclsRetrieved() { + return totalSucceededRefreshServiceAclsRetrieved.lastStat().numSamples(); + } + @VisibleForTesting public long getNumSucceededAddToClusterNodeLabelsRetrieved() { return totalSucceededAddToClusterNodeLabelsRetrieved.lastStat().numSamples(); @@ -1091,6 +1117,16 @@ public final class RouterMetrics { return totalSucceededGetSchedulerInfoRetrieved.lastStat().mean(); } + @VisibleForTesting + public double getLatencySucceededRefreshAdminAclsRetrieved() { + return totalSucceededRefreshAdminAclsRetrieved.lastStat().mean(); + } + + @VisibleForTesting + public double getLatencySucceededRefreshServiceAclsRetrieved() { + return totalSucceededRefreshServiceAclsRetrieved.lastStat().mean(); + } + @VisibleForTesting public double getLatencySucceededAddToClusterNodeLabelsRetrieved() { return totalSucceededAddToClusterNodeLabelsRetrieved.lastStat().mean(); @@ -1322,6 +1358,14 @@ public final class RouterMetrics { return numRefreshUserToGroupsMappingsFailedRetrieved.value(); } + public int getNumRefreshAdminAclsFailedRetrieved() { + return numRefreshAdminAclsFailedRetrieved.value(); + } + + public int getNumRefreshServiceAclsFailedRetrieved() { + return numRefreshServiceAclsFailedRetrieved.value(); + } + public int getNumReplaceLabelsOnNodesFailedRetrieved() { return numReplaceLabelsOnNodesFailedRetrieved.value(); } @@ -1621,6 +1665,16 @@ public final class RouterMetrics { getSchedulerInfoRetrievedLatency.add(duration); } + public void succeededRefreshAdminAclsRetrieved(long duration) { + totalSucceededRefreshAdminAclsRetrieved.add(duration); + refreshAdminAclsLatency.add(duration); + } + + public void succeededRefreshServiceAclsRetrieved(long duration) { + totalSucceededRefreshServiceAclsRetrieved.add(duration); + refreshServiceAclsLatency.add(duration); + } + public void succeededAddToClusterNodeLabelsRetrieved(long duration) { totalSucceededAddToClusterNodeLabelsRetrieved.add(duration); addToClusterNodeLabelsLatency.add(duration); @@ -1835,6 +1889,14 @@ public final class RouterMetrics { 
numRefreshUserToGroupsMappingsFailedRetrieved.incr(); } + public void incrRefreshAdminAclsFailedRetrieved() { + numRefreshAdminAclsFailedRetrieved.incr(); + } + + public void incrRefreshServiceAclsFailedRetrieved() { + numRefreshServiceAclsFailedRetrieved.incr(); + } + public void incrAddToClusterNodeLabelsFailedRetrieved() { numAddToClusterNodeLabelsFailedRetrieved.incr(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java index 41d87c3f588..93e864bb980 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java @@ -372,13 +372,67 @@ public class FederationRMAdminInterceptor extends AbstractRMAdminRequestIntercep @Override public RefreshAdminAclsResponse refreshAdminAcls(RefreshAdminAclsRequest request) throws YarnException, IOException { - throw new NotImplementedException(); + + // parameter verification. + if (request == null) { + routerMetrics.incrRefreshAdminAclsFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing RefreshAdminAcls request.", null); + } + + // call refreshAdminAcls of activeSubClusters. + try { + long startTime = clock.getTime(); + RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod( + new Class[] {RefreshAdminAclsRequest.class}, new Object[] {request}); + String subClusterId = request.getSubClusterId(); + Collection refreshAdminAclsResps = + remoteMethod.invokeConcurrent(this, RefreshAdminAclsResponse.class, subClusterId); + if (CollectionUtils.isNotEmpty(refreshAdminAclsResps)) { + long stopTime = clock.getTime(); + routerMetrics.succeededRefreshAdminAclsRetrieved(stopTime - startTime); + return RefreshAdminAclsResponse.newInstance(); + } + } catch (YarnException e) { + routerMetrics.incrRefreshAdminAclsFailedRetrieved(); + RouterServerUtil.logAndThrowException(e, + "Unable to refreshAdminAcls due to exception. " + e.getMessage()); + } + + routerMetrics.incrRefreshAdminAclsFailedRetrieved(); + throw new YarnException("Unable to refreshAdminAcls."); } @Override public RefreshServiceAclsResponse refreshServiceAcls(RefreshServiceAclsRequest request) throws YarnException, IOException { - throw new NotImplementedException(); + + // parameter verification. + if (request == null) { + routerMetrics.incrRefreshServiceAclsFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing RefreshServiceAcls request.", null); + } + + // call refreshAdminAcls of activeSubClusters. 
+ try { + long startTime = clock.getTime(); + RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod( + new Class[]{RefreshServiceAclsRequest.class}, new Object[]{request}); + String subClusterId = request.getSubClusterId(); + Collection refreshServiceAclsResps = + remoteMethod.invokeConcurrent(this, RefreshServiceAclsResponse.class, subClusterId); + if (CollectionUtils.isNotEmpty(refreshServiceAclsResps)) { + long stopTime = clock.getTime(); + routerMetrics.succeededRefreshServiceAclsRetrieved(stopTime - startTime); + return RefreshServiceAclsResponse.newInstance(); + } + } catch (YarnException e) { + routerMetrics.incrRefreshServiceAclsFailedRetrieved(); + RouterServerUtil.logAndThrowException(e, + "Unable to refreshAdminAcls due to exception. " + e.getMessage()); + } + + routerMetrics.incrRefreshServiceAclsFailedRetrieved(); + throw new YarnException("Unable to refreshServiceAcls."); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java index db0b6837603..a3756174573 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java @@ -534,6 +534,16 @@ public class TestRouterMetrics { metrics.incrRenewDelegationTokenFailedRetrieved(); } + public void getRefreshAdminAclsFailedRetrieved() { + LOG.info("Mocked: failed refreshAdminAcls call"); + metrics.incrRefreshAdminAclsFailedRetrieved(); + } + + public void getRefreshServiceAclsFailedRetrieved() { + LOG.info("Mocked: failed refreshServiceAcls call"); + metrics.incrRefreshServiceAclsFailedRetrieved(); + } + public void getReplaceLabelsOnNodesFailed() { LOG.info("Mocked: failed replaceLabelsOnNodes call"); metrics.incrReplaceLabelsOnNodesFailedRetrieved(); @@ -789,6 +799,16 @@ public class TestRouterMetrics { metrics.succeededRenewDelegationTokenRetrieved(duration); } + public void getRefreshAdminAclsRetrieved(long duration) { + LOG.info("Mocked: successful RefreshAdminAcls call with duration {}", duration); + metrics.succeededRefreshAdminAclsRetrieved(duration); + } + + public void getRefreshServiceAclsRetrieved(long duration) { + LOG.info("Mocked: successful RefreshServiceAcls call with duration {}", duration); + metrics.succeededRefreshServiceAclsRetrieved(duration); + } + public void getNumSucceededReplaceLabelsOnNodesRetrieved(long duration) { LOG.info("Mocked: successful ReplaceLabelsOnNodes call with duration {}", duration); metrics.succeededReplaceLabelsOnNodesRetrieved(duration); @@ -1653,6 +1673,52 @@ public class TestRouterMetrics { metrics.getRenewDelegationTokenFailedRetrieved()); } + @Test + public void testRefreshAdminAclsRetrieved() { + long totalGoodBefore = metrics.getNumSucceededRefreshAdminAclsRetrieved(); + goodSubCluster.getRefreshAdminAclsRetrieved(150); + Assert.assertEquals(totalGoodBefore + 1, + metrics.getNumSucceededRefreshAdminAclsRetrieved()); + Assert.assertEquals(150, + metrics.getLatencySucceededRefreshAdminAclsRetrieved(), ASSERT_DOUBLE_DELTA); + goodSubCluster.getRefreshAdminAclsRetrieved(300); + Assert.assertEquals(totalGoodBefore + 2, + 
metrics.getNumSucceededRefreshAdminAclsRetrieved()); + Assert.assertEquals(225, + metrics.getLatencySucceededRefreshAdminAclsRetrieved(), ASSERT_DOUBLE_DELTA); + } + + @Test + public void testRefreshAdminAclsRetrievedFailed() { + long totalBadBefore = metrics.getNumRefreshAdminAclsFailedRetrieved(); + badSubCluster.getRefreshAdminAclsFailedRetrieved(); + Assert.assertEquals(totalBadBefore + 1, + metrics.getNumRefreshAdminAclsFailedRetrieved()); + } + + @Test + public void testRefreshServiceAclsRetrieved() { + long totalGoodBefore = metrics.getNumSucceededRefreshServiceAclsRetrieved(); + goodSubCluster.getRefreshServiceAclsRetrieved(150); + Assert.assertEquals(totalGoodBefore + 1, + metrics.getNumSucceededRefreshServiceAclsRetrieved()); + Assert.assertEquals(150, + metrics.getLatencySucceededRefreshServiceAclsRetrieved(), ASSERT_DOUBLE_DELTA); + goodSubCluster.getRefreshServiceAclsRetrieved(300); + Assert.assertEquals(totalGoodBefore + 2, + metrics.getNumSucceededRefreshServiceAclsRetrieved()); + Assert.assertEquals(225, + metrics.getLatencySucceededRefreshServiceAclsRetrieved(), ASSERT_DOUBLE_DELTA); + } + + @Test + public void testRefreshServiceAclsRetrievedFailed() { + long totalBadBefore = metrics.getNumRefreshServiceAclsFailedRetrieved(); + badSubCluster.getRefreshServiceAclsFailedRetrieved(); + Assert.assertEquals(totalBadBefore + 1, + metrics.getNumRefreshServiceAclsFailedRetrieved()); + } + @Test public void testReplaceLabelsOnNodesRetrieved() { long totalGoodBefore = metrics.getNumSucceededReplaceLabelsOnNodesRetrieved(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java index 977f82dd3cd..60a782bd8a9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.router.rmadmin; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.yarn.api.records.DecommissionType; @@ -30,6 +31,10 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsC import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsResponse; import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore; import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId; import 
org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade; @@ -113,6 +118,8 @@ public class TestFederationRMAdminInterceptor extends BaseRouterRMAdminTest { config.set(YarnConfiguration.ROUTER_RMADMIN_INTERCEPTOR_CLASS_PIPELINE, mockPassThroughInterceptorClass + "," + mockPassThroughInterceptorClass + "," + TestFederationRMAdminInterceptor.class.getName()); + config.setBoolean( + CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, true); return config; } @@ -259,4 +266,58 @@ public class TestFederationRMAdminInterceptor extends BaseRouterRMAdminTest { "subClusterId = SC-NON is not an active subCluster.", () -> interceptor.refreshUserToGroupsMappings(request1)); } + + @Test + public void testRefreshAdminAcls() throws Exception { + // null request. + LambdaTestUtils.intercept(YarnException.class, "Missing RefreshAdminAcls request.", + () -> interceptor.refreshAdminAcls(null)); + + // normal request. + RefreshAdminAclsRequest request = RefreshAdminAclsRequest.newInstance(); + RefreshAdminAclsResponse response = interceptor.refreshAdminAcls(request); + assertNotNull(response); + } + + @Test + public void testSC1RefreshAdminAcls() throws Exception { + // case 1, test the existing subCluster (SC-1). + String existSubCluster = "SC-1"; + RefreshAdminAclsRequest request = RefreshAdminAclsRequest.newInstance(existSubCluster); + RefreshAdminAclsResponse response = interceptor.refreshAdminAcls(request); + assertNotNull(response); + + // case 2, test the non-exist subCluster. + String notExistsSubCluster = "SC-NON"; + RefreshAdminAclsRequest request1 = RefreshAdminAclsRequest.newInstance(notExistsSubCluster); + LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.", + () -> interceptor.refreshAdminAcls(request1)); + } + + @Test + public void testRefreshServiceAcls() throws Exception { + // null request. + LambdaTestUtils.intercept(YarnException.class, "Missing RefreshServiceAcls request.", + () -> interceptor.refreshServiceAcls(null)); + + // normal request. + RefreshServiceAclsRequest request = RefreshServiceAclsRequest.newInstance(); + RefreshServiceAclsResponse response = interceptor.refreshServiceAcls(request); + assertNotNull(response); + } + + @Test + public void testSC1RefreshServiceAcls() throws Exception { + // case 1, test the existing subCluster (SC-1). + String existSubCluster = "SC-1"; + RefreshServiceAclsRequest request = RefreshServiceAclsRequest.newInstance(existSubCluster); + RefreshServiceAclsResponse response = interceptor.refreshServiceAcls(request); + assertNotNull(response); + + // case 2, test the non-exist subCluster. 
+ String notExistsSubCluster = "SC-NON"; + RefreshServiceAclsRequest request1 = RefreshServiceAclsRequest.newInstance(notExistsSubCluster); + LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.", + () -> interceptor.refreshServiceAcls(request1)); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestableFederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestableFederationRMAdminInterceptor.java index 26f50f88b89..b95bcd4a62b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestableFederationRMAdminInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestableFederationRMAdminInterceptor.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.router.rmadmin; import org.apache.commons.collections.MapUtils; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId; @@ -35,6 +36,8 @@ import java.util.HashSet; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import static org.apache.hadoop.yarn.conf.YarnConfiguration.RM_CLUSTER_ID; + public class TestableFederationRMAdminInterceptor extends FederationRMAdminInterceptor { // Record log information @@ -55,11 +58,13 @@ public class TestableFederationRMAdminInterceptor extends FederationRMAdminInter if (mockRMs.containsKey(subClusterId)) { mockRM = mockRMs.get(subClusterId); } else { - mockRM = new MockRM(); + YarnConfiguration config = new YarnConfiguration(super.getConf()); + config.set(RM_CLUSTER_ID, "subcluster." + subClusterId); + mockRM = new MockRM(config); if (badSubCluster.contains(subClusterId)) { return new MockRMAdminBadService(mockRM); } - mockRM.init(super.getConf()); + mockRM.init(config); mockRM.start(); mockRMs.put(subClusterId, mockRM); } From 28d2753d2f86237868034ca1695f84e2eb2ab6cf Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Tue, 28 Feb 2023 17:34:12 -0800 Subject: [PATCH 07/97] HADOOP-18645. 
Provide keytab file key name with ServiceStateException (#5433) Signed-off-by: Tao Li --- .../src/main/java/org/apache/hadoop/security/SecurityUtil.java | 3 ++- .../server/federation/router/TestRouterWithSecureStartup.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java index 3369869bde2..d045a7f6fc4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java @@ -314,7 +314,8 @@ public final class SecurityUtil { String keytabFilename = conf.get(keytabFileKey); if (keytabFilename == null || keytabFilename.length() == 0) { - throw new IOException("Running in secure mode, but config doesn't have a keytab"); + throw new IOException( + "Running in secure mode, but config doesn't have a keytab for key: " + keytabFileKey); } String principalConfig = conf.get(userNameKey, System diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterWithSecureStartup.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterWithSecureStartup.java index b660b4bcbb2..b0c0c050bf0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterWithSecureStartup.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterWithSecureStartup.java @@ -56,7 +56,8 @@ public class TestRouterWithSecureStartup { @Test public void testStartupWithoutKeytab() throws Exception { testCluster(DFS_ROUTER_KEYTAB_FILE_KEY, - "Running in secure mode, but config doesn't have a keytab"); + "Running in secure mode, but config doesn't have a keytab for " + + "key: dfs.federation.router.keytab.file"); } @Test From 8f6be3678d1113e3e7f5477c357fc81f62d460b8 Mon Sep 17 00:00:00 2001 From: Szilard Nemeth Date: Wed, 1 Mar 2023 16:10:05 +0100 Subject: [PATCH 08/97] MAPREDUCE-7434. Fix ShuffleHandler tests. 
Contributed by Tamas Domok --- .../mapred/TestShuffleChannelHandler.java | 2 +- .../hadoop/mapred/TestShuffleHandler.java | 44 +++++++++++++------ .../hadoop/mapred/TestShuffleHandlerBase.java | 29 ++++++------ 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleChannelHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleChannelHandler.java index 7fedc7bb2dc..66fa3de94f8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleChannelHandler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleChannelHandler.java @@ -225,7 +225,7 @@ public class TestShuffleChannelHandler extends TestShuffleHandlerBase { final ShuffleTest t = createShuffleTest(); final EmbeddedChannel shuffle = t.createShuffleHandlerChannelFileRegion(); - String dataFile = getDataFile(tempDir.toAbsolutePath().toString(), TEST_ATTEMPT_2); + String dataFile = getDataFile(TEST_USER, tempDir.toAbsolutePath().toString(), TEST_ATTEMPT_2); assertTrue("should delete", new File(dataFile).delete()); FullHttpRequest req = t.createRequest(getUri(TEST_JOB_ID, 0, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleHandler.java index 37a9210286c..cc46b49b113 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleHandler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleHandler.java @@ -29,6 +29,7 @@ import static org.apache.hadoop.test.MetricsAsserts.assertCounter; import static org.apache.hadoop.test.MetricsAsserts.assertGauge; import static org.apache.hadoop.test.MetricsAsserts.getMetrics; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -41,6 +42,7 @@ import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; @@ -159,7 +161,7 @@ public class TestShuffleHandler extends TestShuffleHandlerBase { shuffleHandler.init(conf); shuffleHandler.start(); final String port = shuffleHandler.getConfig().get(SHUFFLE_PORT_CONFIG_KEY); - final SecretKey secretKey = shuffleHandler.addTestApp(); + final SecretKey secretKey = shuffleHandler.addTestApp(TEST_USER); // setup connections HttpURLConnection[] conns = new HttpURLConnection[connAttempts]; @@ -237,7 +239,7 @@ public class TestShuffleHandler extends TestShuffleHandlerBase { shuffleHandler.init(conf); shuffleHandler.start(); final String port = shuffleHandler.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY); - final SecretKey secretKey = shuffleHandler.addTestApp(); + final SecretKey secretKey = 
shuffleHandler.addTestApp(TEST_USER); HttpURLConnection conn1 = createRequest( geURL(port, TEST_JOB_ID, 0, Collections.singletonList(TEST_ATTEMPT_1), true), @@ -278,18 +280,34 @@ public class TestShuffleHandler extends TestShuffleHandlerBase { conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos"); UserGroupInformation.setConfiguration(conf); + final String randomUser = "randomUser"; + final String attempt = "attempt_1111111111111_0004_m_000004_0"; + generateMapOutput(randomUser, tempDir.toAbsolutePath().toString(), attempt, + Arrays.asList(TEST_DATA_C, TEST_DATA_B, TEST_DATA_A)); + ShuffleHandlerMock shuffleHandler = new ShuffleHandlerMock(); shuffleHandler.init(conf); try { shuffleHandler.start(); final String port = shuffleHandler.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY); - final SecretKey secretKey = shuffleHandler.addTestApp(); + final SecretKey secretKey = shuffleHandler.addTestApp(randomUser); HttpURLConnection conn = createRequest( - geURL(port, TEST_JOB_ID, 0, Collections.singletonList(TEST_ATTEMPT_1), false), + geURL(port, TEST_JOB_ID, 0, Collections.singletonList(attempt), false), secretKey); conn.connect(); - BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream())); + + InputStream is = null; + try { + is = conn.getInputStream(); + } catch (IOException ioe) { + if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) { + is = conn.getErrorStream(); + } + } + + assertNotNull(is); + BufferedReader in = new BufferedReader(new InputStreamReader(is)); StringBuilder builder = new StringBuilder(); String inputLine; while ((inputLine = in.readLine()) != null) { @@ -299,7 +317,7 @@ public class TestShuffleHandler extends TestShuffleHandlerBase { String receivedString = builder.toString(); //Retrieve file owner name - String indexFilePath = getIndexFile(tempDir.toAbsolutePath().toString(), TEST_ATTEMPT_1); + String indexFilePath = getIndexFile(randomUser, tempDir.toAbsolutePath().toString(), attempt); String owner; try (FileInputStream fis = new FileInputStream(indexFilePath)) { owner = NativeIO.POSIX.getFstat(fis.getFD()).getOwner(); @@ -307,11 +325,11 @@ public class TestShuffleHandler extends TestShuffleHandlerBase { String message = "Owner '" + owner + "' for path " + indexFilePath - + " did not match expected owner '" + TEST_USER + "'"; + + " did not match expected owner '" + randomUser + "'"; assertTrue(String.format("Received string '%s' should contain " + "message '%s'", receivedString, message), receivedString.contains(message)); - assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); + assertEquals(HttpURLConnection.HTTP_INTERNAL_ERROR, conn.getResponseCode()); LOG.info("received: " + receivedString); assertNotEquals("", receivedString); } finally { @@ -334,7 +352,7 @@ public class TestShuffleHandler extends TestShuffleHandlerBase { shuffle.init(conf); shuffle.start(); final String port = shuffle.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY); - final SecretKey secretKey = shuffle.addTestApp(); + final SecretKey secretKey = shuffle.addTestApp(TEST_USER); // verify we are authorized to shuffle int rc = getShuffleResponseCode(port, secretKey); @@ -387,7 +405,7 @@ public class TestShuffleHandler extends TestShuffleHandlerBase { shuffle.init(conf); shuffle.start(); final String port = shuffle.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY); - final SecretKey secretKey = shuffle.addTestApp(); + final SecretKey secretKey = shuffle.addTestApp(TEST_USER); // verify we are authorized to 
shuffle int rc = getShuffleResponseCode(port, secretKey); @@ -489,14 +507,14 @@ public class TestShuffleHandler extends TestShuffleHandlerBase { class ShuffleHandlerMock extends ShuffleHandler { - public SecretKey addTestApp() throws IOException { + public SecretKey addTestApp(String user) throws IOException { DataOutputBuffer outputBuffer = new DataOutputBuffer(); outputBuffer.reset(); Token jt = new Token<>( - "identifier".getBytes(), "password".getBytes(), new Text(TEST_USER), + "identifier".getBytes(), "password".getBytes(), new Text(user), new Text("shuffleService")); jt.write(outputBuffer); - initializeApplication(new ApplicationInitializationContext(TEST_USER, TEST_APP_ID, + initializeApplication(new ApplicationInitializationContext(user, TEST_APP_ID, ByteBuffer.wrap(outputBuffer.getData(), 0, outputBuffer.getLength()))); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleHandlerBase.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleHandlerBase.java index 1bce443381d..406f2866230 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleHandlerBase.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/src/test/java/org/apache/hadoop/mapred/TestShuffleHandlerBase.java @@ -55,7 +55,7 @@ public class TestShuffleHandlerBase { public static final String TEST_ATTEMPT_2 = "attempt_1111111111111_0002_m_000002_0"; public static final String TEST_ATTEMPT_3 = "attempt_1111111111111_0003_m_000003_0"; public static final String TEST_JOB_ID = "job_1111111111111_0001"; - public static final String TEST_USER = "testUser"; + public static final String TEST_USER = System.getProperty("user.name"); public static final String TEST_DATA_A = "aaaaa"; public static final String TEST_DATA_B = "bbbbb"; public static final String TEST_DATA_C = "ccccc"; @@ -70,11 +70,11 @@ public class TestShuffleHandlerBase { tempDir = Files.createTempDirectory("test-shuffle-channel-handler"); tempDir.toFile().deleteOnExit(); - generateMapOutput(tempDir.toAbsolutePath().toString(), TEST_ATTEMPT_1, + generateMapOutput(TEST_USER, tempDir.toAbsolutePath().toString(), TEST_ATTEMPT_1, Arrays.asList(TEST_DATA_A, TEST_DATA_B, TEST_DATA_C)); - generateMapOutput(tempDir.toAbsolutePath().toString(), TEST_ATTEMPT_2, + generateMapOutput(TEST_USER, tempDir.toAbsolutePath().toString(), TEST_ATTEMPT_2, Arrays.asList(TEST_DATA_B, TEST_DATA_A, TEST_DATA_C)); - generateMapOutput(tempDir.toAbsolutePath().toString(), TEST_ATTEMPT_3, + generateMapOutput(TEST_USER, tempDir.toAbsolutePath().toString(), TEST_ATTEMPT_3, Arrays.asList(TEST_DATA_C, TEST_DATA_B, TEST_DATA_A)); outputStreamCaptor.reset(); @@ -101,12 +101,13 @@ public class TestShuffleHandlerBase { return allMatches; } - public static void generateMapOutput(String tempDir, String attempt, List maps) + public static void generateMapOutput(String user, String tempDir, + String attempt, List maps) throws IOException { SpillRecord record = new SpillRecord(maps.size()); - assertTrue(new File(getBasePath(tempDir, attempt)).mkdirs()); - try (PrintWriter writer = new PrintWriter(getDataFile(tempDir, attempt), "UTF-8")) { + assertTrue(new File(getBasePath(user, tempDir, attempt)).mkdirs()); + try (PrintWriter writer = new PrintWriter(getDataFile(user, tempDir, attempt), "UTF-8")) { long startOffset 
= 0; int partition = 0; for (String map : maps) { @@ -119,21 +120,21 @@ public class TestShuffleHandlerBase { partition++; writer.write(map); } - record.writeToFile(new Path(getIndexFile(tempDir, attempt)), + record.writeToFile(new Path(getIndexFile(user, tempDir, attempt)), new JobConf(new Configuration())); } } - public static String getIndexFile(String tempDir, String attempt) { - return String.format("%s/%s", getBasePath(tempDir, attempt), INDEX_FILE_NAME); + public static String getIndexFile(String user, String tempDir, String attempt) { + return String.format("%s/%s", getBasePath(user, tempDir, attempt), INDEX_FILE_NAME); } - public static String getDataFile(String tempDir, String attempt) { - return String.format("%s/%s", getBasePath(tempDir, attempt), DATA_FILE_NAME); + public static String getDataFile(String user, String tempDir, String attempt) { + return String.format("%s/%s", getBasePath(user, tempDir, attempt), DATA_FILE_NAME); } - private static String getBasePath(String tempDir, String attempt) { - return String.format("%s/%s/%s/%s", tempDir, TEST_JOB_ID, TEST_USER, attempt); + private static String getBasePath(String user, String tempDir, String attempt) { + return String.format("%s/%s/%s/%s", tempDir, TEST_JOB_ID, user, attempt); } public static String getUri(String jobId, int reduce, List maps, boolean keepAlive) { From 2ab7eb4caa9fe012e671434c5bce0e7169440e16 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 1 Mar 2023 10:53:10 -0800 Subject: [PATCH 09/97] HDFS-16935. Fix TestFsDatasetImpl#testReportBadBlocks (#5432) Contributed by Viraj Jasani --- .../fsdataset/impl/TestFsDatasetImpl.java | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java index d6f42f3d020..b744a6fa586 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java @@ -1075,16 +1075,14 @@ public class TestFsDatasetImpl { @Test(timeout = 30000) public void testReportBadBlocks() throws Exception { boolean threwException = false; - MiniDFSCluster cluster = null; - try { - Configuration config = new HdfsConfiguration(); - cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build(); + final Configuration config = new HdfsConfiguration(); + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(config) + .numDataNodes(1).build()) { cluster.waitActive(); Assert.assertEquals(0, cluster.getNamesystem().getCorruptReplicaBlocks()); DataNode dataNode = cluster.getDataNodes().get(0); - ExtendedBlock block = - new ExtendedBlock(cluster.getNamesystem().getBlockPoolId(), 0); + ExtendedBlock block = new ExtendedBlock(cluster.getNamesystem().getBlockPoolId(), 0); try { // Test the reportBadBlocks when the volume is null dataNode.reportBadBlocks(block); @@ -1101,15 +1099,11 @@ public class TestFsDatasetImpl { block = DFSTestUtil.getFirstBlock(fs, filePath); // Test for the overloaded method reportBadBlocks - dataNode.reportBadBlocks(block, dataNode.getFSDataset() - .getFsVolumeReferences().get(0)); - Thread.sleep(3000); - BlockManagerTestUtil.updateState(cluster.getNamesystem() - .getBlockManager()); - // Verify the bad block has 
been reported to namenode - Assert.assertEquals(1, cluster.getNamesystem().getCorruptReplicaBlocks()); - } finally { - cluster.shutdown(); + dataNode.reportBadBlocks(block, dataNode.getFSDataset().getFsVolumeReferences().get(0)); + DataNodeTestUtils.triggerHeartbeat(dataNode); + BlockManagerTestUtil.updateState(cluster.getNamesystem().getBlockManager()); + assertEquals("Corrupt replica blocks could not be reflected with the heartbeat", 1, + cluster.getNamesystem().getCorruptReplicaBlocks()); } } From 162288bc0af944f116a4dd73e27f4676a204d9e9 Mon Sep 17 00:00:00 2001 From: Tom Date: Wed, 1 Mar 2023 11:47:04 -0800 Subject: [PATCH 10/97] =?UTF-8?q?HDFS-16896=20clear=20ignoredNodes=20list?= =?UTF-8?q?=20when=20we=20clear=20deadnode=20list=20on=20ref=E2=80=A6=20(#?= =?UTF-8?q?5322)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HDFS-16896 clear ignoredNodes list when we clear deadnode list on refetchLocations. ignoredNodes list is only used on hedged read codepath Co-authored-by: Tom McCormick --- .../apache/hadoop/hdfs/DFSInputStream.java | 34 ++++++++++++++++--- .../TestDFSInputStreamBlockLocations.java | 23 +++++++++++++ .../org/apache/hadoop/hdfs/TestPread.java | 4 ++- 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 7b664e4f311..a8d80016072 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -224,7 +224,7 @@ public class DFSInputStream extends FSInputStream } /** - * Grab the open-file info from namenode + * Grab the open-file info from namenode. * @param refreshLocatedBlocks whether to re-fetch locatedblocks */ void openInfo(boolean refreshLocatedBlocks) throws IOException { @@ -940,7 +940,8 @@ public class DFSInputStream extends FSInputStream * @return Returns chosen DNAddrPair; Can be null if refetchIfRequired is * false. */ - private DNAddrPair chooseDataNode(LocatedBlock block, + @VisibleForTesting + DNAddrPair chooseDataNode(LocatedBlock block, Collection ignoredNodes, boolean refetchIfRequired) throws IOException { while (true) { @@ -955,6 +956,14 @@ public class DFSInputStream extends FSInputStream } } + /** + * RefetchLocations should only be called when there are no active requests + * to datanodes. In the hedged read case this means futures should be empty. + * @param block The locatedBlock to get new datanode locations for. + * @param ignoredNodes A list of ignored nodes. This list can be null and can be cleared. + * @return the locatedBlock with updated datanode locations. 
+ * @throws IOException + */ private LocatedBlock refetchLocations(LocatedBlock block, Collection ignoredNodes) throws IOException { String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(), @@ -999,13 +1008,24 @@ public class DFSInputStream extends FSInputStream throw new InterruptedIOException( "Interrupted while choosing DataNode for read."); } - clearLocalDeadNodes(); //2nd option is to remove only nodes[blockId] + clearCachedNodeState(ignoredNodes); openInfo(true); block = refreshLocatedBlock(block); failures++; return block; } + /** + * Clear both the dead nodes and the ignored nodes + * @param ignoredNodes is cleared + */ + private void clearCachedNodeState(Collection ignoredNodes) { + clearLocalDeadNodes(); //2nd option is to remove only nodes[blockId] + if (ignoredNodes != null) { + ignoredNodes.clear(); + } + } + /** * Get the best node from which to stream the data. * @param block LocatedBlock, containing nodes in priority order. @@ -1337,8 +1357,12 @@ public class DFSInputStream extends FSInputStream } catch (InterruptedException ie) { // Ignore and retry } - if (refetch) { - refetchLocations(block, ignored); + // If refetch is true, then all nodes are in deadNodes or ignoredNodes. + // We should loop through all futures and remove them, so we do not + // have concurrent requests to the same node. + // Once all futures are cleared, we can clear the ignoredNodes and retry. + if (refetch && futures.isEmpty()) { + block = refetchLocations(block, ignored); } // We got here if exception. Ignore this node on next go around IFF // we found a chosenNode to hedge read against. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInputStreamBlockLocations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInputStreamBlockLocations.java index 50378f60381..2e4e496bc3d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInputStreamBlockLocations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInputStreamBlockLocations.java @@ -27,6 +27,7 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -35,11 +36,14 @@ import java.util.Map; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.util.Time; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -200,6 +204,25 @@ public class TestDFSInputStreamBlockLocations { testWithRegistrationMethod(DFSInputStream::getAllBlocks); } + /** + * If the ignoreList contains all datanodes, the ignoredList should be cleared to take advantage + * of retries built into chooseDataNode. 
This is needed for hedged reads + * @throws IOException + */ + @Test + public void testClearIgnoreListChooseDataNode() throws IOException { + final String fileName = "/test_cache_locations"; + filePath = createFile(fileName); + + try (DFSInputStream fin = dfsClient.open(fileName)) { + LocatedBlocks existing = fin.locatedBlocks; + LocatedBlock block = existing.getLastLocatedBlock(); + ArrayList ignoreList = new ArrayList<>(Arrays.asList(block.getLocations())); + Assert.assertNotNull(fin.chooseDataNode(block, ignoreList, true)); + Assert.assertEquals(0, ignoreList.size()); + } + } + @FunctionalInterface interface ThrowingConsumer { void accept(DFSInputStream fin) throws IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java index c1e0dbb8e63..729a7941605 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java @@ -603,7 +603,9 @@ public class TestPread { input.read(0, buffer, 0, 1024); Assert.fail("Reading the block should have thrown BlockMissingException"); } catch (BlockMissingException e) { - assertEquals(3, input.getHedgedReadOpsLoopNumForTesting()); + // The result of 9 is due to 2 blocks by 4 iterations plus one because + // hedgedReadOpsLoopNumForTesting is incremented at start of the loop. + assertEquals(9, input.getHedgedReadOpsLoopNumForTesting()); assertTrue(metrics.getHedgedReadOps() == 0); } finally { Mockito.reset(injector); From e1ca466bdbf330572eab4772446e004ae5151313 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 1 Mar 2023 16:02:07 -0800 Subject: [PATCH 11/97] HADOOP-18648. 
Avoid loading kms log4j properties dynamically by KMSWebServer (#5441) --- .../key/kms/server/KMSConfiguration.java | 39 +++++++------------ .../crypto/key/kms/server/KMSWebServer.java | 2 +- .../main/libexec/shellprofile.d/hadoop-kms.sh | 2 + 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java index 35ffb429816..353260f0f9b 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java @@ -20,7 +20,7 @@ package org.apache.hadoop.crypto.key.kms.server; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.log4j.PropertyConfigurator; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -104,8 +104,6 @@ public class KMSConfiguration { public static final boolean KEY_AUTHORIZATION_ENABLE_DEFAULT = true; - private static final String LOG4J_PROPERTIES = "kms-log4j.properties"; - static { Configuration.addDefaultResource(KMS_DEFAULT_XML); Configuration.addDefaultResource(KMS_SITE_XML); @@ -163,31 +161,20 @@ public class KMSConfiguration { return newer; } - public static void initLogging() { - String confDir = System.getProperty(KMS_CONFIG_DIR); - if (confDir == null) { - throw new RuntimeException("System property '" + - KMSConfiguration.KMS_CONFIG_DIR + "' not defined"); + /** + * Validate whether "kms.config.dir" and "log4j.configuration" are defined in the System + * properties. If not, abort the KMS WebServer. 
+ */ + public static void validateSystemProps() { + if (System.getProperty(KMS_CONFIG_DIR) == null) { + String errorMsg = "System property '" + KMS_CONFIG_DIR + "' not defined"; + System.err.println("Aborting KMSWebServer because " + errorMsg); + throw new RuntimeException(errorMsg); } if (System.getProperty("log4j.configuration") == null) { - System.setProperty("log4j.defaultInitOverride", "true"); - boolean fromClasspath = true; - File log4jConf = new File(confDir, LOG4J_PROPERTIES).getAbsoluteFile(); - if (log4jConf.exists()) { - PropertyConfigurator.configureAndWatch(log4jConf.getPath(), 1000); - fromClasspath = false; - } else { - ClassLoader cl = Thread.currentThread().getContextClassLoader(); - URL log4jUrl = cl.getResource(LOG4J_PROPERTIES); - if (log4jUrl != null) { - PropertyConfigurator.configure(log4jUrl); - } - } - LOG.debug("KMS log starting"); - if (fromClasspath) { - LOG.warn("Log4j configuration file '{}' not found", LOG4J_PROPERTIES); - LOG.warn("Logging with INFO level to standard output"); - } + String errorMsg = "System property 'log4j.configuration' not defined"; + System.err.println("Aborting KMSWebServer because " + errorMsg); + throw new RuntimeException(errorMsg); } } } diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java index a6cab81eb8e..5c9f23e9a0c 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java @@ -185,7 +185,7 @@ public class KMSWebServer { } public static void main(String[] args) throws Exception { - KMSConfiguration.initLogging(); + KMSConfiguration.validateSystemProps(); StringUtils.startupShutdownMessage(KMSWebServer.class, args, LOG); Configuration conf = KMSConfiguration.getKMSConf(); Configuration sslConf = SSLFactory.readSSLConfiguration(conf, SSLFactory.Mode.SERVER); diff --git a/hadoop-common-project/hadoop-kms/src/main/libexec/shellprofile.d/hadoop-kms.sh b/hadoop-common-project/hadoop-kms/src/main/libexec/shellprofile.d/hadoop-kms.sh index 0d084bb36e6..b54bf811d6b 100755 --- a/hadoop-common-project/hadoop-kms/src/main/libexec/shellprofile.d/hadoop-kms.sh +++ b/hadoop-common-project/hadoop-kms/src/main/libexec/shellprofile.d/hadoop-kms.sh @@ -49,6 +49,8 @@ function hadoop_subcommand_kms "-Dkms.config.dir=${HADOOP_CONF_DIR}" hadoop_add_param HADOOP_OPTS "-Dkms.log.dir=" \ "-Dkms.log.dir=${HADOOP_LOG_DIR}" + hadoop_add_param HADOOP_OPTS "-Dlog4j.configuration=" \ + "-Dlog4j.configuration=file:${HADOOP_CONF_DIR}/kms-log4j.properties" if [[ "${HADOOP_DAEMON_MODE}" == "default" ]] || [[ "${HADOOP_DAEMON_MODE}" == "start" ]]; then From 6bd24448154fcd3ab9099d7783cc7f7f76c61e08 Mon Sep 17 00:00:00 2001 From: ZanderXu Date: Thu, 2 Mar 2023 08:18:38 +0800 Subject: [PATCH 12/97] HDFS-16923. 
[SBN read] getlisting RPC to observer will throw NPE if path does not exist (#5400) Signed-off-by: Erik Krogen --- .../hadoop/hdfs/server/namenode/FSNamesystem.java | 2 +- .../hdfs/server/namenode/ha/TestObserverNode.java | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index e44a16f029e..0e46dca9dff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4174,7 +4174,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, logAuditEvent(false, operationName, src); throw e; } - if (needLocation && isObserver()) { + if (dl != null && needLocation && isObserver()) { for (HdfsFileStatus fs : dl.getPartialListing()) { if (fs instanceof HdfsLocatedFileStatus) { LocatedBlocks lbs = ((HdfsLocatedFileStatus) fs).getLocatedBlocks(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java index 8b691a11725..6af7e158fa0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hdfs.tools.GetGroups; import org.apache.hadoop.ipc.ObserverRetryOnActiveException; import org.apache.hadoop.ipc.metrics.RpcMetrics; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.concurrent.HadoopExecutors; import org.junit.After; @@ -652,6 +653,17 @@ public class TestObserverNode { } } + @Test + public void testGetListingForDeletedDir() throws Exception { + Path path = new Path("/dir1/dir2/testFile"); + dfs.create(path).close(); + + assertTrue(dfs.delete(new Path("/dir1/dir2"), true)); + + LambdaTestUtils.intercept(FileNotFoundException.class, + () -> dfs.listLocatedStatus(new Path("/dir1/dir2"))); + } + @Test public void testSimpleReadEmptyDirOrFile() throws IOException { // read empty dir From 2a0dc2ab2f5fb46dc540ed440d6c8b2896dd195b Mon Sep 17 00:00:00 2001 From: Varun Saxena Date: Sun, 5 Mar 2023 21:25:16 +0530 Subject: [PATCH 13/97] YARN-11383. 
Workflow priority mappings is case sensitive (#5171) Contributed by Aparajita Choudhary --- .../WorkflowPriorityMappingsManager.java | 20 +++++++++---------- ...acitySchedulerWorkflowPriorityMapping.java | 16 +++++---------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/WorkflowPriorityMappingsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/WorkflowPriorityMappingsManager.java index e9a52d21b4f..ec05b134911 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/WorkflowPriorityMappingsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/WorkflowPriorityMappingsManager.java @@ -56,8 +56,8 @@ public class WorkflowPriorityMappingsManager { private boolean overrideWithPriorityMappings = false; // Map of queue to a map of workflow ID to priority - private Map> priorityMappings = - new HashMap>(); + private Map> priorityMappings = + new HashMap<>(); public static class WorkflowPriorityMapping { String workflowID; @@ -115,10 +115,9 @@ public class WorkflowPriorityMappingsManager { * * @return workflowID to priority mappings for a queue */ - public Map> + public Map> getWorkflowPriorityMappings() { - Map> mappings = - new HashMap>(); + Map> mappings = new HashMap<>(); Collection workflowMappings = conf.getWorkflowPriorityMappings(); for (String workflowMapping : workflowMappings) { @@ -127,9 +126,9 @@ public class WorkflowPriorityMappingsManager { if (mapping != null) { if (!mappings.containsKey(mapping.queue)) { mappings.put(mapping.queue, - new HashMap()); + new HashMap()); } - mappings.get(mapping.queue).put(mapping.workflowID, mapping); + mappings.get(mapping.queue).put(mapping.workflowID, mapping.priority); } } return mappings; @@ -150,8 +149,9 @@ public class WorkflowPriorityMappingsManager { } WorkflowPriorityMapping mapping; try { - mapping = new WorkflowPriorityMapping(mappingArray[0], mappingArray[1], - Priority.newInstance(Integer.parseInt(mappingArray[2]))); + //Converting workflow id to lowercase as yarn converts application tags also to lowercase + mapping = new WorkflowPriorityMapping(StringUtils.toLowerCase(mappingArray[0]), + mappingArray[1], Priority.newInstance(Integer.parseInt(mappingArray[2]))); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Illegal workflow priority for mapping " + mappingString); @@ -168,7 +168,7 @@ public class WorkflowPriorityMappingsManager { String queuePath = queue.getQueuePath(); if (priorityMappings.containsKey(queuePath) && priorityMappings.get(queuePath).containsKey(workflowID)) { - return priorityMappings.get(queuePath).get(workflowID).priority; + return priorityMappings.get(queuePath).get(workflowID); } else { queue = queue.getParent(); return getMappedPriority(workflowID, queue); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWorkflowPriorityMapping.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWorkflowPriorityMapping.java index 442c952ba12..d1931b71939 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWorkflowPriorityMapping.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerWorkflowPriorityMapping.java @@ -78,7 +78,7 @@ public class TestCapacitySchedulerWorkflowPriorityMapping { List mappings = Arrays.asList( new WorkflowPriorityMapping("workflow1", B, Priority.newInstance(2)), new WorkflowPriorityMapping("workflow2", A1, Priority.newInstance(3)), - new WorkflowPriorityMapping("workflow3", A, Priority.newInstance(4))); + new WorkflowPriorityMapping("Workflow3", A, Priority.newInstance(4))); conf.setWorkflowPriorityMappings(mappings); } @@ -99,16 +99,10 @@ public class TestCapacitySchedulerWorkflowPriorityMapping { mockRM.start(); cs.start(); - Map> expected = ImmutableMap.of( - A, ImmutableMap.of("workflow3", - new WorkflowPriorityMapping( - "workflow3", A, Priority.newInstance(4))), - B, ImmutableMap.of("workflow1", - new WorkflowPriorityMapping( - "workflow1", B, Priority.newInstance(2))), - A1, ImmutableMap.of("workflow2", - new WorkflowPriorityMapping( - "workflow2", A1, Priority.newInstance(3)))); + Map expected = ImmutableMap.of( + A, ImmutableMap.of("workflow3", Priority.newInstance(4)), + B, ImmutableMap.of("workflow1", Priority.newInstance(2)), + A1, ImmutableMap.of("workflow2", Priority.newInstance(3))); assertEquals(expected, cs.getWorkflowPriorityMappingsManager() .getWorkflowPriorityMappings()); From 2cb0c35fc1fe64bbbae7f6448977143d3dcc8ed4 Mon Sep 17 00:00:00 2001 From: zhangshuyan <81411509+zhangshuyan0@users.noreply.github.com> Date: Mon, 6 Mar 2023 20:10:31 +0800 Subject: [PATCH 14/97] HDFS-16939. Fix the thread safety bug in LowRedundancyBlocks. (#5450). Contributed by Shuyan Zhang. 
Signed-off-by: He Xiaoqiao --- .../hdfs/server/blockmanagement/LowRedundancyBlocks.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java index ddaa2fec5de..d1c3b727e6f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java @@ -86,10 +86,10 @@ class LowRedundancyBlocks implements Iterable { private final List> priorityQueues = new ArrayList<>(LEVEL); - /** The number of corrupt blocks with replication factor 1 */ private final LongAdder lowRedundancyBlocks = new LongAdder(); private final LongAdder corruptBlocks = new LongAdder(); + /** The number of corrupt blocks with replication factor 1 */ private final LongAdder corruptReplicationOneBlocks = new LongAdder(); private final LongAdder lowRedundancyECBlockGroups = new LongAdder(); private final LongAdder corruptECBlockGroups = new LongAdder(); @@ -369,11 +369,11 @@ class LowRedundancyBlocks implements Iterable { * @return true if the block was found and removed from one of the priority * queues */ - boolean remove(BlockInfo block, int priLevel) { + synchronized boolean remove(BlockInfo block, int priLevel) { return remove(block, priLevel, block.getReplication()); } - boolean remove(BlockInfo block, int priLevel, int oldExpectedReplicas) { + synchronized boolean remove(BlockInfo block, int priLevel, int oldExpectedReplicas) { if(priLevel >= 0 && priLevel < LEVEL && priorityQueues.get(priLevel).remove(block)) { NameNode.blockStateChangeLog.debug( From 927401886ae5be5f3c8dd6d82f13363bba594396 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Mon, 6 Mar 2023 23:26:53 +0800 Subject: [PATCH 15/97] HDFS-16934. 
TestDFSAdmin.testAllDatanodesReconfig regression (#5434) Contributed by Shilun Fan --- .../hadoop/hdfs/tools/TestDFSAdmin.java | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java index 9a87365eb2f..d81aebf3c2e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java @@ -90,6 +90,7 @@ import org.apache.hadoop.test.PathUtils; import org.apache.hadoop.util.Lists; import org.apache.hadoop.util.ToolRunner; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -1238,36 +1239,38 @@ public class TestDFSAdmin { when(reconfigurationUtil.parseChangedProperties(any(Configuration.class), any(Configuration.class))).thenReturn(changes); - assertEquals(0, admin.startReconfiguration("datanode", "livenodes")); + int result = admin.startReconfiguration("datanode", "livenodes"); + Assertions.assertThat(result).isEqualTo(0); final List outsForStartReconf = new ArrayList<>(); final List errsForStartReconf = new ArrayList<>(); reconfigurationOutErrFormatter("startReconfiguration", "datanode", "livenodes", outsForStartReconf, errsForStartReconf); - assertEquals(3, outsForStartReconf.size()); - assertEquals(0, errsForStartReconf.size()); - assertTrue(outsForStartReconf.get(0).startsWith("Started reconfiguration task on node")); - assertTrue(outsForStartReconf.get(1).startsWith("Started reconfiguration task on node")); - assertEquals("Starting of reconfiguration task successful on 2 nodes, failed on 0 nodes.", - outsForStartReconf.get(2)); + String started = "Started reconfiguration task on node"; + String starting = + "Starting of reconfiguration task successful on 2 nodes, failed on 0 nodes."; + Assertions.assertThat(outsForStartReconf).hasSize(3); + Assertions.assertThat(errsForStartReconf).hasSize(0); + Assertions.assertThat(outsForStartReconf.get(0)).startsWith(started); + Assertions.assertThat(outsForStartReconf.get(1)).startsWith(started); + Assertions.assertThat(outsForStartReconf.get(2)).startsWith(starting); Thread.sleep(1000); final List outs = new ArrayList<>(); final List errs = new ArrayList<>(); awaitReconfigurationFinished("datanode", "livenodes", outs, errs); - assertEquals(9, outs.size()); - assertEquals(0, errs.size()); + Assertions.assertThat(outs).hasSize(9); + Assertions.assertThat(errs).hasSize(0); LOG.info("dfsadmin -status -livenodes output:"); outs.forEach(s -> LOG.info("{}", s)); - assertTrue(outs.get(0).startsWith("Reconfiguring status for node")); - assertTrue("SUCCESS: Changed property dfs.datanode.peer.stats.enabled".equals(outs.get(2)) - || "SUCCESS: Changed property dfs.datanode.peer.stats.enabled".equals(outs.get(1))); - assertTrue("\tFrom: \"false\"".equals(outs.get(3)) || "\tFrom: \"false\"".equals(outs.get(2))); - assertTrue("\tTo: \"true\"".equals(outs.get(4)) || "\tTo: \"true\"".equals(outs.get(3))); - assertEquals("SUCCESS: Changed property dfs.datanode.peer.stats.enabled", outs.get(5)); - assertEquals("\tFrom: \"false\"", outs.get(6)); - assertEquals("\tTo: \"true\"", outs.get(7)); - assertEquals("Retrieval of reconfiguration status successful on 2 nodes, failed on 0 nodes.", - outs.get(8)); - } + 
Assertions.assertThat(outs.get(0)).startsWith("Reconfiguring status for node"); + String success = "SUCCESS: Changed property dfs.datanode.peer.stats.enabled"; + String from = "\tFrom: \"false\""; + String to = "\tTo: \"true\""; + String retrieval = + "Retrieval of reconfiguration status successful on 2 nodes, failed on 0 nodes."; + + Assertions.assertThat(outs.subList(1, 5)).containsSubsequence(success, from, to); + Assertions.assertThat(outs.subList(5, 9)).containsSubsequence(success, from, to, retrieval); + } } From 358bf80c945e5342915424e176c08fed42a38188 Mon Sep 17 00:00:00 2001 From: Pranav Saxena <108325433+saxenapranav@users.noreply.github.com> Date: Tue, 7 Mar 2023 22:32:13 +0530 Subject: [PATCH 16/97] HADOOP-18606. ABFS: Add reason in x-ms-client-request-id on a retried API call. (#5299) Contributed by Pranav Saxena --- .../azurebfs/constants/AbfsHttpConstants.java | 11 + .../services/AzureServiceErrorCode.java | 4 + .../azurebfs/services/AbfsRestOperation.java | 32 +- .../fs/azurebfs/services/RetryReason.java | 102 ++++++ .../services/RetryReasonConstants.java | 39 +++ .../ClientErrorRetryReason.java | 43 +++ .../ConnectionResetRetryReason.java | 42 +++ .../ConnectionTimeoutRetryReason.java | 43 +++ .../ReadTimeoutRetryReason.java | 41 +++ .../RetryReasonCategory.java | 90 ++++++ .../ServerErrorRetryReason.java | 67 ++++ .../UnknownHostRetryReason.java | 45 +++ .../UnknownIOExceptionRetryReason.java | 47 +++ .../UnknownSocketExceptionRetryReason.java | 46 +++ .../retryReasonCategories/package-info.java | 27 ++ .../fs/azurebfs/utils/TracingContext.java | 13 +- .../TestAbfsRestOperationMockFailures.java | 302 ++++++++++++++++++ .../fs/azurebfs/services/TestRetryReason.java | 134 ++++++++ 18 files changed, 1124 insertions(+), 4 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java create mode 100644 
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestRetryReason.java diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index 5cf7ec565b5..e1b791f6ef2 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -111,6 +111,17 @@ public final class AbfsHttpConstants { public static final char CHAR_EQUALS = '='; public static final char CHAR_STAR = '*'; public static final char CHAR_PLUS = '+'; + /** + * Value that differentiates categories of the http_status. + *

+   * <pre>
+   * 100 - 199 : Informational responses
+   * 200 - 299 : Successful responses
+   * 300 - 399 : Redirection messages
+   * 400 - 499 : Client error responses
+   * 500 - 599 : Server error responses
+   * </pre>
+ */ + public static final Integer HTTP_STATUS_CATEGORY_QUOTIENT = 100; private AbfsHttpConstants() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java index 8bc31c4f92b..8a5e9db8553 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java @@ -66,6 +66,10 @@ public enum AzureServiceErrorCode { return this.errorCode; } + public String getErrorMessage() { + return this.errorMessage; + } + public static List getAzureServiceCode(int httpStatusCode) { List errorCodes = new ArrayList<>(); if (httpStatusCode == UNKNOWN.httpStatusCode) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index 00da9b66013..ad99020390a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -28,6 +28,7 @@ import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.azurebfs.AbfsStatistic; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; @@ -73,6 +74,12 @@ public class AbfsRestOperation { private AbfsHttpOperation result; private AbfsCounters abfsCounters; + /** + * This variable contains the reason of last API call within the same + * AbfsRestOperation object. + */ + private String failureReason; + /** * Checks if there is non-null HTTP response. * @return true if there is a non-null HTTP response from the ABFS call. @@ -208,7 +215,7 @@ public class AbfsRestOperation { private void completeExecute(TracingContext tracingContext) throws AzureBlobFileSystemException { // see if we have latency reports from the previous requests - String latencyHeader = this.client.getAbfsPerfTracker().getClientLatency(); + String latencyHeader = getClientLatency(); if (latencyHeader != null && !latencyHeader.isEmpty()) { AbfsHttpHeader httpHeader = new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_ABFS_CLIENT_LATENCY, latencyHeader); @@ -237,6 +244,11 @@ public class AbfsRestOperation { LOG.trace("{} REST operation complete", operationType); } + @VisibleForTesting + String getClientLatency() { + return client.getAbfsPerfTracker().getClientLatency(); + } + /** * Executes a single HTTP operation to complete the REST operation. If it * fails, there may be a retry. 
The retryCount is incremented with each @@ -248,9 +260,9 @@ public class AbfsRestOperation { try { // initialize the HTTP request and open the connection - httpOperation = new AbfsHttpOperation(url, method, requestHeaders); + httpOperation = createHttpOperation(); incrementCounter(AbfsStatistic.CONNECTIONS_MADE, 1); - tracingContext.constructHeader(httpOperation); + tracingContext.constructHeader(httpOperation, failureReason); switch(client.getAuthType()) { case Custom: @@ -303,6 +315,7 @@ public class AbfsRestOperation { } catch (UnknownHostException ex) { String hostname = null; hostname = httpOperation.getHost(); + failureReason = RetryReason.getAbbreviation(ex, null, null); LOG.warn("Unknown host name: {}. Retrying to resolve the host name...", hostname); if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) { @@ -314,6 +327,8 @@ public class AbfsRestOperation { LOG.debug("HttpRequestFailure: {}, {}", httpOperation, ex); } + failureReason = RetryReason.getAbbreviation(ex, -1, ""); + if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) { throw new InvalidAbfsRestOperationException(ex); } @@ -326,6 +341,8 @@ public class AbfsRestOperation { LOG.debug("HttpRequest: {}: {}", operationType, httpOperation); if (client.getRetryPolicy().shouldRetry(retryCount, httpOperation.getStatusCode())) { + int status = httpOperation.getStatusCode(); + failureReason = RetryReason.getAbbreviation(null, status, httpOperation.getStorageErrorMessage()); return false; } @@ -334,6 +351,15 @@ public class AbfsRestOperation { return true; } + /** + * Creates new object of {@link AbfsHttpOperation} with the url, method, and + * requestHeaders fields of the AbfsRestOperation object. + */ + @VisibleForTesting + AbfsHttpOperation createHttpOperation() throws IOException { + return new AbfsHttpOperation(url, method, requestHeaders); + } + /** * Incrementing Abfs counters with a long value. * diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java new file mode 100644 index 00000000000..40e8cdc1e07 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReason.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.LinkedList; +import java.util.List; + +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ClientErrorRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ConnectionResetRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ConnectionTimeoutRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ReadTimeoutRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.RetryReasonCategory; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ServerErrorRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownHostRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownIOExceptionRetryReason; +import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownSocketExceptionRetryReason; + + +/** + * This utility class exposes methods to convert a server response-error to a + * category of error. + */ +final class RetryReason { + + /** + * Linked-list of the implementations of RetryReasonCategory. The objects in the + * list are arranged by the rank of their significance. + *

    + *
+   * <ol>
+   * <li>ServerError (statusCode==5XX) and ClientError (statusCode==4XX) are + * independent of the other retryReason categories.</li>
+   * <li>Since {@link java.net.SocketException} is a subclass of + * {@link java.io.IOException}, + * {@link UnknownIOExceptionRetryReason} is placed before + * {@link UnknownSocketExceptionRetryReason}.</li>
+   * <li>Since connectionTimeout, readTimeout, and connectionReset are + * {@link java.net.SocketTimeoutException} exceptions with different messages, + * {@link ConnectionTimeoutRetryReason}, {@link ReadTimeoutRetryReason}, and + * {@link ConnectionResetRetryReason} are placed above {@link UnknownIOExceptionRetryReason}. + * There is no ordering among these three reasons because they are differentiated + * by their exception messages.</li>
+   * <li>Since {@link java.net.UnknownHostException} is a subclass of + * {@link java.io.IOException}, {@link UnknownHostRetryReason} is placed + * over {@link UnknownIOExceptionRetryReason}.</li>
+   * </ol>
+ */ + private static List rankedReasonCategories + = new LinkedList() {{ + add(new ServerErrorRetryReason()); + add(new ClientErrorRetryReason()); + add(new UnknownIOExceptionRetryReason()); + add(new UnknownSocketExceptionRetryReason()); + add(new ConnectionTimeoutRetryReason()); + add(new ReadTimeoutRetryReason()); + add(new UnknownHostRetryReason()); + add(new ConnectionResetRetryReason()); + }}; + + private RetryReason() { + + } + + /** + * Method to get correct abbreviation for a given set of exception, statusCode, + * storageStatusCode. + * + * @param ex exception caught during server communication. + * @param statusCode statusCode in the server response. + * @param storageErrorMessage storageErrorMessage in the server response. + * + * @return abbreviation for the the given set of exception, statusCode, storageStatusCode. + */ + static String getAbbreviation(Exception ex, + Integer statusCode, + String storageErrorMessage) { + String result = null; + for (RetryReasonCategory retryReasonCategory : rankedReasonCategories) { + final String abbreviation + = retryReasonCategory.captureAndGetAbbreviation(ex, + statusCode, storageErrorMessage); + if (abbreviation != null) { + result = abbreviation; + } + } + return result; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java new file mode 100644 index 00000000000..8a0af183e30 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RetryReasonConstants.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +public final class RetryReasonConstants { + + private RetryReasonConstants() { + + } + public static final String CONNECTION_TIMEOUT_JDK_MESSAGE = "connect timed out"; + public static final String READ_TIMEOUT_JDK_MESSAGE = "Read timed out"; + public static final String CONNECTION_RESET_MESSAGE = "Connection reset"; + public static final String OPERATION_BREACH_MESSAGE = "Operations per second is over the account limit."; + public static final String CONNECTION_RESET_ABBREVIATION = "CR"; + public static final String CONNECTION_TIMEOUT_ABBREVIATION = "CT"; + public static final String READ_TIMEOUT_ABBREVIATION = "RT"; + public static final String INGRESS_LIMIT_BREACH_ABBREVIATION = "ING"; + public static final String EGRESS_LIMIT_BREACH_ABBREVIATION = "EGR"; + public static final String OPERATION_LIMIT_BREACH_ABBREVIATION = "OPR"; + public static final String UNKNOWN_HOST_EXCEPTION_ABBREVIATION = "UH"; + public static final String IO_EXCEPTION_ABBREVIATION = "IOE"; + public static final String SOCKET_EXCEPTION_ABBREVIATION = "SE"; +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java new file mode 100644 index 00000000000..cf1c47e3eb0 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ClientErrorRetryReason.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_STATUS_CATEGORY_QUOTIENT; + +/** + * Category that can capture server-response errors for 4XX status-code. + */ +public class ClientErrorRetryReason extends RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (statusCode == null || statusCode / HTTP_STATUS_CATEGORY_QUOTIENT != 4) { + return false; + } + return true; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return statusCode + ""; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java new file mode 100644 index 00000000000..702f8875646 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionResetRetryReason.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE; + +/** + * Category that can capture server-response errors for connection-reset exception. + */ +public class ConnectionResetRetryReason extends + RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + return checkExceptionMessage(ex, CONNECTION_RESET_MESSAGE); + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return CONNECTION_RESET_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java new file mode 100644 index 00000000000..28f35dcc805 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ConnectionTimeoutRetryReason.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; + +/** + * Category that can capture server-response errors for connection-timeout. + */ +public class ConnectionTimeoutRetryReason extends + RetryReasonCategory { + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return CONNECTION_TIMEOUT_ABBREVIATION; + } + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + return checkExceptionMessage(ex, CONNECTION_TIMEOUT_JDK_MESSAGE); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java new file mode 100644 index 00000000000..4663d9a52bb --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ReadTimeoutRetryReason.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_JDK_MESSAGE; + +/** + * Category that can capture server-response errors for read-timeout. + */ +public class ReadTimeoutRetryReason extends RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + return checkExceptionMessage(ex, READ_TIMEOUT_JDK_MESSAGE); + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return READ_TIMEOUT_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java new file mode 100644 index 00000000000..893451b496f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/RetryReasonCategory.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import java.util.Locale; + +/** + * Provides methods to define if given exception can be categorised to certain category. + * Each category has a different implementation of the abstract class. + */ +public abstract class RetryReasonCategory { + + /** + * Returns if given server response error can be categorised by the implementation. + * + * @param ex exception captured in the server response. + * @param statusCode statusCode on the server response + * @param serverErrorMessage serverErrorMessage on the server response. + * + * @return

+   * <ol>
+   * <li>true if server response error can be categorised by the implementation</li>
+   * <li>false if response error can not be categorised by the implementation</li>
+   * </ol>
+ */ + abstract Boolean canCapture(Exception ex, + Integer statusCode, + String serverErrorMessage); + + /** + * Returns the abbreviation corresponding to the server response error. + * + * @param statusCode statusCode on the server response + * @param serverErrorMessage serverErrorMessage on the server response. + * + * @return abbreviation on the basis of the statusCode and the serverErrorMessage + */ + abstract String getAbbreviation(Integer statusCode, String serverErrorMessage); + + /** + * Converts the server-error response to an abbreviation if the response can be + * categorised by the implementation. + * + * @param ex exception received while making API request + * @param statusCode statusCode received in the server-response + * @param serverErrorMessage error-message received in the server-response + * + * @return abbreviation if the server-response can be categorised by the implementation. + * null if the server-response can not be categorised by the implementation. + */ + public String captureAndGetAbbreviation(Exception ex, + Integer statusCode, + String serverErrorMessage) { + if (canCapture(ex, statusCode, serverErrorMessage)) { + return getAbbreviation(statusCode, serverErrorMessage); + } + return null; + } + + /** + * Checks if a required search-string is in the exception's message. + */ + Boolean checkExceptionMessage(final Exception exceptionCaptured, + final String search) { + if (search == null) { + return false; + } + if (exceptionCaptured != null + && exceptionCaptured.getMessage() != null + && exceptionCaptured.getMessage() + .toLowerCase(Locale.US) + .contains(search.toLowerCase(Locale.US))) { + return true; + } + return false; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java new file mode 100644 index 00000000000..dd67a0cb8cb --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/ServerErrorRetryReason.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_STATUS_CATEGORY_QUOTIENT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.INGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_BREACH_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_LIMIT_BREACH_ABBREVIATION; + +/** + * Category that can capture server-response errors for 5XX status-code. + */ +public class ServerErrorRetryReason extends RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (statusCode == null || statusCode / HTTP_STATUS_CATEGORY_QUOTIENT != 5) { + return false; + } + return true; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + if (statusCode == HTTP_UNAVAILABLE && serverErrorMessage != null) { + String splitedServerErrorMessage = serverErrorMessage.split(System.lineSeparator(), + 2)[0]; + if (INGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage().equalsIgnoreCase( + splitedServerErrorMessage)) { + return INGRESS_LIMIT_BREACH_ABBREVIATION; + } + if (EGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage().equalsIgnoreCase( + splitedServerErrorMessage)) { + return EGRESS_LIMIT_BREACH_ABBREVIATION; + } + if (OPERATION_BREACH_MESSAGE.equalsIgnoreCase( + splitedServerErrorMessage)) { + return OPERATION_LIMIT_BREACH_ABBREVIATION; + } + return HTTP_UNAVAILABLE + ""; + } + return statusCode + ""; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java new file mode 100644 index 00000000000..c329348d81f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownHostRetryReason.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import java.net.UnknownHostException; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.UNKNOWN_HOST_EXCEPTION_ABBREVIATION; + +/** + * Category that can capture server-response errors for {@link UnknownHostException}. + */ +public class UnknownHostRetryReason extends RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (ex instanceof UnknownHostException) { + return true; + } + return false; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return UNKNOWN_HOST_EXCEPTION_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java new file mode 100644 index 00000000000..8a69ebb928d --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownIOExceptionRetryReason.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import java.io.IOException; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.IO_EXCEPTION_ABBREVIATION; + + +/** + * Category that can capture server-response errors for {@link IOException}. + */ +public class UnknownIOExceptionRetryReason extends + RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (ex instanceof IOException) { + return true; + } + return false; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return IO_EXCEPTION_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java new file mode 100644 index 00000000000..18e9f115fea --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/UnknownSocketExceptionRetryReason.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import java.net.SocketException; + +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.SOCKET_EXCEPTION_ABBREVIATION; + +/** + * Category that can capture server-response errors for {@link SocketException}. + */ +public class UnknownSocketExceptionRetryReason extends + RetryReasonCategory { + + @Override + Boolean canCapture(final Exception ex, + final Integer statusCode, + final String serverErrorMessage) { + if (ex instanceof SocketException) { + return true; + } + return false; + } + + @Override + String getAbbreviation(final Integer statusCode, + final String serverErrorMessage) { + return SOCKET_EXCEPTION_ABBREVIATION; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java new file mode 100644 index 00000000000..7d8078620af --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/retryReasonCategories/package-info.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A retryReasonCategory defines methods applicable on server-response errors. + */ +@Private +@Evolving +package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Evolving; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index 5a115451df1..9a2ccda36fb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -152,8 +152,10 @@ public class TracingContext { * X_MS_CLIENT_REQUEST_ID header of the http operation * @param httpOperation AbfsHttpOperation instance to set header into * connection + * @param previousFailure List of failures seen before this API trigger on + * same operation from AbfsClient. 
*/ - public void constructHeader(AbfsHttpOperation httpOperation) { + public void constructHeader(AbfsHttpOperation httpOperation, String previousFailure) { clientRequestId = UUID.randomUUID().toString(); switch (format) { case ALL_ID_FORMAT: // Optional IDs (e.g. streamId) may be empty @@ -161,6 +163,7 @@ public class TracingContext { clientCorrelationID + ":" + clientRequestId + ":" + fileSystemID + ":" + primaryRequestId + ":" + streamID + ":" + opType + ":" + retryCount; + header = addFailureReasons(header, previousFailure); break; case TWO_ID_FORMAT: header = clientCorrelationID + ":" + clientRequestId; @@ -174,6 +177,14 @@ public class TracingContext { httpOperation.setRequestProperty(HttpHeaderConfigurations.X_MS_CLIENT_REQUEST_ID, header); } + private String addFailureReasons(final String header, + final String previousFailure) { + if (previousFailure == null) { + return header; + } + return String.format("%s_%s", header, previousFailure); + } + /** * Return header representing the request associated with the tracingContext * @return Header string set into X_MS_CLIENT_REQUEST_ID diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java new file mode 100644 index 00000000000..bfa524a25e6 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java @@ -0,0 +1,302 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.HttpURLConnection; +import java.net.SocketException; +import java.net.SocketTimeoutException; +import java.net.UnknownHostException; +import java.util.ArrayList; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.stubbing.Stubber; + +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; +import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; +import static java.net.HttpURLConnection.HTTP_OK; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.services.AuthType.OAuth; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.INGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.IO_EXCEPTION_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_BREACH_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_JDK_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.SOCKET_EXCEPTION_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.UNKNOWN_HOST_EXCEPTION_ABBREVIATION; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.nullable; + +public class TestAbfsRestOperationMockFailures { + + @Test + public void testClientRequestIdForConnectTimeoutRetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new SocketTimeoutException(CONNECTION_TIMEOUT_JDK_MESSAGE); + abbreviations[0] = CONNECTION_TIMEOUT_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + + @Test + public void testClientRequestIdForConnectAndReadTimeoutRetry() + throws Exception { + Exception[] exceptions = new Exception[2]; + String[] abbreviations = new String[2]; + exceptions[0] = new SocketTimeoutException(CONNECTION_TIMEOUT_JDK_MESSAGE); + abbreviations[0] = 
CONNECTION_TIMEOUT_ABBREVIATION; + exceptions[1] = new SocketTimeoutException(READ_TIMEOUT_JDK_MESSAGE); + abbreviations[1] = READ_TIMEOUT_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + + @Test + public void testClientRequestIdForReadTimeoutRetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new SocketTimeoutException(READ_TIMEOUT_JDK_MESSAGE); + abbreviations[0] = READ_TIMEOUT_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + + @Test + public void testClientRequestIdForUnknownHostRetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new UnknownHostException(); + abbreviations[0] = UNKNOWN_HOST_EXCEPTION_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + + @Test + public void testClientRequestIdForConnectionResetRetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new SocketTimeoutException(CONNECTION_RESET_MESSAGE + " by peer"); + abbreviations[0] = CONNECTION_RESET_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + + @Test + public void testClientRequestIdForUnknownSocketExRetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new SocketException("unknown"); + abbreviations[0] = SOCKET_EXCEPTION_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + + @Test + public void testClientRequestIdForIOERetry() throws Exception { + Exception[] exceptions = new Exception[1]; + String[] abbreviations = new String[1]; + exceptions[0] = new InterruptedIOException(); + abbreviations[0] = IO_EXCEPTION_ABBREVIATION; + testClientRequestIdForTimeoutRetry(exceptions, abbreviations, 1); + } + + @Test + public void testClientRequestIdFor400Retry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_BAD_REQUEST, "", "400"); + } + + @Test + public void testClientRequestIdFor500Retry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_INTERNAL_ERROR, "", "500"); + } + + @Test + public void testClientRequestIdFor503INGRetry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_UNAVAILABLE, + INGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage(), + INGRESS_LIMIT_BREACH_ABBREVIATION); + } + + @Test + public void testClientRequestIdFor503egrRetry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_UNAVAILABLE, + EGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage(), + EGRESS_LIMIT_BREACH_ABBREVIATION); + } + + @Test + public void testClientRequestIdFor503OPRRetry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_UNAVAILABLE, + OPERATION_BREACH_MESSAGE, OPERATION_LIMIT_BREACH_ABBREVIATION); + } + + @Test + public void testClientRequestIdFor503OtherRetry() throws Exception { + testClientRequestIdForStatusRetry(HTTP_UNAVAILABLE, "Other.", "503"); + } + + private void testClientRequestIdForStatusRetry(int status, + String serverErrorMessage, + String keyExpected) throws Exception { + + AbfsClient abfsClient = Mockito.mock(AbfsClient.class); + ExponentialRetryPolicy retryPolicy = Mockito.mock( + ExponentialRetryPolicy.class); + addMockBehaviourToAbfsClient(abfsClient, retryPolicy); + + + AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.ReadFile, + 
abfsClient, + "PUT", + null, + new ArrayList<>() + )); + + AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class); + addMockBehaviourToRestOpAndHttpOp(abfsRestOperation, httpOperation); + + Mockito.doNothing() + .doNothing() + .when(httpOperation) + .processResponse(nullable(byte[].class), nullable(int.class), + nullable(int.class)); + + int[] statusCount = new int[1]; + statusCount[0] = 0; + Mockito.doAnswer(answer -> { + if (statusCount[0] <= 5) { + statusCount[0]++; + return status; + } + return HTTP_OK; + }).when(httpOperation).getStatusCode(); + + Mockito.doReturn(serverErrorMessage) + .when(httpOperation) + .getStorageErrorMessage(); + + TracingContext tracingContext = Mockito.mock(TracingContext.class); + Mockito.doNothing().when(tracingContext).setRetryCount(nullable(int.class)); + + int[] count = new int[1]; + count[0] = 0; + Mockito.doAnswer(invocationOnMock -> { + if (count[0] == 1) { + Assertions.assertThat((String) invocationOnMock.getArgument(1)) + .isEqualTo(keyExpected); + } + count[0]++; + return null; + }).when(tracingContext).constructHeader(any(), any()); + + abfsRestOperation.execute(tracingContext); + Assertions.assertThat(count[0]).isEqualTo(2); + + } + + private void testClientRequestIdForTimeoutRetry(Exception[] exceptions, + String[] abbreviationsExpected, + int len) throws Exception { + AbfsClient abfsClient = Mockito.mock(AbfsClient.class); + ExponentialRetryPolicy retryPolicy = Mockito.mock( + ExponentialRetryPolicy.class); + addMockBehaviourToAbfsClient(abfsClient, retryPolicy); + + + AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.ReadFile, + abfsClient, + "PUT", + null, + new ArrayList<>() + )); + + AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class); + addMockBehaviourToRestOpAndHttpOp(abfsRestOperation, httpOperation); + + Stubber stubber = Mockito.doThrow(exceptions[0]); + for (int iteration = 1; iteration < len; iteration++) { + stubber.doThrow(exceptions[iteration]); + } + stubber + .doNothing() + .when(httpOperation) + .processResponse(nullable(byte[].class), nullable(int.class), + nullable(int.class)); + + Mockito.doReturn(HTTP_OK).when(httpOperation).getStatusCode(); + + TracingContext tracingContext = Mockito.mock(TracingContext.class); + Mockito.doNothing().when(tracingContext).setRetryCount(nullable(int.class)); + + int[] count = new int[1]; + count[0] = 0; + Mockito.doAnswer(invocationOnMock -> { + if (count[0] > 0 && count[0] <= len) { + Assertions.assertThat((String) invocationOnMock.getArgument(1)) + .isEqualTo(abbreviationsExpected[count[0] - 1]); + } + count[0]++; + return null; + }).when(tracingContext).constructHeader(any(), any()); + + abfsRestOperation.execute(tracingContext); + Assertions.assertThat(count[0]).isEqualTo(len + 1); + } + + private void addMockBehaviourToRestOpAndHttpOp(final AbfsRestOperation abfsRestOperation, + final AbfsHttpOperation httpOperation) throws IOException { + HttpURLConnection httpURLConnection = Mockito.mock(HttpURLConnection.class); + Mockito.doNothing() + .when(httpURLConnection) + .setRequestProperty(nullable(String.class), nullable(String.class)); + Mockito.doReturn(httpURLConnection).when(httpOperation).getConnection(); + Mockito.doReturn("").when(abfsRestOperation).getClientLatency(); + Mockito.doReturn(httpOperation).when(abfsRestOperation).createHttpOperation(); + } + + private void addMockBehaviourToAbfsClient(final AbfsClient abfsClient, + final ExponentialRetryPolicy retryPolicy) throws IOException { + 
Mockito.doReturn(OAuth).when(abfsClient).getAuthType(); + Mockito.doReturn("").when(abfsClient).getAccessToken(); + AbfsThrottlingIntercept intercept = Mockito.mock( + AbfsThrottlingIntercept.class); + Mockito.doReturn(intercept).when(abfsClient).getIntercept(); + Mockito.doNothing() + .when(intercept) + .sendingRequest(any(), nullable(AbfsCounters.class)); + Mockito.doNothing().when(intercept).updateMetrics(any(), any()); + + Mockito.doReturn(retryPolicy).when(abfsClient).getRetryPolicy(); + Mockito.doReturn(true) + .when(retryPolicy) + .shouldRetry(nullable(Integer.class), nullable(Integer.class)); + Mockito.doReturn(false).when(retryPolicy).shouldRetry(1, HTTP_OK); + Mockito.doReturn(false).when(retryPolicy).shouldRetry(2, HTTP_OK); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestRetryReason.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestRetryReason.java new file mode 100644 index 00000000000..76fcc6dc2c8 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestRetryReason.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.net.SocketException; +import java.net.SocketTimeoutException; +import java.net.UnknownHostException; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import static java.net.HttpURLConnection.HTTP_FORBIDDEN; +import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.INGRESS_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.IO_EXCEPTION_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_BREACH_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.OPERATION_LIMIT_BREACH_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.READ_TIMEOUT_JDK_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.SOCKET_EXCEPTION_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.UNKNOWN_HOST_EXCEPTION_ABBREVIATION; + +public class TestRetryReason { + + @Test + public void test4xxStatusRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_FORBIDDEN, null)) + .describedAs("Abbreviation for 4xx should be equal to 4xx") + .isEqualTo(HTTP_FORBIDDEN + ""); + } + + @Test + public void testConnectionResetRetryReason() { + SocketException connReset = new SocketException(CONNECTION_RESET_MESSAGE.toUpperCase()); + Assertions.assertThat(RetryReason.getAbbreviation(connReset, null, null)).isEqualTo(CONNECTION_RESET_ABBREVIATION); + } + + @Test + public void testConnectionTimeoutRetryReason() { + SocketTimeoutException connectionTimeoutException = new SocketTimeoutException(CONNECTION_TIMEOUT_JDK_MESSAGE); + Assertions.assertThat(RetryReason.getAbbreviation(connectionTimeoutException, null, null)).isEqualTo( + CONNECTION_TIMEOUT_ABBREVIATION + ); + } + + @Test + public void testReadTimeoutRetryReason() { + SocketTimeoutException connectionTimeoutException = new SocketTimeoutException(READ_TIMEOUT_JDK_MESSAGE); + Assertions.assertThat(RetryReason.getAbbreviation(connectionTimeoutException, null, null)).isEqualTo( + READ_TIMEOUT_ABBREVIATION + ); + } + + 
@Test + public void testEgressLimitRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_UNAVAILABLE, EGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage())).isEqualTo( + EGRESS_LIMIT_BREACH_ABBREVIATION + ); + } + + @Test + public void testIngressLimitRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_UNAVAILABLE, INGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage())).isEqualTo( + INGRESS_LIMIT_BREACH_ABBREVIATION + ); + } + + @Test + public void testOperationLimitRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_UNAVAILABLE, OPERATION_BREACH_MESSAGE)).isEqualTo( + OPERATION_LIMIT_BREACH_ABBREVIATION + ); + } + + @Test + public void test503UnknownRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_UNAVAILABLE, null)).isEqualTo( + "503" + ); + } + + @Test + public void test500RetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(null, HTTP_INTERNAL_ERROR, null)).isEqualTo( + "500" + ); + } + + @Test + public void testUnknownHostRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(new UnknownHostException(), null, null)).isEqualTo( + UNKNOWN_HOST_EXCEPTION_ABBREVIATION + ); + } + + @Test + public void testUnknownIOExceptionRetryReason() { + Assertions.assertThat(RetryReason.getAbbreviation(new IOException(), null, null)).isEqualTo( + IO_EXCEPTION_ABBREVIATION + ); + } + + @Test + public void testUnknownSocketException() { + Assertions.assertThat(RetryReason.getAbbreviation(new SocketException(), null, null)).isEqualTo( + SOCKET_EXCEPTION_ABBREVIATION + ); + } +} From 487368c4b92c5b8128cb3ef6661f21afe7e8acb9 Mon Sep 17 00:00:00 2001 From: rohit-kb <115476286+rohit-kb@users.noreply.github.com> Date: Wed, 8 Mar 2023 21:01:03 +0530 Subject: [PATCH 17/97] HADOOP-18655. 
Upgrade kerby to 2.0.3 due to CVE-2023-25613 (#5458) Upgrade kerby to 2.0.3 due to the CVE https://nvd.nist.gov/vuln/detail/CVE-2023-25613 Contributed by Rohit Kumar Badeau --- LICENSE-binary | 30 +++++++++++++++--------------- hadoop-project/pom.xml | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index aa7f9a42e96..0fab0eea8ae 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -327,21 +327,21 @@ org.apache.htrace:htrace-core4:4.1.0-incubating org.apache.httpcomponents:httpclient:4.5.6 org.apache.httpcomponents:httpcore:4.4.10 org.apache.kafka:kafka-clients:2.8.2 -org.apache.kerby:kerb-admin:2.0.2 -org.apache.kerby:kerb-client:2.0.2 -org.apache.kerby:kerb-common:2.0.2 -org.apache.kerby:kerb-core:2.0.2 -org.apache.kerby:kerb-crypto:2.0.2 -org.apache.kerby:kerb-identity:2.0.2 -org.apache.kerby:kerb-server:2.0.2 -org.apache.kerby:kerb-simplekdc:2.0.2 -org.apache.kerby:kerb-util:2.0.2 -org.apache.kerby:kerby-asn1:2.0.2 -org.apache.kerby:kerby-config:2.0.2 -org.apache.kerby:kerby-pkix:2.0.2 -org.apache.kerby:kerby-util:2.0.2 -org.apache.kerby:kerby-xdr:2.0.2 -org.apache.kerby:token-provider:2.0.2 +org.apache.kerby:kerb-admin:2.0.3 +org.apache.kerby:kerb-client:2.0.3 +org.apache.kerby:kerb-common:2.0.3 +org.apache.kerby:kerb-core:2.0.3 +org.apache.kerby:kerb-crypto:2.0.3 +org.apache.kerby:kerb-identity:2.0.3 +org.apache.kerby:kerb-server:2.0.3 +org.apache.kerby:kerb-simplekdc:2.0.3 +org.apache.kerby:kerb-util:2.0.3 +org.apache.kerby:kerby-asn1:2.0.3 +org.apache.kerby:kerby-config:2.0.3 +org.apache.kerby:kerby-pkix:2.0.3 +org.apache.kerby:kerby-util:2.0.3 +org.apache.kerby:kerby-xdr:2.0.3 +org.apache.kerby:token-provider:2.0.3 org.apache.solr:solr-solrj:8.8.2 org.apache.yetus:audience-annotations:0.5.0 org.apache.zookeeper:zookeeper:3.6.3 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 062abb3f1db..88a7b1f119c 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -125,7 +125,7 @@ 3.9.0 1.10.0 - 2.0.2 + 2.0.3 1.0-alpha-1 3.3.1 4.0.3 From b406060c6b04a6115a920ad6800ee4a52a2f291f Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Thu, 9 Mar 2023 05:29:30 +0800 Subject: [PATCH 18/97] YARN-8972. [Router] Add support to prevent DoS attack over ApplicationSubmissionContext size. 
(#5382) --- .../hadoop/yarn/conf/YarnConfiguration.java | 7 + .../src/main/resources/yarn-default.xml | 10 + .../yarn/server/router/RouterServerUtil.java | 127 +++++++ ...plicationSubmissionContextInterceptor.java | 66 ++++ .../PassThroughClientRequestInterceptor.java | 315 ++++++++++++++++++ .../router/webapp/RouterWebServiceUtil.java | 1 + ...plicationSubmissionContextInterceptor.java | 160 +++++++++ .../src/site/markdown/Federation.md | 31 +- 8 files changed, 711 insertions(+), 6 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/ApplicationSubmissionContextInterceptor.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestApplicationSubmissionContextInterceptor.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 699059f068f..eb7d3143ca7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -4249,6 +4249,13 @@ public class YarnConfiguration extends Configuration { "org.apache.hadoop.yarn.server.router.webapp." + "DefaultRequestInterceptorREST"; + /** + * ApplicationSubmissionContextInterceptor configurations. + **/ + public static final String ROUTER_ASC_INTERCEPTOR_MAX_SIZE = + ROUTER_PREFIX + "asc-interceptor-max-size"; + public static final String DEFAULT_ROUTER_ASC_INTERCEPTOR_MAX_SIZE = "1MB"; + /** * The interceptor class used in FederationInterceptorREST should return * partial AppReports. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index dc58f2f8285..ab422330788 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -5117,6 +5117,16 @@ + + yarn.router.asc-interceptor-max-size + 1MB + + We define the size limit of ApplicationSubmissionContext. + If the size of the ApplicationSubmissionContext is larger than this value, + We will throw an exception. the default value is 1MB. + + + The number of threads to use for the Router scheduled executor service. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java index 8fa6ca2f055..0dbead33f02 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java @@ -18,13 +18,16 @@ package org.apache.hadoop.yarn.server.router; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.thirdparty.protobuf.GeneratedMessageV3; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.ReservationRequest; @@ -32,10 +35,18 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.ReservationRequestInterpreter; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ReservationRequests; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ContainerLaunchContextPBImpl; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.proto.YarnProtos.StringStringMapProto; +import org.apache.hadoop.yarn.proto.YarnProtos.StringBytesMapProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationACLMapProto; +import org.apache.hadoop.yarn.proto.YarnProtos.StringLocalResourceMapProto; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationDefinitionInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationRequestsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationRequestInfo; import org.apache.hadoop.yarn.api.records.ReservationDefinition; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -43,6 +54,8 @@ import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.ByteArrayOutputStream; +import java.io.ObjectOutputStream; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.ArrayList; @@ -624,4 +637,118 @@ public final class RouterServerUtil { return definition; } + + /** + * Checks if the ApplicationSubmissionContext submitted with the application + * is valid. + * + * Current checks: + * - if its size is within limits. + * + * @param appContext the app context to check. + * @throws IOException if an IO error occurred. + * @throws YarnException yarn exception. 
+   */
+  @Public
+  @Unstable
+  public static void checkAppSubmissionContext(ApplicationSubmissionContextPBImpl appContext,
+      Configuration conf) throws IOException, YarnException {
+    // Prevents DoS over the ApplicationClientProtocol by checking the context
+    // the application was submitted with for any excessively large fields.
+    double bytesOfMaxAscSize = conf.getStorageSize(
+        YarnConfiguration.ROUTER_ASC_INTERCEPTOR_MAX_SIZE,
+        YarnConfiguration.DEFAULT_ROUTER_ASC_INTERCEPTOR_MAX_SIZE, StorageUnit.BYTES);
+    if (appContext != null) {
+      int bytesOfSerializedSize = appContext.getProto().getSerializedSize();
+      if (bytesOfSerializedSize >= bytesOfMaxAscSize) {
+        logContainerLaunchContext(appContext);
+        String applicationId = appContext.getApplicationId().toString();
+        String limit = StringUtils.byteDesc((long) bytesOfMaxAscSize);
+        String appContentSize = StringUtils.byteDesc(bytesOfSerializedSize);
+        String errMsg = String.format(
+            "The size of the ApplicationSubmissionContext of the application %s is "
+                + "above the limit %s, size = %s.", applicationId, limit, appContentSize);
+        LOG.error(errMsg);
+        throw new YarnException(errMsg);
+      }
+    }
+  }
+
+  /**
+   * Private helper for checkAppSubmissionContext that logs the fields in the
+   * context for debugging.
+   *
+   * @param appContext the app context.
+   * @throws IOException if an IO error occurred.
+   */
+  @Private
+  @Unstable
+  private static void logContainerLaunchContext(ApplicationSubmissionContextPBImpl appContext)
+      throws IOException {
+    if (appContext == null || appContext.getAMContainerSpec() == null ||
+        !(appContext.getAMContainerSpec() instanceof ContainerLaunchContextPBImpl)) {
+      return;
+    }
+
+    ContainerLaunchContext launchContext = appContext.getAMContainerSpec();
+    ContainerLaunchContextPBImpl clc = (ContainerLaunchContextPBImpl) launchContext;
+    LOG.warn("ContainerLaunchContext size: {}.", clc.getProto().getSerializedSize());
+
+    // ContainerLaunchContext contains:
+    // 1) Map<String, LocalResource> localResources,
+    List<StringLocalResourceMapProto> lrs = clc.getProto().getLocalResourcesList();
+    logContainerLaunchContext("LocalResource size: {}. Length: {}.", lrs);
+
+    // 2) Map<String, String> environment, List<String> commands,
+    List<StringStringMapProto> envs = clc.getProto().getEnvironmentList();
+    logContainerLaunchContext("Environment size: {}. Length: {}.", envs);
+
+    List<String> cmds = clc.getCommands();
+    if (CollectionUtils.isNotEmpty(cmds)) {
+      LOG.warn("Commands size: {}. Length: {}.", cmds.size(), serialize(cmds).length);
+    }
+
+    // 3) Map<String, ByteBuffer> serviceData,
+    List<StringBytesMapProto> serviceData = clc.getProto().getServiceDataList();
+    logContainerLaunchContext("ServiceData size: {}. Length: {}.", serviceData);
+
+    // 4) Map<ApplicationAccessType, String> acls
+    List<ApplicationACLMapProto> acls = clc.getProto().getApplicationACLsList();
+    logContainerLaunchContext("ACLs size: {}. Length: {}.", acls);
+  }
+
+  /**
+   * Log the serialized size of ContainerLaunchContext data.
+   *
+   * @param format format of logging.
+   * @param lists data list.
+   * @param <R> generic type R.
+   */
+  private static <R extends GeneratedMessageV3> void logContainerLaunchContext(String format,
+      List<R> lists) {
+    if (CollectionUtils.isNotEmpty(lists)) {
+      int sumLength = 0;
+      for (R item : lists) {
+        sumLength += item.getSerializedSize();
+      }
+      LOG.warn(format, lists.size(), sumLength);
+    }
+  }
+
+  /**
+   * Serialize an object into a byte array.
+   *
+   * @param obj the object to serialize.
+   * @return the serialized byte array.
+   * @throws IOException if an IO error occurred.
+ */ + @Private + @Unstable + private static byte[] serialize(Object obj) throws IOException { + try (ByteArrayOutputStream b = new ByteArrayOutputStream()) { + try (ObjectOutputStream o = new ObjectOutputStream(b)) { + o.writeObject(obj); + } + return b.toByteArray(); + } + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/ApplicationSubmissionContextInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/ApplicationSubmissionContextInterceptor.java new file mode 100644 index 00000000000..6ec3fe334d3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/ApplicationSubmissionContextInterceptor.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.router.clientrm; + +import java.io.IOException; + +import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.router.RouterAuditLogger; +import org.apache.hadoop.yarn.server.router.RouterMetrics; +import org.apache.hadoop.yarn.server.router.RouterServerUtil; + +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.SUBMIT_NEW_APP; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.TARGET_CLIENT_RM_SERVICE; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UNKNOWN; + +/** + * It prevents DoS attack over the ApplicationClientProtocol. Currently, it + * checks the size of the ApplicationSubmissionContext. If it exceeds the limit + * it can cause Zookeeper failures. 
+ */ +public class ApplicationSubmissionContextInterceptor extends PassThroughClientRequestInterceptor { + + @Override + public SubmitApplicationResponse submitApplication( + SubmitApplicationRequest request) throws YarnException, IOException { + + if (request == null || request.getApplicationSubmissionContext() == null || + request.getApplicationSubmissionContext().getApplicationId() == null) { + RouterMetrics.getMetrics().incrAppsFailedSubmitted(); + String errMsg = + "Missing submitApplication request or applicationSubmissionContext information."; + RouterAuditLogger.logFailure(user.getShortUserName(), SUBMIT_NEW_APP, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, errMsg); + RouterServerUtil.logAndThrowException(errMsg, null); + } + + ApplicationSubmissionContext appContext = request.getApplicationSubmissionContext(); + ApplicationSubmissionContextPBImpl asc = (ApplicationSubmissionContextPBImpl) appContext; + + // Check for excessively large fields, throw exception if found + RouterServerUtil.checkAppSubmissionContext(asc, getConf()); + + // Check succeeded - app submit will be passed on to the next interceptor + return getNextInterceptor().submitApplication(request); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java new file mode 100644 index 00000000000..fa830d38811 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java @@ -0,0 +1,315 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.router.clientrm; + +import java.io.IOException; + +import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenRequest; +import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAttributesToNodesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAttributesToNodesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeAttributesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeAttributesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeLabelsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeLabelsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainersResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetLabelsToNodesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetLabelsToNodesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewReservationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewReservationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToAttributesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToAttributesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToLabelsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToLabelsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; +import 
org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest; +import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationDeleteRequest; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationDeleteResponse; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationListRequest; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationListResponse; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionResponse; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationUpdateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.ReservationUpdateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest; +import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerResponse; +import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityRequest; +import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityResponse; +import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse; +import org.apache.hadoop.yarn.exceptions.YarnException; + +/** + * Interceptor that does not do anything other than forwarding it to the next + * Interceptor in the chain. 
+ */ +public class PassThroughClientRequestInterceptor extends AbstractClientRequestInterceptor { + + @Override + public GetNewApplicationResponse getNewApplication( + GetNewApplicationRequest request) throws YarnException, IOException { + return getNextInterceptor().getNewApplication(request); + } + + @Override + public SubmitApplicationResponse submitApplication( + SubmitApplicationRequest request) throws YarnException, IOException { + return getNextInterceptor().submitApplication(request); + } + + @Override + public KillApplicationResponse forceKillApplication( + KillApplicationRequest request) throws YarnException, IOException { + return getNextInterceptor().forceKillApplication(request); + } + + @Override + public GetClusterMetricsResponse getClusterMetrics( + GetClusterMetricsRequest request) throws YarnException, IOException { + return getNextInterceptor().getClusterMetrics(request); + } + + @Override + public GetClusterNodesResponse getClusterNodes(GetClusterNodesRequest request) + throws YarnException, IOException { + return getNextInterceptor().getClusterNodes(request); + } + + @Override + public GetQueueInfoResponse getQueueInfo(GetQueueInfoRequest request) + throws YarnException, IOException { + return getNextInterceptor().getQueueInfo(request); + } + + @Override + public GetQueueUserAclsInfoResponse getQueueUserAcls( + GetQueueUserAclsInfoRequest request) throws YarnException, IOException { + return getNextInterceptor().getQueueUserAcls(request); + } + + @Override + public MoveApplicationAcrossQueuesResponse moveApplicationAcrossQueues( + MoveApplicationAcrossQueuesRequest request) + throws YarnException, IOException { + return getNextInterceptor().moveApplicationAcrossQueues(request); + } + + @Override + public GetNewReservationResponse getNewReservation( + GetNewReservationRequest request) throws YarnException, IOException { + return getNextInterceptor().getNewReservation(request); + } + + @Override + public ReservationSubmissionResponse submitReservation( + ReservationSubmissionRequest request) throws YarnException, IOException { + return getNextInterceptor().submitReservation(request); + } + + @Override + public ReservationListResponse listReservations( + ReservationListRequest request) throws YarnException, IOException { + return getNextInterceptor().listReservations(request); + } + + @Override + public ReservationUpdateResponse updateReservation( + ReservationUpdateRequest request) throws YarnException, IOException { + return getNextInterceptor().updateReservation(request); + } + + @Override + public ReservationDeleteResponse deleteReservation( + ReservationDeleteRequest request) throws YarnException, IOException { + return getNextInterceptor().deleteReservation(request); + } + + @Override + public GetNodesToLabelsResponse getNodeToLabels( + GetNodesToLabelsRequest request) throws YarnException, IOException { + return getNextInterceptor().getNodeToLabels(request); + } + + @Override + public GetLabelsToNodesResponse getLabelsToNodes( + GetLabelsToNodesRequest request) throws YarnException, IOException { + return getNextInterceptor().getLabelsToNodes(request); + } + + @Override + public GetClusterNodeLabelsResponse getClusterNodeLabels( + GetClusterNodeLabelsRequest request) throws YarnException, IOException { + return getNextInterceptor().getClusterNodeLabels(request); + } + + @Override + public GetApplicationReportResponse getApplicationReport( + GetApplicationReportRequest request) throws YarnException, IOException { + return 
getNextInterceptor().getApplicationReport(request); + } + + @Override + public GetApplicationsResponse getApplications(GetApplicationsRequest request) + throws YarnException, IOException { + return getNextInterceptor().getApplications(request); + } + + @Override + public GetApplicationAttemptReportResponse getApplicationAttemptReport( + GetApplicationAttemptReportRequest request) + throws YarnException, IOException { + return getNextInterceptor().getApplicationAttemptReport(request); + } + + @Override + public GetApplicationAttemptsResponse getApplicationAttempts( + GetApplicationAttemptsRequest request) throws YarnException, IOException { + return getNextInterceptor().getApplicationAttempts(request); + } + + @Override + public GetContainerReportResponse getContainerReport( + GetContainerReportRequest request) throws YarnException, IOException { + return getNextInterceptor().getContainerReport(request); + } + + @Override + public GetContainersResponse getContainers(GetContainersRequest request) + throws YarnException, IOException { + return getNextInterceptor().getContainers(request); + } + + @Override + public GetDelegationTokenResponse getDelegationToken( + GetDelegationTokenRequest request) throws YarnException, IOException { + return getNextInterceptor().getDelegationToken(request); + } + + @Override + public RenewDelegationTokenResponse renewDelegationToken( + RenewDelegationTokenRequest request) throws YarnException, IOException { + return getNextInterceptor().renewDelegationToken(request); + } + + @Override + public CancelDelegationTokenResponse cancelDelegationToken( + CancelDelegationTokenRequest request) throws YarnException, IOException { + return getNextInterceptor().cancelDelegationToken(request); + } + + @Override + public FailApplicationAttemptResponse failApplicationAttempt( + FailApplicationAttemptRequest request) throws YarnException, IOException { + return getNextInterceptor().failApplicationAttempt(request); + } + + @Override + public UpdateApplicationPriorityResponse updateApplicationPriority( + UpdateApplicationPriorityRequest request) + throws YarnException, IOException { + return getNextInterceptor().updateApplicationPriority(request); + } + + @Override + public SignalContainerResponse signalToContainer( + SignalContainerRequest request) throws YarnException, IOException { + return getNextInterceptor().signalToContainer(request); + } + + @Override + public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( + UpdateApplicationTimeoutsRequest request) + throws YarnException, IOException { + return getNextInterceptor().updateApplicationTimeouts(request); + } + + @Override + public GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) throws YarnException, IOException { + return getNextInterceptor().getResourceProfiles(request); + } + + @Override + public GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException { + return getNextInterceptor().getResourceProfile(request); + } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException { + return getNextInterceptor().getResourceTypeInfo(request); + } + + @Override + public GetAttributesToNodesResponse getAttributesToNodes( + GetAttributesToNodesRequest request) throws YarnException, IOException { + return getNextInterceptor().getAttributesToNodes(request); + } + + @Override + public GetClusterNodeAttributesResponse 
getClusterNodeAttributes( + GetClusterNodeAttributesRequest request) + throws YarnException, IOException { + return getNextInterceptor().getClusterNodeAttributes(request); + } + + @Override + public GetNodesToAttributesResponse getNodesToAttributes( + GetNodesToAttributesRequest request) throws YarnException, IOException { + return getNextInterceptor().getNodesToAttributes(request); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java index e33ce155079..7af470dc583 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java @@ -112,6 +112,7 @@ public final class RouterWebServiceUtil { * @param additionalParam the query parameters as input for a specific REST * call in case the call has no servlet request * @param client same client used to reduce number of clients created + * @param conf configuration * @return the retrieved entity from the REST call */ protected static T genericForward(final String webApp, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestApplicationSubmissionContextInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestApplicationSubmissionContextInterceptor.java new file mode 100644 index 00000000000..d3cf6de4abf --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestApplicationSubmissionContextInterceptor.java @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.router.clientrm; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.LambdaTestUtils; +import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; +import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.LocalResourceType; +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.router.RouterServerUtil; +import org.junit.Test; + +/** + * Extends the {@code BaseRouterClientRMTest} and overrides methods in order to + * use the {@code RouterClientRMService} pipeline test cases for testing the + * {@code ApplicationSubmissionContextInterceptor} class. The tests for + * {@code RouterClientRMService} has been written cleverly so that it can be + * reused to validate different request interceptor chains. + */ +public class TestApplicationSubmissionContextInterceptor extends BaseRouterClientRMTest { + + @Override + protected YarnConfiguration createConfiguration() { + YarnConfiguration conf = new YarnConfiguration(); + String mockPassThroughInterceptorClass = + PassThroughClientRequestInterceptor.class.getName(); + + // Create a request interceptor pipeline for testing. The last one in the + // chain is the application submission context interceptor that checks + // for exceeded submission context size + // The others in the chain will simply forward it to the next one in the + // chain + conf.set(YarnConfiguration.ROUTER_CLIENTRM_INTERCEPTOR_CLASS_PIPELINE, + mockPassThroughInterceptorClass + "," + + ApplicationSubmissionContextInterceptor.class.getName() + "," + + MockClientRequestInterceptor.class.getName()); + + // Lower the max application context size + conf.set(YarnConfiguration.ROUTER_ASC_INTERCEPTOR_MAX_SIZE, "512B"); + + return conf; + } + + /** + * This test validates the correctness of SubmitApplication in case of empty + * request. + * @throws Exception error occur. 
+   */
+  @Test
+  public void testSubmitApplicationEmptyRequest() throws Exception {
+
+    MockRouterClientRMService rmService = getRouterClientRMService();
+    LambdaTestUtils.intercept(YarnException.class,
+        "Missing submitApplication request or applicationSubmissionContext information.",
+        () -> rmService.submitApplication(null));
+
+    ApplicationSubmissionContext context = ApplicationSubmissionContext.newInstance(
+        null, "", "", null, null, false, false, -1, null, null);
+    SubmitApplicationRequest request = SubmitApplicationRequest.newInstance(context);
+    LambdaTestUtils.intercept(YarnException.class,
+        "Missing submitApplication request or applicationSubmissionContext information.",
+        () -> rmService.submitApplication(request));
+  }
+
+  /**
+   * This test validates the correctness of SubmitApplication by setting up
+   * null, valid, and large ContainerLaunchContexts.
+   * @throws Exception if an error occurs.
+   */
+  @Test
+  public void testCLCExceedSize() throws Exception {
+
+    ApplicationSubmissionContext context = ApplicationSubmissionContext.newInstance(
+        ApplicationId.newInstance(1, 1), "test", "default",
+        Priority.newInstance(0), null, false, true, 2,
+        Resource.newInstance(10, 2), "test");
+
+    LocalResource localResource = LocalResource.newInstance(
+        URL.newInstance("hdfs", "somehost", 12345, "/some/path/to/rsrc"),
+        LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, 123L,
+        1234567890L);
+
+    Map<String, LocalResource> localResources = new HashMap<>();
+    localResources.put("rsrc", localResource);
+
+    Map<String, String> env = new HashMap<>();
+    env.put("somevar", "someval");
+
+    List<String> containerCmds = new ArrayList<>();
+    containerCmds.add("somecmd");
+    containerCmds.add("somearg");
+
+    Map<String, ByteBuffer> serviceData = new HashMap<>();
+    serviceData.put("someservice", ByteBuffer.wrap(new byte[] {0x1, 0x2, 0x3}));
+    ByteBuffer containerTokens = ByteBuffer.wrap(new byte[] {0x7, 0x8, 0x9, 0xa});
+
+    Map<ApplicationAccessType, String> acls = new HashMap<>();
+    acls.put(ApplicationAccessType.VIEW_APP, "viewuser");
+    acls.put(ApplicationAccessType.MODIFY_APP, "moduser");
+    ContainerLaunchContext clc = ContainerLaunchContext.newInstance(
+        localResources, env, containerCmds, serviceData, containerTokens, acls);
+    ApplicationSubmissionContextPBImpl appSubmissionContextPB =
+        (ApplicationSubmissionContextPBImpl) context;
+    Configuration configuration = getConf();
+
+    // Null ApplicationSubmissionContext
+    RouterServerUtil.checkAppSubmissionContext(null, configuration);
+
+    // Null ContainerLaunchContext
+    RouterServerUtil.checkAppSubmissionContext(appSubmissionContextPB, configuration);
+
+    // Valid ContainerLaunchContext
+    context.setAMContainerSpec(clc);
+    RouterServerUtil.checkAppSubmissionContext(appSubmissionContextPB, configuration);
+
+    // ContainerLaunchContext exceeds the configured size limit
+    for (int i = 0; i < 1000; i++) {
+      localResources.put("rsrc" + i, localResource);
+    }
+    ContainerLaunchContext clcExceedSize = ContainerLaunchContext.newInstance(
+        localResources, env, containerCmds, serviceData, containerTokens, acls);
+    context.setAMContainerSpec(clcExceedSize);
+    LambdaTestUtils.intercept(YarnException.class,
+        "The size of the ApplicationSubmissionContext of the application",
+        () -> RouterServerUtil.checkAppSubmissionContext(appSubmissionContextPB, configuration));
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md
index f547e8d6b77..3f7acee2d97 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md @@ -91,8 +91,7 @@ of the desirable properties of balance, optimal cluster utilization and global i This part of the federation system is part of future work in [YARN-5597](https://issues.apache.org/jira/browse/YARN-5597). - -###Federation State-Store +### Federation State-Store The Federation State defines the additional state that needs to be maintained to loosely couple multiple individual sub-clusters into a single large federated cluster. This includes the following information: ####Sub-cluster Membership @@ -255,10 +254,30 @@ Optional: These are extra configurations that should appear in the **conf/yarn-site.xml** at each Router. -| Property | Example | Description | -|:---- |:---- |:---- | -|`yarn.router.bind-host` | `0.0.0.0` | Host IP to bind the router to. The actual address the server will bind to. If this optional address is set, the RPC and webapp servers will bind to this address and the port specified in yarn.router.*.address respectively. This is most useful for making Router listen to all interfaces by setting to 0.0.0.0. | -| `yarn.router.clientrm.interceptor-class.pipeline` | `org.apache.hadoop.yarn.server.router.clientrm.FederationClientInterceptor` | A comma-separated list of interceptor classes to be run at the router when interfacing with the client. The last step of this pipeline must be the Federation Client Interceptor. | +| Property | Example | Description | +|:--------------------------------------------------|:----------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `yarn.router.bind-host` | `0.0.0.0` | Host IP to bind the router to. The actual address the server will bind to. If this optional address is set, the RPC and webapp servers will bind to this address and the port specified in yarn.router.*.address respectively. This is most useful for making Router listen to all interfaces by setting to 0.0.0.0. | +| `yarn.router.clientrm.interceptor-class.pipeline` | `org.apache.hadoop.yarn.server.router.clientrm.FederationClientInterceptor` | A comma-separated list of interceptor classes to be run at the router when interfacing with the client. The last step of this pipeline must be the Federation Client Interceptor. | + +> Enable ApplicationSubmissionContextInterceptor + +- If the `FederationStateStore` is configured with `Zookpeer` storage, the app information will be stored in `Zookpeer`. If the size of the app information exceeds `1MB`, `Zookpeer` may fail. `ApplicationSubmissionContextInterceptor` will check the size of `ApplicationSubmissionContext`, if the size exceeds the limit(default 1MB), an exception will be thrown. + - The size of the ApplicationSubmissionContext of the application application_123456789_0001 is above the limit. Size = 1.02 MB. 
+ +- The required configuration is as follows: + +``` + + yarn.router.clientrm.interceptor-class.pipeline + org.apache.hadoop.yarn.server.router.clientrm.PassThroughClientRequestInterceptor, + org.apache.hadoop.yarn.server.router.clientrm.ApplicationSubmissionContextInterceptor, + org.apache.hadoop.yarn.server.router.clientrm.FederationClientInterceptor + + + yarn.router.asc-interceptor-max-size + 1MB + +``` Optional: From 734f7abfb8b84a4c20dbae5073cf2d4fb60adc1c Mon Sep 17 00:00:00 2001 From: nao <56360298+nao-it@users.noreply.github.com> Date: Fri, 10 Mar 2023 18:27:22 +0300 Subject: [PATCH 19/97] HADOOP-18646. Upgrade Netty to 4.1.89.Final to fix CVE-2022-41881 (#5435) This fixes CVE-2022-41881. This also upgrades io.opencensus dependencies to 0.12.3 Contributed by Aleksandr Nikolaev --- LICENSE-binary | 8 ++------ hadoop-project/pom.xml | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 0fab0eea8ae..8a82432de06 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -289,12 +289,8 @@ io.netty:netty-resolver-dns-classes-macos:4.1.77.Final io.netty:netty-transport-native-epoll:4.1.77.Final io.netty:netty-transport-native-kqueue:4.1.77.Final io.netty:netty-resolver-dns-native-macos:4.1.77.Final -io.opencensus:opencensus-api:0.24.0 -io.opencensus:opencensus-contrib-grpc-metrics:0.24.0 -io.opentracing:opentracing-api:0.33.0 -io.opentracing:opentracing-noop:0.33.0 -io.opentracing:opentracing-util:0.33.0 -io.perfmark:perfmark-api:0.19.0 +io.opencensus:opencensus-api:0.12.3 +io.opencensus:opencensus-contrib-grpc-metrics:0.12.3 io.reactivex:rxjava:1.3.8 io.reactivex:rxjava-string:1.1.1 io.reactivex:rxnetty:0.4.20 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 88a7b1f119c..9eda301b63a 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -140,7 +140,7 @@ 5.2.0 2.9.0 3.2.4 - 4.1.77.Final + 4.1.89.Final 1.1.8.2 1.7.1 From ca6f5afb6da7f1c40ba42061d34b60b54bb01712 Mon Sep 17 00:00:00 2001 From: Stephen O'Donnell Date: Sat, 11 Mar 2023 16:40:07 +0000 Subject: [PATCH 20/97] HDFS-16942. 
Send error to datanode if FBR is rejected due to bad lease (#5460) --- .../hadoop-client-api/pom.xml | 6 ++ .../hdfs/server/datanode/BPServiceActor.java | 4 ++ .../server/namenode/NameNodeRpcServer.java | 3 + .../InvalidBlockReportLeaseException.java | 41 +++++++++++ .../hdfs/server/protocol/package-info.java | 27 ++++++++ .../blockmanagement/TestBlockReportLease.java | 69 +++++++++++++++++++ .../server/datanode/TestBPOfferService.java | 7 +- 7 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/InvalidBlockReportLeaseException.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/package-info.java diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml index b4b81011eb5..52263b3cfc3 100644 --- a/hadoop-client-modules/hadoop-client-api/pom.xml +++ b/hadoop-client-modules/hadoop-client-api/pom.xml @@ -126,6 +126,12 @@ org/apache/hadoop/yarn/client/api/package-info.class + + org.apache.hadoop:* + + org/apache/hadoop/hdfs/server/protocol/package-info.class + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index e9f424604b4..b552fa277d0 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -63,6 +63,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; +import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports; @@ -791,6 +792,9 @@ class BPServiceActor implements Runnable { shouldServiceRun = false; return; } + if (InvalidBlockReportLeaseException.class.getName().equals(reClass)) { + fullBlockReportLeaseId = 0; + } LOG.warn("RemoteException in offerService", re); sleepAfterException(); } catch (IOException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index b19bfc13acf..c5e6d041859 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -172,6 +172,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; +import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException; import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import 
org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; @@ -1651,6 +1652,8 @@ public class NameNodeRpcServer implements NamenodeProtocols { bm.processReport(nodeReg, reports[index].getStorage(), blocks, context)); } + } else { + throw new InvalidBlockReportLeaseException(context.getReportId(), context.getLeaseId()); } } catch (UnregisteredNodeException une) { LOG.warn("Datanode {} is attempting to report but not register yet.", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/InvalidBlockReportLeaseException.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/InvalidBlockReportLeaseException.java new file mode 100644 index 00000000000..8428b805f74 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/InvalidBlockReportLeaseException.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.protocol; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * This exception is thrown when a datanode sends a full block report but it is + * rejected by the Namenode due to an invalid lease (expired or otherwise). + * + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class InvalidBlockReportLeaseException extends IOException { + /** for java.io.Serializable. */ + private static final long serialVersionUID = 1L; + + public InvalidBlockReportLeaseException(long blockReportID, long leaseID) { + super("Block report 0x" + Long.toHexString(blockReportID) + " was rejected as lease 0x" + + Long.toHexString(leaseID) + " is invalid"); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/package-info.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/package-info.java new file mode 100644 index 00000000000..21743595548 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/package-info.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package provides classes for the namenode server protocol. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +package org.apache.hadoop.hdfs.server.protocol; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java index d1ae0b600fc..225f7fc96c4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; +import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.server.protocol.RegisterCommand; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; @@ -41,12 +42,14 @@ import org.junit.Test; import java.util.ArrayList; import java.util.List; import java.util.Random; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.spy; @@ -137,6 +140,72 @@ public class TestBlockReportLease { } } + @Test + public void testExceptionThrownWhenFBRLeaseExpired() throws Exception { + HdfsConfiguration conf = new HdfsConfiguration(); + Random rand = new Random(); + + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(1).build()) { + cluster.waitActive(); + + FSNamesystem fsn = cluster.getNamesystem(); + BlockManager blockManager = fsn.getBlockManager(); + BlockManager spyBlockManager = spy(blockManager); + fsn.setBlockManagerForTesting(spyBlockManager); + String poolId = cluster.getNamesystem().getBlockPoolId(); + + NamenodeProtocols rpcServer = cluster.getNameNodeRpc(); + + // Test based on one DataNode report to Namenode + DataNode dn = cluster.getDataNodes().get(0); + DatanodeDescriptor datanodeDescriptor = spyBlockManager + .getDatanodeManager().getDatanode(dn.getDatanodeId()); + + DatanodeRegistration dnRegistration = dn.getDNRegistrationForBP(poolId); + StorageReport[] storages = dn.getFSDataset().getStorageReports(poolId); + + // Send heartbeat and request full block report lease + HeartbeatResponse hbResponse = rpcServer.sendHeartbeat( + 
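+          // Assumption (illustrative comment): the 'true' argument below is
+          // DatanodeProtocol's requestFullBlockReportLease flag, so hbResponse
+          // carries the FBR lease id that removeLease() invalidates just below.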
dnRegistration, storages, 0, 0, 0, 0, 0, null, true, + SlowPeerReports.EMPTY_REPORT, SlowDiskReports.EMPTY_REPORT); + + // Remove full block report lease about dn + spyBlockManager.getBlockReportLeaseManager() + .removeLease(datanodeDescriptor); + + ExecutorService pool = Executors.newFixedThreadPool(1); + + // Trigger sendBlockReport + BlockReportContext brContext = new BlockReportContext(1, 0, + rand.nextLong(), hbResponse.getFullBlockReportLeaseId()); + Future sendBRfuturea = pool.submit(() -> { + // Build every storage with 100 blocks for sending report + DatanodeStorage[] datanodeStorages + = new DatanodeStorage[storages.length]; + for (int i = 0; i < storages.length; i++) { + datanodeStorages[i] = storages[i].getStorage(); + } + StorageBlockReport[] reports = createReports(datanodeStorages, 100); + + // Send blockReport + return rpcServer.blockReport(dnRegistration, poolId, reports, + brContext); + }); + + // Get result, it will not null if process successfully + ExecutionException exception = null; + try { + sendBRfuturea.get(); + } catch (ExecutionException e) { + exception = e; + } + assertNotNull(exception); + assertEquals(InvalidBlockReportLeaseException.class, + exception.getCause().getClass()); + } + } + @Test public void testCheckBlockReportLeaseWhenDnUnregister() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index d300eac4b69..9d4b0db0804 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import static org.apache.hadoop.test.MetricsAsserts.assertCounter; @@ -39,7 +40,6 @@ import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; -import java.net.ConnectException; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Collections; @@ -1187,8 +1187,9 @@ public class TestBPOfferService { // just reject and wait until DN request for a new leaseId if(leaseId == 1) { firstLeaseId = leaseId; - throw new ConnectException( - "network is not reachable for test. "); + InvalidBlockReportLeaseException e = + new InvalidBlockReportLeaseException(context.getReportId(), 1); + throw new RemoteException(e.getClass().getName(), e.getMessage()); } else { secondLeaseId = leaseId; return null; From 476340c6999df71480164b18abd809c5e710d87a Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Mon, 13 Mar 2023 05:38:04 +0100 Subject: [PATCH 21/97] HADOOP-18658. snakeyaml dependency: upgrade to v2.0 (#5467). Contributed by PJ Fanning. 
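Note for consumers of this upgrade: as far as I can tell, SnakeYAML 2.0 is not a drop-in replacement for 1.33 in every case, because the 1.x single-argument `Constructor(Class)` form now requires an explicit `LoaderOptions`. A minimal sketch of typed loading that compiles against 2.0 (the `SnakeYaml2Sketch` and `ServiceConf` names are placeholders, not anything in Hadoop):

```java
import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.Constructor;

public final class SnakeYaml2Sketch {

  /** Placeholder bean used only for this illustration. */
  public static class ServiceConf {
    private String name;
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
  }

  public static ServiceConf load(String yamlText) {
    // SnakeYAML 2.x asks for LoaderOptions here; 1.x accepted Constructor(Class) alone.
    Yaml yaml = new Yaml(new Constructor(ServiceConf.class, new LoaderOptions()));
    return yaml.load(yamlText);
  }

  public static void main(String[] args) {
    System.out.println(load("name: router").getName());
  }
}
```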
Signed-off-by: Ayush Saxena --- LICENSE-binary | 2 +- hadoop-project/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 8a82432de06..101c1f10daa 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -363,7 +363,7 @@ org.jetbrains.kotlin:kotlin-stdlib-common:1.4.10 org.lz4:lz4-java:1.7.1 org.objenesis:objenesis:2.6 org.xerial.snappy:snappy-java:1.0.5 -org.yaml:snakeyaml:1.33 +org.yaml:snakeyaml:2.0 org.wildfly.openssl:wildfly-openssl:1.1.3.Final diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 9eda301b63a..d8114afb58f 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -196,7 +196,7 @@ ${hadoop.version} 1.5.4 - 1.33 + 2.0 1.7.1 2.2.4 4.13.2 From 09469bf47dd1eb1d880f8119bf62c29cf70cdf58 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 13 Mar 2023 12:23:06 +0000 Subject: [PATCH 22/97] HADOOP-18661. Fix bin/hadoop usage script terminology. (#5473) Followup to HADOOP-13209: s/slaves/r/workers in the usage message you get when you type "bin/hadoop" Contributed by Steve Loughran --- hadoop-common-project/hadoop-common/src/main/bin/hadoop | 4 ++-- .../hadoop-common/src/main/bin/hadoop-daemons.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index abf3573986a..1218d22ecf5 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -26,9 +26,9 @@ MYNAME="${BASH_SOURCE-$0}" function hadoop_usage { hadoop_add_option "buildpaths" "attempt to add class files from build tree" - hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in slave mode" + hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in worker mode" hadoop_add_option "loglevel level" "set the log4j level for this command" - hadoop_add_option "hosts filename" "list of hosts to use in slave mode" + hadoop_add_option "hosts filename" "list of hosts to use in worker mode" hadoop_add_option "workers" "turn on worker mode" hadoop_add_subcommand "checknative" client "check native Hadoop and compression libraries availability" diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh index 55304916ad1..1d8096b4baa 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh @@ -16,7 +16,7 @@ # limitations under the License. -# Run a Hadoop command on all slave hosts. +# Run a Hadoop command on all worker hosts. function hadoop_usage { From aff840c59c81c1093f2d4bafeaedb0c4c8742eae Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 13 Mar 2023 05:30:12 -0700 Subject: [PATCH 23/97] HADOOP-18653. 
LogLevel servlet to determine log impl before using setLevel (#5456) The log level can only be set on Log4J log implementations; probes are used to downgrade to a warning when other logging back ends are used Contributed by Viraj Jasani --- .../java/org/apache/hadoop/log/LogLevel.java | 11 ++++++-- .../org/apache/hadoop/util/GenericsUtil.java | 28 +++++++++++++++++-- .../apache/hadoop/util/TestGenericsUtil.java | 2 +- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java index 6785e2f672a..32879597a9c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/LogLevel.java @@ -34,6 +34,8 @@ import javax.servlet.http.HttpServletResponse; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -44,6 +46,7 @@ import org.apache.hadoop.security.authentication.client.AuthenticatedURL; import org.apache.hadoop.security.authentication.client.KerberosAuthenticator; import org.apache.hadoop.security.ssl.SSLFactory; import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.GenericsUtil; import org.apache.hadoop.util.ServletUtil; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -338,14 +341,18 @@ public class LogLevel { out.println(MARKER + "Submitted Class Name: " + logName + "
"); - Logger log = Logger.getLogger(logName); + org.slf4j.Logger log = LoggerFactory.getLogger(logName); out.println(MARKER + "Log Class: " + log.getClass().getName() +"
"); if (level != null) { out.println(MARKER + "Submitted Level: " + level + "
"); } - process(log, level, out); + if (GenericsUtil.isLog4jLogger(logName)) { + process(Logger.getLogger(logName), level, out); + } else { + out.println("Sorry, setting log level is only supported for log4j loggers.
"); + } } out.println(FORMS); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericsUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericsUtil.java index 2d35b15bc59..2bf26da4d3b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericsUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericsUtil.java @@ -20,6 +20,7 @@ package org.apache.hadoop.util; import java.lang.reflect.Array; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -33,6 +34,14 @@ import org.slf4j.LoggerFactory; @InterfaceStability.Unstable public class GenericsUtil { + private static final String SLF4J_LOG4J_ADAPTER_CLASS = "org.slf4j.impl.Log4jLoggerAdapter"; + + /** + * Set to false only if log4j adapter class is not found in the classpath. Once set to false, + * the utility method should not bother re-loading class again. + */ + private static final AtomicBoolean IS_LOG4J_LOGGER = new AtomicBoolean(true); + /** * Returns the Class object (of type Class<T>) of the * argument of type T. @@ -87,12 +96,27 @@ public class GenericsUtil { if (clazz == null) { return false; } - Logger log = LoggerFactory.getLogger(clazz); + return isLog4jLogger(clazz.getName()); + } + + /** + * Determine whether the log of the given logger is of Log4J implementation. + * + * @param logger the logger name, usually class name as string. + * @return true if the logger uses Log4J implementation. + */ + public static boolean isLog4jLogger(String logger) { + if (logger == null || !IS_LOG4J_LOGGER.get()) { + return false; + } + Logger log = LoggerFactory.getLogger(logger); try { - Class log4jClass = Class.forName("org.slf4j.impl.Log4jLoggerAdapter"); + Class log4jClass = Class.forName(SLF4J_LOG4J_ADAPTER_CLASS); return log4jClass.isInstance(log); } catch (ClassNotFoundException e) { + IS_LOG4J_LOGGER.set(false); return false; } } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericsUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericsUtil.java index 85d649cc075..e47c3e57ba7 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericsUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericsUtil.java @@ -140,7 +140,7 @@ public class TestGenericsUtil { @Test public void testIsLog4jLogger() throws Exception { - assertFalse("False if clazz is null", GenericsUtil.isLog4jLogger(null)); + assertFalse("False if clazz is null", GenericsUtil.isLog4jLogger((Class) null)); assertTrue("The implementation is Log4j", GenericsUtil.isLog4jLogger(TestGenericsUtil.class)); } From eee2ea075d10b197837ceca8cbb2260b48b29d10 Mon Sep 17 00:00:00 2001 From: Stephen O'Donnell Date: Wed, 15 Mar 2023 04:33:00 +0000 Subject: [PATCH 24/97] HDFS-16942. Addendum. Send error to datanode if FBR is rejected due to bad lease (#5478). 
Contributed by Stephen O'Donnell/ --- .../hadoop-client-api/pom.xml | 6 ----- .../hdfs/server/protocol/package-info.java | 27 ------------------- 2 files changed, 33 deletions(-) delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/package-info.java diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml index 52263b3cfc3..b4b81011eb5 100644 --- a/hadoop-client-modules/hadoop-client-api/pom.xml +++ b/hadoop-client-modules/hadoop-client-api/pom.xml @@ -126,12 +126,6 @@ org/apache/hadoop/yarn/client/api/package-info.class - - org.apache.hadoop:* - - org/apache/hadoop/hdfs/server/protocol/package-info.class - - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/package-info.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/package-info.java deleted file mode 100644 index 21743595548..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/package-info.java +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * This package provides classes for the namenode server protocol. - */ -@InterfaceAudience.Private -@InterfaceStability.Evolving -package org.apache.hadoop.hdfs.server.protocol; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; From 405bfa28002aafdc8afc31d4648d2388cff3c4c3 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 15 Mar 2023 09:45:37 -0700 Subject: [PATCH 25/97] HADOOP-18654. 
Remove unused custom appender TaskLogAppender (#5457) --- .../src/main/conf/log4j.properties | 8 - .../org/apache/hadoop/mapred/TaskLog.java | 11 -- .../apache/hadoop/mapred/TaskLogAppender.java | 153 ------------------ .../hadoop/mapred/TestTaskLogAppender.java | 74 --------- 4 files changed, 246 deletions(-) delete mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLogAppender.java delete mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestTaskLogAppender.java diff --git a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties index 54d5c729848..b4eec1fe2cc 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties +++ b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties @@ -75,14 +75,6 @@ log4j.appender.console.target=System.err log4j.appender.console.layout=org.apache.log4j.PatternLayout log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n -# -# TaskLog Appender -# -log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender - -log4j.appender.TLA.layout=org.apache.log4j.PatternLayout -log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n - # # HDFS block state change log from block manager # diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java index f5e07e9128a..a0223dedd64 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java @@ -256,17 +256,6 @@ public class TaskLog { throws IOException { System.out.flush(); System.err.flush(); - Enumeration allLoggers = LogManager.getCurrentLoggers(); - while (allLoggers.hasMoreElements()) { - Logger l = allLoggers.nextElement(); - Enumeration allAppenders = l.getAllAppenders(); - while (allAppenders.hasMoreElements()) { - Appender a = allAppenders.nextElement(); - if (a instanceof TaskLogAppender) { - ((TaskLogAppender)a).flush(); - } - } - } if (currentTaskid != taskid) { currentTaskid = taskid; resetPrevLengths(logLocation); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLogAppender.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLogAppender.java deleted file mode 100644 index d10b764640d..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLogAppender.java +++ /dev/null @@ -1,153 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.mapred; - -import java.io.Flushable; -import java.util.LinkedList; -import java.util.Queue; - -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.log4j.FileAppender; -import org.apache.log4j.spi.LoggingEvent; - -/** - * A simple log4j-appender for the task child's - * map-reduce system logs. - * - */ -@InterfaceStability.Unstable -public class TaskLogAppender extends FileAppender implements Flushable { - private String taskId; //taskId should be managed as String rather than TaskID object - //so that log4j can configure it from the configuration(log4j.properties). - private Integer maxEvents; - private Queue tail = null; - private Boolean isCleanup; - - // System properties passed in from JVM runner - static final String ISCLEANUP_PROPERTY = "hadoop.tasklog.iscleanup"; - static final String LOGSIZE_PROPERTY = "hadoop.tasklog.totalLogFileSize"; - static final String TASKID_PROPERTY = "hadoop.tasklog.taskid"; - - @Override - public void activateOptions() { - synchronized (this) { - setOptionsFromSystemProperties(); - - if (maxEvents > 0) { - tail = new LinkedList(); - } - setFile(TaskLog.getTaskLogFile(TaskAttemptID.forName(taskId), - isCleanup, TaskLog.LogName.SYSLOG).toString()); - setAppend(true); - super.activateOptions(); - } - } - - /** - * The Task Runner passes in the options as system properties. Set - * the options if the setters haven't already been called. - */ - private synchronized void setOptionsFromSystemProperties() { - if (isCleanup == null) { - String propValue = System.getProperty(ISCLEANUP_PROPERTY, "false"); - isCleanup = Boolean.valueOf(propValue); - } - - if (taskId == null) { - taskId = System.getProperty(TASKID_PROPERTY); - } - - if (maxEvents == null) { - String propValue = System.getProperty(LOGSIZE_PROPERTY, "0"); - setTotalLogFileSize(Long.parseLong(propValue)); - } - } - - @Override - public void append(LoggingEvent event) { - synchronized (this) { - if (tail == null) { - super.append(event); - } else { - if (tail.size() >= maxEvents) { - tail.remove(); - } - tail.add(event); - } - } - } - - @Override - public void flush() { - if (qw != null) { - qw.flush(); - } - } - - @Override - public synchronized void close() { - if (tail != null) { - for(LoggingEvent event: tail) { - super.append(event); - } - } - super.close(); - } - - /** - * Getter/Setter methods for log4j. - */ - - public synchronized String getTaskId() { - return taskId; - } - - public synchronized void setTaskId(String taskId) { - this.taskId = taskId; - } - - private static final int EVENT_SIZE = 100; - - public synchronized long getTotalLogFileSize() { - return maxEvents * EVENT_SIZE; - } - - public synchronized void setTotalLogFileSize(long logSize) { - maxEvents = (int) logSize / EVENT_SIZE; - } - - /** - * Set whether the task is a cleanup attempt or not. - * - * @param isCleanup - * true if the task is cleanup attempt, false otherwise. - */ - public synchronized void setIsCleanup(boolean isCleanup) { - this.isCleanup = isCleanup; - } - - /** - * Get whether task is cleanup attempt or not. 
- * - * @return true if the task is cleanup attempt, false otherwise. - */ - public synchronized boolean getIsCleanup() { - return isCleanup; - } -} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestTaskLogAppender.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestTaskLogAppender.java deleted file mode 100644 index 52e9d7be356..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestTaskLogAppender.java +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.hadoop.mapred; - -import java.io.StringWriter; -import java.io.Writer; - -import org.apache.log4j.Category; -import org.apache.log4j.Layout; -import org.apache.log4j.Logger; -import org.apache.log4j.PatternLayout; -import org.apache.log4j.Priority; -import org.apache.log4j.spi.LoggingEvent; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -public class TestTaskLogAppender { -/** - * test TaskLogAppender - */ - @SuppressWarnings("deprecation") - @Test (timeout=5000) - public void testTaskLogAppender(){ - TaskLogAppender appender= new TaskLogAppender(); - - System.setProperty(TaskLogAppender.TASKID_PROPERTY,"attempt_01_02_m03_04_001"); - System.setProperty(TaskLogAppender.LOGSIZE_PROPERTY, "1003"); - appender.activateOptions(); - assertThat(appender.getTaskId()).isEqualTo("attempt_01_02_m03_04_001"); - assertThat(appender.getTotalLogFileSize()).isEqualTo(1000); - assertFalse(appender.getIsCleanup()); - - // test writer - Writer writer= new StringWriter(); - appender.setWriter(writer); - Layout layout = new PatternLayout("%-5p [%t]: %m%n"); - appender.setLayout(layout); - Category logger= Logger.getLogger(getClass().getName()); - LoggingEvent event = new LoggingEvent("fqnOfCategoryClass", logger, Priority.INFO, "message", new Throwable()); - appender.append(event); - appender.flush() ; - appender.close(); - assertTrue(writer.toString().length()>0); - - // test cleanup should not changed - appender= new TaskLogAppender(); - appender.setIsCleanup(true); - appender.activateOptions(); - assertTrue(appender.getIsCleanup()); - - - } - -} From cf4a678ce946cecd035677bce756794ef286d795 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 15 Mar 2023 09:46:17 -0700 Subject: [PATCH 26/97] HADOOP-18649. 
CLA and CRLA appenders to be replaced with RFA (#5448) --- .../hadoop/mapreduce/v2/util/MRApps.java | 5 +- .../src/main/resources/mapred-default.xml | 15 +- .../hadoop/yarn/ContainerLogAppender.java | 129 ------------------ .../yarn/ContainerRollingLogAppender.java | 76 ----------- .../hadoop/yarn/TestContainerLogAppender.java | 48 ------- .../main/resources/container-log4j.properties | 39 +++--- 6 files changed, 24 insertions(+), 288 deletions(-) delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerLogAppender.java delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerRollingLogAppender.java delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLogAppender.java diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index a3ccfd72d8c..72dd48b09c2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -60,8 +60,6 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.util.ApplicationClassLoader; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.yarn.ContainerLogAppender; -import org.apache.hadoop.yarn.ContainerRollingLogAppender; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.LocalResource; @@ -588,8 +586,7 @@ public class MRApps extends Apps { /** * Add the JVM system properties necessary to configure - * {@link ContainerLogAppender} or - * {@link ContainerRollingLogAppender}. + * {@link org.apache.log4j.RollingFileAppender}. * * @param task for map/reduce, or null for app master * @param vargs the argument list to append to diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 7e1b49c925f..7aadd521092 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -840,11 +840,8 @@ yarn.app.mapreduce.task.container.log.backups 0 Number of backup files for task logs when using - ContainerRollingLogAppender (CRLA). See - org.apache.log4j.RollingFileAppender.maxBackupIndex. By default, - ContainerLogAppender (CLA) is used, and container logs are not rolled. CRLA - is enabled for tasks when both mapreduce.task.userlog.limit.kb and - yarn.app.mapreduce.task.container.log.backups are greater than zero. + RollingFileAppender (RFA). See + org.apache.log4j.RollingFileAppender.maxBackupIndex.
@@ -852,12 +849,8 @@ yarn.app.mapreduce.am.container.log.backups 0 Number of backup files for the ApplicationMaster logs when using - ContainerRollingLogAppender (CRLA). See - org.apache.log4j.RollingFileAppender.maxBackupIndex. By default, - ContainerLogAppender (CLA) is used, and container logs are not rolled. CRLA - is enabled for the ApplicationMaster when both - yarn.app.mapreduce.am.container.log.limit.kb and - yarn.app.mapreduce.am.container.log.backups are greater than zero. + RollingFileAppender (RFA). See + org.apache.log4j.RollingFileAppender.maxBackupIndex. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerLogAppender.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerLogAppender.java deleted file mode 100644 index 09efe41e0c2..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerLogAppender.java +++ /dev/null @@ -1,129 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn; - -import java.io.File; -import java.io.Flushable; -import java.util.ArrayDeque; -import java.util.Deque; - -import org.apache.hadoop.classification.InterfaceAudience.Public; -import org.apache.hadoop.classification.InterfaceStability.Unstable; -import org.apache.log4j.FileAppender; -import org.apache.log4j.spi.LoggingEvent; - -/** - * A simple log4j-appender for container's logs. - */ -@Public -@Unstable -public class ContainerLogAppender extends FileAppender - implements Flushable { - - private String containerLogDir; - private String containerLogFile; - private int maxEvents; - private Deque eventBuffer; - private boolean closed = false; - - @Override - public synchronized void activateOptions() { - if (maxEvents > 0) { - this.eventBuffer = new ArrayDeque<>(); - } - setFile(new File(this.containerLogDir, containerLogFile).toString()); - setAppend(true); - super.activateOptions(); - } - - @Override - public synchronized void append(LoggingEvent event) { - if (closed) { - return; - } - if (eventBuffer != null) { - if (eventBuffer.size() == maxEvents) { - eventBuffer.removeFirst(); - } - eventBuffer.addLast(event); - } else { - super.append(event); - } - } - - @Override - public void flush() { - if (qw != null) { - qw.flush(); - } - } - - @Override - public synchronized void close() { - if (!closed) { - closed = true; - if (eventBuffer != null) { - for (LoggingEvent event : eventBuffer) { - super.append(event); - } - // let garbage collection do its work - eventBuffer = null; - } - super.close(); - } - } - - /** - * Getter/Setter methods for log4j. - * - * @return containerLogDir. 
- */ - - public String getContainerLogDir() { - return this.containerLogDir; - } - - public void setContainerLogDir(String containerLogDir) { - this.containerLogDir = containerLogDir; - } - - public String getContainerLogFile() { - return containerLogFile; - } - - public void setContainerLogFile(String containerLogFile) { - this.containerLogFile = containerLogFile; - } - - private static final long EVENT_SIZE = 100; - - public long getTotalLogFileSize() { - return maxEvents * EVENT_SIZE; - } - - /** - * Setter so that log4j can configure it from the - * configuration(log4j.properties). - * - * @param logSize log size. - */ - public void setTotalLogFileSize(long logSize) { - maxEvents = (int)(logSize / EVENT_SIZE); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerRollingLogAppender.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerRollingLogAppender.java deleted file mode 100644 index f0e00fc1940..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerRollingLogAppender.java +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn; - -import org.apache.hadoop.classification.InterfaceAudience.Public; -import org.apache.hadoop.classification.InterfaceStability.Unstable; -import org.apache.log4j.RollingFileAppender; - -import java.io.File; -import java.io.Flushable; - -/** - * A simple log4j-appender for container's logs. - * - */ -@Public -@Unstable -public class ContainerRollingLogAppender extends RollingFileAppender - implements Flushable { - private String containerLogDir; - private String containerLogFile; - - @Override - public void activateOptions() { - synchronized (this) { - setFile(new File(this.containerLogDir, containerLogFile).toString()); - setAppend(true); - super.activateOptions(); - } - } - - @Override - public void flush() { - if (qw != null) { - qw.flush(); - } - } - - /** - * Getter/Setter methods for log4j. - * - * @return containerLogDir. 
- */ - - public String getContainerLogDir() { - return this.containerLogDir; - } - - public void setContainerLogDir(String containerLogDir) { - this.containerLogDir = containerLogDir; - } - - public String getContainerLogFile() { - return containerLogFile; - } - - public void setContainerLogFile(String containerLogFile) { - this.containerLogFile = containerLogFile; - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLogAppender.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLogAppender.java deleted file mode 100644 index 26acfd7bad8..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLogAppender.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn; - -import org.junit.jupiter.api.Test; - -import org.apache.log4j.Logger; -import org.apache.log4j.PatternLayout; - -public class TestContainerLogAppender { - - @Test - void testAppendInClose() throws Exception { - final ContainerLogAppender claAppender = new ContainerLogAppender(); - claAppender.setName("testCLA"); - claAppender.setLayout(new PatternLayout("%-5p [%t]: %m%n")); - claAppender.setContainerLogDir("target/testAppendInClose/logDir"); - claAppender.setContainerLogFile("syslog"); - claAppender.setTotalLogFileSize(1000); - claAppender.activateOptions(); - final Logger claLog = Logger.getLogger("testAppendInClose-catergory"); - claLog.setAdditivity(false); - claLog.addAppender(claAppender); - claLog.info(new Object() { - public String toString() { - claLog.info("message1"); - return "return message1"; - } - }); - claAppender.close(); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties index 678e3a74c89..c5371c6d9ef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties @@ -26,36 +26,35 @@ log4j.threshold=ALL # #Default values -yarn.app.container.log.dir=null -yarn.app.container.log.filesize=100 +yarn.app.container.log.filesize=100MB +yarn.app.container.log.backups=1 +yarn.app.mapreduce.shuffle.log.backups=1 -log4j.appender.CLA=org.apache.hadoop.yarn.ContainerLogAppender -log4j.appender.CLA.containerLogDir=${yarn.app.container.log.dir} 
-log4j.appender.CLA.containerLogFile=${hadoop.root.logfile} -log4j.appender.CLA.totalLogFileSize=${yarn.app.container.log.filesize} +log4j.appender.CLA=org.apache.log4j.RollingFileAppender +log4j.appender.CLA.File=${yarn.app.container.log.dir}/${hadoop.root.logfile} +log4j.appender.CLA.MaxFileSize=${yarn.app.container.log.filesize} +log4j.appender.CLA.MaxBackupIndex=${yarn.app.container.log.backups} log4j.appender.CLA.layout=org.apache.log4j.PatternLayout log4j.appender.CLA.layout.ConversionPattern=%d{ISO8601} %p [%t] %c: %m%n -log4j.appender.CRLA=org.apache.hadoop.yarn.ContainerRollingLogAppender -log4j.appender.CRLA.containerLogDir=${yarn.app.container.log.dir} -log4j.appender.CRLA.containerLogFile=${hadoop.root.logfile} -log4j.appender.CRLA.maximumFileSize=${yarn.app.container.log.filesize} -log4j.appender.CRLA.maxBackupIndex=${yarn.app.container.log.backups} +log4j.appender.CRLA=org.apache.log4j.RollingFileAppender +log4j.appender.CRLA.File=${yarn.app.container.log.dir}/${hadoop.root.logfile} +log4j.appender.CRLA.MaxFileSize=${yarn.app.container.log.filesize} +log4j.appender.CRLA.MaxBackupIndex=${yarn.app.container.log.backups} log4j.appender.CRLA.layout=org.apache.log4j.PatternLayout log4j.appender.CRLA.layout.ConversionPattern=%d{ISO8601} %p [%t] %c: %m%n -log4j.appender.shuffleCLA=org.apache.hadoop.yarn.ContainerLogAppender -log4j.appender.shuffleCLA.containerLogDir=${yarn.app.container.log.dir} -log4j.appender.shuffleCLA.containerLogFile=${yarn.app.mapreduce.shuffle.logfile} -log4j.appender.shuffleCLA.totalLogFileSize=${yarn.app.mapreduce.shuffle.log.filesize} +log4j.appender.shuffleCLA=org.apache.log4j.RollingFileAppender +log4j.appender.shuffleCLA.File=${yarn.app.container.log.dir}/${yarn.app.mapreduce.shuffle.logfile} +log4j.appender.shuffleCLA.MaxFileSize=${yarn.app.mapreduce.shuffle.log.filesize} +log4j.appender.shuffleCLA.MaxBackupIndex=${yarn.app.mapreduce.shuffle.log.backups} log4j.appender.shuffleCLA.layout=org.apache.log4j.PatternLayout log4j.appender.shuffleCLA.layout.ConversionPattern=%d{ISO8601} %p [%t] %c: %m%n -log4j.appender.shuffleCRLA=org.apache.hadoop.yarn.ContainerRollingLogAppender -log4j.appender.shuffleCRLA.containerLogDir=${yarn.app.container.log.dir} -log4j.appender.shuffleCRLA.containerLogFile=${yarn.app.mapreduce.shuffle.logfile} -log4j.appender.shuffleCRLA.maximumFileSize=${yarn.app.mapreduce.shuffle.log.filesize} -log4j.appender.shuffleCRLA.maxBackupIndex=${yarn.app.mapreduce.shuffle.log.backups} +log4j.appender.shuffleCRLA=org.apache.log4j.RollingFileAppender +log4j.appender.shuffleCRLA.File=${yarn.app.container.log.dir}/${yarn.app.mapreduce.shuffle.logfile} +log4j.appender.shuffleCRLA.MaxFileSize=${yarn.app.mapreduce.shuffle.log.filesize} +log4j.appender.shuffleCRLA.MaxBackupIndex=${yarn.app.mapreduce.shuffle.log.backups} log4j.appender.shuffleCRLA.layout=org.apache.log4j.PatternLayout log4j.appender.shuffleCRLA.layout.ConversionPattern=%d{ISO8601} %p [%t] %c: %m%n From 15935fa86501810978103cae14ea7f65d0bb0aa4 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 15 Mar 2023 09:59:55 -0700 Subject: [PATCH 27/97] HDFS-16947. 
RBF NamenodeHeartbeatService to report error for not being able to register namenode in state store (#5470) --- .../server/federation/router/NamenodeHeartbeatService.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java index 86e24a0b24a..9ffcea1a95c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java @@ -317,11 +317,8 @@ public class NamenodeHeartbeatService extends PeriodicService { if (!resolver.registerNamenode(report)) { LOG.warn("Cannot register namenode {}", report); } - } catch (IOException e) { - LOG.info("Cannot register namenode in the State Store"); - } catch (Exception ex) { - LOG.error("Unhandled exception updating NN registration for {}", - getNamenodeDesc(), ex); + } catch (Exception e) { + LOG.error("Cannot register namenode {} in the State Store", getNamenodeDesc(), e); } } From 7c42d0f7da1d154f0bfdc1ffd2070508a365babc Mon Sep 17 00:00:00 2001 From: Masatake Iwasaki Date: Thu, 16 Mar 2023 02:10:42 +0900 Subject: [PATCH 28/97] HADOOP-17746. Compatibility table in directory_markers.md doesn't render right. (#3116) Contributed by Masatake Iwasaki --- .../src/site/markdown/tools/hadoop-aws/directory_markers.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md index 41099fe6653..1d7f6b1223d 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md @@ -29,7 +29,6 @@ Changing the policy from the default value, `"delete"` _is not backwards compati Versions of Hadoop which are incompatible with other marker retention policies, as of August 2020. -------------------------------------------------------- | Branch | Compatible Since | Supported | |------------|------------------|---------------------| | Hadoop 2.x | n/a | WONTFIX | @@ -37,7 +36,7 @@ as of August 2020. | Hadoop 3.1 | check | Read-only | | Hadoop 3.2 | check | Read-only | | Hadoop 3.3 | 3.3.1 | Done | -------------------------------------------------------- + *WONTFIX* From 759ddebb132b5d7d83dacc2e09adfb9817bcad3b Mon Sep 17 00:00:00 2001 From: Pranav Saxena <108325433+saxenapranav@users.noreply.github.com> Date: Thu, 16 Mar 2023 01:33:22 +0530 Subject: [PATCH 29/97] HADOOP-18647. x-ms-client-request-id to identify the retry of an API. 
(#5437) The x-ms-client-request-id now includes a field to indicate a call is a retry of a previous operation Contributed by Pranav Saxena --- .../fs/azurebfs/utils/TracingContext.java | 43 +++++++++-- .../fs/azurebfs/TestTracingContext.java | 72 +++++++++++++++++++ .../utils/TracingHeaderValidator.java | 3 + 3 files changed, 114 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index 9a2ccda36fb..57e65b30b46 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -63,6 +63,16 @@ public class TracingContext { //final concatenated ID list set into x-ms-client-request-id header private String header = EMPTY_STRING; + /** + * If {@link #primaryRequestId} is null, this field shall be set equal + * to the last part of the {@link #clientRequestId}'s UUID + * in {@link #constructHeader(AbfsHttpOperation, String)} only on the + * first API call for an operation. Subsequent retries for that operation + * will not change this field. In case {@link #primaryRequestId} is non-null, + * this field shall not be set. + */ + private String primaryRequestIdForRetry; + private static final Logger LOG = LoggerFactory.getLogger(AbfsClient.class); public static final int MAX_CLIENT_CORRELATION_ID_LENGTH = 72; public static final String CLIENT_CORRELATION_ID_PATTERN = "[a-zA-Z0-9-]*"; @@ -152,8 +162,8 @@ public class TracingContext { * X_MS_CLIENT_REQUEST_ID header of the http operation * @param httpOperation AbfsHttpOperation instance to set header into * connection - * @param previousFailure List of failures seen before this API trigger on - * same operation from AbfsClient. + * @param previousFailure Failure seen before this API trigger on same operation + * from AbfsClient. */ public void constructHeader(AbfsHttpOperation httpOperation, String previousFailure) { clientRequestId = UUID.randomUUID().toString(); @@ -161,8 +171,8 @@ public class TracingContext { case ALL_ID_FORMAT: // Optional IDs (e.g. streamId) may be empty header = clientCorrelationID + ":" + clientRequestId + ":" + fileSystemID + ":" - + primaryRequestId + ":" + streamID + ":" + opType + ":" - + retryCount; + + getPrimaryRequestIdForHeader(retryCount > 0) + ":" + streamID + + ":" + opType + ":" + retryCount; header = addFailureReasons(header, previousFailure); break; case TWO_ID_FORMAT: @@ -175,6 +185,31 @@ public class TracingContext { listener.callTracingHeaderValidator(header, format); } httpOperation.setRequestProperty(HttpHeaderConfigurations.X_MS_CLIENT_REQUEST_ID, header); + /* + * In case the primaryRequestId is an empty-string and if it is the first try to + * API call (previousFailure shall be null), maintain the last part of clientRequestId's + * UUID in primaryRequestIdForRetry. This field shall be used as primaryRequestId part + * of the x-ms-client-request-id header in case of retry of the same API-request. + */ + if (primaryRequestId.isEmpty() && previousFailure == null) { + String[] clientRequestIdParts = clientRequestId.split("-"); + primaryRequestIdForRetry = clientRequestIdParts[ + clientRequestIdParts.length - 1]; + } + } + + /** + * Provide value to be used as primaryRequestId part of x-ms-client-request-id header. + * @param isRetry define if it's for a retry case. 
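+   *                (the caller, {@link #constructHeader(AbfsHttpOperation, String)},
+   *                passes {@code retryCount > 0} for this flag).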
+ * @return {@link #primaryRequestIdForRetry}:If the {@link #primaryRequestId} + * is an empty-string, and it's a retry iteration. + * {@link #primaryRequestId} for other cases. + */ + private String getPrimaryRequestIdForHeader(final Boolean isRetry) { + if (!primaryRequestId.isEmpty() || !isRetry) { + return primaryRequestId; + } + return primaryRequestIdForRetry; } private String addFailureReasons(final String header, diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java index b91a3e2208b..23e65ed2dd2 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java @@ -31,12 +31,14 @@ import org.junit.Assume; import org.junit.AssumptionViolatedException; import org.junit.Ignore; import org.junit.Test; +import org.mockito.Mockito; import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.enums.Trilean; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; @@ -198,4 +200,74 @@ public class TestTracingContext extends AbstractAbfsIntegrationTest { fs.getAbfsStore().setNamespaceEnabled(Trilean.TRUE); fs.access(new Path("/"), FsAction.READ); } + + @Test + public void testRetryPrimaryRequestIdWhenInitiallySuppliedEmpty() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + final String fileSystemId = fs.getFileSystemId(); + final String clientCorrelationId = fs.getClientCorrelationId(); + final TracingHeaderFormat tracingHeaderFormat = TracingHeaderFormat.ALL_ID_FORMAT; + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE_FILESYSTEM, false, + 0)); + AbfsHttpOperation abfsHttpOperation = Mockito.mock(AbfsHttpOperation.class); + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito.anyString(), Mockito.anyString()); + tracingContext.constructHeader(abfsHttpOperation, null); + String header = tracingContext.getHeader(); + String clientRequestIdUsed = header.split(":")[1]; + String[] clientRequestIdUsedParts = clientRequestIdUsed.split("-"); + String assertionPrimaryId = clientRequestIdUsedParts[clientRequestIdUsedParts.length - 1]; + + tracingContext.setRetryCount(1); + tracingContext.setListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE_FILESYSTEM, false, + 1)); + + tracingContext.constructHeader(abfsHttpOperation, "RT"); + header = tracingContext.getHeader(); + String primaryRequestId = header.split(":")[3]; + + Assertions.assertThat(primaryRequestId) + .describedAs("PrimaryRequestId in a retried request's " + + "tracingContext should be equal to last part of original " + + "request's clientRequestId UUID") + .isEqualTo(assertionPrimaryId); + } + + @Test + public 
void testRetryPrimaryRequestIdWhenInitiallySuppliedNonEmpty() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + final String fileSystemId = fs.getFileSystemId(); + final String clientCorrelationId = fs.getClientCorrelationId(); + final TracingHeaderFormat tracingHeaderFormat = TracingHeaderFormat.ALL_ID_FORMAT; + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE_FILESYSTEM, false, + 0)); + tracingContext.setPrimaryRequestID(); + AbfsHttpOperation abfsHttpOperation = Mockito.mock(AbfsHttpOperation.class); + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito.anyString(), Mockito.anyString()); + tracingContext.constructHeader(abfsHttpOperation, null); + String header = tracingContext.getHeader(); + String assertionPrimaryId = header.split(":")[3]; + + tracingContext.setRetryCount(1); + tracingContext.setListener(new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.CREATE_FILESYSTEM, false, + 1)); + + tracingContext.constructHeader(abfsHttpOperation, "RT"); + header = tracingContext.getHeader(); + String primaryRequestId = header.split(":")[3]; + + Assertions.assertThat(primaryRequestId) + .describedAs("PrimaryRequestId in a retried request's tracingContext " + + "should be equal to PrimaryRequestId in the original request.") + .isEqualTo(assertionPrimaryId); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java index e195f1c381a..7569c80d67c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java @@ -130,6 +130,9 @@ public class TracingHeaderValidator implements Listener { } Assertions.assertThat(idList[5]).describedAs("Operation name incorrect") .isEqualTo(operation.toString()); + if (idList[6].contains("_")) { + idList[6] = idList[6].split("_")[0]; + } int retryCount = Integer.parseInt(idList[6]); Assertions.assertThat(retryCount) .describedAs("Retry was required due to issue on server side") From fa723ae83916af6e814923eca53a5fef046d33aa Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Sat, 18 Mar 2023 02:59:45 +0800 Subject: [PATCH 30/97] YARN-11445. [Federation] Add getClusterInfo, getClusterUserInfo REST APIs for Router. 
(#5472) --- .../webapp/dao/ClusterInfo.java | 10 +++ .../webapp/dao/ClusterUserInfo.java | 10 +++ .../yarn/server/router/RouterMetrics.java | 62 +++++++++++++ .../webapp/FederationInterceptorREST.java | 76 +++++++++++++++- .../webapp/dao/FederationClusterInfo.java | 50 +++++++++++ .../webapp/dao/FederationClusterUserInfo.java | 49 +++++++++++ .../yarn/server/router/TestRouterMetrics.java | 64 ++++++++++++++ .../MockDefaultRequestInterceptorREST.java | 16 +++- .../webapp/TestFederationInterceptorREST.java | 88 +++++++++++++++++++ .../TestableFederationInterceptorREST.java | 4 + 10 files changed, 426 insertions(+), 3 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterInfo.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterUserInfo.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java index a429215dd5d..4596f37f1e3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java @@ -44,6 +44,8 @@ public class ClusterInfo { protected String hadoopVersionBuiltOn; protected String haZooKeeperConnectionState; + private String subClusterId; + public ClusterInfo() { } // JAXB needs this @@ -113,4 +115,12 @@ public class ClusterInfo { public String getHAZookeeperConnectionState() { return this.haZooKeeperConnectionState; } + + public String getSubClusterId() { + return subClusterId; + } + + public void setSubClusterId(String subClusterId) { + this.subClusterId = subClusterId; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterUserInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterUserInfo.java index 7a6bd40b942..a65fc845563 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterUserInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterUserInfo.java @@ -42,6 +42,8 @@ public class ClusterUserInfo { // User who has placed the request protected String requestedUser; + private String subClusterId; + public ClusterUserInfo() { } @@ -61,4 +63,12 @@ public class ClusterUserInfo { public String getRequestedUser() { return requestedUser; } + + public String getSubClusterId() { + return subClusterId; + } + + public void setSubClusterId(String subClusterId) { + this.subClusterId = subClusterId; + } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java index fdcd890ea6e..3338013ebac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java @@ -159,6 +159,10 @@ public final class RouterMetrics { private MutableGaugeInt numAddToClusterNodeLabelsFailedRetrieved; @Metric("# of removeFromClusterNodeLabels failed to be retrieved") private MutableGaugeInt numRemoveFromClusterNodeLabelsFailedRetrieved; + @Metric("# of getClusterInfo failed to be retrieved") + private MutableGaugeInt numGetClusterInfoFailedRetrieved; + @Metric("# of getClusterUserInfo failed to be retrieved") + private MutableGaugeInt numGetClusterUserInfoFailedRetrieved; // Aggregate metrics are shared, and don't have to be looked up per call @Metric("Total number of successful Submitted apps and latency(ms)") @@ -279,6 +283,10 @@ public final class RouterMetrics { private MutableRate totalSucceededAddToClusterNodeLabelsRetrieved; @Metric("Total number of successful Retrieved RemoveFromClusterNodeLabels and latency(ms)") private MutableRate totalSucceededRemoveFromClusterNodeLabelsRetrieved; + @Metric("Total number of successful Retrieved GetClusterInfoRetrieved and latency(ms)") + private MutableRate totalSucceededGetClusterInfoRetrieved; + @Metric("Total number of successful Retrieved GetClusterUserInfoRetrieved and latency(ms)") + private MutableRate totalSucceededGetClusterUserInfoRetrieved; /** * Provide quantile counters for all latencies. 
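Each REST call that the Router learns to federate gets the same three metrics: a failure gauge, an aggregate success rate (count plus mean latency), and a quantiles series created against the MetricsRegistry. The sketch below shows that pattern in isolation; the class name ExampleRouterCallMetrics and the getFoo operation are hypothetical stand-ins, not part of this patch.

    import org.apache.hadoop.metrics2.annotation.Metric;
    import org.apache.hadoop.metrics2.annotation.Metrics;
    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
    import org.apache.hadoop.metrics2.lib.MetricsRegistry;
    import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
    import org.apache.hadoop.metrics2.lib.MutableQuantiles;
    import org.apache.hadoop.metrics2.lib.MutableRate;

    @Metrics(about = "Illustrative per-call Router metrics", context = "router")
    public final class ExampleRouterCallMetrics {

      // Failure counter: bumped once per failed getFoo call.
      @Metric("# of getFoo failed to be retrieved")
      private MutableGaugeInt numGetFooFailedRetrieved;

      // Success count plus mean latency, read back through lastStat().
      @Metric("Total number of successful Retrieved GetFoo and latency(ms)")
      private MutableRate totalSucceededGetFooRetrieved;

      private final MetricsRegistry registry = new MetricsRegistry("ExampleRouterCallMetrics");
      private final MutableQuantiles getFooLatency;

      public ExampleRouterCallMetrics() {
        // Quantiles are created explicitly; the annotated fields are wired up on register().
        getFooLatency = registry.newQuantiles("getFooLatency",
            "latency of getFoo timeouts", "ops", "latency", 10);
        DefaultMetricsSystem.instance().register("ExampleRouterCallMetrics",
            "Illustrative per-call Router metrics", this);
      }

      public void succeededGetFooRetrieved(long durationMs) {
        totalSucceededGetFooRetrieved.add(durationMs);
        getFooLatency.add(durationMs);
      }

      public void incrGetFooFailedRetrieved() {
        numGetFooFailedRetrieved.incr();
      }

      public static void main(String[] args) {
        ExampleRouterCallMetrics metrics = new ExampleRouterCallMetrics();
        metrics.succeededGetFooRetrieved(150);
        metrics.succeededGetFooRetrieved(300);
        metrics.incrGetFooFailedRetrieved();
        System.out.println("successes=" + metrics.totalSucceededGetFooRetrieved.lastStat().numSamples()
            + " meanMs=" + metrics.totalSucceededGetFooRetrieved.lastStat().mean()
            + " failures=" + metrics.numGetFooFailedRetrieved.value());
      }
    }
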
@@ -342,6 +350,8 @@ public final class RouterMetrics { private MutableQuantiles replaceLabelsOnNodeLatency; private MutableQuantiles addToClusterNodeLabelsLatency; private MutableQuantiles removeFromClusterNodeLabelsLatency; + private MutableQuantiles getClusterInfoLatency; + private MutableQuantiles getClusterUserInfoLatency; private static volatile RouterMetrics instance = null; private static MetricsRegistry registry; @@ -551,6 +561,12 @@ public final class RouterMetrics { removeFromClusterNodeLabelsLatency = registry.newQuantiles("removeFromClusterNodeLabelsLatency", "latency of remove cluster nodelabels timeouts", "ops", "latency", 10); + + getClusterInfoLatency = registry.newQuantiles("getClusterInfoLatency", + "latency of get cluster info timeouts", "ops", "latency", 10); + + getClusterUserInfoLatency = registry.newQuantiles("getClusterUserInfoLatency", + "latency of get cluster user info timeouts", "ops", "latency", 10); } public static RouterMetrics getMetrics() { @@ -847,6 +863,16 @@ public final class RouterMetrics { return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().numSamples(); } + @VisibleForTesting + public long getNumSucceededGetClusterInfoRetrieved() { + return totalSucceededGetClusterInfoRetrieved.lastStat().numSamples(); + } + + @VisibleForTesting + public long getNumSucceededGetClusterUserInfoRetrieved() { + return totalSucceededGetClusterUserInfoRetrieved.lastStat().numSamples(); + } + @VisibleForTesting public long getNumSucceededRefreshSuperUserGroupsConfigurationRetrieved() { return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().numSamples(); @@ -1137,6 +1163,16 @@ public final class RouterMetrics { return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().mean(); } + @VisibleForTesting + public double getLatencySucceededGetClusterInfoRetrieved() { + return totalSucceededGetClusterInfoRetrieved.lastStat().mean(); + } + + @VisibleForTesting + public double getLatencySucceededGetClusterUserInfoRetrieved() { + return totalSucceededGetClusterUserInfoRetrieved.lastStat().mean(); + } + @VisibleForTesting public double getLatencySucceededRefreshSuperUserGroupsConfigurationRetrieved() { return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().mean(); @@ -1382,6 +1418,14 @@ public final class RouterMetrics { return numRemoveFromClusterNodeLabelsFailedRetrieved.value(); } + public int getClusterInfoFailedRetrieved() { + return numGetClusterInfoFailedRetrieved.value(); + } + + public int getClusterUserInfoFailedRetrieved() { + return numGetClusterUserInfoFailedRetrieved.value(); + } + public int getDelegationTokenFailedRetrieved() { return numGetDelegationTokenFailedRetrieved.value(); } @@ -1685,6 +1729,16 @@ public final class RouterMetrics { removeFromClusterNodeLabelsLatency.add(duration); } + public void succeededGetClusterInfoRetrieved(long duration) { + totalSucceededGetClusterInfoRetrieved.add(duration); + getClusterInfoLatency.add(duration); + } + + public void succeededGetClusterUserInfoRetrieved(long duration) { + totalSucceededGetClusterUserInfoRetrieved.add(duration); + getClusterUserInfoLatency.add(duration); + } + public void succeededRefreshSuperUserGroupsConfRetrieved(long duration) { totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.add(duration); refreshSuperUserGroupsConfLatency.add(duration); @@ -1905,6 +1959,14 @@ public final class RouterMetrics { numRemoveFromClusterNodeLabelsFailedRetrieved.incr(); } + public void incrGetClusterInfoFailedRetrieved() { + 
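+ // Incremented by FederationInterceptorREST on every getClusterInfo error path: in both catch blocks and again before the final fall-through throw.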
numGetClusterInfoFailedRetrieved.incr(); + } + + public void incrGetClusterUserInfoFailedRetrieved() { + numGetClusterUserInfoFailedRetrieved.incr(); + } + public void incrGetDelegationTokenFailedRetrieved() { numGetDelegationTokenFailedRetrieved.incr(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java index 94b4b1ca251..857e4c52c6f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java @@ -129,6 +129,8 @@ import org.apache.hadoop.yarn.server.router.webapp.dao.FederationBulkActivitiesI import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.SubClusterResult; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationSchedulerTypeInfo; +import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterUserInfo; +import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterInfo; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo; @@ -1137,14 +1139,84 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { return getClusterInfo(); } + /** + * This method retrieves the cluster information, and it is reachable by using + * {@link RMWSConsts#INFO}. + * + * In Federation mode, we will return a FederationClusterInfo object, + * which contains a set of ClusterInfo. + * + * @return the cluster information. 
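The override that closes this Javadoc fans the request out to every active subcluster through the interceptor's invokeConcurrent helper and tags each answer with its subcluster id before aggregating. The sketch below shows only that general scatter-gather shape as a self-contained example; the SubClusterFanOut class, the thread pool, and the String payload are illustrative assumptions, not the Router's actual plumbing.

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    /** Illustrative scatter-gather over subclusters; not the Router's invokeConcurrent. */
    public final class SubClusterFanOut {

      /** Stand-in for the per-subcluster payload (for example, a ClusterInfo). */
      public static final class Result {
        final String subClusterId;
        final String payload;
        Result(String subClusterId, String payload) {
          this.subClusterId = subClusterId;
          this.payload = payload;
        }
      }

      /**
       * Runs the same operation once per subcluster in parallel and tags each
       * answer with the subcluster it came from, mirroring how the federated
       * responses are assembled.
       */
      public static List<Result> fanOut(Map<String, Callable<String>> callPerSubCluster)
          throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(
            Math.max(1, callPerSubCluster.size()));
        try {
          Map<String, Future<String>> futures = new LinkedHashMap<>();
          for (Map.Entry<String, Callable<String>> e : callPerSubCluster.entrySet()) {
            futures.put(e.getKey(), pool.submit(e.getValue()));
          }
          List<Result> results = new ArrayList<>();
          for (Map.Entry<String, Future<String>> e : futures.entrySet()) {
            try {
              results.add(new Result(e.getKey(), e.getValue().get()));
            } catch (ExecutionException failed) {
              // In this sketch a failed subcluster is skipped and logged; the
              // actual interceptor's error handling and metrics differ.
              System.err.println("Subcluster " + e.getKey() + " failed: " + failed.getCause());
            }
          }
          return results;
        } finally {
          pool.shutdown();
        }
      }

      public static void main(String[] args) throws InterruptedException {
        Map<String, Callable<String>> calls = new LinkedHashMap<>();
        calls.put("SC-1", () -> "clusterInfo-from-SC-1");
        calls.put("SC-2", () -> "clusterInfo-from-SC-2");
        for (Result r : fanOut(calls)) {
          System.out.println(r.subClusterId + " -> " + r.payload);
        }
      }
    }
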
+ */ @Override public ClusterInfo getClusterInfo() { - throw new NotImplementedException("Code is not implemented"); + try { + long startTime = Time.now(); + Map subClustersActive = getActiveSubclusters(); + Class[] argsClasses = new Class[]{}; + Object[] args = new Object[]{}; + ClientMethod remoteMethod = new ClientMethod("getClusterInfo", argsClasses, args); + Map subClusterInfoMap = + invokeConcurrent(subClustersActive.values(), remoteMethod, ClusterInfo.class); + FederationClusterInfo federationClusterInfo = new FederationClusterInfo(); + subClusterInfoMap.forEach((subClusterInfo, clusterInfo) -> { + SubClusterId subClusterId = subClusterInfo.getSubClusterId(); + clusterInfo.setSubClusterId(subClusterId.getId()); + federationClusterInfo.getList().add(clusterInfo); + }); + long stopTime = Time.now(); + routerMetrics.succeededGetClusterInfoRetrieved(stopTime - startTime); + return federationClusterInfo; + } catch (NotFoundException e) { + routerMetrics.incrGetClusterInfoFailedRetrieved(); + RouterServerUtil.logAndThrowRunTimeException("Get all active sub cluster(s) error.", e); + } catch (YarnException | IOException e) { + routerMetrics.incrGetClusterInfoFailedRetrieved(); + RouterServerUtil.logAndThrowRunTimeException("getClusterInfo error.", e); + } + routerMetrics.incrGetClusterInfoFailedRetrieved(); + throw new RuntimeException("getClusterInfo error."); } + /** + * This method retrieves the cluster user information, and it is reachable by using + * {@link RMWSConsts#CLUSTER_USER_INFO}. + * + * In Federation mode, we will return a ClusterUserInfo object, + * which contains a set of ClusterUserInfo. + * + * @param hsr the servlet request + * @return the cluster user information + */ @Override public ClusterUserInfo getClusterUserInfo(HttpServletRequest hsr) { - throw new NotImplementedException("Code is not implemented"); + try { + long startTime = Time.now(); + Map subClustersActive = getActiveSubclusters(); + final HttpServletRequest hsrCopy = clone(hsr); + Class[] argsClasses = new Class[]{HttpServletRequest.class}; + Object[] args = new Object[]{hsrCopy}; + ClientMethod remoteMethod = new ClientMethod("getClusterUserInfo", argsClasses, args); + Map subClusterInfoMap = + invokeConcurrent(subClustersActive.values(), remoteMethod, ClusterUserInfo.class); + FederationClusterUserInfo federationClusterUserInfo = new FederationClusterUserInfo(); + subClusterInfoMap.forEach((subClusterInfo, clusterUserInfo) -> { + SubClusterId subClusterId = subClusterInfo.getSubClusterId(); + clusterUserInfo.setSubClusterId(subClusterId.getId()); + federationClusterUserInfo.getList().add(clusterUserInfo); + }); + long stopTime = Time.now(); + routerMetrics.succeededGetClusterUserInfoRetrieved(stopTime - startTime); + return federationClusterUserInfo; + } catch (NotFoundException e) { + routerMetrics.incrGetClusterUserInfoFailedRetrieved(); + RouterServerUtil.logAndThrowRunTimeException("Get all active sub cluster(s) error.", e); + } catch (YarnException | IOException e) { + routerMetrics.incrGetClusterUserInfoFailedRetrieved(); + RouterServerUtil.logAndThrowRunTimeException("getClusterUserInfo error.", e); + } + routerMetrics.incrGetClusterUserInfoFailedRetrieved(); + throw new RuntimeException("getClusterUserInfo error."); } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterInfo.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterInfo.java new file mode 100644 index 00000000000..9b8d7b5431a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterInfo.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.router.webapp.dao; + +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; +import java.util.ArrayList; +import java.util.List; + +@XmlRootElement +@XmlAccessorType(XmlAccessType.FIELD) +public class FederationClusterInfo extends ClusterInfo { + + @XmlElement(name = "subCluster") + private List list = new ArrayList<>(); + + public FederationClusterInfo() { + } // JAXB needs this + + public FederationClusterInfo(ArrayList list) { + this.list = list; + } + + public List getList() { + return list; + } + + public void setList(List list) { + this.list = list; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterUserInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterUserInfo.java new file mode 100644 index 00000000000..b4a19b7919d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationClusterUserInfo.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.router.webapp.dao; + +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; +import java.util.ArrayList; +import java.util.List; + +@XmlRootElement +@XmlAccessorType(XmlAccessType.FIELD) +public class FederationClusterUserInfo extends ClusterUserInfo { + @XmlElement(name = "subCluster") + private List list = new ArrayList<>(); + + public FederationClusterUserInfo() { + } // JAXB needs this + + public FederationClusterUserInfo(ArrayList list) { + this.list = list; + } + + public List getList() { + return list; + } + + public void setList(List list) { + this.list = list; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java index a3756174573..955948c91c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java @@ -568,6 +568,16 @@ public class TestRouterMetrics { LOG.info("Mocked: failed getBulkActivitie call"); metrics.incrGetBulkActivitiesFailedRetrieved(); } + + public void getClusterInfoFailed() { + LOG.info("Mocked: failed getClusterInfo call"); + metrics.incrGetClusterInfoFailedRetrieved(); + } + + public void getClusterUserInfoFailed() { + LOG.info("Mocked: failed getClusterUserInfo call"); + metrics.incrGetClusterUserInfoFailedRetrieved(); + } } // Records successes for all calls @@ -838,6 +848,16 @@ public class TestRouterMetrics { LOG.info("Mocked: successful AddToClusterNodeLabels call with duration {}", duration); metrics.succeededAddToClusterNodeLabelsRetrieved(duration); } + + public void getClusterInfoRetrieved(long duration) { + LOG.info("Mocked: successful GetClusterInfoRetrieved call with duration {}", duration); + metrics.succeededGetClusterInfoRetrieved(duration); + } + + public void getClusterUserInfoRetrieved(long duration) { + LOG.info("Mocked: successful GetClusterUserInfoRetrieved call with duration {}", duration); + metrics.succeededGetClusterUserInfoRetrieved(duration); + } } @Test @@ -1848,4 +1868,48 @@ public class TestRouterMetrics { Assert.assertEquals(225, metrics.getLatencySucceededAddToClusterNodeLabelsRetrieved(), ASSERT_DOUBLE_DELTA); } + + @Test + public void testGetClusterInfoRetrievedFailed() { + long totalBadBefore = metrics.getClusterInfoFailedRetrieved(); + badSubCluster.getClusterInfoFailed(); + Assert.assertEquals(totalBadBefore + 1, metrics.getClusterInfoFailedRetrieved()); + } + + @Test + public void testGetClusterInfoRetrieved() { + long totalGoodBefore = metrics.getNumSucceededGetClusterInfoRetrieved(); + 
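+ // Two successful samples (150 ms, then 300 ms) should raise the success count by two and leave the mean latency at 225 ms.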
goodSubCluster.getClusterInfoRetrieved(150); + Assert.assertEquals(totalGoodBefore + 1, + metrics.getNumSucceededGetClusterInfoRetrieved()); + Assert.assertEquals(150, + metrics.getLatencySucceededGetClusterInfoRetrieved(), ASSERT_DOUBLE_DELTA); + goodSubCluster.getClusterInfoRetrieved(300); + Assert.assertEquals(totalGoodBefore + 2, + metrics.getNumSucceededGetClusterInfoRetrieved()); + Assert.assertEquals(225, + metrics.getLatencySucceededGetClusterInfoRetrieved(), ASSERT_DOUBLE_DELTA); + } + + @Test + public void testGetClusterUserInfoRetrievedFailed() { + long totalBadBefore = metrics.getClusterUserInfoFailedRetrieved(); + badSubCluster.getClusterUserInfoFailed(); + Assert.assertEquals(totalBadBefore + 1, metrics.getClusterUserInfoFailedRetrieved()); + } + + @Test + public void testGetClusterUserInfoRetrieved() { + long totalGoodBefore = metrics.getNumSucceededGetClusterUserInfoRetrieved(); + goodSubCluster.getClusterUserInfoRetrieved(150); + Assert.assertEquals(totalGoodBefore + 1, + metrics.getNumSucceededGetClusterUserInfoRetrieved()); + Assert.assertEquals(150, + metrics.getLatencySucceededGetClusterUserInfoRetrieved(), ASSERT_DOUBLE_DELTA); + goodSubCluster.getClusterUserInfoRetrieved(300); + Assert.assertEquals(totalGoodBefore + 2, + metrics.getNumSucceededGetClusterUserInfoRetrieved()); + Assert.assertEquals(225, + metrics.getLatencySucceededGetClusterUserInfoRetrieved(), ASSERT_DOUBLE_DELTA); + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java index 653224a7d37..c34167f9219 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java @@ -111,7 +111,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppState; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ApplicationSubmissionContextInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppsInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NewApplication; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodesInfo; @@ -161,7 +163,6 @@ import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import static org.apache.hadoop.yarn.server.router.webapp.BaseRouterWebServicesTest.QUEUE_DEFAULT; import static org.apache.hadoop.yarn.server.router.webapp.BaseRouterWebServicesTest.QUEUE_DEFAULT_FULL; import static org.apache.hadoop.yarn.server.router.webapp.BaseRouterWebServicesTest.QUEUE_DEDICATED; @@ -1363,4 +1364,17 @@ public class MockDefaultRequestInterceptorREST } throw new 
YarnException("removeFromClusterNodeLabels Error"); } + + @Override + public ClusterInfo getClusterInfo() { + ClusterInfo clusterInfo = new ClusterInfo(mockRM); + return clusterInfo; + } + + @Override + public ClusterUserInfo getClusterUserInfo(HttpServletRequest hsr) { + String remoteUser = hsr.getRemoteUser(); + UserGroupInformation callerUGI = UserGroupInformation.createRemoteUser(remoteUser); + return new ClusterUserInfo(mockRM, callerUGI); + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java index a2831657dc8..784fbd15ce1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java @@ -73,10 +73,13 @@ import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationHome import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSubCluster; import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade; import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreTestUtil; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppState; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ApplicationSubmissionContextInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppsInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterUserInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NewApplication; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; @@ -128,9 +131,12 @@ import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationBulkActivitiesInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationSchedulerTypeInfo; +import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterInfo; +import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterUserInfo; import org.apache.hadoop.yarn.util.LRUCacheHashMap; import org.apache.hadoop.yarn.util.MonotonicClock; import org.apache.hadoop.yarn.util.Times; +import org.apache.hadoop.yarn.util.YarnVersionInfo; import org.apache.hadoop.yarn.webapp.BadRequestException; import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import org.junit.Assert; @@ -2127,4 +2133,86 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { LambdaTestUtils.intercept(YarnRuntimeException.class, "removeFromClusterNodeLabels Error", () -> interceptor.removeFromClusterNodeLabels(oldNodeLabels1, null)); } + + @Test + public void testGetClusterUserInfo() { + String requestUserName = "test-user"; + 
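+ // getClusterUserInfo resolves the caller from the servlet request, so the mocked request must return a remote user for each subcluster RM to report back.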
HttpServletRequest hsr = mock(HttpServletRequest.class); + when(hsr.getRemoteUser()).thenReturn(requestUserName); + ClusterUserInfo clusterUserInfo = interceptor.getClusterUserInfo(hsr); + + Assert.assertNotNull(clusterUserInfo); + Assert.assertTrue(clusterUserInfo instanceof FederationClusterUserInfo); + + FederationClusterUserInfo federationClusterUserInfo = + (FederationClusterUserInfo) clusterUserInfo; + + List fedClusterUserInfoList = federationClusterUserInfo.getList(); + Assert.assertNotNull(fedClusterUserInfoList); + Assert.assertEquals(4, fedClusterUserInfoList.size()); + + List subClusterIds = subClusters.stream().map( + subClusterId -> subClusterId.getId()).collect(Collectors.toList()); + MockRM mockRM = interceptor.getMockRM(); + + for (ClusterUserInfo fedClusterUserInfo : fedClusterUserInfoList) { + // Check subClusterId + String subClusterId = fedClusterUserInfo.getSubClusterId(); + Assert.assertNotNull(subClusterId); + Assert.assertTrue(subClusterIds.contains(subClusterId)); + + // Check requestedUser + String requestedUser = fedClusterUserInfo.getRequestedUser(); + Assert.assertNotNull(requestedUser); + Assert.assertEquals(requestUserName, requestedUser); + + // Check rmLoginUser + String rmLoginUser = fedClusterUserInfo.getRmLoginUser(); + Assert.assertNotNull(rmLoginUser); + Assert.assertEquals(mockRM.getRMLoginUser(), rmLoginUser); + } + } + + @Test + public void testGetClusterInfo() { + ClusterInfo clusterInfos = interceptor.getClusterInfo(); + Assert.assertNotNull(clusterInfos); + Assert.assertTrue(clusterInfos instanceof FederationClusterInfo); + + FederationClusterInfo federationClusterInfos = + (FederationClusterInfo) (clusterInfos); + + List fedClusterInfosList = federationClusterInfos.getList(); + Assert.assertNotNull(fedClusterInfosList); + Assert.assertEquals(4, fedClusterInfosList.size()); + + List subClusterIds = subClusters.stream().map( + subClusterId -> subClusterId.getId()).collect(Collectors.toList()); + + MockRM mockRM = interceptor.getMockRM(); + String yarnVersion = YarnVersionInfo.getVersion(); + + for (ClusterInfo clusterInfo : fedClusterInfosList) { + String subClusterId = clusterInfo.getSubClusterId(); + // Check subClusterId + Assert.assertTrue(subClusterIds.contains(subClusterId)); + + // Check state + String clusterState = mockRM.getServiceState().toString(); + Assert.assertEquals(clusterState, clusterInfo.getState()); + + // Check rmStateStoreName + String rmStateStoreName = + mockRM.getRMContext().getStateStore().getClass().getName(); + Assert.assertEquals(rmStateStoreName, clusterInfo.getRMStateStore()); + + // Check RM Version + Assert.assertEquals(yarnVersion, clusterInfo.getRMVersion()); + + // Check haZooKeeperConnectionState + String rmHAZookeeperConnectionState = mockRM.getRMContext().getHAZookeeperConnectionState(); + Assert.assertEquals(rmHAZookeeperConnectionState, + clusterInfo.getHAZookeeperConnectionState()); + } + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestableFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestableFederationInterceptorREST.java index 31fd756b664..0e37b7c9749 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestableFederationInterceptorREST.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestableFederationInterceptorREST.java @@ -117,4 +117,8 @@ public class TestableFederationInterceptorREST } super.shutdown(); } + + public MockRM getMockRM() { + return mockRM; + } } \ No newline at end of file From b6a9d7b4429970e332e5e07f0cf3265ee5d0e909 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Fri, 17 Mar 2023 15:33:50 -0700 Subject: [PATCH 31/97] HADOOP-18631. (ADDENDUM) Use LogCapturer to match audit log pattern and remove hdfs async audit log configs (#5451) --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 37 ------ .../hdfs/server/namenode/FSNamesystem.java | 10 +- .../src/main/resources/hdfs-default.xml | 29 ----- .../hdfs/server/namenode/TestAuditLogs.java | 106 ++++++++---------- .../hadoop/hdfs/server/namenode/TestFsck.java | 67 ++++++----- 5 files changed, 89 insertions(+), 160 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index e5e21e4307a..3286ffb4f09 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -733,43 +733,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME = "default"; public static final String DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY = "dfs.namenode.audit.log.token.tracking.id"; public static final boolean DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT = false; - /** - * Deprecated. Use log4j properties instead. - * Set system env variable HDFS_AUDIT_LOGGER, which in tern assigns the value to - * "hdfs.audit.logger" for log4j properties to determine log level and appender. - */ - @Deprecated - public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY = "dfs.namenode.audit.log.async"; - @Deprecated - public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT = false; - - /** - * Deprecated. Use log4j properties instead. - * Set value to Async appender "blocking" property as part of log4j properties configuration. - *

- * For example, - * log4j.appender.ASYNCAPPENDER=org.apache.log4j.AsyncAppender - * log4j.appender.ASYNCAPPENDER.blocking=false - */ - @Deprecated - public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_KEY = - "dfs.namenode.audit.log.async.blocking"; - @Deprecated - public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_DEFAULT = true; - - /** - * Deprecated. Use log4j properties instead. - * Set value to Async appender "bufferSize" property as part of log4j properties configuration. - *

- * For example, - * log4j.appender.ASYNCAPPENDER=org.apache.log4j.AsyncAppender - * log4j.appender.ASYNCAPPENDER.bufferSize=128 - */ - @Deprecated - public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_KEY = - "dfs.namenode.audit.log.async.buffer.size"; - @Deprecated - public static final int DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_DEFAULT = 128; public static final String DFS_NAMENODE_AUDIT_LOG_DEBUG_CMDLIST = "dfs.namenode.audit.log.debug.cmdlist"; public static final String DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY = "dfs.namenode.metrics.logger.period.seconds"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 0e46dca9dff..107439defee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -48,8 +48,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT; @@ -1069,11 +1067,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, } } - @SuppressWarnings("deprecation") private static void checkForAsyncLogEnabledByOldConfigs(Configuration conf) { - if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY, DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) { - LOG.warn("Use log4j properties to enable async log for audit logs. {} is deprecated", - DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY); + // dfs.namenode.audit.log.async is no longer in use. Use log4j properties instead. + if (conf.getBoolean("dfs.namenode.audit.log.async", false)) { + LOG.warn("Use log4j properties to enable async log for audit logs. " + + "dfs.namenode.audit.log.async is no longer in use."); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 5643a9b5c5e..bdd048004d3 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -5099,35 +5099,6 @@ - - dfs.namenode.audit.log.async - false - - If true, enables asynchronous audit log. - - - - - dfs.namenode.audit.log.async.blocking - true - - Only used when enables asynchronous audit log. Sets whether audit log async - appender should wait if there is no space available in the event buffer or - immediately return. Default value is true. - - - - - dfs.namenode.audit.log.async.buffer.size - 128 - - Only used when enables asynchronous audit log. 
Sets the number of audit - logs allowed in the event buffer before the calling thread is blocked - (if dfs.namenode.audit.log.async.blocking is true) or until logs are - summarized and discarded. Default value is 128. - - - dfs.namenode.audit.log.token.tracking.id false diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java index 698178e4e96..0f736696751 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java @@ -20,12 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.junit.Assert.*; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; import java.io.InputStream; -import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -46,12 +41,15 @@ import org.apache.hadoop.hdfs.web.WebHdfsTestUtil; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.GenericTestUtils.LogCapturer; import org.apache.log4j.Appender; import org.apache.log4j.AsyncAppender; import org.apache.log4j.Logger; import org.junit.After; +import org.junit.AfterClass; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -66,11 +64,10 @@ public class TestAuditLogs { private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(TestAuditLogs.class); - private static final File AUDIT_LOG_FILE = - new File(System.getProperty("hadoop.log.dir"), "hdfs-audit.log"); - final boolean useAsyncEdits; + private static LogCapturer auditLogCapture; + @Parameters public static Collection data() { Collection params = new ArrayList<>(); @@ -111,9 +108,6 @@ public class TestAuditLogs { @Before public void setupCluster() throws Exception { - try (PrintWriter writer = new PrintWriter(AUDIT_LOG_FILE)) { - writer.print(""); - } // must configure prior to instantiating the namesystem because it // will reconfigure the logger if async is enabled conf = new HdfsConfiguration(); @@ -132,21 +126,15 @@ public class TestAuditLogs { "org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit"); @SuppressWarnings("unchecked") List appenders = Collections.list(logger.getAllAppenders()); - assertEquals(1, appenders.size()); assertTrue(appenders.get(0) instanceof AsyncAppender); fnames = util.getFileNames(fileName); util.waitReplication(fs, fileName, (short)3); userGroupInfo = UserGroupInformation.createUserForTesting(username, groups); - LOG.info("Audit log file: {}, exists: {}, length: {}", AUDIT_LOG_FILE, AUDIT_LOG_FILE.exists(), - AUDIT_LOG_FILE.length()); } @After public void teardownCluster() throws Exception { - try (PrintWriter writer = new PrintWriter(AUDIT_LOG_FILE)) { - writer.print(""); - } util.cleanup(fs, "/srcdat"); if (fs != null) { fs.close(); @@ -158,6 +146,17 @@ public class TestAuditLogs { } } + @BeforeClass + public static void beforeClass() { + auditLogCapture = LogCapturer.captureLogs(FSNamesystem.AUDIT_LOG); + } + + @AfterClass + public static void afterClass() { + auditLogCapture.stopCapturing(); + } + + /** test that 
allowed operation puts proper entry in audit log */ @Test public void testAuditAllowed() throws Exception { @@ -273,54 +272,47 @@ public class TestAuditLogs { verifySuccessCommandsAuditLogs(1, "foo", "cmd=create"); } - private void verifySuccessCommandsAuditLogs(int leastExpected, String file, String cmd) - throws IOException { - - try (BufferedReader reader = new BufferedReader(new FileReader(AUDIT_LOG_FILE))) { - String line; - int success = 0; - while ((line = reader.readLine()) != null) { - assertNotNull(line); - LOG.info("Line: {}", line); - if (SUCCESS_PATTERN.matcher(line).matches() && line.contains(file) && line.contains( - cmd)) { - assertTrue("Expected audit event not found in audit log", - AUDIT_PATTERN.matcher(line).matches()); - LOG.info("Successful verification. Log line: {}", line); - success++; - } + private void verifySuccessCommandsAuditLogs(int leastExpected, String file, String cmd) { + String[] auditLogOutputLines = auditLogCapture.getOutput().split("\\n"); + int success = 0; + for (String auditLogLine : auditLogOutputLines) { + if (!auditLogLine.contains("allowed=")) { + continue; } - if (success < leastExpected) { - throw new AssertionError( - "Least expected: " + leastExpected + ". Actual success: " + success); + String line = "allowed=" + auditLogLine.split("allowed=")[1]; + LOG.info("Line: {}", line); + if (SUCCESS_PATTERN.matcher(line).matches() && line.contains(file) && line.contains(cmd)) { + assertTrue("Expected audit event not found in audit log", + AUDIT_PATTERN.matcher(line).matches()); + LOG.info("Successful verification. Log line: {}", line); + success++; } } + if (success < leastExpected) { + throw new AssertionError( + "Least expected: " + leastExpected + ". Actual success: " + success); + } } - private void verifyFailedCommandsAuditLogs(int leastExpected, String file, String cmd) - throws IOException { - - try (BufferedReader reader = new BufferedReader(new FileReader(AUDIT_LOG_FILE))) { - String line; - int success = 0; - while ((line = reader.readLine()) != null) { - assertNotNull(line); - LOG.info("Line: {}", line); - if (FAILURE_PATTERN.matcher(line).matches() && line.contains(file) && line.contains( - cmd)) { - assertTrue("Expected audit event not found in audit log", - AUDIT_PATTERN.matcher(line).matches()); - LOG.info("Failure verification. Log line: {}", line); - success++; - } + private void verifyFailedCommandsAuditLogs(int expected, String file, String cmd) { + String[] auditLogOutputLines = auditLogCapture.getOutput().split("\\n"); + int success = 0; + for (String auditLogLine : auditLogOutputLines) { + if (!auditLogLine.contains("allowed=")) { + continue; } - assertEquals("Expected: " + leastExpected + ". Actual failure: " + success, leastExpected, - success); - if (success < leastExpected) { - throw new AssertionError( - "Least expected: " + leastExpected + ". Actual success: " + success); + String line = "allowed=" + auditLogLine.split("allowed=")[1]; + LOG.info("Line: {}", line); + if (FAILURE_PATTERN.matcher(line).matches() && line.contains(file) && line.contains( + cmd)) { + assertTrue("Expected audit event not found in audit log", + AUDIT_PATTERN.matcher(line).matches()); + LOG.info("Failure verification. Log line: {}", line); + success++; } } + assertEquals("Expected: " + expected + ". 
Actual failure: " + success, expected, + success); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index 8d8183e5ad1..a312b03168b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java @@ -30,12 +30,10 @@ import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; -import java.io.FileReader; import java.io.IOException; import java.io.PrintStream; import java.io.PrintWriter; @@ -117,11 +115,13 @@ import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.LogCapturer; import org.apache.hadoop.util.ToolRunner; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.LoggerFactory; @@ -132,10 +132,7 @@ public class TestFsck { private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(TestFsck.class.getName()); - private static final File AUDIT_LOG_FILE = - new File(System.getProperty("hadoop.log.dir"), "hdfs-audit.log"); - - // Pattern for: + // Pattern for: // allowed=true ugi=name ip=/address cmd=FSCK src=/ dst=null perm=null static final Pattern FSCK_PATTERN = Pattern.compile( "allowed=.*?\\s" + @@ -159,6 +156,8 @@ public class TestFsck { private static final String LINE_SEPARATOR = System.getProperty("line.separator"); + private static LogCapturer auditLogCapture; + public static String runFsck(Configuration conf, int expectedErrCode, boolean checkErrorCode, String... 
path) throws Exception { @@ -179,6 +178,16 @@ public class TestFsck { private MiniDFSCluster cluster = null; private Configuration conf = null; + @BeforeClass + public static void beforeClass() { + auditLogCapture = LogCapturer.captureLogs(FSNamesystem.AUDIT_LOG); + } + + @AfterClass + public static void afterClass() { + auditLogCapture.stopCapturing(); + } + @Before public void setUp() throws Exception { conf = new Configuration(); @@ -191,11 +200,6 @@ public class TestFsck { shutdownCluster(); } - @AfterClass - public static void afterClass() throws Exception { - assertTrue(AUDIT_LOG_FILE.delete()); - } - private void shutdownCluster() throws Exception { if (cluster != null) { cluster.shutdown(); @@ -245,29 +249,30 @@ public class TestFsck { util.cleanup(fs, "/srcdat"); } - private void verifyAuditLogs() throws IOException { - try (BufferedReader reader = new BufferedReader(new FileReader(AUDIT_LOG_FILE))) { - // Audit log should contain one getfileinfo and one fsck - String line; - int getFileStatusSuccess = 0; - int fsckCount = 0; - while ((line = reader.readLine()) != null) { - LOG.info("Line: {}", line); - if (line.contains("cmd=getfileinfo") && GET_FILE_INFO_PATTERN.matcher(line).matches()) { - getFileStatusSuccess++; - } else if (FSCK_PATTERN.matcher(line).matches()) { - fsckCount++; - } + private void verifyAuditLogs() { + String[] auditLogOutputLines = auditLogCapture.getOutput().split("\\n"); + int fileStatusSuccess = 0; + int fsckCount = 0; + for (String auditLogLine : auditLogOutputLines) { + if (!auditLogLine.contains("allowed=")) { + continue; } - if (getFileStatusSuccess < 2) { - throw new AssertionError( - "getfileinfo cmd should occur at least 2 times. Actual count: " + getFileStatusSuccess); - } - if (fsckCount < 1) { - throw new AssertionError( - "fsck should be present at least once. Actual count: " + fsckCount); + String extractedAuditLog = "allowed=" + auditLogLine.split("allowed=")[1]; + LOG.info("Line: {}", extractedAuditLog); + if (extractedAuditLog.contains("cmd=getfileinfo") && GET_FILE_INFO_PATTERN.matcher( + extractedAuditLog).matches()) { + fileStatusSuccess++; + } else if (FSCK_PATTERN.matcher(extractedAuditLog).matches()) { + fsckCount++; } } + if (fileStatusSuccess < 2) { + throw new AssertionError( + "getfileinfo cmd should occur at least 2 times. Actual count: " + fileStatusSuccess); + } + if (fsckCount < 1) { + throw new AssertionError("fsck should be present at least once. Actual count: " + fsckCount); + } } @Test From f8d0949f7d60f952fe658dd295bf040fc7cc2b15 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Sat, 18 Mar 2023 14:43:25 -0700 Subject: [PATCH 32/97] HDFS-16953. 
RBF: Mount table store APIs should update cache only if state store record is successfully updated (#5482) --- .../federation/store/impl/MountTableStoreImpl.java | 12 +++++++++--- .../server/federation/router/TestRouterAdminCLI.java | 12 +++++++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/impl/MountTableStoreImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/impl/MountTableStoreImpl.java index 680752b8efe..a9b0174e6a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/impl/MountTableStoreImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/impl/MountTableStoreImpl.java @@ -117,7 +117,9 @@ public class MountTableStoreImpl extends MountTableStore { AddMountTableEntryResponse response = AddMountTableEntryResponse.newInstance(); response.setStatus(status); - updateCacheAllRouters(); + if (status) { + updateCacheAllRouters(); + } return response; } else { AddMountTableEntryResponse response = @@ -139,7 +141,9 @@ public class MountTableStoreImpl extends MountTableStore { UpdateMountTableEntryResponse response = UpdateMountTableEntryResponse.newInstance(); response.setStatus(status); - updateCacheAllRouters(); + if (status) { + updateCacheAllRouters(); + } return response; } else { UpdateMountTableEntryResponse response = @@ -170,7 +174,9 @@ public class MountTableStoreImpl extends MountTableStore { RemoveMountTableEntryResponse response = RemoveMountTableEntryResponse.newInstance(); response.setStatus(status); - updateCacheAllRouters(); + if (status) { + updateCacheAllRouters(); + } return response; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java index 761fad2fb7a..75c79dd2c2e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java @@ -167,8 +167,9 @@ public class TestRouterAdminCLI { assertEquals(0, ToolRunner.run(admin, argv)); assertEquals(-1, ToolRunner.run(admin, argv)); - stateStore.loadCache(MountTableStoreImpl.class, true); + verifyMountTableContents(src, dest); + GetMountTableEntriesRequest getRequest = GetMountTableEntriesRequest .newInstance(src); GetMountTableEntriesResponse getResponse = client.getMountTableManager() @@ -207,6 +208,15 @@ public class TestRouterAdminCLI { assertTrue(mountTable.isFaultTolerant()); } + private void verifyMountTableContents(String src, String dest) throws Exception { + String[] argv = new String[] {"-ls", "/"}; + System.setOut(new PrintStream(out)); + assertEquals(0, ToolRunner.run(admin, argv)); + String response = out.toString(); + assertTrue("The response should have " + src + ": " + response, response.contains(src)); + assertTrue("The response should have " + dest + ": " + response, response.contains(dest)); + } + @Test public void testAddMountTableNotNormalized() throws Exception { String nsId = "ns0"; From 9a8287c36f2ecc41a8fe9f154df7784cf29ef75a Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 20 Mar 2023 19:07:06 -0700 Subject: [PATCH 
33/97] HADOOP-18669. Remove Log4Json Layout (#5493) --- .../java/org/apache/hadoop/log/Log4Json.java | 263 ----------------- .../org/apache/hadoop/log/TestLog4Json.java | 264 ------------------ 2 files changed, 527 deletions(-) delete mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/Log4Json.java delete mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/log/TestLog4Json.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/Log4Json.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/Log4Json.java deleted file mode 100644 index 68cf680a022..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/log/Log4Json.java +++ /dev/null @@ -1,263 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.hadoop.log; - -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.MappingJsonFactory; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectReader; -import com.fasterxml.jackson.databind.node.ContainerNode; -import org.apache.log4j.Layout; -import org.apache.log4j.helpers.ISO8601DateFormat; -import org.apache.log4j.spi.LoggingEvent; -import org.apache.log4j.spi.ThrowableInformation; - -import java.io.IOException; -import java.io.StringWriter; -import java.io.Writer; -import java.text.DateFormat; -import java.util.Date; - -/** - * This offers a log layout for JSON, with some test entry points. It's purpose is - * to allow Log4J to generate events that are easy for other programs to parse, but which are somewhat - * human-readable. - * - * Some features. - * - *

- *
- *   1. Every event is a standalone JSON clause
- *   2. Time is published as a time_t event since 1/1/1970 - this is the fastest to generate.
- *   3. An ISO date is generated, but this is cached and will only be accurate to within a second
- *   4. the stack trace is included as an array
- *
- * A simple log event will resemble the following:
- *     {"name":"test","time":1318429136789,"date":"2011-10-12 15:18:56,789","level":"INFO","thread":"main","message":"test message"}
- *
- * An event with an error will contain data similar to that below (which has been reformatted to be multi-line).
- *
- *     {
- *     "name":"testException",
- *     "time":1318429136789,
- *     "date":"2011-10-12 15:18:56,789",
- *     "level":"INFO",
- *     "thread":"quoted\"",
- *     "message":"new line\n and {}",
- *     "exceptionclass":"java.net.NoRouteToHostException",
- *     "stack":[
- *         "java.net.NoRouteToHostException: that box caught fire 3 years ago",
- *         "\tat org.apache.hadoop.log.TestLog4Json.testException(TestLog4Json.java:49)",
- *         "\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)",
- *         "\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)",
- *         "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)",
- *         "\tat java.lang.reflect.Method.invoke(Method.java:597)",
- *         "\tat junit.framework.TestCase.runTest(TestCase.java:168)",
- *         "\tat junit.framework.TestCase.runBare(TestCase.java:134)",
- *         "\tat junit.framework.TestResult$1.protect(TestResult.java:110)",
- *         "\tat junit.framework.TestResult.runProtected(TestResult.java:128)",
- *         "\tat junit.framework.TestResult.run(TestResult.java:113)",
- *         "\tat junit.framework.TestCase.run(TestCase.java:124)",
- *         "\tat junit.framework.TestSuite.runTest(TestSuite.java:232)",
- *         "\tat junit.framework.TestSuite.run(TestSuite.java:227)",
- *         "\tat org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:83)",
- *         "\tat org.apache.maven.surefire.junit4.JUnit4TestSet.execute(JUnit4TestSet.java:59)",
- *         "\tat org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.executeTestSet(AbstractDirectoryTestSuite.java:120)",
- *         "\tat org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.execute(AbstractDirectoryTestSuite.java:145)",
- *         "\tat org.apache.maven.surefire.Surefire.run(Surefire.java:104)",
- *         "\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)",
- *         "\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)",
- *         "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)",
- *         "\tat java.lang.reflect.Method.invoke(Method.java:597)",
- *         "\tat org.apache.maven.surefire.booter.SurefireBooter.runSuitesInProcess(SurefireBooter.java:290)",
- *         "\tat org.apache.maven.surefire.booter.SurefireBooter.main(SurefireBooter.java:1017)"
- *         ]
- *     }
- * 
- */ -public class Log4Json extends Layout { - - /** - * Jackson factories are thread safe when constructing parsers and generators. - * They are not thread safe in configure methods; if there is to be any - * configuration it must be done in a static initializer block. - */ - private static final JsonFactory factory = new MappingJsonFactory(); - private static final ObjectReader READER = new ObjectMapper(factory).reader(); - public static final String DATE = "date"; - public static final String EXCEPTION_CLASS = "exceptionclass"; - public static final String LEVEL = "level"; - public static final String MESSAGE = "message"; - public static final String NAME = "name"; - public static final String STACK = "stack"; - public static final String THREAD = "thread"; - public static final String TIME = "time"; - public static final String JSON_TYPE = "application/json"; - - private final DateFormat dateFormat; - - public Log4Json() { - dateFormat = new ISO8601DateFormat(); - } - - - /** - * @return the mime type of JSON - */ - @Override - public String getContentType() { - return JSON_TYPE; - } - - @Override - public String format(LoggingEvent event) { - try { - return toJson(event); - } catch (IOException e) { - //this really should not happen, and rather than throw an exception - //which may hide the real problem, the log class is printed - //in JSON format. The classname is used to ensure valid JSON is - //returned without playing escaping games - return "{ \"logfailure\":\"" + e.getClass().toString() + "\"}"; - } - } - - /** - * Convert an event to JSON - * - * @param event the event -must not be null - * @return a string value - * @throws IOException on problems generating the JSON - */ - public String toJson(LoggingEvent event) throws IOException { - StringWriter writer = new StringWriter(); - toJson(writer, event); - return writer.toString(); - } - - /** - * Convert an event to JSON - * - * @param writer the destination writer - * @param event the event -must not be null - * @return the writer - * @throws IOException on problems generating the JSON - */ - public Writer toJson(final Writer writer, final LoggingEvent event) - throws IOException { - ThrowableInformation ti = event.getThrowableInformation(); - toJson(writer, - event.getLoggerName(), - event.getTimeStamp(), - event.getLevel().toString(), - event.getThreadName(), - event.getRenderedMessage(), - ti); - return writer; - } - - /** - * Build a JSON entry from the parameters. This is public for testing. 
- * - * @param writer destination - * @param loggerName logger name - * @param timeStamp time_t value - * @param level level string - * @param threadName name of the thread - * @param message rendered message - * @param ti nullable thrown information - * @return the writer - * @throws IOException on any problem - */ - public Writer toJson(final Writer writer, - final String loggerName, - final long timeStamp, - final String level, - final String threadName, - final String message, - final ThrowableInformation ti) throws IOException { - JsonGenerator json = factory.createGenerator(writer); - json.writeStartObject(); - json.writeStringField(NAME, loggerName); - json.writeNumberField(TIME, timeStamp); - Date date = new Date(timeStamp); - json.writeStringField(DATE, dateFormat.format(date)); - json.writeStringField(LEVEL, level); - json.writeStringField(THREAD, threadName); - json.writeStringField(MESSAGE, message); - if (ti != null) { - //there is some throwable info, but if the log event has been sent over the wire, - //there may not be a throwable inside it, just a summary. - Throwable thrown = ti.getThrowable(); - String eclass = (thrown != null) ? - thrown.getClass().getName() - : ""; - json.writeStringField(EXCEPTION_CLASS, eclass); - String[] stackTrace = ti.getThrowableStrRep(); - json.writeArrayFieldStart(STACK); - for (String row : stackTrace) { - json.writeString(row); - } - json.writeEndArray(); - } - json.writeEndObject(); - json.flush(); - json.close(); - return writer; - } - - /** - * This appender does not ignore throwables - * - * @return false, always - */ - @Override - public boolean ignoresThrowable() { - return false; - } - - /** - * Do nothing - */ - @Override - public void activateOptions() { - } - - /** - * For use in tests - * - * @param json incoming JSON to parse - * @return a node tree - * @throws IOException on any parsing problems - */ - public static ContainerNode parse(String json) throws IOException { - JsonNode jsonNode = READER.readTree(json); - if (!(jsonNode instanceof ContainerNode)) { - throw new IOException("Wrong JSON data: " + json); - } - return (ContainerNode) jsonNode; - } -} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/log/TestLog4Json.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/log/TestLog4Json.java deleted file mode 100644 index 519f14b7fd8..00000000000 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/log/TestLog4Json.java +++ /dev/null @@ -1,264 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.log; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ContainerNode; -import org.junit.Test; -import static org.junit.Assert.*; -import org.apache.hadoop.util.Time; -import org.apache.log4j.Appender; -import org.apache.log4j.Category; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.apache.log4j.WriterAppender; -import org.apache.log4j.spi.HierarchyEventListener; -import org.apache.log4j.spi.LoggerFactory; -import org.apache.log4j.spi.LoggerRepository; -import org.apache.log4j.spi.ThrowableInformation; - -import java.io.IOException; -import java.io.StringWriter; -import java.io.Writer; -import java.net.NoRouteToHostException; -import java.util.Enumeration; -import java.util.Vector; - -public class TestLog4Json { - - @Test - public void testConstruction() throws Throwable { - Log4Json l4j = new Log4Json(); - String outcome = l4j.toJson(new StringWriter(), - "name", 0, "DEBUG", "thread1", - "hello, world", null).toString(); - println("testConstruction", outcome); - } - - @Test - public void testException() throws Throwable { - Exception e = - new NoRouteToHostException("that box caught fire 3 years ago"); - ThrowableInformation ti = new ThrowableInformation(e); - Log4Json l4j = new Log4Json(); - long timeStamp = Time.now(); - String outcome = l4j.toJson(new StringWriter(), - "testException", - timeStamp, - "INFO", - "quoted\"", - "new line\n and {}", - ti) - .toString(); - println("testException", outcome); - } - - @Test - public void testNestedException() throws Throwable { - Exception e = - new NoRouteToHostException("that box caught fire 3 years ago"); - Exception ioe = new IOException("Datacenter problems", e); - ThrowableInformation ti = new ThrowableInformation(ioe); - Log4Json l4j = new Log4Json(); - long timeStamp = Time.now(); - String outcome = l4j.toJson(new StringWriter(), - "testNestedException", - timeStamp, - "INFO", - "quoted\"", - "new line\n and {}", - ti) - .toString(); - println("testNestedException", outcome); - ContainerNode rootNode = Log4Json.parse(outcome); - assertEntryEquals(rootNode, Log4Json.LEVEL, "INFO"); - assertEntryEquals(rootNode, Log4Json.NAME, "testNestedException"); - assertEntryEquals(rootNode, Log4Json.TIME, timeStamp); - assertEntryEquals(rootNode, Log4Json.EXCEPTION_CLASS, - ioe.getClass().getName()); - JsonNode node = assertNodeContains(rootNode, Log4Json.STACK); - assertTrue("Not an array: " + node, node.isArray()); - node = assertNodeContains(rootNode, Log4Json.DATE); - assertTrue("Not a string: " + node, node.isTextual()); - //rather than try and make assertions about the format of the text - //message equalling another ISO date, this test asserts that the hypen - //and colon characters are in the string. 
- String dateText = node.textValue(); - assertTrue("No '-' in " + dateText, dateText.contains("-")); - assertTrue("No '-' in " + dateText, dateText.contains(":")); - - } - - - /** - * Create a log instance and and log to it - * @throws Throwable if it all goes wrong - */ - @Test - public void testLog() throws Throwable { - String message = "test message"; - Throwable throwable = null; - String json = logOut(message, throwable); - println("testLog", json); - } - - /** - * Create a log instance and and log to it - * @throws Throwable if it all goes wrong - */ - @Test - public void testLogExceptions() throws Throwable { - String message = "test message"; - Throwable inner = new IOException("Directory / not found"); - Throwable throwable = new IOException("startup failure", inner); - String json = logOut(message, throwable); - println("testLogExceptions", json); - } - - - void assertEntryEquals(ContainerNode rootNode, String key, String value) { - JsonNode node = assertNodeContains(rootNode, key); - assertEquals(value, node.textValue()); - } - - private JsonNode assertNodeContains(ContainerNode rootNode, String key) { - JsonNode node = rootNode.get(key); - if (node == null) { - fail("No entry of name \"" + key + "\" found in " + rootNode.toString()); - } - return node; - } - - void assertEntryEquals(ContainerNode rootNode, String key, long value) { - JsonNode node = assertNodeContains(rootNode, key); - assertEquals(value, node.numberValue()); - } - - /** - * Print out what's going on. The logging APIs aren't used and the text - * delimited for more details - * - * @param name name of operation - * @param text text to print - */ - private void println(String name, String text) { - System.out.println(name + ": #" + text + "#"); - } - - private String logOut(String message, Throwable throwable) { - StringWriter writer = new StringWriter(); - Logger logger = createLogger(writer); - logger.info(message, throwable); - //remove and close the appender - logger.removeAllAppenders(); - return writer.toString(); - } - - public Logger createLogger(Writer writer) { - TestLoggerRepository repo = new TestLoggerRepository(); - Logger logger = repo.getLogger("test"); - Log4Json layout = new Log4Json(); - WriterAppender appender = new WriterAppender(layout, writer); - logger.addAppender(appender); - return logger; - } - - /** - * This test logger avoids integrating with the main runtimes Logger hierarchy - * in ways the reader does not want to know. 
- */ - private static class TestLogger extends Logger { - private TestLogger(String name, LoggerRepository repo) { - super(name); - repository = repo; - setLevel(Level.INFO); - } - - } - - public static class TestLoggerRepository implements LoggerRepository { - @Override - public void addHierarchyEventListener(HierarchyEventListener listener) { - } - - @Override - public boolean isDisabled(int level) { - return false; - } - - @Override - public void setThreshold(Level level) { - } - - @Override - public void setThreshold(String val) { - } - - @Override - public void emitNoAppenderWarning(Category cat) { - } - - @Override - public Level getThreshold() { - return Level.ALL; - } - - @Override - public Logger getLogger(String name) { - return new TestLogger(name, this); - } - - @Override - public Logger getLogger(String name, LoggerFactory factory) { - return new TestLogger(name, this); - } - - @Override - public Logger getRootLogger() { - return new TestLogger("root", this); - } - - @Override - public Logger exists(String name) { - return null; - } - - @Override - public void shutdown() { - } - - @Override - public Enumeration getCurrentLoggers() { - return new Vector().elements(); - } - - @Override - public Enumeration getCurrentCategories() { - return new Vector().elements(); - } - - @Override - public void fireAddAppenderEvent(Category logger, Appender appender) { - } - - @Override - public void resetConfiguration() { - } - } -} From 0dbe1d3284498b06fc53af9c093682f58d579427 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 20 Mar 2023 19:23:16 -0700 Subject: [PATCH 34/97] HADOOP-18668. Path capability probe for truncate is only honored by RawLocalFileSystem (#5492) --- .../hdfs/client/DfsPathCapabilities.java | 1 + .../fs/http/client/HttpFSFileSystem.java | 1 + .../fs/http/client/BaseTestHttpFSWith.java | 9 +++++++++ .../viewfs/TestViewFileSystemWithTruncate.java | 3 +++ .../hdfs/TestViewDistributedFileSystem.java | 18 ++++++++++++++++++ .../hdfs/server/namenode/TestFileTruncate.java | 7 +++++++ ...estWebHdfsWithRestCsrfPreventionFilter.java | 3 +++ 7 files changed, 42 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/DfsPathCapabilities.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/DfsPathCapabilities.java index 30e7e00653b..612a9776303 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/DfsPathCapabilities.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/DfsPathCapabilities.java @@ -53,6 +53,7 @@ public final class DfsPathCapabilities { case CommonPathCapabilities.FS_SNAPSHOTS: case CommonPathCapabilities.FS_STORAGEPOLICY: case CommonPathCapabilities.FS_XATTRS: + case CommonPathCapabilities.FS_TRUNCATE: return Optional.of(true); case CommonPathCapabilities.FS_SYMLINKS: return Optional.of(FileSystem.areSymlinksEnabled()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java index f34a27e0277..8e3e530dd06 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java @@ -1646,6 +1646,7 @@ public class HttpFSFileSystem extends FileSystem case 
CommonPathCapabilities.FS_SNAPSHOTS: case CommonPathCapabilities.FS_STORAGEPOLICY: case CommonPathCapabilities.FS_XATTRS: + case CommonPathCapabilities.FS_TRUNCATE: return true; case CommonPathCapabilities.FS_SYMLINKS: return false; diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java index 41dc03d59e2..b3a5d54ed52 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java @@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.BlockStoragePolicySpi; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileChecksum; @@ -302,9 +303,17 @@ public abstract class BaseTestHttpFSWith extends HFSTestCase { AppendTestUtil.checkFullFile(fs, file, newLength, data, file.toString()); fs.close(); + assertPathCapabilityForTruncate(file); } } + private void assertPathCapabilityForTruncate(Path file) throws Exception { + FileSystem fs = this.getHttpFSFileSystem(); + assertTrue("HttpFS/WebHdfs/SWebHdfs support truncate", + fs.hasPathCapability(file, CommonPathCapabilities.FS_TRUNCATE)); + fs.close(); + } + private void testConcat() throws Exception { Configuration config = getProxiedFSConf(); config.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithTruncate.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithTruncate.java index 2f44b46aa30..45138b78f73 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithTruncate.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithTruncate.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.function.Supplier; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; @@ -103,6 +104,8 @@ public class TestViewFileSystemWithTruncate { out.writeBytes("drtatedasfdasfgdfas"); out.close(); int newLength = 10; + assertTrue("ViewFS supports truncate", + fsView.hasPathCapability(filePath, CommonPathCapabilities.FS_TRUNCATE)); boolean isReady = fsView.truncate(filePath, newLength); if (!isReady) { GenericTestUtils.waitFor(new Supplier() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestViewDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestViewDistributedFileSystem.java index d7cc241394a..b53b124c738 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestViewDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestViewDistributedFileSystem.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs; import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Path; @@ -191,4 +192,21 @@ public class TestViewDistributedFileSystem extends TestDistributedFileSystem{ } } } + + @Test + public void testPathCapabilities() throws IOException { + Configuration conf = getTestConfiguration(); + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build()) { + URI defaultUri = URI.create(conf.get(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY)); + conf.set("fs.viewfs.mounttable." + defaultUri.getHost() + ".linkFallback", + defaultUri.toString()); + try (ViewDistributedFileSystem fileSystem = (ViewDistributedFileSystem) FileSystem.get( + conf)) { + final Path testFile = new Path("/test"); + assertTrue("ViewDfs supports truncate", + fileSystem.hasPathCapability(testFile, CommonPathCapabilities.FS_TRUNCATE)); + } + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java index 13bc00f0944..7da222c4c02 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java @@ -33,6 +33,7 @@ import static org.junit.Assert.fail; import java.io.IOException; import java.util.concurrent.ThreadLocalRandom; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector; import org.apache.hadoop.ipc.RemoteException; @@ -143,6 +144,8 @@ public class TestFileTruncate { writeContents(contents, fileLength, p); int newLength = fileLength - toTruncate; + assertTrue("DFS supports truncate", + fs.hasPathCapability(p, CommonPathCapabilities.FS_TRUNCATE)); boolean isReady = fs.truncate(p, newLength); LOG.info("fileLength=" + fileLength + ", newLength=" + newLength + ", toTruncate=" + toTruncate + ", isReady=" + isReady); @@ -176,6 +179,8 @@ public class TestFileTruncate { for(int n = data.length; n > 0; ) { final int newLength = ThreadLocalRandom.current().nextInt(n); + assertTrue("DFS supports truncate", + fs.hasPathCapability(p, CommonPathCapabilities.FS_TRUNCATE)); final boolean isReady = fs.truncate(p, newLength); LOG.info("newLength=" + newLength + ", isReady=" + isReady); assertEquals("File must be closed for truncating at the block boundary", @@ -209,6 +214,8 @@ public class TestFileTruncate { final int newLength = data.length - 1; assert newLength % BLOCK_SIZE != 0 : " newLength must not be multiple of BLOCK_SIZE"; + assertTrue("DFS supports truncate", + fs.hasPathCapability(p, CommonPathCapabilities.FS_TRUNCATE)); final boolean isReady = fs.truncate(p, newLength); LOG.info("newLength=" + newLength + ", isReady=" + isReady); assertEquals("File must be closed for truncating at the block boundary", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithRestCsrfPreventionFilter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithRestCsrfPreventionFilter.java index a1c27f52dc9..5388008fff4 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithRestCsrfPreventionFilter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithRestCsrfPreventionFilter.java @@ -29,6 +29,7 @@ import java.net.URI; import java.util.Arrays; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSTestUtil; @@ -160,6 +161,8 @@ public class TestWebHdfsWithRestCsrfPreventionFilter { if (nnRestCsrf && !clientRestCsrf) { expectException(); } + assertTrue("WebHdfs supports truncate", + webhdfs.hasPathCapability(FILE, CommonPathCapabilities.FS_TRUNCATE)); assertTrue(webhdfs.truncate(FILE, 0L)); } From 67e02a92e0b9c4da3dcdd01f231a98f243f12d06 Mon Sep 17 00:00:00 2001 From: Yubi Lee Date: Wed, 22 Mar 2023 10:54:41 +0900 Subject: [PATCH 35/97] HADOOP-18666. A whitelist of endpoints to skip Kerberos authentication doesn't work for ResourceManager and Job History Server (#5480) --- .../org/apache/hadoop/http/HttpServer2.java | 13 +++- .../hadoop/http/TestHttpServerWithSpnego.java | 60 ++++++++++++++++--- 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java index 178f761191b..515148e9298 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java @@ -497,7 +497,12 @@ public final class HttpServer2 implements FilterContainer { prefix -> this.conf.get(prefix + "type") .equals(PseudoAuthenticationHandler.TYPE)) ) { - server.initSpnego(conf, hostName, usernameConfKey, keytabConfKey); + server.initSpnego( + conf, + hostName, + getFilterProperties(conf, authFilterConfigurationPrefixes), + usernameConfKey, + keytabConfKey); } for (URI ep : endpoints) { @@ -1340,8 +1345,12 @@ public final class HttpServer2 implements FilterContainer { } private void initSpnego(Configuration conf, String hostName, - String usernameConfKey, String keytabConfKey) throws IOException { + Properties authFilterConfigurationPrefixes, String usernameConfKey, String keytabConfKey) + throws IOException { Map params = new HashMap<>(); + for (Map.Entry entry : authFilterConfigurationPrefixes.entrySet()) { + params.put(String.valueOf(entry.getKey()), String.valueOf(entry.getValue())); + } String principalInConf = conf.get(usernameConfKey); if (principalInConf != null && !principalInConf.isEmpty()) { params.put("kerberos.principal", SecurityUtil.getServerPrincipal( diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServerWithSpnego.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServerWithSpnego.java index dfcd98801de..cddbc2a1959 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServerWithSpnego.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServerWithSpnego.java @@ -19,8 +19,10 @@ package org.apache.hadoop.http; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.minikdc.MiniKdc; import 
org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.AuthenticationFilterInitializer; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.KerberosTestUtils; import org.apache.hadoop.security.authentication.client.AuthenticatedURL; @@ -104,7 +106,9 @@ public class TestHttpServerWithSpnego { */ @Test public void testAuthenticationWithProxyUser() throws Exception { - Configuration spengoConf = getSpengoConf(new Configuration()); + Configuration spnegoConf = getSpnegoConf(new Configuration()); + spnegoConf.set(HttpServer2.FILTER_INITIALIZER_PROPERTY, + ProxyUserAuthenticationFilterInitializer.class.getName()); //setup logs dir System.setProperty("hadoop.log.dir", testRootDir.getAbsolutePath()); @@ -118,15 +122,15 @@ public class TestHttpServerWithSpnego { new String[]{"groupC"}); // Make userA impersonate users in groupB - spengoConf.set("hadoop.proxyuser.userA.hosts", "*"); - spengoConf.set("hadoop.proxyuser.userA.groups", "groupB"); - ProxyUsers.refreshSuperUserGroupsConfiguration(spengoConf); + spnegoConf.set("hadoop.proxyuser.userA.hosts", "*"); + spnegoConf.set("hadoop.proxyuser.userA.groups", "groupB"); + ProxyUsers.refreshSuperUserGroupsConfiguration(spnegoConf); HttpServer2 httpServer = null; try { // Create http server to test. httpServer = getCommonBuilder() - .setConf(spengoConf) + .setConf(spnegoConf) .setACL(new AccessControlList("userA groupA")) .build(); httpServer.start(); @@ -191,6 +195,48 @@ public class TestHttpServerWithSpnego { } } + @Test + public void testAuthenticationToAllowList() throws Exception { + Configuration spnegoConf = getSpnegoConf(new Configuration()); + String[] allowList = new String[] {"/jmx", "/prom"}; + String[] denyList = new String[] {"/conf", "/stacks", "/logLevel"}; + spnegoConf.set(PREFIX + "kerberos.endpoint.whitelist", String.join(",", allowList)); + spnegoConf.set(CommonConfigurationKeysPublic.HADOOP_PROMETHEUS_ENABLED, "true"); + spnegoConf.set(HttpServer2.FILTER_INITIALIZER_PROPERTY, + AuthenticationFilterInitializer.class.getName()); + + //setup logs dir + System.setProperty("hadoop.log.dir", testRootDir.getAbsolutePath()); + + HttpServer2 httpServer = null; + try { + // Create http server to test. 
+ httpServer = getCommonBuilder().setConf(spnegoConf).setSecurityEnabled(true) + .setUsernameConfKey(PREFIX + "kerberos.principal") + .setKeytabConfKey(PREFIX + "kerberos.keytab").build(); + httpServer.start(); + + String serverURL = "http://" + NetUtils.getHostPortString(httpServer.getConnectorAddress(0)); + + // endpoints in whitelist should not require Kerberos authentication + for (String endpoint : allowList) { + HttpURLConnection conn = (HttpURLConnection) new URL(serverURL + endpoint).openConnection(); + Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode()); + } + + // endpoints not in whitelist should require Kerberos authentication + for (String endpoint : denyList) { + HttpURLConnection conn = (HttpURLConnection) new URL(serverURL + endpoint).openConnection(); + Assert.assertEquals(HttpURLConnection.HTTP_UNAUTHORIZED, conn.getResponseCode()); + } + + } finally { + if (httpServer != null) { + httpServer.stop(); + } + } + } + private AuthenticatedURL.Token getEncryptedAuthToken(Signer signer, String user) throws Exception { AuthenticationToken token = @@ -209,10 +255,8 @@ public class TestHttpServerWithSpnego { return new Signer(secretProvider); } - private Configuration getSpengoConf(Configuration conf) { + private Configuration getSpnegoConf(Configuration conf) { conf = new Configuration(); - conf.set(HttpServer2.FILTER_INITIALIZER_PROPERTY, - ProxyUserAuthenticationFilterInitializer.class.getName()); conf.set(PREFIX + "type", "kerberos"); conf.setBoolean(PREFIX + "simple.anonymous.allowed", false); conf.set(PREFIX + "signature.secret.file", From e3cb9573e1114adcfa8471d94b282783297d9e8c Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 23 Mar 2023 08:30:08 +0530 Subject: [PATCH 36/97] HADOOP-18662. ListFiles with recursive fails with FNF. (#5477). Contributed by Ayush Saxena. 
Reviewed-by: Steve Loughran newDirItor = listLocatedStatus(stat.getPath()); + itors.push(curItor); + curItor = newDirItor; + } catch (FileNotFoundException ignored) { + LOGGER.debug("Directory {} deleted while attempting for recursive listing", + stat.getPath()); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index 32b0992610f..2773214f45d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.FS_CLIENT_TOPOLOGY_RE import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_FILE_CLOSE_NUM_COMMITTED_ALLOWED_KEY; import static org.apache.hadoop.hdfs.client.HdfsAdmin.TRASH_PERMISSION; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_CONTEXT; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -31,6 +32,7 @@ import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import java.io.File; import java.io.FileNotFoundException; @@ -123,9 +125,11 @@ import org.apache.hadoop.test.Whitebox; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.concurrent.HadoopExecutors; +import org.apache.hadoop.util.functional.RemoteIterators; import org.junit.Assert; import org.junit.Test; import org.mockito.InOrder; +import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.event.Level; @@ -1557,6 +1561,56 @@ public class TestDistributedFileSystem { } } + @Test + public void testListFilesRecursive() throws IOException { + Configuration conf = getTestConfiguration(); + + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();) { + DistributedFileSystem fs = cluster.getFileSystem(); + + // Create some directories and files. + Path dir = new Path("/dir"); + Path subDir1 = fs.makeQualified(new Path(dir, "subDir1")); + Path subDir2 = fs.makeQualified(new Path(dir, "subDir2")); + + fs.create(new Path(dir, "foo1")).close(); + fs.create(new Path(dir, "foo2")).close(); + fs.create(new Path(subDir1, "foo3")).close(); + fs.create(new Path(subDir2, "foo4")).close(); + + // Mock the filesystem, and throw FNF when listing is triggered for the subdirectory. + FileSystem mockFs = spy(fs); + Mockito.doThrow(new FileNotFoundException("")).when(mockFs).listLocatedStatus(eq(subDir1)); + List str = RemoteIterators.toList(mockFs.listFiles(dir, true)); + assertThat(str).hasSize(3); + + // Mock the filesystem to depict a scenario where the directory got deleted and a file + // got created with the same name. 
+ Mockito.doReturn(getMockedIterator(subDir1)).when(mockFs).listLocatedStatus(eq(subDir1)); + + str = RemoteIterators.toList(mockFs.listFiles(dir, true)); + assertThat(str).hasSize(4); + } + } + + private static RemoteIterator getMockedIterator(Path subDir1) { + return new RemoteIterator() { + private int remainingEntries = 1; + + @Override + public boolean hasNext() throws IOException { + return remainingEntries > 0; + } + + @Override + public LocatedFileStatus next() throws IOException { + remainingEntries--; + return new LocatedFileStatus(0, false, 1, 1024, 0L, 0, null, null, null, null, subDir1, + false, false, false, null); + } + }; + } + @Test public void testListStatusOfSnapshotDirs() throws IOException { MiniDFSCluster cluster = new MiniDFSCluster.Builder(getTestConfiguration()) From 028cde0006a433c926f973bfb8f3fff8ac8c63cc Mon Sep 17 00:00:00 2001 From: "zhaixiaojuan@loongson.cn" <67671683+zhaixiaojuan@users.noreply.github.com> Date: Thu, 23 Mar 2023 11:08:59 +0800 Subject: [PATCH 37/97] HADOOP-18644. Add bswap support for LoongArch64. (#5453). Contributed by zhaixiaojuan. Reviewed-by: He Xiaoqiao --- .../src/main/native/src/lib/primitives.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/lib/primitives.h b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/lib/primitives.h index 000c4b91fd9..6d454e1dfc5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/lib/primitives.h +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/lib/primitives.h @@ -99,7 +99,7 @@ inline void simple_memcpy(void * dest, const void * src, size_t len) { inline uint32_t bswap(uint32_t val) { #ifdef __aarch64__ __asm__("rev %w[dst], %w[src]" : [dst]"=r"(val) : [src]"r"(val)); -#elif defined(__ppc64__)||(__PPC64__)||(__powerpc64__) +#elif defined(__ppc64__)||(__PPC64__)||(__powerpc64__)||(__loongarch64) return __builtin_bswap32(val); #else __asm__("bswap %0" : "=r" (val) : "0" (val)); @@ -110,7 +110,7 @@ inline uint32_t bswap(uint32_t val) { inline uint64_t bswap64(uint64_t val) { #ifdef __aarch64__ __asm__("rev %[dst], %[src]" : [dst]"=r"(val) : [src]"r"(val)); -#elif defined(__ppc64__)||(__PPC64__)||(__powerpc64__) +#elif defined(__ppc64__)||(__PPC64__)||(__powerpc64__)||(__loongarch64) return __builtin_bswap64(val); #else #ifdef __X64 From 5cf62d149878c595c6e2edea7d9079f05aa9882a Mon Sep 17 00:00:00 2001 From: Kidd5368 <57645247+Kidd53685368@users.noreply.github.com> Date: Thu, 23 Mar 2023 17:00:23 +0800 Subject: [PATCH 38/97] HDFS-16948. Update log of BlockManager#chooseExcessRedundancyStriped when EC internal block is moved by balancer. (#5474). Contributed by Kidd53685368. 
Reviewed-by: zhangshuyan Signed-off-by: He Xiaoqiao --- .../hadoop/hdfs/server/blockmanagement/BlockManager.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 4e5e1234716..e5a6cf73b69 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -4128,6 +4128,7 @@ public class BlockManager implements BlockStatsMXBean { BitSet found = new BitSet(groupSize); //indices found BitSet duplicated = new BitSet(groupSize); //indices found more than once HashMap storage2index = new HashMap<>(); + boolean logEmptyExcessType = true; for (DatanodeStorageInfo storage : nonExcess) { int index = sblk.getStorageBlockIndex(storage); assert index >= 0; @@ -4145,6 +4146,7 @@ public class BlockManager implements BlockStatsMXBean { Integer index = storage2index.get(delStorageHint); if (index != null && duplicated.get(index)) { processChosenExcessRedundancy(nonExcess, delStorageHint, storedBlock); + logEmptyExcessType = false; } } @@ -4155,8 +4157,10 @@ public class BlockManager implements BlockStatsMXBean { final List excessTypes = storagePolicy.chooseExcess( (short) numOfTarget, DatanodeStorageInfo.toStorageTypes(nonExcess)); if (excessTypes.isEmpty()) { - LOG.warn("excess types chosen for block {} among storages {} is empty", - storedBlock, nonExcess); + if(logEmptyExcessType) { + LOG.warn("excess types chosen for block {} among storages {} is empty", + storedBlock, nonExcess); + } return; } From 69748aae321c7b7fad3724367df8c6022c4353df Mon Sep 17 00:00:00 2001 From: Tamas Domok Date: Fri, 24 Mar 2023 09:38:53 +0100 Subject: [PATCH 39/97] YARN-11461. fix NPE in determineMissingParents (auto queue creation / CS). 
(#5506) Change-Id: Iaaaf43a545588eaff8a0a20f6f3c27258a45f390 --- .../capacity/CapacitySchedulerQueueManager.java | 5 +++++ .../TestCapacitySchedulerNewQueueAutoCreation.java | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerQueueManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerQueueManager.java index d8108c0f007..d29d80e07e9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerQueueManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerQueueManager.java @@ -576,6 +576,11 @@ public class CapacitySchedulerQueueManager implements SchedulerQueueManager< firstExistingStaticParent = getQueue(parentCandidate.toString()); } + if (firstExistingParent == null || firstExistingStaticParent == null) { + throw new SchedulerDynamicEditException("Could not auto-create queue " + + queue + " parent queue does not exist."); + } + int maximumDepthOfStaticParent = csContext.getConfiguration().getMaximumAutoCreatedQueueDepth( firstExistingStaticParent.getQueuePath()); if (firstStaticParentDistance > maximumDepthOfStaticParent) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerNewQueueAutoCreation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerNewQueueAutoCreation.java index 037312b716a..47db9565af1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerNewQueueAutoCreation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerNewQueueAutoCreation.java @@ -1251,6 +1251,17 @@ public class TestCapacitySchedulerNewQueueAutoCreation Assert.assertNull("root.e.e1-auto should have been removed", eAuto); } + @Test() + public void testAutoCreateInvalidParent() throws Exception { + startScheduler(); + Assert.assertThrows(SchedulerDynamicEditException.class, + () -> createQueue("invalid.queue")); + Assert.assertThrows(SchedulerDynamicEditException.class, + () -> createQueue("invalid.queue.longer")); + Assert.assertThrows(SchedulerDynamicEditException.class, + () -> createQueue("invalidQueue")); + } + protected AbstractLeafQueue createQueue(String queuePath) throws YarnException, IOException { return autoQueueHandler.createQueue(new QueuePath(queuePath)); From 72b01227062fcd5214c3561c4b81842ac1802bdd Mon Sep 17 00:00:00 2001 From: Andras Katona <41361962+akatona84@users.noreply.github.com> Date: Fri, 24 Mar 2023 16:31:45 +0100 Subject: [PATCH 40/97] HADOOP-18676. 
Fixing jettison vulnerability of hadoop-common lib (#5507) * HADOOP-18587. Fixing jettison vulnerability of hadoop-common lib * no need for excluding, let it come Change-Id: Ia6e4ad351158dd4b0510dec34bbde531a60e7654 --- hadoop-common-project/hadoop-common/pom.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 41efc183c3e..5fb267cfd0d 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -175,6 +175,14 @@ + + + org.codehaus.jettison + jettison + com.sun.jersey jersey-server From b82bcbd8ad52b7a1fd37a205f0e8f09b4c6f2fed Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Sat, 25 Mar 2023 12:04:28 +0530 Subject: [PATCH 41/97] Revert "HADOOP-18676. Fixing jettison vulnerability of hadoop-common lib (#5507)" This reverts commit 72b01227062fcd5214c3561c4b81842ac1802bdd. --- hadoop-common-project/hadoop-common/pom.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 5fb267cfd0d..41efc183c3e 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -175,14 +175,6 @@ - - - org.codehaus.jettison - jettison - com.sun.jersey jersey-server From ee01c64c6c86d5f811de563387aa0443125428ae Mon Sep 17 00:00:00 2001 From: Andras Katona <41361962+akatona84@users.noreply.github.com> Date: Mon, 27 Mar 2023 09:59:02 +0200 Subject: [PATCH 42/97] HADOOP-18676. jettison dependency override in hadoop-common lib (#5513) --- hadoop-client-modules/hadoop-client/pom.xml | 16 ++++++++++++++++ hadoop-common-project/hadoop-common/pom.xml | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/hadoop-client-modules/hadoop-client/pom.xml b/hadoop-client-modules/hadoop-client/pom.xml index 5299c9e8713..9170bf4b549 100644 --- a/hadoop-client-modules/hadoop-client/pom.xml +++ b/hadoop-client-modules/hadoop-client/pom.xml @@ -69,6 +69,10 @@ com.github.pjfanning jersey-json + + org.codehaus.jettison + jettison + com.sun.jersey jersey-server @@ -182,6 +186,10 @@ com.github.pjfanning jersey-json + + org.codehaus.jettison + jettison + io.netty netty @@ -233,6 +241,10 @@ com.github.pjfanning jersey-json + + org.codehaus.jettison + jettison + com.sun.jersey jersey-servlet @@ -290,6 +302,10 @@ com.github.pjfanning jersey-json + + org.codehaus.jettison + jettison + io.netty netty diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 41efc183c3e..5fb267cfd0d 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -175,6 +175,14 @@ + + + org.codehaus.jettison + jettison + com.sun.jersey jersey-server From 762d3ddb433d1a1bfe4224aaa7559daf2ed9ad48 Mon Sep 17 00:00:00 2001 From: Anmol Asrani Date: Mon, 27 Mar 2023 17:13:34 +0530 Subject: [PATCH 43/97] HADOOP-18146: ABFS: Added changes for expect hundred continue header (#4039) This change lets the client react pre-emptively to server load without getting to 503 and the exponential backoff which follows. This stops performance suffering so much as capacity limits are approached for an account. 
Contributed by Anmol Asranii --- .../src/config/checkstyle-suppressions.xml | 4 + .../hadoop/fs/azurebfs/AbfsConfiguration.java | 9 + .../fs/azurebfs/AzureBlobFileSystemStore.java | 1 + .../azurebfs/constants/AbfsHttpConstants.java | 8 + .../azurebfs/constants/ConfigurationKeys.java | 5 + .../constants/FileSystemConfigurations.java | 2 +- .../constants/HttpHeaderConfigurations.java | 1 + .../InvalidAbfsRestOperationException.java | 21 +- .../services/AppendRequestParameters.java | 13 +- .../fs/azurebfs/services/AbfsClient.java | 105 +++-- .../AbfsClientThrottlingIntercept.java | 29 +- .../azurebfs/services/AbfsHttpOperation.java | 105 ++++- .../azurebfs/services/AbfsOutputStream.java | 6 +- .../services/AbfsOutputStreamContext.java | 11 + .../azurebfs/services/AbfsRestOperation.java | 37 +- .../services/ExponentialRetryPolicy.java | 8 +- .../fs/azurebfs/utils/TracingContext.java | 4 + .../hadoop-azure/src/site/markdown/abfs.md | 11 + .../azurebfs/AbstractAbfsIntegrationTest.java | 4 +- .../ITestAzureBlobFileSystemCreate.java | 4 +- .../ITestAzureBlobFileSystemDelete.java | 14 +- .../fs/azurebfs/ITestCustomerProvidedKey.java | 4 +- ...stAbfsClient.java => ITestAbfsClient.java} | 215 ++++++++++- .../services/ITestAbfsRestOperation.java | 358 ++++++++++++++++++ .../services/TestAbfsOutputStream.java | 37 +- .../services/TestAbfsRenameRetryRecovery.java | 2 +- .../services/TestExponentialRetryPolicy.java | 4 +- 27 files changed, 930 insertions(+), 92 deletions(-) rename hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/{TestAbfsClient.java => ITestAbfsClient.java} (64%) create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java diff --git a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml index fd2a7c210e7..2065746b766 100644 --- a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml +++ b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml @@ -48,7 +48,11 @@ files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]utils[\\/]Base64.java"/> + + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 80f803d80da..124c4d9de72 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -117,6 +117,11 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_OPTIMIZE_FOOTER_READ) private boolean optimizeFooterRead; + @BooleanConfigurationValidatorAnnotation( + ConfigurationKey = FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED, + DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED) + private boolean isExpectHeaderEnabled; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED, DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED) private boolean accountThrottlingEnabled; @@ -706,6 +711,10 @@ public class AbfsConfiguration{ return this.azureAppendBlobDirs; } + public boolean isExpectHeaderEnabled() { + return this.isExpectHeaderEnabled; + } + public boolean accountThrottlingEnabled() { return accountThrottlingEnabled; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index e5e70561265..3cee9b4f90c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -693,6 +693,7 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport { } return new AbfsOutputStreamContext(abfsConfiguration.getSasTokenRenewPeriodForStreamsInSeconds()) .withWriteBufferSize(bufferSize) + .enableExpectHeader(abfsConfiguration.isExpectHeaderEnabled()) .enableFlush(abfsConfiguration.isFlushEnabled()) .enableSmallWriteOptimization(abfsConfiguration.isSmallWriteOptimizationEnabled()) .disableOutputStreamFlush(abfsConfiguration.isOutputStreamFlushDisabled()) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index e1b791f6ef2..7e4ddfa675a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -64,6 +64,11 @@ public final class AbfsHttpConstants { public static final String HTTP_METHOD_PATCH = "PATCH"; public static final String HTTP_METHOD_POST = "POST"; public static final String HTTP_METHOD_PUT = "PUT"; + /** + * All status codes less than http 100 signify error + * and should qualify for retry. + */ + public static final int HTTP_CONTINUE = 100; // Abfs generic constants public static final String SINGLE_WHITE_SPACE = " "; @@ -103,6 +108,9 @@ public final class AbfsHttpConstants { public static final String DEFAULT_SCOPE = "default:"; public static final String PERMISSION_FORMAT = "%04d"; public static final String SUPER_USER = "$superuser"; + // The HTTP 100 Continue informational status response code indicates that everything so far + // is OK and that the client should continue with the request or ignore it if it is already finished. + public static final String HUNDRED_CONTINUE = "100-continue"; public static final char CHAR_FORWARD_SLASH = '/'; public static final char CHAR_EXCLAMATION_POINT = '!'; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index a59f76b6d0f..e3052cd7bbc 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -35,6 +35,11 @@ public final class ConfigurationKeys { * path to determine HNS status. */ public static final String FS_AZURE_ACCOUNT_IS_HNS_ENABLED = "fs.azure.account.hns.enabled"; + /** + * Enable or disable expect hundred continue header. + * Value: {@value}. 
+ */ + public static final String FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = "fs.azure.account.expect.header.enabled"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME_REGX = "fs\\.azure\\.account\\.key\\.(.*)"; public static final String FS_AZURE_SECURE_MODE = "fs.azure.secure.mode"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 9994d9f5207..68b492a5791 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -32,7 +32,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_ST public final class FileSystemConfigurations { public static final String DEFAULT_FS_AZURE_ACCOUNT_IS_HNS_ENABLED = ""; - + public static final boolean DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = true; public static final String USER_HOME_DIRECTORY_PREFIX = "/user"; private static final int SIXTY_SECONDS = 60 * 1000; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java index d4065ac2836..b123e90170e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java @@ -70,6 +70,7 @@ public final class HttpHeaderConfigurations { public static final String X_MS_LEASE_ID = "x-ms-lease-id"; public static final String X_MS_PROPOSED_LEASE_ID = "x-ms-proposed-lease-id"; public static final String X_MS_LEASE_BREAK_PERIOD = "x-ms-lease-break-period"; + public static final String EXPECT = "Expect"; private HttpHeaderConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java index 19620212134..285297024c7 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java @@ -30,6 +30,9 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; @InterfaceAudience.Public @InterfaceStability.Evolving public class InvalidAbfsRestOperationException extends AbfsRestOperationException { + + private static final String ERROR_MESSAGE = "InvalidAbfsRestOperationException"; + public InvalidAbfsRestOperationException( final Exception innerException) { super( @@ -37,7 +40,23 @@ public class InvalidAbfsRestOperationException extends AbfsRestOperationExceptio AzureServiceErrorCode.UNKNOWN.getErrorCode(), innerException != null ? innerException.toString() - : "InvalidAbfsRestOperationException", + : ERROR_MESSAGE, innerException); } + + /** + * Adds the retry count along with the exception. 
+ * @param innerException The inner exception which is originally caught. + * @param retryCount The retry count when the exception was thrown. + */ + public InvalidAbfsRestOperationException( + final Exception innerException, int retryCount) { + super( + AzureServiceErrorCode.UNKNOWN.getStatusCode(), + AzureServiceErrorCode.UNKNOWN.getErrorCode(), + innerException != null + ? innerException.toString() + : ERROR_MESSAGE + " RetryCount: " + retryCount, + innerException); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java index 7369bfaf564..57e559a60ec 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java @@ -34,19 +34,22 @@ public class AppendRequestParameters { private final Mode mode; private final boolean isAppendBlob; private final String leaseId; + private boolean isExpectHeaderEnabled; public AppendRequestParameters(final long position, final int offset, final int length, final Mode mode, final boolean isAppendBlob, - final String leaseId) { + final String leaseId, + final boolean isExpectHeaderEnabled) { this.position = position; this.offset = offset; this.length = length; this.mode = mode; this.isAppendBlob = isAppendBlob; this.leaseId = leaseId; + this.isExpectHeaderEnabled = isExpectHeaderEnabled; } public long getPosition() { @@ -72,4 +75,12 @@ public class AppendRequestParameters { public String getLeaseId() { return this.leaseId; } + + public boolean isExpectHeaderEnabled() { + return isExpectHeaderEnabled; + } + + public void setExpectHeaderEnabled(boolean expectHeaderEnabled) { + isExpectHeaderEnabled = expectHeaderEnabled; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 25562660ae2..2c367333300 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -77,6 +77,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.S import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND; /** @@ -656,6 +657,9 @@ public class AbfsClient implements Closeable { throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); addCustomerProvidedKeyHeaders(requestHeaders); + if (reqParams.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } // JDK7 does not support PATCH, so to workaround the issue we will use // PUT and specify the real method in the X-Http-Method-Override header. 
requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, @@ -681,29 +685,7 @@ public class AbfsClient implements Closeable { abfsUriQueryBuilder, cachedSasToken); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = new AbfsRestOperation( - AbfsRestOperationType.Append, - this, - HTTP_METHOD_PUT, - url, - requestHeaders, - buffer, - reqParams.getoffset(), - reqParams.getLength(), - sasTokenForReuse); - try { - op.execute(tracingContext); - } catch (AzureBlobFileSystemException e) { - // If we have no HTTP response, throw the original exception. - if (!op.hasResult()) { - throw e; - } - if (reqParams.isAppendBlob() - && appendSuccessCheckOp(op, path, - (reqParams.getPosition() + reqParams.getLength()), tracingContext)) { - final AbfsRestOperation successOp = new AbfsRestOperation( - AbfsRestOperationType.Append, - this, + final AbfsRestOperation op = getAbfsRestOperationForAppend(AbfsRestOperationType.Append, HTTP_METHOD_PUT, url, requestHeaders, @@ -711,6 +693,41 @@ public class AbfsClient implements Closeable { reqParams.getoffset(), reqParams.getLength(), sasTokenForReuse); + try { + op.execute(tracingContext); + } catch (AzureBlobFileSystemException e) { + /* + If the http response code indicates a user error we retry + the same append request with expect header being disabled. + When "100-continue" header is enabled but a non Http 100 response comes, + the response message might not get set correctly by the server. + So, this handling is to avoid breaking of backward compatibility + if someone has taken dependency on the exception message, + which is created using the error string present in the response header. + */ + int responseStatusCode = ((AbfsRestOperationException) e).getStatusCode(); + if (checkUserError(responseStatusCode) && reqParams.isExpectHeaderEnabled()) { + LOG.debug("User error, retrying without 100 continue enabled for the given path {}", path); + reqParams.setExpectHeaderEnabled(false); + return this.append(path, buffer, reqParams, cachedSasToken, + tracingContext); + } + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { + throw e; + } + if (reqParams.isAppendBlob() + && appendSuccessCheckOp(op, path, + (reqParams.getPosition() + reqParams.getLength()), tracingContext)) { + final AbfsRestOperation successOp = getAbfsRestOperationForAppend( + AbfsRestOperationType.Append, + HTTP_METHOD_PUT, + url, + requestHeaders, + buffer, + reqParams.getoffset(), + reqParams.getLength(), + sasTokenForReuse); successOp.hardSetResult(HttpURLConnection.HTTP_OK); return successOp; } @@ -720,6 +737,48 @@ public class AbfsClient implements Closeable { return op; } + /** + * Returns the rest operation for append. + * @param operationType The AbfsRestOperationType. + * @param httpMethod specifies the httpMethod. + * @param url specifies the url. + * @param requestHeaders This includes the list of request headers. + * @param buffer The buffer to write into. + * @param bufferOffset The buffer offset. + * @param bufferLength The buffer Length. + * @param sasTokenForReuse The sasToken. + * @return AbfsRestOperation op. 
+ */ + @VisibleForTesting + AbfsRestOperation getAbfsRestOperationForAppend(final AbfsRestOperationType operationType, + final String httpMethod, + final URL url, + final List requestHeaders, + final byte[] buffer, + final int bufferOffset, + final int bufferLength, + final String sasTokenForReuse) { + return new AbfsRestOperation( + operationType, + this, + httpMethod, + url, + requestHeaders, + buffer, + bufferOffset, + bufferLength, sasTokenForReuse); + } + + /** + * Returns true if the status code lies in the range of user error. + * @param responseStatusCode http response status code. + * @return True or False. + */ + private boolean checkUserError(int responseStatusCode) { + return (responseStatusCode >= HttpURLConnection.HTTP_BAD_REQUEST + && responseStatusCode < HttpURLConnection.HTTP_INTERNAL_ERROR); + } + // For AppendBlob its possible that the append succeeded in the backend but the request failed. // However a retry would fail with an InvalidQueryParameterValue // (as the current offset would be unacceptable). diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java index 52a46bc7469..3bb225d4be8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java @@ -28,6 +28,8 @@ import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.AbfsStatistic; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; + /** * Throttles Azure Blob File System read and write operations to achieve maximum * throughput by minimizing errors. The errors occur when the account ingress @@ -60,7 +62,7 @@ public final class AbfsClientThrottlingIntercept implements AbfsThrottlingInterc // Hide default constructor private AbfsClientThrottlingIntercept(AbfsConfiguration abfsConfiguration) { - //Account name is kept as empty as same instance is shared across all accounts + // Account name is kept as empty as same instance is shared across all accounts. this.accountName = ""; this.readThrottler = setAnalyzer("read", abfsConfiguration); this.writeThrottler = setAnalyzer("write", abfsConfiguration); @@ -114,6 +116,18 @@ public final class AbfsClientThrottlingIntercept implements AbfsThrottlingInterc return singleton; } + /** + * Updates the metrics for the case when response code signifies throttling + * but there are some expected bytes to be sent. + * @param isThrottledOperation returns true if status code is HTTP_UNAVAILABLE + * @param abfsHttpOperation Used for status code and data transferred. + * @return true if the operation is throttled and has some bytes to transfer. + */ + private boolean updateBytesTransferred(boolean isThrottledOperation, + AbfsHttpOperation abfsHttpOperation) { + return isThrottledOperation && abfsHttpOperation.getExpectedBytesToBeSent() > 0; + } + /** * Updates the metrics for successful and failed read and write operations. * @param operationType Only applicable for read and write operations. 
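For illustration only: a minimal, self-contained sketch (not part of the patch; the class and method names below are invented) of the byte-accounting rule that the new updateBytesTransferred() helper encodes and that the Append branch of updateMetrics() in the next hunk applies. When a 503 refuses the request before any payload is written, the bytes the client intended to send still count toward the write throttler.

final class ThrottlingAccountingSketch {
  private static final int HTTP_UNAVAILABLE = 503;

  /** Bytes to report to the write throttler for one append attempt. */
  static long bytesForThrottling(int statusCode, long bytesSent, long expectedBytesToBeSent) {
    if (bytesSent == 0 && statusCode == HTTP_UNAVAILABLE && expectedBytesToBeSent > 0) {
      // Nothing went over the wire because the Expect: 100-continue handshake was
      // refused, but the attempted payload size is still relevant for backoff.
      return expectedBytesToBeSent;
    }
    return bytesSent;
  }
}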
@@ -134,9 +148,22 @@ public final class AbfsClientThrottlingIntercept implements AbfsThrottlingInterc boolean isFailedOperation = (status < HttpURLConnection.HTTP_OK || status >= HttpURLConnection.HTTP_INTERNAL_ERROR); + // If status code is 503, it is considered as a throttled operation. + boolean isThrottledOperation = (status == HTTP_UNAVAILABLE); + switch (operationType) { case Append: contentLength = abfsHttpOperation.getBytesSent(); + if (contentLength == 0) { + /* + Signifies the case where we could not update the bytesSent due to + throttling but there were some expectedBytesToBeSent. + */ + if (updateBytesTransferred(isThrottledOperation, abfsHttpOperation)) { + LOG.debug("Updating metrics due to throttling for path {}", abfsHttpOperation.getConnUrl().getPath()); + contentLength = abfsHttpOperation.getExpectedBytesToBeSent(); + } + } if (contentLength > 0) { writeThrottler.addBytesTransferred(contentLength, isFailedOperation); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 413bf368689..67ac0c31665 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -43,6 +43,9 @@ import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.services.AbfsPerfLoggable; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; + /** * Represents an HTTP operation. */ @@ -73,6 +76,7 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { // metrics private int bytesSent; + private int expectedBytesToBeSent; private long bytesReceived; // optional trace enabled metrics @@ -155,6 +159,10 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { return bytesSent; } + public int getExpectedBytesToBeSent() { + return expectedBytesToBeSent; + } + public long getBytesReceived() { return bytesReceived; } @@ -282,7 +290,7 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { this.connection.setRequestMethod(method); for (AbfsHttpHeader header : requestHeaders) { - this.connection.setRequestProperty(header.getName(), header.getValue()); + setRequestProperty(header.getName(), header.getValue()); } } @@ -314,13 +322,44 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { if (this.isTraceEnabled) { startTime = System.nanoTime(); } - try (OutputStream outputStream = this.connection.getOutputStream()) { - // update bytes sent before they are sent so we may observe - // attempted sends as well as successful sends via the - // accompanying statusCode + OutputStream outputStream = null; + // Updates the expected bytes to be sent based on length. + this.expectedBytesToBeSent = length; + try { + try { + /* Without expect header enabled, if getOutputStream() throws + an exception, it gets caught by the restOperation. But with + expect header enabled we return back without throwing an exception + for the correct response code processing. 
+ */ + outputStream = getConnOutputStream(); + } catch (IOException e) { + /* If getOutputStream fails with an exception and expect header + is enabled, we return back without throwing an exception to + the caller. The caller is responsible for setting the correct status code. + If expect header is not enabled, we throw back the exception. + */ + String expectHeader = getConnProperty(EXPECT); + if (expectHeader != null && expectHeader.equals(HUNDRED_CONTINUE)) { + LOG.debug("Getting output stream failed with expect header enabled, returning back ", e); + return; + } else { + LOG.debug("Getting output stream failed without expect header enabled, throwing exception ", e); + throw e; + } + } + // update bytes sent for successful as well as failed attempts via the + // accompanying statusCode. this.bytesSent = length; + + // If this fails with or without expect header enabled, + // it throws an IOException. outputStream.write(buffer, offset, length); } finally { + // Closing the opened output stream + if (outputStream != null) { + outputStream.close(); + } if (this.isTraceEnabled) { this.sendRequestTimeMs = elapsedTimeMs(startTime); } @@ -344,13 +383,13 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { startTime = System.nanoTime(); } - this.statusCode = this.connection.getResponseCode(); + this.statusCode = getConnResponseCode(); if (this.isTraceEnabled) { this.recvResponseTimeMs = elapsedTimeMs(startTime); } - this.statusDescription = this.connection.getResponseMessage(); + this.statusDescription = getConnResponseMessage(); this.requestId = this.connection.getHeaderField(HttpHeaderConfigurations.X_MS_REQUEST_ID); if (this.requestId == null) { @@ -543,6 +582,58 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { return stream == null ? true : false; } + /** + * Gets the connection request property for a key. + * @param key The request property key. + * @return request peoperty value. + */ + String getConnProperty(String key) { + return connection.getRequestProperty(key); + } + + /** + * Gets the connection url. + * @return url. + */ + URL getConnUrl() { + return connection.getURL(); + } + + /** + * Gets the connection request method. + * @return request method. + */ + String getConnRequestMethod() { + return connection.getRequestMethod(); + } + + /** + * Gets the connection response code. + * @return response code. + * @throws IOException + */ + Integer getConnResponseCode() throws IOException { + return connection.getResponseCode(); + } + + /** + * Gets the connection output stream. + * @return output stream. + * @throws IOException + */ + OutputStream getConnOutputStream() throws IOException { + return connection.getOutputStream(); + } + + /** + * Gets the connection response message. + * @return response message. + * @throws IOException + */ + String getConnResponseMessage() throws IOException { + return connection.getResponseMessage(); + } + public static class AbfsHttpOperationWithFixedResult extends AbfsHttpOperation { /** * Creates an instance to represent fixed results. 
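As background, a self-contained sketch (not part of the patch; plain JDK classes only, the class and method names are invented) of the Expect: 100-continue send pattern that sendRequest() above follows. With the header set, getOutputStream() fails fast when the server answers the handshake with a final status instead of 100, and the caller then reads the real status code from the connection rather than surfacing the exception.

import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

final class ExpectContinueSketch {
  static int upload(URL url, byte[] payload) throws IOException {
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("PUT");
    conn.setDoOutput(true);
    conn.setFixedLengthStreamingMode(payload.length);
    conn.setRequestProperty("Expect", "100-continue");
    try (OutputStream out = conn.getOutputStream()) {
      out.write(payload);
    } catch (IOException rejected) {
      // With Expect: 100-continue set, the JDK client typically fails here when the
      // server replies with a final (non-100) status; fall through so the real
      // status code and message can be read from the connection.
    }
    return conn.getResponseCode();
  }
}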
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java index 620616b993f..82e20ce5b76 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java @@ -80,6 +80,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, private boolean disableOutputStreamFlush; private boolean enableSmallWriteOptimization; private boolean isAppendBlob; + private boolean isExpectHeaderEnabled; private volatile IOException lastError; private long lastFlushOffset; @@ -133,6 +134,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, this.position = abfsOutputStreamContext.getPosition(); this.closed = false; this.supportFlush = abfsOutputStreamContext.isEnableFlush(); + this.isExpectHeaderEnabled = abfsOutputStreamContext.isExpectHeaderEnabled(); this.disableOutputStreamFlush = abfsOutputStreamContext .isDisableOutputStreamFlush(); this.enableSmallWriteOptimization @@ -327,7 +329,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, * leaseId - The AbfsLeaseId for this request. */ AppendRequestParameters reqParams = new AppendRequestParameters( - offset, 0, bytesLength, mode, false, leaseId); + offset, 0, bytesLength, mode, false, leaseId, isExpectHeaderEnabled); AbfsRestOperation op = client.append(path, blockUploadData.toByteArray(), reqParams, cachedSasToken.get(), new TracingContext(tracingContext)); @@ -573,7 +575,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, "writeCurrentBufferToService", "append")) { AppendRequestParameters reqParams = new AppendRequestParameters(offset, 0, - bytesLength, APPEND_MODE, true, leaseId); + bytesLength, APPEND_MODE, true, leaseId, isExpectHeaderEnabled); AbfsRestOperation op = client.append(path, uploadData.toByteArray(), reqParams, cachedSasToken.get(), new TracingContext(tracingContext)); cachedSasToken.update(op.getSasToken()); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java index ad303823e0c..ed897330367 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java @@ -33,6 +33,8 @@ public class AbfsOutputStreamContext extends AbfsStreamContext { private boolean enableFlush; + private boolean enableExpectHeader; + private boolean enableSmallWriteOptimization; private boolean disableOutputStreamFlush; @@ -78,6 +80,11 @@ public class AbfsOutputStreamContext extends AbfsStreamContext { return this; } + public AbfsOutputStreamContext enableExpectHeader(final boolean enableExpectHeader) { + this.enableExpectHeader = enableExpectHeader; + return this; + } + public AbfsOutputStreamContext enableSmallWriteOptimization(final boolean enableSmallWriteOptimization) { this.enableSmallWriteOptimization = enableSmallWriteOptimization; return this; @@ -184,6 +191,10 @@ public class AbfsOutputStreamContext extends AbfsStreamContext { return enableFlush; } + public boolean 
isExpectHeaderEnabled() { + return enableExpectHeader; + } + public boolean isDisableOutputStreamFlush() { return disableOutputStreamFlush; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index ad99020390a..a9a72635422 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -38,6 +38,8 @@ import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE; + /** * The AbfsRestOperation for Rest AbfsClient. */ @@ -236,11 +238,21 @@ public class AbfsRestOperation { } } - if (result.getStatusCode() >= HttpURLConnection.HTTP_BAD_REQUEST) { + int status = result.getStatusCode(); + /* + If even after exhausting all retries, the http status code has an + invalid value it qualifies for InvalidAbfsRestOperationException. + All http status code less than 1xx range are considered as invalid + status codes. + */ + if (status < HTTP_CONTINUE) { + throw new InvalidAbfsRestOperationException(null, retryCount); + } + + if (status >= HttpURLConnection.HTTP_BAD_REQUEST) { throw new AbfsRestOperationException(result.getStatusCode(), result.getStorageErrorCode(), result.getStorageErrorMessage(), null, result); } - LOG.trace("{} REST operation complete", operationType); } @@ -268,7 +280,7 @@ public class AbfsRestOperation { case Custom: case OAuth: LOG.debug("Authenticating request with OAuth2 access token"); - httpOperation.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, + httpOperation.setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, client.getAccessToken()); break; case SAS: @@ -319,7 +331,7 @@ public class AbfsRestOperation { LOG.warn("Unknown host name: {}. Retrying to resolve the host name...", hostname); if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) { - throw new InvalidAbfsRestOperationException(ex); + throw new InvalidAbfsRestOperationException(ex, retryCount); } return false; } catch (IOException ex) { @@ -330,12 +342,25 @@ public class AbfsRestOperation { failureReason = RetryReason.getAbbreviation(ex, -1, ""); if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) { - throw new InvalidAbfsRestOperationException(ex); + throw new InvalidAbfsRestOperationException(ex, retryCount); } return false; } finally { - intercept.updateMetrics(operationType, httpOperation); + int status = httpOperation.getStatusCode(); + /* + A status less than 300 (2xx range) or greater than or equal + to 500 (5xx range) should contribute to throttling metrics being updated. + Less than 200 or greater than or equal to 500 show failed operations. 2xx + range contributes to successful operations. 3xx range is for redirects + and 4xx range is for user errors. These should not be a part of + throttling backoff computation. 
+ */ + boolean updateMetricsResponseCode = (status < HttpURLConnection.HTTP_MULT_CHOICE + || status >= HttpURLConnection.HTTP_INTERNAL_ERROR); + if (updateMetricsResponseCode) { + intercept.updateMetrics(operationType, httpOperation); + } } LOG.debug("HttpRequest: {}: {}", operationType, httpOperation); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java index bcf94651aca..227bdc5fc1c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java @@ -24,6 +24,8 @@ import java.net.HttpURLConnection; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.classification.VisibleForTesting; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE; + /** * Retry policy used by AbfsClient. * */ @@ -118,7 +120,9 @@ public class ExponentialRetryPolicy { /** * Returns if a request should be retried based on the retry count, current response, - * and the current strategy. + * and the current strategy. The valid http status code lies in the range of 1xx-5xx. + * But an invalid status code might be set due to network or timeout kind of issues. + * Such invalid status code also qualify for retry. * * @param retryCount The current retry attempt count. * @param statusCode The status code of the response, or -1 for socket error. @@ -126,7 +130,7 @@ public class ExponentialRetryPolicy { */ public boolean shouldRetry(final int retryCount, final int statusCode) { return retryCount < this.retryCount - && (statusCode == -1 + && (statusCode < HTTP_CONTINUE || statusCode == HttpURLConnection.HTTP_CLIENT_TIMEOUT || (statusCode >= HttpURLConnection.HTTP_INTERNAL_ERROR && statusCode != HttpURLConnection.HTTP_NOT_IMPLEMENTED diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index 57e65b30b46..97864e61e0b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -149,6 +149,10 @@ public class TracingContext { this.opType = operation; } + public int getRetryCount() { + return retryCount; + } + public void setRetryCount(int retryCount) { this.retryCount = retryCount; } diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index 31498df1790..aff1e32b83f 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -767,6 +767,17 @@ Hflush() being the only documented API that can provide persistent data transfer, Flush() also attempting to persist buffered data will lead to performance issues. +### Hundred Continue Options + +`fs.azure.account.expect.header.enabled`: This configuration parameter is used +to specify whether you wish to send a expect 100 continue header with each +append request or not. It is configured to true by default. This flag configures +the client to check with the Azure store before uploading a block of data from +an output stream. 
This allows the client to throttle back gracefully -before +actually attempting to upload the block. In experiments this provides +significant throughput improvements under heavy load. For more information : +- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect + ### Account level throttling Options diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index c5bf85a4f81..74655fd5736 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -42,7 +42,7 @@ import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.azurebfs.services.AuthType; -import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore; import org.apache.hadoop.fs.azure.NativeAzureFileSystem; import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; @@ -254,7 +254,7 @@ public abstract class AbstractAbfsIntegrationTest extends } public AccessTokenProvider getAccessTokenProvider(final AzureBlobFileSystem fs) { - return TestAbfsClient.getAccessTokenProvider(fs.getAbfsStore().getClient()); + return ITestAbfsClient.getAccessTokenProvider(fs.getAbfsStore().getClient()); } public void loadConfiguredFileSystem() throws Exception { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index 2f23ac5c5c7..d9a3cea089f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -43,7 +43,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConcurrentWriteOperati import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; -import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; @@ -362,7 +362,7 @@ public class ITestAzureBlobFileSystemCreate extends // Get mock AbfsClient with current config AbfsClient mockClient - = TestAbfsClient.getMockAbfsClient( + = ITestAbfsClient.getMockAbfsClient( fs.getAbfsStore().getClient(), fs.getAbfsStore().getAbfsConfiguration()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java index db181fb5dd6..1f0ff667522 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java @@ -35,7 +35,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationExcep import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; -import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azurebfs.services.TestAbfsPerfTracker; import org.apache.hadoop.fs.azurebfs.utils.TestMockHelpers; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; @@ -176,7 +176,7 @@ public class ITestAzureBlobFileSystemDelete extends final AzureBlobFileSystem fs = getFileSystem(); AbfsClient abfsClient = fs.getAbfsStore().getClient(); - AbfsClient testClient = TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient testClient = ITestAbfsClient.createTestClientFromCurrentContext( abfsClient, abfsConfig); @@ -223,7 +223,7 @@ public class ITestAzureBlobFileSystemDelete extends public void testDeleteIdempotencyTriggerHttp404() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - AbfsClient client = TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient client = ITestAbfsClient.createTestClientFromCurrentContext( fs.getAbfsStore().getClient(), this.getConfiguration()); @@ -242,7 +242,7 @@ public class ITestAzureBlobFileSystemDelete extends getTestTracingContext(fs, true))); // mock idempotency check to mimic retried case - AbfsClient mockClient = TestAbfsClient.getMockAbfsClient( + AbfsClient mockClient = ITestAbfsClient.getMockAbfsClient( fs.getAbfsStore().getClient(), this.getConfiguration()); AzureBlobFileSystemStore mockStore = mock(AzureBlobFileSystemStore.class); @@ -257,10 +257,10 @@ public class ITestAzureBlobFileSystemDelete extends // Case 2: Mimic retried case // Idempotency check on Delete always returns success - AbfsRestOperation idempotencyRetOp = TestAbfsClient.getRestOp( + AbfsRestOperation idempotencyRetOp = ITestAbfsClient.getRestOp( DeletePath, mockClient, HTTP_METHOD_DELETE, - TestAbfsClient.getTestUrl(mockClient, "/NonExistingPath"), - TestAbfsClient.getTestRequestHeaders(mockClient)); + ITestAbfsClient.getTestUrl(mockClient, "/NonExistingPath"), + ITestAbfsClient.getTestRequestHeaders(mockClient)); idempotencyRetOp.hardSetResult(HTTP_OK); doReturn(idempotencyRetOp).when(mockClient).deleteIdempotencyCheckOp(any()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java index 58c3be66883..bd8dbdf871b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java @@ -203,7 +203,7 @@ public class ITestCustomerProvidedKey extends AbstractAbfsIntegrationTest { // Trying to append with correct CPK headers AppendRequestParameters appendRequestParameters = new AppendRequestParameters( - 0, 0, 5, Mode.APPEND_MODE, false, null); + 0, 0, 5, Mode.APPEND_MODE, false, null, true); byte[] buffer = getRandomBytesArray(5); AbfsClient abfsClient = fs.getAbfsClient(); AbfsRestOperation abfsRestOperation = abfsClient @@ -248,7 +248,7 @@ public class ITestCustomerProvidedKey extends AbstractAbfsIntegrationTest { // 
Trying to append without CPK headers AppendRequestParameters appendRequestParameters = new AppendRequestParameters( - 0, 0, 5, Mode.APPEND_MODE, false, null); + 0, 0, 5, Mode.APPEND_MODE, false, null, true); byte[] buffer = getRandomBytesArray(5); AbfsClient abfsClient = fs.getAbfsClient(); AbfsRestOperation abfsRestOperation = abfsClient diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java similarity index 64% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index 08eb3adc926..c031e5daa6c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -20,20 +20,43 @@ package org.apache.hadoop.fs.azurebfs.services; import java.io.IOException; import java.lang.reflect.Field; +import java.net.HttpURLConnection; +import java.net.ProtocolException; import java.net.URL; import java.util.List; +import java.util.Random; import java.util.regex.Pattern; +import org.assertj.core.api.Assertions; import org.junit.Test; +import org.mockito.Mockito; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; -import static org.assertj.core.api.Assertions.assertThat; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -59,14 +82,19 @@ import static 
org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST * Test useragent of abfs client. * */ -public final class TestAbfsClient { +public final class ITestAbfsClient extends AbstractAbfsIntegrationTest { private static final String ACCOUNT_NAME = "bogusAccountName.dfs.core.windows.net"; private static final String FS_AZURE_USER_AGENT_PREFIX = "Partner Service"; + private static final String TEST_PATH = "/testfile"; + public static final int REDUCED_RETRY_COUNT = 2; + public static final int REDUCED_BACKOFF_INTERVAL = 100; + public static final int BUFFER_LENGTH = 5; + public static final int BUFFER_OFFSET = 0; private final Pattern userAgentStringPattern; - public TestAbfsClient(){ + public ITestAbfsClient() throws Exception { StringBuilder regEx = new StringBuilder(); regEx.append("^"); regEx.append(APN_VERSION); @@ -124,7 +152,7 @@ public final class TestAbfsClient { } private void verifybBasicInfo(String userAgentStr) { - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string [" + userAgentStr + "] should be of the pattern: " + this.userAgentStringPattern.pattern()) .matches(this.userAgentStringPattern) @@ -153,7 +181,7 @@ public final class TestAbfsClient { String userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should contain " + FS_AZURE_USER_AGENT_PREFIX) .contains(FS_AZURE_USER_AGENT_PREFIX); @@ -163,7 +191,7 @@ public final class TestAbfsClient { userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should not contain " + FS_AZURE_USER_AGENT_PREFIX) .doesNotContain(FS_AZURE_USER_AGENT_PREFIX); } @@ -179,14 +207,14 @@ public final class TestAbfsClient { String userAgentStr = getUserAgentString(abfsConfiguration, true); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should contain sslProvider") .contains(DelegatingSSLSocketFactory.getDefaultFactory().getProviderName()); userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should not contain sslProvider") .doesNotContain(DelegatingSSLSocketFactory.getDefaultFactory().getProviderName()); } @@ -202,7 +230,7 @@ public final class TestAbfsClient { String userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should contain cluster name") .contains(clusterName); @@ -212,7 +240,7 @@ public final class TestAbfsClient { userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should not contain cluster name") .doesNotContain(clusterName) .describedAs("User-Agent string should contain UNKNOWN as cluster name config is absent") @@ -230,7 +258,7 @@ public final class TestAbfsClient { String userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should contain cluster type") .contains(clusterType); @@ -240,7 
+268,7 @@ public final class TestAbfsClient { userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should not contain cluster type") .doesNotContain(clusterType) .describedAs("User-Agent string should contain UNKNOWN as cluster type config is absent") @@ -311,24 +339,23 @@ public final class TestAbfsClient { AbfsThrottlingInterceptFactory.getInstance( abfsConfig.getAccountName().substring(0, abfsConfig.getAccountName().indexOf(DOT)), abfsConfig)); - // override baseurl - client = TestAbfsClient.setAbfsClientField(client, "abfsConfiguration", + client = ITestAbfsClient.setAbfsClientField(client, "abfsConfiguration", abfsConfig); // override baseurl - client = TestAbfsClient.setAbfsClientField(client, "baseUrl", + client = ITestAbfsClient.setAbfsClientField(client, "baseUrl", baseAbfsClientInstance.getBaseUrl()); // override auth provider if (currentAuthType == AuthType.SharedKey) { - client = TestAbfsClient.setAbfsClientField(client, "sharedKeyCredentials", + client = ITestAbfsClient.setAbfsClientField(client, "sharedKeyCredentials", new SharedKeyCredentials( abfsConfig.getAccountName().substring(0, abfsConfig.getAccountName().indexOf(DOT)), abfsConfig.getStorageAccountKey())); } else { - client = TestAbfsClient.setAbfsClientField(client, "tokenProvider", + client = ITestAbfsClient.setAbfsClientField(client, "tokenProvider", abfsConfig.getTokenProvider()); } @@ -336,7 +363,7 @@ public final class TestAbfsClient { String userAgent = "APN/1.0 Azure Blob FS/3.4.0-SNAPSHOT (PrivateBuild " + "JavaJRE 1.8.0_252; Linux 5.3.0-59-generic/amd64; openssl-1.0; " + "UNKNOWN/UNKNOWN) MSFT"; - client = TestAbfsClient.setAbfsClientField(client, "userAgent", userAgent); + client = ITestAbfsClient.setAbfsClientField(client, "userAgent", userAgent); return client; } @@ -404,4 +431,156 @@ public final class TestAbfsClient { public static AccessTokenProvider getAccessTokenProvider(AbfsClient client) { return client.getTokenProvider(); } + + /** + * Test helper method to get random bytes array. + * @param length The length of byte buffer. + * @return byte buffer. + */ + private byte[] getRandomBytesArray(int length) { + final byte[] b = new byte[length]; + new Random().nextBytes(b); + return b; + } + + /** + * Test to verify that client retries append request without + * expect header enabled if append with expect header enabled fails + * with 4xx kind of error. + * @throws Exception + */ + @Test + public void testExpectHundredContinue() throws Exception { + // Get the filesystem. + final AzureBlobFileSystem fs = getFileSystem(); + + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + AbfsClient abfsClient = fs.getAbfsStore().getClient(); + + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + configuration.get(FS_AZURE_ABFS_ACCOUNT_NAME)); + + // Update the configuration with reduced retry count and reduced backoff interval. + AbfsConfiguration abfsConfig + = TestAbfsConfigurationFieldsValidation.updateRetryConfigs( + abfsConfiguration, + REDUCED_RETRY_COUNT, REDUCED_BACKOFF_INTERVAL); + + // Gets the client. + AbfsClient testClient = Mockito.spy( + ITestAbfsClient.createTestClientFromCurrentContext( + abfsClient, + abfsConfig)); + + // Create the append request params with expect header enabled initially. 
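/* Descriptive note, not part of the patch: "enabled initially" matters because the
   behaviour under test is the fallback added to AbfsClient.append() in this patch:
   when the request carries Expect: 100-continue and the service answers with a 4xx,
   the client flips isExpectHeaderEnabled on these same parameters and reissues the
   append without the header instead of going through exponential retry. */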
+ AppendRequestParameters appendRequestParameters + = new AppendRequestParameters( + BUFFER_OFFSET, BUFFER_OFFSET, BUFFER_LENGTH, + AppendRequestParameters.Mode.APPEND_MODE, false, null, true); + + byte[] buffer = getRandomBytesArray(BUFFER_LENGTH); + + // Create a test container to upload the data. + Path testPath = path(TEST_PATH); + fs.create(testPath); + String finalTestPath = testPath.toString() + .substring(testPath.toString().lastIndexOf("/")); + + // Creates a list of request headers. + final List requestHeaders + = ITestAbfsClient.getTestRequestHeaders(testClient); + requestHeaders.add( + new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (appendRequestParameters.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } + + // Updates the query parameters. + final AbfsUriQueryBuilder abfsUriQueryBuilder + = testClient.createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, + Long.toString(appendRequestParameters.getPosition())); + + // Creates the url for the specified path. + URL url = testClient.createRequestUrl(finalTestPath, abfsUriQueryBuilder.toString()); + + // Create a mock of the AbfsRestOperation to set the urlConnection in the corresponding httpOperation. + AbfsRestOperation op = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.Append, + testClient, + HTTP_METHOD_PUT, + url, + requestHeaders, buffer, + appendRequestParameters.getoffset(), + appendRequestParameters.getLength(), null)); + + AbfsHttpOperation abfsHttpOperation = Mockito.spy(new AbfsHttpOperation(url, + HTTP_METHOD_PUT, requestHeaders)); + + // Sets the expect request property if expect header is enabled. + if (appendRequestParameters.isExpectHeaderEnabled()) { + Mockito.doReturn(HUNDRED_CONTINUE).when(abfsHttpOperation) + .getConnProperty(EXPECT); + } + + HttpURLConnection urlConnection = mock(HttpURLConnection.class); + Mockito.doNothing().when(urlConnection).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(HTTP_METHOD_PUT).when(urlConnection).getRequestMethod(); + Mockito.doReturn(url).when(urlConnection).getURL(); + Mockito.doReturn(urlConnection).when(abfsHttpOperation).getConnection(); + + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(url).when(abfsHttpOperation).getConnUrl(); + + // Give user error code 404 when processResponse is called. + Mockito.doReturn(HTTP_METHOD_PUT).when(abfsHttpOperation).getConnRequestMethod(); + Mockito.doReturn(HTTP_NOT_FOUND).when(abfsHttpOperation).getConnResponseCode(); + Mockito.doReturn("Resource Not Found") + .when(abfsHttpOperation) + .getConnResponseMessage(); + + // Make the getOutputStream throw IOException to see it returns from the sendRequest correctly. + Mockito.doThrow(new ProtocolException("Server rejected Operation")) + .when(abfsHttpOperation) + .getConnOutputStream(); + + // Sets the httpOperation for the rest operation. + Mockito.doReturn(abfsHttpOperation) + .when(op) + .createHttpOperation(); + + // Mock the restOperation for the client. 
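/* Descriptive note, not part of the patch: the stubbing below relies on the
   @VisibleForTesting getAbfsRestOperationForAppend(...) seam introduced in AbfsClient
   above, so the spied client hands back this pre-wired operation in place of the one
   the production append() path would construct. */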
+ Mockito.doReturn(op) + .when(testClient) + .getAbfsRestOperationForAppend(Mockito.any(), + Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any(), + Mockito.nullable(int.class), Mockito.nullable(int.class), + Mockito.any()); + + TracingContext tracingContext = Mockito.spy(new TracingContext("abcd", + "abcde", FSOperationType.APPEND, + TracingHeaderFormat.ALL_ID_FORMAT, null)); + + // Check that expect header is enabled before the append call. + Assertions.assertThat(appendRequestParameters.isExpectHeaderEnabled()) + .describedAs("The expect header is not true before the append call") + .isTrue(); + + intercept(AzureBlobFileSystemException.class, + () -> testClient.append(finalTestPath, buffer, appendRequestParameters, null, tracingContext)); + + // Verify that the request was not exponentially retried because of user error. + Assertions.assertThat(tracingContext.getRetryCount()) + .describedAs("The retry count is incorrect") + .isEqualTo(0); + + // Verify that the same request was retried with expect header disabled. + Assertions.assertThat(appendRequestParameters.isExpectHeaderEnabled()) + .describedAs("The expect header is not false") + .isFalse(); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java new file mode 100644 index 00000000000..6ffe2e2773b --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java @@ -0,0 +1,358 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.HttpURLConnection; +import java.net.ProtocolException; +import java.net.URL; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; + +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static java.net.HttpURLConnection.HTTP_OK; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; + +@RunWith(Parameterized.class) +public class ITestAbfsRestOperation extends AbstractAbfsIntegrationTest { + + // Specifies whether getOutputStream() or write() throws IOException. + public enum ErrorType {OUTPUTSTREAM, WRITE}; + + private static final int HTTP_EXPECTATION_FAILED = 417; + private static final int HTTP_ERROR = 0; + private static final int ZERO = 0; + private static final int REDUCED_RETRY_COUNT = 2; + private static final int REDUCED_BACKOFF_INTERVAL = 100; + private static final int BUFFER_LENGTH = 5; + private static final int BUFFER_OFFSET = 0; + private static final String TEST_PATH = "/testfile"; + + // Specifies whether the expect header is enabled or not. + @Parameterized.Parameter + public boolean expectHeaderEnabled; + + // Gives the http response code. + @Parameterized.Parameter(1) + public int responseCode; + + // Gives the http response message. + @Parameterized.Parameter(2) + public String responseMessage; + + // Gives the errorType based on the enum. + @Parameterized.Parameter(3) + public ErrorType errorType; + + // The intercept. 
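/* Descriptive note, not part of the patch: this field holds the mocked
   AbfsThrottlingIntercept that getRestOperation() installs on the spied client via
   getIntercept(); the test later verifies whether updateMetrics() was invoked for
   each class of response. */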
+ private AbfsThrottlingIntercept intercept; + + /* + HTTP_OK = 200, + HTTP_UNAVAILABLE = 503, + HTTP_NOT_FOUND = 404, + HTTP_EXPECTATION_FAILED = 417, + HTTP_ERROR = 0. + */ + @Parameterized.Parameters(name = "expect={0}-code={1}-ErrorType={3}") + public static Iterable params() { + return Arrays.asList(new Object[][]{ + {true, HTTP_OK, "OK", ErrorType.WRITE}, + {false, HTTP_OK, "OK", ErrorType.WRITE}, + {true, HTTP_UNAVAILABLE, "ServerBusy", ErrorType.OUTPUTSTREAM}, + {true, HTTP_NOT_FOUND, "Resource Not Found", ErrorType.OUTPUTSTREAM}, + {true, HTTP_EXPECTATION_FAILED, "Expectation Failed", ErrorType.OUTPUTSTREAM}, + {true, HTTP_ERROR, "Error", ErrorType.OUTPUTSTREAM} + }); + } + + public ITestAbfsRestOperation() throws Exception { + super(); + } + + /** + * Test helper method to get random bytes array. + * @param length The length of byte buffer + * @return byte buffer + */ + private byte[] getRandomBytesArray(int length) { + final byte[] b = new byte[length]; + new Random().nextBytes(b); + return b; + } + + /** + * Gives the AbfsRestOperation. + * @return abfsRestOperation. + */ + private AbfsRestOperation getRestOperation() throws Exception { + // Get the filesystem. + final AzureBlobFileSystem fs = getFileSystem(); + + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + AbfsClient abfsClient = fs.getAbfsStore().getClient(); + + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + configuration.get(FS_AZURE_ABFS_ACCOUNT_NAME)); + + // Update the configuration with reduced retry count and reduced backoff interval. + AbfsConfiguration abfsConfig + = TestAbfsConfigurationFieldsValidation.updateRetryConfigs( + abfsConfiguration, + REDUCED_RETRY_COUNT, REDUCED_BACKOFF_INTERVAL); + + intercept = Mockito.mock(AbfsThrottlingIntercept.class); + Mockito.doNothing().when(intercept).updateMetrics(Mockito.any(), Mockito.any()); + + // Gets the client. + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( + abfsClient, + abfsConfig)); + + Mockito.doReturn(intercept).when(testClient).getIntercept(); + + // Expect header is enabled or not based on the parameter. + AppendRequestParameters appendRequestParameters + = new AppendRequestParameters( + BUFFER_OFFSET, BUFFER_OFFSET, BUFFER_LENGTH, + AppendRequestParameters.Mode.APPEND_MODE, false, null, + expectHeaderEnabled); + + byte[] buffer = getRandomBytesArray(5); + + // Create a test container to upload the data. + Path testPath = path(TEST_PATH); + fs.create(testPath); + String finalTestPath = testPath.toString().substring(testPath.toString().lastIndexOf("/")); + + // Creates a list of request headers. + final List requestHeaders = ITestAbfsClient.getTestRequestHeaders(testClient); + requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (appendRequestParameters.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } + + // Updates the query parameters. + final AbfsUriQueryBuilder abfsUriQueryBuilder = testClient.createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(appendRequestParameters.getPosition())); + + // Creates the url for the specified path. 
+ URL url = testClient.createRequestUrl(finalTestPath, abfsUriQueryBuilder.toString()); + + // Create a mock of the AbfsRestOperation to set the urlConnection in the corresponding httpOperation. + AbfsRestOperation op = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.Append, + testClient, + HTTP_METHOD_PUT, + url, + requestHeaders, buffer, + appendRequestParameters.getoffset(), + appendRequestParameters.getLength(), null)); + + AbfsHttpOperation abfsHttpOperation = Mockito.spy(new AbfsHttpOperation(url, HTTP_METHOD_PUT, requestHeaders)); + + // Sets the expect request property if expect header is enabled. + if (expectHeaderEnabled) { + Mockito.doReturn(HUNDRED_CONTINUE) + .when(abfsHttpOperation) + .getConnProperty(EXPECT); + } + + HttpURLConnection urlConnection = mock(HttpURLConnection.class); + Mockito.doNothing().when(urlConnection).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(HTTP_METHOD_PUT).when(urlConnection).getRequestMethod(); + Mockito.doReturn(url).when(urlConnection).getURL(); + Mockito.doReturn(urlConnection).when(abfsHttpOperation).getConnection(); + + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(url).when(abfsHttpOperation).getConnUrl(); + Mockito.doReturn(HTTP_METHOD_PUT).when(abfsHttpOperation).getConnRequestMethod(); + + switch (errorType) { + case OUTPUTSTREAM: + // If the getOutputStream() throws IOException and Expect Header is + // enabled, it returns back to processResponse and hence we have + // mocked the response code and the response message to check different + // behaviour based on response code. + Mockito.doReturn(responseCode).when(abfsHttpOperation).getConnResponseCode(); + Mockito.doReturn(responseMessage) + .when(abfsHttpOperation) + .getConnResponseMessage(); + Mockito.doThrow(new ProtocolException("Server rejected Operation")) + .when(abfsHttpOperation) + .getConnOutputStream(); + break; + case WRITE: + // If write() throws IOException and Expect Header is + // enabled or not, it should throw back the exception. + OutputStream outputStream = Mockito.spy(new OutputStream() { + @Override + public void write(final int i) throws IOException { + } + }); + Mockito.doReturn(outputStream).when(abfsHttpOperation).getConnOutputStream(); + Mockito.doThrow(new IOException()) + .when(outputStream) + .write(buffer, appendRequestParameters.getoffset(), + appendRequestParameters.getLength()); + break; + default: + break; + } + + // Sets the httpOperation for the rest operation. + Mockito.doReturn(abfsHttpOperation) + .when(op) + .createHttpOperation(); + return op; + } + + void assertTraceContextState(int retryCount, int assertRetryCount, int bytesSent, int assertBytesSent, + int expectedBytesSent, int assertExpectedBytesSent) { + // Assert that the request is retried or not. + Assertions.assertThat(retryCount) + .describedAs("The retry count is incorrect") + .isEqualTo(assertRetryCount); + + // Assert that metrics will be updated correctly. + Assertions.assertThat(bytesSent) + .describedAs("The bytes sent is incorrect") + .isEqualTo(assertBytesSent); + Assertions.assertThat(expectedBytesSent) + .describedAs("The expected bytes sent is incorrect") + .isEqualTo(assertExpectedBytesSent); + } + + /** + * Test the functionalities based on whether getOutputStream() or write() + * throws exception and what is the corresponding response code. + */ + @Test + public void testExpectHundredContinue() throws Exception { + // Gets the AbfsRestOperation. 
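/* Descriptive note, not part of the patch, summarising the assertions below:
   WRITE rows: write() throws, the IOException surfaces once the reduced retry count
   is exhausted, and bytesSent still reflects the attempted buffer length.
   OUTPUTSTREAM with 503 or status 0: getOutputStream() fails, the request is retried,
   and throttling metrics are updated for the first attempt plus every retry.
   OUTPUTSTREAM with 404 or 417: treated as a user error, so no retry and no
   throttling-metrics update. */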
+ AbfsRestOperation op = getRestOperation(); + AbfsHttpOperation httpOperation = op.createHttpOperation(); + + TracingContext tracingContext = Mockito.spy(new TracingContext("abcd", + "abcde", FSOperationType.APPEND, + TracingHeaderFormat.ALL_ID_FORMAT, null)); + + switch (errorType) { + case WRITE: + // If write() throws IOException and Expect Header is + // enabled or not, it should throw back the exception + // which is caught and exponential retry logic comes into place. + intercept(IOException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), BUFFER_LENGTH, + 0, 0); + break; + case OUTPUTSTREAM: + switch (responseCode) { + case HTTP_UNAVAILABLE: + // In the case of 503 i.e. throttled case, we should retry. + intercept(IOException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), ZERO, + httpOperation.getExpectedBytesToBeSent(), BUFFER_LENGTH); + + // Verifies that update Metrics call is made for throttle case and for the first without retry + + // for the retried cases as well. + Mockito.verify(intercept, times(REDUCED_RETRY_COUNT + 1)) + .updateMetrics(Mockito.any(), Mockito.any()); + break; + case HTTP_ERROR: + // In the case of http status code 0 i.e. ErrorType case, we should retry. + intercept(IOException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), + ZERO, 0, 0); + + // Verifies that update Metrics call is made for ErrorType case and for the first without retry + + // for the retried cases as well. + Mockito.verify(intercept, times(REDUCED_RETRY_COUNT + 1)) + .updateMetrics(Mockito.any(), Mockito.any()); + break; + case HTTP_NOT_FOUND: + case HTTP_EXPECTATION_FAILED: + // In the case of 4xx ErrorType. i.e. user ErrorType, retry should not happen. + intercept(AzureBlobFileSystemException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), ZERO, 0, + 0, 0, 0); + + // Verifies that update Metrics call is not made for user ErrorType case. 
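+        // 404 and 417 are treated as client errors here: no retry is attempted
+        // and the throttling intercept should never record metrics.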
+ Mockito.verify(intercept, never()) + .updateMetrics(Mockito.any(), Mockito.any()); + break; + default: + break; + } + break; + default: + break; + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java index 0673e387bfb..e26ba938cf5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java @@ -72,6 +72,7 @@ public final class TestAbfsOutputStream { boolean isFlushEnabled, boolean disableOutputStreamFlush, boolean isAppendBlob, + boolean isExpectHeaderEnabled, AbfsClient client, String path, TracingContext tracingContext, @@ -89,6 +90,7 @@ public final class TestAbfsOutputStream { return new AbfsOutputStreamContext(2) .withWriteBufferSize(writeBufferSize) + .enableExpectHeader(isExpectHeaderEnabled) .enableFlush(isFlushEnabled) .disableOutputStreamFlush(disableOutputStreamFlush) .withStreamStatistics(new AbfsOutputStreamStatisticsImpl()) @@ -129,6 +131,7 @@ public final class TestAbfsOutputStream { true, false, false, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -149,9 +152,9 @@ public final class TestAbfsOutputStream { out.hsync(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, WRITE_SIZE, APPEND_MODE, false, null); + 0, 0, WRITE_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - WRITE_SIZE, 0, 2 * WRITE_SIZE, APPEND_MODE, false, null); + WRITE_SIZE, 0, 2 * WRITE_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), @@ -190,6 +193,7 @@ public final class TestAbfsOutputStream { true, false, false, + true, client, PATH, tracingContext, @@ -203,9 +207,9 @@ public final class TestAbfsOutputStream { out.close(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, 5*WRITE_SIZE-BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, 5*WRITE_SIZE-BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), @@ -264,6 +268,7 @@ public final class TestAbfsOutputStream { true, false, false, + true, client, PATH, tracingContext, @@ -277,9 +282,9 @@ public final class TestAbfsOutputStream { out.close(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); @@ -335,6 +340,7 @@ public final class TestAbfsOutputStream { true, false, false, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -350,9 +356,9 @@ public final class TestAbfsOutputStream { Thread.sleep(1000); 
AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); @@ -390,6 +396,7 @@ public final class TestAbfsOutputStream { true, false, true, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -405,9 +412,9 @@ public final class TestAbfsOutputStream { Thread.sleep(1000); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, true, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, true, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, true, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, true, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); @@ -449,6 +456,7 @@ public final class TestAbfsOutputStream { true, false, false, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -464,9 +472,9 @@ public final class TestAbfsOutputStream { out.hflush(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); @@ -518,6 +526,7 @@ public final class TestAbfsOutputStream { true, false, false, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -535,9 +544,9 @@ public final class TestAbfsOutputStream { Thread.sleep(1000); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java index 65ea79b36bd..f5cbceaddd8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -58,7 +58,7 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest { String destNoParentPath = "/NoParent/Dest"; AzureBlobFileSystem fs = getFileSystem(); - AbfsClient mockClient = TestAbfsClient.getMockAbfsClient( + AbfsClient mockClient = 
ITestAbfsClient.getMockAbfsClient( fs.getAbfsStore().getClient(), fs.getAbfsStore().getAbfsConfiguration()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java index a1fc4e138d6..12ab4e9ead6 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java @@ -103,7 +103,7 @@ public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest { AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, "dummy.dfs.core.windows.net"); AbfsThrottlingIntercept intercept; - AbfsClient abfsClient = TestAbfsClient.createTestClientFromCurrentContext(fs.getAbfsStore().getClient(), abfsConfiguration); + AbfsClient abfsClient = ITestAbfsClient.createTestClientFromCurrentContext(fs.getAbfsStore().getClient(), abfsConfiguration); intercept = abfsClient.getIntercept(); Assertions.assertThat(intercept) .describedAs("AbfsNoOpThrottlingIntercept instance expected") @@ -114,7 +114,7 @@ public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest { // On disabling throttling AbfsClientThrottlingIntercept object is returned AbfsConfiguration abfsConfiguration1 = new AbfsConfiguration(configuration, "dummy1.dfs.core.windows.net"); - AbfsClient abfsClient1 = TestAbfsClient.createTestClientFromCurrentContext(fs.getAbfsStore().getClient(), abfsConfiguration1); + AbfsClient abfsClient1 = ITestAbfsClient.createTestClientFromCurrentContext(fs.getAbfsStore().getClient(), abfsConfiguration1); intercept = abfsClient1.getIntercept(); Assertions.assertThat(intercept) .describedAs("AbfsClientThrottlingIntercept instance expected") From 926993cb73f957eb191c0a830c6b5560585f95d8 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Tue, 28 Mar 2023 00:27:21 +0800 Subject: [PATCH 44/97] =?UTF-8?q?YARN-11376.=20[Federation]=20Support=20up?= =?UTF-8?q?dateNodeResource=E3=80=81refreshNodesResources=20API's=20for=20?= =?UTF-8?q?Federation.=20(#5496)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../RefreshNodesResourcesRequest.java | 27 ++++++++ .../UpdateNodeResourceRequest.java | 30 +++++++- ...erver_resourcemanager_service_protos.proto | 2 + .../RefreshNodesResourcesRequestPBImpl.java | 24 +++++++ .../pb/UpdateNodeResourceRequestPBImpl.java | 16 +++++ .../yarn/server/router/RouterMetrics.java | 62 +++++++++++++++++ .../rmadmin/FederationRMAdminInterceptor.java | 69 ++++++++++++++++++- .../yarn/server/router/TestRouterMetrics.java | 64 +++++++++++++++++ .../TestFederationRMAdminInterceptor.java | 68 ++++++++++++++++++ .../TestableFederationRMAdminInterceptor.java | 3 +- 10 files changed, 360 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshNodesResourcesRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshNodesResourcesRequest.java index f8c91f6437e..bcffeb74ed8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshNodesResourcesRequest.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RefreshNodesResourcesRequest.java @@ -36,4 +36,31 @@ public abstract class RefreshNodesResourcesRequest { Records.newRecord(RefreshNodesResourcesRequest.class); return request; } + + @Public + @Evolving + public static RefreshNodesResourcesRequest newInstance(String subClusterId) { + RefreshNodesResourcesRequest request = + Records.newRecord(RefreshNodesResourcesRequest.class); + request.setSubClusterId(subClusterId); + return request; + } + + /** + * Get the subClusterId. + * + * @return subClusterId. + */ + @Public + @Evolving + public abstract String getSubClusterId(); + + /** + * Set the subClusterId. + * + * @param subClusterId subCluster Id. + */ + @Public + @Evolving + public abstract void setSubClusterId(String subClusterId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceRequest.java index d540ccebb46..cfe93135498 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceRequest.java @@ -51,7 +51,18 @@ public abstract class UpdateNodeResourceRequest { request.setNodeResourceMap(nodeResourceMap); return request; } - + + @Public + @Evolving + public static UpdateNodeResourceRequest newInstance( + Map nodeResourceMap, String subClusterId) { + UpdateNodeResourceRequest request = + Records.newRecord(UpdateNodeResourceRequest.class); + request.setNodeResourceMap(nodeResourceMap); + request.setSubClusterId(subClusterId); + return request; + } + /** * Get the map from NodeId to ResourceOption. * @return the map of {@code } @@ -68,4 +79,21 @@ public abstract class UpdateNodeResourceRequest { @Evolving public abstract void setNodeResourceMap(Map nodeResourceMap); + /** + * Get the subClusterId. + * + * @return subClusterId. + */ + @Public + @Evolving + public abstract String getSubClusterId(); + + /** + * Set the subClusterId. + * + * @param subClusterId subCluster Id. 
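+   *                     In a federated cluster, the Router uses this id to
+   *                     route the request to the corresponding sub-cluster RM.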
+ */ + @Public + @Evolving + public abstract void setSubClusterId(String subClusterId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto index 4050a5b356f..132f937e150 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto @@ -79,12 +79,14 @@ message GetGroupsForUserResponseProto { message UpdateNodeResourceRequestProto { repeated NodeResourceMapProto node_resource_map = 1; + optional string sub_cluster_id = 2; } message UpdateNodeResourceResponseProto { } message RefreshNodesResourcesRequestProto { + optional string sub_cluster_id = 1; } message RefreshNodesResourcesResponseProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshNodesResourcesRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshNodesResourcesRequestPBImpl.java index 203fca19ee0..1e866e608d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshNodesResourcesRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RefreshNodesResourcesRequestPBImpl.java @@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshNodesResourcesRequestProto; +import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshNodesResourcesRequestProtoOrBuilder; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest; import org.apache.hadoop.thirdparty.protobuf.TextFormat; @@ -69,4 +70,27 @@ public class RefreshNodesResourcesRequestPBImpl extends RefreshNodesResourcesReq public String toString() { return TextFormat.shortDebugString(getProto()); } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = RefreshNodesResourcesRequestProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public String getSubClusterId() { + RefreshNodesResourcesRequestProtoOrBuilder p = viaProto ? proto : builder; + return (p.hasSubClusterId()) ? 
p.getSubClusterId() : null; + } + + @Override + public void setSubClusterId(String subClusterId) { + maybeInitBuilder(); + if (subClusterId == null) { + builder.clearSubClusterId(); + return; + } + builder.setSubClusterId(subClusterId); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceRequestPBImpl.java index 0e05e731ad2..512462cd195 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceRequestPBImpl.java @@ -66,6 +66,22 @@ public class UpdateNodeResourceRequestPBImpl extends UpdateNodeResourceRequest { this.nodeResourceMap.putAll(nodeResourceMap); } + @Override + public String getSubClusterId() { + UpdateNodeResourceRequestProtoOrBuilder p = viaProto ? proto : builder; + return (p.hasSubClusterId()) ? p.getSubClusterId() : null; + } + + @Override + public void setSubClusterId(String subClusterId) { + maybeInitBuilder(); + if (subClusterId == null) { + builder.clearSubClusterId(); + return; + } + builder.setSubClusterId(subClusterId); + } + public UpdateNodeResourceRequestProto getProto() { mergeLocalToProto(); proto = viaProto ? proto : builder.build(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java index 3338013ebac..3a581dfbd1f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java @@ -163,6 +163,10 @@ public final class RouterMetrics { private MutableGaugeInt numGetClusterInfoFailedRetrieved; @Metric("# of getClusterUserInfo failed to be retrieved") private MutableGaugeInt numGetClusterUserInfoFailedRetrieved; + @Metric("# of updateNodeResource failed to be retrieved") + private MutableGaugeInt numUpdateNodeResourceFailedRetrieved; + @Metric("# of refreshNodesResources failed to be retrieved") + private MutableGaugeInt numRefreshNodesResourcesFailedRetrieved; // Aggregate metrics are shared, and don't have to be looked up per call @Metric("Total number of successful Submitted apps and latency(ms)") @@ -287,6 +291,10 @@ public final class RouterMetrics { private MutableRate totalSucceededGetClusterInfoRetrieved; @Metric("Total number of successful Retrieved GetClusterUserInfoRetrieved and latency(ms)") private MutableRate totalSucceededGetClusterUserInfoRetrieved; + @Metric("Total number of successful Retrieved UpdateNodeResource and latency(ms)") + private MutableRate totalSucceededUpdateNodeResourceRetrieved; + @Metric("Total number of successful Retrieved RefreshNodesResources and latency(ms)") + private MutableRate totalSucceededRefreshNodesResourcesRetrieved; /** * Provide quantile counters for all latencies. 
@@ -352,6 +360,8 @@ public final class RouterMetrics { private MutableQuantiles removeFromClusterNodeLabelsLatency; private MutableQuantiles getClusterInfoLatency; private MutableQuantiles getClusterUserInfoLatency; + private MutableQuantiles updateNodeResourceLatency; + private MutableQuantiles refreshNodesResourcesLatency; private static volatile RouterMetrics instance = null; private static MetricsRegistry registry; @@ -567,6 +577,12 @@ public final class RouterMetrics { getClusterUserInfoLatency = registry.newQuantiles("getClusterUserInfoLatency", "latency of get cluster user info timeouts", "ops", "latency", 10); + + updateNodeResourceLatency = registry.newQuantiles("updateNodeResourceLatency", + "latency of update node resource timeouts", "ops", "latency", 10); + + refreshNodesResourcesLatency = registry.newQuantiles("refreshNodesResourcesLatency", + "latency of refresh nodes resources timeouts", "ops", "latency", 10); } public static RouterMetrics getMetrics() { @@ -873,6 +889,16 @@ public final class RouterMetrics { return totalSucceededGetClusterUserInfoRetrieved.lastStat().numSamples(); } + @VisibleForTesting + public long getNumSucceededUpdateNodeResourceRetrieved() { + return totalSucceededUpdateNodeResourceRetrieved.lastStat().numSamples(); + } + + @VisibleForTesting + public long getNumSucceededRefreshNodesResourcesRetrieved() { + return totalSucceededRefreshNodesResourcesRetrieved.lastStat().numSamples(); + } + @VisibleForTesting public long getNumSucceededRefreshSuperUserGroupsConfigurationRetrieved() { return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().numSamples(); @@ -1173,6 +1199,16 @@ public final class RouterMetrics { return totalSucceededGetClusterUserInfoRetrieved.lastStat().mean(); } + @VisibleForTesting + public double getLatencySucceededUpdateNodeResourceRetrieved() { + return totalSucceededUpdateNodeResourceRetrieved.lastStat().mean(); + } + + @VisibleForTesting + public double getLatencySucceededRefreshNodesResourcesRetrieved() { + return totalSucceededRefreshNodesResourcesRetrieved.lastStat().mean(); + } + @VisibleForTesting public double getLatencySucceededRefreshSuperUserGroupsConfigurationRetrieved() { return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().mean(); @@ -1426,6 +1462,14 @@ public final class RouterMetrics { return numGetClusterUserInfoFailedRetrieved.value(); } + public int getUpdateNodeResourceFailedRetrieved() { + return numUpdateNodeResourceFailedRetrieved.value(); + } + + public int getRefreshNodesResourcesFailedRetrieved() { + return numRefreshNodesResourcesFailedRetrieved.value(); + } + public int getDelegationTokenFailedRetrieved() { return numGetDelegationTokenFailedRetrieved.value(); } @@ -1739,6 +1783,16 @@ public final class RouterMetrics { getClusterUserInfoLatency.add(duration); } + public void succeededUpdateNodeResourceRetrieved(long duration) { + totalSucceededUpdateNodeResourceRetrieved.add(duration); + updateNodeResourceLatency.add(duration); + } + + public void succeededRefreshNodesResourcesRetrieved(long duration) { + totalSucceededRefreshNodesResourcesRetrieved.add(duration); + refreshNodesResourcesLatency.add(duration); + } + public void succeededRefreshSuperUserGroupsConfRetrieved(long duration) { totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.add(duration); refreshSuperUserGroupsConfLatency.add(duration); @@ -1967,6 +2021,14 @@ public final class RouterMetrics { numGetClusterUserInfoFailedRetrieved.incr(); } + public void incrUpdateNodeResourceFailedRetrieved() 
{ + numUpdateNodeResourceFailedRetrieved.incr(); + } + + public void incrRefreshNodesResourcesFailedRetrieved() { + numRefreshNodesResourcesFailedRetrieved.incr(); + } + public void incrGetDelegationTokenFailedRetrieved() { numGetDelegationTokenFailedRetrieved.incr(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java index 93e864bb980..c930459559f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.router.rmadmin; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.NotImplementedException; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -112,7 +113,7 @@ public class FederationRMAdminInterceptor extends AbstractRMAdminRequestIntercep @VisibleForTesting protected ResourceManagerAdministrationProtocol getAdminRMProxyForSubCluster( - SubClusterId subClusterId) throws YarnException { + SubClusterId subClusterId) throws Exception { if (adminRMProxies.containsKey(subClusterId)) { return adminRMProxies.get(subClusterId); @@ -438,13 +439,75 @@ public class FederationRMAdminInterceptor extends AbstractRMAdminRequestIntercep @Override public UpdateNodeResourceResponse updateNodeResource(UpdateNodeResourceRequest request) throws YarnException, IOException { - throw new NotImplementedException(); + + // parameter verification. + if (request == null) { + routerMetrics.incrUpdateNodeResourceFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing UpdateNodeResource request.", null); + } + + String subClusterId = request.getSubClusterId(); + if (StringUtils.isBlank(subClusterId)) { + routerMetrics.incrUpdateNodeResourceFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing UpdateNodeResource SubClusterId.", null); + } + + try { + long startTime = clock.getTime(); + RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod( + new Class[]{UpdateNodeResourceRequest.class}, new Object[]{request}); + Collection updateNodeResourceResps = + remoteMethod.invokeConcurrent(this, UpdateNodeResourceResponse.class, subClusterId); + if (CollectionUtils.isNotEmpty(updateNodeResourceResps)) { + long stopTime = clock.getTime(); + routerMetrics.succeededUpdateNodeResourceRetrieved(stopTime - startTime); + return UpdateNodeResourceResponse.newInstance(); + } + } catch (YarnException e) { + routerMetrics.incrUpdateNodeResourceFailedRetrieved(); + RouterServerUtil.logAndThrowException(e, + "Unable to updateNodeResource due to exception. 
" + e.getMessage()); + } + + routerMetrics.incrUpdateNodeResourceFailedRetrieved(); + throw new YarnException("Unable to updateNodeResource."); } @Override public RefreshNodesResourcesResponse refreshNodesResources(RefreshNodesResourcesRequest request) throws YarnException, IOException { - throw new NotImplementedException(); + + // parameter verification. + if (request == null) { + routerMetrics.incrRefreshNodesResourcesFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing RefreshNodesResources request.", null); + } + + String subClusterId = request.getSubClusterId(); + if (StringUtils.isBlank(subClusterId)) { + routerMetrics.incrRefreshNodesResourcesFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing RefreshNodesResources SubClusterId.", null); + } + + try { + long startTime = clock.getTime(); + RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod( + new Class[]{RefreshNodesResourcesRequest.class}, new Object[]{request}); + Collection refreshNodesResourcesResps = + remoteMethod.invokeConcurrent(this, RefreshNodesResourcesResponse.class, subClusterId); + if (CollectionUtils.isNotEmpty(refreshNodesResourcesResps)) { + long stopTime = clock.getTime(); + routerMetrics.succeededRefreshNodesResourcesRetrieved(stopTime - startTime); + return RefreshNodesResourcesResponse.newInstance(); + } + } catch (YarnException e) { + routerMetrics.incrRefreshNodesResourcesFailedRetrieved(); + RouterServerUtil.logAndThrowException(e, + "Unable to refreshNodesResources due to exception. " + e.getMessage()); + } + + routerMetrics.incrRefreshNodesResourcesFailedRetrieved(); + throw new YarnException("Unable to refreshNodesResources."); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java index 955948c91c8..4af7e8c7f5a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java @@ -578,6 +578,16 @@ public class TestRouterMetrics { LOG.info("Mocked: failed getClusterUserInfo call"); metrics.incrGetClusterUserInfoFailedRetrieved(); } + + public void getUpdateNodeResourceFailed() { + LOG.info("Mocked: failed getClusterUserInfo call"); + metrics.incrUpdateNodeResourceFailedRetrieved(); + } + + public void getRefreshNodesResourcesFailed() { + LOG.info("Mocked: failed refreshNodesResources call"); + metrics.incrRefreshNodesResourcesFailedRetrieved(); + } } // Records successes for all calls @@ -858,6 +868,16 @@ public class TestRouterMetrics { LOG.info("Mocked: successful GetClusterUserInfoRetrieved call with duration {}", duration); metrics.succeededGetClusterUserInfoRetrieved(duration); } + + public void getUpdateNodeResourceRetrieved(long duration) { + LOG.info("Mocked: successful UpdateNodeResourceRetrieved call with duration {}", duration); + metrics.succeededUpdateNodeResourceRetrieved(duration); + } + + public void getRefreshNodesResourcesRetrieved(long duration) { + LOG.info("Mocked: successful RefreshNodesResourcesRetrieved call with duration {}", duration); + metrics.succeededRefreshNodesResourcesRetrieved(duration); + } } @Test @@ -1912,4 
+1932,48 @@ public class TestRouterMetrics {
     Assert.assertEquals(225,
         metrics.getLatencySucceededGetClusterUserInfoRetrieved(), ASSERT_DOUBLE_DELTA);
   }
+
+  @Test
+  public void testUpdateNodeResourceRetrievedFailed() {
+    long totalBadBefore = metrics.getUpdateNodeResourceFailedRetrieved();
+    badSubCluster.getUpdateNodeResourceFailed();
+    Assert.assertEquals(totalBadBefore + 1, metrics.getUpdateNodeResourceFailedRetrieved());
+  }
+
+  @Test
+  public void testUpdateNodeResourceRetrieved() {
+    long totalGoodBefore = metrics.getNumSucceededUpdateNodeResourceRetrieved();
+    goodSubCluster.getUpdateNodeResourceRetrieved(150);
+    Assert.assertEquals(totalGoodBefore + 1,
+        metrics.getNumSucceededUpdateNodeResourceRetrieved());
+    Assert.assertEquals(150,
+        metrics.getLatencySucceededUpdateNodeResourceRetrieved(), ASSERT_DOUBLE_DELTA);
+    goodSubCluster.getUpdateNodeResourceRetrieved(300);
+    Assert.assertEquals(totalGoodBefore + 2,
+        metrics.getNumSucceededUpdateNodeResourceRetrieved());
+    Assert.assertEquals(225,
+        metrics.getLatencySucceededUpdateNodeResourceRetrieved(), ASSERT_DOUBLE_DELTA);
+  }
+
+  @Test
+  public void testRefreshNodesResourcesRetrievedFailed() {
+    long totalBadBefore = metrics.getRefreshNodesResourcesFailedRetrieved();
+    badSubCluster.getRefreshNodesResourcesFailed();
+    Assert.assertEquals(totalBadBefore + 1, metrics.getRefreshNodesResourcesFailedRetrieved());
+  }
+
+  @Test
+  public void testRefreshNodesResourcesRetrieved() {
+    long totalGoodBefore = metrics.getNumSucceededRefreshNodesResourcesRetrieved();
+    goodSubCluster.getRefreshNodesResourcesRetrieved(150);
+    Assert.assertEquals(totalGoodBefore + 1,
+        metrics.getNumSucceededRefreshNodesResourcesRetrieved());
+    Assert.assertEquals(150,
+        metrics.getLatencySucceededRefreshNodesResourcesRetrieved(), ASSERT_DOUBLE_DELTA);
+    goodSubCluster.getRefreshNodesResourcesRetrieved(300);
+    Assert.assertEquals(totalGoodBefore + 2,
+        metrics.getNumSucceededRefreshNodesResourcesRetrieved());
+    Assert.assertEquals(225,
+        metrics.getLatencySucceededRefreshNodesResourcesRetrieved(), ASSERT_DOUBLE_DELTA);
+  }
 }
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java
index 60a782bd8a9..7449c8474d5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java
@@ -22,6 +22,9 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.test.LambdaTestUtils;
 import org.apache.hadoop.yarn.api.records.DecommissionType;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceOption;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest;
@@ -35,6 +38,10 @@ import 
org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesResponse; import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore; import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId; import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade; @@ -45,7 +52,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static org.junit.Assert.assertNotNull; @@ -63,6 +72,7 @@ public class TestFederationRMAdminInterceptor extends BaseRouterRMAdminTest { //////////////////////////////// private final static String USER_NAME = "test-user"; private final static int NUM_SUBCLUSTER = 4; + private final static int GB = 1024; private TestableFederationRMAdminInterceptor interceptor; private FederationStateStoreFacade facade; @@ -320,4 +330,62 @@ public class TestFederationRMAdminInterceptor extends BaseRouterRMAdminTest { LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.", () -> interceptor.refreshServiceAcls(request1)); } + + @Test + public void testUpdateNodeResourceEmptyRequest() throws Exception { + // null request1. + LambdaTestUtils.intercept(YarnException.class, "Missing UpdateNodeResource request.", + () -> interceptor.updateNodeResource(null)); + + // null request2. + Map nodeResourceMap = new HashMap<>(); + UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap); + LambdaTestUtils.intercept(YarnException.class, "Missing UpdateNodeResource SubClusterId.", + () -> interceptor.updateNodeResource(request)); + } + + @Test + public void testUpdateNodeResourceNormalRequest() throws Exception { + // case 1, test the existing subCluster (SC-1). + Map nodeResourceMap = new HashMap<>(); + NodeId nodeId = NodeId.newInstance("127.0.0.1", 1); + ResourceOption resourceOption = + ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1); + nodeResourceMap.put(nodeId, resourceOption); + UpdateNodeResourceRequest request = + UpdateNodeResourceRequest.newInstance(nodeResourceMap, "SC-1"); + UpdateNodeResourceResponse response = interceptor.updateNodeResource(request); + assertNotNull(response); + + // case 2, test the non-exist subCluster. + UpdateNodeResourceRequest request1 = + UpdateNodeResourceRequest.newInstance(nodeResourceMap, "SC-NON"); + LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.", + () -> interceptor.updateNodeResource(request1)); + } + + @Test + public void testRefreshNodesResourcesEmptyRequest() throws Exception { + // null request1. + LambdaTestUtils.intercept(YarnException.class, "Missing RefreshNodesResources request.", + () -> interceptor.refreshNodesResources(null)); + + // null request2. 
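+    // A request built without a subClusterId must be rejected as well.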
+ RefreshNodesResourcesRequest request = RefreshNodesResourcesRequest.newInstance(); + LambdaTestUtils.intercept(YarnException.class, "Missing RefreshNodesResources SubClusterId.", + () -> interceptor.refreshNodesResources(request)); + } + + @Test + public void testRefreshNodesResourcesNormalRequest() throws Exception { + // case 1, test the existing subCluster (SC-1). + RefreshNodesResourcesRequest request = RefreshNodesResourcesRequest.newInstance("SC-1"); + RefreshNodesResourcesResponse response = interceptor.refreshNodesResources(request); + assertNotNull(response); + + // case 2, test the non-exist subCluster. + RefreshNodesResourcesRequest request1 = RefreshNodesResourcesRequest.newInstance("SC-NON"); + LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.", + () -> interceptor.refreshNodesResources(request1)); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestableFederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestableFederationRMAdminInterceptor.java index b95bcd4a62b..29d06385e4e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestableFederationRMAdminInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestableFederationRMAdminInterceptor.java @@ -52,7 +52,7 @@ public class TestableFederationRMAdminInterceptor extends FederationRMAdminInter @Override protected ResourceManagerAdministrationProtocol getAdminRMProxyForSubCluster( - SubClusterId subClusterId) throws YarnException { + SubClusterId subClusterId) throws Exception { MockRM mockRM; synchronized (this) { if (mockRMs.containsKey(subClusterId)) { @@ -66,6 +66,7 @@ public class TestableFederationRMAdminInterceptor extends FederationRMAdminInter } mockRM.init(config); mockRM.start(); + mockRM.registerNode("127.0.0.1:1", 102400, 100); mockRMs.put(subClusterId, mockRM); } return mockRM.getAdminService(); From b5e8269d9b4f57a8afc730a25326be68dd2129db Mon Sep 17 00:00:00 2001 From: Jinhu Wu Date: Tue, 28 Mar 2023 14:27:01 +0800 Subject: [PATCH 45/97] HADOOP-18458: AliyunOSSBlockOutputStream to support heap/off-heap buffer before uploading data to OSS (#4912) --- .../oss/AliyunOSSBlockOutputStream.java | 219 ++-- .../fs/aliyun/oss/AliyunOSSFileSystem.java | 33 +- .../aliyun/oss/AliyunOSSFileSystemStore.java | 63 +- .../hadoop/fs/aliyun/oss/Constants.java | 53 + .../hadoop/fs/aliyun/oss/OSSDataBlocks.java | 1109 +++++++++++++++++ .../BlockOutputStreamStatistics.java | 72 ++ .../impl/OutputStreamStatistics.java | 98 ++ .../oss/statistics/impl/package-info.java | 29 + .../aliyun/oss/statistics/package-info.java | 27 + .../markdown/tools/hadoop-aliyun/index.md | 50 +- .../oss/TestAliyunOSSBlockOutputStream.java | 193 ++- 11 files changed, 1830 insertions(+), 116 deletions(-) create mode 100644 hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/OSSDataBlocks.java create mode 100644 hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/BlockOutputStreamStatistics.java create mode 100644 hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/impl/OutputStreamStatistics.java create mode 
100644 hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/impl/package-info.java create mode 100644 hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/package-info.java diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSBlockOutputStream.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSBlockOutputStream.java index d9a3f5830da..132d38526bf 100644 --- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSBlockOutputStream.java +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSBlockOutputStream.java @@ -19,6 +19,7 @@ package org.apache.hadoop.fs.aliyun.oss; import com.aliyun.oss.model.PartETag; +import org.apache.hadoop.fs.aliyun.oss.statistics.BlockOutputStreamStatistics; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; @@ -27,17 +28,15 @@ import org.apache.hadoop.conf.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; /** * Asynchronous multi-part based uploading mechanism to support huge file @@ -49,71 +48,103 @@ public class AliyunOSSBlockOutputStream extends OutputStream { LoggerFactory.getLogger(AliyunOSSBlockOutputStream.class); private AliyunOSSFileSystemStore store; private Configuration conf; - private boolean closed; + private final AtomicBoolean closed = new AtomicBoolean(false); private String key; - private File blockFile; - private Map blockFiles = new HashMap<>(); - private long blockSize; + private int blockSize; private int blockId = 0; private long blockWritten = 0L; private String uploadId = null; private final List> partETagsFutures; + private final OSSDataBlocks.BlockFactory blockFactory; + private final BlockOutputStreamStatistics statistics; + private OSSDataBlocks.DataBlock activeBlock; private final ListeningExecutorService executorService; - private OutputStream blockStream; private final byte[] singleByte = new byte[1]; public AliyunOSSBlockOutputStream(Configuration conf, AliyunOSSFileSystemStore store, String key, - Long blockSize, + int blockSize, + OSSDataBlocks.BlockFactory blockFactory, + BlockOutputStreamStatistics statistics, ExecutorService executorService) throws IOException { this.store = store; this.conf = conf; this.key = key; this.blockSize = blockSize; - this.blockFile = newBlockFile(); - this.blockStream = - new BufferedOutputStream(new FileOutputStream(blockFile)); + this.blockFactory = blockFactory; + this.statistics = statistics; this.partETagsFutures = new ArrayList<>(2); this.executorService = MoreExecutors.listeningDecorator(executorService); } - private File newBlockFile() throws IOException { - return AliyunOSSUtils.createTmpFileForWrite( - String.format("oss-block-%04d-", blockId), blockSize, conf); + /** + * Demand create a destination block. 
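+   * If no block is active, a new one is created and becomes the active block.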
+ * @return the active block; null if there isn't one. + * @throws IOException on any failure to create + */ + private synchronized OSSDataBlocks.DataBlock createBlockIfNeeded() + throws IOException { + if (activeBlock == null) { + blockId++; + activeBlock = blockFactory.create(blockId, blockSize, statistics); + } + return activeBlock; } + /** + * Check for the filesystem being open. + * @throws IOException if the filesystem is closed. + */ + void checkOpen() throws IOException { + if (closed.get()) { + throw new IOException("Stream closed."); + } + } + + /** + * The flush operation does not trigger an upload; that awaits + * the next block being full. What it does do is call {@code flush() } + * on the current block, leaving it to choose how to react. + * @throws IOException Any IO problem. + */ @Override public synchronized void flush() throws IOException { - blockStream.flush(); + checkOpen(); + + OSSDataBlocks.DataBlock dataBlock = getActiveBlock(); + if (dataBlock != null) { + dataBlock.flush(); + } } @Override public synchronized void close() throws IOException { - if (closed) { + if (closed.get()) { + // already closed + LOG.debug("Ignoring close() as stream is already closed"); return; } - blockStream.flush(); - blockStream.close(); - if (!blockFiles.values().contains(blockFile)) { - blockId++; - blockFiles.put(blockId, blockFile); - } - try { - if (blockFiles.size() == 1) { + if (uploadId == null) { // just upload it directly - store.uploadObject(key, blockFile); + OSSDataBlocks.DataBlock dataBlock = getActiveBlock(); + if (dataBlock == null) { + // zero size file + store.storeEmptyFile(key); + } else { + OSSDataBlocks.BlockUploadData uploadData = dataBlock.startUpload(); + if (uploadData.hasFile()) { + store.uploadObject(key, uploadData.getFile()); + } else { + store.uploadObject(key, + uploadData.getUploadStream(), dataBlock.dataSize()); + } + } } else { if (blockWritten > 0) { - ListenableFuture partETagFuture = - executorService.submit(() -> { - PartETag partETag = store.uploadPart(blockFile, key, uploadId, - blockId); - return partETag; - }); - partETagsFutures.add(partETagFuture); + uploadCurrentBlock(); } // wait for the partial uploads to finish final List partETags = waitForAllPartUploads(); @@ -124,8 +155,8 @@ public class AliyunOSSBlockOutputStream extends OutputStream { new ArrayList<>(partETags)); } } finally { - removeTemporaryFiles(); - closed = true; + cleanupWithLogger(LOG, getActiveBlock(), blockFactory); + closed.set(true); } } @@ -138,64 +169,82 @@ public class AliyunOSSBlockOutputStream extends OutputStream { @Override public synchronized void write(byte[] b, int off, int len) throws IOException { - if (closed) { - throw new IOException("Stream closed."); + int totalWritten = 0; + while (totalWritten < len) { + int written = internalWrite(b, off + totalWritten, len - totalWritten); + totalWritten += written; + LOG.debug("Buffer len {}, written {}, total written {}", + len, written, totalWritten); } - blockStream.write(b, off, len); - blockWritten += len; - if (blockWritten >= blockSize) { - uploadCurrentPart(); - blockWritten = 0L; + } + private synchronized int internalWrite(byte[] b, int off, int len) + throws IOException { + OSSDataBlocks.validateWriteArgs(b, off, len); + checkOpen(); + if (len == 0) { + return 0; + } + OSSDataBlocks.DataBlock block = createBlockIfNeeded(); + int written = block.write(b, off, len); + blockWritten += written; + int remainingCapacity = block.remainingCapacity(); + if (written < len) { + // not everything was written — the 
block has run out + // of capacity + // Trigger an upload then process the remainder. + LOG.debug("writing more data than block has capacity -triggering upload"); + uploadCurrentBlock(); + } else { + if (remainingCapacity == 0) { + // the whole buffer is done, trigger an upload + uploadCurrentBlock(); + } + } + return written; + } + + /** + * Clear the active block. + */ + private void clearActiveBlock() { + if (activeBlock != null) { + LOG.debug("Clearing active block"); + } + synchronized (this) { + activeBlock = null; } } - private void removeTemporaryFiles() { - for (File file : blockFiles.values()) { - if (file != null && file.exists() && !file.delete()) { - LOG.warn("Failed to delete temporary file {}", file); - } - } + private synchronized OSSDataBlocks.DataBlock getActiveBlock() { + return activeBlock; } - private void removePartFiles() throws IOException { - for (ListenableFuture partETagFuture : partETagsFutures) { - if (!partETagFuture.isDone()) { - continue; - } - - try { - File blockFile = blockFiles.get(partETagFuture.get().getPartNumber()); - if (blockFile != null && blockFile.exists() && !blockFile.delete()) { - LOG.warn("Failed to delete temporary file {}", blockFile); - } - } catch (InterruptedException | ExecutionException e) { - throw new IOException(e); - } - } - } - - private void uploadCurrentPart() throws IOException { - blockStream.flush(); - blockStream.close(); - if (blockId == 0) { + private void uploadCurrentBlock() + throws IOException { + if (uploadId == null) { uploadId = store.getUploadId(key); } - blockId++; - blockFiles.put(blockId, blockFile); - - File currentFile = blockFile; int currentBlockId = blockId; - ListenableFuture partETagFuture = - executorService.submit(() -> { - PartETag partETag = store.uploadPart(currentFile, key, uploadId, - currentBlockId); - return partETag; - }); - partETagsFutures.add(partETagFuture); - removePartFiles(); - blockFile = newBlockFile(); - blockStream = new BufferedOutputStream(new FileOutputStream(blockFile)); + OSSDataBlocks.DataBlock dataBlock = getActiveBlock(); + long size = dataBlock.dataSize(); + OSSDataBlocks.BlockUploadData uploadData = dataBlock.startUpload(); + try { + ListenableFuture partETagFuture = + executorService.submit(() -> { + try { + PartETag partETag = store.uploadPart(uploadData, size, key, + uploadId, currentBlockId); + return partETag; + } finally { + cleanupWithLogger(LOG, uploadData, dataBlock); + } + }); + partETagsFutures.add(partETagFuture); + } finally { + blockWritten = 0; + clearActiveBlock(); + } } /** diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystem.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystem.java index 5f40488bfd6..c41940fde9d 100644 --- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystem.java +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystem.java @@ -27,6 +27,9 @@ import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.aliyun.oss.statistics.BlockOutputStreamStatistics; +import org.apache.hadoop.fs.aliyun.oss.statistics.impl.OutputStreamStatistics; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -71,6 +74,9 
@@ public class AliyunOSSFileSystem extends FileSystem { private String bucket; private String username; private Path workingDir; + private OSSDataBlocks.BlockFactory blockFactory; + private BlockOutputStreamStatistics blockOutputStreamStatistics; + private int uploadPartSize; private int blockOutputActiveBlocks; private AliyunOSSFileSystemStore store; private int maxKeys; @@ -128,13 +134,13 @@ public class AliyunOSSFileSystem extends FileSystem { // this means the file is not found } - long uploadPartSize = AliyunOSSUtils.getMultipartSizeProperty(getConf(), - MULTIPART_UPLOAD_PART_SIZE_KEY, MULTIPART_UPLOAD_PART_SIZE_DEFAULT); return new FSDataOutputStream( new AliyunOSSBlockOutputStream(getConf(), store, key, uploadPartSize, + blockFactory, + blockOutputStreamStatistics, new SemaphoredDelegatingExecutor(boundedThreadPool, blockOutputActiveBlocks, true)), statistics); } @@ -334,6 +340,7 @@ public class AliyunOSSFileSystem extends FileSystem { */ public void initialize(URI name, Configuration conf) throws IOException { super.initialize(name, conf); + setConf(conf); bucket = name.getHost(); uri = java.net.URI.create(name.getScheme() + "://" + name.getAuthority()); @@ -345,6 +352,16 @@ public class AliyunOSSFileSystem extends FileSystem { blockOutputActiveBlocks = intOption(conf, UPLOAD_ACTIVE_BLOCKS_KEY, UPLOAD_ACTIVE_BLOCKS_DEFAULT, 1); + uploadPartSize = (int)AliyunOSSUtils.getMultipartSizeProperty(conf, + MULTIPART_UPLOAD_PART_SIZE_KEY, MULTIPART_UPLOAD_PART_SIZE_DEFAULT); + String uploadBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, + DEFAULT_FAST_UPLOAD_BUFFER); + + blockOutputStreamStatistics = new OutputStreamStatistics(); + blockFactory = OSSDataBlocks.createFactory(this, uploadBuffer); + LOG.debug("Using OSSBlockOutputStream with buffer = {}; block={};" + + " queue limit={}", + uploadBuffer, uploadPartSize, blockOutputActiveBlocks); store = new AliyunOSSFileSystemStore(); store.initialize(name, conf, username, statistics); maxKeys = conf.getInt(MAX_PAGING_KEYS_KEY, MAX_PAGING_KEYS_DEFAULT); @@ -379,8 +396,6 @@ public class AliyunOSSFileSystem extends FileSystem { this.boundedCopyThreadPool = BlockingThreadPoolExecutorService.newInstance( maxCopyThreads, maxCopyTasks, 60L, TimeUnit.SECONDS, "oss-copy-unbounded"); - - setConf(conf); } /** @@ -757,4 +772,14 @@ public class AliyunOSSFileSystem extends FileSystem { public AliyunOSSFileSystemStore getStore() { return store; } + + @VisibleForTesting + OSSDataBlocks.BlockFactory getBlockFactory() { + return blockFactory; + } + + @VisibleForTesting + BlockOutputStreamStatistics getBlockOutputStreamStatistics() { + return blockOutputStreamStatistics; + } } diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java index 156af04babf..6e0c7dc7e4b 100644 --- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java @@ -422,6 +422,27 @@ public class AliyunOSSFileSystemStore { } } + /** + * Upload an input stream as an OSS object, using single upload. + * @param key object key. + * @param in input stream to upload. + * @param size size of the input stream. + * @throws IOException if failed to upload object. 
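+   *                     The stream is not closed by this method; the caller
+   *                     remains responsible for closing it.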
+ */ + public void uploadObject(String key, InputStream in, long size) + throws IOException { + ObjectMetadata meta = new ObjectMetadata(); + meta.setContentLength(size); + + if (StringUtils.isNotEmpty(serverSideEncryptionAlgorithm)) { + meta.setServerSideEncryption(serverSideEncryptionAlgorithm); + } + + PutObjectResult result = ossClient.putObject(bucketName, key, in, meta); + LOG.debug(result.getETag()); + statistics.incrementWriteOps(1); + } + /** * list objects. * @@ -652,44 +673,58 @@ public class AliyunOSSFileSystemStore { }; } + public PartETag uploadPart(OSSDataBlocks.BlockUploadData partData, + long size, String key, String uploadId, int idx) throws IOException { + if (partData.hasFile()) { + return uploadPart(partData.getFile(), key, uploadId, idx); + } else { + return uploadPart(partData.getUploadStream(), size, key, uploadId, idx); + } + } + public PartETag uploadPart(File file, String key, String uploadId, int idx) throws IOException { - InputStream instream = null; + InputStream in = new FileInputStream(file); + try { + return uploadPart(in, file.length(), key, uploadId, idx); + } finally { + in.close(); + } + } + + public PartETag uploadPart(InputStream in, long size, String key, + String uploadId, int idx) throws IOException { Exception caught = null; int tries = 3; while (tries > 0) { try { - instream = new FileInputStream(file); UploadPartRequest uploadRequest = new UploadPartRequest(); uploadRequest.setBucketName(bucketName); uploadRequest.setKey(key); uploadRequest.setUploadId(uploadId); - uploadRequest.setInputStream(instream); - uploadRequest.setPartSize(file.length()); + uploadRequest.setInputStream(in); + uploadRequest.setPartSize(size); uploadRequest.setPartNumber(idx); UploadPartResult uploadResult = ossClient.uploadPart(uploadRequest); statistics.incrementWriteOps(1); return uploadResult.getPartETag(); } catch (Exception e) { - LOG.debug("Failed to upload "+ file.getPath() +", " + + LOG.debug("Failed to upload " + key + ", part " + idx + "try again.", e); caught = e; - } finally { - if (instream != null) { - instream.close(); - instream = null; - } } tries--; } assert (caught != null); - throw new IOException("Failed to upload " + file.getPath() + + throw new IOException("Failed to upload " + key + ", part " + idx + " for 3 times.", caught); } /** * Initiate multipart upload. + * @param key object key. + * @return upload id. */ public String getUploadId(String key) { InitiateMultipartUploadRequest initiateMultipartUploadRequest = @@ -701,6 +736,10 @@ public class AliyunOSSFileSystemStore { /** * Complete the specific multipart upload. + * @param key object key. + * @param uploadId upload id of this multipart upload. + * @param partETags part etags need to be completed. + * @return CompleteMultipartUploadResult. */ public CompleteMultipartUploadResult completeMultipartUpload(String key, String uploadId, List partETags) { @@ -713,6 +752,8 @@ public class AliyunOSSFileSystemStore { /** * Abort the specific multipart upload. + * @param key object key. + * @param uploadId upload id of this multipart upload. 
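+   *                 Parts already uploaded under this id are discarded.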
*/ public void abortMultipartUpload(String key, String uploadId) { AbortMultipartUploadRequest request = new AbortMultipartUploadRequest( diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java index 3421b421813..baeb9199377 100644 --- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java @@ -134,6 +134,59 @@ public final class Constants { // Comma separated list of directories public static final String BUFFER_DIR_KEY = "fs.oss.buffer.dir"; + /** + * What buffer to use. + * Default is {@link #FAST_UPLOAD_BUFFER_DISK} + * Value: {@value} + */ + public static final String FAST_UPLOAD_BUFFER = + "fs.oss.fast.upload.buffer"; + + /** + * Buffer blocks to disk: {@value}. + * Capacity is limited to available disk space. + */ + public static final String FAST_UPLOAD_BUFFER_DISK = "disk"; + + /** + * Use an in-memory array. Fast but will run of heap rapidly: {@value}. + */ + public static final String FAST_UPLOAD_BUFFER_ARRAY = "array"; + + /** + * Use a byte buffer. May be more memory efficient than the + * {@link #FAST_UPLOAD_BUFFER_ARRAY}: {@value}. + */ + public static final String FAST_UPLOAD_BYTEBUFFER = "bytebuffer"; + + /** + * Use an in-memory array and fallback to disk if + * used memory exceed the quota. + */ + public static final String FAST_UPLOAD_BUFFER_ARRAY_DISK = "array_disk"; + + /** + * Use a byte buffer and fallback to disk if + * used memory exceed the quota. + */ + public static final String FAST_UPLOAD_BYTEBUFFER_DISK = "bytebuffer_disk"; + + /** + * Memory limit of {@link #FAST_UPLOAD_BUFFER_ARRAY_DISK} or + * {@link #FAST_UPLOAD_BYTEBUFFER_DISK}. + */ + public static final String FAST_UPLOAD_BUFFER_MEMORY_LIMIT = + "fs.oss.fast.upload.memory.limit"; + + public static final long FAST_UPLOAD_BUFFER_MEMORY_LIMIT_DEFAULT = + 1024 * 1024 * 1024; // 1GB + + /** + * Default buffer option: {@value}. + */ + public static final String DEFAULT_FAST_UPLOAD_BUFFER = + FAST_UPLOAD_BUFFER_DISK; + // private | public-read | public-read-write public static final String CANNED_ACL_KEY = "fs.oss.acl.default"; public static final String CANNED_ACL_DEFAULT = ""; diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/OSSDataBlocks.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/OSSDataBlocks.java new file mode 100644 index 00000000000..048f8b7ec3a --- /dev/null +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/OSSDataBlocks.java @@ -0,0 +1,1109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
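The constants added to Constants.java above map directly onto user-facing configuration. A hedged example of selecting the array-with-disk-fallback buffer from Java; the property names come from those constants, while the 256 MB limit, the buffer directory and the oss://example-bucket URI are made-up values for illustration only:

    import java.io.IOException;
    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    // Sketch: choose "array_disk" buffering and cap pinned memory before
    // blocks spill to the directories listed in fs.oss.buffer.dir.
    static FileSystem openWithArrayDiskBuffer() throws IOException {
      Configuration conf = new Configuration();
      conf.set("fs.oss.fast.upload.buffer", "array_disk");                  // FAST_UPLOAD_BUFFER_ARRAY_DISK
      conf.setLong("fs.oss.fast.upload.memory.limit", 256L * 1024 * 1024);  // FAST_UPLOAD_BUFFER_MEMORY_LIMIT
      conf.set("fs.oss.buffer.dir", "/tmp/oss-buffer");                     // BUFFER_DIR_KEY, used when spilling
      return FileSystem.get(URI.create("oss://example-bucket/"), conf);
    }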
+ */ + +package org.apache.hadoop.fs.aliyun.oss; + +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.Closeable; +import java.io.EOFException; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.aliyun.oss.statistics.BlockOutputStreamStatistics; +import org.apache.hadoop.util.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FSExceptionMessages; +import org.apache.hadoop.util.DirectBufferPool; + +import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; + +/** + * Set of classes to support output streaming into blocks which are then + * uploaded as to OSS as a single PUT, or as part of a multipart request. + */ +final class OSSDataBlocks { + private static final Logger LOG = + LoggerFactory.getLogger(OSSDataBlocks.class); + + private OSSDataBlocks() { + } + + /** + * Validate args to a write command. These are the same validation checks + * expected for any implementation of {@code OutputStream.write()}. + * @param b byte array containing data + * @param off offset in array where to start + * @param len number of bytes to be written + * @throws NullPointerException for a null buffer + * @throws IndexOutOfBoundsException if indices are out of range + */ + static void validateWriteArgs(byte[] b, int off, int len) + throws IOException { + Preconditions.checkNotNull(b); + if ((off < 0) || (off > b.length) || (len < 0) || + ((off + len) > b.length) || ((off + len) < 0)) { + throw new IndexOutOfBoundsException( + "write (b[" + b.length + "], " + off + ", " + len + ')'); + } + } + + /** + * Create a factory. + * @param owner factory owner + * @param name factory name -the option from {@link Constants}. + * @return the factory, ready to be initialized. + * @throws IllegalArgumentException if the name is unknown. + */ + static BlockFactory createFactory(AliyunOSSFileSystem owner, + String name) { + switch (name) { + case Constants.FAST_UPLOAD_BUFFER_ARRAY: + return new ArrayBlockFactory(owner); + case Constants.FAST_UPLOAD_BUFFER_DISK: + return new DiskBlockFactory(owner); + case Constants.FAST_UPLOAD_BYTEBUFFER: + return new ByteBufferBlockFactory(owner); + case Constants.FAST_UPLOAD_BUFFER_ARRAY_DISK: + return new MemoryAndDiskBlockFactory( + owner, new ArrayBlockFactory(owner)); + case Constants.FAST_UPLOAD_BYTEBUFFER_DISK: + return new MemoryAndDiskBlockFactory( + owner, new ByteBufferBlockFactory(owner)); + default: + throw new IllegalArgumentException("Unsupported block buffer" + + " \"" + name + '"'); + } + } + + /** + * The output information for an upload. + * It can be one of a file or an input stream. + * When closed, any stream is closed. Any source file is untouched. + */ + static final class BlockUploadData implements Closeable { + private final File file; + private final InputStream uploadStream; + + /** + * File constructor; input stream will be null. 
+ * @param file file to upload + */ + BlockUploadData(File file) { + Preconditions.checkArgument(file.exists(), "No file: " + file); + this.file = file; + this.uploadStream = null; + } + + /** + * Stream constructor, file field will be null. + * @param uploadStream stream to upload + */ + BlockUploadData(InputStream uploadStream) { + Preconditions.checkNotNull(uploadStream, "rawUploadStream"); + this.uploadStream = uploadStream; + this.file = null; + } + + /** + * Predicate: does this instance contain a file reference. + * @return true if there is a file. + */ + boolean hasFile() { + return file != null; + } + + /** + * Get the file, if there is one. + * @return the file for uploading, or null. + */ + File getFile() { + return file; + } + + /** + * Get the raw upload stream, if the object was + * created with one. + * @return the upload stream or null. + */ + InputStream getUploadStream() { + return uploadStream; + } + + /** + * Close: closes any upload stream provided in the constructor. + * @throws IOException inherited exception + */ + @Override + public void close() throws IOException { + cleanupWithLogger(LOG, uploadStream); + } + } + + /** + * Base class for block factories. + */ + static abstract class BlockFactory implements Closeable { + private final AliyunOSSFileSystem owner; + + protected BlockFactory(AliyunOSSFileSystem owner) { + this.owner = owner; + } + + /** + * Create a block. + * + * @param index index of block + * @param limit limit of the block + * @param statistics stats to work with + * @return a new block. + */ + abstract DataBlock create(long index, int limit, + BlockOutputStreamStatistics statistics) throws IOException; + + /** + * Implement any close/cleanup operation. + * Base class is a no-op + * @throws IOException Inherited exception; implementations should + * avoid raising it. + */ + @Override + public void close() throws IOException { + } + + /** + * Owner. + */ + protected AliyunOSSFileSystem getOwner() { + return owner; + } + } + + /** + * This represents a block being uploaded. + */ + static abstract class DataBlock implements Closeable { + + enum DestState {Writing, Upload, Closed} + + private volatile DestState state = DestState.Writing; + private final long index; + private final BlockOutputStreamStatistics statistics; + + protected DataBlock(long index, + BlockOutputStreamStatistics statistics) { + this.index = index; + this.statistics = statistics; + } + + /** + * Atomically enter a state, verifying current state. + * @param current current state. null means "no check" + * @param next next state + * @throws IllegalStateException if the current state is not as expected + */ + protected synchronized final void enterState(DestState current, + DestState next) + throws IllegalStateException { + verifyState(current); + LOG.debug("{}: entering state {}", this, next); + state = next; + } + + /** + * Verify that the block is in the declared state. + * @param expected expected state. + * @throws IllegalStateException if the DataBlock is in the wrong state + */ + protected final void verifyState(DestState expected) + throws IllegalStateException { + if (expected != null && state != expected) { + throw new IllegalStateException("Expected stream state " + expected + + " -but actual state is " + state + " in " + this); + } + } + + /** + * Current state. + * @return the current state. + */ + final DestState getState() { + return state; + } + + /** + * Get index, used by subclasses. 
+ */ + final long getIndex() { + return index; + } + + /** + * Return the current data size. + * @return the size of the data + */ + abstract int dataSize(); + + /** + * Predicate to verify that the block has the capacity to write + * the given set of bytes. + * @param bytes number of bytes desired to be written. + * @return true if there is enough space. + */ + abstract boolean hasCapacity(long bytes); + + /** + * Predicate to check if there is data in the block. + * @return true if there is + */ + boolean hasData() { + return dataSize() > 0; + } + + /** + * The remaining capacity in the block before it is full. + * @return the number of bytes remaining. + */ + abstract int remainingCapacity(); + + /** + * Write a series of bytes from the buffer, from the offset. + * Returns the number of bytes written. + * Only valid in the state {@code Writing}. + * Base class verifies the state but does no writing. + * @param buffer buffer + * @param offset offset + * @param length length of write + * @return number of bytes written + * @throws IOException trouble + */ + int write(byte[] buffer, int offset, int length) throws IOException { + verifyState(DestState.Writing); + Preconditions.checkArgument(buffer != null, "Null buffer"); + Preconditions.checkArgument(length >= 0, "length is negative"); + Preconditions.checkArgument(offset >= 0, "offset is negative"); + Preconditions.checkArgument( + !(buffer.length - offset < length), + "buffer shorter than amount of data to write"); + return 0; + } + + /** + * Flush the output. + * Only valid in the state {@code Writing}. + * In the base class, this is a no-op + * @throws IOException any IO problem. + */ + void flush() throws IOException { + verifyState(DestState.Writing); + } + + /** + * Switch to the upload state and return a stream for uploading. + * Base class calls {@link #enterState(DestState, DestState)} to + * manage the state machine. + * @return the stream + * @throws IOException trouble + */ + BlockUploadData startUpload() throws IOException { + LOG.debug("Start datablock[{}] upload", index); + enterState(DestState.Writing, DestState.Upload); + return null; + } + + /** + * Enter the closed state. + * @return true if the class was in any other state, implying that + * the subclass should do its close operations + */ + protected synchronized boolean enterClosedState() { + if (!state.equals(DestState.Closed)) { + enterState(null, DestState.Closed); + return true; + } else { + return false; + } + } + + @Override + public void close() throws IOException { + if (enterClosedState()) { + LOG.debug("Closed {}", this); + innerClose(); + } + } + + /** + * Inner close logic for subclasses to implement. + */ + protected void innerClose() throws IOException { + } + + /** + * A block has been allocated. + */ + protected void blockAllocated() { + if (statistics != null) { + statistics.blockAllocated(); + } + } + + /** + * A block has been released. + */ + protected void blockReleased() { + if (statistics != null) { + statistics.blockReleased(); + } + } + + /** + * A disk block has been allocated. + */ + protected void diskBlockAllocated() { + if (statistics != null) { + statistics.diskBlockAllocated(); + } + } + + /** + * A disk block has been released. + */ + protected void diskBlockReleased() { + if (statistics != null) { + statistics.diskBlockReleased(); + } + } + + /** + * Memory bytes has been allocated. 
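Taken together, the methods above give each DataBlock a small state machine: Writing, then Upload, then Closed. A condensed sketch of how a writer is expected to drive one block; factory, stats, data and limit are assumed parameters, error handling is omitted, and because these classes are package-private the sketch would have to live in org.apache.hadoop.fs.aliyun.oss. It is an illustration, not the output stream's actual code:

    import java.io.IOException;
    import org.apache.hadoop.fs.aliyun.oss.statistics.BlockOutputStreamStatistics;

    // Sketch of one block's lifecycle against the abstract contract above.
    static void blockLifecycleSketch(OSSDataBlocks.BlockFactory factory,
        BlockOutputStreamStatistics stats, byte[] data, int limit) throws IOException {
      OSSDataBlocks.DataBlock block = factory.create(1, limit, stats);  // state: Writing
      // (memory-backed factories may return null when over their memory limit; ignored here)
      int written = block.write(data, 0, data.length);  // may write less once the block fills
      block.flush();                                    // only legal while Writing
      OSSDataBlocks.BlockUploadData upload = block.startUpload();       // Writing -> Upload
      // hand upload.getFile() or upload.getUploadStream() to the store, then:
      upload.close();   // closes a wrapped stream; a source file is left untouched
      block.close();    // -> Closed; buffer or temp-file resources are released
    }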
+ */ + protected void bytesAllocated(long size) { + if (statistics != null) { + statistics.bytesAllocated(size); + } + } + + /** + * Memory bytes has been released. + */ + protected void bytesReleased(long size) { + if (statistics != null) { + statistics.bytesReleased(size); + } + } + + protected BlockOutputStreamStatistics getStatistics() { + return statistics; + } + } + + // ==================================================================== + + static class MemoryLimitException extends IOException { + MemoryLimitException(String msg) { + super(msg); + } + } + + static abstract class MemoryBlockFactory extends BlockFactory { + private final AtomicLong memoryUsed = new AtomicLong(0); + private long memoryLimit = 0; + private boolean checkMemory = false; + + MemoryBlockFactory(AliyunOSSFileSystem owner) { + super(owner); + } + + void setMemoryLimit(long memoryLimit) { + this.memoryLimit = memoryLimit; + if (memoryLimit > 0) { + checkMemory = true; + } + } + + void allocateMemory(long size) throws MemoryLimitException { + if (!checkMemory) { + return; + } + long next = memoryUsed.addAndGet(size); + if (next > memoryLimit) { + memoryUsed.getAndAdd(-size); + String msg = "Can not allocate memory" + + ", memory used " + memoryUsed + + ", allocate size " + size + + ", memory limit " + memoryLimit; + throw new MemoryLimitException(msg); + } + } + + void releaseMemory(long size) { + if (!checkMemory) { + return; + } + memoryUsed.getAndAdd(-size); + } + + long getMemoryUsed() { + return memoryUsed.get(); + } + } + + /** + * Use byte arrays on the heap for storage. + */ + static class ArrayBlockFactory extends MemoryBlockFactory { + + ArrayBlockFactory(AliyunOSSFileSystem owner) { + super(owner); + } + + @Override + DataBlock create(long index, int limit, + BlockOutputStreamStatistics statistics) + throws IOException { + try { + return new ByteArrayBlock(index, limit, statistics); + } catch (MemoryLimitException e) { + LOG.debug(e.getMessage() + ", index " + index); + return null; + } + } + + static class OSSByteArrayOutputStream extends ByteArrayOutputStream { + + OSSByteArrayOutputStream(int size) { + super(size); + } + + /** + * InputStream backed by the internal byte array. + * + * @return + */ + ByteArrayInputStream getInputStream() { + ByteArrayInputStream bin = new ByteArrayInputStream(this.buf, 0, count); + this.reset(); + this.buf = null; + return bin; + } + } + + /** + * Stream to memory via a {@code ByteArrayOutputStream}. + *

+ * This has the problem: it can consume a lot of heap space + * proportional to the mismatch between writes to the stream and + * the JVM-wide upload bandwidth to the OSS endpoint. + * The memory consumption can be limited by tuning the filesystem settings + * to restrict the number of queued/active uploads. + */ + + class ByteArrayBlock extends DataBlock { + private OSSByteArrayOutputStream buffer; + private final int limit; + // cache data size so that it is consistent after the buffer is reset. + private Integer dataSize; + + ByteArrayBlock(long index, + int limit, + BlockOutputStreamStatistics statistics) throws MemoryLimitException { + super(index, statistics); + this.limit = limit; + allocateMemory(limit); + buffer = new OSSByteArrayOutputStream(limit); + blockAllocated(); + bytesAllocated(limit); + } + + /** + * Get the amount of data; if there is no buffer then the size is 0. + * + * @return the amount of data available to upload. + */ + @Override + int dataSize() { + return dataSize != null ? dataSize : buffer.size(); + } + + @Override + BlockUploadData startUpload() throws IOException { + super.startUpload(); + dataSize = buffer.size(); + ByteArrayInputStream bufferData = buffer.getInputStream(); + buffer = null; + return new BlockUploadData(bufferData); + } + + @Override + boolean hasCapacity(long bytes) { + return dataSize() + bytes <= limit; + } + + @Override + int remainingCapacity() { + return limit - dataSize(); + } + + @Override + int write(byte[] b, int offset, int len) throws IOException { + super.write(b, offset, len); + int written = Math.min(remainingCapacity(), len); + buffer.write(b, offset, written); + return written; + } + + @Override + protected void innerClose() { + buffer = null; + releaseMemory(limit); + blockReleased(); + bytesReleased(limit); + } + + @Override + public String toString() { + return "ByteArrayBlock{" + + "index=" + getIndex() + + ", state=" + getState() + + ", limit=" + limit + + ", dataSize=" + dataSize + + '}'; + } + } + } + + // ==================================================================== + + /** + * Stream via Direct ByteBuffers; these are allocated off heap + * via {@link DirectBufferPool}. + */ + static class ByteBufferBlockFactory extends MemoryBlockFactory { + private final DirectBufferPool bufferPool = new DirectBufferPool(); + private final AtomicInteger buffersOutstanding = new AtomicInteger(0); + + ByteBufferBlockFactory(AliyunOSSFileSystem owner) { + super(owner); + } + + @Override + ByteBufferBlock create(long index, int limit, + BlockOutputStreamStatistics statistics) + throws IOException { + try { + return new ByteBufferBlock(index, limit, statistics); + } catch (MemoryLimitException e) { + LOG.debug(e.getMessage() + ", index " + index); + return null; + } + } + + private ByteBuffer requestBuffer(int limit) { + LOG.debug("Requesting buffer of size {}", limit); + buffersOutstanding.incrementAndGet(); + return bufferPool.getBuffer(limit); + } + + private void releaseBuffer(ByteBuffer buffer) { + LOG.debug("Releasing buffer"); + bufferPool.returnBuffer(buffer); + buffersOutstanding.decrementAndGet(); + } + + /** + * Get count of outstanding buffers. + * @return the current buffer count + */ + public int getOutstandingBufferCount() { + return buffersOutstanding.get(); + } + + @Override + public String toString() { + return "ByteBufferBlockFactory{" + + "buffersOutstanding=" + buffersOutstanding + + '}'; + } + + /** + * A DataBlock which requests a buffer from pool on creation; returns + * it when it is closed. 
+ */ + class ByteBufferBlock extends DataBlock { + private ByteBuffer blockBuffer; + private final int bufferSize; + // cache data size so that it is consistent after the buffer is reset. + private Integer dataSize; + + /** + * Instantiate. This will request a ByteBuffer of the desired size. + * @param index block index + * @param bufferSize buffer size + */ + ByteBufferBlock(long index, int bufferSize, + BlockOutputStreamStatistics statistics) throws MemoryLimitException { + super(index, statistics); + this.bufferSize = bufferSize; + allocateMemory(bufferSize); + blockBuffer = requestBuffer(bufferSize); + blockAllocated(); + bytesAllocated(bufferSize); + } + + /** + * Get the amount of data; if there is no buffer then the size is 0. + * @return the amount of data available to upload. + */ + @Override + int dataSize() { + return dataSize != null ? dataSize : bufferCapacityUsed(); + } + + @Override + BlockUploadData startUpload() throws IOException { + super.startUpload(); + dataSize = bufferCapacityUsed(); + // set the buffer up from reading from the beginning + blockBuffer.limit(blockBuffer.position()); + blockBuffer.position(0); + return new BlockUploadData( + new ByteBufferInputStream(dataSize, blockBuffer)); + } + + @Override + public boolean hasCapacity(long bytes) { + return bytes <= remainingCapacity(); + } + + @Override + public int remainingCapacity() { + return blockBuffer != null ? blockBuffer.remaining() : 0; + } + + private int bufferCapacityUsed() { + return blockBuffer.capacity() - blockBuffer.remaining(); + } + + @Override + int write(byte[] b, int offset, int len) throws IOException { + super.write(b, offset, len); + int written = Math.min(remainingCapacity(), len); + blockBuffer.put(b, offset, written); + return written; + } + + /** + * Closing the block will release the buffer. + */ + @Override + protected void innerClose() { + if (blockBuffer != null) { + releaseMemory(bufferSize); + blockReleased(); + bytesReleased(bufferSize); + releaseBuffer(blockBuffer); + blockBuffer = null; + } + } + + @Override + public String toString() { + return "ByteBufferBlock{" + + "index=" + getIndex() + + ", state=" + getState() + + ", dataSize=" + dataSize() + + ", limit=" + bufferSize + + ", remainingCapacity=" + remainingCapacity() + + '}'; + } + + /** + * Provide an input stream from a byte buffer; supporting + * {@link #mark(int)}, which is required to enable replay of failed + * PUT attempts. + */ + class ByteBufferInputStream extends InputStream { + + private final int size; + private ByteBuffer byteBuffer; + + ByteBufferInputStream(int size, + ByteBuffer byteBuffer) { + LOG.debug("Creating ByteBufferInputStream of size {}", size); + this.size = size; + this.byteBuffer = byteBuffer; + } + + /** + * After the stream is closed, set the local reference to the byte + * buffer to null; this guarantees that future attempts to use + * stream methods will fail. + */ + @Override + public synchronized void close() { + LOG.debug("ByteBufferInputStream.close() for {}", + ByteBufferBlock.super.toString()); + byteBuffer = null; + } + + /** + * Verify that the stream is open. 
+ * @throws IOException if the stream is closed + */ + private void verifyOpen() throws IOException { + if (byteBuffer == null) { + throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + } + } + + public synchronized int read() throws IOException { + if (available() > 0) { + return byteBuffer.get() & 0xFF; + } else { + return -1; + } + } + + @Override + public synchronized long skip(long offset) throws IOException { + verifyOpen(); + long newPos = position() + offset; + if (newPos < 0) { + throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); + } + if (newPos > size) { + throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); + } + byteBuffer.position((int) newPos); + return newPos; + } + + @Override + public synchronized int available() { + Preconditions.checkState(byteBuffer != null, + FSExceptionMessages.STREAM_IS_CLOSED); + return byteBuffer.remaining(); + } + + /** + * Get the current buffer position. + * @return the buffer position + */ + public synchronized int position() { + return byteBuffer.position(); + } + + /** + * Check if there is data left. + * @return true if there is data remaining in the buffer. + */ + public synchronized boolean hasRemaining() { + return byteBuffer.hasRemaining(); + } + + @Override + public synchronized void mark(int readlimit) { + LOG.debug("mark at {}", position()); + byteBuffer.mark(); + } + + @Override + public synchronized void reset() throws IOException { + LOG.debug("reset"); + byteBuffer.reset(); + } + + @Override + public boolean markSupported() { + return true; + } + + /** + * Read in data. + * @param b destination buffer + * @param offset offset within the buffer + * @param length length of bytes to read + * @throws EOFException if the position is negative + * @throws IndexOutOfBoundsException if there isn't space for the + * amount of data requested. + * @throws IllegalArgumentException other arguments are invalid. + */ + @SuppressWarnings("NullableProblems") + public synchronized int read(byte[] b, int offset, int length) + throws IOException { + Preconditions.checkArgument(length >= 0, "length is negative"); + Preconditions.checkArgument(b != null, "Null buffer"); + if (b.length - offset < length) { + throw new IndexOutOfBoundsException( + FSExceptionMessages.TOO_MANY_BYTES_FOR_DEST_BUFFER + + ": request length =" + length + + ", with offset =" + offset + + "; buffer capacity =" + (b.length - offset)); + } + verifyOpen(); + if (!hasRemaining()) { + return -1; + } + + int toRead = Math.min(length, available()); + byteBuffer.get(b, offset, toRead); + return toRead; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "ByteBufferInputStream{"); + sb.append("size=").append(size); + ByteBuffer buf = this.byteBuffer; + if (buf != null) { + sb.append(", available=").append(buf.remaining()); + } + sb.append(", ").append(ByteBufferBlock.super.toString()); + sb.append('}'); + return sb.toString(); + } + } + } + } + + // ==================================================================== + + /** + * Buffer blocks to disk. + */ + static class DiskBlockFactory extends BlockFactory { + + DiskBlockFactory(AliyunOSSFileSystem owner) { + super(owner); + } + + /** + * Create a temp file and a {@link DiskBlock} instance to manage it. + * + * @param index block index + * @param limit limit of the block. 
+ * @return the new block + * @throws IOException IO problems + */ + @Override + DataBlock create(long index, int limit, + BlockOutputStreamStatistics statistics) + throws IOException { + File destFile = AliyunOSSUtils.createTmpFileForWrite( + String.format("oss-block-%04d-", index), limit, getOwner().getConf()); + return new DiskBlock(destFile, limit, index, statistics); + } + } + + /** + * Stream to a file. + * This will stop at the limit; the caller is expected to create a new block. + */ + static class DiskBlock extends DataBlock { + + private int bytesWritten = 0; + private final File bufferFile; + private final int limit; + private BufferedOutputStream out; + private final AtomicBoolean closed = new AtomicBoolean(false); + + DiskBlock(File bufferFile, + int limit, + long index, + BlockOutputStreamStatistics statistics) + throws FileNotFoundException { + super(index, statistics); + this.limit = limit; + this.bufferFile = bufferFile; + blockAllocated(); + diskBlockAllocated(); + out = new BufferedOutputStream(new FileOutputStream(bufferFile)); + } + + @Override + int dataSize() { + return bytesWritten; + } + + @Override + boolean hasCapacity(long bytes) { + return dataSize() + bytes <= limit; + } + + @Override + int remainingCapacity() { + return limit - bytesWritten; + } + + @Override + int write(byte[] b, int offset, int len) throws IOException { + super.write(b, offset, len); + int written = Math.min(remainingCapacity(), len); + out.write(b, offset, written); + bytesWritten += written; + return written; + } + + @Override + BlockUploadData startUpload() throws IOException { + super.startUpload(); + try { + out.flush(); + } finally { + out.close(); + out = null; + } + return new BlockUploadData(bufferFile); + } + + /** + * The close operation will delete the destination file if it still + * exists. + * @throws IOException IO problems + */ + @SuppressWarnings("UnnecessaryDefault") + @Override + protected void innerClose() throws IOException { + final DestState state = getState(); + LOG.debug("Closing {}", this); + switch (state) { + case Writing: + if (bufferFile.exists()) { + // file was not uploaded + LOG.debug("Block[{}]: Deleting buffer file as upload did not start", + getIndex()); + closeBlock(); + } + break; + + case Upload: + LOG.debug("Block[{}]: Buffer file {} exists —close upload stream", + getIndex(), bufferFile); + break; + + case Closed: + closeBlock(); + break; + + default: + // this state can never be reached, but checkstyle complains, so + // it is here. + } + } + + /** + * Flush operation will flush to disk. + * @throws IOException IOE raised on FileOutputStream + */ + @Override + void flush() throws IOException { + super.flush(); + out.flush(); + } + + @Override + public String toString() { + String sb = "FileBlock{" + + "index=" + getIndex() + + ", destFile=" + bufferFile + + ", state=" + getState() + + ", dataSize=" + dataSize() + + ", limit=" + limit + + '}'; + return sb; + } + + /** + * Close the block. + * This will delete the block's buffer file if the block has + * not previously been closed. + */ + void closeBlock() { + LOG.debug("block[{}]: closeBlock()", getIndex()); + if (!closed.getAndSet(true)) { + blockReleased(); + diskBlockReleased(); + if (!bufferFile.delete() && bufferFile.exists()) { + LOG.warn("delete({}) returned false", + bufferFile.getAbsoluteFile()); + } + } else { + LOG.debug("block[{}]: skipping re-entrant closeBlock()", getIndex()); + } + } + } + + /** + * Buffer blocks to memory and fallback to disk if + * used memory exceed the quota. 
+ */ + static class MemoryAndDiskBlockFactory extends BlockFactory { + private BlockFactory memoryFactory; + private BlockFactory diskFactory; + + MemoryAndDiskBlockFactory(AliyunOSSFileSystem owner, + BlockFactory memoryFactory) { + super(owner); + this.memoryFactory = memoryFactory; + diskFactory = new DiskBlockFactory(owner); + + long memoryLimit = owner.getConf().getLong( + Constants.FAST_UPLOAD_BUFFER_MEMORY_LIMIT, + Constants.FAST_UPLOAD_BUFFER_MEMORY_LIMIT_DEFAULT); + ((MemoryBlockFactory)this.memoryFactory).setMemoryLimit(memoryLimit); + } + + /** + * Create a temp file and a {@link DataBlock} instance to manage it. + * + * @param index block index + * @param limit limit of the block. + * @return the new block + * @throws IOException IO problems + */ + @Override + DataBlock create(long index, int limit, + BlockOutputStreamStatistics statistics) + throws IOException { + DataBlock block = memoryFactory.create(index, limit, statistics); + if (block != null) { + return block; + } else { + return diskFactory.create(index, limit, statistics); + } + } + + @VisibleForTesting + MemoryBlockFactory getMemoryFactory() { + return (MemoryBlockFactory)memoryFactory; + } + } +} diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/BlockOutputStreamStatistics.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/BlockOutputStreamStatistics.java new file mode 100644 index 00000000000..51940b71831 --- /dev/null +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/BlockOutputStreamStatistics.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.aliyun.oss.statistics; + +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Block output stream statistics. + */ +@InterfaceStability.Unstable +public interface BlockOutputStreamStatistics { + + /** + * A block has been allocated. + */ + void blockAllocated(); + + /** + * A block has been released. + */ + void blockReleased(); + + /** + * A disk block has been allocated. + */ + void diskBlockAllocated(); + + /** + * A disk block has been released. + */ + void diskBlockReleased(); + + /** + * Memory bytes has been allocated. + * @param size allocated size. + */ + void bytesAllocated(long size); + + /** + * Memory bytes has been released. + * @param size released size. 
+ */ + void bytesReleased(long size); + + int getBlocksAllocated(); + + int getBlocksReleased(); + + int getDiskBlocksAllocated(); + + int getDiskBlocksReleased(); + + long getBytesAllocated(); + + long getBytesReleased(); +} diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/impl/OutputStreamStatistics.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/impl/OutputStreamStatistics.java new file mode 100644 index 00000000000..011a2eecd08 --- /dev/null +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/impl/OutputStreamStatistics.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.aliyun.oss.statistics.impl; + +import org.apache.hadoop.fs.aliyun.oss.statistics.BlockOutputStreamStatistics; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Implementation of {@link BlockOutputStreamStatistics}. 
+ */ +public class OutputStreamStatistics implements BlockOutputStreamStatistics { + private final AtomicInteger blocksAllocated = new AtomicInteger(0); + private final AtomicInteger blocksReleased = new AtomicInteger(0); + + private final AtomicInteger diskBlocksAllocated = new AtomicInteger(0); + private final AtomicInteger diskBlocksReleased = new AtomicInteger(0); + + private final AtomicLong bytesAllocated = new AtomicLong(0); + private final AtomicLong bytesReleased = new AtomicLong(0); + + @Override + public void blockAllocated() { + blocksAllocated.incrementAndGet(); + } + + @Override + public void blockReleased() { + blocksReleased.incrementAndGet(); + } + + @Override + public void diskBlockAllocated() { + diskBlocksAllocated.incrementAndGet(); + } + + @Override + public void diskBlockReleased() { + diskBlocksReleased.incrementAndGet(); + } + + @Override + public int getBlocksAllocated() { + return blocksAllocated.get(); + } + + @Override + public int getBlocksReleased() { + return blocksReleased.get(); + } + + @Override + public int getDiskBlocksAllocated() { + return diskBlocksAllocated.get(); + } + + @Override + public int getDiskBlocksReleased() { + return diskBlocksReleased.get(); + } + + @Override + public void bytesAllocated(long size) { + bytesAllocated.getAndAdd(size); + } + + @Override + public void bytesReleased(long size) { + bytesReleased.getAndAdd(size); + } + + @Override + public long getBytesAllocated() { + return bytesAllocated.get(); + } + + @Override + public long getBytesReleased() { + return bytesReleased.get(); + } +} diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/impl/package-info.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/impl/package-info.java new file mode 100644 index 00000000000..2f044173bc3 --- /dev/null +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/impl/package-info.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Statistics collection for the OSS connector: implementation. + * Not for use by anything outside the hadoop-aliyun source tree. 
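Consistent with that restriction, the counters are only meant to be read from test code in the same package, through the @VisibleForTesting accessor added to AliyunOSSFileSystem earlier. A short sketch of the kind of check this enables; assertEquals is the JUnit 4 assertion and the helper is assumed to sit in org.apache.hadoop.fs.aliyun.oss:

    import org.apache.hadoop.fs.aliyun.oss.statistics.BlockOutputStreamStatistics;
    import static org.junit.Assert.assertEquals;

    // Sketch: once streams are closed, every allocation should have a matching release.
    static void assertAllBuffersReleased(AliyunOSSFileSystem ossFs) {
      BlockOutputStreamStatistics stats = ossFs.getBlockOutputStreamStatistics();
      assertEquals(stats.getBlocksAllocated(), stats.getBlocksReleased());
      assertEquals(stats.getDiskBlocksAllocated(), stats.getDiskBlocksReleased());
      assertEquals(stats.getBytesAllocated(), stats.getBytesReleased());
    }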
+ */ + +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.aliyun.oss.statistics.impl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/package-info.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/package-info.java new file mode 100644 index 00000000000..49abca73d5d --- /dev/null +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/statistics/package-info.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Statistics collection for the OSS connector: interfaces. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.aliyun.oss.statistics; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-tools/hadoop-aliyun/src/site/markdown/tools/hadoop-aliyun/index.md b/hadoop-tools/hadoop-aliyun/src/site/markdown/tools/hadoop-aliyun/index.md index d48bf1c6b02..578b9595d1d 100644 --- a/hadoop-tools/hadoop-aliyun/src/site/markdown/tools/hadoop-aliyun/index.md +++ b/hadoop-tools/hadoop-aliyun/src/site/markdown/tools/hadoop-aliyun/index.md @@ -164,7 +164,7 @@ please raise your issues with them. fs.oss.attempts.maximum - 20 + 10 How many times we should retry commands on transient errors. @@ -239,7 +239,7 @@ please raise your issues with them. fs.oss.multipart.download.size - 102400/value> + 524288/value> Size in bytes in each request from ALiyun OSS. @@ -251,9 +251,53 @@ please raise your issues with them. + + fs.oss.fast.upload.buffer + disk + + The buffering mechanism to use. + Values: disk, array, bytebuffer, array_disk, bytebuffer_disk. + + "disk" will use the directories listed in fs.oss.buffer.dir as + the location(s) to save data prior to being uploaded. + + "array" uses arrays in the JVM heap + + "bytebuffer" uses off-heap memory within the JVM. + + Both "array" and "bytebuffer" will consume memory in a single stream up to the number + of blocks set by: + + fs.oss.multipart.upload.size * fs.oss.upload.active.blocks. + + If using either of these mechanisms, keep this value low + + The total number of threads performing work across all threads is set by + fs.oss.multipart.download.threads(Currently fast upload shares the same thread tool with download. + The thread pool size is specified in "fs.oss.multipart.download.threads"), + with fs.oss.max.total.tasks values setting the number of queued work items. 
+ + "array_disk" and "bytebuffer_disk" support fallback to disk. + + + + + fs.oss.fast.upload.memory.limit + 1073741824 + + Memory limit of "array_disk" and "bytebuffer_disk" upload buffers. + Will fallback to disk buffers if used memory reaches the limit. + + + fs.oss.buffer.dir - Comma separated list of directories to buffer OSS data before uploading to Aliyun OSS + ${env.LOCAL_DIRS:-${hadoop.tmp.dir}}/oss + Comma separated list of directories to buffer + OSS data before uploading to Aliyun OSS. + Yarn container path will be used as default value on yarn applications, + otherwise fall back to hadoop.tmp.dir + diff --git a/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/TestAliyunOSSBlockOutputStream.java b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/TestAliyunOSSBlockOutputStream.java index 69aa0a5a795..891890dfc40 100644 --- a/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/TestAliyunOSSBlockOutputStream.java +++ b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/TestAliyunOSSBlockOutputStream.java @@ -22,6 +22,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.aliyun.oss.OSSDataBlocks.ByteBufferBlockFactory; +import org.apache.hadoop.fs.aliyun.oss.statistics.BlockOutputStreamStatistics; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.test.GenericTestUtils; import org.junit.After; @@ -37,12 +39,19 @@ import java.util.ArrayList; import java.util.LinkedHashSet; import static org.apache.hadoop.fs.aliyun.oss.Constants.BUFFER_DIR_KEY; +import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BUFFER; +import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BUFFER_ARRAY_DISK; +import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BUFFER_DISK; +import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BUFFER_MEMORY_LIMIT; +import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BYTEBUFFER; +import static org.apache.hadoop.fs.aliyun.oss.Constants.FAST_UPLOAD_BYTEBUFFER_DISK; import static org.apache.hadoop.fs.aliyun.oss.Constants.MULTIPART_UPLOAD_PART_SIZE_DEFAULT; import static org.apache.hadoop.fs.aliyun.oss.Constants.MULTIPART_UPLOAD_PART_SIZE_KEY; import static org.apache.hadoop.fs.contract.ContractTestUtils.IO_CHUNK_BUFFER_SIZE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** @@ -54,6 +63,7 @@ public class TestAliyunOSSBlockOutputStream { private static final int PART_SIZE = 1024 * 1024; private static String testRootPath = AliyunOSSTestUtils.generateUniqueTestPath(); + private static final long MEMORY_LIMIT = 10 * 1024 * 1024; @Rule public Timeout testTimeout = new Timeout(30 * 60 * 1000); @@ -65,6 +75,7 @@ public class TestAliyunOSSBlockOutputStream { conf.setInt(IO_CHUNK_BUFFER_SIZE, conf.getInt(MULTIPART_UPLOAD_PART_SIZE_KEY, 0)); conf.setInt(Constants.UPLOAD_ACTIVE_BLOCKS_KEY, 20); + conf.setLong(FAST_UPLOAD_BUFFER_MEMORY_LIMIT, MEMORY_LIMIT); fs = AliyunOSSTestUtils.createTestFileSystem(conf); } @@ -82,7 +93,7 @@ public class TestAliyunOSSBlockOutputStream { @Test public void testZeroByteUpload() throws IOException { ContractTestUtils.createAndVerifyFile(fs, getTestPath(), 
0); - bufferDirShouldEmpty(); + bufferShouldReleased(true); } @Test @@ -106,20 +117,21 @@ public class TestAliyunOSSBlockOutputStream { assertEquals(size - 1, statistics.getBytesRead()); assertEquals(3, statistics.getWriteOps()); assertEquals(size - 1, statistics.getBytesWritten()); + bufferShouldReleased(); ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size); assertEquals(14, statistics.getReadOps()); assertEquals(2 * size - 1, statistics.getBytesRead()); assertEquals(6, statistics.getWriteOps()); assertEquals(2 * size - 1, statistics.getBytesWritten()); + bufferShouldReleased(); ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size + 1); - assertEquals(22, statistics.getReadOps()); assertEquals(3 * size, statistics.getBytesRead()); assertEquals(10, statistics.getWriteOps()); assertEquals(3 * size, statistics.getBytesWritten()); - bufferDirShouldEmpty(); + bufferShouldReleased(); } @Test @@ -133,19 +145,21 @@ public class TestAliyunOSSBlockOutputStream { assertEquals(size - 1, statistics.getBytesRead()); assertEquals(8, statistics.getWriteOps()); assertEquals(size - 1, statistics.getBytesWritten()); + bufferShouldReleased(); ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size); assertEquals(34, statistics.getReadOps()); assertEquals(2 * size - 1, statistics.getBytesRead()); assertEquals(16, statistics.getWriteOps()); assertEquals(2 * size - 1, statistics.getBytesWritten()); + bufferShouldReleased(); ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size + 1); assertEquals(52, statistics.getReadOps()); assertEquals(3 * size, statistics.getBytesRead()); assertEquals(25, statistics.getWriteOps()); assertEquals(3 * size, statistics.getBytesWritten()); - bufferDirShouldEmpty(); + bufferShouldReleased(); } @Test @@ -159,16 +173,18 @@ public class TestAliyunOSSBlockOutputStream { assertEquals(size, statistics.getBytesRead()); assertEquals(52, statistics.getWriteOps()); assertEquals(size, statistics.getBytesWritten()); - bufferDirShouldEmpty(); + bufferShouldReleased(); } @Test public void testHugeUpload() throws IOException { ContractTestUtils.createAndVerifyFile(fs, getTestPath(), PART_SIZE - 1); + bufferShouldReleased(); ContractTestUtils.createAndVerifyFile(fs, getTestPath(), PART_SIZE); + bufferShouldReleased(); ContractTestUtils.createAndVerifyFile(fs, getTestPath(), MULTIPART_UPLOAD_PART_SIZE_DEFAULT + 1); - bufferDirShouldEmpty(); + bufferShouldReleased(); } @Test @@ -199,15 +215,43 @@ public class TestAliyunOSSBlockOutputStream { public void testSmallUpload() throws IOException { long size = fs.getConf().getInt(MULTIPART_UPLOAD_PART_SIZE_KEY, 1024); ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size - 1); - bufferDirShouldEmpty(); + bufferShouldReleased(); } - private void bufferDirShouldEmpty() throws IOException { - Path bufferPath = new Path(fs.getConf().get(BUFFER_DIR_KEY)); - FileStatus[] files = bufferPath.getFileSystem( - fs.getConf()).listStatus(bufferPath); - // Temporary file should be deleted - assertEquals(0, files.length); + private void bufferShouldReleased() throws IOException { + bufferShouldReleased(false); + } + + private void bufferShouldReleased(boolean zeroSizeFile) throws IOException { + String bufferDir = fs.getConf().get(BUFFER_DIR_KEY); + String bufferType = fs.getConf().get(FAST_UPLOAD_BUFFER); + if (bufferType.equals(FAST_UPLOAD_BUFFER_DISK)) { + assertNotNull(bufferDir); + Path bufferPath = new Path(fs.getConf().get(BUFFER_DIR_KEY)); + FileStatus[] files = bufferPath.getFileSystem( + 
fs.getConf()).listStatus(bufferPath); + // Temporary file should be deleted + assertEquals(0, files.length); + } else { + if (bufferType.equals(FAST_UPLOAD_BYTEBUFFER)) { + OSSDataBlocks.ByteBufferBlockFactory + blockFactory = (OSSDataBlocks.ByteBufferBlockFactory) + ((AliyunOSSFileSystem)fs).getBlockFactory(); + assertEquals("outstanding buffers in " + blockFactory, + 0, blockFactory.getOutstandingBufferCount()); + } + } + BlockOutputStreamStatistics statistics = + ((AliyunOSSFileSystem)fs).getBlockOutputStreamStatistics(); + assertEquals(statistics.getBlocksAllocated(), + statistics.getBlocksReleased()); + if (zeroSizeFile) { + assertEquals(statistics.getBlocksAllocated(), 0); + } else { + assertTrue(statistics.getBlocksAllocated() >= 1); + } + assertEquals(statistics.getBytesReleased(), + statistics.getBytesAllocated()); } @Test @@ -249,4 +293,127 @@ public class TestAliyunOSSBlockOutputStream { assertNotEquals("round robin not working", tmp1.getParent(), tmp2.getParent()); } + + @Test + public void testByteBufferIO() throws IOException { + try (OSSDataBlocks.ByteBufferBlockFactory factory = + new OSSDataBlocks.ByteBufferBlockFactory((AliyunOSSFileSystem)fs)) { + int limit = 128; + OSSDataBlocks.ByteBufferBlockFactory.ByteBufferBlock block + = factory.create(1, limit, null); + assertEquals("outstanding buffers in " + factory, + 1, factory.getOutstandingBufferCount()); + + byte[] buffer = ContractTestUtils.toAsciiByteArray("test data"); + int bufferLen = buffer.length; + block.write(buffer, 0, bufferLen); + assertEquals(bufferLen, block.dataSize()); + assertEquals("capacity in " + block, + limit - bufferLen, block.remainingCapacity()); + assertTrue("hasCapacity(64) in " + block, block.hasCapacity(64)); + assertTrue("No capacity in " + block, + block.hasCapacity(limit - bufferLen)); + + // now start the write + OSSDataBlocks.BlockUploadData blockUploadData = block.startUpload(); + ByteBufferBlockFactory.ByteBufferBlock.ByteBufferInputStream + stream = + (ByteBufferBlockFactory.ByteBufferBlock.ByteBufferInputStream) + blockUploadData.getUploadStream(); + assertTrue("Mark not supported in " + stream, stream.markSupported()); + assertTrue("!hasRemaining() in " + stream, stream.hasRemaining()); + + int expected = bufferLen; + assertEquals("wrong available() in " + stream, + expected, stream.available()); + + assertEquals('t', stream.read()); + stream.mark(limit); + expected--; + assertEquals("wrong available() in " + stream, + expected, stream.available()); + + // read into a byte array with an offset + int offset = 5; + byte[] in = new byte[limit]; + assertEquals(2, stream.read(in, offset, 2)); + assertEquals('e', in[offset]); + assertEquals('s', in[offset + 1]); + expected -= 2; + assertEquals("wrong available() in " + stream, + expected, stream.available()); + + // read to end + byte[] remainder = new byte[limit]; + int c; + int index = 0; + while ((c = stream.read()) >= 0) { + remainder[index++] = (byte) c; + } + assertEquals(expected, index); + assertEquals('a', remainder[--index]); + + assertEquals("wrong available() in " + stream, + 0, stream.available()); + assertTrue("hasRemaining() in " + stream, !stream.hasRemaining()); + + // go the mark point + stream.reset(); + assertEquals('e', stream.read()); + + // when the stream is closed, the data should be returned + stream.close(); + assertEquals("outstanding buffers in " + factory, + 1, factory.getOutstandingBufferCount()); + block.close(); + assertEquals("outstanding buffers in " + factory, + 0, factory.getOutstandingBufferCount()); + 
stream.close(); + assertEquals("outstanding buffers in " + factory, + 0, factory.getOutstandingBufferCount()); + } + } + + @Test + public void testFastUploadArrayDisk() throws IOException { + testFastUploadFallback(FAST_UPLOAD_BUFFER_ARRAY_DISK); + } + + @Test + public void testFastUploadByteBufferDisk() throws IOException { + testFastUploadFallback(FAST_UPLOAD_BYTEBUFFER_DISK); + } + + private void testFastUploadFallback(String name) throws IOException { + Configuration conf = fs.getConf(); + fs.close(); + + conf.set(FAST_UPLOAD_BUFFER, name); + + fs = AliyunOSSTestUtils.createTestFileSystem(conf); + long size = 5 * MEMORY_LIMIT; + ContractTestUtils.createAndVerifyFile(fs, getTestPath(), size); + OSSDataBlocks.MemoryBlockFactory + blockFactory = ((OSSDataBlocks.MemoryAndDiskBlockFactory) + ((AliyunOSSFileSystem)fs).getBlockFactory()).getMemoryFactory(); + assertEquals(blockFactory.getMemoryUsed(), 0); + + Path bufferPath = new Path(fs.getConf().get(BUFFER_DIR_KEY)); + FileStatus[] files = bufferPath.getFileSystem( + fs.getConf()).listStatus(bufferPath); + // Temporary file should be deleted + assertEquals(0, files.length); + + BlockOutputStreamStatistics statistics = + ((AliyunOSSFileSystem)fs).getBlockOutputStreamStatistics(); + assertEquals(statistics.getBlocksAllocated(), + statistics.getBlocksReleased()); + assertTrue(statistics.getBlocksAllocated() > 1); + assertEquals(statistics.getBytesReleased(), + statistics.getBytesAllocated()); + assertTrue(statistics.getBytesAllocated() >= MEMORY_LIMIT); + assertTrue(statistics.getDiskBlocksAllocated() > 0); + assertEquals(statistics.getDiskBlocksAllocated(), + statistics.getDiskBlocksReleased()); + } } From 700147b4ac18ceca5137e9d7fc8f53a5619768d4 Mon Sep 17 00:00:00 2001 From: zhangshuyan <81411509+zhangshuyan0@users.noreply.github.com> Date: Tue, 28 Mar 2023 16:14:59 +0800 Subject: [PATCH 46/97] HDFS-16964. Improve processing of excess redundancy after failover. (#5510). Contributed by Shuyan Zhang. Signed-off-by: He Xiaoqiao --- .../server/blockmanagement/BlockManager.java | 59 ++++++++++++------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index e5a6cf73b69..ec8bbf82498 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -3987,17 +3987,11 @@ public class BlockManager implements BlockStatsMXBean { } if (shouldProcessExtraRedundancy(num, expectedRedundancy)) { - if (num.replicasOnStaleNodes() > 0) { - // If any of the replicas of this block are on nodes that are - // considered "stale", then these replicas may in fact have - // already been deleted. So, we cannot safely act on the - // over-replication until a later point in time, when - // the "stale" nodes have block reported. + // extra redundancy block + if (!processExtraRedundancyBlockWithoutPostpone(block, expectedRedundancy, + null, null)) { return MisReplicationResult.POSTPONE; } - - // extra redundancy block - processExtraRedundancyBlock(block, expectedRedundancy, null, null); return MisReplicationResult.OVER_REPLICATED; } @@ -4020,12 +4014,26 @@ public class BlockManager implements BlockStatsMXBean { } } + /** + * Process blocks with redundant replicas. 
If there are replicas in + * stale storages, mark them in the postponedMisreplicatedBlocks. + */ + private void processExtraRedundancyBlock(final BlockInfo block, + final short replication, final DatanodeDescriptor addedNode, + DatanodeDescriptor delNodeHint) { + if (!processExtraRedundancyBlockWithoutPostpone(block, replication, + addedNode, delNodeHint)) { + postponeBlock(block); + } + } + /** * Find how many of the containing nodes are "extra", if any. * If there are any extras, call chooseExcessRedundancies() to * mark them in the excessRedundancyMap. + * @return true if all redundancy replicas are removed. */ - private void processExtraRedundancyBlock(final BlockInfo block, + private boolean processExtraRedundancyBlockWithoutPostpone(final BlockInfo block, final short replication, final DatanodeDescriptor addedNode, DatanodeDescriptor delNodeHint) { assert namesystem.hasWriteLock(); @@ -4035,17 +4043,17 @@ public class BlockManager implements BlockStatsMXBean { Collection nonExcess = new ArrayList<>(); Collection corruptNodes = corruptReplicas .getNodes(block); + boolean hasStaleStorage = false; + Set staleStorages = new HashSet<>(); for (DatanodeStorageInfo storage : blocksMap.getStorages(block)) { if (storage.getState() != State.NORMAL) { continue; } final DatanodeDescriptor cur = storage.getDatanodeDescriptor(); if (storage.areBlockContentsStale()) { - LOG.trace("BLOCK* processExtraRedundancyBlock: Postponing {}" - + " since storage {} does not yet have up-to-date information.", - block, storage); - postponeBlock(block); - return; + hasStaleStorage = true; + staleStorages.add(storage); + continue; } if (!isExcess(cur, block)) { if (cur.isInService()) { @@ -4058,6 +4066,13 @@ public class BlockManager implements BlockStatsMXBean { } chooseExcessRedundancies(nonExcess, block, replication, addedNode, delNodeHint); + if (hasStaleStorage) { + LOG.trace("BLOCK* processExtraRedundancyBlockWithoutPostpone: Postponing {}" + + " since storages {} does not yet have up-to-date information.", + block, staleStorages); + return false; + } + return true; } private void chooseExcessRedundancies( @@ -4071,12 +4086,14 @@ public class BlockManager implements BlockStatsMXBean { if (storedBlock.isStriped()) { chooseExcessRedundancyStriped(bc, nonExcess, storedBlock, delNodeHint); } else { - final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy( - bc.getStoragePolicyID()); - final List excessTypes = storagePolicy.chooseExcess( - replication, DatanodeStorageInfo.toStorageTypes(nonExcess)); - chooseExcessRedundancyContiguous(nonExcess, storedBlock, replication, - addedNode, delNodeHint, excessTypes); + if (nonExcess.size() > replication) { + final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy( + bc.getStoragePolicyID()); + final List excessTypes = storagePolicy.chooseExcess( + replication, DatanodeStorageInfo.toStorageTypes(nonExcess)); + chooseExcessRedundancyContiguous(nonExcess, storedBlock, replication, + addedNode, delNodeHint, excessTypes); + } } } From aa602381c595db4b958709c03874ad54597ba197 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Tue, 28 Mar 2023 22:48:46 +0800 Subject: [PATCH 47/97] YARN-11426. Improve YARN NodeLabel Memory Display. (#5335) YARN-11426. Improve YARN NodeLabel Memory Display. 
Co-authored-by: slfan1989 Reviewed-by: Inigo Goiri Reviewed-by: Chris Nauroth Signed-off-by: Shilun Fan --- .../hadoop-yarn/hadoop-yarn-api/pom.xml | 5 ++++ .../hadoop/yarn/api/records/Resource.java | 11 ++++++++ .../hadoop/yarn/api/records/TestResource.java | 25 +++++++++++++++++++ .../webapp/NodeLabelsPage.java | 2 +- 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml index 61747c2cd80..e4b8ee28227 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml @@ -115,6 +115,11 @@ com.fasterxml.jackson.core jackson-annotations + + org.mockito + mockito-core + test + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java index 0c10e017685..80e569d5a9e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java @@ -29,6 +29,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Stable; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; @@ -543,4 +544,14 @@ public abstract class Resource implements Comparable { ri.setMaximumAllocation(Long.MAX_VALUE); return ri; } + + @VisibleForTesting + protected void setResources(ResourceInformation[] resources) { + this.resources = resources; + } + + public String getFormattedString(long memory) { + return getFormattedString( + StringUtils.byteDesc(memory * 1024 * 1024)); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/records/TestResource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/records/TestResource.java index 638ecf9d322..060ca39c697 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/records/TestResource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/records/TestResource.java @@ -20,6 +20,8 @@ package org.apache.hadoop.yarn.api.records; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; /** * The class to test {@link Resource}. 
@@ -42,4 +44,27 @@ class TestResource { "Cast to Integer.MAX_VALUE if the long is greater than " + "Integer.MAX_VALUE"); } + + @Test + public void testResourceFormatted() { + Resource resource = spy(Resource.class); + resource.setResources(new ResourceInformation[0]); + when(resource.getVirtualCores()).thenReturn(1); + + // We set 10MB + String expectedResult1 = ""; + assertEquals(expectedResult1, resource.getFormattedString(10)); + + // We set 1024 MB = 1GB + String expectedResult2 = ""; + assertEquals(expectedResult2, resource.getFormattedString(1024)); + + // We set 1024 * 1024 MB = 1024 GB = 1TB + String expectedResult3 = ""; + assertEquals(expectedResult3, resource.getFormattedString(1024 * 1024)); + + // We set 1024 * 1024 * 1024 MB = 1024 * 1024 GB = 1 * 1024 TB = 1 PB + String expectedResult4 = ""; + assertEquals(expectedResult4, resource.getFormattedString(1024 * 1024 * 1024)); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodeLabelsPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodeLabelsPage.java index 6ff76281007..c4df6aa0e27 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodeLabelsPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodeLabelsPage.java @@ -75,7 +75,7 @@ public class NodeLabelsPage extends RmView { } else { row = row.td(String.valueOf(nActiveNMs)); } - row.td(info.getResource().toString()).__(); + row.td(info.getResource().toFormattedString()).__(); } tbody.__().__(); } From 5bc8f2532746453cb0b57a4c3552b02fae984ffd Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Wed, 29 Mar 2023 00:33:19 +0800 Subject: [PATCH 48/97] YARN-11446. [Federation] Add updateSchedulerConfiguration, getSchedulerConfiguration REST APIs for Router. 
(#5476) --- .../hadoop/yarn/webapp/dao/ConfInfo.java | 11 ++ .../yarn/webapp/dao/SchedConfUpdateInfo.java | 11 ++ .../yarn/server/router/RouterMetrics.java | 62 ++++++++ .../webapp/FederationInterceptorREST.java | 134 +++++++++++++++++- .../router/webapp/dao/FederationConfInfo.java | 55 +++++++ .../yarn/server/router/TestRouterMetrics.java | 66 +++++++++ .../MockDefaultRequestInterceptorREST.java | 28 +++- .../webapp/TestFederationInterceptorREST.java | 69 +++++++++ 8 files changed, 430 insertions(+), 6 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationConfInfo.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/ConfInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/ConfInfo.java index 1971efa5684..7ca396f49d5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/ConfInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/ConfInfo.java @@ -33,6 +33,8 @@ public class ConfInfo { private ArrayList property = new ArrayList<>(); + private String subClusterId; + public ConfInfo() { } // JAXB needs this @@ -74,5 +76,14 @@ public class ConfInfo { public String getValue() { return value; } + + } + + public String getSubClusterId() { + return subClusterId; + } + + public void setSubClusterId(String subClusterId) { + this.subClusterId = subClusterId; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/SchedConfUpdateInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/SchedConfUpdateInfo.java index 45462919ed1..8f3ad5d66e0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/SchedConfUpdateInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/dao/SchedConfUpdateInfo.java @@ -44,6 +44,9 @@ public class SchedConfUpdateInfo { @XmlElement(name = "update-queue") private ArrayList updateQueueInfo = new ArrayList<>(); + @XmlElement(name = "subClusterId") + private String subClusterId = ""; + private HashMap global = new HashMap<>(); public SchedConfUpdateInfo() { @@ -82,4 +85,12 @@ public class SchedConfUpdateInfo { public void setGlobalParams(HashMap globalInfo) { this.global = globalInfo; } + + public String getSubClusterId() { + return subClusterId; + } + + public void setSubClusterId(String subClusterId) { + this.subClusterId = subClusterId; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java index 3a581dfbd1f..a84a315b93c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java @@ -159,6 +159,10 @@ public final class RouterMetrics { private MutableGaugeInt 
numAddToClusterNodeLabelsFailedRetrieved; @Metric("# of removeFromClusterNodeLabels failed to be retrieved") private MutableGaugeInt numRemoveFromClusterNodeLabelsFailedRetrieved; + @Metric("# of numUpdateSchedulerConfiguration failed to be retrieved") + private MutableGaugeInt numUpdateSchedulerConfigurationFailedRetrieved; + @Metric("# of numGetSchedulerConfiguration failed to be retrieved") + private MutableGaugeInt numGetSchedulerConfigurationFailedRetrieved; @Metric("# of getClusterInfo failed to be retrieved") private MutableGaugeInt numGetClusterInfoFailedRetrieved; @Metric("# of getClusterUserInfo failed to be retrieved") @@ -287,6 +291,10 @@ public final class RouterMetrics { private MutableRate totalSucceededAddToClusterNodeLabelsRetrieved; @Metric("Total number of successful Retrieved RemoveFromClusterNodeLabels and latency(ms)") private MutableRate totalSucceededRemoveFromClusterNodeLabelsRetrieved; + @Metric("Total number of successful Retrieved updateSchedulerConfiguration and latency(ms)") + private MutableRate totalSucceededUpdateSchedulerConfigurationRetrieved; + @Metric("Total number of successful Retrieved getSchedulerConfiguration and latency(ms)") + private MutableRate totalSucceededGetSchedulerConfigurationRetrieved; @Metric("Total number of successful Retrieved GetClusterInfoRetrieved and latency(ms)") private MutableRate totalSucceededGetClusterInfoRetrieved; @Metric("Total number of successful Retrieved GetClusterUserInfoRetrieved and latency(ms)") @@ -358,6 +366,8 @@ public final class RouterMetrics { private MutableQuantiles replaceLabelsOnNodeLatency; private MutableQuantiles addToClusterNodeLabelsLatency; private MutableQuantiles removeFromClusterNodeLabelsLatency; + private MutableQuantiles updateSchedulerConfigLatency; + private MutableQuantiles getSchedulerConfigurationLatency; private MutableQuantiles getClusterInfoLatency; private MutableQuantiles getClusterUserInfoLatency; private MutableQuantiles updateNodeResourceLatency; @@ -572,6 +582,12 @@ public final class RouterMetrics { removeFromClusterNodeLabelsLatency = registry.newQuantiles("removeFromClusterNodeLabelsLatency", "latency of remove cluster nodelabels timeouts", "ops", "latency", 10); + updateSchedulerConfigLatency = registry.newQuantiles("updateSchedulerConfigurationLatency", + "latency of update scheduler configuration timeouts", "ops", "latency", 10); + + getSchedulerConfigurationLatency = registry.newQuantiles("getSchedulerConfigurationLatency", + "latency of get scheduler configuration timeouts", "ops", "latency", 10); + getClusterInfoLatency = registry.newQuantiles("getClusterInfoLatency", "latency of get cluster info timeouts", "ops", "latency", 10); @@ -879,6 +895,16 @@ public final class RouterMetrics { return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().numSamples(); } + @VisibleForTesting + public long getNumSucceededUpdateSchedulerConfigurationRetrieved() { + return totalSucceededUpdateSchedulerConfigurationRetrieved.lastStat().numSamples(); + } + + @VisibleForTesting + public long getNumSucceededGetSchedulerConfigurationRetrieved() { + return totalSucceededGetSchedulerConfigurationRetrieved.lastStat().numSamples(); + } + @VisibleForTesting public long getNumSucceededGetClusterInfoRetrieved() { return totalSucceededGetClusterInfoRetrieved.lastStat().numSamples(); @@ -1189,6 +1215,16 @@ public final class RouterMetrics { return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().mean(); } + @VisibleForTesting + public double 
getLatencySucceededUpdateSchedulerConfigurationRetrieved() { + return totalSucceededUpdateSchedulerConfigurationRetrieved.lastStat().mean(); + } + + @VisibleForTesting + public double getLatencySucceededGetSchedulerConfigurationRetrieved() { + return totalSucceededGetSchedulerConfigurationRetrieved.lastStat().mean(); + } + @VisibleForTesting public double getLatencySucceededGetClusterInfoRetrieved() { return totalSucceededGetClusterInfoRetrieved.lastStat().mean(); @@ -1454,6 +1490,14 @@ public final class RouterMetrics { return numRemoveFromClusterNodeLabelsFailedRetrieved.value(); } + public int getUpdateSchedulerConfigurationFailedRetrieved() { + return numUpdateSchedulerConfigurationFailedRetrieved.value(); + } + + public int getSchedulerConfigurationFailedRetrieved() { + return numGetSchedulerConfigurationFailedRetrieved.value(); + } + public int getClusterInfoFailedRetrieved() { return numGetClusterInfoFailedRetrieved.value(); } @@ -1773,6 +1817,16 @@ public final class RouterMetrics { removeFromClusterNodeLabelsLatency.add(duration); } + public void succeededUpdateSchedulerConfigurationRetrieved(long duration) { + totalSucceededUpdateSchedulerConfigurationRetrieved.add(duration); + updateSchedulerConfigLatency.add(duration); + } + + public void succeededGetSchedulerConfigurationRetrieved(long duration) { + totalSucceededGetSchedulerConfigurationRetrieved.add(duration); + getSchedulerConfigurationLatency.add(duration); + } + public void succeededGetClusterInfoRetrieved(long duration) { totalSucceededGetClusterInfoRetrieved.add(duration); getClusterInfoLatency.add(duration); @@ -2013,6 +2067,14 @@ public final class RouterMetrics { numRemoveFromClusterNodeLabelsFailedRetrieved.incr(); } + public void incrUpdateSchedulerConfigurationFailedRetrieved() { + numUpdateSchedulerConfigurationFailedRetrieved.incr(); + } + + public void incrGetSchedulerConfigurationFailedRetrieved() { + numGetSchedulerConfigurationFailedRetrieved.incr(); + } + public void incrGetClusterInfoFailedRetrieved() { numGetClusterInfoFailedRetrieved.incr(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java index 857e4c52c6f..9975823ec2b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java @@ -44,7 +44,6 @@ import javax.ws.rs.core.Response; import javax.ws.rs.core.Response.Status; import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.impl.prefetch.Validate; @@ -129,6 +128,7 @@ import org.apache.hadoop.yarn.server.router.webapp.dao.FederationBulkActivitiesI import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.SubClusterResult; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationSchedulerTypeInfo; +import 
org.apache.hadoop.yarn.server.router.webapp.dao.FederationConfInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterUserInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterInfo; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -136,6 +136,7 @@ import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo; import org.apache.hadoop.yarn.util.LRUCacheHashMap; +import org.apache.hadoop.yarn.webapp.dao.ConfInfo; import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo; import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.MonotonicClock; @@ -848,6 +849,29 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { } } + /** + * Get the active subcluster in the federation. + * + * @param subClusterId subClusterId. + * @return subClusterInfo. + * @throws NotFoundException If the subclusters cannot be found. + */ + private SubClusterInfo getActiveSubCluster(String subClusterId) + throws NotFoundException { + try { + SubClusterId pSubClusterId = SubClusterId.newInstance(subClusterId); + Map subClusterInfoMap = + federationFacade.getSubClusters(true); + SubClusterInfo subClusterInfo = subClusterInfoMap.get(pSubClusterId); + if (subClusterInfo == null) { + throw new NotFoundException(subClusterId + " not found."); + } + return subClusterInfo; + } catch (YarnException e) { + throw new NotFoundException(e.getMessage()); + } + } + /** * The YARN Router will forward to the request to all the SubClusters to find * where the node is running. @@ -2906,17 +2930,117 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { throw new RuntimeException("getContainer Failed."); } + /** + * This method updates the Scheduler configuration, and it is reachable by + * using {@link RMWSConsts#SCHEDULER_CONF}. + * + * @param mutationInfo th information for making scheduler configuration + * changes (supports adding, removing, or updating a queue, as well + * as global scheduler conf changes) + * @param hsr the servlet request + * @return Response containing the status code + * @throws AuthorizationException if the user is not authorized to invoke this + * method + * @throws InterruptedException if interrupted + */ @Override public Response updateSchedulerConfiguration(SchedConfUpdateInfo mutationInfo, - HttpServletRequest hsr) - throws AuthorizationException, InterruptedException { - throw new NotImplementedException("Code is not implemented"); + HttpServletRequest hsr) throws AuthorizationException, InterruptedException { + + // Make Sure mutationInfo is not null. + if (mutationInfo == null) { + routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved(); + throw new IllegalArgumentException( + "Parameter error, the schedConfUpdateInfo is empty or null."); + } + + // In federated mode, we may have a mix of multiple schedulers. + // In order to ensure accurate update scheduler configuration, + // we need users to explicitly set subClusterId. + String pSubClusterId = mutationInfo.getSubClusterId(); + if (StringUtils.isBlank(pSubClusterId)) { + routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved(); + throw new IllegalArgumentException("Parameter error, " + + "the subClusterId is empty or null."); + } + + // Get the subClusterInfo , then update the scheduler configuration. 
+ try { + long startTime = clock.getTime(); + SubClusterInfo subClusterInfo = getActiveSubCluster(pSubClusterId); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( + subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + Response response = interceptor.updateSchedulerConfiguration(mutationInfo, hsr); + if (response != null) { + long endTime = clock.getTime(); + routerMetrics.succeededUpdateSchedulerConfigurationRetrieved(endTime - startTime); + return Response.status(response.getStatus()).entity(response.getEntity()).build(); + } + } catch (NotFoundException e) { + routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved(); + RouterServerUtil.logAndThrowRunTimeException(e, + "Get subCluster error. subClusterId = %s", pSubClusterId); + } catch (Exception e) { + routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved(); + RouterServerUtil.logAndThrowRunTimeException(e, + "UpdateSchedulerConfiguration error. subClusterId = %s", pSubClusterId); + } + + routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved(); + throw new RuntimeException("UpdateSchedulerConfiguration error. subClusterId = " + + pSubClusterId); } + /** + * This method retrieves all the Scheduler configuration, and it is reachable + * by using {@link RMWSConsts#SCHEDULER_CONF}. + * + * @param hsr the servlet request + * @return Response containing the status code + * @throws AuthorizationException if the user is not authorized to invoke this + * method. + */ @Override public Response getSchedulerConfiguration(HttpServletRequest hsr) throws AuthorizationException { - throw new NotImplementedException("Code is not implemented"); + try { + long startTime = clock.getTime(); + FederationConfInfo federationConfInfo = new FederationConfInfo(); + Map subClustersActive = getActiveSubclusters(); + final HttpServletRequest hsrCopy = clone(hsr); + Class[] argsClasses = new Class[]{HttpServletRequest.class}; + Object[] args = new Object[]{hsrCopy}; + ClientMethod remoteMethod = new ClientMethod("getSchedulerConfiguration", argsClasses, args); + Map responseMap = + invokeConcurrent(subClustersActive.values(), remoteMethod, Response.class); + responseMap.forEach((subClusterInfo, response) -> { + SubClusterId subClusterId = subClusterInfo.getSubClusterId(); + if (response == null) { + String errorMsg = subClusterId + " Can't getSchedulerConfiguration."; + federationConfInfo.getErrorMsgs().add(errorMsg); + } else if (response.getStatus() == Status.BAD_REQUEST.getStatusCode()) { + String errorMsg = String.valueOf(response.getEntity()); + federationConfInfo.getErrorMsgs().add(errorMsg); + } else if (response.getStatus() == Status.OK.getStatusCode()) { + ConfInfo fedConfInfo = ConfInfo.class.cast(response.getEntity()); + fedConfInfo.setSubClusterId(subClusterId.getId()); + federationConfInfo.getList().add(fedConfInfo); + } + }); + long endTime = clock.getTime(); + routerMetrics.succeededGetSchedulerConfigurationRetrieved(endTime - startTime); + return Response.status(Status.OK).entity(federationConfInfo).build(); + } catch (NotFoundException e) { + RouterServerUtil.logAndThrowRunTimeException("get all active sub cluster(s) error.", e); + routerMetrics.incrGetSchedulerConfigurationFailedRetrieved(); + } catch (Exception e) { + routerMetrics.incrGetSchedulerConfigurationFailedRetrieved(); + RouterServerUtil.logAndThrowRunTimeException("getSchedulerConfiguration error.", e); + return Response.status(Status.BAD_REQUEST).entity("getSchedulerConfiguration error.").build(); + } + + 
routerMetrics.incrGetSchedulerConfigurationFailedRetrieved(); + throw new RuntimeException("getSchedulerConfiguration error."); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationConfInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationConfInfo.java new file mode 100644 index 00000000000..6a5e611a4f8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/dao/FederationConfInfo.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.router.webapp.dao; + +import org.apache.hadoop.yarn.webapp.dao.ConfInfo; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; +import java.util.ArrayList; +import java.util.List; + +@XmlRootElement +@XmlAccessorType(XmlAccessType.FIELD) +public class FederationConfInfo extends ConfInfo { + @XmlElement(name = "subCluster") + private List list = new ArrayList<>(); + + @XmlElement(name = "errorMsgs") + private List errorMsgs = new ArrayList<>(); + public FederationConfInfo() { + } // JAXB needs this + + public List getList() { + return list; + } + + public void setList(List list) { + this.list = list; + } + + public List getErrorMsgs() { + return errorMsgs; + } + + public void setErrorMsgs(List errorMsgs) { + this.errorMsgs = errorMsgs; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java index 4af7e8c7f5a..f8dc03a04c6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java @@ -569,6 +569,16 @@ public class TestRouterMetrics { metrics.incrGetBulkActivitiesFailedRetrieved(); } + public void getSchedulerConfigurationFailed() { + LOG.info("Mocked: failed getSchedulerConfiguration call"); + metrics.incrGetSchedulerConfigurationFailedRetrieved(); + } + + public void updateSchedulerConfigurationFailedRetrieved() { + LOG.info("Mocked: failed updateSchedulerConfiguration call"); + metrics.incrUpdateSchedulerConfigurationFailedRetrieved(); + } + public void getClusterInfoFailed() { LOG.info("Mocked: failed getClusterInfo call"); metrics.incrGetClusterInfoFailedRetrieved(); @@ -859,6 +869,16 @@ public class TestRouterMetrics { metrics.succeededAddToClusterNodeLabelsRetrieved(duration); } + public void getSchedulerConfigurationRetrieved(long duration) { + LOG.info("Mocked: successful GetSchedulerConfiguration call with duration {}", duration); + metrics.succeededGetSchedulerConfigurationRetrieved(duration); + } + + public void getUpdateSchedulerConfigurationRetrieved(long duration) { + LOG.info("Mocked: successful UpdateSchedulerConfiguration call with duration {}", duration); + metrics.succeededUpdateSchedulerConfigurationRetrieved(duration); + } + public void getClusterInfoRetrieved(long duration) { LOG.info("Mocked: successful GetClusterInfoRetrieved call with duration {}", duration); metrics.succeededGetClusterInfoRetrieved(duration); @@ -1889,6 +1909,52 @@ public class TestRouterMetrics { metrics.getLatencySucceededAddToClusterNodeLabelsRetrieved(), ASSERT_DOUBLE_DELTA); } + @Test + public void testGetSchedulerConfigurationRetrievedFailed() { + long totalBadBefore = 
metrics.getSchedulerConfigurationFailedRetrieved(); + badSubCluster.getSchedulerConfigurationFailed(); + Assert.assertEquals(totalBadBefore + 1, + metrics.getSchedulerConfigurationFailedRetrieved()); + } + + @Test + public void testGetSchedulerConfigurationRetrieved() { + long totalGoodBefore = metrics.getNumSucceededGetSchedulerConfigurationRetrieved(); + goodSubCluster.getSchedulerConfigurationRetrieved(150); + Assert.assertEquals(totalGoodBefore + 1, + metrics.getNumSucceededGetSchedulerConfigurationRetrieved()); + Assert.assertEquals(150, + metrics.getLatencySucceededGetSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA); + goodSubCluster.getSchedulerConfigurationRetrieved(300); + Assert.assertEquals(totalGoodBefore + 2, + metrics.getNumSucceededGetSchedulerConfigurationRetrieved()); + Assert.assertEquals(225, + metrics.getLatencySucceededGetSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA); + } + + @Test + public void testUpdateSchedulerConfigurationRetrievedFailed() { + long totalBadBefore = metrics.getUpdateSchedulerConfigurationFailedRetrieved(); + badSubCluster.updateSchedulerConfigurationFailedRetrieved(); + Assert.assertEquals(totalBadBefore + 1, + metrics.getUpdateSchedulerConfigurationFailedRetrieved()); + } + + @Test + public void testUpdateSchedulerConfigurationRetrieved() { + long totalGoodBefore = metrics.getNumSucceededUpdateSchedulerConfigurationRetrieved(); + goodSubCluster.getUpdateSchedulerConfigurationRetrieved(150); + Assert.assertEquals(totalGoodBefore + 1, + metrics.getNumSucceededUpdateSchedulerConfigurationRetrieved()); + Assert.assertEquals(150, + metrics.getLatencySucceededUpdateSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA); + goodSubCluster.getUpdateSchedulerConfigurationRetrieved(300); + Assert.assertEquals(totalGoodBefore + 2, + metrics.getNumSucceededUpdateSchedulerConfigurationRetrieved()); + Assert.assertEquals(225, + metrics.getLatencySucceededUpdateSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA); + } + @Test public void testGetClusterInfoRetrievedFailed() { long totalBadBefore = metrics.getClusterInfoFailedRetrieved(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java index c34167f9219..d4e1b5145cf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java @@ -102,6 +102,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueu import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerTestUtilities; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.conf.MutableCSConfigurationProvider; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import 
org.apache.hadoop.yarn.server.resourcemanager.webapp.NodeIDsInfo; @@ -159,6 +160,8 @@ import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.webapp.BadRequestException; import org.apache.hadoop.yarn.webapp.ForbiddenException; import org.apache.hadoop.yarn.webapp.NotFoundException; +import org.apache.hadoop.yarn.webapp.dao.ConfInfo; +import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo; import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1007,7 +1010,7 @@ public class MockDefaultRequestInterceptorREST } if (resContext.getReservationId() == null) { - throw new BadRequestException("Update operations must specify an existing ReservaitonId"); + throw new BadRequestException("Update operations must specify an existing ReservationId"); } ReservationRequestInterpreter[] values = ReservationRequestInterpreter.values(); @@ -1366,6 +1369,29 @@ public class MockDefaultRequestInterceptorREST } @Override + public Response updateSchedulerConfiguration(SchedConfUpdateInfo mutationInfo, + HttpServletRequest req) throws AuthorizationException, InterruptedException { + RMContext rmContext = mockRM.getRMContext(); + MutableCSConfigurationProvider provider = new MutableCSConfigurationProvider(rmContext); + try { + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.SCHEDULER_CONFIGURATION_STORE_CLASS, + YarnConfiguration.MEMORY_CONFIGURATION_STORE); + provider.init(conf); + provider.logAndApplyMutation(UserGroupInformation.getCurrentUser(), mutationInfo); + } catch (Exception e) { + throw new RuntimeException(e); + } + return Response.status(Status.OK). + entity("Configuration change successfully applied.").build(); + } + + @Override + public Response getSchedulerConfiguration(HttpServletRequest req) throws AuthorizationException { + return Response.status(Status.OK).entity(new ConfInfo(mockRM.getConfig())) + .build(); + } + public ClusterInfo getClusterInfo() { ClusterInfo clusterInfo = new ClusterInfo(mockRM); return clusterInfo; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java index 784fbd15ce1..19bba51e270 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java @@ -126,6 +126,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationDelet import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ActivitiesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeAllocationInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.BulkActivitiesInfo; +import org.apache.hadoop.yarn.server.router.webapp.dao.FederationConfInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo; @@ -138,6 +139,9 @@ import org.apache.hadoop.yarn.util.MonotonicClock; import org.apache.hadoop.yarn.util.Times; import 
org.apache.hadoop.yarn.util.YarnVersionInfo; import org.apache.hadoop.yarn.webapp.BadRequestException; +import org.apache.hadoop.yarn.webapp.dao.ConfInfo; +import org.apache.hadoop.yarn.webapp.dao.QueueConfigInfo; +import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo; import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import org.junit.Assert; import org.junit.Test; @@ -171,6 +175,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { private final static int NUM_SUBCLUSTER = 4; private static final int BAD_REQUEST = 400; private static final int ACCEPTED = 202; + private static final int OK = 200; private static String user = "test-user"; private TestableFederationInterceptorREST interceptor; private MemoryFederationStateStore stateStore; @@ -2134,6 +2139,35 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { () -> interceptor.removeFromClusterNodeLabels(oldNodeLabels1, null)); } + @Test + public void testGetSchedulerConfiguration() throws Exception { + Response response = interceptor.getSchedulerConfiguration(null); + Assert.assertNotNull(response); + Assert.assertEquals(OK, response.getStatus()); + + Object entity = response.getEntity(); + Assert.assertNotNull(entity); + Assert.assertTrue(entity instanceof FederationConfInfo); + + FederationConfInfo federationConfInfo = FederationConfInfo.class.cast(entity); + List confInfos = federationConfInfo.getList(); + Assert.assertNotNull(confInfos); + Assert.assertEquals(4, confInfos.size()); + + List errors = federationConfInfo.getErrorMsgs(); + Assert.assertEquals(0, errors.size()); + + Set subClusterSet = subClusters.stream() + .map(subClusterId -> subClusterId.getId()).collect(Collectors.toSet()); + + for (ConfInfo confInfo : confInfos) { + List confItems = confInfo.getItems(); + Assert.assertNotNull(confItems); + Assert.assertTrue(confItems.size() > 0); + Assert.assertTrue(subClusterSet.contains(confInfo.getSubClusterId())); + } + } + @Test public void testGetClusterUserInfo() { String requestUserName = "test-user"; @@ -2173,6 +2207,41 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { } } + @Test + public void testUpdateSchedulerConfigurationErrorMsg() throws Exception { + SchedConfUpdateInfo mutationInfo = new SchedConfUpdateInfo(); + LambdaTestUtils.intercept(IllegalArgumentException.class, + "Parameter error, the subClusterId is empty or null.", + () -> interceptor.updateSchedulerConfiguration(mutationInfo, null)); + + LambdaTestUtils.intercept(IllegalArgumentException.class, + "Parameter error, the schedConfUpdateInfo is empty or null.", + () -> interceptor.updateSchedulerConfiguration(null, null)); + } + + @Test + public void testUpdateSchedulerConfiguration() + throws AuthorizationException, InterruptedException { + SchedConfUpdateInfo updateInfo = new SchedConfUpdateInfo(); + updateInfo.setSubClusterId("1"); + Map goodUpdateMap = new HashMap<>(); + goodUpdateMap.put("goodKey", "goodVal"); + QueueConfigInfo goodUpdateInfo = new + QueueConfigInfo("root.default", goodUpdateMap); + updateInfo.getUpdateQueueInfo().add(goodUpdateInfo); + Response response = interceptor.updateSchedulerConfiguration(updateInfo, null); + + Assert.assertNotNull(response); + Assert.assertEquals(OK, response.getStatus()); + + String expectMsg = "Configuration change successfully applied."; + Object entity = response.getEntity(); + Assert.assertNotNull(entity); + + String entityMsg = String.valueOf(entity); + Assert.assertEquals(expectMsg, entityMsg); + } + @Test 
public void testGetClusterInfo() { ClusterInfo clusterInfos = interceptor.getClusterInfo(); From b4bcbb9515b5b264156b379034b8e9c923bcb25d Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 29 Mar 2023 10:43:13 -0700 Subject: [PATCH 49/97] HDFS-16959. RBF: State store cache loading metrics (#5497) --- .../src/site/markdown/Metrics.md | 24 +++++++------ .../federation/metrics/StateStoreMetrics.java | 28 +++++++++++++++ .../federation/store/CachedRecordStore.java | 2 ++ .../driver/TestStateStoreDriverBase.java | 36 +++++++++++++++++++ .../store/driver/TestStateStoreFile.java | 12 +++++++ .../driver/TestStateStoreFileSystem.java | 12 +++++++ .../store/driver/TestStateStoreZK.java | 14 ++++++++ 7 files changed, 117 insertions(+), 11 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index a551e3ae15f..0777fc42abe 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -592,17 +592,19 @@ StateStoreMetrics ----------------- StateStoreMetrics shows the statistics of the State Store component in Router-based federation. -| Name | Description | -|:---- |:---- | -| `ReadsNumOps` | Number of GET transactions for State Store within an interval time of metric | -| `ReadsAvgTime` | Average time of GET transactions for State Store in milliseconds | -| `WritesNumOps` | Number of PUT transactions for State Store within an interval time of metric | -| `WritesAvgTime` | Average time of PUT transactions for State Store in milliseconds | -| `RemovesNumOps` | Number of REMOVE transactions for State Store within an interval time of metric | -| `RemovesAvgTime` | Average time of REMOVE transactions for State Store in milliseconds | -| `FailuresNumOps` | Number of failed transactions for State Store within an interval time of metric | -| `FailuresAvgTime` | Average time of failed transactions for State Store in milliseconds | -| `Cache`*BaseRecord*`Size` | Number of store records to cache in State Store | +| Name | Description | +|:------------------------------------------|:-----------------------------------------------------------------------------------| +| `ReadsNumOps` | Number of GET transactions for State Store within an interval time of metric | +| `ReadsAvgTime` | Average time of GET transactions for State Store in milliseconds | +| `WritesNumOps` | Number of PUT transactions for State Store within an interval time of metric | +| `WritesAvgTime` | Average time of PUT transactions for State Store in milliseconds | +| `RemovesNumOps` | Number of REMOVE transactions for State Store within an interval time of metric | +| `RemovesAvgTime` | Average time of REMOVE transactions for State Store in milliseconds | +| `FailuresNumOps` | Number of failed transactions for State Store within an interval time of metric | +| `FailuresAvgTime` | Average time of failed transactions for State Store in milliseconds | +| `Cache`*BaseRecord*`Size` | Number of store records to cache in State Store | +| `Cache`*BaseRecord*`LoadNumOps` | Number of times store records are loaded in the State Store Cache from State Store | +| `Cache`*BaseRecord*`LoadAvgTime` | Average time of loading State Store Cache from State Store in milliseconds | yarn context ============ diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java 
b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java index 371b33e05e2..b5c4047acd1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.federation.metrics; import static org.apache.hadoop.metrics2.impl.MsInfo.ProcessName; import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId; +import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -54,6 +55,7 @@ public class StateStoreMetrics implements StateStoreMBean { private MutableRate failures; private Map cacheSizes; + private final Map cacheLoadMetrics = new HashMap<>(); protected StateStoreMetrics() {} @@ -150,6 +152,32 @@ public class StateStoreMetrics implements StateStoreMBean { counter.set(count); } + /** + * Set the cache loading metrics for the state store interface. + * + * @param name Name of the record of the cache. + * @param value The time duration interval as the cache value. + */ + public void setCacheLoading(String name, long value) { + String cacheLoad = "Cache" + name + "Load"; + MutableRate cacheLoadMetric = cacheLoadMetrics.get(cacheLoad); + if (cacheLoadMetric == null) { + cacheLoadMetric = registry.newRate(cacheLoad, name, false); + cacheLoadMetrics.put(cacheLoad, cacheLoadMetric); + } + cacheLoadMetrics.get(cacheLoad).add(value); + } + + /** + * Retrieve unmodifiable map of cache loading metrics. + * + * @return unmodifiable map of cache loading metrics. + */ + @VisibleForTesting + public Map getCacheLoadMetrics() { + return Collections.unmodifiableMap(cacheLoadMetrics); + } + @VisibleForTesting public void reset() { reads.resetMinMax(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java index 6fea9b9946d..08dcc1c6e46 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java @@ -113,6 +113,7 @@ public abstract class CachedRecordStore if (force || isUpdateTime()) { List newRecords = null; long t = -1; + long startTime = Time.monotonicNow(); try { QueryResult result = getDriver().get(getRecordClass()); newRecords = result.getRecords(); @@ -143,6 +144,7 @@ public abstract class CachedRecordStore StateStoreMetrics metrics = getDriver().getMetrics(); if (metrics != null) { String recordName = getRecordClass().getSimpleName(); + metrics.setCacheLoading(recordName, Time.monotonicNow() - startTime); metrics.setCacheSize(recordName, this.records.size()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java index 4eb38b06b12..48d84f9326b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java @@ -48,6 +48,8 @@ import org.apache.hadoop.hdfs.server.federation.store.records.Query; import org.apache.hadoop.hdfs.server.federation.store.records.QueryResult; import org.apache.hadoop.hdfs.server.federation.store.records.RouterState; import org.apache.hadoop.hdfs.server.federation.store.records.StateStoreVersion; +import org.apache.hadoop.metrics2.lib.MutableRate; + import org.junit.After; import org.junit.AfterClass; import org.slf4j.Logger; @@ -76,6 +78,10 @@ public class TestStateStoreDriverBase { return stateStore.getDriver(); } + protected StateStoreService getStateStoreService() { + return stateStore; + } + @After public void cleanMetrics() { if (stateStore != null) { @@ -574,6 +580,36 @@ public class TestStateStoreDriverBase { return getters; } + public long getMountTableCacheLoadSamples(StateStoreDriver driver) throws IOException { + final MutableRate mountTableCache = getMountTableCache(driver); + return mountTableCache.lastStat().numSamples(); + } + + private static MutableRate getMountTableCache(StateStoreDriver driver) throws IOException { + StateStoreMetrics metrics = stateStore.getMetrics(); + final Query query = new Query<>(MountTable.newInstance()); + driver.getMultiple(MountTable.class, query); + final Map cacheLoadMetrics = metrics.getCacheLoadMetrics(); + final MutableRate mountTableCache = cacheLoadMetrics.get("CacheMountTableLoad"); + assertNotNull("CacheMountTableLoad should be present in the state store metrics", + mountTableCache); + return mountTableCache; + } + + public void testCacheLoadMetrics(StateStoreDriver driver, long numRefresh, + double expectedHigherThan) throws IOException, IllegalArgumentException { + final MutableRate mountTableCache = getMountTableCache(driver); + // CacheMountTableLoadNumOps + final long mountTableCacheLoadNumOps = getMountTableCacheLoadSamples(driver); + assertEquals("Num of samples collected should match", numRefresh, mountTableCacheLoadNumOps); + // CacheMountTableLoadAvgTime ms + final double mountTableCacheLoadAvgTimeMs = mountTableCache.lastStat().mean(); + assertTrue( + "Mean time duration for cache load is expected to be higher than " + expectedHigherThan + + " ms." + " Actual value: " + mountTableCacheLoadAvgTimeMs, + mountTableCacheLoadAvgTimeMs > expectedHigherThan); + } + /** * Get the type of field. 
* diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java index a8a9020744c..b01500b2ea1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java @@ -73,4 +73,16 @@ public class TestStateStoreFile extends TestStateStoreDriverBase { throws IllegalArgumentException, IllegalAccessException, IOException { testMetrics(getStateStoreDriver()); } + + @Test + public void testCacheLoadMetrics() throws IOException { + // inject value of CacheMountTableLoad as -1 initially, if tests get CacheMountTableLoadAvgTime + // value as -1 ms, that would mean no other sample with value >= 0 would have been received and + // hence this would be failure to assert that mount table avg load time is higher than -1 + getStateStoreService().getMetrics().setCacheLoading("MountTable", -1); + long curMountTableLoadNum = getMountTableCacheLoadSamples(getStateStoreDriver()); + getStateStoreService().refreshCaches(true); + testCacheLoadMetrics(getStateStoreDriver(), curMountTableLoadNum + 1, -1); + } + } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java index dbd4b9bdae2..8c06e6b8ed1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java @@ -115,4 +115,16 @@ public class TestStateStoreFileSystem extends TestStateStoreDriverBase { testInsertWithErrorDuringWrite(driver, MembershipState.class); } + + @Test + public void testCacheLoadMetrics() throws IOException { + // inject value of CacheMountTableLoad as -1 initially, if tests get CacheMountTableLoadAvgTime + // value as -1 ms, that would mean no other sample with value >= 0 would have been received and + // hence this would be failure to assert that mount table avg load time is higher than -1 + getStateStoreService().getMetrics().setCacheLoading("MountTable", -1); + long curMountTableLoadNum = getMountTableCacheLoadSamples(getStateStoreDriver()); + getStateStoreService().refreshCaches(true); + getStateStoreService().refreshCaches(true); + testCacheLoadMetrics(getStateStoreDriver(), curMountTableLoadNum + 2, -1); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java index 3ad106697ac..f94e415b4d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java @@ -206,4 +206,18 @@ public class TestStateStoreZK extends 
TestStateStoreDriverBase { stateStoreDriver.setEnableConcurrent(true); testFetchErrors(stateStoreDriver); } + + @Test + public void testCacheLoadMetrics() throws IOException { + // inject value of CacheMountTableLoad as -1 initially, if tests get CacheMountTableLoadAvgTime + // value as -1 ms, that would mean no other sample with value >= 0 would have been received and + // hence this would be failure to assert that mount table avg load time is higher than -1 + getStateStoreService().getMetrics().setCacheLoading("MountTable", -1); + long curMountTableLoadNum = getMountTableCacheLoadSamples(getStateStoreDriver()); + getStateStoreService().refreshCaches(true); + getStateStoreService().refreshCaches(true); + getStateStoreService().refreshCaches(true); + testCacheLoadMetrics(getStateStoreDriver(), curMountTableLoadNum + 3, -1); + } + } \ No newline at end of file From 016362a28b516630c748809261a40ac63fdbc4f4 Mon Sep 17 00:00:00 2001 From: Galsza <109229906+Galsza@users.noreply.github.com> Date: Thu, 30 Mar 2023 01:12:02 +0200 Subject: [PATCH 50/97] HADOOP-18548. Hadoop Archive tool (HAR) should acquire delegation tokens from source and destination file systems (#5355) Signed-off-by: Chris Nauroth --- .../org/apache/hadoop/tools/HadoopArchives.java | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java index 471f3549449..6d082380ffa 100644 --- a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java @@ -37,6 +37,8 @@ import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.Parser; +import org.apache.hadoop.mapreduce.security.TokenCache; +import org.apache.hadoop.security.Credentials; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -487,6 +489,11 @@ public class HadoopArchives implements Tool { + " should be a directory but is a file"); } conf.set(DST_DIR_LABEL, outputPath.toString()); + Credentials credentials = conf.getCredentials(); + Path[] allPaths = new Path[] {parentPath, dest}; + TokenCache.obtainTokensForNamenodes(credentials, allPaths, conf); + conf.setCredentials(credentials); + Path stagingArea; try { stagingArea = JobSubmissionFiles.getStagingDir(new Cluster(conf), @@ -498,11 +505,11 @@ public class HadoopArchives implements Tool { NAME+"_"+Integer.toString(new Random().nextInt(Integer.MAX_VALUE), 36)); FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION); - FileSystem.mkdirs(jobDirectory.getFileSystem(conf), jobDirectory, - mapredSysPerms); + FileSystem jobfs = jobDirectory.getFileSystem(conf); + FileSystem.mkdirs(jobfs, jobDirectory, + mapredSysPerms); conf.set(JOB_DIR_LABEL, jobDirectory.toString()); //get a tmp directory for input splits - FileSystem jobfs = jobDirectory.getFileSystem(conf); Path srcFiles = new Path(jobDirectory, "_har_src_files"); conf.set(SRC_LIST_LABEL, srcFiles.toString()); SequenceFile.Writer srcWriter = SequenceFile.createWriter(jobfs, conf, From 389b3ea6e3acd9cb3e3ab22b2e021499692a68f0 Mon Sep 17 00:00:00 2001 From: sreeb-msft <111426823+sreeb-msft@users.noreply.github.com> Date: Fri, 31 Mar 2023 23:45:15 +0530 Subject: [PATCH 51/97] HADOOP-18012. 
ABFS: Enable config controlled ETag check for Rename idempotency (#5488) To support recovery of network failures during rename, the abfs client fetches the etag of the source file, and when recovering from a failure, uses this tag to determine whether the rename succeeded before the failure happened. * This works for files, but not directories * It adds the overhead of a HEAD request before each rename. * The option can be disabled by setting "fs.azure.enable.rename.resilience" to false Contributed by Sree Bhattacharyya --- .../hadoop/fs/azurebfs/AbfsConfiguration.java | 11 + .../fs/azurebfs/AzureBlobFileSystem.java | 6 +- .../fs/azurebfs/AzureBlobFileSystemStore.java | 4 +- .../azurebfs/constants/ConfigurationKeys.java | 3 + .../constants/FileSystemConfigurations.java | 1 + .../fs/azurebfs/services/AbfsClient.java | 199 ++++++--- .../services/AbfsClientRenameResult.java | 12 + .../azurebfs/services/AbfsRestOperation.java | 53 ++- ...ITestAzureBlobFileSystemDelegationSAS.java | 8 +- .../fs/azurebfs/ITestCustomerProvidedKey.java | 6 +- .../services/TestAbfsRenameRetryRecovery.java | 414 +++++++++++++++++- 11 files changed, 621 insertions(+), 96 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 124c4d9de72..0bcb97a8496 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -333,6 +333,10 @@ public class AbfsConfiguration{ FS_AZURE_ENABLE_ABFS_LIST_ITERATOR, DefaultValue = DEFAULT_ENABLE_ABFS_LIST_ITERATOR) private boolean enableAbfsListIterator; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = + FS_AZURE_ABFS_RENAME_RESILIENCE, DefaultValue = DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE) + private boolean renameResilience; + public AbfsConfiguration(final Configuration rawConfig, String accountName) throws IllegalAccessException, InvalidConfigurationValueException, IOException { this.rawConfig = ProviderUtils.excludeIncompatibleCredentialProviders( @@ -1139,4 +1143,11 @@ public class AbfsConfiguration{ this.enableAbfsListIterator = enableAbfsListIterator; } + public boolean getRenameResilience() { + return renameResilience; + } + + void setRenameResilience(boolean actualResilience) { + renameResilience = actualResilience; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 5534b5fb44a..9c9d6f561d7 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -201,9 +201,9 @@ public class AzureBlobFileSystem extends FileSystem tracingHeaderFormat = abfsConfiguration.getTracingHeaderFormat(); this.setWorkingDirectory(this.getHomeDirectory()); + TracingContext tracingContext = new TracingContext(clientCorrelationId, + fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, listener); if (abfsConfiguration.getCreateRemoteFileSystemDuringInitialization()) { - TracingContext tracingContext = new TracingContext(clientCorrelationId, - fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, listener); if (this.tryGetFileStatus(new 
Path(AbfsHttpConstants.ROOT_PATH), tracingContext) == null) { try { this.createFileSystem(tracingContext); @@ -442,7 +442,7 @@ public class AzureBlobFileSystem extends FileSystem } // Non-HNS account need to check dst status on driver side. - if (!abfsStore.getIsNamespaceEnabled(tracingContext) && dstFileStatus == null) { + if (!getIsNamespaceEnabled(tracingContext) && dstFileStatus == null) { dstFileStatus = tryGetFileStatus(qualifiedDstPath, tracingContext); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 3cee9b4f90c..79ffc796c3a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -923,9 +923,11 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport { do { try (AbfsPerfInfo perfInfo = startTracking("rename", "renamePath")) { + boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext); final AbfsClientRenameResult abfsClientRenameResult = client.renamePath(sourceRelativePath, destinationRelativePath, - continuation, tracingContext, sourceEtag, false); + continuation, tracingContext, sourceEtag, false, + isNamespaceEnabled); AbfsRestOperation op = abfsClientRenameResult.getOp(); perfInfo.registerResult(op.getResult()); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index e3052cd7bbc..872364a8e61 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -238,6 +238,9 @@ public final class ConfigurationKeys { /** Key for rate limit capacity, as used by IO operations which try to throttle themselves. */ public static final String FS_AZURE_ABFS_IO_RATE_LIMIT = "fs.azure.io.rate.limit"; + /** Add extra resilience to rename failures, at the expense of performance. */ + public static final String FS_AZURE_ABFS_RENAME_RESILIENCE = "fs.azure.enable.rename.resilience"; + public static String accountProperty(String property, String account) { return property + "." 
+ account; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 68b492a5791..32f9966e30a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -118,6 +118,7 @@ public final class FileSystemConfigurations { public static final int STREAM_ID_LEN = 12; public static final boolean DEFAULT_ENABLE_ABFS_LIST_ITERATOR = true; + public static final boolean DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE = true; /** * Limit of queued block upload operations before writes diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 2c367333300..77b8dcb2b98 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -55,6 +55,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; @@ -68,6 +69,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; import org.apache.hadoop.util.concurrent.HadoopExecutors; +import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.commons.lang3.StringUtils.isNotEmpty; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS; import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader; @@ -77,8 +79,8 @@ import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.S import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*; -import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; /** * AbfsClient. @@ -106,9 +108,12 @@ public class AbfsClient implements Closeable { private final ListeningScheduledExecutorService executorService; - /** logging the rename failure if metadata is in an incomplete state. */ - private static final LogExactlyOnce ABFS_METADATA_INCOMPLETE_RENAME_FAILURE = - new LogExactlyOnce(LOG); + private boolean renameResilience; + + /** + * logging the rename failure if metadata is in an incomplete state. 
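The new "fs.azure.enable.rename.resilience" switch introduced above defaults to true (DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE) and is picked up once, when the AbfsClient is constructed. A minimal sketch of how a client application might opt out and avoid the extra HEAD (getPathStatus) before each rename, assuming placeholder container and account names:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DisableRenameResilienceExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // default is true; false skips the pre-rename etag fetch, giving up recovery
    conf.setBoolean("fs.azure.enable.rename.resilience", false);
    // placeholder container/account URI
    FileSystem fs = FileSystem.get(
        new URI("abfs://container@account.dfs.core.windows.net/"), conf);
    fs.rename(new Path("/src/file"), new Path("/dst/file"));
    fs.close();
  }
}

Setting the same key in the cluster's site configuration has the same effect.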
+ */ + private static final LogExactlyOnce ABFS_METADATA_INCOMPLETE_RENAME_FAILURE = new LogExactlyOnce(LOG); private AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredentials, final AbfsConfiguration abfsConfiguration, @@ -123,6 +128,7 @@ public class AbfsClient implements Closeable { this.accountName = abfsConfiguration.getAccountName().substring(0, abfsConfiguration.getAccountName().indexOf(AbfsHttpConstants.DOT)); this.authType = abfsConfiguration.getAuthType(accountName); this.intercept = AbfsThrottlingInterceptFactory.getInstance(accountName, abfsConfiguration); + this.renameResilience = abfsConfiguration.getRenameResilience(); String encryptionKey = this.abfsConfiguration .getClientProvidedEncryptionKey(); @@ -504,27 +510,55 @@ public class AbfsClient implements Closeable { * took place. * As rename recovery is only attempted if the source etag is non-empty, * in normal rename operations rename recovery will never happen. - * @param source path to source file - * @param destination destination of rename. - * @param continuation continuation. - * @param tracingContext trace context - * @param sourceEtag etag of source file. may be null or empty + * + * @param source path to source file + * @param destination destination of rename. + * @param continuation continuation. + * @param tracingContext trace context + * @param sourceEtag etag of source file. may be null or empty * @param isMetadataIncompleteState was there a rename failure due to * incomplete metadata state? + * @param isNamespaceEnabled whether namespace enabled account or not * @return AbfsClientRenameResult result of rename operation indicating the * AbfsRest operation, rename recovery and incomplete metadata state failure. * @throws AzureBlobFileSystemException failure, excluding any recovery from overload failures. */ public AbfsClientRenameResult renamePath( - final String source, - final String destination, - final String continuation, - final TracingContext tracingContext, - final String sourceEtag, - boolean isMetadataIncompleteState) + final String source, + final String destination, + final String continuation, + final TracingContext tracingContext, + String sourceEtag, + boolean isMetadataIncompleteState, + boolean isNamespaceEnabled) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); + final boolean hasEtag = !isEmpty(sourceEtag); + + boolean shouldAttemptRecovery = renameResilience && isNamespaceEnabled; + if (!hasEtag && shouldAttemptRecovery) { + // in case eTag is already not supplied to the API + // and rename resilience is expected and it is an HNS enabled account + // fetch the source etag to be used later in recovery + try { + final AbfsRestOperation srcStatusOp = getPathStatus(source, + false, tracingContext); + if (srcStatusOp.hasResult()) { + final AbfsHttpOperation result = srcStatusOp.getResult(); + sourceEtag = extractEtagHeader(result); + // and update the directory status. 
+ boolean isDir = checkIsDir(result); + shouldAttemptRecovery = !isDir; + LOG.debug("Retrieved etag of source for rename recovery: {}; isDir={}", sourceEtag, isDir); + } + } catch (AbfsRestOperationException e) { + throw new AbfsRestOperationException(e.getStatusCode(), SOURCE_PATH_NOT_FOUND.getErrorCode(), + e.getMessage(), e); + } + + } + String encodedRenameSource = urlEncode(FORWARD_SLASH + this.getFileSystem() + source); if (authType == AuthType.SAS) { final AbfsUriQueryBuilder srcQueryBuilder = new AbfsUriQueryBuilder(); @@ -541,12 +575,7 @@ public class AbfsClient implements Closeable { appendSASTokenToQuery(destination, SASTokenProvider.RENAME_DESTINATION_OPERATION, abfsUriQueryBuilder); final URL url = createRequestUrl(destination, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = new AbfsRestOperation( - AbfsRestOperationType.RenamePath, - this, - HTTP_METHOD_PUT, - url, - requestHeaders); + final AbfsRestOperation op = createRenameRestOperation(url, requestHeaders); try { incrementAbfsRenamePath(); op.execute(tracingContext); @@ -557,48 +586,74 @@ public class AbfsClient implements Closeable { // isMetadataIncompleteState is used for renameRecovery(as the 2nd param). return new AbfsClientRenameResult(op, isMetadataIncompleteState, isMetadataIncompleteState); } catch (AzureBlobFileSystemException e) { - // If we have no HTTP response, throw the original exception. - if (!op.hasResult()) { - throw e; - } - - // ref: HADOOP-18242. Rename failure occurring due to a rare case of - // tracking metadata being in incomplete state. - if (op.getResult().getStorageErrorCode() - .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode()) - && !isMetadataIncompleteState) { - //Logging - ABFS_METADATA_INCOMPLETE_RENAME_FAILURE - .info("Rename Failure attempting to resolve tracking metadata state and retrying."); + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { + throw e; + } + // ref: HADOOP-18242. Rename failure occurring due to a rare case of + // tracking metadata being in incomplete state. + if (op.getResult().getStorageErrorCode() + .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode()) + && !isMetadataIncompleteState) { + //Logging + ABFS_METADATA_INCOMPLETE_RENAME_FAILURE + .info("Rename Failure attempting to resolve tracking metadata state and retrying."); + // rename recovery should be attempted in this case also + shouldAttemptRecovery = true; + isMetadataIncompleteState = true; + String sourceEtagAfterFailure = sourceEtag; + if (isEmpty(sourceEtagAfterFailure)) { // Doing a HEAD call resolves the incomplete metadata state and // then we can retry the rename operation. AbfsRestOperation sourceStatusOp = getPathStatus(source, false, tracingContext); - isMetadataIncompleteState = true; // Extract the sourceEtag, using the status Op, and set it // for future rename recovery. AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult(); - String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult); - renamePath(source, destination, continuation, tracingContext, - sourceEtagAfterFailure, isMetadataIncompleteState); + sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult); } - // if we get out of the condition without a successful rename, then - // it isn't metadata incomplete state issue. 
- isMetadataIncompleteState = false; + renamePath(source, destination, continuation, tracingContext, + sourceEtagAfterFailure, isMetadataIncompleteState, isNamespaceEnabled); + } + // if we get out of the condition without a successful rename, then + // it isn't metadata incomplete state issue. + isMetadataIncompleteState = false; - boolean etagCheckSucceeded = renameIdempotencyCheckOp( - source, - sourceEtag, op, destination, tracingContext); - if (!etagCheckSucceeded) { - // idempotency did not return different result - // throw back the exception - throw e; - } + // setting default rename recovery success to false + boolean etagCheckSucceeded = false; + if (shouldAttemptRecovery) { + etagCheckSucceeded = renameIdempotencyCheckOp( + source, + sourceEtag, op, destination, tracingContext); + } + if (!etagCheckSucceeded) { + // idempotency did not return different result + // throw back the exception + throw e; + } return new AbfsClientRenameResult(op, true, isMetadataIncompleteState); } } + private boolean checkIsDir(AbfsHttpOperation result) { + String resourceType = result.getResponseHeader( + HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); + return resourceType != null + && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY); + } + + @VisibleForTesting + AbfsRestOperation createRenameRestOperation(URL url, List requestHeaders) { + AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.RenamePath, + this, + HTTP_METHOD_PUT, + url, + requestHeaders); + return op; + } + private void incrementAbfsRenamePath() { abfsCounters.incrementCounter(RENAME_PATH_ATTEMPTS, 1); } @@ -628,28 +683,44 @@ public class AbfsClient implements Closeable { TracingContext tracingContext) { Preconditions.checkArgument(op.hasResult(), "Operations has null HTTP response"); - if ((op.isARetriedRequest()) - && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) - && isNotEmpty(sourceEtag)) { - - // Server has returned HTTP 404, which means rename source no longer - // exists. Check on destination status and if its etag matches - // that of the source, consider it to be a success. - LOG.debug("rename {} to {} failed, checking etag of destination", - source, destination); + // removing isDir from debug logs as it can be misleading + LOG.debug("rename({}, {}) failure {}; retry={} etag {}", + source, destination, op.getResult().getStatusCode(), op.isARetriedRequest(), sourceEtag); + if (!(op.isARetriedRequest() + && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND))) { + // only attempt recovery if the failure was a 404 on a retried rename request. + return false; + } + if (isNotEmpty(sourceEtag)) { + // Server has returned HTTP 404, we have an etag, so see + // if the rename has actually taken place, + LOG.info("rename {} to {} failed, checking etag of destination", + source, destination); try { - final AbfsRestOperation destStatusOp = getPathStatus(destination, - false, tracingContext); + final AbfsRestOperation destStatusOp = getPathStatus(destination, false, tracingContext); final AbfsHttpOperation result = destStatusOp.getResult(); - return result.getStatusCode() == HttpURLConnection.HTTP_OK - && sourceEtag.equals(extractEtagHeader(result)); - } catch (AzureBlobFileSystemException ignored) { + final boolean recovered = result.getStatusCode() == HttpURLConnection.HTTP_OK + && sourceEtag.equals(extractEtagHeader(result)); + LOG.info("File rename has taken place: recovery {}", + recovered ? 
"succeeded" : "failed"); + return recovered; + + } catch (AzureBlobFileSystemException ex) { // GetFileStatus on the destination failed, the rename did not take place + // or some other failure. log and swallow. + LOG.debug("Failed to get status of path {}", destination, ex); } + } else { + LOG.debug("No source etag; unable to probe for the operation's success"); } - return false; + return false; + } + + @VisibleForTesting + boolean isSourceDestEtagEqual(String sourceEtag, AbfsHttpOperation result) { + return sourceEtag.equals(extractEtagHeader(result)); } public AbfsRestOperation append(final String path, final byte[] buffer, diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java index 86e3473a9fe..76648cfc44b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientRenameResult.java @@ -58,4 +58,16 @@ public class AbfsClientRenameResult { public boolean isIncompleteMetadataState() { return isIncompleteMetadataState; } + + @Override + public String toString() { + return "AbfsClientRenameResult{" + + "op=" + + op + + ", renameRecovered=" + + renameRecovered + + ", isIncompleteMetadataState=" + + isIncompleteMetadataState + + '}'; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index a9a72635422..6402be72ddc 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -276,26 +276,8 @@ public class AbfsRestOperation { incrementCounter(AbfsStatistic.CONNECTIONS_MADE, 1); tracingContext.constructHeader(httpOperation, failureReason); - switch(client.getAuthType()) { - case Custom: - case OAuth: - LOG.debug("Authenticating request with OAuth2 access token"); - httpOperation.setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, - client.getAccessToken()); - break; - case SAS: - // do nothing; the SAS token should already be appended to the query string - httpOperation.setMaskForSAS(); //mask sig/oid from url for logs - break; - case SharedKey: - // sign the HTTP request - LOG.debug("Signing request with shared key"); - // sign the HTTP request - client.getSharedKeyCredentials().signRequest( - httpOperation.getConnection(), - hasRequestBody ? bufferLength : 0); - break; - } + signRequest(httpOperation, hasRequestBody ? bufferLength : 0); + } catch (IOException e) { LOG.debug("Auth failure: {}, {}", method, url); throw new AbfsRestOperationException(-1, null, @@ -376,6 +358,37 @@ public class AbfsRestOperation { return true; } + /** + * Sign an operation. + * @param httpOperation operation to sign + * @param bytesToSign how many bytes to sign for shared key auth. 
+ * @throws IOException failure + */ + @VisibleForTesting + public void signRequest(final AbfsHttpOperation httpOperation, int bytesToSign) throws IOException { + switch(client.getAuthType()) { + case Custom: + case OAuth: + LOG.debug("Authenticating request with OAuth2 access token"); + httpOperation.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, + client.getAccessToken()); + break; + case SAS: + // do nothing; the SAS token should already be appended to the query string + httpOperation.setMaskForSAS(); //mask sig/oid from url for logs + break; + case SharedKey: + default: + // sign the HTTP request + LOG.debug("Signing request with shared key"); + // sign the HTTP request + client.getSharedKeyCredentials().signRequest( + httpOperation.getConnection(), + bytesToSign); + break; + } + } + /** * Creates new object of {@link AbfsHttpOperation} with the url, method, and * requestHeaders fields of the AbfsRestOperation object. diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java index b164689ef80..5735423aaf9 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java @@ -70,6 +70,8 @@ public class ITestAzureBlobFileSystemDelegationSAS extends AbstractAbfsIntegrati private static final Logger LOG = LoggerFactory.getLogger(ITestAzureBlobFileSystemDelegationSAS.class); + private boolean isHNSEnabled; + public ITestAzureBlobFileSystemDelegationSAS() throws Exception { // These tests rely on specific settings in azure-auth-keys.xml: String sasProvider = getRawConfiguration().get(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE); @@ -85,7 +87,7 @@ public class ITestAzureBlobFileSystemDelegationSAS extends AbstractAbfsIntegrati @Override public void setup() throws Exception { - boolean isHNSEnabled = this.getConfiguration().getBoolean( + isHNSEnabled = this.getConfiguration().getBoolean( TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); Assume.assumeTrue(isHNSEnabled); createFilesystemForSASTests(); @@ -401,7 +403,7 @@ public class ITestAzureBlobFileSystemDelegationSAS extends AbstractAbfsIntegrati fs.create(new Path(src)).close(); AbfsRestOperation abfsHttpRestOperation = fs.getAbfsClient() .renamePath(src, "/testABC" + "/abc.txt", null, - getTestTracingContext(fs, false), null, false) + getTestTracingContext(fs, false), null, false, isHNSEnabled) .getOp(); AbfsHttpOperation result = abfsHttpRestOperation.getResult(); String url = result.getMaskedUrl(); @@ -419,7 +421,7 @@ public class ITestAzureBlobFileSystemDelegationSAS extends AbstractAbfsIntegrati intercept(IOException.class, "sig=XXXX", () -> getFileSystem().getAbfsClient() .renamePath("testABC/test.xt", "testABC/abc.txt", null, - getTestTracingContext(getFileSystem(), false), null, false)); + getTestTracingContext(getFileSystem(), false), null, false, isHNSEnabled)); } @Test diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java index bd8dbdf871b..76b8a77fffc 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java @@ -99,10 +99,14 @@ public class ITestCustomerProvidedKey extends AbstractAbfsIntegrationTest { private static final int FILE_SIZE = 10 * ONE_MB; private static final int FILE_SIZE_FOR_COPY_BETWEEN_ACCOUNTS = 24 * ONE_MB; + private boolean isNamespaceEnabled; + public ITestCustomerProvidedKey() throws Exception { boolean isCPKTestsEnabled = getConfiguration() .getBoolean(FS_AZURE_TEST_CPK_ENABLED, false); Assume.assumeTrue(isCPKTestsEnabled); + isNamespaceEnabled = getConfiguration() + .getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); } @Test @@ -526,7 +530,7 @@ public class ITestCustomerProvidedKey extends AbstractAbfsIntegrationTest { AbfsClient abfsClient = fs.getAbfsClient(); AbfsRestOperation abfsRestOperation = abfsClient .renamePath(testFileName, newName, null, - getTestTracingContext(fs, false), null, false) + getTestTracingContext(fs, false), null, false, isNamespaceEnabled) .getOp(); assertCPKHeaders(abfsRestOperation, false); assertNoCPKResponseHeadersPresent(abfsRestOperation); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java index f5cbceaddd8..cef1c9ae5a1 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -18,19 +18,44 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.SocketException; +import java.net.URL; +import java.time.Duration; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys; +import org.apache.hadoop.fs.statistics.IOStatistics; import org.assertj.core.api.Assertions; +import org.junit.Assume; import org.junit.Test; +import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.EtagSource; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore; +import org.apache.hadoop.fs.azurebfs.commit.ResilientCommitByRename; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.PATH_ALREADY_EXISTS; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS; +import static 
org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupCounterStatistic; import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -45,7 +70,11 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest { private static final Logger LOG = LoggerFactory.getLogger(TestAbfsRenameRetryRecovery.class); + private boolean isNamespaceEnabled; + public TestAbfsRenameRetryRecovery() throws Exception { + isNamespaceEnabled = getConfiguration() + .getBoolean(TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); } /** @@ -90,7 +119,7 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest { // We need to throw an exception once a rename is triggered with // destination having no parent, but after a retry it needs to succeed. when(mockClient.renamePath(sourcePath, destNoParentPath, null, null, - null, false)) + null, false, isNamespaceEnabled)) .thenThrow(destParentNotFound) .thenReturn(recoveredMetaDataIncompleteResult); @@ -98,12 +127,12 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest { intercept(AzureBlobFileSystemException.class, () -> mockClient.renamePath(sourcePath, destNoParentPath, null, null, - null, false)); + null, false, isNamespaceEnabled)); AbfsClientRenameResult resultOfSecondRenameCall = mockClient.renamePath(sourcePath, destNoParentPath, null, null, - null, false); + null, false, isNamespaceEnabled); // the second rename call should be the recoveredResult due to // metaDataIncomplete @@ -119,10 +148,387 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest { // Verify renamePath occurred two times implying a retry was attempted. verify(mockClient, times(2)) - .renamePath(sourcePath, destNoParentPath, null, null, null, false); + .renamePath(sourcePath, destNoParentPath, null, null, null, false, + isNamespaceEnabled); } + AbfsClient getMockAbfsClient() throws IOException { + AzureBlobFileSystem fs = getFileSystem(); + + // adding mock objects to current AbfsClient + AbfsClient spyClient = Mockito.spy(fs.getAbfsStore().getClient()); + + Mockito.doAnswer(answer -> { + AbfsRestOperation op = new AbfsRestOperation(AbfsRestOperationType.RenamePath, + spyClient, HTTP_METHOD_PUT, answer.getArgument(0), answer.getArgument(1)); + AbfsRestOperation spiedOp = Mockito.spy(op); + addSpyBehavior(spiedOp, op, spyClient); + return spiedOp; + }).when(spyClient).createRenameRestOperation(Mockito.any(URL.class), anyList()); + + return spyClient; + + } + + /** + * Spies on a rest operation to inject transient failure. + * the first createHttpOperation() invocation will return an abfs rest operation + * which will fail. + * @param spiedRestOp spied operation whose createHttpOperation() will fail first time + * @param normalRestOp normal operation the good operation + * @param client client. 
+ * @throws IOException failure + */ + private void addSpyBehavior(final AbfsRestOperation spiedRestOp, + final AbfsRestOperation normalRestOp, + final AbfsClient client) + throws IOException { + AbfsHttpOperation failingOperation = Mockito.spy(normalRestOp.createHttpOperation()); + AbfsHttpOperation normalOp1 = normalRestOp.createHttpOperation(); + executeThenFail(client, normalRestOp, failingOperation, normalOp1); + AbfsHttpOperation normalOp2 = normalRestOp.createHttpOperation(); + normalOp2.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, + client.getAccessToken()); + + when(spiedRestOp.createHttpOperation()) + .thenReturn(failingOperation) + .thenReturn(normalOp2); + } + + /** + * Mock an idempotency failure by executing the normal operation, then + * raising an IOE. + * @param normalRestOp the rest operation used to sign the requests. + * @param failingOperation failing operation + * @param normalOp good operation + * @throws IOException failure + */ + private void executeThenFail(final AbfsClient client, + final AbfsRestOperation normalRestOp, + final AbfsHttpOperation failingOperation, + final AbfsHttpOperation normalOp) + throws IOException { + + Mockito.doAnswer(answer -> { + LOG.info("Executing first attempt with post-operation fault injection"); + final byte[] buffer = answer.getArgument(0); + final int offset = answer.getArgument(1); + final int length = answer.getArgument(2); + normalRestOp.signRequest(normalOp, length); + normalOp.sendRequest(buffer, offset, length); + normalOp.processResponse(buffer, offset, length); + LOG.info("Actual outcome is {} \"{}\" \"{}\"; injecting failure", + normalOp.getStatusCode(), + normalOp.getStorageErrorCode(), + normalOp.getStorageErrorMessage()); + throw new SocketException("connection-reset"); + }).when(failingOperation).sendRequest(Mockito.nullable(byte[].class), + Mockito.nullable(int.class), Mockito.nullable(int.class)); + + } + + /** + * This is the good outcome: resilient rename. 
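addSpyBehavior and executeThenFail wire a Mockito spy so that the first HTTP attempt really reaches the server and only afterwards reports a connection reset, which is what forces the retry and recovery paths under test. A stripped-down sketch of that "execute, then fail" stubbing pattern, using a made-up HttpCall interface rather than the real ABFS operation types:

import java.io.IOException;
import java.net.SocketException;

import org.mockito.Mockito;

// "HttpCall" is a made-up stand-in for the real http operation, for illustration only.
interface HttpCall {
  int send(byte[] body) throws IOException;
}

public final class ExecuteThenFailSketch {
  // Returns a spy whose send() performs the real request first and only then
  // reports a transient network failure, so retry/recovery logic is exercised
  // against state the server has already committed.
  static HttpCall withInjectedFault(HttpCall real) throws IOException {
    HttpCall spy = Mockito.spy(real);
    Mockito.doAnswer(invocation -> {
      byte[] body = invocation.getArgument(0);
      real.send(body);                                  // the request really runs
      throw new SocketException("connection-reset");    // then the caller sees a failure
    }).when(spy).send(Mockito.any(byte[].class));
    return spy;
  }
}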
+ */ + @Test + public void testRenameRecoveryEtagMatchFsLevel() throws IOException { + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient mockClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyFile1"; + String path2 = base + "/dummyFile2"; + + touch(new Path(path1)); + + setAbfsClient(abfsStore, mockClient); + + // checking correct count in AbfsCounters + AbfsCounters counter = mockClient.getAbfsCounters(); + IOStatistics ioStats = counter.getIOStatistics(); + + Long connMadeBeforeRename = lookupCounterStatistic(ioStats, CONNECTIONS_MADE.getStatName()); + Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName()); + + // 404 and retry, send sourceEtag as null + // source eTag matches -> rename should pass even when execute throws exception + fs.rename(new Path(path1), new Path(path2)); + + // validating stat counters after rename + // 4 calls should have happened in total for rename + // 1 -> original rename rest call, 2 -> first retry, + // +2 for getPathStatus calls + assertThatStatisticCounter(ioStats, + CONNECTIONS_MADE.getStatName()) + .isEqualTo(4 + connMadeBeforeRename); + // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 + // retries happen internally within AbfsRestOperation execute() + // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called + assertThatStatisticCounter(ioStats, + RENAME_PATH_ATTEMPTS.getStatName()) + .isEqualTo(1 + renamePathAttemptsBeforeRename); + + } + + /** + * execute a failing rename but have the file at the far end not match. + * This is done by explicitly passing in a made up etag for the source + * etag and creating a file at the far end. + * The first rename will actually fail with a path exists exception, + * but as that is swallowed, it's not a problem. 
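For orientation, the recovery check these tests exercise reduces to a small predicate: only a 404 on a retried request, with a remembered source etag that now shows up on the destination, counts as a rename that actually succeeded. A simplified restatement of that rule (not the production code, which also has to fetch the destination status):

final class RenameRecoveryPredicate {
  // Recovery is only claimed for a 404 on a retried request whose remembered
  // source etag now appears on the destination.
  static boolean renameRecovered(boolean retriedRequest, int statusCode,
      String sourceEtag, String destEtagAfterFailure) {
    if (!retriedRequest || statusCode != 404) {
      return false;   // only a 404 on a retry is a recovery candidate
    }
    if (sourceEtag == null || sourceEtag.isEmpty()) {
      return false;   // without an etag there is no proof the rename happened
    }
    return sourceEtag.equals(destEtagAfterFailure);
  }
}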
+ */ + @Test + public void testRenameRecoveryEtagMismatchFsLevel() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient mockClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyFile1"; + String path2 = base + "/dummyFile2"; + + fs.create(new Path(path2)); + + setAbfsClient(abfsStore, mockClient); + + // source eTag does not match -> rename should be a failure + assertEquals(false, fs.rename(new Path(path1), new Path(path2))); + + } + + @Test + public void testRenameRecoveryFailsForDirFsLevel() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient mockClient = getMockAbfsClient(); + + String dir1 = "/dummyDir1"; + String dir2 = "/dummyDir2"; + + Path path1 = new Path(dir1); + Path path2 = new Path(dir2); + + fs.mkdirs(path1); + + setAbfsClient(abfsStore, mockClient); + + // checking correct count in AbfsCounters + AbfsCounters counter = mockClient.getAbfsCounters(); + IOStatistics ioStats = counter.getIOStatistics(); + + Long connMadeBeforeRename = lookupCounterStatistic(ioStats, CONNECTIONS_MADE.getStatName()); + Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName()); + + // source eTag does not match -> rename should be a failure + boolean renameResult = fs.rename(path1, path2); + assertEquals(false, renameResult); + + // validating stat counters after rename + // 3 calls should have happened in total for rename + // 1 -> original rename rest call, 2 -> first retry, + // +1 for getPathStatus calls + // last getPathStatus call should be skipped + assertThatStatisticCounter(ioStats, + CONNECTIONS_MADE.getStatName()) + .isEqualTo(3 + connMadeBeforeRename); + + // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 + // retries happen internally within AbfsRestOperation execute() + // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called + assertThatStatisticCounter(ioStats, + RENAME_PATH_ATTEMPTS.getStatName()) + .isEqualTo(1 + renamePathAttemptsBeforeRename); + } + + /** + * Assert that an exception failed with a specific error code. + * @param code code + * @param e exception + * @throws AbfsRestOperationException if there is a mismatch + */ + private static void expectErrorCode(final AzureServiceErrorCode code, + final AbfsRestOperationException e) throws AbfsRestOperationException { + if (e.getErrorCode() != code) { + throw e; + } + } + + /** + * Directory rename failure is unrecoverable. 
+ */ + @Test + public void testDirRenameRecoveryUnsupported() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient spyClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyDir1"; + String path2 = base + "/dummyDir2"; + + fs.mkdirs(new Path(path1)); + + // source eTag does not match -> throw exception + expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () -> + spyClient.renamePath(path1, path2, null, testTracingContext, null, false, + isNamespaceEnabled))); + } + + /** + * Even with failures, having + */ + @Test + public void testExistingPathCorrectlyRejected() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient spyClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyDir1"; + String path2 = base + "/dummyDir2"; + + + touch(new Path(path1)); + touch(new Path(path2)); + + // source eTag does not match -> throw exception + expectErrorCode(PATH_ALREADY_EXISTS, intercept(AbfsRestOperationException.class, () -> + spyClient.renamePath(path1, path2, null, testTracingContext, null, false, + isNamespaceEnabled))); + } + + /** + * Test that rename recovery remains unsupported for + * FNS configurations. + */ + @Test + public void testRenameRecoveryUnsupportedForFlatNamespace() throws Exception { + Assume.assumeTrue(!isNamespaceEnabled); + AzureBlobFileSystem fs = getFileSystem(); + AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + AbfsClient mockClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyFile1"; + String path2 = base + "/dummyFile2"; + + touch(new Path(path1)); + + setAbfsClient(abfsStore, mockClient); + + // checking correct count in AbfsCounters + AbfsCounters counter = mockClient.getAbfsCounters(); + IOStatistics ioStats = counter.getIOStatistics(); + + Long connMadeBeforeRename = lookupCounterStatistic(ioStats, CONNECTIONS_MADE.getStatName()); + Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName()); + + expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () -> + mockClient.renamePath(path1, path2, null, testTracingContext, null, false, + isNamespaceEnabled))); + + // validating stat counters after rename + + // only 2 calls should have happened in total for rename + // 1 -> original rename rest call, 2 -> first retry, + // no getPathStatus calls + // last getPathStatus call should be skipped + assertThatStatisticCounter(ioStats, + CONNECTIONS_MADE.getStatName()) + .isEqualTo(2 + connMadeBeforeRename); + + // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 + // retries happen internally within AbfsRestOperation execute() + // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called + assertThatStatisticCounter(ioStats, + RENAME_PATH_ATTEMPTS.getStatName()) + .isEqualTo(1 + renamePathAttemptsBeforeRename); + } + + /** + * Test the resilient commit code works through fault injection, including + * reporting recovery. 
+ */ + @Test + public void testResilientCommitOperation() throws Throwable { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + final AzureBlobFileSystemStore store = fs.getAbfsStore(); + Assume.assumeTrue(store.getIsNamespaceEnabled(testTracingContext)); + + // patch in the mock abfs client to the filesystem, for the resilient + // commit API to pick up. + setAbfsClient(store, getMockAbfsClient()); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyDir1"; + String path2 = base + "/dummyDir2"; + + + final Path source = new Path(path1); + touch(source); + final String sourceTag = ((EtagSource) fs.getFileStatus(source)).getEtag(); + + final ResilientCommitByRename commit = fs.createResilientCommitSupport(source); + final Pair outcome = + commit.commitSingleFileByRename(source, new Path(path2), sourceTag); + Assertions.assertThat(outcome.getKey()) + .describedAs("recovery flag") + .isTrue(); + } + /** + * Test the resilient commit code works through fault injection, including + * reporting recovery. + */ + @Test + public void testResilientCommitOperationTagMismatch() throws Throwable { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + final AzureBlobFileSystemStore store = fs.getAbfsStore(); + Assume.assumeTrue(store.getIsNamespaceEnabled(testTracingContext)); + + // patch in the mock abfs client to the filesystem, for the resilient + // commit API to pick up. + setAbfsClient(store, getMockAbfsClient()); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyDir1"; + String path2 = base + "/dummyDir2"; + + + final Path source = new Path(path1); + touch(source); + final String sourceTag = ((EtagSource) fs.getFileStatus(source)).getEtag(); + + final ResilientCommitByRename commit = fs.createResilientCommitSupport(source); + intercept(FileNotFoundException.class, () -> + commit.commitSingleFileByRename(source, new Path(path2), "not the right tag")); + } + /** * Method to create an AbfsRestOperationException. * @param statusCode status code to be used. From eb1d3ebe2fb12eb36f507b2fceaea724c0f863d9 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Sat, 1 Apr 2023 06:29:18 +0800 Subject: [PATCH 52/97] YARN-11442. Refactor FederationInterceptorREST Code. 
(#5420) --- .../utils/FederationStateStoreFacade.java | 21 + .../webapp/FederationInterceptorREST.java | 679 ++++++------------ .../router/webapp/RouterWebServiceUtil.java | 7 +- .../webapp/TestFederationInterceptorREST.java | 99 +-- 4 files changed, 301 insertions(+), 505 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/utils/FederationStateStoreFacade.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/utils/FederationStateStoreFacade.java index ebad527b6d4..e7cfb2e3112 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/utils/FederationStateStoreFacade.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/utils/FederationStateStoreFacade.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.Random; +import java.util.Collection; import javax.cache.Cache; import javax.cache.CacheManager; @@ -93,6 +94,7 @@ import org.apache.hadoop.yarn.server.federation.store.records.SubClusterState; import org.apache.hadoop.yarn.server.federation.store.records.SubClusterDeregisterRequest; import org.apache.hadoop.yarn.server.federation.store.records.SubClusterDeregisterResponse; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; +import org.apache.hadoop.yarn.webapp.NotFoundException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1211,4 +1213,23 @@ public final class FederationStateStoreFacade { } return false; } + + /** + * Get active subclusters. + * + * @return We will return a list of active subclusters as a Collection. 
+ */ + public Collection getActiveSubClusters() + throws NotFoundException { + try { + Map subClusterMap = getSubClusters(true); + if (MapUtils.isEmpty(subClusterMap)) { + throw new NotFoundException("Not Found SubClusters."); + } + return subClusterMap.values(); + } catch (Exception e) { + LOG.error("getActiveSubClusters failed.", e); + return null; + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java index 9975823ec2b..5d73ef20e59 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java @@ -29,12 +29,13 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import java.util.concurrent.Callable; import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequestWrapper; @@ -45,6 +46,7 @@ import javax.ws.rs.core.Response.Status; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.impl.prefetch.Validate; import org.apache.hadoop.io.Text; @@ -148,6 +150,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; +import static org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade.getRandomActiveSubCluster; import static org.apache.hadoop.yarn.server.router.webapp.RouterWebServiceUtil.extractToken; import static org.apache.hadoop.yarn.server.router.webapp.RouterWebServiceUtil.getKerberosUserGroupInformation; @@ -159,8 +162,7 @@ import static org.apache.hadoop.yarn.server.router.webapp.RouterWebServiceUtil.g */ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { - private static final Logger LOG = - LoggerFactory.getLogger(FederationInterceptorREST.class); + private static final Logger LOG = LoggerFactory.getLogger(FederationInterceptorREST.class); private int numSubmitRetries; private FederationStateStoreFacade federationFacade; @@ -205,10 +207,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { interceptors = new HashMap<>(); routerMetrics = RouterMetrics.getMetrics(); - threadpool = HadoopExecutors.newCachedThreadPool( - new ThreadFactoryBuilder() - .setNameFormat("FederationInterceptorREST #%d") - .build()); + threadpool = HadoopExecutors.newCachedThreadPool(new ThreadFactoryBuilder() + .setNameFormat("FederationInterceptorREST #%d") + .build()); returnPartialReport = conf.getBoolean( YarnConfiguration.ROUTER_WEBAPP_PARTIAL_RESULTS_ENABLED, @@ -235,13 +236,11 @@ public class 
FederationInterceptorREST extends AbstractRESTRequestInterceptor { } @VisibleForTesting - protected DefaultRequestInterceptorREST getInterceptorForSubCluster( - SubClusterId subClusterId) { + protected DefaultRequestInterceptorREST getInterceptorForSubCluster(SubClusterId subClusterId) { if (interceptors.containsKey(subClusterId)) { return interceptors.get(subClusterId); } else { - LOG.error( - "The interceptor for SubCluster {} does not exist in the cache.", + LOG.error("The interceptor for SubCluster {} does not exist in the cache.", subClusterId); return null; } @@ -255,44 +254,63 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { String interceptorClassName = conf.get( YarnConfiguration.ROUTER_WEBAPP_DEFAULT_INTERCEPTOR_CLASS, YarnConfiguration.DEFAULT_ROUTER_WEBAPP_DEFAULT_INTERCEPTOR_CLASS); - DefaultRequestInterceptorREST interceptorInstance = null; + + DefaultRequestInterceptorREST interceptorInstance; try { Class interceptorClass = conf.getClassByName(interceptorClassName); - if (DefaultRequestInterceptorREST.class - .isAssignableFrom(interceptorClass)) { - interceptorInstance = (DefaultRequestInterceptorREST) ReflectionUtils - .newInstance(interceptorClass, conf); + if (DefaultRequestInterceptorREST.class.isAssignableFrom(interceptorClass)) { + interceptorInstance = + (DefaultRequestInterceptorREST) ReflectionUtils.newInstance(interceptorClass, conf); String userName = getUser().getUserName(); interceptorInstance.init(userName); } else { - throw new YarnRuntimeException( - "Class: " + interceptorClassName + " not instance of " - + DefaultRequestInterceptorREST.class.getCanonicalName()); + throw new YarnRuntimeException("Class: " + interceptorClassName + " not instance of " + + DefaultRequestInterceptorREST.class.getCanonicalName()); } } catch (ClassNotFoundException e) { - throw new YarnRuntimeException( - "Could not instantiate ApplicationMasterRequestInterceptor: " - + interceptorClassName, - e); + throw new YarnRuntimeException("Could not instantiate ApplicationMasterRequestInterceptor: " + + interceptorClassName, e); } - String webAppAddresswithScheme = - WebAppUtils.getHttpSchemePrefix(this.getConf()) + webAppAddress; - interceptorInstance.setWebAppAddress(webAppAddresswithScheme); + String webAppAddressWithScheme = WebAppUtils.getHttpSchemePrefix(conf) + webAppAddress; + interceptorInstance.setWebAppAddress(webAppAddressWithScheme); interceptorInstance.setSubClusterId(subClusterId); interceptors.put(subClusterId, interceptorInstance); return interceptorInstance; } + protected DefaultRequestInterceptorREST getOrCreateInterceptorForSubCluster( + SubClusterInfo subClusterInfo) { + if (subClusterInfo != null) { + final SubClusterId subClusterId = subClusterInfo.getSubClusterId(); + final String webServiceAddress = subClusterInfo.getRMWebServiceAddress(); + return getOrCreateInterceptorForSubCluster(subClusterId, webServiceAddress); + } + return null; + } + + protected DefaultRequestInterceptorREST getOrCreateInterceptorByAppId(String appId) + throws YarnException { + // We first check the applicationId + RouterServerUtil.validateApplicationId(appId); + + // Get homeSubCluster By appId + SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); + return getOrCreateInterceptorForSubCluster(subClusterInfo); + } + + protected DefaultRequestInterceptorREST getOrCreateInterceptorByNodeId(String nodeId) { + SubClusterInfo subClusterInfo = getNodeSubcluster(nodeId); + return getOrCreateInterceptorForSubCluster(subClusterInfo); + } + 
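The helpers above cache one DefaultRequestInterceptorREST per subcluster; the refactored getApps/getNode further below then fan requests out with parallelStream instead of an ExecutorCompletionService, swallowing per-subcluster failures and collecting the non-null results. A generic sketch of that fan-out shape, with made-up names (queryAll, query) and no YARN types:

import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;

public final class SubClusterFanOut {
  // Queries every subcluster in parallel; a failing subcluster simply
  // contributes nothing instead of failing the whole request.
  public static <C, R> List<R> queryAll(Collection<C> subClusters, Function<C, R> query) {
    return subClusters.parallelStream()
        .map(sc -> {
          try {
            return query.apply(sc);
          } catch (RuntimeException e) {
            return null; // swallow per-subcluster failures, as the router does
          }
        })
        .filter(Objects::nonNull)
        .collect(Collectors.toList());
  }
}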
@VisibleForTesting protected DefaultRequestInterceptorREST getOrCreateInterceptorForSubCluster( SubClusterId subClusterId, String webAppAddress) { - DefaultRequestInterceptorREST interceptor = - getInterceptorForSubCluster(subClusterId); - String webAppAddresswithScheme = WebAppUtils.getHttpSchemePrefix( - this.getConf()) + webAppAddress; - if (interceptor == null || !webAppAddresswithScheme.equals(interceptor. - getWebAppAddress())){ + DefaultRequestInterceptorREST interceptor = getInterceptorForSubCluster(subClusterId); + String webAppAddressWithScheme = + WebAppUtils.getHttpSchemePrefix(this.getConf()) + webAppAddress; + if (interceptor == null || !webAppAddressWithScheme.equals(interceptor.getWebAppAddress())) { interceptor = createInterceptorForSubCluster(subClusterId, webAppAddress); } return interceptor; @@ -372,8 +390,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { List blackList, HttpServletRequest hsr, int retryCount) throws YarnException, IOException, InterruptedException { - SubClusterId subClusterId = - federationFacade.getRandomActiveSubCluster(subClustersActive, blackList); + SubClusterId subClusterId = getRandomActiveSubCluster(subClustersActive, blackList); LOG.info("getNewApplication try #{} on SubCluster {}.", retryCount, subClusterId); @@ -462,8 +479,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { * Router submits the request to the selected SubCluster (e.g. SC2). */ @Override - public Response submitApplication(ApplicationSubmissionContextInfo newApp, - HttpServletRequest hsr) + public Response submitApplication(ApplicationSubmissionContextInfo newApp, HttpServletRequest hsr) throws AuthorizationException, IOException, InterruptedException { long startTime = clock.getTime(); @@ -548,6 +564,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // Step3. We get subClusterInfo based on subClusterId. SubClusterInfo subClusterInfo = federationFacade.getSubCluster(subClusterId); + if (subClusterInfo == null) { + throw new YarnException("Can't Find SubClusterId = " + subClusterId); + } // Step4. Submit the request, if the response is HttpServletResponse.SC_ACCEPTED, // We return the response, otherwise we throw an exception. @@ -587,43 +606,29 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { * operation. 
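getNewApplication above picks a random active subcluster that is not yet blacklisted, logs the attempt number, and retries elsewhere when a subcluster fails. A schematic, non-authoritative sketch of that retry-with-blacklist idea; every name here is illustrative, not the router's real signatures:

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public final class RetryWithBlacklistSketch {
  public interface Attempt<T, R> {
    R run(T candidate) throws Exception;
  }

  // Illustrative only: pick a random candidate not yet blacklisted, try it,
  // and blacklist it on failure so the next attempt lands elsewhere.
  public static <T, R> R tryOnRandomCandidate(List<T> candidates, int maxRetries,
      Attempt<T, R> attempt) throws Exception {
    List<T> blacklist = new ArrayList<>();
    Random rand = new Random();
    Exception last = null;
    for (int retry = 0; retry <= maxRetries; retry++) {
      List<T> usable = new ArrayList<>(candidates);
      usable.removeAll(blacklist);
      if (usable.isEmpty()) {
        break;
      }
      T candidate = usable.get(rand.nextInt(usable.size()));
      try {
        return attempt.run(candidate);
      } catch (Exception e) {
        blacklist.add(candidate);
        last = e;
      }
    }
    throw last != null ? last : new IllegalStateException("no candidates left");
  }
}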
*/ @Override - public AppInfo getApp(HttpServletRequest hsr, String appId, - Set unselectedFields) { + public AppInfo getApp(HttpServletRequest hsr, String appId, Set unselectedFields) { - long startTime = clock.getTime(); - - ApplicationId applicationId = null; try { - applicationId = ApplicationId.fromString(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrAppsFailedRetrieved(); - return null; - } + long startTime = clock.getTime(); - SubClusterInfo subClusterInfo = null; - SubClusterId subClusterId = null; - try { - subClusterId = - federationFacade.getApplicationHomeSubCluster(applicationId); - if (subClusterId == null) { + // Get SubClusterInfo according to applicationId + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); + if (interceptor == null) { routerMetrics.incrAppsFailedRetrieved(); return null; } - subClusterInfo = federationFacade.getSubCluster(subClusterId); + AppInfo response = interceptor.getApp(hsr, appId, unselectedFields); + long stopTime = clock.getTime(); + routerMetrics.succeededAppsRetrieved(stopTime - startTime); + return response; } catch (YarnException e) { routerMetrics.incrAppsFailedRetrieved(); + LOG.error("getApp Error, applicationId = {}.", appId, e); return null; + } catch (IllegalArgumentException e) { + routerMetrics.incrAppsFailedRetrieved(); + throw e; } - - DefaultRequestInterceptorREST interceptor = - getOrCreateInterceptorForSubCluster( - subClusterId, subClusterInfo.getRMWebServiceAddress()); - AppInfo response = interceptor.getApp(hsr, appId, unselectedFields); - - long stopTime = clock.getTime(); - routerMetrics.succeededAppsRetrieved(stopTime - startTime); - - return response; } /** @@ -643,13 +648,12 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { * operation. */ @Override - public Response updateAppState(AppState targetState, HttpServletRequest hsr, - String appId) throws AuthorizationException, YarnException, - InterruptedException, IOException { + public Response updateAppState(AppState targetState, HttpServletRequest hsr, String appId) + throws AuthorizationException, YarnException, InterruptedException, IOException { long startTime = clock.getTime(); - ApplicationId applicationId = null; + ApplicationId applicationId; try { applicationId = ApplicationId.fromString(appId); } catch (IllegalArgumentException e) { @@ -660,8 +664,8 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { .build(); } - SubClusterInfo subClusterInfo = null; - SubClusterId subClusterId = null; + SubClusterInfo subClusterInfo; + SubClusterId subClusterId; try { subClusterId = federationFacade.getApplicationHomeSubCluster(applicationId); @@ -724,60 +728,35 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { AppsInfo apps = new AppsInfo(); long startTime = clock.getTime(); - Map subClustersActive = null; - try { - subClustersActive = federationFacade.getSubClusters(true); - } catch (YarnException e) { - routerMetrics.incrMultipleAppsFailedRetrieved(); - return null; - } - - // Send the requests in parallel - CompletionService compSvc = - new ExecutorCompletionService<>(this.threadpool); - // HttpServletRequest does not work with ExecutorCompletionService. // Create a duplicate hsr. 
final HttpServletRequest hsrCopy = clone(hsr); - for (final SubClusterInfo info : subClustersActive.values()) { - compSvc.submit(new Callable() { - @Override - public AppsInfo call() { - DefaultRequestInterceptorREST interceptor = - getOrCreateInterceptorForSubCluster( - info.getSubClusterId(), info.getRMWebServiceAddress()); - AppsInfo rmApps = interceptor.getApps(hsrCopy, stateQuery, - statesQuery, finalStatusQuery, userQuery, queueQuery, count, - startedBegin, startedEnd, finishBegin, finishEnd, - applicationTypes, applicationTags, name, unselectedFields); + Collection subClusterInfos = federationFacade.getActiveSubClusters(); - if (rmApps == null) { - routerMetrics.incrMultipleAppsFailedRetrieved(); - LOG.error("Subcluster {} failed to return appReport.", info.getSubClusterId()); - return null; - } + List appsInfos = subClusterInfos.parallelStream().map(subCluster -> { + try { + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(subCluster); + AppsInfo rmApps = interceptor.getApps(hsrCopy, stateQuery, statesQuery, finalStatusQuery, + userQuery, queueQuery, count, startedBegin, startedEnd, finishBegin, finishEnd, + applicationTypes, applicationTags, name, unselectedFields); + if (rmApps != null) { return rmApps; } - }); - } - - // Collect all the responses in parallel - for (int i = 0; i < subClustersActive.size(); i++) { - try { - Future future = compSvc.take(); - AppsInfo appsResponse = future.get(); + } catch (Exception e) { + LOG.warn("Failed to get application report.", e); + } + routerMetrics.incrMultipleAppsFailedRetrieved(); + LOG.error("Subcluster {} failed to return appReport.", subCluster.getSubClusterId()); + return null; + }).collect(Collectors.toList()); + appsInfos.forEach(appsInfo -> { + if (appsInfo != null) { + apps.addAll(appsInfo.getApps()); long stopTime = clock.getTime(); routerMetrics.succeededMultipleAppsRetrieved(stopTime - startTime); - - if (appsResponse != null) { - apps.addAll(appsResponse.getApps()); - } - } catch (Throwable e) { - routerMetrics.incrMultipleAppsFailedRetrieved(); - LOG.warn("Failed to get application report", e); } - } + }); if (apps.getApps().isEmpty()) { return null; @@ -803,15 +782,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { if (hsr == null) { return null; } - @SuppressWarnings("unchecked") - final Map parameterMap = - (Map) hsr.getParameterMap(); + + final Map parameterMap = hsr.getParameterMap(); final String pathInfo = hsr.getPathInfo(); final String user = hsr.getRemoteUser(); final Principal principal = hsr.getUserPrincipal(); - final String mediaType = - RouterWebServiceUtil.getMediaTypeFromHttpServletRequest( - hsr, AppsInfo.class); + final String mediaType = RouterWebServiceUtil.getMediaTypeFromHttpServletRequest( + hsr, AppsInfo.class); return new HttpServletRequestWrapper(hsr) { public Map getParameterMap() { return parameterMap; @@ -835,20 +812,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { }; } - /** - * Get the active subclusters in the federation. - * @return Map from subcluster id to its info. - * @throws NotFoundException If the subclusters cannot be found. - */ - private Map getActiveSubclusters() - throws NotFoundException { - try { - return federationFacade.getSubClusters(true); - } catch (YarnException e) { - throw new NotFoundException(e.getMessage()); - } - } - /** * Get the active subcluster in the federation. 
* @@ -860,13 +823,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { throws NotFoundException { try { SubClusterId pSubClusterId = SubClusterId.newInstance(subClusterId); - Map subClusterInfoMap = - federationFacade.getSubClusters(true); - SubClusterInfo subClusterInfo = subClusterInfoMap.get(pSubClusterId); - if (subClusterInfo == null) { - throw new NotFoundException(subClusterId + " not found."); - } - return subClusterInfo; + return federationFacade.getSubCluster(pSubClusterId); } catch (YarnException e) { throw new NotFoundException(e.getMessage()); } @@ -890,14 +847,14 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { */ @Override public NodeInfo getNode(String nodeId) { - final Map subClustersActive = - getActiveSubclusters(); + + final Collection subClustersActive = federationFacade.getActiveSubClusters(); + if (subClustersActive.isEmpty()) { - throw new NotFoundException( - FederationPolicyUtils.NO_ACTIVE_SUBCLUSTER_AVAILABLE); + throw new NotFoundException(FederationPolicyUtils.NO_ACTIVE_SUBCLUSTER_AVAILABLE); } - final Map results = - getNode(subClustersActive.values(), nodeId); + + final Map results = getNode(subClustersActive, nodeId); // Collect the responses NodeInfo nodeInfo = null; @@ -922,65 +879,53 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { /** * Get a node and the subcluster where it is. + * * @param subClusters Subclusters where to search. - * @param nodeId Identifier of the node we are looking for. + * @param nodeId Identifier of the node we are looking for. * @return Map between subcluster and node. */ - private Map getNode( - Collection subClusters, String nodeId) { + private Map getNode(Collection subClusters, + String nodeId) { - // Send the requests in parallel - CompletionService compSvc = - new ExecutorCompletionService(this.threadpool); - final Map> futures = new HashMap<>(); - for (final SubClusterInfo subcluster : subClusters) { - final SubClusterId subclusterId = subcluster.getSubClusterId(); - Future result = compSvc.submit(() -> { - try { - DefaultRequestInterceptorREST interceptor = - getOrCreateInterceptorForSubCluster( - subclusterId, subcluster.getRMWebServiceAddress()); - return interceptor.getNode(nodeId); - } catch (Exception e) { - LOG.error("Subcluster {} failed to return nodeInfo.", subclusterId, e); - return null; - } - }); - futures.put(subcluster, result); - } + // Parallel traversal of subClusters + Stream> pairStream = subClusters.parallelStream().map( + subClusterInfo -> { + final SubClusterId subClusterId = subClusterInfo.getSubClusterId(); + try { + DefaultRequestInterceptorREST interceptor = + getOrCreateInterceptorForSubCluster(subClusterInfo); + return Pair.of(subClusterInfo, interceptor.getNode(nodeId)); + } catch (Exception e) { + LOG.error("Subcluster {} failed to return nodeInfo.", subClusterId, e); + return null; + } + }); // Collect the results final Map results = new HashMap<>(); - for (Entry> entry : futures.entrySet()) { - try { - final Future future = entry.getValue(); - final NodeInfo nodeInfo = future.get(); - // Check if the node was found in this SubCluster - if (nodeInfo != null) { - SubClusterInfo subcluster = entry.getKey(); - results.put(subcluster, nodeInfo); - } - } catch (Throwable e) { - LOG.warn("Failed to get node report ", e); + pairStream.forEach(pair -> { + if (pair != null) { + SubClusterInfo subCluster = pair.getKey(); + NodeInfo nodeInfo = pair.getValue(); + results.put(subCluster, nodeInfo); } - } + 
}); return results; } /** * Get the subcluster a node belongs to. + * * @param nodeId Identifier of the node we are looking for. * @return The subcluster containing the node. * @throws NotFoundException If the node cannot be found. */ - private SubClusterInfo getNodeSubcluster(String nodeId) - throws NotFoundException { + private SubClusterInfo getNodeSubcluster(String nodeId) throws NotFoundException { + + final Collection subClusters = federationFacade.getActiveSubClusters(); + final Map results = getNode(subClusters, nodeId); - final Collection subClusters = - getActiveSubclusters().values(); - final Map results = - getNode(subClusters, nodeId); SubClusterInfo subcluster = null; NodeInfo nodeInfo = null; for (Entry entry : results.entrySet()) { @@ -992,8 +937,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { } } if (subcluster == null) { - throw new NotFoundException( - "Cannot find " + nodeId + " in any subcluster"); + throw new NotFoundException("Cannot find " + nodeId + " in any subcluster"); } return subcluster; } @@ -1022,15 +966,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { NodesInfo nodes = new NodesInfo(); try { - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); Class[] argsClasses = new Class[]{String.class}; Object[] args = new Object[]{states}; ClientMethod remoteMethod = new ClientMethod("getNodes", argsClasses, args); Map nodesMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, NodesInfo.class); - nodesMap.values().stream().forEach(nodesInfo -> { - nodes.addAll(nodesInfo.getNodes()); - }); + invokeConcurrent(subClustersActive, remoteMethod, NodesInfo.class); + nodesMap.values().forEach(nodesInfo -> nodes.addAll(nodesInfo.getNodes())); } catch (NotFoundException e) { LOG.error("get all active sub cluster(s) error.", e); throw e; @@ -1049,14 +991,20 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { return RouterWebServiceUtil.deleteDuplicateNodesInfo(nodes.getNodes()); } + /** + * This method changes the resources of a specific node, and it is reachable + * by using {@link RMWSConsts#NODE_RESOURCE}. + * + * @param hsr The servlet request. + * @param nodeId The node we want to retrieve the information for. + * It is a PathParam. + * @param resourceOption The resource change. + * @return the resources of a specific node. 
+ */ @Override public ResourceInfo updateNodeResource(HttpServletRequest hsr, String nodeId, ResourceOptionInfo resourceOption) { - SubClusterInfo subcluster = getNodeSubcluster(nodeId); - DefaultRequestInterceptorREST interceptor = - getOrCreateInterceptorForSubCluster( - subcluster.getSubClusterId(), - subcluster.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByNodeId(nodeId); return interceptor.updateNodeResource(hsr, nodeId, resourceOption); } @@ -1064,50 +1012,30 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { public ClusterMetricsInfo getClusterMetricsInfo() { ClusterMetricsInfo metrics = new ClusterMetricsInfo(); - final Map subClustersActive; - try { - subClustersActive = getActiveSubclusters(); - } catch (Exception e) { - LOG.error(e.getLocalizedMessage()); - return metrics; - } + Collection subClusterInfos = federationFacade.getActiveSubClusters(); - // Send the requests in parallel - CompletionService compSvc = - new ExecutorCompletionService(this.threadpool); - - for (final SubClusterInfo info : subClustersActive.values()) { - compSvc.submit(new Callable() { - @Override - public ClusterMetricsInfo call() { + Stream clusterMetricsInfoStream = subClusterInfos.parallelStream() + .map(subClusterInfo -> { DefaultRequestInterceptorREST interceptor = - getOrCreateInterceptorForSubCluster( - info.getSubClusterId(), info.getRMWebServiceAddress()); + getOrCreateInterceptorForSubCluster(subClusterInfo); try { - ClusterMetricsInfo metrics = interceptor.getClusterMetricsInfo(); - return metrics; + return interceptor.getClusterMetricsInfo(); } catch (Exception e) { LOG.error("Subcluster {} failed to return Cluster Metrics.", - info.getSubClusterId()); + subClusterInfo.getSubClusterId()); return null; } - } - }); - } + }); - // Collect all the responses in parallel - for (int i = 0; i < subClustersActive.size(); i++) { + clusterMetricsInfoStream.forEach(clusterMetricsInfo -> { try { - Future future = compSvc.take(); - ClusterMetricsInfo metricsResponse = future.get(); - - if (metricsResponse != null) { - RouterWebServiceUtil.mergeMetrics(metrics, metricsResponse); + if (clusterMetricsInfo != null) { + RouterWebServiceUtil.mergeMetrics(metrics, clusterMetricsInfo); } } catch (Throwable e) { - LOG.warn("Failed to get nodes report ", e); + LOG.warn("Failed to get nodes report.", e); } - } + }); return metrics; } @@ -1131,31 +1059,15 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { @Override public AppState getAppState(HttpServletRequest hsr, String appId) throws AuthorizationException { - - ApplicationId applicationId = null; try { - applicationId = ApplicationId.fromString(appId); - } catch (IllegalArgumentException e) { - return null; - } - - SubClusterInfo subClusterInfo = null; - SubClusterId subClusterId = null; - try { - subClusterId = - federationFacade.getApplicationHomeSubCluster(applicationId); - if (subClusterId == null) { - return null; + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); + if (interceptor != null) { + return interceptor.getAppState(hsr, appId); } - subClusterInfo = federationFacade.getSubCluster(subClusterId); - } catch (YarnException e) { - return null; + } catch (YarnException | IllegalArgumentException e) { + LOG.error("getHomeSubClusterInfoByAppId error, applicationId = {}.", appId, e); } - - DefaultRequestInterceptorREST interceptor = - getOrCreateInterceptorForSubCluster(subClusterId, - 
subClusterInfo.getRMWebServiceAddress()); - return interceptor.getAppState(hsr, appId); + return null; } @Override @@ -1176,12 +1088,12 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { public ClusterInfo getClusterInfo() { try { long startTime = Time.now(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); Class[] argsClasses = new Class[]{}; Object[] args = new Object[]{}; ClientMethod remoteMethod = new ClientMethod("getClusterInfo", argsClasses, args); Map subClusterInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, ClusterInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, ClusterInfo.class); FederationClusterInfo federationClusterInfo = new FederationClusterInfo(); subClusterInfoMap.forEach((subClusterInfo, clusterInfo) -> { SubClusterId subClusterId = subClusterInfo.getSubClusterId(); @@ -1216,13 +1128,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { public ClusterUserInfo getClusterUserInfo(HttpServletRequest hsr) { try { long startTime = Time.now(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{HttpServletRequest.class}; Object[] args = new Object[]{hsrCopy}; ClientMethod remoteMethod = new ClientMethod("getClusterUserInfo", argsClasses, args); Map subClusterInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, ClusterUserInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, ClusterUserInfo.class); FederationClusterUserInfo federationClusterUserInfo = new FederationClusterUserInfo(); subClusterInfoMap.forEach((subClusterInfo, clusterUserInfo) -> { SubClusterId subClusterId = subClusterInfo.getSubClusterId(); @@ -1246,7 +1158,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { /** * This method retrieves the current scheduler status, and it is reachable by * using {@link RMWSConsts#SCHEDULER}. - * * For the federation mode, the SchedulerType information of the cluster * cannot be integrated and displayed, and the specific cluster information needs to be marked. * @@ -1256,12 +1167,12 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { public SchedulerTypeInfo getSchedulerInfo() { try { long startTime = Time.now(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); Class[] argsClasses = new Class[]{}; Object[] args = new Object[]{}; ClientMethod remoteMethod = new ClientMethod("getSchedulerInfo", argsClasses, args); Map subClusterInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, SchedulerTypeInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, SchedulerTypeInfo.class); FederationSchedulerTypeInfo federationSchedulerTypeInfo = new FederationSchedulerTypeInfo(); subClusterInfoMap.forEach((subClusterInfo, schedulerTypeInfo) -> { SubClusterId subClusterId = subClusterInfo.getSubClusterId(); @@ -1319,17 +1230,18 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // Step2. Call dumpSchedulerLogs of each subcluster. 
try { long startTime = clock.getTime(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{String.class, HttpServletRequest.class}; Object[] args = new Object[]{time, hsrCopy}; ClientMethod remoteMethod = new ClientMethod("dumpSchedulerLogs", argsClasses, args); Map dumpSchedulerLogsMap = invokeConcurrent( - subClustersActive.values(), remoteMethod, String.class); + subClustersActive, remoteMethod, String.class); StringBuilder stringBuilder = new StringBuilder(); dumpSchedulerLogsMap.forEach((subClusterInfo, msg) -> { SubClusterId subClusterId = subClusterInfo.getSubClusterId(); - stringBuilder.append("subClusterId" + subClusterId + " : " + msg + "; "); + stringBuilder.append("subClusterId") + .append(subClusterId).append(" : ").append(msg).append("; "); }); long stopTime = clock.getTime(); routerMetrics.succeededDumpSchedulerLogsRetrieved(stopTime - startTime); @@ -1369,12 +1281,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // Query SubClusterInfo according to id, // if the nodeId cannot get SubClusterInfo, an exception will be thrown directly. - SubClusterInfo subClusterInfo = getNodeSubcluster(nodeId); - // Call the corresponding subCluster to get ActivitiesInfo. long startTime = clock.getTime(); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByNodeId(nodeId); final HttpServletRequest hsrCopy = clone(hsr); ActivitiesInfo activitiesInfo = interceptor.getActivities(hsrCopy, nodeId, groupBy); if (activitiesInfo != null) { @@ -1382,10 +1291,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { routerMetrics.succeededGetActivitiesLatencyRetrieved(stopTime - startTime); return activitiesInfo; } - } catch (IllegalArgumentException e) { - routerMetrics.incrGetActivitiesFailedRetrieved(); - throw e; - } catch (NotFoundException e) { + } catch (IllegalArgumentException | NotFoundException e) { routerMetrics.incrGetActivitiesFailedRetrieved(); throw e; } @@ -1413,13 +1319,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { Validate.checkNotNegative(activitiesCount, "activitiesCount"); // Step2. Call the interface of subCluster concurrently and get the returned result. - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{HttpServletRequest.class, String.class, int.class}; Object[] args = new Object[]{hsrCopy, groupBy, activitiesCount}; ClientMethod remoteMethod = new ClientMethod("getBulkActivities", argsClasses, args); Map appStatisticsMap = invokeConcurrent( - subClustersActive.values(), remoteMethod, BulkActivitiesInfo.class); + subClustersActive, remoteMethod, BulkActivitiesInfo.class); // Step3. Generate Federation objects and set subCluster information. 
long startTime = clock.getTime(); @@ -1460,22 +1366,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { Set allocationRequestIds, String groupBy, String limit, Set actions, boolean summarize) { - // Only verify the app_id, - // because the specific subCluster needs to be found according to the app_id, - // and other verifications are directly handed over to the corresponding subCluster RM - // Check that the appId format is accurate - try { - RouterServerUtil.validateApplicationId(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrGetAppActivitiesFailedRetrieved(); - throw e; - } - try { long startTime = clock.getTime(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); final HttpServletRequest hsrCopy = clone(hsr); AppActivitiesInfo appActivitiesInfo = interceptor.getAppActivities(hsrCopy, appId, time, requestPriorities, allocationRequestIds, groupBy, limit, actions, summarize); @@ -1502,13 +1395,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { Set stateQueries, Set typeQueries) { try { long startTime = clock.getTime(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{HttpServletRequest.class, Set.class, Set.class}; Object[] args = new Object[]{hsrCopy, stateQueries, typeQueries}; ClientMethod remoteMethod = new ClientMethod("getAppStatistics", argsClasses, args); Map appStatisticsMap = invokeConcurrent( - subClustersActive.values(), remoteMethod, ApplicationStatisticsInfo.class); + subClustersActive, remoteMethod, ApplicationStatisticsInfo.class); ApplicationStatisticsInfo applicationStatisticsInfo = RouterWebServiceUtil.mergeApplicationStatisticsInfo(appStatisticsMap.values()); if (applicationStatisticsInfo != null) { @@ -1541,13 +1434,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { throws IOException { try { long startTime = clock.getTime(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{HttpServletRequest.class}; Object[] args = new Object[]{hsrCopy}; ClientMethod remoteMethod = new ClientMethod("getNodeToLabels", argsClasses, args); Map nodeToLabelsInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, NodeToLabelsInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, NodeToLabelsInfo.class); NodeToLabelsInfo nodeToLabelsInfo = RouterWebServiceUtil.mergeNodeToLabels(nodeToLabelsInfoMap); if (nodeToLabelsInfo != null) { @@ -1570,13 +1463,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { public NodeLabelsInfo getRMNodeLabels(HttpServletRequest hsr) throws IOException { try { long startTime = clock.getTime(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{HttpServletRequest.class}; Object[] args = new Object[]{hsrCopy}; ClientMethod remoteMethod = new 
ClientMethod("getRMNodeLabels", argsClasses, args); Map nodeToLabelsInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, NodeLabelsInfo.class); NodeLabelsInfo nodeToLabelsInfo = RouterWebServiceUtil.mergeNodeLabelsInfo(nodeToLabelsInfoMap); if (nodeToLabelsInfo != null) { @@ -1600,12 +1493,12 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { throws IOException { try { long startTime = clock.getTime(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); Class[] argsClasses = new Class[]{Set.class}; Object[] args = new Object[]{labels}; ClientMethod remoteMethod = new ClientMethod("getLabelsToNodes", argsClasses, args); Map labelsToNodesInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, LabelsToNodesInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, LabelsToNodesInfo.class); Map labelToNodesMap = new HashMap<>(); labelsToNodesInfoMap.values().forEach(labelsToNode -> { Map values = labelsToNode.getLabelsToNodes(); @@ -1666,7 +1559,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // Step2. We map the NodeId and NodeToLabelsEntry in the request. Map nodeIdToLabels = new HashMap<>(); - newNodeToLabels.getNodeToLabels().stream().forEach(nodeIdToLabel -> { + newNodeToLabels.getNodeToLabels().forEach(nodeIdToLabel -> { String nodeId = nodeIdToLabel.getNodeId(); nodeIdToLabels.put(nodeId, nodeIdToLabel); }); @@ -1686,11 +1579,11 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { long startTime = clock.getTime(); final HttpServletRequest hsrCopy = clone(hsr); StringBuilder builder = new StringBuilder(); - subClusterToNodeToLabelsEntryList.forEach((subCluster, nodeToLabelsEntryList) -> { - SubClusterId subClusterId = subCluster.getSubClusterId(); + subClusterToNodeToLabelsEntryList.forEach((subClusterInfo, nodeToLabelsEntryList) -> { + SubClusterId subClusterId = subClusterInfo.getSubClusterId(); try { DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subCluster.getSubClusterId(), subCluster.getRMWebServiceAddress()); + subClusterInfo); interceptor.replaceLabelsOnNodes(nodeToLabelsEntryList, hsrCopy); builder.append("subCluster-").append(subClusterId.getId()).append(":Success,"); } catch (Exception e) { @@ -1703,9 +1596,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // Step5. return call result. return Response.status(Status.OK).entity(builder.toString()).build(); - } catch (NotFoundException e) { - routerMetrics.incrReplaceLabelsOnNodesFailedRetrieved(); - throw e; } catch (Exception e) { routerMetrics.incrReplaceLabelsOnNodesFailedRetrieved(); throw e; @@ -1743,8 +1633,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // and then call the replaceLabelsOnNode of the subCluster. 
long startTime = clock.getTime(); SubClusterInfo subClusterInfo = getNodeSubcluster(nodeId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByNodeId(nodeId); final HttpServletRequest hsrCopy = clone(hsr); interceptor.replaceLabelsOnNode(newNodeLabelsName, hsrCopy, nodeId); @@ -1753,10 +1642,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { routerMetrics.succeededReplaceLabelsOnNodeRetrieved(stopTime - startTime); String msg = "subCluster#" + subClusterInfo.getSubClusterId().getId() + ":Success;"; return Response.status(Status.OK).entity(msg).build(); - } catch (NotFoundException e) { - routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); - throw e; - } catch (Exception e){ + } catch (Exception e) { routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); throw e; } @@ -1767,13 +1653,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { throws IOException { try { long startTime = clock.getTime(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{HttpServletRequest.class}; Object[] args = new Object[]{hsrCopy}; ClientMethod remoteMethod = new ClientMethod("getClusterNodeLabels", argsClasses, args); Map nodeToLabelsInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, NodeLabelsInfo.class); Set hashSets = Sets.newHashSet(); nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels())); NodeLabelsInfo nodeLabelsInfo = new NodeLabelsInfo(hashSets); @@ -1820,18 +1706,17 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { try { long startTime = clock.getTime(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActives = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{NodeLabelsInfo.class, HttpServletRequest.class}; Object[] args = new Object[]{newNodeLabels, hsrCopy}; ClientMethod remoteMethod = new ClientMethod("addToClusterNodeLabels", argsClasses, args); Map responseInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, Response.class); + invokeConcurrent(subClustersActives, remoteMethod, Response.class); StringBuffer buffer = new StringBuffer(); // SubCluster-0:SUCCESS,SubCluster-1:SUCCESS - responseInfoMap.forEach((subClusterInfo, response) -> { - buildAppendMsg(subClusterInfo, buffer, response); - }); + responseInfoMap.forEach((subClusterInfo, response) -> + buildAppendMsg(subClusterInfo, buffer, response)); long stopTime = clock.getTime(); routerMetrics.succeededAddToClusterNodeLabelsRetrieved((stopTime - startTime)); return Response.status(Status.OK).entity(buffer.toString()).build(); @@ -1868,19 +1753,18 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { try { long startTime = clock.getTime(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActives = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{Set.class, HttpServletRequest.class}; Object[] args = new Object[]{oldNodeLabels, hsrCopy}; ClientMethod 
remoteMethod = new ClientMethod("removeFromClusterNodeLabels", argsClasses, args); Map responseInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, Response.class); + invokeConcurrent(subClustersActives, remoteMethod, Response.class); StringBuffer buffer = new StringBuffer(); // SubCluster-0:SUCCESS,SubCluster-1:SUCCESS - responseInfoMap.forEach((subClusterInfo, response) -> { - buildAppendMsg(subClusterInfo, buffer, response); - }); + responseInfoMap.forEach((subClusterInfo, response) -> + buildAppendMsg(subClusterInfo, buffer, response)); long stopTime = clock.getTime(); routerMetrics.succeededRemoveFromClusterNodeLabelsRetrieved(stopTime - startTime); return Response.status(Status.OK).entity(buffer.toString()).build(); @@ -1897,7 +1781,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { } /** - * Bbulid Append information. + * Build Append information. * * @param subClusterInfo subCluster information. * @param buffer StringBuffer. @@ -1920,13 +1804,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { throws IOException { try { long startTime = clock.getTime(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{HttpServletRequest.class, String.class}; Object[] args = new Object[]{hsrCopy, nodeId}; ClientMethod remoteMethod = new ClientMethod("getLabelsOnNode", argsClasses, args); Map nodeToLabelsInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, NodeLabelsInfo.class); Set hashSets = Sets.newHashSet(); nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels())); NodeLabelsInfo nodeLabelsInfo = new NodeLabelsInfo(hashSets); @@ -1952,19 +1836,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { public AppPriority getAppPriority(HttpServletRequest hsr, String appId) throws AuthorizationException { - // Check that the appId format is accurate - try { - RouterServerUtil.validateApplicationId(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrGetAppPriorityFailedRetrieved(); - throw e; - } - try { long startTime = clock.getTime(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); AppPriority appPriority = interceptor.getAppPriority(hsr, appId); if (appPriority != null) { long stopTime = clock.getTime(); @@ -1988,14 +1862,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { HttpServletRequest hsr, String appId) throws AuthorizationException, YarnException, InterruptedException, IOException { - // Check that the appId format is accurate - try { - RouterServerUtil.validateApplicationId(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrUpdateAppPriorityFailedRetrieved(); - throw e; - } - if (targetPriority == null) { routerMetrics.incrUpdateAppPriorityFailedRetrieved(); throw new IllegalArgumentException("Parameter error, the targetPriority is empty or null."); @@ -2003,9 +1869,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { try { long startTime = 
clock.getTime(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); Response response = interceptor.updateApplicationPriority(targetPriority, hsr, appId); if (response != null) { long stopTime = clock.getTime(); @@ -2028,19 +1892,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { public AppQueue getAppQueue(HttpServletRequest hsr, String appId) throws AuthorizationException { - // Check that the appId format is accurate - try { - RouterServerUtil.validateApplicationId(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrGetAppQueueFailedRetrieved(); - throw e; - } - try { long startTime = clock.getTime(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); AppQueue queue = interceptor.getAppQueue(hsr, appId); if (queue != null) { long stopTime = clock.getTime(); @@ -2063,14 +1917,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { String appId) throws AuthorizationException, YarnException, InterruptedException, IOException { - // Check that the appId format is accurate - try { - RouterServerUtil.validateApplicationId(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrUpdateAppQueueFailedRetrieved(); - throw e; - } - if (targetQueue == null) { routerMetrics.incrUpdateAppQueueFailedRetrieved(); throw new IllegalArgumentException("Parameter error, the targetQueue is null."); @@ -2078,9 +1924,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { try { long startTime = clock.getTime(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); Response response = interceptor.updateAppQueue(targetQueue, hsr, appId); if (response != null) { long stopTime = clock.getTime(); @@ -2197,8 +2041,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { byte[] password = token.getPassword().array(); Text kind = new Text(token.getKind()); Text service = new Text(token.getService()); - Token tk = new Token<>(identifier, password, kind, service); - return tk; + return new Token<>(identifier, password, kind, service); } /** @@ -2342,9 +2185,8 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { private Response invokeCreateNewReservation(Map subClustersActive, List blackList, HttpServletRequest hsr, int retryCount) - throws YarnException, IOException, InterruptedException { - SubClusterId subClusterId = - federationFacade.getRandomActiveSubCluster(subClustersActive, blackList); + throws YarnException { + SubClusterId subClusterId = getRandomActiveSubCluster(subClustersActive, blackList); LOG.info("createNewReservation try #{} on SubCluster {}.", retryCount, subClusterId); SubClusterInfo subClusterInfo = subClustersActive.get(subClusterId); 
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( @@ -2591,19 +2433,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { public AppTimeoutInfo getAppTimeout(HttpServletRequest hsr, String appId, String type) throws AuthorizationException { - if (appId == null || appId.isEmpty()) { - routerMetrics.incrGetAppTimeoutFailedRetrieved(); - throw new IllegalArgumentException("Parameter error, the appId is empty or null."); - } - - // Check that the appId format is accurate - try { - ApplicationId.fromString(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrGetAppTimeoutFailedRetrieved(); - throw e; - } - if (type == null || type.isEmpty()) { routerMetrics.incrGetAppTimeoutFailedRetrieved(); throw new IllegalArgumentException("Parameter error, the type is empty or null."); @@ -2611,9 +2440,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { try { long startTime = clock.getTime(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); AppTimeoutInfo appTimeoutInfo = interceptor.getAppTimeout(hsr, appId, type); if (appTimeoutInfo != null) { long stopTime = clock.getTime(); @@ -2636,19 +2463,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { public AppTimeoutsInfo getAppTimeouts(HttpServletRequest hsr, String appId) throws AuthorizationException { - // Check that the appId format is accurate - try { - RouterServerUtil.validateApplicationId(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrGetAppTimeoutsFailedRetrieved(); - throw e; - } - try { long startTime = clock.getTime(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); AppTimeoutsInfo appTimeoutsInfo = interceptor.getAppTimeouts(hsr, appId); if (appTimeoutsInfo != null) { long stopTime = clock.getTime(); @@ -2673,14 +2490,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { HttpServletRequest hsr, String appId) throws AuthorizationException, YarnException, InterruptedException, IOException { - // Check that the appId format is accurate - try { - RouterServerUtil.validateApplicationId(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrUpdateApplicationTimeoutsRetrieved(); - throw e; - } - if (appTimeout == null) { routerMetrics.incrUpdateApplicationTimeoutsRetrieved(); throw new IllegalArgumentException("Parameter error, the appTimeout is null."); @@ -2688,9 +2497,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { try { long startTime = Time.now(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); Response response = interceptor.updateApplicationTimeout(appTimeout, hsr, appId); if (response != null) { long stopTime = 
clock.getTime(); @@ -2713,19 +2520,9 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { @Override public AppAttemptsInfo getAppAttempts(HttpServletRequest hsr, String appId) { - // Check that the appId format is accurate - try { - RouterServerUtil.validateApplicationId(appId); - } catch (IllegalArgumentException e) { - routerMetrics.incrAppAttemptsFailedRetrieved(); - throw e; - } - try { long startTime = Time.now(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); AppAttemptsInfo appAttemptsInfo = interceptor.getAppAttempts(hsr, appId); if (appAttemptsInfo != null) { long stopTime = Time.now(); @@ -2768,14 +2565,14 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // Traverse SubCluster and call checkUserAccessToQueue Api try { long startTime = Time.now(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{String.class, String.class, String.class, HttpServletRequest.class}; Object[] args = new Object[]{queue, username, queueAclType, hsrCopy}; ClientMethod remoteMethod = new ClientMethod("checkUserAccessToQueue", argsClasses, args); Map rmQueueAclInfoMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, RMQueueAclInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, RMQueueAclInfo.class); FederationRMQueueAclInfo aclInfo = new FederationRMQueueAclInfo(); rmQueueAclInfoMap.forEach((subClusterInfo, rMQueueAclInfo) -> { SubClusterId subClusterId = subClusterInfo.getSubClusterId(); @@ -2803,7 +2600,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // Check that the appId/appAttemptId format is accurate try { - RouterServerUtil.validateApplicationId(appId); RouterServerUtil.validateApplicationAttemptId(appAttemptId); } catch (IllegalArgumentException e) { routerMetrics.incrAppAttemptReportFailedRetrieved(); @@ -2813,9 +2609,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // Call the getAppAttempt method try { long startTime = Time.now(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); AppAttemptInfo appAttemptInfo = interceptor.getAppAttempt(req, res, appId, appAttemptId); if (appAttemptInfo != null) { long stopTime = Time.now(); @@ -2853,13 +2647,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { try { long startTime = clock.getTime(); ContainersInfo containersInfo = new ContainersInfo(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); Class[] argsClasses = new Class[]{ HttpServletRequest.class, HttpServletResponse.class, String.class, String.class}; Object[] args = new Object[]{req, res, appId, appAttemptId}; ClientMethod remoteMethod = new ClientMethod("getContainers", argsClasses, args); Map containersInfoMap = - 
invokeConcurrent(subClustersActive.values(), remoteMethod, ContainersInfo.class); + invokeConcurrent(subClustersActive, remoteMethod, ContainersInfo.class); if (containersInfoMap != null && !containersInfoMap.isEmpty()) { containersInfoMap.values().forEach(containers -> containersInfo.addAll(containers.getContainers())); @@ -2895,7 +2689,6 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { // Check that the appId/appAttemptId/containerId format is accurate try { - RouterServerUtil.validateApplicationId(appId); RouterServerUtil.validateApplicationAttemptId(appAttemptId); RouterServerUtil.validateContainerId(containerId); } catch (IllegalArgumentException e) { @@ -2905,9 +2698,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { try { long startTime = Time.now(); - SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId); - DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster( - subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress()); + DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorByAppId(appId); ContainerInfo containerInfo = interceptor.getContainer(req, res, appId, appAttemptId, containerId); if (containerInfo != null) { @@ -3006,13 +2797,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { try { long startTime = clock.getTime(); FederationConfInfo federationConfInfo = new FederationConfInfo(); - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); final HttpServletRequest hsrCopy = clone(hsr); Class[] argsClasses = new Class[]{HttpServletRequest.class}; Object[] args = new Object[]{hsrCopy}; ClientMethod remoteMethod = new ClientMethod("getSchedulerConfiguration", argsClasses, args); Map responseMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, Response.class); + invokeConcurrent(subClustersActive, remoteMethod, Response.class); responseMap.forEach((subClusterInfo, response) -> { SubClusterId subClusterId = subClusterInfo.getSubClusterId(); if (response == null) { @@ -3022,7 +2813,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { String errorMsg = String.valueOf(response.getEntity()); federationConfInfo.getErrorMsgs().add(errorMsg); } else if (response.getStatus() == Status.OK.getStatusCode()) { - ConfInfo fedConfInfo = ConfInfo.class.cast(response.getEntity()); + ConfInfo fedConfInfo = (ConfInfo) response.getEntity(); fedConfInfo.setSubClusterId(subClusterId.getId()); federationConfInfo.getList().add(fedConfInfo); } @@ -3175,7 +2966,11 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { */ private SubClusterInfo getHomeSubClusterInfoByAppId(String appId) throws YarnException { - SubClusterInfo subClusterInfo = null; + + if (StringUtils.isBlank(appId)) { + throw new IllegalArgumentException("applicationId can't be null or empty."); + } + try { ApplicationId applicationId = ApplicationId.fromString(appId); SubClusterId subClusterId = federationFacade.getApplicationHomeSubCluster(applicationId); @@ -3183,8 +2978,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { RouterServerUtil.logAndThrowException(null, "Can't get HomeSubCluster by applicationId %s", applicationId); } - subClusterInfo = federationFacade.getSubCluster(subClusterId); - return subClusterInfo; + return federationFacade.getSubCluster(subClusterId); } catch
(IllegalArgumentException e){ throw new IllegalArgumentException(e); } catch (YarnException e) { @@ -3210,8 +3004,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { RouterServerUtil.logAndThrowException(null, "Can't get HomeSubCluster by reservationId %s", resId); } - SubClusterInfo subClusterInfo = federationFacade.getSubCluster(subClusterId); - return subClusterInfo; + return federationFacade.getSubCluster(subClusterId); } catch (YarnException | IOException e) { RouterServerUtil.logAndThrowException(e, "Get HomeSubClusterInfo by reservationId %s failed.", resId); @@ -3236,12 +3029,10 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor { @VisibleForTesting public Map invokeConcurrentGetNodeLabel() throws IOException, YarnException { - Map subClustersActive = getActiveSubclusters(); + Collection subClustersActive = federationFacade.getActiveSubClusters(); Class[] argsClasses = new Class[]{String.class}; Object[] args = new Object[]{null}; ClientMethod remoteMethod = new ClientMethod("getNodes", argsClasses, args); - Map nodesMap = - invokeConcurrent(subClustersActive.values(), remoteMethod, NodesInfo.class); - return nodesMap; + return invokeConcurrent(subClustersActive, remoteMethod, NodesInfo.class); } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java index 7af470dc583..07afc9180ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java @@ -111,8 +111,8 @@ public final class RouterWebServiceUtil { * @param formParam the form parameters as input for a specific REST call * @param additionalParam the query parameters as input for a specific REST * call in case the call has no servlet request + * @param conf configuration. * @param client same client used to reduce number of clients created - * @param conf configuration * @return the retrieved entity from the REST call */ protected static T genericForward(final String webApp, @@ -510,6 +510,11 @@ public final class RouterWebServiceUtil { /** * Extract from HttpServletRequest the MediaType in output. + * + * @param request the servlet request. + * @param returnType the return type of the REST call. + * @param Generic Type T. + * @return MediaType. 
*/ protected static String getMediaTypeFromHttpServletRequest( HttpServletRequest request, final Class returnType) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java index 19bba51e270..5279902b58a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java @@ -145,8 +145,6 @@ import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo; import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import org.junit.Assert; import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import static org.apache.hadoop.yarn.conf.YarnConfiguration.RM_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT; import static org.apache.hadoop.yarn.conf.YarnConfiguration.RM_DELEGATION_KEY_UPDATE_INTERVAL_KEY; @@ -170,11 +168,11 @@ import static org.mockito.Mockito.when; * reused to validate different request interceptor chains. */ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { - private static final Logger LOG = - LoggerFactory.getLogger(TestFederationInterceptorREST.class); + private final static int NUM_SUBCLUSTER = 4; private static final int BAD_REQUEST = 400; private static final int ACCEPTED = 202; + private static final String TEST_USER = "test-user"; private static final int OK = 200; private static String user = "test-user"; private TestableFederationInterceptorREST interceptor; @@ -195,7 +193,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { stateStoreUtil = new FederationStateStoreTestUtil(stateStore); interceptor.setConf(this.getConf()); - interceptor.init(user); + interceptor.init(TEST_USER); subClusters = new ArrayList<>(); @@ -282,8 +280,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { * ApplicationId has to belong to one of the SubCluster in the cluster. */ @Test - public void testGetNewApplication() - throws YarnException, IOException, InterruptedException { + public void testGetNewApplication() throws IOException, InterruptedException { Response response = interceptor.createNewApplication(null); @@ -359,8 +356,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { * request. */ @Test - public void testSubmitApplicationEmptyRequest() - throws YarnException, IOException, InterruptedException { + public void testSubmitApplicationEmptyRequest() throws IOException, InterruptedException { // ApplicationSubmissionContextInfo null Response response = interceptor.submitApplication(null, null); @@ -384,8 +380,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { * application in wrong format. 
*/ @Test - public void testSubmitApplicationWrongFormat() - throws YarnException, IOException, InterruptedException { + public void testSubmitApplicationWrongFormat() throws IOException, InterruptedException { ApplicationSubmissionContextInfo context = new ApplicationSubmissionContextInfo(); @@ -506,8 +501,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { * application does not exist in StateStore. */ @Test - public void testGetApplicationNotExists() - throws YarnException, IOException, InterruptedException { + public void testGetApplicationNotExists() { ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); @@ -522,8 +516,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { * application in wrong format. */ @Test - public void testGetApplicationWrongFormat() - throws YarnException, IOException, InterruptedException { + public void testGetApplicationWrongFormat() { AppInfo response = interceptor.getApp(null, "Application_wrong_id", null); @@ -535,8 +528,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { * subcluster provided one application. */ @Test - public void testGetApplicationsReport() - throws YarnException, IOException, InterruptedException { + public void testGetApplicationsReport() { AppsInfo responseGet = interceptor.getApps(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null); @@ -645,8 +637,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { * application does not exist in StateStore. */ @Test - public void testGetApplicationStateNotExists() - throws YarnException, IOException, InterruptedException { + public void testGetApplicationStateNotExists() throws IOException { ApplicationId appId = ApplicationId.newInstance(Time.now(), 1); @@ -662,7 +653,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { */ @Test public void testGetApplicationStateWrongFormat() - throws YarnException, IOException, InterruptedException { + throws IOException { AppState response = interceptor.getAppState(null, "Application_wrong_id"); @@ -865,8 +856,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { } @Test - public void testGetAppAttempts() - throws IOException, InterruptedException, YarnException { + public void testGetAppAttempts() throws IOException, InterruptedException { // Submit application to multiSubCluster ApplicationId appId = ApplicationId.newInstance(Time.now(), 1); ApplicationSubmissionContextInfo context = new ApplicationSubmissionContextInfo(); @@ -897,8 +887,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { } @Test - public void testGetAppAttempt() - throws IOException, InterruptedException, YarnException { + public void testGetAppAttempt() throws IOException, InterruptedException { // Generate ApplicationId information ApplicationId appId = ApplicationId.newInstance(Time.now(), 1); @@ -922,7 +911,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { } @Test - public void testGetAppTimeout() throws IOException, InterruptedException, YarnException { + public void testGetAppTimeout() throws IOException, InterruptedException { // Generate ApplicationId information ApplicationId appId = ApplicationId.newInstance(Time.now(), 1); @@ -942,7 +931,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { } @Test - public void testGetAppTimeouts() throws 
IOException, InterruptedException, YarnException { + public void testGetAppTimeouts() throws IOException, InterruptedException { // Generate ApplicationId information ApplicationId appId = ApplicationId.newInstance(Time.now(), 1); @@ -1022,8 +1011,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { } @Test - public void testGetAppPriority() throws IOException, InterruptedException, - YarnException { + public void testGetAppPriority() throws IOException, InterruptedException { // Submit application to multiSubCluster ApplicationId appId = ApplicationId.newInstance(Time.now(), 1); @@ -1072,7 +1060,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { } @Test - public void testGetAppQueue() throws IOException, InterruptedException, YarnException { + public void testGetAppQueue() throws IOException, InterruptedException { String queueName = "queueName"; // Submit application to multiSubCluster @@ -1090,7 +1078,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { } @Test - public void testGetAppsInfoCache() throws IOException, InterruptedException, YarnException { + public void testGetAppsInfoCache() { AppsInfo responseGet = interceptor.getApps( null, null, null, null, null, null, null, null, null, null, null, null, null, null, null); @@ -1102,7 +1090,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { LRUCacheHashMap appsInfoCache = interceptor.getAppInfosCaches(); Assert.assertNotNull(appsInfoCache); - Assert.assertTrue(!appsInfoCache.isEmpty()); + Assert.assertFalse(appsInfoCache.isEmpty()); Assert.assertEquals(1, appsInfoCache.size()); Assert.assertTrue(appsInfoCache.containsKey(cacheKey)); @@ -1113,7 +1101,6 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { @Test public void testGetAppStatistics() throws IOException, InterruptedException, YarnException { - AppState appStateRUNNING = new AppState(YarnApplicationState.RUNNING.name()); // Submit application to multiSubCluster ApplicationId appId = ApplicationId.newInstance(Time.now(), 1); @@ -1200,6 +1187,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Assert.assertNotNull(entity); Assert.assertNotNull(entity instanceof ReservationListInfo); + Assert.assertTrue(entity instanceof ReservationListInfo); ReservationListInfo listInfo = (ReservationListInfo) entity; Assert.assertNotNull(listInfo); @@ -1267,6 +1255,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Assert.assertNotNull(entity); Assert.assertNotNull(entity instanceof ReservationListInfo); + Assert.assertTrue(entity instanceof ReservationListInfo); ReservationListInfo listInfo = (ReservationListInfo) entity; Assert.assertNotNull(listInfo); @@ -1310,6 +1299,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Assert.assertNotNull(entity); Assert.assertNotNull(entity instanceof ReservationListInfo); + Assert.assertTrue(entity instanceof ReservationListInfo); ReservationListInfo listInfo = (ReservationListInfo) entity; Assert.assertNotNull(listInfo); @@ -1373,8 +1363,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { throws IOException, InterruptedException { ReservationSubmissionRequestInfo resSubmissionRequestInfo = getReservationSubmissionRequestInfo(reservationId); - Response response = interceptor.submitReservation(resSubmissionRequestInfo, null); - return response; + return 
interceptor.submitReservation(resSubmissionRequestInfo, null); } public static ReservationSubmissionRequestInfo getReservationSubmissionRequestInfo( @@ -1402,15 +1391,13 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { long arrival = Time.now(); // deadline by when the resource(s) must be allocated. - // The reason for choosing 1.05 is because this gives an integer + // The reason for choosing 1.05 is that this gives an integer // DURATION * 0.05 = 3000(ms) // deadline = arrival + 3000ms long deadline = (long) (arrival + 1.05 * DURATION); - ReservationSubmissionRequest submissionRequest = createSimpleReservationRequest( + return createSimpleReservationRequest( reservationId, numContainers, arrival, deadline, DURATION, memory, vcore); - - return submissionRequest; } public static ReservationSubmissionRequest createSimpleReservationRequest( @@ -1423,9 +1410,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Collections.singletonList(r), ReservationRequestInterpreter.R_ALL); ReservationDefinition rDef = ReservationDefinition.newInstance( arrival, deadline, reqs, "testClientRMService#reservation", "0", Priority.UNDEFINED); - ReservationSubmissionRequest request = ReservationSubmissionRequest.newInstance( - rDef, QUEUE_DEDICATED_FULL, reservationId); - return request; + return ReservationSubmissionRequest.newInstance(rDef, QUEUE_DEDICATED_FULL, reservationId); } @Test @@ -1497,7 +1482,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { interceptor.checkUserAccessToQueue(queue, userName, queueACL.name(), mockHsr); Assert.assertNotNull(aclInfo); Assert.assertTrue(aclInfo instanceof FederationRMQueueAclInfo); - FederationRMQueueAclInfo fedAclInfo = FederationRMQueueAclInfo.class.cast(aclInfo); + FederationRMQueueAclInfo fedAclInfo = (FederationRMQueueAclInfo) aclInfo; List aclInfos = fedAclInfo.getList(); Assert.assertNotNull(aclInfos); Assert.assertEquals(4, aclInfos.size()); @@ -1513,7 +1498,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { interceptor.checkUserAccessToQueue(queue, userName, queueACL.name(), mockHsr); Assert.assertNotNull(aclInfo); Assert.assertTrue(aclInfo instanceof FederationRMQueueAclInfo); - FederationRMQueueAclInfo fedAclInfo = FederationRMQueueAclInfo.class.cast(aclInfo); + FederationRMQueueAclInfo fedAclInfo = (FederationRMQueueAclInfo) aclInfo; List aclInfos = fedAclInfo.getList(); Assert.assertNotNull(aclInfos); Assert.assertEquals(4, aclInfos.size()); @@ -1589,13 +1574,12 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Assert.assertTrue(typeInfo instanceof FederationSchedulerTypeInfo); FederationSchedulerTypeInfo federationSchedulerTypeInfo = - FederationSchedulerTypeInfo.class.cast(typeInfo); + (FederationSchedulerTypeInfo) typeInfo; Assert.assertNotNull(federationSchedulerTypeInfo); List schedulerTypeInfos = federationSchedulerTypeInfo.getList(); Assert.assertNotNull(schedulerTypeInfos); Assert.assertEquals(4, schedulerTypeInfos.size()); - List subClusterIds = - subClusters.stream().map(subClusterId -> subClusterId.getId()). + List subClusterIds = subClusters.stream().map(SubClusterId::getId). 
collect(Collectors.toList()); for (SchedulerTypeInfo schedulerTypeInfo : schedulerTypeInfos) { @@ -1609,8 +1593,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { SchedulerInfo schedulerInfo = schedulerTypeInfo.getSchedulerInfo(); Assert.assertNotNull(schedulerInfo); Assert.assertTrue(schedulerInfo instanceof CapacitySchedulerInfo); - CapacitySchedulerInfo capacitySchedulerInfo = - CapacitySchedulerInfo.class.cast(schedulerInfo); + CapacitySchedulerInfo capacitySchedulerInfo = (CapacitySchedulerInfo) schedulerInfo; Assert.assertNotNull(capacitySchedulerInfo); // 3. The parent queue name should be root @@ -1702,7 +1685,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Assert.assertNotNull(entity); Assert.assertTrue(entity instanceof DelegationToken); - DelegationToken dtoken = DelegationToken.class.cast(entity); + DelegationToken dtoken = (DelegationToken) entity; Assert.assertEquals(TEST_RENEWER, dtoken.getRenewer()); Assert.assertEquals(TEST_RENEWER, dtoken.getOwner()); Assert.assertEquals("RM_DELEGATION_TOKEN", dtoken.getKind()); @@ -1751,7 +1734,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Object entity = response.getEntity(); Assert.assertNotNull(entity); Assert.assertTrue(entity instanceof DelegationToken); - DelegationToken dtoken = DelegationToken.class.cast(entity); + DelegationToken dtoken = (DelegationToken) entity; final String yarnTokenHeader = "Hadoop-YARN-RM-Delegation-Token"; when(request.getHeader(yarnTokenHeader)).thenReturn(dtoken.getToken()); @@ -1764,7 +1747,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Assert.assertTrue(renewEntity instanceof DelegationToken); // renewDelegation, we only return renewDate, other values are NULL. - DelegationToken renewDToken = DelegationToken.class.cast(renewEntity); + DelegationToken renewDToken = (DelegationToken) renewEntity; Assert.assertNull(renewDToken.getRenewer()); Assert.assertNull(renewDToken.getOwner()); Assert.assertNull(renewDToken.getKind()); @@ -1789,7 +1772,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Object entity = response.getEntity(); Assert.assertNotNull(entity); Assert.assertTrue(entity instanceof DelegationToken); - DelegationToken dtoken = DelegationToken.class.cast(entity); + DelegationToken dtoken = (DelegationToken) entity; final String yarnTokenHeader = "Hadoop-YARN-RM-Delegation-Token"; when(request.getHeader(yarnTokenHeader)).thenReturn(dtoken.getToken()); @@ -1903,7 +1886,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { // We cannot guarantee the calling order of the sub-clusters, // We guarantee that the returned result contains the information of each subCluster. 
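An aside on the assertion cleanups in the hunks above: `Assert.assertNotNull(entity instanceof ReservationListInfo)` can never fail, because the `instanceof` result is autoboxed into a non-null `Boolean`, so the added `assertTrue` is the check that actually validates the type; similarly, a plain cast behaves the same as `Foo.class.cast(foo)` at runtime but reads more directly. A minimal, self-contained sketch with hypothetical class names, not taken from the patch:

```java
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

class InstanceOfAssertionSketch {
  // Hypothetical types, used only to illustrate the assertion pattern.
  static class Animal { }
  static class Cat extends Animal { }

  public static void main(String[] args) {
    Object entity = new Cat();
    // Always passes regardless of entity's runtime type: the instanceof result
    // autoboxes to a non-null Boolean, so assertNotNull checks nothing useful.
    assertNotNull(entity instanceof Cat);
    // The meaningful check: fails unless entity really is a Cat.
    assertTrue(entity instanceof Cat);
    // A plain cast is equivalent to Cat.class.cast(entity) and easier to read.
    Cat cat = (Cat) entity;
    assertNotNull(cat);
  }
}
```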
Assert.assertNotNull(dumpSchedulerLogsMsg); - subClusters.stream().forEach(subClusterId -> { + subClusters.forEach(subClusterId -> { String subClusterMsg = "subClusterId" + subClusterId + " : Capacity scheduler logs are being created.; "; Assert.assertTrue(dumpSchedulerLogsMsg.contains(subClusterMsg)); @@ -1978,7 +1961,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { Assert.assertTrue(bulkActivitiesInfo instanceof FederationBulkActivitiesInfo); FederationBulkActivitiesInfo federationBulkActivitiesInfo = - FederationBulkActivitiesInfo.class.cast(bulkActivitiesInfo); + (FederationBulkActivitiesInfo) bulkActivitiesInfo; Assert.assertNotNull(federationBulkActivitiesInfo); List activitiesInfos = federationBulkActivitiesInfo.getList(); @@ -2033,9 +2016,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { // we confirm the result by contains String expectedMsg = "SubCluster-0:SUCCESS,SubCluster-1:SUCCESS,SubCluster-2:SUCCESS,SubCluster-3:SUCCESS"; - Arrays.stream(entities).forEach(item -> { - Assert.assertTrue(expectedMsg.contains(item)); - }); + Arrays.stream(entities).forEach(item -> Assert.assertTrue(expectedMsg.contains(item))); } @Test @@ -2098,9 +2079,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest { // we confirm the result by contains String expectedMsg = "SubCluster-0:SUCCESS,SubCluster-1:SUCCESS,SubCluster-2:SUCCESS,SubCluster-3:SUCCESS"; - Arrays.stream(entities).forEach(item -> { - Assert.assertTrue(expectedMsg.contains(item)); - }); + Arrays.stream(entities).forEach(item -> Assert.assertTrue(expectedMsg.contains(item))); } @Test From 811441d5bc4fc6f788a277558274160bf4c242a9 Mon Sep 17 00:00:00 2001 From: zhtttylz Date: Sat, 1 Apr 2023 18:18:20 +0800 Subject: [PATCH 53/97] HDFS-16951. Add description of GETSERVERDEFAULTS to WebHDFS doc (#5491) Co-authored-by: Zhtttylz Reviewed-by: Shilun Fan Signed-off-by: Shilun Fan --- .../hadoop-hdfs/src/site/markdown/WebHDFS.md | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md index 46b5613fe72..5e5924ad36e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md @@ -57,6 +57,7 @@ The HTTP REST API supports the complete [FileSystem](../../api/org/apache/hadoop * [`GETSNAPSHOTLIST`](#Get_Snapshot_List) * [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileBlockLocations) * [`GETECPOLICY`](#Get_EC_Policy) (see [HDFSErasureCoding](./HDFSErasureCoding.html#Administrative_commands).getErasureCodingPolicy) + * [`GETSERVERDEFAULTS`](#Get_Server_Defaults) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServerDefaults) * HTTP PUT * [`CREATE`](#Create_and_Write_to_a_File) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).create) * [`MKDIRS`](#Make_a_Directory) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).mkdirs) @@ -1109,6 +1110,35 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getAclSta See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).access +### Get Server Defaults + +* Submit a HTTP GET request. 
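As a companion to the curl request shown next, a hedged sketch of the same server-defaults lookup through the Java `FileSystem` API that the REST call mirrors; the namenode host and port are placeholders, not values from this patch:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.Path;

public class GetServerDefaultsSketch {
  public static void main(String[] args) throws Exception {
    // Placeholder endpoint; substitute the HTTP address of your namenode.
    FileSystem fs = FileSystem.get(
        URI.create("webhdfs://namenode.example.com:9870"), new Configuration());
    // Same data the GETSERVERDEFAULTS response carries as JSON.
    FsServerDefaults defaults = fs.getServerDefaults(new Path("/"));
    System.out.println("blockSize=" + defaults.getBlockSize()
        + ", replication=" + defaults.getReplication()
        + ", bytesPerChecksum=" + defaults.getBytesPerChecksum());
  }
}
```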
+ + curl -i "http://:/webhdfs/v1/?op=GETSERVERDEFAULTS" + + The client receives a response with a [`ServerDefaults` JSON object](Server_Defaults_JSON_Schema): + + HTTP/1.1 200 OK + Content-Type: application/json + Transfer-Encoding: chunked + + { + "FsServerDefaults": { + "replication": 3, + "encryptDataTransfer": "false", + "defaultStoragePolicyId":7, + "writePacketSize": 65536, + "fileBufferSize": 4096, + "checksumType": 2, + "trashInterval": 10080, + "keyProviderUri": "", + "blockSize": 134217728, + "bytesPerChecksum": 512 + } + } + +See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServerDefaults + Storage Policy Operations ------------------------- @@ -3042,6 +3072,24 @@ var blockLocationProperties = } }; ``` +### Server Defaults JSON Schema + +```json +{ + "FsServerDefaults": { + "replication": 3, + "encryptDataTransfer": false, + "defaultStoragePolicyId": 7, + "writePacketSize": 65536, + "fileBufferSize": 4096, + "checksumType": 2, + "trashInterval": 10080, + "keyProviderUri": "", + "blockSize": 134217728, + "bytesPerChecksum": 512 + } +} +``` HTTP Query Parameter Dictionary ------------------------------- From 14c5810d5ef284216a88ee1d0c158fc451cf7fda Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Mon, 3 Apr 2023 22:53:29 +0000 Subject: [PATCH 54/97] HADOOP-18680: Insufficient heap during full test runs in Docker container. Closes #5522 Signed-off-by: Ayush Saxena --- dev-support/docker/Dockerfile_aarch64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-support/docker/Dockerfile_aarch64 b/dev-support/docker/Dockerfile_aarch64 index dd0348961f4..14a53780127 100644 --- a/dev-support/docker/Dockerfile_aarch64 +++ b/dev-support/docker/Dockerfile_aarch64 @@ -74,7 +74,7 @@ ENV PATH "${PATH}:/opt/protobuf/bin" ### # Avoid out of memory errors in builds ### -ENV MAVEN_OPTS -Xms256m -Xmx1536m +ENV MAVEN_OPTS -Xms256m -Xmx3072m # Skip gpg verification when downloading Yetus via yetus-wrapper ENV HADOOP_SKIP_YETUS_VERIFICATION true From 937caf7de9e50268ff49af86825eac698fb98d2d Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Tue, 4 Apr 2023 06:39:53 -0700 Subject: [PATCH 55/97] HDFS-16967. 
RBF: File based state stores should allow concurrent access to the records (#5523) Reviewed-by: Inigo Goiri Reviewed-by: Simbarashe Dzinamarira Signed-off-by: Takanobu Asanuma --- .../federation/router/RBFConfigKeys.java | 9 + .../driver/impl/StateStoreFileBaseImpl.java | 197 ++++++++++++++---- .../store/driver/impl/StateStoreFileImpl.java | 7 + .../driver/impl/StateStoreFileSystemImpl.java | 9 +- .../src/main/resources/hdfs-rbf-default.xml | 28 +++ .../driver/TestStateStoreDriverBase.java | 1 + .../store/driver/TestStateStoreFile.java | 32 ++- .../driver/TestStateStoreFileSystem.java | 47 +++-- 8 files changed, 268 insertions(+), 62 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java index c0ee9504597..f47d6ceb269 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java @@ -255,6 +255,15 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic { public static final int FEDERATION_STORE_ZK_ASYNC_MAX_THREADS_DEFAULT = -1; + // HDFS Router-based federation File based store implementation specific configs + public static final String FEDERATION_STORE_FILE_ASYNC_THREADS = + FEDERATION_STORE_PREFIX + "driver.file.async.threads"; + public static final int FEDERATION_STORE_FILE_ASYNC_THREADS_DEFAULT = 0; + + public static final String FEDERATION_STORE_FS_ASYNC_THREADS = + FEDERATION_STORE_PREFIX + "driver.fs.async.threads"; + public static final int FEDERATION_STORE_FS_ASYNC_THREADS_DEFAULT = 0; + // HDFS Router safe mode public static final String DFS_ROUTER_SAFEMODE_ENABLE = FEDERATION_ROUTER_PREFIX + "safemode.enable"; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java index c93d919aea0..ec3c89b65bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java @@ -25,14 +25,24 @@ import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; + import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics; import org.apache.hadoop.hdfs.server.federation.store.StateStoreUnavailableException; import 
org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils; @@ -69,6 +79,8 @@ public abstract class StateStoreFileBaseImpl /** If it is initialized. */ private boolean initialized = false; + private ExecutorService concurrentStoreAccessPool; + /** * Get the reader of a record for the file system. @@ -137,6 +149,8 @@ public abstract class StateStoreFileBaseImpl */ protected abstract String getRootDir(); + protected abstract int getConcurrentFilesAccessNumThreads(); + /** * Set the driver as initialized. * @@ -168,9 +182,31 @@ public abstract class StateStoreFileBaseImpl return false; } setInitialized(true); + int threads = getConcurrentFilesAccessNumThreads(); + if (threads > 1) { + this.concurrentStoreAccessPool = + new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue<>(), + new ThreadFactoryBuilder() + .setNameFormat("state-store-file-based-concurrent-%d") + .setDaemon(true).build()); + LOG.info("File based state store will be accessed concurrently with {} max threads", threads); + } else { + LOG.info("File based state store will be accessed serially"); + } return true; } + @Override + public void close() throws Exception { + if (this.concurrentStoreAccessPool != null) { + this.concurrentStoreAccessPool.shutdown(); + boolean isTerminated = this.concurrentStoreAccessPool.awaitTermination(5, TimeUnit.SECONDS); + LOG.info("Concurrent store access pool is terminated: {}", isTerminated); + this.concurrentStoreAccessPool = null; + } + } + @Override public boolean initRecordStorage( String className, Class recordClass) { @@ -198,22 +234,29 @@ public abstract class StateStoreFileBaseImpl verifyDriverReady(); long start = monotonicNow(); StateStoreMetrics metrics = getMetrics(); - List ret = new ArrayList<>(); + List result = Collections.synchronizedList(new ArrayList<>()); try { String path = getPathForClass(clazz); List children = getChildren(path); - for (String child : children) { - String pathRecord = path + "/" + child; - if (child.endsWith(TMP_MARK)) { - LOG.debug("There is a temporary file {} in {}", child, path); - if (isOldTempRecord(child)) { - LOG.warn("Removing {} as it's an old temporary record", child); - remove(pathRecord); - } - } else { - T record = getRecord(pathRecord, clazz); - ret.add(record); + List> callables = new ArrayList<>(); + children.forEach(child -> callables.add( + () -> getRecordsFromFileAndRemoveOldTmpRecords(clazz, result, path, child))); + if (this.concurrentStoreAccessPool != null) { + // Read records concurrently + List> futures = this.concurrentStoreAccessPool.invokeAll(callables); + for (Future future : futures) { + future.get(); } + } else { + // Read records serially + callables.forEach(e -> { + try { + e.call(); + } catch (Exception ex) { + LOG.error("Failed to retrieve record using file operations.", ex); + throw new RuntimeException(ex); + } + }); } } catch (Exception e) { if (metrics != null) { @@ -227,7 +270,37 @@ public abstract class StateStoreFileBaseImpl if (metrics != null) { metrics.addRead(monotonicNow() - start); } - return new QueryResult(ret, getTime()); + return new QueryResult<>(result, getTime()); + } + + /** + * Get the state store record from the given path (path/child) and add the record to the + * result list. + * + * @param clazz Class of the record. + * @param result The list of results record. The records would be added to it unless the given + * path represents old temp file. + * @param path The parent path. + * @param child The child path under the parent path. 
Both path and child completes the file + * location for the given record. + * @param Record class of the records. + * @return Void. + * @throws IOException If the file read operation fails. + */ + private Void getRecordsFromFileAndRemoveOldTmpRecords(Class clazz, + List result, String path, String child) throws IOException { + String pathRecord = path + "/" + child; + if (child.endsWith(TMP_MARK)) { + LOG.debug("There is a temporary file {} in {}", child, path); + if (isOldTempRecord(child)) { + LOG.warn("Removing {} as it's an old temporary record", child); + remove(pathRecord); + } + } else { + T record = getRecord(pathRecord, clazz); + result.add(record); + } + return null; } /** @@ -260,23 +333,17 @@ public abstract class StateStoreFileBaseImpl */ private T getRecord( final String path, final Class clazz) throws IOException { - BufferedReader reader = getReader(path); - try { + try (BufferedReader reader = getReader(path)) { String line; while ((line = reader.readLine()) != null) { if (!line.startsWith("#") && line.length() > 0) { try { - T record = newRecord(line, clazz, false); - return record; + return newRecord(line, clazz, false); } catch (Exception ex) { LOG.error("Cannot parse line {} in file {}", line, path, ex); } } } - } finally { - if (reader != null) { - reader.close(); - } } throw new IOException("Cannot read " + path + " for record " + clazz.getSimpleName()); @@ -330,13 +397,12 @@ public abstract class StateStoreFileBaseImpl record.setDateModified(this.getTime()); toWrite.put(recordPath, record); } else if (errorIfExists) { - LOG.error("Attempt to insert record {} that already exists", - recordPath); + LOG.error("Attempt to insert record {} that already exists", recordPath); if (metrics != null) { metrics.addFailure(monotonicNow() - start); } return false; - } else { + } else { LOG.debug("Not updating {}", record); } } else { @@ -345,36 +411,81 @@ public abstract class StateStoreFileBaseImpl } // Write the records - boolean success = true; - for (Entry entry : toWrite.entrySet()) { - String recordPath = entry.getKey(); - String recordPathTemp = recordPath + "." 
+ now() + TMP_MARK; - boolean recordWrittenSuccessfully = true; - try (BufferedWriter writer = getWriter(recordPathTemp)) { - T record = entry.getValue(); - String line = serializeString(record); - writer.write(line); - } catch (IOException e) { - LOG.error("Cannot write {}", recordPathTemp, e); - recordWrittenSuccessfully = false; - success = false; + final AtomicBoolean success = new AtomicBoolean(true); + final List> callables = new ArrayList<>(); + toWrite.entrySet().forEach(entry -> callables.add(() -> writeRecordToFile(success, entry))); + if (this.concurrentStoreAccessPool != null) { + // Write records concurrently + List> futures = null; + try { + futures = this.concurrentStoreAccessPool.invokeAll(callables); + } catch (InterruptedException e) { + success.set(false); + LOG.error("Failed to put record concurrently.", e); } - // Commit - if (recordWrittenSuccessfully && !rename(recordPathTemp, recordPath)) { - LOG.error("Failed committing record into {}", recordPath); - success = false; + if (futures != null) { + for (Future future : futures) { + try { + future.get(); + } catch (InterruptedException | ExecutionException e) { + success.set(false); + LOG.error("Failed to retrieve results from concurrent record put runs.", e); + } + } } + } else { + // Write records serially + callables.forEach(callable -> { + try { + callable.call(); + } catch (Exception e) { + success.set(false); + LOG.error("Failed to put record.", e); + } + }); } long end = monotonicNow(); if (metrics != null) { - if (success) { + if (success.get()) { metrics.addWrite(end - start); } else { metrics.addFailure(end - start); } } - return success; + return success.get(); + } + + /** + * Writes the state store record to the file. At first, the record is written to a temp location + * and then later renamed to the final location that is passed with the entry key. + * + * @param success The atomic boolean that gets updated to false if the file write operation fails. + * @param entry The entry of the record path and the state store record to be written to the file + * by first writing to a temp location and then renaming it to the record path. + * @param Record class of the records. + * @return Void. + */ + private Void writeRecordToFile(AtomicBoolean success, + Entry entry) { + String recordPath = entry.getKey(); + String recordPathTemp = recordPath + "." 
+ now() + TMP_MARK; + boolean recordWrittenSuccessfully = true; + try (BufferedWriter writer = getWriter(recordPathTemp)) { + T record = entry.getValue(); + String line = serializeString(record); + writer.write(line); + } catch (IOException e) { + LOG.error("Cannot write {}", recordPathTemp, e); + recordWrittenSuccessfully = false; + success.set(false); + } + // Commit + if (recordWrittenSuccessfully && !rename(recordPathTemp, recordPath)) { + LOG.error("Failed committing record into {}", recordPath); + success.set(false); + } + return null; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java index 6ca26637161..1df26e07843 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileImpl.java @@ -109,6 +109,12 @@ public class StateStoreFileImpl extends StateStoreFileBaseImpl { return this.rootDirectory; } + @Override + protected int getConcurrentFilesAccessNumThreads() { + return getConf().getInt(RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS, + RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS_DEFAULT); + } + @Override protected BufferedReader getReader(String filename) { BufferedReader reader = null; @@ -144,6 +150,7 @@ public class StateStoreFileImpl extends StateStoreFileBaseImpl { @Override public void close() throws Exception { + super.close(); setInitialized(false); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java index ee34d8a4cab..d05682398ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java @@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory; /** * {@link StateStoreDriver} implementation based on a filesystem. The common * implementation uses HDFS as a backend. The path can be specified setting - * dfs.federation.router.driver.fs.path=hdfs://host:port/path/to/store. + * dfs.federation.router.store.driver.fs.path=hdfs://host:port/path/to/store. 
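The hunks above wire the same execution pattern into both the read and the write path: build a list of `Callable`s, hand them to a bounded pool via `invokeAll` when a positive thread count is configured, and fall back to calling them serially otherwise. A condensed, hedged sketch of that pattern in isolation; the class and method names are illustrative rather than the actual Hadoop ones, and the real driver additionally uses a daemon thread factory and a bounded await on shutdown:

```java
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public final class ConcurrentOrSerialRunner {
  private final ExecutorService pool; // null means serial mode

  public ConcurrentOrSerialRunner(int threads) {
    this.pool = threads > 1 ? Executors.newFixedThreadPool(threads) : null;
  }

  /** Run all tasks: concurrently when a pool is configured, serially otherwise. */
  public void runAll(List<Callable<Void>> tasks) throws Exception {
    if (pool != null) {
      for (Future<Void> f : pool.invokeAll(tasks)) {
        f.get(); // surface any task failure
      }
    } else {
      for (Callable<Void> task : tasks) {
        task.call();
      }
    }
  }

  public void shutdown() {
    if (pool != null) {
      pool.shutdown();
    }
  }
}
```

With the new keys left at their default of 0, the callables run serially on the calling thread, which preserves the pre-patch behaviour.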
*/ public class StateStoreFileSystemImpl extends StateStoreFileBaseImpl { @@ -117,8 +117,15 @@ public class StateStoreFileSystemImpl extends StateStoreFileBaseImpl { return this.workPath; } + @Override + protected int getConcurrentFilesAccessNumThreads() { + return getConf().getInt(RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS, + RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS_DEFAULT); + } + @Override public void close() throws Exception { + super.close(); if (fs != null) { fs.close(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml index 79a16cc2022..780fb76a2da 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml @@ -894,4 +894,32 @@ If this is below 0, the auto-refresh is disabled. + + + dfs.federation.router.store.driver.file.async.threads + 0 + + Max threads used by StateStoreFileImpl to access state store files concurrently. + The only class currently being supported: + org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl. + Default value is 0, which means StateStoreFileImpl would work in sync mode, meaning it + would access one file at a time. + Use positive integer value to enable concurrent files access. + + + + + dfs.federation.router.store.driver.fs.async.threads + 0 + + Max threads used by StateStoreFileSystemImpl to access state store files from the given + filesystem concurrently. + The only class currently being supported: + org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl. + Default value is 0, which means StateStoreFileSystemImpl would work in sync mode, meaning it + would access one file from the filesystem at a time. + Use positive integer value to enable concurrent files access from the given filesystem. 
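A hedged illustration of raising these two keys from their defaults in a Router configuration; the value 16 is arbitrary and purely illustrative, and leaving both keys at 0 keeps the serial behaviour described above:

```java
import org.apache.hadoop.conf.Configuration;

public class RbfAsyncThreadsConfigSketch {
  public static Configuration withConcurrentStateStoreAccess() {
    Configuration conf = new Configuration();
    // Local-disk state store driver: allow up to 16 files to be accessed at once.
    conf.setInt("dfs.federation.router.store.driver.file.async.threads", 16);
    // Filesystem-backed (e.g. HDFS) state store driver: same idea, same arbitrary value.
    conf.setInt("dfs.federation.router.store.driver.fs.async.threads", 16);
    return conf;
  }
}
```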
+ + + diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java index 48d84f9326b..73d0774ace3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreDriverBase.java @@ -94,6 +94,7 @@ public class TestStateStoreDriverBase { public static void tearDownCluster() { if (stateStore != null) { stateStore.stop(); + stateStore = null; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java index b01500b2ea1..5b5b3fc1f81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFile.java @@ -18,31 +18,55 @@ package org.apache.hadoop.hdfs.server.federation.store.driver; import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.getStateStoreConfiguration; +import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS; import java.io.IOException; +import java.util.Arrays; +import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl; + +import org.junit.After; import org.junit.Before; -import org.junit.BeforeClass; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; /** * Test the FileSystem (e.g., HDFS) implementation of the State Store driver. 
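The test below, and its filesystem counterpart further down, switches to JUnit 4's `Parameterized` runner so every case runs once with a 20-thread pool and once in serial mode. For readers unfamiliar with that runner, a minimal, self-contained sketch of the pattern; the class shown is hypothetical, not part of the patch:

```java
import java.util.Arrays;
import java.util.List;

import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Parameterized.class)
public class ParameterizedRunnerSketch {

  // Each inner array becomes one full run of every @Test method in this class.
  @Parameterized.Parameters(name = "numAsyncThreads-{0}")
  public static List<String[]> data() {
    return Arrays.asList(new String[][] {{"20"}, {"0"}});
  }

  private final int numAsyncThreads;

  // The runner passes one parameter set to the constructor per run.
  public ParameterizedRunnerSketch(String numAsyncThreads) {
    this.numAsyncThreads = Integer.parseInt(numAsyncThreads);
  }

  @Test
  public void testThreadsNonNegative() {
    Assert.assertTrue(numAsyncThreads >= 0);
  }
}
```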
*/ +@RunWith(Parameterized.class) public class TestStateStoreFile extends TestStateStoreDriverBase { - @BeforeClass - public static void setupCluster() throws Exception { + private final String numFileAsyncThreads; + + public TestStateStoreFile(String numFileAsyncThreads) { + this.numFileAsyncThreads = numFileAsyncThreads; + } + + @Parameterized.Parameters(name = "numFileAsyncThreads-{0}") + public static List data() { + return Arrays.asList(new String[][] {{"20"}, {"0"}}); + } + + private static void setupCluster(String numFsAsyncThreads) throws Exception { Configuration conf = getStateStoreConfiguration(StateStoreFileImpl.class); + conf.setInt(FEDERATION_STORE_FILE_ASYNC_THREADS, Integer.parseInt(numFsAsyncThreads)); getStateStore(conf); } @Before - public void startup() throws IOException { + public void startup() throws Exception { + setupCluster(numFileAsyncThreads); removeAll(getStateStoreDriver()); } + @After + public void tearDown() throws Exception { + tearDownCluster(); + } + @Test public void testInsert() throws IllegalArgumentException, IllegalAccessException, IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java index 8c06e6b8ed1..4d383ae63fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreFileSystem.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.federation.store.driver; import java.io.BufferedWriter; import java.io.IOException; +import java.util.Arrays; +import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -26,12 +28,15 @@ import org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUt import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileBaseImpl; import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl; import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState; -import org.junit.AfterClass; + +import org.junit.After; import org.junit.Before; -import org.junit.BeforeClass; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.mockito.stubbing.Answer; +import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS; import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doThrow; @@ -41,16 +46,22 @@ import static org.mockito.Mockito.spy; /** * Test the FileSystem (e.g., HDFS) implementation of the State Store driver. 
*/ +@RunWith(Parameterized.class) public class TestStateStoreFileSystem extends TestStateStoreDriverBase { private static MiniDFSCluster dfsCluster; - @BeforeClass - public static void setupCluster() throws Exception { - Configuration conf = FederationStateStoreTestUtils - .getStateStoreConfiguration(StateStoreFileSystemImpl.class); - conf.set(StateStoreFileSystemImpl.FEDERATION_STORE_FS_PATH, - "/hdfs-federation/"); + private final String numFsAsyncThreads; + + public TestStateStoreFileSystem(String numFsAsyncThreads) { + this.numFsAsyncThreads = numFsAsyncThreads; + } + + private static void setupCluster(String numFsAsyncThreads) throws Exception { + Configuration conf = + FederationStateStoreTestUtils.getStateStoreConfiguration(StateStoreFileSystemImpl.class); + conf.set(StateStoreFileSystemImpl.FEDERATION_STORE_FS_PATH, "/hdfs-federation/"); + conf.setInt(FEDERATION_STORE_FS_ASYNC_THREADS, Integer.parseInt(numFsAsyncThreads)); // Create HDFS cluster to back the state tore MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf); @@ -60,18 +71,26 @@ public class TestStateStoreFileSystem extends TestStateStoreDriverBase { getStateStore(conf); } - @AfterClass - public static void tearDownCluster() { - if (dfsCluster != null) { - dfsCluster.shutdown(); - } + @Parameterized.Parameters(name = "numFsAsyncThreads-{0}") + public static List data() { + return Arrays.asList(new String[][] {{"20"}, {"0"}}); } @Before - public void startup() throws IOException { + public void startup() throws Exception { + setupCluster(numFsAsyncThreads); removeAll(getStateStoreDriver()); } + @After + public void tearDown() throws Exception { + tearDownCluster(); + if (dfsCluster != null) { + dfsCluster.shutdown(); + dfsCluster = null; + } + } + @Test public void testInsert() throws IllegalArgumentException, IllegalAccessException, IOException { From dfb2ca0a64a6ff377a8d6796b635298f46dc67ec Mon Sep 17 00:00:00 2001 From: HarshitGupta11 <50410275+HarshitGupta11@users.noreply.github.com> Date: Wed, 5 Apr 2023 17:12:11 +0530 Subject: [PATCH 56/97] HADOOP-18684. S3A filesystem to support binding to to other URI schemes (#5521) Contributed by Harshit Gupta --- .../java/org/apache/hadoop/fs/s3a/S3A.java | 11 ++-- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 8 ++- .../hadoop/fs/s3a/ITestS3AUrlScheme.java | 51 +++++++++++++++++++ .../s3a/fileContext/ITestS3AFileContext.java | 25 ++++++++- 4 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AUrlScheme.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3A.java index ec433fa95c2..34779996963 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3A.java @@ -18,14 +18,16 @@ package org.apache.hadoop.fs.s3a; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.DelegateToFileSystem; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A; /** * S3A implementation of AbstractFileSystem. 
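The point of this patch is that the S3A connector can now be bound to additional URI schemes; the new tests later in the patch bind the bare `s3://` scheme. A hedged sketch of the client-side wiring those tests rely on, with a placeholder bucket name and illustrative output comments:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3SchemeBindingSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Map the bare "s3" scheme onto the S3A FileSystem and AbstractFileSystem.
    conf.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
    conf.set("fs.AbstractFileSystem.s3.impl", "org.apache.hadoop.fs.s3a.S3A");

    // "mybucket" is a placeholder; with the change above, getScheme() reports
    // the scheme of the URI the filesystem was bound to rather than "s3a".
    try (FileSystem fs = FileSystem.get(new URI("s3://mybucket/path"), conf)) {
      System.out.println(fs.getScheme());                 // s3
      Path qualified = fs.makeQualified(new Path("tmp/path"));
      System.out.println(qualified.toUri().getScheme());  // s3
    }
  }
}
```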
@@ -37,7 +39,8 @@ public class S3A extends DelegateToFileSystem { public S3A(URI theUri, Configuration conf) throws IOException, URISyntaxException { - super(theUri, new S3AFileSystem(), conf, "s3a", false); + super(theUri, new S3AFileSystem(), conf, + theUri.getScheme().isEmpty() ? FS_S3A : theUri.getScheme(), false); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index cb17b80fb6a..e96feb0243a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -419,6 +419,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ private final Set deleteOnExit = new TreeSet<>(); + /** + * Scheme for the current filesystem. + */ + private String scheme = FS_S3A; + /** Add any deprecated keys. */ @SuppressWarnings("deprecation") private static void addDeprecatedKeys() { @@ -642,6 +647,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, vectoredActiveRangeReads = intOption(conf, AWS_S3_VECTOR_ACTIVE_RANGE_READS, DEFAULT_AWS_S3_VECTOR_ACTIVE_RANGE_READS, 1); vectoredIOContext = populateVectoredIOContext(conf); + scheme = (this.uri != null && this.uri.getScheme() != null) ? this.uri.getScheme() : FS_S3A; } catch (AmazonClientException e) { // amazon client exception: stop all services then throw the translation cleanupWithLogger(LOG, span); @@ -1201,7 +1207,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ @Override public String getScheme() { - return "s3a"; + return this.scheme; } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AUrlScheme.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AUrlScheme.java new file mode 100644 index 00000000000..cfe46440c75 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AUrlScheme.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class ITestS3AUrlScheme extends AbstractS3ATestBase{ + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + conf.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem"); + return conf; + } + + @Test + public void testFSScheme() throws IOException, URISyntaxException { + FileSystem fs = FileSystem.get(new URI("s3://mybucket/path"), + getConfiguration()); + try { + assertEquals("s3", fs.getScheme()); + Path path = fs.makeQualified(new Path("tmp/path")); + assertEquals("s3", path.toUri().getScheme()); + } finally { + fs.close(); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContext.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContext.java index 7e4273a4c70..d29a017a643 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContext.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContext.java @@ -13,11 +13,34 @@ */ package org.apache.hadoop.fs.s3a.fileContext; +import java.net.URI; +import java.net.URISyntaxException; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.TestFileContext; +import org.apache.hadoop.fs.UnsupportedFileSystemException; + +import static org.junit.Assert.assertEquals; /** * Implementation of TestFileContext for S3a. */ -public class ITestS3AFileContext extends TestFileContext{ +public class ITestS3AFileContext extends TestFileContext { + @Test + public void testScheme() + throws URISyntaxException, UnsupportedFileSystemException { + Configuration conf = new Configuration(); + URI uri = new URI("s3://mybucket/path"); + conf.set("fs.AbstractFileSystem.s3.impl", + "org.apache.hadoop.fs.s3a.S3A"); + FileContext fc = FileContext.getFileContext(uri, conf); + assertEquals("s3", fc.getDefaultFileSystem().getUri().getScheme()); + Path path = fc.makeQualified(new Path("tmp/path")); + assertEquals("s3", path.toUri().getScheme()); + } } From 69b90b5698df0e36a507c5288ec0fb93a48a5a55 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Thu, 6 Apr 2023 01:35:24 +0800 Subject: [PATCH 57/97] YARN-11436. [Federation] MemoryFederationStateStore Support Version. 
(#5518) --- ...tionStateVersionIncompatibleException.java | 37 +++++++++++++++++++ .../impl/MemoryFederationStateStore.java | 37 ++++++++++++++++--- .../impl/TestMemoryFederationStateStore.java | 37 +++++++++++++++++++ 3 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/exception/FederationStateVersionIncompatibleException.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/exception/FederationStateVersionIncompatibleException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/exception/FederationStateVersionIncompatibleException.java new file mode 100644 index 00000000000..090c2807739 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/exception/FederationStateVersionIncompatibleException.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.federation.store.exception; + +import org.apache.hadoop.yarn.exceptions.YarnException; + +public class FederationStateVersionIncompatibleException extends YarnException { + + private static final long serialVersionUID = 1L; + + public FederationStateVersionIncompatibleException(Throwable cause) { + super(cause); + } + + public FederationStateVersionIncompatibleException(String message) { + super(message); + } + + public FederationStateVersionIncompatibleException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java index 273e736e887..4aad86fbb16 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java @@ -31,17 +31,18 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import java.util.Comparator; -import org.apache.commons.lang3.NotImplementedException; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ReservationId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; +import org.apache.hadoop.yarn.server.federation.store.exception.FederationStateVersionIncompatibleException; import org.apache.hadoop.yarn.server.federation.store.metrics.FederationStateStoreClientMetrics; import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterRequest; import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterResponse; @@ -97,6 +98,7 @@ import org.apache.hadoop.yarn.server.federation.store.utils.FederationMembership import org.apache.hadoop.yarn.server.federation.store.utils.FederationPolicyStoreInputValidator; import org.apache.hadoop.yarn.server.federation.store.utils.FederationStateStoreUtils; import org.apache.hadoop.yarn.server.records.Version; +import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl; import org.apache.hadoop.yarn.util.MonotonicClock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -116,6 +118,9 @@ public class MemoryFederationStateStore implements FederationStateStore { private int maxAppsInStateStore; private AtomicInteger sequenceNum; private AtomicInteger masterKeyId; + private static final Version CURRENT_VERSION_INFO = Version + .newInstance(1, 1); + private byte[] version; private final MonotonicClock clock = new MonotonicClock(); @@ -134,6 +139,7 @@ public class 
MemoryFederationStateStore implements FederationStateStore { YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_MAX_APPLICATIONS); sequenceNum = new AtomicInteger(); masterKeyId = new AtomicInteger(); + version = ((VersionPBImpl) CURRENT_VERSION_INFO).getProto().toByteArray(); } @Override @@ -367,22 +373,43 @@ public class MemoryFederationStateStore implements FederationStateStore { @Override public Version getCurrentVersion() { - throw new NotImplementedException("Code is not implemented"); + return CURRENT_VERSION_INFO; } @Override public Version loadVersion() throws Exception { - throw new NotImplementedException("Code is not implemented"); + if (version != null) { + VersionProto versionProto = VersionProto.parseFrom(version); + return new VersionPBImpl(versionProto); + } + return null; } @Override public void storeVersion() throws Exception { - throw new NotImplementedException("Code is not implemented"); + version = ((VersionPBImpl) CURRENT_VERSION_INFO).getProto().toByteArray(); } @Override public void checkVersion() throws Exception { - throw new NotImplementedException("Code is not implemented"); + Version loadedVersion = loadVersion(); + LOG.info("Loaded Router State Version Info = {}.", loadedVersion); + Version currentVersion = getCurrentVersion(); + if (loadedVersion != null && loadedVersion.equals(currentVersion)) { + return; + } + // if there is no version info, treat it as CURRENT_VERSION_INFO; + if (loadedVersion == null) { + loadedVersion = currentVersion; + } + if (loadedVersion.isCompatibleTo(currentVersion)) { + LOG.info("Storing Router State Version Info {}.", currentVersion); + storeVersion(); + } else { + throw new FederationStateVersionIncompatibleException( + "Expecting Router state version " + currentVersion + + ", but loading version " + loadedVersion); + } } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestMemoryFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestMemoryFederationStateStore.java index 5548dab1b8c..bb7e130b5e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestMemoryFederationStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestMemoryFederationStateStore.java @@ -27,6 +27,8 @@ import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKey; import org.apache.hadoop.yarn.server.federation.store.records.RouterRMDTSecretManagerState; import org.apache.hadoop.yarn.server.federation.store.records.RouterStoreToken; +import org.apache.hadoop.yarn.server.records.Version; +import org.junit.Test; import java.io.IOException; import java.nio.ByteBuffer; @@ -88,4 +90,39 @@ public class TestMemoryFederationStateStore extends FederationStateStoreBaseTest assertTrue(tokenIdentifier instanceof RMDelegationTokenIdentifier); assertEquals(identifier, tokenIdentifier); } + + @Test + public void testGetCurrentVersion() { + MemoryFederationStateStore memoryStateStore = + MemoryFederationStateStore.class.cast(this.getStateStore()); + Version version = memoryStateStore.getCurrentVersion(); + 
assertEquals(version.getMajorVersion(), 1); + assertEquals(version.getMinorVersion(), 1); + } + + @Test + public void testStoreVersion() throws Exception { + MemoryFederationStateStore memoryStateStore = + MemoryFederationStateStore.class.cast(this.getStateStore()); + memoryStateStore.storeVersion(); + Version version = memoryStateStore.getCurrentVersion(); + assertEquals(version.getMajorVersion(), 1); + assertEquals(version.getMinorVersion(), 1); + } + + @Test + public void testLoadVersion() throws Exception { + MemoryFederationStateStore memoryStateStore = + MemoryFederationStateStore.class.cast(this.getStateStore()); + Version version = memoryStateStore.loadVersion(); + assertEquals(version.getMajorVersion(), 1); + assertEquals(version.getMinorVersion(), 1); + } + + @Test + public void testCheckVersion() throws Exception { + MemoryFederationStateStore memoryStateStore = + MemoryFederationStateStore.class.cast(this.getStateStore()); + memoryStateStore.checkVersion(); + } } \ No newline at end of file From 422bf3b24c82803cb0e8ed25fa0b12b5f5cccc1b Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 5 Apr 2023 14:06:38 -0700 Subject: [PATCH 58/97] HDFS-16973. RBF: MountTableResolver cache size lookup should take read lock (#5533) --- .../federation/resolver/MountTableResolver.java | 13 +++++++++---- .../federation/resolver/TestMountTableResolver.java | 10 ++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java index 4b21ec0aa63..adb0f91d042 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java @@ -678,11 +678,16 @@ public class MountTableResolver * @return Size of the cache. * @throws IOException If the cache is not initialized. 
*/ - protected long getCacheSize() throws IOException{ - if (this.locationCache != null) { - return this.locationCache.size(); + protected long getCacheSize() throws IOException { + this.readLock.lock(); + try { + if (this.locationCache != null) { + return this.locationCache.size(); + } + throw new IOException("localCache is null"); + } finally { + this.readLock.unlock(); } - throw new IOException("localCache is null"); } @VisibleForTesting diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java index 15d3caa5e4e..998b79782de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java @@ -552,6 +552,16 @@ public class TestMountTableResolver { assertEquals(100000, mountTable.getMountPoints("/").size()); assertEquals(100000, mountTable.getMounts("/").size()); + // test concurrency for mount table cache size when it gets updated frequently + for (int i = 0; i < 20; i++) { + mountTable.getDestinationForPath("/" + i); + if (i >= 10) { + assertEquals(TEST_MAX_CACHE_SIZE, mountTable.getCacheSize()); + } else { + assertEquals(i + 1, mountTable.getCacheSize()); + } + } + assertEquals(TEST_MAX_CACHE_SIZE, mountTable.getCacheSize()); // Add 1000 entries in deep list mountTable.refreshEntries(emptyList); From 47c22e388ee5631c99a7f926d11a8747aa51e5e4 Mon Sep 17 00:00:00 2001 From: Simbarashe Dzinamarira Date: Wed, 5 Apr 2023 16:44:29 -0700 Subject: [PATCH 59/97] HDFS-16943. RBF: Implements MySQL based StateStoreDriver. 
(#5469) --- .../store/driver/StateStoreDriver.java | 4 + .../store/driver/impl/StateStoreBaseImpl.java | 4 + .../driver/impl/StateStoreMySQLImpl.java | 425 ++++++++++++++++++ .../impl/StateStoreSerializableImpl.java | 4 + .../src/main/resources/hdfs-rbf-default.xml | 3 +- .../store/driver/TestStateStoreMySQL.java | 102 +++++ 6 files changed, 541 insertions(+), 1 deletion(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreMySQL.java diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java index a4e9c1ce82b..778ac3ecea5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java @@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.server.federation.store.driver; import java.net.InetAddress; import java.util.Collection; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics; import org.apache.hadoop.hdfs.server.federation.store.StateStoreService; @@ -35,6 +37,8 @@ import org.slf4j.LoggerFactory; * provider. Driver implementations will extend this class and implement some of * the default methods. */ +@InterfaceAudience.Public +@InterfaceStability.Evolving public abstract class StateStoreDriver implements StateStoreRecordOperations { private static final Logger LOG = diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java index 30686f104b7..f7a6174226e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java @@ -23,6 +23,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils; import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver; import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord; @@ -39,6 +41,8 @@ import org.apache.hadoop.hdfs.server.federation.store.records.QueryResult; * optimization, such as custom get/put/remove queries, depending on the * capabilities of the data store. 
 */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
 public abstract class StateStoreBaseImpl extends StateStoreDriver {
 
   @Override
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java
new file mode 100644
index 00000000000..72644bb816e
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java
@@ -0,0 +1,425 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.store.driver.impl;
+
+import com.zaxxer.hikari.HikariConfig;
+import com.zaxxer.hikari.HikariDataSource;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
+import org.apache.hadoop.hdfs.server.federation.router.security.token.SQLConnectionFactory;
+import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils;
+import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
+import org.apache.hadoop.hdfs.server.federation.store.records.DisabledNameservice;
+import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState;
+import org.apache.hadoop.hdfs.server.federation.store.records.MountTable;
+import org.apache.hadoop.hdfs.server.federation.store.records.Query;
+import org.apache.hadoop.hdfs.server.federation.store.records.QueryResult;
+import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
+import org.apache.hadoop.util.Time;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils.*;
+
+/**
+ * StateStoreDriver implementation based on MySQL.
+ * There is a separate table for each record type. Each table has just two
+ * columns, recordKey and recordValue.
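+ * <p>
+ * A minimal configuration sketch (the property names come from the constants
+ * declared in this class; the host, database and credential values below are
+ * illustrative assumptions only):
+ * <pre>
+ *   state-store-mysql.connection.url = jdbc:mysql://mysql-host:3306/routerstore
+ *   state-store-mysql.connection.username = router
+ *   state-store-mysql.connection.password = routerpw
+ *   state-store-mysql.connection.driver = com.mysql.cj.jdbc.Driver
+ * </pre>
+ * Additional HikariCP pool settings can be passed through properties prefixed
+ * with "state-store-mysql.connection.hikari.".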
+ */
+public class StateStoreMySQLImpl extends StateStoreSerializableImpl {
+  public static final String SQL_STATE_STORE_CONF_PREFIX = "state-store-mysql.";
+  public static final String CONNECTION_URL =
+      SQL_STATE_STORE_CONF_PREFIX + "connection.url";
+  public static final String CONNECTION_USERNAME =
+      SQL_STATE_STORE_CONF_PREFIX + "connection.username";
+  public static final String CONNECTION_PASSWORD =
+      SQL_STATE_STORE_CONF_PREFIX + "connection.password";
+  public static final String CONNECTION_DRIVER =
+      SQL_STATE_STORE_CONF_PREFIX + "connection.driver";
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(StateStoreMySQLImpl.class);
+  private SQLConnectionFactory connectionFactory;
+  /** If the driver has been initialized. */
+  private boolean initialized = false;
+  private final static Set VALID_TABLES = Collections.unmodifiableSet(
+      new HashSet<>(Arrays.asList(
+          MembershipState.class.getSimpleName(),
+          RouterState.class.getSimpleName(),
+          MountTable.class.getSimpleName(),
+          DisabledNameservice.class.getSimpleName()
+      ))
+  );
+
+  @Override
+  public boolean initDriver() {
+    Configuration conf = getConf();
+    connectionFactory = new MySQLStateStoreHikariDataSourceConnectionFactory(conf);
+    initialized = true;
+    LOG.info("MySQL state store connection factory initialized");
+    return true;
+  }
+
+  @Override
+  public boolean initRecordStorage(String className, Class clazz) {
+    String tableName = getAndValidateTableNameForClass(clazz);
+    try (Connection connection = connectionFactory.getConnection();
+        ResultSet resultSet = connection
+            .getMetaData()
+            .getTables(null, null, tableName, null)) {
+      if (resultSet.next()) {
+        return true;
+      }
+    } catch (SQLException e) {
+      LOG.error("Could not check if table {} exists", tableName, e);
+    }
+
+    try (Connection connection = connectionFactory.getConnection();
+        Statement statement = connection.createStatement()) {
+      String sql = String.format("CREATE TABLE %s (" +
+          "recordKey VARCHAR (255) NOT NULL," +
+          "recordValue VARCHAR (2047) NOT NULL, " +
+          "PRIMARY KEY(recordKey))", tableName);
+      statement.execute(sql);
+      return true;
+    } catch (SQLException e) {
+      LOG.error(String.format("Cannot create table %s for record type %s.",
+          tableName, className), e);
+      return false;
+    }
+  }
+
+  @Override
+  public boolean isDriverReady() {
+    return this.initialized;
+  }
+
+  @Override
+  public void close() throws Exception {
+    connectionFactory.shutdown();
+  }
+
+  @Override
+  public QueryResult get(Class clazz)
+      throws IOException {
+    String tableName = getAndValidateTableNameForClass(clazz);
+    verifyDriverReady();
+    long start = Time.monotonicNow();
+    StateStoreMetrics metrics = getMetrics();
+    List ret = new ArrayList<>();
+    try (Connection connection = connectionFactory.getConnection();
+        PreparedStatement statement = connection.prepareStatement(
+            String.format("SELECT * FROM %s", tableName))) {
+      try (ResultSet result = statement.executeQuery()) {
+        while (result.next()) {
+          String recordValue = result.getString("recordValue");
+          T record = newRecord(recordValue, clazz, false);
+          ret.add(record);
+        }
+      }
+    } catch (SQLException e) {
+      if (metrics != null) {
+        metrics.addFailure(Time.monotonicNow() - start);
+      }
+      String msg = "Cannot fetch records for " + clazz.getSimpleName();
+      LOG.error(msg, e);
+      throw new IOException(msg, e);
+    }
+
+    if (metrics != null) {
+      metrics.addRead(Time.monotonicNow() - start);
+    }
+    return new QueryResult<>(ret, getTime());
+  }
+
+  @Override
+  public boolean putAll(
+      List records, boolean
allowUpdate, boolean errorIfExists) throws IOException { + if (records.isEmpty()) { + return true; + } + + verifyDriverReady(); + StateStoreMetrics metrics = getMetrics(); + + long start = Time.monotonicNow(); + + boolean success = true; + for (T record : records) { + String tableName = getAndValidateTableNameForClass(record.getClass()); + String primaryKey = getPrimaryKey(record); + String data = serializeString(record); + + if (recordExists(tableName, primaryKey)) { + if (allowUpdate) { + // Update the mod time stamp. Many backends will use their + // own timestamp for the mod time. + record.setDateModified(this.getTime()); + if (!updateRecord(tableName, primaryKey, data)) { + LOG.error("Cannot write {} into table {}", primaryKey, tableName); + success = false; + } + } else { + if (errorIfExists) { + LOG.error("Attempted to insert record {} that already exists " + + "in table {} and updates are disallowed.", primaryKey, tableName); + if (metrics != null) { + metrics.addFailure(Time.monotonicNow() - start); + } + return false; + } else { + LOG.debug("Not updating {} as updates are not allowed", record); + } + } + } else { + if (!insertRecord(tableName, primaryKey, data)) { + LOG.error("Cannot write {} in table {}", primaryKey, tableName); + success = false; + } + } + } + + long end = Time.monotonicNow(); + if (metrics != null) { + if (success) { + metrics.addWrite(end - start); + } else { + metrics.addFailure(end - start); + } + } + return success; + } + + @Override + public boolean removeAll(Class clazz) throws IOException { + verifyDriverReady(); + long startTimeMs = Time.monotonicNow(); + StateStoreMetrics metrics = getMetrics(); + boolean success = true; + String tableName = getAndValidateTableNameForClass(clazz); + try (Connection connection = connectionFactory.getConnection(true); + PreparedStatement truncateTable = connection.prepareStatement( + String.format("TRUNCATE TABLE %s", tableName))){ + truncateTable.execute(); + } catch (SQLException e) { + LOG.error("Could not remove all records in table {}", tableName, e); + success = false; + } + + if (metrics != null) { + long durationMs = Time.monotonicNow() - startTimeMs; + if (success) { + metrics.addRemove(durationMs); + } else { + metrics.addFailure(durationMs); + } + } + return success; + } + + @Override + public int remove(Class clazz, Query query) throws IOException { + verifyDriverReady(); + + if (query == null) { + return 0; + } + + long startTimeMs = Time.monotonicNow(); + StateStoreMetrics metrics = getMetrics(); + int removed = 0; + // Get the current records + try { + final QueryResult result = get(clazz); + final List existingRecords = result.getRecords(); + // Write all of the existing records except those to be removed + final List recordsToRemove = filterMultiple(query, existingRecords); + boolean success = true; + for (T recordToRemove : recordsToRemove) { + String tableName = getAndValidateTableNameForClass(clazz); + String primaryKey = getPrimaryKey(recordToRemove); + if (removeRecord(tableName, primaryKey)) { + removed++; + } else { + LOG.error("Cannot remove record {} from table {}", primaryKey, tableName); + success = false; + } + } + if (!success) { + LOG.error("Cannot remove records {} query {}", clazz, query); + if (metrics != null) { + metrics.addFailure(Time.monotonicNow() - startTimeMs); + } + } + } catch (IOException e) { + LOG.error("Cannot remove records {} query {}", clazz, query, e); + if (metrics != null) { + metrics.addFailure(Time.monotonicNow() - startTimeMs); + } + } + + if (removed > 0 && 
metrics != null) {
+      metrics.addRemove(Time.monotonicNow() - startTimeMs);
+    }
+    return removed;
+  }
+
+  /**
+   * Insert a record with a given key into the specified table.
+   * @param tableName Name of table to modify
+   * @param key Primary key for the record.
+   * @param data Serialized value of the record.
+   * @return True if the operation is successful, false otherwise.
+   */
+  protected boolean insertRecord(String tableName, String key, String data) {
+    try (Connection connection = connectionFactory.getConnection(true);
+        PreparedStatement statement = connection.prepareStatement(
+            String.format("INSERT INTO %s (recordKey, recordValue) VALUES (?, ?)", tableName))) {
+      statement.setString(1, key);
+      statement.setString(2, data);
+      statement.execute();
+    } catch (SQLException e) {
+      LOG.error("Failed to insert record {} into table {}", key, tableName, e);
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Updates the record with a given key in the specified table.
+   * @param tableName Name of table to modify
+   * @param key Primary key for the record.
+   * @param data New serialized value of the record.
+   * @return True if the operation is successful, false otherwise.
+   */
+  protected boolean updateRecord(String tableName, String key, String data) {
+    try (Connection connection = connectionFactory.getConnection(true);
+        PreparedStatement statement = connection.prepareStatement(
+            String.format("UPDATE %s SET recordValue = ? WHERE recordKey = ?", tableName))) {
+      statement.setString(1, data);
+      statement.setString(2, key);
+      statement.execute();
+    } catch (SQLException e) {
+      LOG.error("Failed to update record {} in table {}", key, tableName, e);
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Checks if a record with a given key exists in the specified table.
+   * @param tableName Name of table to query
+   * @param key Primary key for the record.
+   * @return True if the record exists, false otherwise or if the check fails.
+   */
+  protected boolean recordExists(String tableName, String key) {
+    try (Connection connection = connectionFactory.getConnection(true);
+        PreparedStatement statement = connection.prepareStatement(
+            String.format("SELECT * FROM %s WHERE recordKey = ?", tableName))) {
+      statement.setString(1, key);
+      try (ResultSet result = statement.executeQuery()) {
+        return result.next();
+      }
+    } catch (SQLException e) {
+      LOG.error("Failed to check existence of record {} in table {}", key, tableName, e);
+      return false;
+    }
+  }
+
+  /**
+   * Removes the record with a given key from the specified table.
+   * @param tableName Name of table to modify
+   * @param key Primary key for the record.
+   * @return True if the operation is successful, false otherwise.
+   */
+  protected boolean removeRecord(String tableName, String key) {
+    try (Connection connection = connectionFactory.getConnection(true);
+        PreparedStatement statement = connection.prepareStatement(
+            String.format("DELETE FROM %s WHERE recordKey = ?", tableName))) {
+      statement.setString(1, key);
+      statement.execute();
+      return true;
+    } catch (SQLException e) {
+      LOG.error("Failed to remove record {} in table {}", key, tableName, e);
+      return false;
+    }
+  }
+
+  /**
+   * Get the table for a record class and validate that it is one of the supported
+   * record types.
+   * @param clazz Class of the record.
+   * @return Table name for this record class.
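+   * @throws IllegalArgumentException if the record class does not map to one of
+   *         the supported tables.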
+ */ + private String getAndValidateTableNameForClass(final Class clazz) { + String tableName = StateStoreUtils.getRecordName(clazz); + if (VALID_TABLES.contains(tableName)) { + return tableName; + } else { + throw new IllegalArgumentException(tableName + " is not a valid table name"); + } + } + + + /** + * Class that relies on a HikariDataSource to provide SQL connections. + */ + static class MySQLStateStoreHikariDataSourceConnectionFactory + implements SQLConnectionFactory { + protected final static String HIKARI_PROPS = SQL_STATE_STORE_CONF_PREFIX + + "connection.hikari."; + private final HikariDataSource dataSource; + + MySQLStateStoreHikariDataSourceConnectionFactory(Configuration conf) { + Properties properties = new Properties(); + properties.setProperty("jdbcUrl", conf.get(StateStoreMySQLImpl.CONNECTION_URL)); + properties.setProperty("username", conf.get(StateStoreMySQLImpl.CONNECTION_USERNAME)); + properties.setProperty("password", conf.get(StateStoreMySQLImpl.CONNECTION_PASSWORD)); + properties.setProperty("driverClassName", conf.get(StateStoreMySQLImpl.CONNECTION_DRIVER)); + + // Include hikari connection properties + properties.putAll(conf.getPropsWithPrefix(HIKARI_PROPS)); + + HikariConfig hikariConfig = new HikariConfig(properties); + this.dataSource = new HikariDataSource(hikariConfig); + } + + @Override + public Connection getConnection() throws SQLException { + return dataSource.getConnection(); + } + + @Override + public void shutdown() { + // Close database connections + dataSource.close(); + } + } + +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreSerializableImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreSerializableImpl.java index 7bc93de84bc..8f766c65c5b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreSerializableImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreSerializableImpl.java @@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.server.federation.store.driver.impl; import java.io.IOException; import java.util.Collection; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics; import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreSerializer; @@ -29,6 +31,8 @@ import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord; * State Store driver that stores a serialization of the records. The serializer * is pluggable. */ +@InterfaceAudience.Public +@InterfaceStability.Evolving public abstract class StateStoreSerializableImpl extends StateStoreBaseImpl { /** Mark for slashes in path names. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml index 780fb76a2da..c7b403ce634 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml @@ -362,7 +362,8 @@ Class to implement the State Store. 
There are three implementation classes currently being supported:
      org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl,
-      org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl and
+      org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl,
+      org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreMySQLImpl and
      org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.
      These implementation classes use the local file, filesystem and ZooKeeper as a
      backend respectively. By default it uses the ZooKeeper as the default State Store.
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreMySQL.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreMySQL.java
new file mode 100644
index 00000000000..ebac2c0b93b
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreMySQL.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.store.driver;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreMySQLImpl;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.*;
+
+/**
+ * Test the MySQL implementation of the State Store driver.
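+ * The test needs no running MySQL server: it drives StateStoreMySQLImpl against
+ * an embedded in-memory Apache Derby database through the same JDBC/HikariCP
+ * path, using the Derby connection URL and EmbeddedDriver configured below.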
+ */ +public class TestStateStoreMySQL extends TestStateStoreDriverBase { + private static final String CONNECTION_URL = "jdbc:derby:memory:StateStore"; + + @BeforeClass + public static void initDatabase() throws Exception { + Connection connection = DriverManager.getConnection(CONNECTION_URL + ";create=true"); + Statement s = connection.createStatement(); + s.execute("CREATE SCHEMA TESTUSER"); + + Configuration conf = + getStateStoreConfiguration(StateStoreMySQLImpl.class); + conf.set(StateStoreMySQLImpl.CONNECTION_URL, CONNECTION_URL); + conf.set(StateStoreMySQLImpl.CONNECTION_USERNAME, "testuser"); + conf.set(StateStoreMySQLImpl.CONNECTION_PASSWORD, "testpassword"); + conf.set(StateStoreMySQLImpl.CONNECTION_DRIVER, "org.apache.derby.jdbc.EmbeddedDriver"); + getStateStore(conf); + } + + @Before + public void startup() throws IOException { + removeAll(getStateStoreDriver()); + } + + @AfterClass + public static void cleanupDatabase() { + try { + DriverManager.getConnection(CONNECTION_URL + ";drop=true"); + } catch (SQLException e) { + // SQLException expected when database is dropped + if (!e.getMessage().contains("dropped")) { + throw new RuntimeException(e); + } + } + } + + @Test + public void testInsert() + throws IllegalArgumentException, IllegalAccessException, IOException { + testInsert(getStateStoreDriver()); + } + + @Test + public void testUpdate() + throws IllegalArgumentException, ReflectiveOperationException, + IOException, SecurityException { + testPut(getStateStoreDriver()); + } + + @Test + public void testDelete() + throws IllegalArgumentException, IllegalAccessException, IOException { + testRemove(getStateStoreDriver()); + } + + @Test + public void testFetchErrors() + throws IllegalArgumentException, IllegalAccessException, IOException { + testFetchErrors(getStateStoreDriver()); + } + + @Test + public void testMetrics() + throws IllegalArgumentException, IllegalAccessException, IOException { + testMetrics(getStateStoreDriver()); + } +} \ No newline at end of file From 523ff816246322e7376c52b3c8df92c0d6e4f6ef Mon Sep 17 00:00:00 2001 From: zhtttylz Date: Thu, 6 Apr 2023 19:44:47 +0800 Subject: [PATCH 60/97] HDFS-16952. 
Support getLinkTarget API in WebHDFS (#5517) Co-authored-by: Zhtttylz Reviewed-by: Shilun Fan Signed-off-by: Shilun Fan --- .../hadoop/hdfs/web/WebHdfsFileSystem.java | 13 +++++++++ .../hadoop/hdfs/web/resources/GetOpParam.java | 1 + .../router/RouterWebHdfsMethods.java | 1 + .../web/resources/NamenodeWebHdfsMethods.java | 5 ++++ .../hadoop-hdfs/src/site/markdown/WebHDFS.md | 17 +++++++++++ .../apache/hadoop/hdfs/web/TestWebHDFS.java | 28 +++++++++++++++++++ 6 files changed, 65 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index f0774e98d1f..615cf3bd7c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -2147,6 +2147,19 @@ public class WebHdfsFileSystem extends FileSystem }.run(); } + @Override + public Path getLinkTarget(Path f) throws IOException { + statistics.incrementReadOps(1); + storageStatistics.incrementOpCounter(OpType.GET_LINK_TARGET); + final HttpOpParam.Op op = GetOpParam.Op.GETLINKTARGET; + return new FsPathResponseRunner(op, f) { + @Override + Path decodeResponse(Map json) { + return new Path((String) json.get(Path.class.getSimpleName())); + } + }.run(); + } + @VisibleForTesting InetSocketAddress[] getResolvedNNAddr() { return nnAddrs; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java index 14938c3c45b..89979295c79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java @@ -64,6 +64,7 @@ public class GetOpParam extends HttpOpParam { GETSNAPSHOTDIFF(false, HttpURLConnection.HTTP_OK), GETSNAPSHOTDIFFLISTING(false, HttpURLConnection.HTTP_OK), GETSNAPSHOTTABLEDIRECTORYLIST(false, HttpURLConnection.HTTP_OK), + GETLINKTARGET(false, HttpURLConnection.HTTP_OK), GETSNAPSHOTLIST(false, HttpURLConnection.HTTP_OK); final boolean redirect; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java index a66953b1bd7..477a59941fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java @@ -385,6 +385,7 @@ public class RouterWebHdfsMethods extends NamenodeWebHdfsMethods { case GETXATTRS: case LISTXATTRS: case CHECKACCESS: + case GETLINKTARGET: { return super.get(ugi, delegation, username, doAsUser, fullpath, op, offset, length, renewer, bufferSize, xattrNames, xattrEncoding, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index a3250c213ca..4b3b53731ee 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -1383,6 +1383,11 @@ public class NamenodeWebHdfsMethods { final String js = JsonUtil.toJsonString(snapshotList); return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); } + case GETLINKTARGET: { + String target = cp.getLinkTarget(fullpath); + final String js = JsonUtil.toJsonString("Path", target); + return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); + } default: throw new UnsupportedOperationException(op + " is not supported"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md index 5e5924ad36e..f84018ae821 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md @@ -58,6 +58,7 @@ The HTTP REST API supports the complete [FileSystem](../../api/org/apache/hadoop * [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileBlockLocations) * [`GETECPOLICY`](#Get_EC_Policy) (see [HDFSErasureCoding](./HDFSErasureCoding.html#Administrative_commands).getErasureCodingPolicy) * [`GETSERVERDEFAULTS`](#Get_Server_Defaults) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServerDefaults) + * [`GETLINKTARGET`](#Get_Link_Target) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getLinkTarget) * HTTP PUT * [`CREATE`](#Create_and_Write_to_a_File) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).create) * [`MKDIRS`](#Make_a_Directory) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).mkdirs) @@ -1139,6 +1140,22 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).access See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServerDefaults +### Get Link Target + +* Submit a HTTP GET request. 
+ + curl -i "http://:/webhdfs/v1/?op=GETLINKTARGET" + + The client receives a response with a [`Path` JSON object](#Path_JSON_Schema): + + HTTP/1.1 200 OK + Content-Type: application/json + Transfer-Encoding: chunked + + {"Path": "/user/username/targetFile"} + +See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getLinkTarget + Storage Policy Operations ------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java index c4f53b05615..8f4759d8e30 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java @@ -2202,6 +2202,34 @@ public class TestWebHDFS { cluster.shutdown(); } } + + @Test + public void testLinkTarget() throws Exception { + final Configuration conf = WebHdfsTestUtil.createConf(); + try { + cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(3) + .build(); + cluster.waitActive(); + + final WebHdfsFileSystem webHdfs = + WebHdfsTestUtil.getWebHdfsFileSystem(conf, + WebHdfsConstants.WEBHDFS_SCHEME); + + // Symbolic link + Path root = new Path("/webHdfsTest/"); + Path targetFile = new Path(root, "debug.log"); + FileSystemTestHelper.createFile(webHdfs, targetFile); + + Path symLink = new Path(root, "debug.link"); + + webHdfs.createSymlink(targetFile, symLink, false); + assertEquals(webHdfs.getLinkTarget(symLink), targetFile); + } finally { + cluster.shutdown(); + } + } + /** * Get FileStatus JSONObject from ListStatus response. */ From e45451f9c715d067bfd6ea02d266e4aef782fcfe Mon Sep 17 00:00:00 2001 From: mjwiq <80327153+mjwiq@users.noreply.github.com> Date: Thu, 6 Apr 2023 17:00:33 +0200 Subject: [PATCH 61/97] HADOOP-18687. 
hadoop-auth: remove unnecessary dependency on json-smart (#5524) Contributed by Michiel de Jong --- hadoop-common-project/hadoop-auth/pom.xml | 12 ------------ hadoop-project/pom.xml | 4 ---- 2 files changed, 16 deletions(-) diff --git a/hadoop-common-project/hadoop-auth/pom.xml b/hadoop-common-project/hadoop-auth/pom.xml index 6eaa4fdfce5..433a615c606 100644 --- a/hadoop-common-project/hadoop-auth/pom.xml +++ b/hadoop-common-project/hadoop-auth/pom.xml @@ -110,20 +110,8 @@ org.bouncycastle bcprov-jdk15on - - - net.minidev - json-smart - - - net.minidev - json-smart - org.apache.zookeeper zookeeper diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index d8114afb58f..7a57f05011d 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -1730,10 +1730,6 @@ - net.minidev json-smart ${json-smart.version} From 3e2ae1da00e055211914c90cca89d62432096530 Mon Sep 17 00:00:00 2001 From: rdingankar Date: Mon, 10 Apr 2023 08:56:00 -0700 Subject: [PATCH 62/97] =?UTF-8?q?HDFS-16949=20Introduce=20inverse=20quanti?= =?UTF-8?q?les=20for=20metrics=20where=20higher=20numer=E2=80=A6=20(#5495)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hadoop/metrics2/lib/MetricsRegistry.java | 25 ++++- .../metrics2/lib/MutableInverseQuantiles.java | 89 +++++++++++++++++ .../hadoop/metrics2/lib/MutableQuantiles.java | 99 +++++++++++++++---- .../metrics2/util/TestSampleQuantiles.java | 68 ++++++++++--- .../apache/hadoop/test/MetricsAsserts.java | 25 ++++- .../datanode/metrics/DataNodeMetrics.java | 2 +- .../server/datanode/TestDataNodeMetrics.java | 3 +- 7 files changed, 273 insertions(+), 38 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java index b71f7f8cc5e..31031b808ea 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java @@ -227,6 +227,29 @@ public class MetricsRegistry { return ret; } + /** + * Create a mutable inverse metric that estimates inverse quantiles of a stream of values + * @param name of the metric + * @param desc metric description + * @param sampleName of the metric (e.g., "Ops") + * @param valueName of the metric (e.g., "Rate") + * @param interval rollover interval of estimator in seconds + * @return a new inverse quantile estimator object + * @throws MetricsException if interval is not a positive integer + */ + public synchronized MutableQuantiles newInverseQuantiles(String name, String desc, + String sampleName, String valueName, int interval) { + checkMetricName(name); + if (interval <= 0) { + throw new MetricsException("Interval should be positive. Value passed" + + " is: " + interval); + } + MutableQuantiles ret = + new MutableInverseQuantiles(name, desc, sampleName, valueName, interval); + metricsMap.put(name, ret); + return ret; + } + /** * Create a mutable metric with stats * @param name of the metric @@ -278,7 +301,7 @@ public class MetricsRegistry { } /** - * Create a mutable rate metric (for throughput measurement) + * Create a mutable rate metric (for throughput measurement). 
* @param name of the metric * @param desc description * @param extended produce extended stat (stdev/min/max etc.) if true diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java new file mode 100644 index 00000000000..a3d579cb9e7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics2.lib; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.metrics2.util.Quantile; +import org.apache.hadoop.metrics2.util.SampleQuantiles; +import java.text.DecimalFormat; +import static org.apache.hadoop.metrics2.lib.Interns.info; + +/** + * Watches a stream of long values, maintaining online estimates of specific + * quantiles with provably low error bounds. Inverse quantiles are meant for + * highly accurate low-percentile (e.g. 1st, 5th) metrics. + * InverseQuantiles are used for metrics where higher the value better it is. + * ( eg: data transfer rate ). + * The 1st percentile here corresponds to the 99th inverse percentile metric, + * 5th percentile to 95th and so on. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class MutableInverseQuantiles extends MutableQuantiles{ + + static class InversePercentile extends Quantile { + InversePercentile(double inversePercentile) { + super(inversePercentile/100, inversePercentile/1000); + } + } + + @VisibleForTesting + public static final Quantile[] INVERSE_QUANTILES = {new InversePercentile(50), + new InversePercentile(25), new InversePercentile(10), + new InversePercentile(5), new InversePercentile(1)}; + + /** + * Instantiates a new {@link MutableInverseQuantiles} for a metric that rolls itself + * over on the specified time interval. + * + * @param name of the metric + * @param description long-form textual description of the metric + * @param sampleName type of items in the stream (e.g., "Ops") + * @param valueName type of the values + * @param intervalSecs rollover interval (in seconds) of the estimator + */ + public MutableInverseQuantiles(String name, String description, String sampleName, + String valueName, int intervalSecs) { + super(name, description, sampleName, valueName, intervalSecs); + } + + /** + * Sets quantileInfo and estimator. 
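+   * For example, with a metric named "readTransferRate60s" and value name "Rate",
+   * the 0.01 quantile (1st percentile) is published as the gauge
+   * "ReadTransferRate60s99thInversePercentileRate".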
+ * + * @param ucName capitalized name of the metric + * @param uvName capitalized type of the values + * @param desc uncapitalized long-form textual description of the metric + * @param lvName uncapitalized type of the values + * @param df Number formatter for inverse percentile value + */ + void setQuantiles(String ucName, String uvName, String desc, String lvName, DecimalFormat df) { + // Construct the MetricsInfos for inverse quantiles, converting to inverse percentiles + setQuantileInfos(INVERSE_QUANTILES.length); + for (int i = 0; i < INVERSE_QUANTILES.length; i++) { + double inversePercentile = 100 * (1 - INVERSE_QUANTILES[i].quantile); + String nameTemplate = ucName + df.format(inversePercentile) + "thInversePercentile" + uvName; + String descTemplate = df.format(inversePercentile) + " inverse percentile " + lvName + + " with " + getInterval() + " second interval for " + desc; + addQuantileInfo(i, info(nameTemplate, descTemplate)); + } + + setEstimator(new SampleQuantiles(INVERSE_QUANTILES)); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java index f7dfaffb3f9..edb2159f17b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java @@ -20,6 +20,7 @@ package org.apache.hadoop.metrics2.lib; import static org.apache.hadoop.metrics2.lib.Interns.info; +import java.text.DecimalFormat; import java.util.Map; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ -52,9 +53,10 @@ public class MutableQuantiles extends MutableMetric { new Quantile(0.75, 0.025), new Quantile(0.90, 0.010), new Quantile(0.95, 0.005), new Quantile(0.99, 0.001) }; - private final MetricsInfo numInfo; - private final MetricsInfo[] quantileInfos; - private final int interval; + private MetricsInfo numInfo; + private MetricsInfo[] quantileInfos; + private int intervalSecs; + private static DecimalFormat decimalFormat = new DecimalFormat("###.####"); private QuantileEstimator estimator; private long previousCount = 0; @@ -91,26 +93,39 @@ public class MutableQuantiles extends MutableMetric { String lsName = StringUtils.uncapitalize(sampleName); String lvName = StringUtils.uncapitalize(valueName); - numInfo = info(ucName + "Num" + usName, String.format( - "Number of %s for %s with %ds interval", lsName, desc, interval)); - // Construct the MetricsInfos for the quantiles, converting to percentiles - quantileInfos = new MetricsInfo[quantiles.length]; - String nameTemplate = ucName + "%dthPercentile" + uvName; - String descTemplate = "%d percentile " + lvName + " with " + interval - + " second interval for " + desc; - for (int i = 0; i < quantiles.length; i++) { - int percentile = (int) (100 * quantiles[i].quantile); - quantileInfos[i] = info(String.format(nameTemplate, percentile), - String.format(descTemplate, percentile)); - } - - estimator = new SampleQuantiles(quantiles); - - this.interval = interval; + setInterval(interval); + setNumInfo(info(ucName + "Num" + usName, String.format( + "Number of %s for %s with %ds interval", lsName, desc, interval))); scheduledTask = scheduler.scheduleWithFixedDelay(new RolloverSample(this), interval, interval, TimeUnit.SECONDS); + setQuantiles(ucName, uvName, desc, lvName, decimalFormat); } + /** + * Sets 
quantileInfo and estimator. + * + * @param ucName capitalized name of the metric + * @param uvName capitalized type of the values + * @param desc uncapitalized long-form textual description of the metric + * @param lvName uncapitalized type of the values + * @param pDecimalFormat Number formatter for percentile value + */ + void setQuantiles(String ucName, String uvName, String desc, String lvName, DecimalFormat pDecimalFormat) { + // Construct the MetricsInfos for the quantiles, converting to percentiles + setQuantileInfos(quantiles.length); + for (int i = 0; i < quantiles.length; i++) { + double percentile = 100 * quantiles[i].quantile; + String nameTemplate = ucName + pDecimalFormat.format(percentile) + "thPercentile" + uvName; + String descTemplate = pDecimalFormat.format(percentile) + " percentile " + lvName + + " with " + getInterval() + " second interval for " + desc; + addQuantileInfo(i, info(nameTemplate, descTemplate)); + } + + setEstimator(new SampleQuantiles(quantiles)); + } + + public MutableQuantiles() {} + @Override public synchronized void snapshot(MetricsRecordBuilder builder, boolean all) { if (all || changed()) { @@ -133,8 +148,50 @@ public class MutableQuantiles extends MutableMetric { estimator.insert(value); } - public int getInterval() { - return interval; + /** + * Set info about the metrics. + * + * @param pNumInfo info about the metrics. + */ + public synchronized void setNumInfo(MetricsInfo pNumInfo) { + this.numInfo = pNumInfo; + } + + /** + * Initialize quantileInfos array. + * + * @param length of the quantileInfos array. + */ + public synchronized void setQuantileInfos(int length) { + this.quantileInfos = new MetricsInfo[length]; + } + + /** + * Add entry to quantileInfos array. + * + * @param i array index. + * @param info info to be added to quantileInfos array. + */ + public synchronized void addQuantileInfo(int i, MetricsInfo info) { + this.quantileInfos[i] = info; + } + + /** + * Set the rollover interval (in seconds) of the estimator. + * + * @param pIntervalSecs of the estimator. + */ + public synchronized void setInterval(int pIntervalSecs) { + this.intervalSecs = pIntervalSecs; + } + + /** + * Get the rollover interval (in seconds) of the estimator. + * + * @return intervalSecs of the estimator. 
+ */ + public synchronized int getInterval() { + return intervalSecs; } public void stop() { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/util/TestSampleQuantiles.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/util/TestSampleQuantiles.java index c7d8f60b181..aefd7a264b0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/util/TestSampleQuantiles.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/util/TestSampleQuantiles.java @@ -24,6 +24,7 @@ import java.util.Collections; import java.util.Map; import java.util.Random; +import org.apache.hadoop.metrics2.lib.MutableInverseQuantiles; import org.junit.Before; import org.junit.Test; @@ -36,6 +37,7 @@ public class TestSampleQuantiles { new Quantile(0.95, 0.005), new Quantile(0.99, 0.001) }; SampleQuantiles estimator; + final static int NUM_REPEATS = 10; @Before public void init() { @@ -91,28 +93,70 @@ public class TestSampleQuantiles { @Test public void testQuantileError() throws IOException { final int count = 100000; - Random r = new Random(0xDEADDEAD); - Long[] values = new Long[count]; + Random rnd = new Random(0xDEADDEAD); + int[] values = new int[count]; for (int i = 0; i < count; i++) { - values[i] = (long) (i + 1); + values[i] = i + 1; } - // Do 10 shuffle/insert/check cycles - for (int i = 0; i < 10; i++) { - System.out.println("Starting run " + i); - Collections.shuffle(Arrays.asList(values), r); + + // Repeat shuffle/insert/check cycles 10 times + for (int i = 0; i < NUM_REPEATS; i++) { + + // Shuffle + Collections.shuffle(Arrays.asList(values), rnd); estimator.clear(); - for (int j = 0; j < count; j++) { - estimator.insert(values[j]); + + // Insert + for (int value : values) { + estimator.insert(value); } Map snapshot; snapshot = estimator.snapshot(); + + // Check for (Quantile q : quantiles) { long actual = (long) (q.quantile * count); long error = (long) (q.error * count); long estimate = snapshot.get(q); - System.out - .println(String.format("Expected %d with error %d, estimated %d", - actual, error, estimate)); + assertThat(estimate <= actual + error).isTrue(); + assertThat(estimate >= actual - error).isTrue(); + } + } + } + + /** + * Correctness test that checks that absolute error of the estimate for inverse quantiles + * is within specified error bounds for some randomly permuted streams of items. 
+ */ + @Test + public void testInverseQuantiles() throws IOException { + SampleQuantiles inverseQuantilesEstimator = + new SampleQuantiles(MutableInverseQuantiles.INVERSE_QUANTILES); + final int count = 100000; + Random rnd = new Random(0xDEADDEAD); + int[] values = new int[count]; + for (int i = 0; i < count; i++) { + values[i] = i + 1; + } + + // Repeat shuffle/insert/check cycles 10 times + for (int i = 0; i < NUM_REPEATS; i++) { + // Shuffle + Collections.shuffle(Arrays.asList(values), rnd); + inverseQuantilesEstimator.clear(); + + // Insert + for (int value : values) { + inverseQuantilesEstimator.insert(value); + } + Map snapshot; + snapshot = inverseQuantilesEstimator.snapshot(); + + // Check + for (Quantile q : MutableInverseQuantiles.INVERSE_QUANTILES) { + long actual = (long) (q.quantile * count); + long error = (long) (q.error * count); + long estimate = snapshot.get(q); assertThat(estimate <= actual + error).isTrue(); assertThat(estimate >= actual - error).isTrue(); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java index 9132e20210a..8210322f8f4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java @@ -392,13 +392,34 @@ public class MetricsAsserts { */ public static void assertQuantileGauges(String prefix, MetricsRecordBuilder rb, String valueName) { - verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0l)); + verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0L)); for (Quantile q : MutableQuantiles.quantiles) { String nameTemplate = prefix + "%dthPercentile" + valueName; int percentile = (int) (100 * q.quantile); verify(rb).addGauge( eqName(info(String.format(nameTemplate, percentile), "")), - geq(0l)); + geq(0L)); + } + } + + /** + * Asserts that the NumOps and inverse quantiles for a metric have been changed at + * some point to a non-zero value, for the specified value name of the + * metrics (e.g., "Rate"). 
+ * + * @param prefix of the metric + * @param rb MetricsRecordBuilder with the metric + * @param valueName the value name for the metric + */ + public static void assertInverseQuantileGauges(String prefix, + MetricsRecordBuilder rb, String valueName) { + verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0L)); + for (Quantile q : MutableQuantiles.quantiles) { + String nameTemplate = prefix + "%dthInversePercentile" + valueName; + int percentile = (int) (100 * q.quantile); + verify(rb).addGauge( + eqName(info(String.format(nameTemplate, percentile), "")), + geq(0L)); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java index 675dbbff4c3..c3aa3c3a454 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java @@ -258,7 +258,7 @@ public class DataNodeMetrics { "ramDiskBlocksLazyPersistWindows" + interval + "s", "Time between the RamDisk block write and disk persist in ms", "ops", "latency", interval); - readTransferRateQuantiles[i] = registry.newQuantiles( + readTransferRateQuantiles[i] = registry.newInverseQuantiles( "readTransferRate" + interval + "s", "Rate at which bytes are read from datanode calculated in bytes per second", "ops", "rate", interval); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java index de5c985a4f0..35f7924be11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY; import static org.apache.hadoop.test.MetricsAsserts.assertCounter; +import static org.apache.hadoop.test.MetricsAsserts.assertInverseQuantileGauges; import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges; import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; import static org.apache.hadoop.test.MetricsAsserts.getMetrics; @@ -413,7 +414,7 @@ public class TestDataNodeMetrics { final long endWriteValue = getLongCounter("TotalWriteTime", rbNew); final long endReadValue = getLongCounter("TotalReadTime", rbNew); assertCounter("ReadTransferRateNumOps", 1L, rbNew); - assertQuantileGauges("ReadTransferRate" + "60s", rbNew, "Rate"); + assertInverseQuantileGauges("ReadTransferRate60s", rbNew, "Rate"); return endWriteValue > startWriteValue && endReadValue > startReadValue; } From 74ddf69f808b9fbf94bdad802b4745613152dbe5 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Tue, 11 Apr 2023 02:33:16 +0530 Subject: [PATCH 63/97] HDFS-16911. Distcp with snapshot diff to support Ozone filesystem. 
(#5364) --- .../org/apache/hadoop/tools/DistCpSync.java | 110 ++++++++++++------ .../apache/hadoop/tools/TestDistCpSync.java | 67 +++++++++++ 2 files changed, 140 insertions(+), 37 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index 1cf2d97ec1f..dbc86fd0b47 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -20,18 +20,19 @@ package org.apache.hadoop.tools; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSUtilClient; -import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; -import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.tools.CopyListing.InvalidInputException; import java.io.FileNotFoundException; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.Arrays; import java.util.List; import java.util.Random; @@ -106,20 +107,7 @@ class DistCpSync { final FileSystem snapshotDiffFs = isRdiff() ? tgtFs : srcFs; final Path snapshotDiffDir = isRdiff() ? targetDir : sourceDir; - // currently we require both the source and the target file system are - // DistributedFileSystem or (S)WebHdfsFileSystem. - if (!(srcFs instanceof DistributedFileSystem - || srcFs instanceof WebHdfsFileSystem)) { - throw new IllegalArgumentException("Unsupported source file system: " - + srcFs.getScheme() + "://. " + - "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); - } - if (!(tgtFs instanceof DistributedFileSystem - || tgtFs instanceof WebHdfsFileSystem)) { - throw new IllegalArgumentException("Unsupported target file system: " - + tgtFs.getScheme() + "://. " + - "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); - } + checkFilesystemSupport(sourceDir,targetDir,srcFs, tgtFs); // make sure targetFS has no change between from and the current states if (!checkNoChange(tgtFs, targetDir)) { @@ -165,6 +153,42 @@ class DistCpSync { return true; } + /** + * Check if the source and target filesystems support snapshots. 
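+   * In addition to the FS_SNAPSHOTS path capability, both filesystems must
+   * expose a getSnapshotDiffReport(Path, String, String) method; it is looked
+   * up and invoked reflectively so that non-HDFS implementations such as Ozone
+   * can be used without a compile-time dependency.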
+ */ + private void checkFilesystemSupport(Path sourceDir, Path targetDir, + FileSystem srcFs, FileSystem tgtFs) throws IOException { + if (!srcFs.hasPathCapability(sourceDir, + CommonPathCapabilities.FS_SNAPSHOTS)) { + throw new UnsupportedOperationException( + "The source file system " + srcFs.getScheme() + + " does not support snapshot."); + } + if (!tgtFs.hasPathCapability(targetDir, + CommonPathCapabilities.FS_SNAPSHOTS)) { + throw new UnsupportedOperationException( + "The target file system " + tgtFs.getScheme() + + " does not support snapshot."); + } + try { + getSnapshotDiffReportMethod(srcFs); + } catch (NoSuchMethodException e) { + throw new UnsupportedOperationException( + "The source file system " + srcFs.getScheme() + + " does not support getSnapshotDiffReport", + e); + } + try { + getSnapshotDiffReportMethod(tgtFs); + } catch (NoSuchMethodException e) { + throw new UnsupportedOperationException( + "The target file system " + tgtFs.getScheme() + + " does not support getSnapshotDiffReport", + e); + } + + } + public boolean sync() throws IOException { if (!preSyncCheck()) { return false; @@ -211,21 +235,10 @@ class DistCpSync { context.getTargetPath() : context.getSourcePaths().get(0); try { - SnapshotDiffReport report = null; - FileSystem fs = ssDir.getFileSystem(conf); final String from = getSnapshotName(context.getFromSnapshot()); final String to = getSnapshotName(context.getToSnapshot()); - if (fs instanceof DistributedFileSystem) { - DistributedFileSystem dfs = (DistributedFileSystem)fs; - report = dfs.getSnapshotDiffReport(ssDir, from, to); - } else if (fs instanceof WebHdfsFileSystem) { - WebHdfsFileSystem webHdfs = (WebHdfsFileSystem)fs; - report = webHdfs.getSnapshotDiffReport(ssDir, from, to); - } else { - throw new IllegalArgumentException("Unsupported file system: " + - fs.getScheme() + "://. " + - "Supported file systems: hdfs://, webhdfs:// and swebhdfs://."); - } + SnapshotDiffReport report = + getSnapshotDiffReport(ssDir.getFileSystem(conf), ssDir, from, to); this.diffMap = new EnumMap<>(SnapshotDiffReport.DiffType.class); for (SnapshotDiffReport.DiffType type : @@ -286,6 +299,36 @@ class DistCpSync { return false; } + /** + * Check if the filesystem implementation has a method named + * getSnapshotDiffReport. + */ + private static Method getSnapshotDiffReportMethod(FileSystem fs) + throws NoSuchMethodException { + return fs.getClass().getMethod( + "getSnapshotDiffReport", Path.class, String.class, String.class); + } + + /** + * Get the snapshotDiff b/w the fromSnapshot & toSnapshot for the given + * filesystem. + */ + private static SnapshotDiffReport getSnapshotDiffReport( + final FileSystem fs, + final Path snapshotDir, + final String fromSnapshot, + final String toSnapshot) throws IOException { + try { + return (SnapshotDiffReport) getSnapshotDiffReportMethod(fs).invoke( + fs, snapshotDir, fromSnapshot, toSnapshot); + } catch (InvocationTargetException e) { + throw new IOException(e.getCause()); + } catch (NoSuchMethodException|IllegalAccessException e) { + throw new IllegalArgumentException( + "Failed to invoke getSnapshotDiffReport.", e); + } + } + private String getSnapshotName(String name) { return Path.CUR_DIR.equals(name) ? 
"" : name; } @@ -327,14 +370,7 @@ class DistCpSync { private boolean checkNoChange(FileSystem fs, Path path) { try { final String from = getSnapshotName(context.getFromSnapshot()); - SnapshotDiffReport targetDiff = null; - if (fs instanceof DistributedFileSystem) { - DistributedFileSystem dfs = (DistributedFileSystem)fs; - targetDiff = dfs.getSnapshotDiffReport(path, from, ""); - } else { - WebHdfsFileSystem webHdfs = (WebHdfsFileSystem)fs; - targetDiff = webHdfs.getSnapshotDiffReport(path, from, ""); - } + SnapshotDiffReport targetDiff = getSnapshotDiffReport(fs, path, from, ""); if (!targetDiff.getDiffList().isEmpty()) { DistCp.LOG.warn("The target has been modified since snapshot " + context.getFromSnapshot()); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index 93796e752eb..0fbcd6571c6 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -23,6 +23,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -38,6 +40,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.tools.mapred.CopyMapper; import org.junit.After; import org.junit.Assert; @@ -47,6 +50,7 @@ import org.junit.Test; import java.io.IOException; import java.io.FileWriter; import java.io.BufferedWriter; +import java.net.URI; import java.nio.file.Files; import java.util.Arrays; import java.util.ArrayList; @@ -56,6 +60,9 @@ import java.util.List; import java.util.Map; import java.util.regex.Pattern; +import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + public class TestDistCpSync { private MiniDFSCluster cluster; private final Configuration conf = new HdfsConfiguration(); @@ -89,6 +96,7 @@ public class TestDistCpSync { conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, target.toString()); conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, target.toString()); + conf.setClass("fs.dummy.impl", DummyFs.class, FileSystem.class); } @After @@ -1276,4 +1284,63 @@ public class TestDistCpSync { verifyCopyByFs(sourceFS, targetFS, sourceFS.getFileStatus(sourceFSPath), targetFS.getFileStatus(targetFSPath), false); } + + @Test + public void testSyncSnapshotDiffWithLocalFileSystem() throws Exception { + String[] args = new String[]{"-update", "-diff", "s1", "s2", + "file:///source", "file:///target"}; + LambdaTestUtils.intercept( + UnsupportedOperationException.class, + "The source file system file does not support snapshot", + () -> new DistCp(conf, OptionsParser.parse(args)).execute()); + } + + @Test + public void testSyncSnapshotDiffWithDummyFileSystem() { + String[] args = + new String[] { "-update", "-diff", "s1", "s2", "dummy:///source", + "dummy:///target" }; + try { + FileSystem dummyFs = 
FileSystem.get(URI.create("dummy:///"), conf); + assertThat(dummyFs).isInstanceOf(DummyFs.class); + new DistCp(conf, OptionsParser.parse(args)).execute(); + } catch (UnsupportedOperationException e) { + throw e; + } catch (Exception e) { + // can expect other exceptions as source and target paths + // are not created. + } + } + + public static class DummyFs extends RawLocalFileSystem { + public DummyFs() { + super(); + } + + public URI getUri() { + return URI.create("dummy:///"); + } + + @Override + public boolean hasPathCapability(Path path, String capability) + throws IOException { + switch (validatePathCapabilityArgs(makeQualified(path), capability)) { + case CommonPathCapabilities.FS_SNAPSHOTS: + return true; + default: + return super.hasPathCapability(path, capability); + } + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + return new FileStatus(); + } + + public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir, + final String fromSnapshot, final String toSnapshot) { + return new SnapshotDiffReport(snapshotDir.getName(), fromSnapshot, + toSnapshot, new ArrayList()); + } + } } From bffa49a64f93c3860b984d137df68d383b3a79f1 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Wed, 12 Apr 2023 00:47:58 +0800 Subject: [PATCH 64/97] =?UTF-8?q?YARN-11377.=20[Federation]=20Support=20ad?= =?UTF-8?q?dToClusterNodeLabels=E3=80=81removeFromClusterNodeLabels?= =?UTF-8?q?=E3=80=81replaceLabelsOnNode=20API's=20for=20Federation.=20(#55?= =?UTF-8?q?25)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AddToClusterNodeLabelsRequest.java | 30 ++++ .../RemoveFromClusterNodeLabelsRequest.java | 28 ++++ .../ReplaceLabelsOnNodeRequest.java | 28 ++++ ...erver_resourcemanager_service_protos.proto | 3 + .../AddToClusterNodeLabelsRequestPBImpl.java | 16 +++ ...oveFromClusterNodeLabelsRequestPBImpl.java | 19 ++- .../pb/ReplaceLabelsOnNodeRequestPBImpl.java | 19 ++- .../rmadmin/FederationRMAdminInterceptor.java | 101 ++++++++++++- .../TestFederationRMAdminInterceptor.java | 133 ++++++++++++++++++ 9 files changed, 368 insertions(+), 9 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java index f2ac395dc3d..1f61268515a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java @@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.api.protocolrecords; import java.util.List; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.util.Records; @@ -37,6 +38,17 @@ public abstract class AddToClusterNodeLabelsRequest { return request; } + @Public + @Unstable + public static AddToClusterNodeLabelsRequest newInstance(String subClusterId, + List nodeLabels) { + AddToClusterNodeLabelsRequest request = Records + 
.newRecord(AddToClusterNodeLabelsRequest.class); + request.setNodeLabels(nodeLabels); + request.setSubClusterId(subClusterId); + return request; + } + @Public @Unstable public abstract void setNodeLabels(List nodeLabels); @@ -44,4 +56,22 @@ public abstract class AddToClusterNodeLabelsRequest { @Public @Unstable public abstract List getNodeLabels(); + + /** + * Get the subClusterId. + * + * @return subClusterId. + */ + @Public + @InterfaceStability.Evolving + public abstract String getSubClusterId(); + + /** + * Set the subClusterId. + * + * @param subClusterId subCluster Id. + */ + @Public + @InterfaceStability.Evolving + public abstract void setSubClusterId(String subClusterId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoveFromClusterNodeLabelsRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoveFromClusterNodeLabelsRequest.java index fd45f91e457..11baea04475 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoveFromClusterNodeLabelsRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoveFromClusterNodeLabelsRequest.java @@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.api.protocolrecords; import java.util.Set; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.yarn.util.Records; @@ -35,6 +36,15 @@ public abstract class RemoveFromClusterNodeLabelsRequest { return request; } + public static RemoveFromClusterNodeLabelsRequest newInstance(String subClusterId, + Set labels) { + RemoveFromClusterNodeLabelsRequest request = + Records.newRecord(RemoveFromClusterNodeLabelsRequest.class); + request.setNodeLabels(labels); + request.setSubClusterId(subClusterId); + return request; + } + @Public @Evolving public abstract void setNodeLabels(Set labels); @@ -42,4 +52,22 @@ public abstract class RemoveFromClusterNodeLabelsRequest { @Public @Evolving public abstract Set getNodeLabels(); + + /** + * Get the subClusterId. + * + * @return subClusterId. + */ + @Public + @InterfaceStability.Evolving + public abstract String getSubClusterId(); + + /** + * Set the subClusterId. + * + * @param subClusterId subCluster Id. 
+ */ + @Public + @InterfaceStability.Evolving + public abstract void setSubClusterId(String subClusterId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/ReplaceLabelsOnNodeRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/ReplaceLabelsOnNodeRequest.java index 1b8e687b3dc..ab67e95f7d4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/ReplaceLabelsOnNodeRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/ReplaceLabelsOnNodeRequest.java @@ -22,6 +22,7 @@ import java.util.Map; import java.util.Set; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.util.Records; @@ -37,6 +38,15 @@ public abstract class ReplaceLabelsOnNodeRequest { return request; } + public static ReplaceLabelsOnNodeRequest newInstance(Map> map, + String subClusterId) { + ReplaceLabelsOnNodeRequest request = + Records.newRecord(ReplaceLabelsOnNodeRequest.class); + request.setNodeToLabels(map); + request.setSubClusterId(subClusterId); + return request; + } + @Public @Evolving public abstract void setNodeToLabels(Map> map); @@ -52,4 +62,22 @@ public abstract class ReplaceLabelsOnNodeRequest { @Public @Evolving public abstract boolean getFailOnUnknownNodes(); + + /** + * Get the subClusterId. + * + * @return subClusterId. + */ + @Public + @InterfaceStability.Evolving + public abstract String getSubClusterId(); + + /** + * Set the subClusterId. + * + * @param subClusterId subCluster Id. 
+ */ + @Public + @InterfaceStability.Evolving + public abstract void setSubClusterId(String subClusterId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto index 132f937e150..f2145ca73d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto @@ -95,6 +95,7 @@ message RefreshNodesResourcesResponseProto { message AddToClusterNodeLabelsRequestProto { repeated string deprecatedNodeLabels = 1; repeated NodeLabelProto nodeLabels = 2; + optional string sub_cluster_id = 3; } message AddToClusterNodeLabelsResponseProto { @@ -102,6 +103,7 @@ message AddToClusterNodeLabelsResponseProto { message RemoveFromClusterNodeLabelsRequestProto { repeated string nodeLabels = 1; + optional string sub_cluster_id = 2; } message RemoveFromClusterNodeLabelsResponseProto { @@ -110,6 +112,7 @@ message RemoveFromClusterNodeLabelsResponseProto { message ReplaceLabelsOnNodeRequestProto { repeated NodeIdToLabelsProto nodeToLabels = 1; optional bool failOnUnknownNodes = 2; + optional string sub_cluster_id = 3; } message ReplaceLabelsOnNodeResponseProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java index 3bf22fb1b51..2012b9f3030 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java @@ -152,4 +152,20 @@ public class AddToClusterNodeLabelsRequestPBImpl extends initLocalNodeLabels(); return this.updatedNodeLabels; } + + @Override + public String getSubClusterId() { + AddToClusterNodeLabelsRequestProtoOrBuilder p = viaProto ? proto : builder; + return (p.hasSubClusterId()) ? 
p.getSubClusterId() : null; + } + + @Override + public void setSubClusterId(String subClusterId) { + maybeInitBuilder(); + if (subClusterId == null) { + builder.clearSubClusterId(); + return; + } + builder.setSubClusterId(subClusterId); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java index afabcd919fe..d420bda5d7b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java @@ -103,10 +103,25 @@ public class RemoveFromClusterNodeLabelsRequestPBImpl return this.labels; } + @Override + public String getSubClusterId() { + RemoveFromClusterNodeLabelsRequestProtoOrBuilder p = viaProto ? proto : builder; + return (p.hasSubClusterId()) ? p.getSubClusterId() : null; + } + + @Override + public void setSubClusterId(String subClusterId) { + maybeInitBuilder(); + if (subClusterId == null) { + builder.clearSubClusterId(); + return; + } + builder.setSubClusterId(subClusterId); + } + @Override public int hashCode() { - assert false : "hashCode not designed"; - return 0; + return getProto().hashCode(); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java index b2f491950d0..e7f2fa658e1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java @@ -151,6 +151,22 @@ public class ReplaceLabelsOnNodeRequestPBImpl extends return p.getFailOnUnknownNodes(); } + @Override + public String getSubClusterId() { + ReplaceLabelsOnNodeRequestProtoOrBuilder p = viaProto ? proto : builder; + return (p.hasSubClusterId()) ? 
p.getSubClusterId() : null; + } + + @Override + public void setSubClusterId(String subClusterId) { + maybeInitBuilder(); + if (subClusterId == null) { + builder.clearSubClusterId(); + return; + } + builder.setSubClusterId(subClusterId); + } + @Override public void setFailOnUnknownNodes(boolean failOnUnknownNodes) { maybeInitBuilder(); @@ -163,8 +179,7 @@ public class ReplaceLabelsOnNodeRequestPBImpl extends @Override public int hashCode() { - assert false : "hashCode not designed"; - return 0; + return getProto().hashCode(); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java index c930459559f..c3cac82e38c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java @@ -512,22 +512,113 @@ public class FederationRMAdminInterceptor extends AbstractRMAdminRequestIntercep @Override public AddToClusterNodeLabelsResponse addToClusterNodeLabels( - AddToClusterNodeLabelsRequest request) - throws YarnException, IOException { - throw new NotImplementedException(); + AddToClusterNodeLabelsRequest request) throws YarnException, IOException { + // parameter verification. + if (request == null) { + routerMetrics.incrAddToClusterNodeLabelsFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing AddToClusterNodeLabels request.", null); + } + + String subClusterId = request.getSubClusterId(); + if (StringUtils.isBlank(subClusterId)) { + routerMetrics.incrAddToClusterNodeLabelsFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing AddToClusterNodeLabels SubClusterId.", null); + } + + try { + long startTime = clock.getTime(); + RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod( + new Class[]{AddToClusterNodeLabelsRequest.class}, new Object[]{request}); + Collection addToClusterNodeLabelsResps = + remoteMethod.invokeConcurrent(this, AddToClusterNodeLabelsResponse.class, subClusterId); + if (CollectionUtils.isNotEmpty(addToClusterNodeLabelsResps)) { + long stopTime = clock.getTime(); + routerMetrics.succeededAddToClusterNodeLabelsRetrieved(stopTime - startTime); + return AddToClusterNodeLabelsResponse.newInstance(); + } + } catch (YarnException e) { + routerMetrics.incrAddToClusterNodeLabelsFailedRetrieved(); + RouterServerUtil.logAndThrowException(e, + "Unable to addToClusterNodeLabels due to exception. " + e.getMessage()); + } + + routerMetrics.incrAddToClusterNodeLabelsFailedRetrieved(); + throw new YarnException("Unable to addToClusterNodeLabels."); } @Override public RemoveFromClusterNodeLabelsResponse removeFromClusterNodeLabels( RemoveFromClusterNodeLabelsRequest request) throws YarnException, IOException { - throw new NotImplementedException(); + // parameter verification. 
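
For context, a minimal, hedged sketch of how an admin client is expected to drive the new sub-cluster-scoped API through the Router. Obtaining the ResourceManagerAdministrationProtocol proxy is assumed and not shown; only record types added or referenced in this change are used.

    import java.util.Arrays;

    import org.apache.hadoop.yarn.api.records.NodeLabel;
    import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol;
    import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsRequest;

    // 'admin' is assumed to be a proxy already bound to the Router's admin address.
    void addLabelsToSubCluster(ResourceManagerAdministrationProtocol admin) throws Exception {
      // A blank subClusterId is rejected by FederationRMAdminInterceptor above.
      AddToClusterNodeLabelsRequest request = AddToClusterNodeLabelsRequest.newInstance(
          "SC-1", Arrays.asList(NodeLabel.newInstance("a"), NodeLabel.newInstance("b")));
      admin.addToClusterNodeLabels(request);
    }
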
+ if (request == null) { + routerMetrics.incrRemoveFromClusterNodeLabelsFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing RemoveFromClusterNodeLabels request.", null); + } + + String subClusterId = request.getSubClusterId(); + if (StringUtils.isBlank(subClusterId)) { + routerMetrics.incrRemoveFromClusterNodeLabelsFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing RemoveFromClusterNodeLabels SubClusterId.", + null); + } + + try { + long startTime = clock.getTime(); + RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod( + new Class[]{RemoveFromClusterNodeLabelsRequest.class}, new Object[]{request}); + Collection refreshNodesResourcesResps = + remoteMethod.invokeConcurrent(this, RemoveFromClusterNodeLabelsResponse.class, + subClusterId); + if (CollectionUtils.isNotEmpty(refreshNodesResourcesResps)) { + long stopTime = clock.getTime(); + routerMetrics.succeededRemoveFromClusterNodeLabelsRetrieved(stopTime - startTime); + return RemoveFromClusterNodeLabelsResponse.newInstance(); + } + } catch (YarnException e) { + routerMetrics.incrRemoveFromClusterNodeLabelsFailedRetrieved(); + RouterServerUtil.logAndThrowException(e, + "Unable to removeFromClusterNodeLabels due to exception. " + e.getMessage()); + } + + routerMetrics.incrRemoveFromClusterNodeLabelsFailedRetrieved(); + throw new YarnException("Unable to removeFromClusterNodeLabels."); } @Override public ReplaceLabelsOnNodeResponse replaceLabelsOnNode(ReplaceLabelsOnNodeRequest request) throws YarnException, IOException { - throw new NotImplementedException(); + // parameter verification. + if (request == null) { + routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing ReplaceLabelsOnNode request.", null); + } + + String subClusterId = request.getSubClusterId(); + if (StringUtils.isBlank(subClusterId)) { + routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); + RouterServerUtil.logAndThrowException("Missing ReplaceLabelsOnNode SubClusterId.", null); + } + + try { + long startTime = clock.getTime(); + RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod( + new Class[]{ReplaceLabelsOnNodeRequest.class}, new Object[]{request}); + Collection replaceLabelsOnNodeResps = + remoteMethod.invokeConcurrent(this, ReplaceLabelsOnNodeResponse.class, subClusterId); + if (CollectionUtils.isNotEmpty(replaceLabelsOnNodeResps)) { + long stopTime = clock.getTime(); + routerMetrics.succeededRemoveFromClusterNodeLabelsRetrieved(stopTime - startTime); + return ReplaceLabelsOnNodeResponse.newInstance(); + } + } catch (YarnException e) { + routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); + RouterServerUtil.logAndThrowException(e, + "Unable to replaceLabelsOnNode due to exception. 
" + e.getMessage()); + } + + routerMetrics.incrReplaceLabelsOnNodeFailedRetrieved(); + throw new YarnException("Unable to replaceLabelsOnNode."); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java index 7449c8474d5..fa38bd6f4ce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/rmadmin/TestFederationRMAdminInterceptor.java @@ -25,6 +25,7 @@ import org.apache.hadoop.yarn.api.records.DecommissionType; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceOption; +import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest; @@ -42,6 +43,12 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceReque import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RemoveFromClusterNodeLabelsRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RemoveFromClusterNodeLabelsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.ReplaceLabelsOnNodeRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.ReplaceLabelsOnNodeResponse; import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore; import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId; import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade; @@ -55,6 +62,8 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.HashSet; import static org.junit.Assert.assertNotNull; @@ -388,4 +397,128 @@ public class TestFederationRMAdminInterceptor extends BaseRouterRMAdminTest { LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.", () -> interceptor.refreshNodesResources(request1)); } + + @Test + public void testAddToClusterNodeLabelsEmptyRequest() throws Exception { + // null request1. + LambdaTestUtils.intercept(YarnException.class, "Missing AddToClusterNodeLabels request.", + () -> interceptor.addToClusterNodeLabels(null)); + + // null request2. 
+ AddToClusterNodeLabelsRequest request = AddToClusterNodeLabelsRequest.newInstance(null, null); + LambdaTestUtils.intercept(YarnException.class, "Missing AddToClusterNodeLabels SubClusterId.", + () -> interceptor.addToClusterNodeLabels(request)); + } + + @Test + public void testAddToClusterNodeLabelsNormalRequest() throws Exception { + // case1, We add NodeLabel to subCluster SC-1 + NodeLabel nodeLabelA = NodeLabel.newInstance("a"); + NodeLabel nodeLabelB = NodeLabel.newInstance("b"); + List labels = new ArrayList<>(); + labels.add(nodeLabelA); + labels.add(nodeLabelB); + + AddToClusterNodeLabelsRequest request = + AddToClusterNodeLabelsRequest.newInstance("SC-1", labels); + AddToClusterNodeLabelsResponse response = interceptor.addToClusterNodeLabels(request); + assertNotNull(response); + + // case2, test the non-exist subCluster. + AddToClusterNodeLabelsRequest request1 = + AddToClusterNodeLabelsRequest.newInstance("SC-NON", labels); + LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.", + () -> interceptor.addToClusterNodeLabels(request1)); + } + + @Test + public void testRemoveFromClusterNodeLabelsEmptyRequest() throws Exception { + // null request1. + LambdaTestUtils.intercept(YarnException.class, "Missing RemoveFromClusterNodeLabels request.", + () -> interceptor.removeFromClusterNodeLabels(null)); + + // null request2. + RemoveFromClusterNodeLabelsRequest request = + RemoveFromClusterNodeLabelsRequest.newInstance(null, null); + LambdaTestUtils.intercept(YarnException.class, + "Missing RemoveFromClusterNodeLabels SubClusterId.", + () -> interceptor.removeFromClusterNodeLabels(request)); + } + + @Test + public void testRemoveFromClusterNodeLabelsNormalRequest() throws Exception { + // case1, We add nodelabel a for SC-1, and then remove nodelabel a + + // Step1. Add NodeLabel for subCluster SC-1 + NodeLabel nodeLabelA = NodeLabel.newInstance("a"); + NodeLabel nodeLabelB = NodeLabel.newInstance("b"); + List nodeLabels = new ArrayList<>(); + nodeLabels.add(nodeLabelA); + nodeLabels.add(nodeLabelB); + + AddToClusterNodeLabelsRequest request = + AddToClusterNodeLabelsRequest.newInstance("SC-1", nodeLabels); + interceptor.addToClusterNodeLabels(request); + + // Step2. We delete the label a of subCluster SC-1 + Set labels = new HashSet<>(); + labels.add("a"); + + RemoveFromClusterNodeLabelsRequest request1 = + RemoveFromClusterNodeLabelsRequest.newInstance("SC-1", labels); + RemoveFromClusterNodeLabelsResponse response = + interceptor.removeFromClusterNodeLabels(request1); + assertNotNull(response); + + // case2, test the non-exist subCluster. + RemoveFromClusterNodeLabelsRequest request2 = + RemoveFromClusterNodeLabelsRequest.newInstance("SC-NON", labels); + LambdaTestUtils.intercept(YarnException.class, + "subClusterId = SC-NON is not an active subCluster.", + () -> interceptor.removeFromClusterNodeLabels(request2)); + } + + @Test + public void testReplaceLabelsOnNodeEmptyRequest() throws Exception { + // null request1. + LambdaTestUtils.intercept(YarnException.class, "Missing ReplaceLabelsOnNode request.", + () -> interceptor.replaceLabelsOnNode(null)); + + // null request2. 
+ Map> labelMap = new HashMap<>(); + ReplaceLabelsOnNodeRequest request = ReplaceLabelsOnNodeRequest.newInstance(labelMap, null); + LambdaTestUtils.intercept(YarnException.class, "Missing ReplaceLabelsOnNode SubClusterId.", + () -> interceptor.replaceLabelsOnNode(request)); + } + + @Test + public void tesReplaceLabelsOnNodeEmptyNormalRequest() throws Exception { + // case1, We add nodelabel for SC-1, and then replace the label for the specific node. + NodeLabel nodeLabelA = NodeLabel.newInstance("a"); + NodeLabel nodeLabelB = NodeLabel.newInstance("b"); + List nodeLabels = new ArrayList<>(); + nodeLabels.add(nodeLabelA); + nodeLabels.add(nodeLabelB); + + AddToClusterNodeLabelsRequest request = + AddToClusterNodeLabelsRequest.newInstance("SC-1", nodeLabels); + interceptor.addToClusterNodeLabels(request); + + Map> pMap = new HashMap<>(); + NodeId nodeId = NodeId.newInstance("127.0.0.1", 0); + Set labels = new HashSet<>(); + labels.add("a"); + pMap.put(nodeId, labels); + + ReplaceLabelsOnNodeRequest request1 = ReplaceLabelsOnNodeRequest.newInstance(pMap, "SC-1"); + ReplaceLabelsOnNodeResponse response = interceptor.replaceLabelsOnNode(request1); + assertNotNull(response); + + // case2, test the non-exist subCluster. + ReplaceLabelsOnNodeRequest request2 = + ReplaceLabelsOnNodeRequest.newInstance(pMap, "SC-NON"); + LambdaTestUtils.intercept(YarnException.class, + "subClusterId = SC-NON is not an active subCluster.", + () -> interceptor.replaceLabelsOnNode(request2)); + } } From 7c3d94a032ba0bfafb2d1ff35d4675cb6b5618d9 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 12 Apr 2023 00:47:45 +0100 Subject: [PATCH 65/97] HADOOP-18637. S3A to support upload of files greater than 2 GB using DiskBlocks (#5543) Contributed By: HarshitGupta and Steve Loughran --- hadoop-tools/hadoop-aws/pom.xml | 2 + .../org/apache/hadoop/fs/s3a/Constants.java | 21 +++++ .../hadoop/fs/s3a/S3ABlockOutputStream.java | 72 +++++++++++---- .../apache/hadoop/fs/s3a/S3ADataBlocks.java | 76 ++++++++++------ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 26 ++++-- .../hadoop/fs/s3a/S3AInstrumentation.java | 8 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 33 +++++++ .../hadoop/fs/s3a/WriteOperationHelper.java | 4 +- .../apache/hadoop/fs/s3a/WriteOperations.java | 2 +- .../hadoop/fs/s3a/api/RequestFactory.java | 5 +- .../fs/s3a/commit/AbstractS3ACommitter.java | 4 + .../fs/s3a/impl/RequestFactoryImpl.java | 30 ++++++- .../BlockOutputStreamStatistics.java | 8 +- .../impl/EmptyS3AStatisticsContext.java | 8 +- .../site/markdown/tools/hadoop-aws/index.md | 4 +- .../hadoop/fs/s3a/MockS3AFileSystem.java | 5 ++ .../ITestMagicCommitProtocolFailure.java | 69 ++++++++++++++ .../ITestStagingCommitProtocolFailure.java | 69 ++++++++++++++ .../fs/s3a/impl/TestRequestFactory.java | 3 +- .../ITestS3AHugeFileUploadSinglePut.java | 89 +++++++++++++++++++ 20 files changed, 465 insertions(+), 73 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocolFailure.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocolFailure.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFileUploadSinglePut.java diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 6ebf1c71f0d..ae8db93329d 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -108,6 +108,7 @@ ${testsThreadCount} false + 
false ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true ${testsThreadCount} @@ -272,6 +273,7 @@ verify + false ${fs.s3a.scale.test.enabled} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 16472a75fd2..a59a07c8437 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1255,4 +1255,25 @@ public final class Constants { */ public static final String PREFETCH_BLOCK_COUNT_KEY = "fs.s3a.prefetch.block.count"; public static final int PREFETCH_BLOCK_DEFAULT_COUNT = 8; + + /** + * Option to enable or disable the multipart uploads. + * Value: {@value}. + *

+ * Default is {@link #DEFAULT_MULTIPART_UPLOAD_ENABLED}. + */ + public static final String MULTIPART_UPLOADS_ENABLED = "fs.s3a.multipart.uploads.enabled"; + + /** + * Default value for multipart uploads. + * {@value} + */ + public static final boolean DEFAULT_MULTIPART_UPLOAD_ENABLED = true; + + /** + * Stream supports multipart uploads to the given path. + */ + public static final String STORE_CAPABILITY_DIRECTORY_MARKER_MULTIPART_UPLOAD_ENABLED = + "fs.s3a.capability.multipart.uploads.enabled"; + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java index 19943ff2f70..df3c9315ba8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java @@ -101,7 +101,7 @@ class S3ABlockOutputStream extends OutputStream implements private final String key; /** Size of all blocks. */ - private final int blockSize; + private final long blockSize; /** IO Statistics. */ private final IOStatistics iostatistics; @@ -169,6 +169,9 @@ class S3ABlockOutputStream extends OutputStream implements /** Thread level IOStatistics Aggregator. */ private final IOStatisticsAggregator threadIOStatisticsAggregator; + /** Is multipart upload enabled? */ + private final boolean isMultipartUploadEnabled; + /** * An S3A output stream which uploads partitions in a separate pool of * threads; different {@link S3ADataBlocks.BlockFactory} @@ -181,7 +184,6 @@ class S3ABlockOutputStream extends OutputStream implements this.builder = builder; this.key = builder.key; this.blockFactory = builder.blockFactory; - this.blockSize = (int) builder.blockSize; this.statistics = builder.statistics; // test instantiations may not provide statistics; this.iostatistics = statistics.getIOStatistics(); @@ -195,17 +197,26 @@ class S3ABlockOutputStream extends OutputStream implements (ProgressListener) progress : new ProgressableListener(progress); downgradeSyncableExceptions = builder.downgradeSyncableExceptions; - // create that first block. This guarantees that an open + close sequence - // writes a 0-byte entry. - createBlockIfNeeded(); - LOG.debug("Initialized S3ABlockOutputStream for {}" + - " output to {}", key, activeBlock); + + // look for multipart support. + this.isMultipartUploadEnabled = builder.isMultipartUploadEnabled; + // block size is infinite if multipart is disabled, so ignore + // what was passed in from the builder. + this.blockSize = isMultipartUploadEnabled + ? builder.blockSize + : -1; + if (putTracker.initialize()) { LOG.debug("Put tracker requests multipart upload"); initMultipartUpload(); } this.isCSEEnabled = builder.isCSEEnabled; this.threadIOStatisticsAggregator = builder.ioStatisticsAggregator; + // create that first block. This guarantees that an open + close sequence + // writes a 0-byte entry. + createBlockIfNeeded(); + LOG.debug("Initialized S3ABlockOutputStream for {}" + + " output to {}", key, activeBlock); } /** @@ -318,7 +329,15 @@ class S3ABlockOutputStream extends OutputStream implements statistics.writeBytes(len); S3ADataBlocks.DataBlock block = createBlockIfNeeded(); int written = block.write(source, offset, len); - int remainingCapacity = block.remainingCapacity(); + if (!isMultipartUploadEnabled) { + // no need to check for space as multipart uploads + // are not available...everything is saved to a single + // (disk) block. 
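
As a quick illustration of how the new option is expected to be set, a hedged configuration sketch; the key names are the constants introduced here, and the disk-buffer requirement follows from the checkDiskBuffer() validation added later in this change.

    import org.apache.hadoop.conf.Configuration;

    // Illustrative only: with multipart uploads disabled the whole object is
    // buffered in a single block, so only the disk buffer is permitted.
    static Configuration singlePutConfiguration() {
      Configuration conf = new Configuration();
      conf.setBoolean("fs.s3a.multipart.uploads.enabled", false);
      conf.set("fs.s3a.fast.upload.buffer", "disk");
      return conf;
    }
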
+ return; + } + // look to see if another block is needed to complete + // the upload or exactly a block was written. + int remainingCapacity = (int) block.remainingCapacity(); if (written < len) { // not everything was written —the block has run out // of capacity @@ -369,6 +388,8 @@ class S3ABlockOutputStream extends OutputStream implements */ @Retries.RetryTranslated private void initMultipartUpload() throws IOException { + Preconditions.checkState(isMultipartUploadEnabled, + "multipart upload is disabled"); if (multiPartUpload == null) { LOG.debug("Initiating Multipart upload"); multiPartUpload = new MultiPartUpload(key); @@ -558,19 +579,20 @@ class S3ABlockOutputStream extends OutputStream implements } /** - * Upload the current block as a single PUT request; if the buffer - * is empty a 0-byte PUT will be invoked, as it is needed to create an - * entry at the far end. - * @throws IOException any problem. - * @return number of bytes uploaded. If thread was interrupted while - * waiting for upload to complete, returns zero with interrupted flag set - * on this thread. + * Upload the current block as a single PUT request; if the buffer is empty a + * 0-byte PUT will be invoked, as it is needed to create an entry at the far + * end. + * @return number of bytes uploaded. If thread was interrupted while waiting + * for upload to complete, returns zero with interrupted flag set on this + * thread. + * @throws IOException + * any problem. */ - private int putObject() throws IOException { + private long putObject() throws IOException { LOG.debug("Executing regular upload for {}", writeOperationHelper); final S3ADataBlocks.DataBlock block = getActiveBlock(); - int size = block.dataSize(); + long size = block.dataSize(); final S3ADataBlocks.BlockUploadData uploadData = block.startUpload(); final PutObjectRequest putObjectRequest = uploadData.hasFile() ? writeOperationHelper.createPutObjectRequest( @@ -617,6 +639,7 @@ class S3ABlockOutputStream extends OutputStream implements "S3ABlockOutputStream{"); sb.append(writeOperationHelper.toString()); sb.append(", blockSize=").append(blockSize); + sb.append(", isMultipartUploadEnabled=").append(isMultipartUploadEnabled); // unsynced access; risks consistency in exchange for no risk of deadlock. S3ADataBlocks.DataBlock block = activeBlock; if (block != null) { @@ -835,7 +858,7 @@ class S3ABlockOutputStream extends OutputStream implements Preconditions.checkNotNull(uploadId, "Null uploadId"); maybeRethrowUploadFailure(); partsSubmitted++; - final int size = block.dataSize(); + final long size = block.dataSize(); bytesSubmitted += size; final int currentPartNumber = partETagsFutures.size() + 1; final UploadPartRequest request; @@ -1011,7 +1034,7 @@ class S3ABlockOutputStream extends OutputStream implements ProgressEventType eventType = progressEvent.getEventType(); long bytesTransferred = progressEvent.getBytesTransferred(); - int size = block.dataSize(); + long size = block.dataSize(); switch (eventType) { case REQUEST_BYTE_TRANSFER_EVENT: @@ -1126,6 +1149,11 @@ class S3ABlockOutputStream extends OutputStream implements */ private IOStatisticsAggregator ioStatisticsAggregator; + /** + * Is Multipart Uploads enabled for the given upload. 
+ */ + private boolean isMultipartUploadEnabled; + private BlockOutputStreamBuilder() { } @@ -1276,5 +1304,11 @@ class S3ABlockOutputStream extends OutputStream implements ioStatisticsAggregator = value; return this; } + + public BlockOutputStreamBuilder withMultipartEnabled( + final boolean value) { + isMultipartUploadEnabled = value; + return this; + } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java index 03b5bd96162..b20d8e859aa 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java @@ -180,7 +180,7 @@ final class S3ADataBlocks { * @param statistics stats to work with * @return a new block. */ - abstract DataBlock create(long index, int limit, + abstract DataBlock create(long index, long limit, BlockOutputStreamStatistics statistics) throws IOException; @@ -258,7 +258,7 @@ final class S3ADataBlocks { * Return the current data size. * @return the size of the data */ - abstract int dataSize(); + abstract long dataSize(); /** * Predicate to verify that the block has the capacity to write @@ -280,7 +280,7 @@ final class S3ADataBlocks { * The remaining capacity in the block before it is full. * @return the number of bytes remaining. */ - abstract int remainingCapacity(); + abstract long remainingCapacity(); /** * Write a series of bytes from the buffer, from the offset. @@ -391,9 +391,11 @@ final class S3ADataBlocks { } @Override - DataBlock create(long index, int limit, + DataBlock create(long index, long limit, BlockOutputStreamStatistics statistics) throws IOException { + Preconditions.checkArgument(limit > 0, + "Invalid block size: %d", limit); return new ByteArrayBlock(0, limit, statistics); } @@ -436,11 +438,11 @@ final class S3ADataBlocks { private Integer dataSize; ByteArrayBlock(long index, - int limit, + long limit, BlockOutputStreamStatistics statistics) { super(index, statistics); - this.limit = limit; - buffer = new S3AByteArrayOutputStream(limit); + this.limit = (limit > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int) limit; + buffer = new S3AByteArrayOutputStream(this.limit); blockAllocated(); } @@ -449,7 +451,7 @@ final class S3ADataBlocks { * @return the amount of data available to upload. */ @Override - int dataSize() { + long dataSize() { return dataSize != null ? 
dataSize : buffer.size(); } @@ -468,14 +470,14 @@ final class S3ADataBlocks { } @Override - int remainingCapacity() { + long remainingCapacity() { return limit - dataSize(); } @Override int write(byte[] b, int offset, int len) throws IOException { super.write(b, offset, len); - int written = Math.min(remainingCapacity(), len); + int written = (int) Math.min(remainingCapacity(), len); buffer.write(b, offset, written); return written; } @@ -514,9 +516,11 @@ final class S3ADataBlocks { } @Override - ByteBufferBlock create(long index, int limit, + ByteBufferBlock create(long index, long limit, BlockOutputStreamStatistics statistics) throws IOException { + Preconditions.checkArgument(limit > 0, + "Invalid block size: %d", limit); return new ByteBufferBlock(index, limit, statistics); } @@ -564,11 +568,12 @@ final class S3ADataBlocks { * @param statistics statistics to update */ ByteBufferBlock(long index, - int bufferSize, + long bufferSize, BlockOutputStreamStatistics statistics) { super(index, statistics); - this.bufferSize = bufferSize; - blockBuffer = requestBuffer(bufferSize); + this.bufferSize = bufferSize > Integer.MAX_VALUE ? + Integer.MAX_VALUE : (int) bufferSize; + blockBuffer = requestBuffer(this.bufferSize); blockAllocated(); } @@ -577,7 +582,7 @@ final class S3ADataBlocks { * @return the amount of data available to upload. */ @Override - int dataSize() { + long dataSize() { return dataSize != null ? dataSize : bufferCapacityUsed(); } @@ -598,7 +603,7 @@ final class S3ADataBlocks { } @Override - public int remainingCapacity() { + public long remainingCapacity() { return blockBuffer != null ? blockBuffer.remaining() : 0; } @@ -609,7 +614,7 @@ final class S3ADataBlocks { @Override int write(byte[] b, int offset, int len) throws IOException { super.write(b, offset, len); - int written = Math.min(remainingCapacity(), len); + int written = (int) Math.min(remainingCapacity(), len); blockBuffer.put(b, offset, written); return written; } @@ -802,16 +807,18 @@ final class S3ADataBlocks { * Create a temp file and a {@link DiskBlock} instance to manage it. * * @param index block index - * @param limit limit of the block. + * @param limit limit of the block. -1 means "no limit" * @param statistics statistics to update * @return the new block * @throws IOException IO problems */ @Override DataBlock create(long index, - int limit, + long limit, BlockOutputStreamStatistics statistics) throws IOException { + Preconditions.checkArgument(limit != 0, + "Invalid block size: %d", limit); File destFile = getOwner() .createTmpFileForWrite(String.format("s3ablock-%04d-", index), limit, getOwner().getConf()); @@ -825,14 +832,14 @@ final class S3ADataBlocks { */ static class DiskBlock extends DataBlock { - private int bytesWritten; + private long bytesWritten; private final File bufferFile; - private final int limit; + private final long limit; private BufferedOutputStream out; private final AtomicBoolean closed = new AtomicBoolean(false); DiskBlock(File bufferFile, - int limit, + long limit, long index, BlockOutputStreamStatistics statistics) throws FileNotFoundException { @@ -844,24 +851,39 @@ final class S3ADataBlocks { } @Override - int dataSize() { + long dataSize() { return bytesWritten; } + /** + * Does this block have unlimited space? + * @return true if a block with no size limit was created. 
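
A short arithmetic check makes the int-to-long widening above concrete (illustrative only):

    // With multipart disabled, a single disk-buffered block may hold the whole
    // object, which can exceed what an int can represent.
    long fiveGiB = 5L * 1024 * 1024 * 1024;   // 5,368,709,120 bytes
    assert fiveGiB > Integer.MAX_VALUE;       // Integer.MAX_VALUE is 2,147,483,647
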
+ */ + private boolean unlimited() { + return limit < 0; + } + @Override boolean hasCapacity(long bytes) { - return dataSize() + bytes <= limit; + return unlimited() || dataSize() + bytes <= limit; } + /** + * {@inheritDoc}. + * If there is no limit to capacity, return MAX_VALUE. + * @return capacity in the block. + */ @Override - int remainingCapacity() { - return limit - bytesWritten; + long remainingCapacity() { + return unlimited() + ? Integer.MAX_VALUE + : limit - bytesWritten; } @Override int write(byte[] b, int offset, int len) throws IOException { super.write(b, offset, len); - int written = Math.min(remainingCapacity(), len); + int written = (int) Math.min(remainingCapacity(), len); out.write(b, offset, written); bytesWritten += written; return written; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index e96feb0243a..a73bd55b55e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -413,6 +413,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ private ArnResource accessPoint; + /** + * Does this S3A FS instance have multipart upload enabled? + */ + private boolean isMultipartUploadEnabled = DEFAULT_MULTIPART_UPLOAD_ENABLED; + /** * A cache of files that should be deleted when the FileSystem is closed * or the JVM is exited. @@ -543,7 +548,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, this.prefetchBlockSize = (int) prefetchBlockSizeLong; this.prefetchBlockCount = intOption(conf, PREFETCH_BLOCK_COUNT_KEY, PREFETCH_BLOCK_DEFAULT_COUNT, 1); - + this.isMultipartUploadEnabled = conf.getBoolean(MULTIPART_UPLOADS_ENABLED, + DEFAULT_MULTIPART_UPLOAD_ENABLED); initThreadPools(conf); int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION); @@ -605,7 +611,6 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, } blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, DEFAULT_FAST_UPLOAD_BUFFER); - partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize); blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer); blockOutputActiveBlocks = intOption(conf, FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1); @@ -614,8 +619,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, blockOutputActiveBlocks = 1; } LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" + - " queue limit={}", - blockOutputBuffer, partSize, blockOutputActiveBlocks); + " queue limit={}; multipart={}", + blockOutputBuffer, partSize, blockOutputActiveBlocks, isMultipartUploadEnabled); // verify there's no S3Guard in the store config. 
checkNoS3Guard(this.getUri(), getConf()); @@ -1092,6 +1097,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, .withRequestPreparer(getAuditManager()::requestCreated) .withContentEncoding(contentEncoding) .withStorageClass(storageClass) + .withMultipartUploadEnabled(isMultipartUploadEnabled) .build(); } @@ -1842,6 +1848,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, final PutObjectOptions putOptions = new PutObjectOptions(keep, null, options.getHeaders()); + validateOutputStreamConfiguration(path, getConf()); + final S3ABlockOutputStream.BlockOutputStreamBuilder builder = S3ABlockOutputStream.builder() .withKey(destKey) @@ -1865,7 +1873,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, .withCSEEnabled(isCSEEnabled) .withPutOptions(putOptions) .withIOStatisticsAggregator( - IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator()); + IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator()) + .withMultipartEnabled(isMultipartUploadEnabled); return new FSDataOutputStream( new S3ABlockOutputStream(builder), null); @@ -5103,6 +5112,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, case STORE_CAPABILITY_DIRECTORY_MARKER_ACTION_DELETE: return !keepDirectoryMarkers(path); + case STORE_CAPABILITY_DIRECTORY_MARKER_MULTIPART_UPLOAD_ENABLED: + return isMultipartUploadEnabled(); + // create file options case FS_S3A_CREATE_PERFORMANCE: case FS_S3A_CREATE_HEADER: @@ -5419,4 +5431,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, public boolean isCSEEnabled() { return isCSEEnabled; } + + public boolean isMultipartUploadEnabled() { + return isMultipartUploadEnabled; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java index 9d33efa9d01..da12223570e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java @@ -1547,7 +1547,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource, * of block uploads pending (1) and the bytes pending (blockSize). */ @Override - public void blockUploadQueued(int blockSize) { + public void blockUploadQueued(long blockSize) { incCounter(StreamStatisticNames.STREAM_WRITE_BLOCK_UPLOADS); incAllGauges(STREAM_WRITE_BLOCK_UPLOADS_PENDING, 1); incAllGauges(STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING, blockSize); @@ -1560,7 +1560,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource, * {@code STREAM_WRITE_BLOCK_UPLOADS_ACTIVE}. */ @Override - public void blockUploadStarted(Duration timeInQueue, int blockSize) { + public void blockUploadStarted(Duration timeInQueue, long blockSize) { // the local counter is used in toString reporting. queueDuration.addAndGet(timeInQueue.toMillis()); // update the duration fields in the IOStatistics. 
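
Applications can discover at run time whether a given store will use multipart uploads through the path capability wired up above; a minimal sketch, assuming a placeholder bucket URI:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Probe the capability string introduced in Constants; the bucket is a placeholder.
    static boolean supportsMultipartUploads(Configuration conf) throws IOException {
      Path bucket = new Path("s3a://example-bucket/");
      FileSystem fs = bucket.getFileSystem(conf);
      return fs.hasPathCapability(bucket, "fs.s3a.capability.multipart.uploads.enabled");
    }
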
@@ -1588,7 +1588,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource, @Override public void blockUploadCompleted( Duration timeSinceUploadStarted, - int blockSize) { + long blockSize) { transferDuration.addAndGet(timeSinceUploadStarted.toMillis()); incAllGauges(STREAM_WRITE_BLOCK_UPLOADS_ACTIVE, -1); blockUploadsCompleted.incrementAndGet(); @@ -1602,7 +1602,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource, @Override public void blockUploadFailed( Duration timeSinceUploadStarted, - int blockSize) { + long blockSize) { incCounter(StreamStatisticNames.STREAM_WRITE_EXCEPTIONS); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 8a1947f3e42..274bc96fb99 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -41,6 +41,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.util.functional.RemoteIterators; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; @@ -1031,6 +1032,38 @@ public final class S3AUtils { return partSize; } + /** + * Validates the output stream configuration. + * @param path path: for error messages + * @param conf : configuration object for the given context + * @throws PathIOException Unsupported configuration. + */ + public static void validateOutputStreamConfiguration(final Path path, + Configuration conf) throws PathIOException { + if(!checkDiskBuffer(conf)){ + throw new PathIOException(path.toString(), + "Unable to create OutputStream with the given" + + " multipart upload and buffer configuration."); + } + } + + /** + * Check whether the configuration for S3ABlockOutputStream is + * consistent or not. Multipart uploads allow all kinds of fast buffers to + * be supported. When the option is disabled only disk buffers are allowed to + * be used as the file size might be bigger than the buffer size that can be + * allocated. + * @param conf : configuration object for the given context + * @return true if the disk buffer and the multipart settings are supported + */ + public static boolean checkDiskBuffer(Configuration conf) { + boolean isMultipartUploadEnabled = conf.getBoolean(MULTIPART_UPLOADS_ENABLED, + DEFAULT_MULTIPART_UPLOAD_ENABLED); + return isMultipartUploadEnabled + || FAST_UPLOAD_BUFFER_DISK.equals( + conf.get(FAST_UPLOAD_BUFFER, DEFAULT_FAST_UPLOAD_BUFFER)); + } + /** * Ensure that the long value is in the range of an integer. 
* @param name property name for error messages diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java index 14ffeed4a55..7f9db33157f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java @@ -269,8 +269,6 @@ public class WriteOperationHelper implements WriteOperations { String dest, File sourceFile, final PutObjectOptions options) { - Preconditions.checkState(sourceFile.length() < Integer.MAX_VALUE, - "File length is too big for a single PUT upload"); activateAuditSpan(); final ObjectMetadata objectMetadata = newObjectMetadata((int) sourceFile.length()); @@ -532,7 +530,7 @@ public class WriteOperationHelper implements WriteOperations { String destKey, String uploadId, int partNumber, - int size, + long size, InputStream uploadStream, File sourceFile, Long offset) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java index 321390446f7..32888314d88 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java @@ -233,7 +233,7 @@ public interface WriteOperations extends AuditSpanSource, Closeable { String destKey, String uploadId, int partNumber, - int size, + long size, InputStream uploadStream, File sourceFile, Long offset) throws IOException; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index cae4d3ef034..2a4771925f0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -196,10 +196,11 @@ public interface RequestFactory { * @param destKey destination object key * @param options options for the request * @return the request. + * @throws PathIOException if multipart uploads are disabled */ InitiateMultipartUploadRequest newMultipartUploadRequest( String destKey, - @Nullable PutObjectOptions options); + @Nullable PutObjectOptions options) throws PathIOException; /** * Complete a multipart upload. 
@@ -248,7 +249,7 @@ public interface RequestFactory { String destKey, String uploadId, int partNumber, - int size, + long size, InputStream uploadStream, File sourceFile, long offset) throws PathIOException; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java index d6044edde29..e53c690431e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java @@ -217,6 +217,10 @@ public abstract class AbstractS3ACommitter extends PathOutputCommitter LOG.debug("{} instantiated for job \"{}\" ID {} with destination {}", role, jobName(context), jobIdString(context), outputPath); S3AFileSystem fs = getDestS3AFS(); + if (!fs.isMultipartUploadEnabled()) { + throw new PathCommitException(outputPath, "Multipart uploads are disabled for the FileSystem," + + " the committer can't proceed."); + } // set this thread's context with the job ID. // audit spans created in this thread will pick // up this value., including the commit operations instance diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index ce11df03839..7227941e344 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -124,6 +124,11 @@ public class RequestFactoryImpl implements RequestFactory { */ private final StorageClass storageClass; + /** + * Is multipart upload enabled. + */ + private final boolean isMultipartUploadEnabled; + /** * Constructor. * @param builder builder with all the configuration. @@ -137,6 +142,7 @@ public class RequestFactoryImpl implements RequestFactory { this.requestPreparer = builder.requestPreparer; this.contentEncoding = builder.contentEncoding; this.storageClass = builder.storageClass; + this.isMultipartUploadEnabled = builder.isMultipartUploadEnabled; } /** @@ -460,7 +466,10 @@ public class RequestFactoryImpl implements RequestFactory { @Override public InitiateMultipartUploadRequest newMultipartUploadRequest( final String destKey, - @Nullable final PutObjectOptions options) { + @Nullable final PutObjectOptions options) throws PathIOException { + if (!isMultipartUploadEnabled) { + throw new PathIOException(destKey, "Multipart uploads are disabled."); + } final ObjectMetadata objectMetadata = newObjectMetadata(-1); maybeSetMetadata(options, objectMetadata); final InitiateMultipartUploadRequest initiateMPURequest = @@ -509,7 +518,7 @@ public class RequestFactoryImpl implements RequestFactory { String destKey, String uploadId, int partNumber, - int size, + long size, InputStream uploadStream, File sourceFile, long offset) throws PathIOException { @@ -682,6 +691,11 @@ public class RequestFactoryImpl implements RequestFactory { */ private PrepareRequest requestPreparer; + /** + * Is Multipart Enabled on the path. + */ + private boolean isMultipartUploadEnabled = true; + private RequestFactoryBuilder() { } @@ -767,6 +781,18 @@ public class RequestFactoryImpl implements RequestFactory { this.requestPreparer = value; return this; } + + /** + * Multipart upload enabled. 
+ * + * @param value new value + * @return the builder + */ + public RequestFactoryBuilder withMultipartUploadEnabled( + final boolean value) { + this.isMultipartUploadEnabled = value; + return this; + } } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/BlockOutputStreamStatistics.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/BlockOutputStreamStatistics.java index bd1466b2a43..554b628d003 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/BlockOutputStreamStatistics.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/BlockOutputStreamStatistics.java @@ -32,21 +32,21 @@ public interface BlockOutputStreamStatistics extends Closeable, * Block is queued for upload. * @param blockSize block size. */ - void blockUploadQueued(int blockSize); + void blockUploadQueued(long blockSize); /** * Queued block has been scheduled for upload. * @param timeInQueue time in the queue. * @param blockSize block size. */ - void blockUploadStarted(Duration timeInQueue, int blockSize); + void blockUploadStarted(Duration timeInQueue, long blockSize); /** * A block upload has completed. Duration excludes time in the queue. * @param timeSinceUploadStarted time in since the transfer began. * @param blockSize block size */ - void blockUploadCompleted(Duration timeSinceUploadStarted, int blockSize); + void blockUploadCompleted(Duration timeSinceUploadStarted, long blockSize); /** * A block upload has failed. Duration excludes time in the queue. @@ -57,7 +57,7 @@ public interface BlockOutputStreamStatistics extends Closeable, * @param timeSinceUploadStarted time in since the transfer began. * @param blockSize block size */ - void blockUploadFailed(Duration timeSinceUploadStarted, int blockSize); + void blockUploadFailed(Duration timeSinceUploadStarted, long blockSize); /** * Intermediate report of bytes uploaded. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/EmptyS3AStatisticsContext.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/EmptyS3AStatisticsContext.java index d10b6484175..6454065b240 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/EmptyS3AStatisticsContext.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/EmptyS3AStatisticsContext.java @@ -442,22 +442,22 @@ public final class EmptyS3AStatisticsContext implements S3AStatisticsContext { implements BlockOutputStreamStatistics { @Override - public void blockUploadQueued(final int blockSize) { + public void blockUploadQueued(final long blockSize) { } @Override public void blockUploadStarted(final Duration timeInQueue, - final int blockSize) { + final long blockSize) { } @Override public void blockUploadCompleted(final Duration timeSinceUploadStarted, - final int blockSize) { + final long blockSize) { } @Override public void blockUploadFailed(final Duration timeSinceUploadStarted, - final int blockSize) { + final long blockSize) { } @Override diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index ae042b16199..7e2a1c2b120 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -1727,7 +1727,9 @@ The "fast" output stream 1. 
Uploads large files as blocks with the size set by `fs.s3a.multipart.size`. That is: the threshold at which multipart uploads - begin and the size of each upload are identical. + begin and the size of each upload are identical. This behavior can be enabled + or disabled by using the flag `fs.s3a.multipart.uploads.enabled` which by + default is set to true. 1. Buffers blocks to disk (default) or in on-heap or off-heap memory. 1. Uploads blocks in parallel in background threads. 1. Begins uploading blocks as soon as the buffered data exceeds this partition diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java index a859cd534bb..40857373fb8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java @@ -200,6 +200,11 @@ public class MockS3AFileSystem extends S3AFileSystem { return true; } + @Override + public boolean isMultipartUploadEnabled() { + return true; + } + /** * Make operation to set the s3 client public. * @param client client. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocolFailure.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocolFailure.java new file mode 100644 index 00000000000..41593c2b263 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocolFailure.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.commit.magic; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; +import org.apache.hadoop.fs.s3a.commit.CommitConstants; +import org.apache.hadoop.fs.s3a.commit.PathCommitException; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; + +import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_UPLOADS_ENABLED; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; +import static org.apache.hadoop.fs.s3a.commit.CommitConstants.FS_S3A_COMMITTER_NAME; +import static org.apache.hadoop.fs.s3a.commit.CommitConstants.MAGIC_COMMITTER_ENABLED; +import static org.apache.hadoop.fs.s3a.commit.CommitConstants.S3A_COMMITTER_FACTORY_KEY; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Verify that the magic committer cannot be created if the FS doesn't support multipart + * uploads. + */ +public class ITestMagicCommitProtocolFailure extends AbstractS3ATestBase { + + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + removeBucketOverrides(getTestBucketName(conf), conf, + MAGIC_COMMITTER_ENABLED, + S3A_COMMITTER_FACTORY_KEY, + FS_S3A_COMMITTER_NAME, + MULTIPART_UPLOADS_ENABLED); + conf.setBoolean(MULTIPART_UPLOADS_ENABLED, false); + conf.set(S3A_COMMITTER_FACTORY_KEY, CommitConstants.S3A_COMMITTER_FACTORY); + conf.set(FS_S3A_COMMITTER_NAME, CommitConstants.COMMITTER_NAME_MAGIC); + return conf; + } + + @Test + public void testCreateCommitter() throws Exception { + TaskAttemptContext tContext = new TaskAttemptContextImpl(getConfiguration(), + new TaskAttemptID()); + Path commitPath = methodPath(); + LOG.debug("Trying to create a committer on the path: {}", commitPath); + intercept(PathCommitException.class, + () -> new MagicS3GuardCommitter(commitPath, tContext)); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocolFailure.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocolFailure.java new file mode 100644 index 00000000000..a6d2c57d1d2 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocolFailure.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.commit.staging.integration; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; +import org.apache.hadoop.fs.s3a.commit.CommitConstants; +import org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants; +import org.apache.hadoop.fs.s3a.commit.PathCommitException; +import org.apache.hadoop.fs.s3a.commit.staging.StagingCommitter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; + +import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_UPLOADS_ENABLED; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; +import static org.apache.hadoop.fs.s3a.commit.CommitConstants.FS_S3A_COMMITTER_NAME; +import static org.apache.hadoop.fs.s3a.commit.CommitConstants.S3A_COMMITTER_FACTORY_KEY; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Verify that a staging committer cannot be created if the FS doesn't support multipart + * uploads. + */ +public class ITestStagingCommitProtocolFailure extends AbstractS3ATestBase { + + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + removeBucketOverrides(getTestBucketName(conf), conf, + S3A_COMMITTER_FACTORY_KEY, + FS_S3A_COMMITTER_NAME, + MULTIPART_UPLOADS_ENABLED); + conf.setBoolean(MULTIPART_UPLOADS_ENABLED, false); + conf.set(S3A_COMMITTER_FACTORY_KEY, CommitConstants.S3A_COMMITTER_FACTORY); + conf.set(FS_S3A_COMMITTER_NAME, InternalCommitterConstants.COMMITTER_NAME_STAGING); + return conf; + } + + @Test + public void testCreateCommitter() throws Exception { + TaskAttemptContext tContext = new TaskAttemptContextImpl(getConfiguration(), + new TaskAttemptID()); + Path commitPath = methodPath(); + LOG.debug("Trying to create a committer on the path: {}", commitPath); + intercept(PathCommitException.class, + () -> new StagingCommitter(commitPath, tContext)); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java index 5c243bb820f..7c85142d437 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java @@ -20,6 +20,7 @@ package org.apache.hadoop.fs.s3a.impl; import java.io.ByteArrayInputStream; import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.concurrent.atomic.AtomicLong; @@ -155,7 +156,7 @@ public class TestRequestFactory extends AbstractHadoopTestBase { * Create objects through the factory. 
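+   * The helper now declares IOException because the factory can reject
+   * multipart requests when multipart uploads are disabled.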
* @param factory factory */ - private void createFactoryObjects(RequestFactory factory) { + private void createFactoryObjects(RequestFactory factory) throws IOException { String path = "path"; String path2 = "path2"; String id = "1"; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFileUploadSinglePut.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFileUploadSinglePut.java new file mode 100644 index 00000000000..08192969e2d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFileUploadSinglePut.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.scale; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.junit.Test; + +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.Constants; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.IO_CHUNK_BUFFER_SIZE; +import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER; +import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_DISK; +import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE; +import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_UPLOADS_ENABLED; +import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestPropertyBytes; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; +import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_PUT_REQUESTS; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter; + +/** + * Test a file upload using a single PUT operation. Multipart uploads will + * be disabled in the test. 
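+ * The test buffers blocks to disk, sets a deliberately small fs.s3a.multipart.size
+ * to verify the part size does not influence block allocation, and raises the
+ * request timeout so the single large PUT has time to complete.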
+ */ +public class ITestS3AHugeFileUploadSinglePut extends S3AScaleTestBase { + + public static final Logger LOG = LoggerFactory.getLogger( + ITestS3AHugeFileUploadSinglePut.class); + + private long fileSize; + + @Override + protected Configuration createScaleConfiguration() { + Configuration conf = super.createScaleConfiguration(); + removeBucketOverrides(getTestBucketName(conf), conf, + FAST_UPLOAD_BUFFER, + IO_CHUNK_BUFFER_SIZE, + KEY_HUGE_FILESIZE, + MULTIPART_UPLOADS_ENABLED, + MULTIPART_SIZE, + REQUEST_TIMEOUT); + conf.setBoolean(Constants.MULTIPART_UPLOADS_ENABLED, false); + fileSize = getTestPropertyBytes(conf, KEY_HUGE_FILESIZE, + DEFAULT_HUGE_FILESIZE); + // set a small part size to verify it does not impact block allocation size + conf.setLong(MULTIPART_SIZE, 10_000); + conf.set(FAST_UPLOAD_BUFFER, FAST_UPLOAD_BUFFER_DISK); + conf.setInt(IO_CHUNK_BUFFER_SIZE, 655360); + conf.set(REQUEST_TIMEOUT, "1h"); + return conf; + } + + @Test + public void uploadFileSinglePut() throws IOException { + LOG.info("Creating file with size : {}", fileSize); + S3AFileSystem fs = getFileSystem(); + ContractTestUtils.createAndVerifyFile(fs, + methodPath(), fileSize); + // Exactly three put requests should be made during the upload of the file + // First one being the creation of the directory marker + // Second being the creation of the test file + // Third being the creation of directory marker on the file delete + assertThatStatisticCounter(fs.getIOStatistics(), OBJECT_PUT_REQUESTS.getSymbol()) + .isEqualTo(3); + } +} From dd6d0ac5108ffa616241886d9e8d8f07dbc034cf Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Wed, 12 Apr 2023 11:08:23 +0800 Subject: [PATCH 66/97] YARN-11462. Fix Typo of hadoop-yarn-common. (#5539) Co-authored-by: Shilun Fan Reviewed-by: He Xiaoqiao Signed-off-by: Shilun Fan --- .../pb/client/ContainerManagementProtocolPBClientImpl.java | 4 ++-- .../hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java | 2 +- .../apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java | 2 +- .../yarn/security/client/ClientToAMTokenIdentifier.java | 2 +- .../impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java | 2 +- .../main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java | 2 +- .../apache/hadoop/yarn/webapp/GenericExceptionHandler.java | 2 +- .../java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java | 2 +- .../org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java | 2 +- .../api/records/impl/pb/TestSerializedExceptionPBImpl.java | 2 +- .../hadoop/yarn/client/api/impl/TestTimelineClient.java | 2 +- .../yarn/client/api/impl/TestTimelineClientForATS1_5.java | 2 +- .../hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java | 4 ++-- .../org/apache/hadoop/yarn/conf/TestYarnConfiguration.java | 4 ++-- .../ifile/TestLogAggregationIndexedFileController.java | 2 +- .../hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java | 2 +- 16 files changed, 19 insertions(+), 19 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java index 86fc398f252..cdd2661f05a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java @@ -109,11 +109,11 @@ public class ContainerManagementProtocolPBClientImpl implements ContainerManagem ProtobufRpcEngine2.class); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - int expireIntvl = conf.getInt(NM_COMMAND_TIMEOUT, DEFAULT_COMMAND_TIMEOUT); + int expireInterval = conf.getInt(NM_COMMAND_TIMEOUT, DEFAULT_COMMAND_TIMEOUT); proxy = (ContainerManagementProtocolPB) RPC.getProxy(ContainerManagementProtocolPB.class, clientVersion, addr, ugi, conf, - NetUtils.getDefaultSocketFactory(conf), expireIntvl); + NetUtils.getDefaultSocketFactory(conf), expireInterval); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java index 83a4df451bb..ed74addd162 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java @@ -531,7 +531,7 @@ public class TimelineV2ClientImpl extends TimelineV2Client { count++; if (count == numberOfAsyncsToMerge) { // Flush the entities if the number of the async - // putEntites merged reaches the desired limit. To avoid + // putEntities merged reaches the desired limit. To avoid // collecting multiple entities and delaying for a long // time. entitiesHolder.run(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java index 15d4efc03e6..fed6e46e497 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/FSStoreOpHandler.java @@ -62,7 +62,7 @@ public class FSStoreOpHandler { registerLog(NODE_LABEL_STORE, RemoveClusterLabelOp.OPCODE, RemoveClusterLabelOp.class); - //NodeAttibute operation + //NodeAttribute operation registerLog(NODE_ATTRIBUTE, AddNodeToAttributeLogOp.OPCODE, AddNodeToAttributeLogOp.class); registerLog(NODE_ATTRIBUTE, RemoveNodeToAttributeLogOp.OPCODE, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/client/ClientToAMTokenIdentifier.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/client/ClientToAMTokenIdentifier.java index 2085982e2fe..e18a1a2d472 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/client/ClientToAMTokenIdentifier.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/client/ClientToAMTokenIdentifier.java @@ -44,7 +44,7 @@ public class ClientToAMTokenIdentifier extends TokenIdentifier { private ClientToAMTokenIdentifierProto proto; // TODO: Add more information in the tokenID such that it is not - // transferrable, more secure etc. + // transferable, more secure etc. 
public ClientToAMTokenIdentifier() { } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java index d420bda5d7b..0ab81661c67 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoveFromClusterNodeLabelsRequestPBImpl.java @@ -29,7 +29,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords .RemoveFromClusterNodeLabelsRequest; /** - * Proto class to handlde RemoveFromClusterNodeLabels request. + * Proto class to handle RemoveFromClusterNodeLabels request. */ public class RemoveFromClusterNodeLabelsRequestPBImpl extends RemoveFromClusterNodeLabelsRequest { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java index a859ffbc1f2..8a1686987b6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java @@ -252,7 +252,7 @@ public class Dispatcher extends HttpServlet { checkState(devMode, "only in dev mode"); new Timer("webapp exit", true).schedule(new TimerTask() { @Override public void run() { - LOG.info("WebAppp /{} exiting...", webApp.name()); + LOG.info("WebApp /{} exiting...", webApp.name()); webApp.stop(); System.exit(0); // FINDBUG: this is intended in dev mode } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/GenericExceptionHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/GenericExceptionHandler.java index b8fc9e00541..7cb6018e92a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/GenericExceptionHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/GenericExceptionHandler.java @@ -53,7 +53,7 @@ public class GenericExceptionHandler implements ExceptionMapper { @Override public Response toResponse(Exception e) { if (LOG.isTraceEnabled()) { - LOG.trace("GOT EXCEPITION", e); + LOG.trace("GOT EXCEPTION", e); } // Don't catch this as filter forward on 404 // (ServletContainer.FEATURE_FILTER_FORWARD_ON_404) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java index 210cf0482a0..9c04e00e384 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java @@ -88,7 +88,7 @@ public abstract class HtmlPage extends TextView { } /** - * Render the the HTML page. + * Render the HTML page. 
* @param html the page to render data to. */ protected abstract void render(Page.HTML<__> html); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java index e9ac044affc..2fd6760cd97 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java @@ -150,7 +150,7 @@ public class BasePBImplRecordsTest { } /** - * this method generate record instance by calling newIntance + * this method generate record instance by calling newInstance * using reflection, add register the generated value to typeValueCache */ @SuppressWarnings("rawtypes") diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/impl/pb/TestSerializedExceptionPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/impl/pb/TestSerializedExceptionPBImpl.java index d4bfb318fed..73a8b85922f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/impl/pb/TestSerializedExceptionPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/impl/pb/TestSerializedExceptionPBImpl.java @@ -49,7 +49,7 @@ public class TestSerializedExceptionPBImpl { try { pb.deSerialize(); - fail("deSerialze should throw YarnRuntimeException"); + fail("deSerialize should throw YarnRuntimeException"); } catch (YarnRuntimeException e) { assertEquals(ClassNotFoundException.class, e.getCause().getClass()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java index 507cac61332..4b9b7c5f503 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java @@ -439,7 +439,7 @@ public class TestTimelineClient { public static TimelineDomain generateDomain() { TimelineDomain domain = new TimelineDomain(); - domain.setId("namesapce id"); + domain.setId("namespace id"); domain.setDescription("domain description"); domain.setOwner("domain owner"); domain.setReaders("domain_reader"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientForATS1_5.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientForATS1_5.java index 4d4e412e732..2fdff72a4f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientForATS1_5.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientForATS1_5.java @@ -238,7 +238,7 @@ public class TestTimelineClientForATS1_5 { private static TimelineDomain generateDomain() { TimelineDomain domain = new TimelineDomain(); - 
domain.setId("namesapce id"); + domain.setId("namespace id"); domain.setDescription("domain description"); domain.setOwner("domain owner"); domain.setReaders("domain_reader"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java index a26b4bf0a67..659d6cd7517 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClientV2Impl.java @@ -241,7 +241,7 @@ public class TestTimelineClientV2Impl { @Test void testSyncCall() throws Exception { try { - // sync entity should not be be merged with Async + // sync entity should not be merged with Async client.putEntities(generateEntity("1")); client.putEntitiesAsync(generateEntity("2")); client.putEntitiesAsync(generateEntity("3")); @@ -360,7 +360,7 @@ public class TestTimelineClientV2Impl { new byte[0], "kind", new byte[0], "service"); client.setTimelineCollectorInfo(CollectorInfo.newInstance(null, token)); assertNull(client.currentTimelineToken, - "Timeline token in v2 client should not be set as token kind " + "is unexepcted."); + "Timeline token in v2 client should not be set as token kind " + "is unexpected."); assertEquals(0, ugi.getTokens().size()); token = Token.newInstance(new byte[0], TimelineDelegationTokenIdentifier. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java index b17c1806de3..e4547a9163d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java @@ -36,7 +36,7 @@ public class TestYarnConfiguration { void testDefaultRMWebUrl() throws Exception { YarnConfiguration conf = new YarnConfiguration(); String rmWebUrl = WebAppUtils.getRMWebAppURLWithScheme(conf); - // shouldn't have a "/" on the end of the url as all the other uri routinnes + // shouldn't have a "/" on the end of the url as all the other uri routines // specifically add slashes and Jetty doesn't handle double slashes. assertNotSame("http://0.0.0.0:8088", rmWebUrl, @@ -77,7 +77,7 @@ public class TestYarnConfiguration { String[] parts = rmWebUrl.split(":"); assertEquals(24543, Integer.parseInt(parts[parts.length - 1]), - "RM Web URL Port is incrrect"); + "RM Web URL Port is incorrect"); assertNotSame("http://rmtesting:24543", rmWebUrl, "RM Web Url not resolved correctly. 
Should not be rmtesting"); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexedFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexedFileController.java index cd178382b52..b7fcb18ff60 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexedFileController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexedFileController.java @@ -385,7 +385,7 @@ public class TestLogAggregationIndexedFileController @Test @Timeout(15000) - void testFetchApplictionLogsHar() throws Exception { + void testFetchApplicationLogsHar() throws Exception { List newLogTypes = new ArrayList<>(); newLogTypes.add("syslog"); newLogTypes.add("stdout"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java index e769a21a750..099684318f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java @@ -143,7 +143,7 @@ public class TestFileSystemNodeLabelsStore extends NodeLabelTestBase { "p4", toSet(toNodeId("n4")), "p2", toSet(toNodeId("n2")))); - // stutdown mgr and start a new mgr + // shutdown mgr and start a new mgr mgr.stop(); mgr = new MockNodeLabelManager(); mgr.init(conf); From 06f9bdffa6a717600e46f5d6b6efd7783121b546 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Thu, 13 Apr 2023 00:53:20 +0800 Subject: [PATCH 67/97] YARN-10846. Add dispatcher metrics to NM. 
(#4687) --- .../hadoop/yarn/conf/YarnConfiguration.java | 4 + .../yarn/metrics/GenericEventTypeMetrics.java | 6 +- .../src/main/resources/yarn-default.xml | 10 ++ .../GenericEventTypeMetricsManager.java | 43 +++++++++ .../yarn/server/nodemanager/NodeManager.java | 24 ++++- .../ContainerManagerImpl.java | 70 +++++++++++++- .../nodemanager/DummyContainerManager.java | 10 +- .../BaseContainerSchedulerTest.java | 2 +- .../TestContainerManagerRecovery.java | 4 +- .../metrics/TestNodeManagerMetrics.java | 94 +++++++++++++++++++ .../GenericEventTypeMetricsManager.java | 6 +- .../hadoop/yarn/server/MiniYARNCluster.java | 10 +- 12 files changed, 261 insertions(+), 22 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/GenericEventTypeMetricsManager.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index eb7d3143ca7..6d77eb492dc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -3070,6 +3070,10 @@ public class YarnConfiguration extends Configuration { + "amrmproxy.ha.enable"; public static final boolean DEFAULT_AMRM_PROXY_HA_ENABLED = false; + // Enable NM Dispatcher Metric default False. + public static final String NM_DISPATCHER_METRIC_ENABLED = NM_PREFIX + "dispatcher.metric.enable"; + public static final boolean DEFAULT_NM_DISPATCHER_METRIC_ENABLED = false; + /** * Default platform-agnostic CLASSPATH for YARN applications. A * comma-separated list of CLASSPATH entries. The parameter expansion marker diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/metrics/GenericEventTypeMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/metrics/GenericEventTypeMetrics.java index 464edb27782..1809ad159d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/metrics/GenericEventTypeMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/metrics/GenericEventTypeMetrics.java @@ -57,10 +57,8 @@ public class GenericEventTypeMetrics> //Initialize enum for (final T type : enums) { - String eventCountMetricsName = - type.toString() + "_" + "event_count"; - String processingTimeMetricsName = - type.toString() + "_" + "processing_time"; + String eventCountMetricsName = type + "_" + "event_count"; + String processingTimeMetricsName = type + "_" + "processing_time"; eventCountMetrics.put(type, this.registry. newGauge(eventCountMetricsName, eventCountMetricsName, 0L)); processingTimeMetrics.put(type, this.registry. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index ab422330788..b9385d1c276 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -5065,6 +5065,16 @@ + + yarn.nodemanager.dispatcher.metric.enable + false + + Yarn NodeManager enables Dispatcher Metric. + if true, will enable dispatcher metric; if false, will not enable dispatcher metric; + Default is false. + + + yarn.router.interceptor.user-thread-pool.minimum-pool-size 5 diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/GenericEventTypeMetricsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/GenericEventTypeMetricsManager.java new file mode 100644 index 00000000000..88adf8a0d51 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/GenericEventTypeMetricsManager.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.nodemanager; + +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics; + +import static org.apache.hadoop.metrics2.lib.Interns.info; + +public final class GenericEventTypeMetricsManager { + + private GenericEventTypeMetricsManager() { + // nothing to do + } + + // Construct a GenericEventTypeMetrics for dispatcher + @SuppressWarnings("unchecked") + public static > GenericEventTypeMetrics + create(String dispatcherName, Class eventTypeClass) { + return new GenericEventTypeMetrics.EventTypeMetricsBuilder() + .setMs(DefaultMetricsSystem.instance()) + .setInfo(info("GenericEventTypeMetrics for " + eventTypeClass.getName(), + "Metrics for " + dispatcherName)) + .setEnumClass(eventTypeClass) + .setEnums(eventTypeClass.getEnumConstants()) + .build().registerMetrics(); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 81e60361dff..438a39b0973 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics; import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport; import org.apache.hadoop.yarn.server.api.records.AppCollectorData; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; @@ -144,8 +145,10 @@ public class NodeManager extends CompositeService private AtomicBoolean isStopping = new AtomicBoolean(false); private boolean rmWorkPreservingRestartEnabled; private boolean shouldExitOnShutdownEvent = false; + private boolean nmDispatherMetricEnabled; private NMLogAggregationStatusTracker nmLogAggregationStatusTracker; + /** * Default Container State transition listener. */ @@ -366,6 +369,10 @@ public class NodeManager extends CompositeService .RM_WORK_PRESERVING_RECOVERY_ENABLED, YarnConfiguration.DEFAULT_RM_WORK_PRESERVING_RECOVERY_ENABLED); + nmDispatherMetricEnabled = conf.getBoolean( + YarnConfiguration.NM_DISPATCHER_METRIC_ENABLED, + YarnConfiguration.DEFAULT_NM_DISPATCHER_METRIC_ENABLED); + try { initAndStartRecoveryStore(conf); } catch (IOException e) { @@ -1006,8 +1013,17 @@ public class NodeManager extends CompositeService /** * Unit test friendly. 
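+   * When yarn.nodemanager.dispatcher.metric.enable is set, the returned dispatcher
+   * is also wired with GenericEventTypeMetrics for ContainerManagerEventType.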
*/ + @SuppressWarnings("unchecked") protected AsyncDispatcher createNMDispatcher() { - return new AsyncDispatcher("NM Event dispatcher"); + dispatcher = new AsyncDispatcher("NM Event dispatcher"); + if (nmDispatherMetricEnabled) { + GenericEventTypeMetrics eventTypeMetrics = + GenericEventTypeMetricsManager.create(dispatcher.getName(), + ContainerManagerEventType.class); + dispatcher.addMetrics(eventTypeMetrics, eventTypeMetrics.getEnumClass()); + LOG.info("NM Event dispatcher Metric Initialization Completed."); + } + return dispatcher; } //For testing @@ -1052,4 +1068,10 @@ public class NodeManager extends CompositeService Context ctxt) { return new NMLogAggregationStatusTracker(ctxt); } + + @VisibleForTesting + @Private + public AsyncDispatcher getDispatcher() { + return dispatcher; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index cc5f0d914b8..e07a0e1cc18 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -24,7 +24,9 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.yarn.api.protocolrecords.GetLocalizationStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetLocalizationStatusesResponse; import org.apache.hadoop.yarn.api.records.LocalizationStatus; +import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.UpdateContainerTokenEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerTokenUpdatedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent; import org.apache.hadoop.yarn.server.nodemanager.recovery.RecoveryIterator; @@ -105,6 +107,7 @@ import org.apache.hadoop.yarn.server.api.ContainerType; import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; +import org.apache.hadoop.yarn.server.nodemanager.GenericEventTypeMetricsManager; import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedContainersEvent; import org.apache.hadoop.yarn.server.nodemanager.CMgrUpdateContainersEvent; import org.apache.hadoop.yarn.server.nodemanager.CMgrSignalContainersEvent; @@ -120,6 +123,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.amrmproxy.AMRMProxyService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerInitEvent; + import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationFinishEvent; @@ -217,7 +221,7 @@ public class ContainerManagerImpl extends CompositeService implements protected final NodeStatusUpdater nodeStatusUpdater; protected LocalDirsHandlerService dirsHandler; - protected final AsyncDispatcher dispatcher; + private AsyncDispatcher dispatcher; private final DeletionService deletionService; private LogHandler logHandler; @@ -233,6 +237,7 @@ public class ContainerManagerImpl extends CompositeService implements // NM metrics publisher is set only if the timeline service v.2 is enabled private NMTimelinePublisher nmMetricsPublisher; private boolean timelineServiceV2Enabled; + private boolean nmDispatherMetricEnabled; public ContainerManagerImpl(Context context, ContainerExecutor exec, DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater, @@ -242,7 +247,7 @@ public class ContainerManagerImpl extends CompositeService implements this.dirsHandler = dirsHandler; // ContainerManager level dispatcher. - dispatcher = new AsyncDispatcher("NM ContainerManager dispatcher"); + dispatcher = createContainerManagerDispatcher(); this.deletionService = deletionContext; this.metrics = metrics; @@ -324,10 +329,67 @@ public class ContainerManagerImpl extends CompositeService implements YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) + SHUTDOWN_CLEANUP_SLOP_MS; + nmDispatherMetricEnabled = conf.getBoolean( + YarnConfiguration.NM_DISPATCHER_METRIC_ENABLED, + YarnConfiguration.DEFAULT_NM_DISPATCHER_METRIC_ENABLED); + super.serviceInit(conf); recover(); } + @SuppressWarnings("unchecked") + protected AsyncDispatcher createContainerManagerDispatcher() { + dispatcher = new AsyncDispatcher("NM ContainerManager dispatcher"); + + if (!nmDispatherMetricEnabled) { + return dispatcher; + } + + GenericEventTypeMetrics containerEventTypeMetrics = + GenericEventTypeMetricsManager.create(dispatcher.getName(), ContainerEventType.class); + dispatcher.addMetrics(containerEventTypeMetrics, containerEventTypeMetrics.getEnumClass()); + + GenericEventTypeMetrics localizationEventTypeMetrics = + GenericEventTypeMetricsManager.create(dispatcher.getName(), LocalizationEventType.class); + dispatcher.addMetrics(localizationEventTypeMetrics, + localizationEventTypeMetrics.getEnumClass()); + + GenericEventTypeMetrics applicationEventTypeMetrics = + GenericEventTypeMetricsManager.create(dispatcher.getName(), ApplicationEventType.class); + dispatcher.addMetrics(applicationEventTypeMetrics, + applicationEventTypeMetrics.getEnumClass()); + + GenericEventTypeMetrics containersLauncherEventTypeMetrics = + GenericEventTypeMetricsManager.create(dispatcher.getName(), + ContainersLauncherEventType.class); + dispatcher.addMetrics(containersLauncherEventTypeMetrics, + containersLauncherEventTypeMetrics.getEnumClass()); + + GenericEventTypeMetrics containerSchedulerEventTypeMetrics = + GenericEventTypeMetricsManager.create(dispatcher.getName(), + ContainerSchedulerEventType.class); + dispatcher.addMetrics(containerSchedulerEventTypeMetrics, + containerSchedulerEventTypeMetrics.getEnumClass()); + + GenericEventTypeMetrics containersMonitorEventTypeMetrics = + GenericEventTypeMetricsManager.create(dispatcher.getName(), + ContainersMonitorEventType.class); + dispatcher.addMetrics(containersMonitorEventTypeMetrics, + containersMonitorEventTypeMetrics.getEnumClass()); + + GenericEventTypeMetrics 
auxServicesEventTypeTypeMetrics = + GenericEventTypeMetricsManager.create(dispatcher.getName(), AuxServicesEventType.class); + dispatcher.addMetrics(auxServicesEventTypeTypeMetrics, + auxServicesEventTypeTypeMetrics.getEnumClass()); + + GenericEventTypeMetrics localizerEventTypeMetrics = + GenericEventTypeMetricsManager.create(dispatcher.getName(), LocalizerEventType.class); + dispatcher.addMetrics(localizerEventTypeMetrics, localizerEventTypeMetrics.getEnumClass()); + LOG.info("NM ContainerManager dispatcher Metric Initialization Completed."); + + return dispatcher; + } + protected void createAMRMProxyService(Configuration conf) { this.amrmProxyEnabled = conf.getBoolean(YarnConfiguration.AMRM_PROXY_ENABLED, @@ -2034,4 +2096,8 @@ public class ContainerManagerImpl extends CompositeService implements public ResourceLocalizationService getResourceLocalizationService() { return rsrcLocalizationSrvc; } + + public AsyncDispatcher getDispatcher() { + return dispatcher; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java index 1acf3e9a378..fa6d04c044f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java @@ -70,7 +70,7 @@ public class DummyContainerManager extends ContainerManagerImpl { NodeManagerMetrics metrics, LocalDirsHandlerService dirsHandler) { super(context, exec, deletionContext, nodeStatusUpdater, metrics, dirsHandler); - dispatcher.disableExitOnDispatchException(); + getDispatcher().disableExitOnDispatchException(); } @Override @@ -78,7 +78,7 @@ public class DummyContainerManager extends ContainerManagerImpl { protected ResourceLocalizationService createResourceLocalizationService( ContainerExecutor exec, DeletionService deletionContext, Context context, NodeManagerMetrics metrics) { - return new ResourceLocalizationService(super.dispatcher, exec, + return new ResourceLocalizationService(getDispatcher(), exec, deletionContext, super.dirsHandler, context, metrics) { @Override public void handle(LocalizationEvent event) { @@ -148,7 +148,7 @@ public class DummyContainerManager extends ContainerManagerImpl { @SuppressWarnings("unchecked") protected ContainersLauncher createContainersLauncher(Context context, ContainerExecutor exec) { - return new ContainersLauncher(context, super.dispatcher, exec, + return new ContainersLauncher(context, getDispatcher(), exec, super.dirsHandler, this) { @Override public void handle(ContainersLauncherEvent event) { @@ -156,12 +156,12 @@ public class DummyContainerManager extends ContainerManagerImpl { ContainerId containerId = container.getContainerId(); switch (event.getType()) { case LAUNCH_CONTAINER: - dispatcher.getEventHandler().handle( + getDispatcher().getEventHandler().handle( new ContainerEvent(containerId, ContainerEventType.CONTAINER_LAUNCHED)); break; case CLEANUP_CONTAINER: - dispatcher.getEventHandler().handle( + getDispatcher().getEventHandler().handle( new ContainerExitEvent(containerId, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, 0, "Container exited 
with exit code 0.")); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerSchedulerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerSchedulerTest.java index 5a495d74137..f3661a68e6c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerSchedulerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerSchedulerTest.java @@ -135,7 +135,7 @@ public class BaseContainerSchedulerTest extends BaseContainerManagerTest { @Override protected ContainersMonitor createContainersMonitor( ContainerExecutor exec) { - return new ContainersMonitorImpl(exec, dispatcher, this.context) { + return new ContainersMonitorImpl(exec, getDispatcher(), this.context) { // Define resources available for containers to be executed. @Override public long getPmemAllocatedForContainers() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index b960f5dea1f..868fb39ed15 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -797,7 +797,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest { } @Override protected ContainerScheduler createContainerScheduler(Context context) { - return new ContainerScheduler(context, dispatcher, metrics){ + return new ContainerScheduler(context, getDispatcher(), metrics){ @Override public ContainersMonitor getContainersMonitor() { return new ContainersMonitorImpl(null, null, null) { @@ -1001,7 +1001,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest { return null; } }; - containerManager.dispatcher.disableExitOnDispatchException(); + containerManager.getDispatcher().disableExitOnDispatchException(); return containerManager; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java index 33a3ae12f10..84216665156 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java @@ -17,11 +17,24 @@ */ package org.apache.hadoop.yarn.server.nodemanager.metrics; +import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.source.JvmMetrics; + +import static org.apache.hadoop.metrics2.lib.Interns.info; import static org.apache.hadoop.test.MetricsAsserts.*; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.event.Event; +import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics; +import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.util.Records; import org.junit.After; @@ -37,6 +50,7 @@ public class TestNodeManagerMetrics { @Before public void setup() { DefaultMetricsSystem.initialize("NodeManager"); + DefaultMetricsSystem.setMiniClusterMode(true); metrics = NodeManagerMetrics.create(); } @@ -140,4 +154,84 @@ public class TestNodeManagerMetrics { assertGauge("NodeGpuUtilization", nodeGpuUtilization, rb); assertGauge("ApplicationsRunning", applicationsRunning, rb); } + + private enum TestEnum { + TestEventType + } + + private static class TestHandler implements EventHandler { + + private long sleepTime = 1500; + + TestHandler() { + } + + TestHandler(long sleepTime) { + this.sleepTime = sleepTime; + } + + @Override + public void handle(Event event) { + try { + // As long as 10000 events queued + Thread.sleep(this.sleepTime); + } catch (InterruptedException e) { + } + } + } + + @Test + @SuppressWarnings("unchecked") + public void testNMDispatcherMetricsHistogram() throws Exception { + YarnConfiguration conf = new YarnConfiguration(); + + NodeManager nm = new NodeManager(); + nm.init(conf); + AsyncDispatcher dispatcher = nm.getDispatcher(); + + MetricsInfo metricsInfo = info( + "GenericEventTypeMetrics for " + TestEnum.class.getName(), + "Metrics for " + dispatcher.getName()); + + GenericEventTypeMetrics genericEventTypeMetrics = + new GenericEventTypeMetrics.EventTypeMetricsBuilder() + .setMs(DefaultMetricsSystem.instance()) + .setInfo(metricsInfo) + .setEnumClass(TestEnum.class) + .setEnums(TestEnum.class.getEnumConstants()) + .build().registerMetrics(); + + dispatcher.addMetrics(genericEventTypeMetrics, genericEventTypeMetrics.getEnumClass()); + dispatcher.init(conf); + + // Register handler + dispatcher.register(TestEnum.class, new TestHandler()); + dispatcher.start(); + + for (int i = 0; i < 3; ++i) { + Event event = mock(Event.class); + when(event.getType()).thenReturn(TestEnum.TestEventType); + dispatcher.getEventHandler().handle(event); + } + + // Check event type count. + GenericTestUtils.waitFor(() -> genericEventTypeMetrics. 
+ get(TestEnum.TestEventType) == 3, 1000, 10000); + + String testEventTypeCountExpect = + Long.toString(genericEventTypeMetrics.get(TestEnum.TestEventType)); + Assert.assertNotNull(testEventTypeCountExpect); + String testEventTypeCountMetric = + genericEventTypeMetrics.getRegistry().get("TestEventType_event_count").toString(); + Assert.assertNotNull(testEventTypeCountMetric); + Assert.assertEquals(testEventTypeCountExpect, testEventTypeCountMetric); + + String testEventTypeProcessingTimeExpect = + Long.toString(genericEventTypeMetrics.getTotalProcessingTime(TestEnum.TestEventType)); + Assert.assertNotNull(testEventTypeProcessingTimeExpect); + String testEventTypeProcessingTimeMetric = + genericEventTypeMetrics.getRegistry().get("TestEventType_processing_time").toString(); + Assert.assertNotNull(testEventTypeProcessingTimeMetric); + Assert.assertEquals(testEventTypeProcessingTimeExpect, testEventTypeProcessingTimeMetric); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/GenericEventTypeMetricsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/GenericEventTypeMetricsManager.java index 8fda9b7f38a..8da793972e2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/GenericEventTypeMetricsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/GenericEventTypeMetricsManager.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.yarn.server.resourcemanager; +import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.yarn.metrics.GenericEventTypeMetrics; @@ -31,10 +32,11 @@ public final class GenericEventTypeMetricsManager { // Construct a GenericEventTypeMetrics for dispatcher public static > GenericEventTypeMetrics create(String dispatcherName, Class eventTypeClass) { + MetricsInfo metricsInfo = info("GenericEventTypeMetrics for " + eventTypeClass.getName(), + "Metrics for " + dispatcherName); return new GenericEventTypeMetrics.EventTypeMetricsBuilder() .setMs(DefaultMetricsSystem.instance()) - .setInfo(info("GenericEventTypeMetrics for " + eventTypeClass.getName(), - "Metrics for " + dispatcherName)) + .setInfo(metricsInfo) .setEnumClass(eventTypeClass) .setEnums(eventTypeClass.getEnumConstants()) .build().registerMetrics(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java index ceea3c28213..026495fa202 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java @@ -902,8 +902,8 @@ public class MiniYARNCluster extends CompositeService { LOG.info("CustomAMRMProxyService is enabled. " + "All the AM->RM requests will be intercepted by the proxy"); AMRMProxyService amrmProxyService = - useRpc ? 
new AMRMProxyService(getContext(), dispatcher) - : new ShortCircuitedAMRMProxy(getContext(), dispatcher); + useRpc ? new AMRMProxyService(getContext(), getDispatcher()) + : new ShortCircuitedAMRMProxy(getContext(), getDispatcher()); this.setAMRMProxyService(amrmProxyService); addService(this.getAMRMProxyService()); } else { @@ -934,8 +934,8 @@ public class MiniYARNCluster extends CompositeService { LOG.info("CustomAMRMProxyService is enabled. " + "All the AM->RM requests will be intercepted by the proxy"); AMRMProxyService amrmProxyService = - useRpc ? new AMRMProxyService(getContext(), dispatcher) - : new ShortCircuitedAMRMProxy(getContext(), dispatcher); + useRpc ? new AMRMProxyService(getContext(), getDispatcher()) + : new ShortCircuitedAMRMProxy(getContext(), getDispatcher()); this.setAMRMProxyService(amrmProxyService); addService(this.getAMRMProxyService()); } else { @@ -946,7 +946,7 @@ public class MiniYARNCluster extends CompositeService { @Override protected ContainersMonitor createContainersMonitor(ContainerExecutor exec) { - return new ContainersMonitorImpl(exec, dispatcher, this.context) { + return new ContainersMonitorImpl(exec, getDispatcher(), this.context) { @Override public float getVmemRatio() { return 2.0f; From 2b60d0c1f440e61b57085abd2d72a30db7c013cf Mon Sep 17 00:00:00 2001 From: Melissa You <31492618+melissayou@users.noreply.github.com> Date: Thu, 13 Apr 2023 09:07:42 -0700 Subject: [PATCH 68/97] [HDFS-16971] Add read metrics for remote reads in FileSystem Statistics #5534 (#5536) --- .../java/org/apache/hadoop/fs/FileSystem.java | 34 +++++++++++++++++++ .../fs/FileSystemStorageStatistics.java | 5 ++- .../fs/TestFileSystemStorageStatistics.java | 6 +++- .../org/apache/hadoop/hdfs/DFSClient.java | 10 ++++-- .../apache/hadoop/hdfs/DFSInputStream.java | 9 +++-- .../hadoop/hdfs/DFSStripedInputStream.java | 6 ++-- .../org/apache/hadoop/hdfs/StripeReader.java | 5 ++- 7 files changed, 64 insertions(+), 11 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 763af197a1f..5d8f0e575f2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -3942,6 +3942,7 @@ public abstract class FileSystem extends Configured private volatile long bytesReadDistanceOfThreeOrFour; private volatile long bytesReadDistanceOfFiveOrLarger; private volatile long bytesReadErasureCoded; + private volatile long remoteReadTimeMS; /** * Add another StatisticsData object to this one. 
@@ -3959,6 +3960,7 @@ public abstract class FileSystem extends Configured this.bytesReadDistanceOfFiveOrLarger += other.bytesReadDistanceOfFiveOrLarger; this.bytesReadErasureCoded += other.bytesReadErasureCoded; + this.remoteReadTimeMS += other.remoteReadTimeMS; } /** @@ -3977,6 +3979,7 @@ public abstract class FileSystem extends Configured this.bytesReadDistanceOfFiveOrLarger = -this.bytesReadDistanceOfFiveOrLarger; this.bytesReadErasureCoded = -this.bytesReadErasureCoded; + this.remoteReadTimeMS = -this.remoteReadTimeMS; } @Override @@ -4025,6 +4028,10 @@ public abstract class FileSystem extends Configured public long getBytesReadErasureCoded() { return bytesReadErasureCoded; } + + public long getRemoteReadTimeMS() { + return remoteReadTimeMS; + } } private interface StatisticsAggregator { @@ -4252,6 +4259,14 @@ public abstract class FileSystem extends Configured } } + /** + * Increment the time taken to read bytes from remote in the statistics. + * @param durationMS time taken in ms to read bytes from remote + */ + public void increaseRemoteReadTime(final long durationMS) { + getThreadStatistics().remoteReadTimeMS += durationMS; + } + /** * Apply the given aggregator to all StatisticsData objects associated with * this Statistics object. @@ -4399,6 +4414,25 @@ public abstract class FileSystem extends Configured return bytesRead; } + /** + * Get total time taken in ms for bytes read from remote. + * @return time taken in ms for remote bytes read. + */ + public long getRemoteReadTime() { + return visitAll(new StatisticsAggregator() { + private long remoteReadTimeMS = 0; + + @Override + public void accept(StatisticsData data) { + remoteReadTimeMS += data.remoteReadTimeMS; + } + + public Long aggregate() { + return remoteReadTimeMS; + } + }); + } + /** * Get all statistics data. * MR or other frameworks can use the method to get all statistics at once. 
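For orientation, a minimal sketch (not part of the patch) of how a client in the same JVM could read the new counter once this change is applied; the class name RemoteReadTimeProbe and the use of the "hdfs" scheme with DistributedFileSystem are illustrative assumptions:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class RemoteReadTimeProbe {
  public static void main(String[] args) {
    // Assumes some HDFS reads already ran in this JVM so the per-scheme
    // Statistics object has accumulated data.
    FileSystem.Statistics stats =
        FileSystem.getStatistics("hdfs", DistributedFileSystem.class);
    // getRemoteReadTime() is the accessor added by this patch; it sums
    // remoteReadTimeMS over all per-thread StatisticsData objects.
    System.out.println("remote read time (ms): " + stats.getRemoteReadTime());
    // Pre-existing counter, shown for comparison.
    System.out.println("bytes read: " + stats.getBytesRead());
  }
}

The same value is exported through FileSystemStorageStatistics under the key "remoteReadTimeMS", as the following hunks show.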
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java index 62806d61b54..9e62e63775a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java @@ -47,7 +47,8 @@ public class FileSystemStorageStatistics extends StorageStatistics { "bytesReadDistanceOfOneOrTwo", "bytesReadDistanceOfThreeOrFour", "bytesReadDistanceOfFiveOrLarger", - "bytesReadErasureCoded" + "bytesReadErasureCoded", + "remoteReadTimeMS" }; private static class LongStatisticIterator @@ -107,6 +108,8 @@ public class FileSystemStorageStatistics extends StorageStatistics { return data.getBytesReadDistanceOfFiveOrLarger(); case "bytesReadErasureCoded": return data.getBytesReadErasureCoded(); + case "remoteReadTimeMS": + return data.getRemoteReadTimeMS(); default: return null; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java index 2b4e686e592..e99f0f2348b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java @@ -52,7 +52,8 @@ public class TestFileSystemStorageStatistics { "bytesReadDistanceOfOneOrTwo", "bytesReadDistanceOfThreeOrFour", "bytesReadDistanceOfFiveOrLarger", - "bytesReadErasureCoded" + "bytesReadErasureCoded", + "remoteReadTimeMS" }; private FileSystem.Statistics statistics = @@ -74,6 +75,7 @@ public class TestFileSystemStorageStatistics { statistics.incrementBytesReadByDistance(1, RandomUtils.nextInt(0, 100)); statistics.incrementBytesReadByDistance(3, RandomUtils.nextInt(0, 100)); statistics.incrementBytesReadErasureCoded(RandomUtils.nextInt(0, 100)); + statistics.increaseRemoteReadTime(RandomUtils.nextInt(0, 100)); } @Test @@ -128,6 +130,8 @@ public class TestFileSystemStorageStatistics { return statistics.getBytesReadByDistance(5); case "bytesReadErasureCoded": return statistics.getBytesReadErasureCoded(); + case "remoteReadTimeMS": + return statistics.getRemoteReadTime(); default: return 0; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index acfca6799f4..8faeebe8e85 100755 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -3090,10 +3090,14 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, } } - void updateFileSystemReadStats(int distance, int nRead) { + void updateFileSystemReadStats(int distance, int readBytes, long readTimeMS) { if (stats != null) { - stats.incrementBytesRead(nRead); - stats.incrementBytesReadByDistance(distance, nRead); + stats.incrementBytesRead(readBytes); + stats.incrementBytesReadByDistance(distance, readBytes); + if (distance > 0) { + //remote read + stats.increaseRemoteReadTime(readTimeMS); + } } } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index a8d80016072..b5be33206e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -851,8 +851,9 @@ public class DFSInputStream extends FSInputStream locatedBlocks.getFileLength() - pos); } } + long beginReadMS = Time.monotonicNow(); int result = readBuffer(strategy, realLen, corruptedBlocks); - + long readTimeMS = Time.monotonicNow() - beginReadMS; if (result >= 0) { pos += result; } else { @@ -861,7 +862,7 @@ public class DFSInputStream extends FSInputStream } updateReadStatistics(readStatistics, result, blockReader); dfsClient.updateFileSystemReadStats(blockReader.getNetworkDistance(), - result); + result, readTimeMS); if (readStatistics.getBlockType() == BlockType.STRIPED) { dfsClient.updateFileSystemECReadStats(result); } @@ -1184,6 +1185,7 @@ public class DFSInputStream extends FSInputStream ByteBuffer tmp = buf.duplicate(); tmp.limit(tmp.position() + len); tmp = tmp.slice(); + long beginReadMS = Time.monotonicNow(); int nread = 0; int ret; while (true) { @@ -1193,11 +1195,12 @@ public class DFSInputStream extends FSInputStream } nread += ret; } + long readTimeMS = Time.monotonicNow() - beginReadMS; buf.position(buf.position() + nread); IOUtilsClient.updateReadStatistics(readStatistics, nread, reader); dfsClient.updateFileSystemReadStats( - reader.getNetworkDistance(), nread); + reader.getNetworkDistance(), nread, readTimeMS); if (readStatistics.getBlockType() == BlockType.STRIPED) { dfsClient.updateFileSystemECReadStats(nread); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java index 5ae51709593..6c1bafbef9d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedInputStream.java @@ -331,15 +331,17 @@ public class DFSStripedInputStream extends DFSInputStream { * its ThreadLocal. 
* * @param stats striped read stats + * @param readTimeMS read time metrics in ms + * */ - void updateReadStats(final StripedBlockUtil.BlockReadStats stats) { + void updateReadStats(final StripedBlockUtil.BlockReadStats stats, long readTimeMS) { if (stats == null) { return; } updateReadStatistics(readStatistics, stats.getBytesRead(), stats.isShortCircuit(), stats.getNetworkDistance()); dfsClient.updateFileSystemReadStats(stats.getNetworkDistance(), - stats.getBytesRead()); + stats.getBytesRead(), readTimeMS); assert readStatistics.getBlockType() == BlockType.STRIPED; dfsClient.updateFileSystemECReadStats(stats.getBytesRead()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java index 3fc87c7952a..f2d6732a459 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/StripeReader.java @@ -351,9 +351,12 @@ abstract class StripeReader { // first read failure while (!futures.isEmpty()) { try { + long beginReadMS = Time.monotonicNow(); StripingChunkReadResult r = StripedBlockUtil .getNextCompletedStripedRead(service, futures, 0); - dfsStripedInputStream.updateReadStats(r.getReadStats()); + long readTimeMS = Time.monotonicNow() - beginReadMS; + + dfsStripedInputStream.updateReadStats(r.getReadStats(), readTimeMS); DFSClient.LOG.debug("Read task returned: {}, for stripe {}", r, alignedStripe); StripingChunk returnedChunk = alignedStripe.chunks[r.index]; From f1936d29f1f28bc0ce833147ff016a07aeb163b6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 13 Apr 2023 10:25:17 -0700 Subject: [PATCH 69/97] HADOOP-18693. Bump derby from 10.10.2.0 to 10.14.2.0 in /hadoop-project (#5427) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- hadoop-project/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 7a57f05011d..4b80849af0a 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -129,7 +129,7 @@ 1.0-alpha-1 3.3.1 4.0.3 - 10.10.2.0 + 10.14.2.0 8.0.29 6.2.1.jre7 4.10.0 From 0185afafeac26a447b6138b2d74a6f5ed0051d0b Mon Sep 17 00:00:00 2001 From: zhangshuyan <81411509+zhangshuyan0@users.noreply.github.com> Date: Fri, 14 Apr 2023 10:33:30 +0800 Subject: [PATCH 70/97] HDFS-16974. Consider volumes average load of each DataNode when choosing target. (#5541). Contributed by Shuyan Zhang. 
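When dfs.namenode.redundancy.considerLoadByVolume is enabled (default false),
BlockPlacementPolicyDefault additionally rejects a candidate whose xceiver count
exceeds considerLoad.factor * (writable volumes on the node) * (cluster average
xceivers per in-service writable volume). For example, with factor 2, an average
of 10 xceivers per writable volume and 4 writable volumes on a node, the node is
skipped once its load exceeds 2 * 4 * 10 = 80. DatanodeDescriptor derives the
writable-volume count from storage reports (storages that are not FAILED and have
remaining space), and DatanodeStats/HeartbeatManager expose the in-service
available-volume count used for the average.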
Signed-off-by: He Xiaoqiao --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 5 + .../BlockPlacementPolicyDefault.java | 16 ++ .../blockmanagement/DatanodeDescriptor.java | 16 ++ .../blockmanagement/DatanodeManager.java | 11 ++ .../blockmanagement/DatanodeStatistics.java | 4 +- .../server/blockmanagement/DatanodeStats.java | 7 + .../blockmanagement/FSClusterStats.java | 14 +- .../blockmanagement/HeartbeatManager.java | 5 + .../src/main/resources/hdfs-default.xml | 8 + .../BaseReplicationPolicyTest.java | 4 +- ...ionPolicyRatioConsiderLoadWithStorage.java | 169 ++++++++++++++++++ 11 files changed, 254 insertions(+), 5 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyRatioConsiderLoadWithStorage.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 3286ffb4f09..1729106ad14 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -271,6 +271,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.namenode.redundancy.considerLoad.factor"; public static final double DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR_DEFAULT = 2.0; + public static final String DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY = + "dfs.namenode.redundancy.considerLoadByVolume"; + public static final boolean + DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_DEFAULT + = false; public static final String DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY = HdfsClientConfigKeys.DeprecatedKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY; public static final int DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_DEFAULT = 3; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index 1fef3db69d0..3d5ecf9b575 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -82,6 +82,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { NOT_IN_SERVICE("the node is not in service"), NODE_STALE("the node is stale"), NODE_TOO_BUSY("the node is too busy"), + NODE_TOO_BUSY_BY_VOLUME("the node is too busy based on volume load"), TOO_MANY_NODES_ON_RACK("the rack has too many chosen nodes"), NOT_ENOUGH_STORAGE_SPACE("not enough storage space to place the block"), NO_REQUIRED_STORAGE_TYPE("required storage types are unavailable"), @@ -101,6 +102,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { protected boolean considerLoad; private boolean considerLoadByStorageType; protected double considerLoadFactor; + private boolean considerLoadByVolume = false; private boolean preferLocalNode; private boolean dataNodePeerStatsEnabled; private volatile boolean excludeSlowNodesEnabled; @@ -131,6 +133,10 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { this.considerLoadFactor = conf.getDouble( DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR, 
DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR_DEFAULT); + this.considerLoadByVolume = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY, + DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_DEFAULT + ); this.stats = stats; this.clusterMap = clusterMap; this.host2datanodeMap = host2datanodeMap; @@ -1007,6 +1013,16 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { "(load: " + nodeLoad + " > " + maxLoad + ")"); return true; } + if (considerLoadByVolume) { + final int numVolumesAvailable = node.getNumVolumesAvailable(); + final double maxLoadForVolumes = considerLoadFactor * numVolumesAvailable * + stats.getInServiceXceiverAverageForVolume(); + if (maxLoadForVolumes > 0.0 && nodeLoad > maxLoadForVolumes) { + logNodeIsNotChosen(node, NodeNotChosenReason.NODE_TOO_BUSY_BY_VOLUME, + "(load: " + nodeLoad + " > " + maxLoadForVolumes + ") "); + return true; + } + } return false; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index c77d54591a9..352238b7f70 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -233,6 +233,9 @@ public class DatanodeDescriptor extends DatanodeInfo { // HB processing can use it to tell if it is the first HB since DN restarted private boolean heartbeatedSinceRegistration = false; + /** The number of volumes that can be written.*/ + private int numVolumesAvailable = 0; + /** * DatanodeDescriptor constructor * @param nodeID id of the data node @@ -411,6 +414,7 @@ public class DatanodeDescriptor extends DatanodeInfo { long totalNonDfsUsed = 0; Set visitedMount = new HashSet<>(); Set failedStorageInfos = null; + int volumesAvailable = 0; // Decide if we should check for any missing StorageReport and mark it as // failed. There are different scenarios. @@ -489,7 +493,11 @@ public class DatanodeDescriptor extends DatanodeInfo { visitedMount.add(mount); } } + if (report.getRemaining() > 0 && storage.getState() != State.FAILED) { + volumesAvailable += 1; + } } + this.numVolumesAvailable = volumesAvailable; // Update total metrics for the node. setCapacity(totalCapacity); @@ -981,6 +989,14 @@ public class DatanodeDescriptor extends DatanodeInfo { return volumeFailureSummary; } + /** + * Return the number of volumes that can be written. + * @return the number of volumes that can be written. + */ + public int getNumVolumesAvailable() { + return numVolumesAvailable; + } + /** * @param nodeReg DatanodeID to update registration for. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 88f3ac4e7c4..ed60f388d3f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -2101,6 +2101,17 @@ public class DatanodeManager { return avgLoad; } + @Override + public double getInServiceXceiverAverageForVolume() { + double avgLoad = 0; + final int volumes = heartbeatManager.getInServiceAvailableVolumeCount(); + if (volumes > 0) { + final long xceivers = heartbeatManager.getInServiceXceiverCount(); + avgLoad = (double)xceivers/volumes; + } + return avgLoad; + } + @Override public Map getStorageTypeStats() { return heartbeatManager.getStorageTypeStats(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStatistics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStatistics.java index 36a9c2bc095..fcf86195bdf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStatistics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStatistics.java @@ -60,7 +60,9 @@ public interface DatanodeStatistics { /** @return number of non-decommission(ing|ed) nodes */ public int getNumDatanodesInService(); - + + /** @return average xceiver count for writable volumes. */ + int getInServiceAvailableVolumeCount(); /** * @return the total used space by data nodes for non-DFS purposes * such as storing temporary files on the local file system diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStats.java index 912d4d236a6..5bd88b561ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStats.java @@ -44,6 +44,7 @@ class DatanodeStats { private int nodesInService = 0; private int nodesInServiceXceiverCount = 0; + private int nodesInServiceAvailableVolumeCount = 0; private int expiredHeartbeats = 0; synchronized void add(final DatanodeDescriptor node) { @@ -58,6 +59,7 @@ class DatanodeStats { capacityRemaining += node.getRemaining(); cacheCapacity += node.getCacheCapacity(); cacheUsed += node.getCacheUsed(); + nodesInServiceAvailableVolumeCount += node.getNumVolumesAvailable(); } else if (node.isDecommissionInProgress() || node.isEnteringMaintenance()) { cacheCapacity += node.getCacheCapacity(); @@ -87,6 +89,7 @@ class DatanodeStats { capacityRemaining -= node.getRemaining(); cacheCapacity -= node.getCacheCapacity(); cacheUsed -= node.getCacheUsed(); + nodesInServiceAvailableVolumeCount -= node.getNumVolumesAvailable(); } else if (node.isDecommissionInProgress() || node.isEnteringMaintenance()) { cacheCapacity -= node.getCacheCapacity(); @@ -149,6 +152,10 @@ class DatanodeStats { return nodesInServiceXceiverCount; } + synchronized int getNodesInServiceAvailableVolumeCount() { + return 
nodesInServiceAvailableVolumeCount; + } + synchronized int getExpiredHeartbeats() { return expiredHeartbeats; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java index 14122952bb1..217dd36e3ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/FSClusterStats.java @@ -53,14 +53,24 @@ public interface FSClusterStats { public int getNumDatanodesInService(); /** - * an indication of the average load of non-decommission(ing|ed) nodes - * eligible for block placement + * An indication of the average load of non-decommission(ing|ed) nodes + * eligible for block placement. * * @return average of the in service number of block transfers and block * writes that are currently occurring on the cluster. */ public double getInServiceXceiverAverage(); + /** + * An indication of the average load of volumes at non-decommission(ing|ed) + * nodes eligible for block placement. + * + * @return average of in service number of block transfers and block + * writes that are currently occurring on the volumes of the + * cluster. + */ + double getInServiceXceiverAverageForVolume(); + /** * Indicates the storage statistics per storage type. * @return storage statistics per storage type. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java index 01e1b6392a0..429d40d9fbd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java @@ -183,6 +183,11 @@ class HeartbeatManager implements DatanodeStatistics { public int getNumDatanodesInService() { return stats.getNodesInService(); } + + @Override + public int getInServiceAvailableVolumeCount() { + return stats.getNodesInServiceAvailableVolumeCount(); + } @Override public long getCacheCapacity() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index bdd048004d3..8e6ef99040a 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -334,6 +334,14 @@ + + dfs.namenode.redundancy.considerLoadByVolume + false + Decide if chooseTarget considers the target's volume load or + not. 
+ + + dfs.namenode.read.considerLoad false diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java index 1e75452d3d8..c9eb624e5c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java @@ -56,13 +56,13 @@ abstract public class BaseReplicationPolicyTest { protected String blockPlacementPolicy; protected NamenodeProtocols nameNodeRpc = null; - static void updateHeartbeatWithUsage(DatanodeDescriptor dn, + void updateHeartbeatWithUsage(DatanodeDescriptor dn, long capacity, long dfsUsed, long remaining, long blockPoolUsed, long dnCacheCapacity, long dnCacheUsed, int xceiverCount, int volFailures) { dn.getStorageInfos()[0].setUtilizationForTesting( capacity, dfsUsed, remaining, blockPoolUsed); - dn.updateHeartbeat( + dnManager.getHeartbeatManager().updateHeartbeat(dn, BlockManagerTestUtil.getStorageReportsForDatanode(dn), dnCacheCapacity, dnCacheUsed, xceiverCount, volFailures, null); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyRatioConsiderLoadWithStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyRatioConsiderLoadWithStorage.java new file mode 100644 index 00000000000..d06af054699 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyRatioConsiderLoadWithStorage.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.blockmanagement; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.TestBlockStoragePolicy; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Verify that chooseTarget can exclude nodes with high volume average load. 
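+ * Each datanode registers six storages but only some of them are writable,
+ * so the volume-based limit (considerLoad factor * writable volumes * average
+ * xceivers per writable volume) differs from node to node.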
+ */ +public class TestReplicationPolicyRatioConsiderLoadWithStorage + extends BaseReplicationPolicyTest { + + public TestReplicationPolicyRatioConsiderLoadWithStorage() { + this.blockPlacementPolicy = BlockPlacementPolicyDefault.class.getName(); + } + + @Override + DatanodeDescriptor[] getDatanodeDescriptors(Configuration conf) { + conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, + true); + conf.setDouble(DFSConfigKeys + .DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR, 2); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY, true); + + final String[] racks = { + "/rack1", + "/rack2", + "/rack3", + "/rack4", + "/rack5"}; + storages = DFSTestUtil.createDatanodeStorageInfos(racks); + DatanodeDescriptor[] descriptors = + DFSTestUtil.toDatanodeDescriptor(storages); + long storageCapacity = + 2 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE; + // Each datanode has 6 storages, but the number of available storages + // varies. + for (int i = 0; i < descriptors.length; i++) { + for (int j = 0; j < 5; j++) { + DatanodeStorage s = + new DatanodeStorage("s" + i + j); + descriptors[i].updateStorage(s); + + } + for (int j = 0; j < descriptors[i].getStorageInfos().length; j++) { + DatanodeStorageInfo dsInfo = descriptors[i].getStorageInfos()[j]; + if (j > i + 1) { + dsInfo.setUtilizationForTesting(storageCapacity, storageCapacity, 0, + storageCapacity); + } else { + dsInfo.setUtilizationForTesting(storageCapacity, 0, storageCapacity, + 0); + } + } + } + return descriptors; + } + + /** + * Tests that chooseTarget with considerLoad and consider volume load set to + * true and correctly calculates load. + */ + @Test + public void testChooseTargetWithRatioConsiderLoad() { + namenode.getNamesystem().writeLock(); + try { + // After heartbeat has been processed, the total load should be 200. + // And average load per node should be 40. The max load should be 2 * 40; + // And average load per storage should be 10. Considering available + // storages, the max load should be: + // 2*10*2, 3*10*2, 4*10*2, 5*10*2, 6*10*2. + // Considering the load of every node and number of storages: + // Index: 0, 1, 2, 3, 4 + // Available Storage: 2, 3, 4, 5, 6 + // Load: 50, 110, 28, 2, 10 + // So, dataNodes[1] should be never chosen because over-load of node. + // And dataNodes[0] should be never chosen because over-load of per + // storage. + dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[0], + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[0]), + dataNodes[0].getCacheCapacity(), + dataNodes[0].getCacheUsed(), + 50, 0, null); + dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[1], + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[1]), + dataNodes[0].getCacheCapacity(), + dataNodes[0].getCacheUsed(), + 110, 0, null); + dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[2], + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[2]), + dataNodes[0].getCacheCapacity(), + dataNodes[0].getCacheUsed(), + 28, 0, null); + dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[3], + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[3]), + dataNodes[0].getCacheCapacity(), + dataNodes[0].getCacheUsed(), + 2, 0, null); + dnManager.getHeartbeatManager().updateHeartbeat(dataNodes[4], + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[4]), + dataNodes[0].getCacheCapacity(), + dataNodes[0].getCacheUsed(), + 10, 0, null); + + Set targetSet = new HashSet<>(); + + // Try to choose 3 datanode targets. 
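+      // With the heartbeats above, dataNodes[0] exceeds its volume-based
+      // limit (load 50 > 2 * 2 * 10 = 40) and dataNodes[1] exceeds the node
+      // limit (load 110 > 2 * 40 = 80), so only dataNodes[2..4] are eligible.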
+ DatanodeDescriptor writerDn = dataNodes[2]; + DatanodeStorageInfo[] targets = namenode.getNamesystem().getBlockManager() + .getBlockPlacementPolicy() + .chooseTarget("testFile.txt", 3, writerDn, new ArrayList<>(), false, + null, 1024, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY, null); + // The result contains 3 nodes(dataNodes[2],dataNodes[3],dataNodes[4]). + assertEquals(3, targets.length); + for (DatanodeStorageInfo dsi : targets) { + targetSet.add(dsi.getDatanodeDescriptor()); + } + assertTrue(targetSet.contains(dataNodes[2])); + assertTrue(targetSet.contains(dataNodes[3])); + assertTrue(targetSet.contains(dataNodes[4])); + + // Try to choose 4 datanode targets. + targets = namenode.getNamesystem().getBlockManager() + .getBlockPlacementPolicy() + .chooseTarget("testFile.txt", 4, writerDn, new ArrayList<>(), false, + null, 1024, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY, null); + // The result contains 3 nodes(dataNodes[2],dataNodes[3],dataNodes[4]). + assertEquals(3, targets.length); + targetSet.clear(); + for (DatanodeStorageInfo dsi : targets) { + targetSet.add(dsi.getDatanodeDescriptor()); + } + assertTrue(targetSet.contains(dataNodes[2])); + assertTrue(targetSet.contains(dataNodes[3])); + assertTrue(targetSet.contains(dataNodes[4])); + } finally { + namenode.getNamesystem().writeUnlock(); + } + } +} \ No newline at end of file From 0bcdea7912b18c1c9244a37b9ed3d66d6e748c95 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Sat, 15 Apr 2023 04:09:18 +0800 Subject: [PATCH 71/97] YARN-11239. Optimize FederationClientInterceptor audit log. (#5127) --- .../yarn/server/router/RouterAuditLogger.java | 61 +- .../yarn/server/router/RouterServerUtil.java | 13 + .../clientrm/FederationClientInterceptor.java | 526 +++++++++++++----- .../server/router/TestRouterAuditLogger.java | 17 +- 4 files changed, 484 insertions(+), 133 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterAuditLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterAuditLogger.java index f3b428dab4a..bb814b65283 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterAuditLogger.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterAuditLogger.java @@ -30,7 +30,7 @@ import java.net.InetAddress; * Manages Router audit logs. * Audit log format is written as key=value pairs. Tab separated. 
*/ -public class RouterAuditLogger { +public final class RouterAuditLogger { private static final Logger LOG = LoggerFactory.getLogger(RouterAuditLogger.class); @@ -51,6 +51,43 @@ public class RouterAuditLogger { public static final String GET_APP_REPORT = "Get Application Report"; public static final String TARGET_CLIENT_RM_SERVICE = "RouterClientRMService"; public static final String UNKNOWN = "UNKNOWN"; + public static final String GET_APPLICATIONS = "Get Applications"; + public static final String GET_CLUSTERMETRICS = "Get ClusterMetrics"; + public static final String GET_CLUSTERNODES = "Get ClusterNodes"; + public static final String GET_QUEUEINFO = "Get QueueInfo"; + public static final String GET_QUEUE_USER_ACLS = "Get QueueUserAcls"; + public static final String MOVE_APPLICATION_ACROSS_QUEUES = "Move ApplicationAcrossQueues"; + public static final String GET_NEW_RESERVATION = "Get NewReservation"; + public static final String SUBMIT_RESERVATION = "Submit Reservation"; + public static final String LIST_RESERVATIONS = "List Reservations"; + public static final String UPDATE_RESERVATION = "Update Reservation"; + public static final String DELETE_RESERVATION = "Delete Reservation"; + public static final String GET_NODETOLABELS = "Get NodeToLabels"; + public static final String GET_LABELSTONODES = "Get LabelsToNodes"; + public static final String GET_CLUSTERNODELABELS = "Get ClusterNodeLabels"; + public static final String GET_APPLICATION_ATTEMPT_REPORT = "Get ApplicationAttemptReport"; + public static final String GET_APPLICATION_ATTEMPTS = "Get ApplicationAttempts"; + public static final String GET_CONTAINERREPORT = "Get ContainerReport"; + public static final String GET_CONTAINERS = "Get Containers"; + public static final String GET_DELEGATIONTOKEN = "Get DelegationToken"; + public static final String RENEW_DELEGATIONTOKEN = "Renew DelegationToken"; + public static final String CANCEL_DELEGATIONTOKEN = "Cancel DelegationToken"; + public static final String FAIL_APPLICATIONATTEMPT = "Fail ApplicationAttempt"; + public static final String UPDATE_APPLICATIONPRIORITY = "Update ApplicationPriority"; + public static final String SIGNAL_TOCONTAINER = "Signal ToContainer"; + public static final String UPDATE_APPLICATIONTIMEOUTS = "Update ApplicationTimeouts"; + public static final String GET_RESOURCEPROFILES = "Get ResourceProfiles"; + public static final String GET_RESOURCEPROFILE = "Get ResourceProfile"; + public static final String GET_RESOURCETYPEINFO = "Get ResourceTypeInfo"; + public static final String GET_ATTRIBUTESTONODES = "Get AttributesToNodes"; + public static final String GET_CLUSTERNODEATTRIBUTES = "Get ClusterNodeAttributes"; + public static final String GET_NODESTOATTRIBUTES = "Get NodesToAttributes"; + } + + public static void logSuccess(String user, String operation, String target) { + if (LOG.isInfoEnabled()) { + LOG.info(createSuccessLog(user, operation, target, null, null)); + } } /** @@ -146,6 +183,28 @@ public class RouterAuditLogger { } } + /** + * Create a readable and parseable audit log string for a failed event. + * + * @param user User who made the service request. + * @param operation Operation requested by the user. + * @param perm Target permissions. + * @param target The target on which the operation is being performed. + * @param descriptionFormat the description message format string. + * @param args format parameter. + * + *

+ * Note that the {@link RouterAuditLogger} uses tabs ('\t') as a key-val + * delimiter and hence the value fields should not contains tabs ('\t'). + */ + public static void logFailure(String user, String operation, String perm, + String target, String descriptionFormat, Object... args) { + if (LOG.isInfoEnabled()) { + String description = String.format(descriptionFormat, args); + LOG.info(createFailureLog(user, operation, perm, target, description, null, null)); + } + } + /** * Create a readable and parseable audit log string for a failed event. * diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java index 0dbead33f02..dcd7777779b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterServerUtil.java @@ -131,6 +131,19 @@ public final class RouterServerUtil { } } + /** + * Throws an exception due to an error. + * + * @param errMsg the error message + * @throws YarnException on failure + */ + @Public + @Unstable + public static void logAndThrowException(String errMsg) throws YarnException { + LOG.error(errMsg); + throw new YarnException(errMsg); + } + public static R createRequestInterceptorChain(Configuration conf, String pipeLineClassName, String interceptorClassName, Class clazz) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java index a50ea5bc423..345c3b4ba2a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java @@ -115,6 +115,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityReque import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityResponse; import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest; import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ReservationId; @@ -152,6 +153,37 @@ import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConsta import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.FORCE_KILL_APP; import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.TARGET_CLIENT_RM_SERVICE; import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UNKNOWN; +import static 
org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CLUSTERNODES; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_QUEUE_USER_ACLS; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_APPLICATIONS; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CLUSTERMETRICS; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_QUEUEINFO; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.MOVE_APPLICATION_ACROSS_QUEUES; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_NEW_RESERVATION; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.SUBMIT_RESERVATION; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.LIST_RESERVATIONS; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UPDATE_RESERVATION; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.DELETE_RESERVATION; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_NODETOLABELS; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_LABELSTONODES; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CLUSTERNODELABELS; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_APPLICATION_ATTEMPT_REPORT; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_APPLICATION_ATTEMPTS; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CONTAINERREPORT; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CONTAINERS; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_DELEGATIONTOKEN; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.RENEW_DELEGATIONTOKEN; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.CANCEL_DELEGATIONTOKEN; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.FAIL_APPLICATIONATTEMPT; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UPDATE_APPLICATIONPRIORITY; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.SIGNAL_TOCONTAINER; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.UPDATE_APPLICATIONTIMEOUTS; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_RESOURCEPROFILES; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_RESOURCEPROFILE; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_RESOURCETYPEINFO; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_ATTRIBUTESTONODES; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_CLUSTERNODEATTRIBUTES; +import static org.apache.hadoop.yarn.server.router.RouterAuditLogger.AuditConstants.GET_NODESTOATTRIBUTES; /** * Extends the {@code AbstractRequestInterceptorClient} class and provides an @@ -328,6 +360,8 @@ public class FederationClientInterceptor } } catch (Exception e) { routerMetrics.incrAppsFailedCreated(); + 
RouterAuditLogger.logFailure(user.getShortUserName(), GET_NEW_APP, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, e.getMessage()); RouterServerUtil.logAndThrowException(e.getMessage(), e); } @@ -485,6 +519,8 @@ public class FederationClientInterceptor } catch (Exception e) { routerMetrics.incrAppsFailedSubmitted(); + RouterAuditLogger.logFailure(user.getShortUserName(), SUBMIT_NEW_APP, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, e.getMessage(), applicationId); RouterServerUtil.logAndThrowException(e.getMessage(), e); } @@ -734,7 +770,10 @@ public class FederationClientInterceptor throws YarnException, IOException { if (request == null) { routerMetrics.incrMultipleAppsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getApplications request.", null); + String msg = "Missing getApplications request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATIONS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getApplications", @@ -744,10 +783,15 @@ public class FederationClientInterceptor applications = invokeConcurrent(remoteMethod, GetApplicationsResponse.class); } catch (Exception ex) { routerMetrics.incrMultipleAppsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get applications due to exception.", ex); + String msg = "Unable to get applications due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATIONS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededMultipleAppsRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_APPLICATIONS, + TARGET_CLIENT_RM_SERVICE); // Merge the Application Reports return RouterYarnClientUtils.mergeApplications(applications, returnPartialReport); } @@ -757,7 +801,10 @@ public class FederationClientInterceptor GetClusterMetricsRequest request) throws YarnException, IOException { if (request == null) { routerMetrics.incrGetClusterMetricsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getClusterMetrics request.", null); + String msg = "Missing getApplications request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERMETRICS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getClusterMetrics", @@ -767,10 +814,15 @@ public class FederationClientInterceptor clusterMetrics = invokeConcurrent(remoteMethod, GetClusterMetricsResponse.class); } catch (Exception ex) { routerMetrics.incrGetClusterMetricsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get cluster metrics due to exception.", ex); + String msg = "Unable to get cluster metrics due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERMETRICS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetClusterMetricsRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CLUSTERMETRICS, + TARGET_CLIENT_RM_SERVICE); return RouterYarnClientUtils.merge(clusterMetrics); } @@ -811,7 +863,7 @@ public class FederationClientInterceptor results.put(subClusterId, clazz.cast(result)); } catch (InterruptedException | ExecutionException e) { 
Throwable cause = e.getCause(); - LOG.error("Cannot execute {} on {}: {}", request.getMethodName(), + LOG.error("Cannot execute {} on {} : {}", request.getMethodName(), subClusterId.getId(), cause.getMessage()); exceptions.put(subClusterId, e); } @@ -837,7 +889,10 @@ public class FederationClientInterceptor throws YarnException, IOException { if (request == null) { routerMetrics.incrClusterNodesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getClusterNodes request.", null); + String msg = "Missing getClusterNodes request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getClusterNodes", @@ -847,10 +902,15 @@ public class FederationClientInterceptor invokeConcurrent(remoteMethod, GetClusterNodesResponse.class); long stopTime = clock.getTime(); routerMetrics.succeededGetClusterNodesRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CLUSTERNODES, + TARGET_CLIENT_RM_SERVICE); return RouterYarnClientUtils.mergeClusterNodesResponse(clusterNodes); } catch (Exception ex) { routerMetrics.incrClusterNodesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get cluster nodes due to exception.", ex); + String msg = "Unable to get cluster nodes due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } throw new YarnException("Unable to get cluster nodes."); } @@ -860,7 +920,10 @@ public class FederationClientInterceptor throws YarnException, IOException { if (request == null || request.getQueueName() == null) { routerMetrics.incrGetQueueInfoFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getQueueInfo request or queueName.", null); + String msg = "Missing getQueueInfo request or queueName."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_QUEUEINFO, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); @@ -871,11 +934,14 @@ public class FederationClientInterceptor queues = invokeConcurrent(remoteMethod, GetQueueInfoResponse.class); } catch (Exception ex) { routerMetrics.incrGetQueueInfoFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get queue [" + - request.getQueueName() + "] to exception.", ex); + String msg = "Unable to get queue [" + request.getQueueName() + "] to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_QUEUEINFO, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetQueueInfoRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_QUEUEINFO, TARGET_CLIENT_RM_SERVICE); // Merge the GetQueueInfoResponse return RouterYarnClientUtils.mergeQueues(queues); } @@ -885,7 +951,10 @@ public class FederationClientInterceptor GetQueueUserAclsInfoRequest request) throws YarnException, IOException { if(request == null){ routerMetrics.incrQueueUserAclsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getQueueUserAcls request.", null); + String msg = "Missing getQueueUserAcls request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_QUEUE_USER_ACLS, UNKNOWN, + 
TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getQueueUserAcls", @@ -895,10 +964,15 @@ public class FederationClientInterceptor queueUserAcls = invokeConcurrent(remoteMethod, GetQueueUserAclsInfoResponse.class); } catch (Exception ex) { routerMetrics.incrQueueUserAclsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get queue user Acls due to exception.", ex); + String msg = "Unable to get queue user Acls due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_QUEUE_USER_ACLS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetQueueUserAclsRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_QUEUE_USER_ACLS, + TARGET_CLIENT_RM_SERVICE); // Merge the QueueUserAclsInfoResponse return RouterYarnClientUtils.mergeQueueUserAcls(queueUserAcls); } @@ -909,8 +983,11 @@ public class FederationClientInterceptor throws YarnException, IOException { if (request == null || request.getApplicationId() == null || request.getTargetQueue() == null) { routerMetrics.incrMoveApplicationAcrossQueuesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing moveApplicationAcrossQueues request or " + - "applicationId or target queue.", null); + String msg = "Missing moveApplicationAcrossQueues request or " + + "applicationId or target queue."; + RouterAuditLogger.logFailure(user.getShortUserName(), MOVE_APPLICATION_ACROSS_QUEUES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg); } long startTime = clock.getTime(); @@ -922,8 +999,10 @@ public class FederationClientInterceptor .getApplicationHomeSubCluster(applicationId); } catch (YarnException e) { routerMetrics.incrMoveApplicationAcrossQueuesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Application " + - applicationId + " does not exist in FederationStateStore.", e); + String errMsgFormat = "Application %s does not exist in FederationStateStore."; + RouterAuditLogger.logFailure(user.getShortUserName(), MOVE_APPLICATION_ACROSS_QUEUES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, String.format(errMsgFormat, applicationId)); + RouterServerUtil.logAndThrowException(e, errMsgFormat, applicationId); } ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId); @@ -932,8 +1011,9 @@ public class FederationClientInterceptor response = clientRMProxy.moveApplicationAcrossQueues(request); } catch (Exception e) { routerMetrics.incrMoveApplicationAcrossQueuesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to moveApplicationAcrossQueues for " + - applicationId + " to SubCluster " + subClusterId.getId(), e); + RouterServerUtil.logAndThrowException(e, + "Unable to moveApplicationAcrossQueues for %s to SubCluster %s.", applicationId, + subClusterId.getId()); } if (response == null) { @@ -943,6 +1023,8 @@ public class FederationClientInterceptor } long stopTime = clock.getTime(); + RouterAuditLogger.logSuccess(user.getShortUserName(), MOVE_APPLICATION_ACROSS_QUEUES, + TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId); routerMetrics.succeededMoveApplicationAcrossQueuesRetrieved(stopTime - startTime); return response; } @@ -954,6 +1036,8 @@ public class FederationClientInterceptor if (request == null) { routerMetrics.incrGetNewReservationFailedRetrieved(); String errMsg = "Missing 
getNewReservation request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_NEW_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, errMsg); RouterServerUtil.logAndThrowException(errMsg, null); } @@ -969,16 +1053,23 @@ public class FederationClientInterceptor if (response != null) { long stopTime = clock.getTime(); routerMetrics.succeededGetNewReservationRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_NEW_RESERVATION, + TARGET_CLIENT_RM_SERVICE); return response; } } catch (Exception e) { - LOG.warn("Unable to create a new Reservation in SubCluster {}.", subClusterId.getId(), e); + String logFormatted = "Unable to create a new Reservation in SubCluster {}."; + LOG.warn(logFormatted, subClusterId.getId(), e); + RouterAuditLogger.logFailure(user.getShortUserName(), GET_NEW_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, logFormatted, subClusterId.getId()); subClustersActive.remove(subClusterId); } } routerMetrics.incrGetNewReservationFailedRetrieved(); String errMsg = "Failed to create a new reservation."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_NEW_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, errMsg); throw new YarnException(errMsg); } @@ -989,9 +1080,11 @@ public class FederationClientInterceptor if (request == null || request.getReservationId() == null || request.getReservationDefinition() == null || request.getQueue() == null) { routerMetrics.incrSubmitReservationFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing submitReservation request or reservationId " + - "or reservation definition or queue.", null); + String msg = "Missing submitReservation request or reservationId " + + "or reservation definition or queue."; + RouterAuditLogger.logFailure(user.getShortUserName(), SUBMIT_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); @@ -1028,6 +1121,8 @@ public class FederationClientInterceptor LOG.info("Reservation {} submitted on subCluster {}.", reservationId, subClusterId); long stopTime = clock.getTime(); routerMetrics.succeededSubmitReservationRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), SUBMIT_RESERVATION, + TARGET_CLIENT_RM_SERVICE); return response; } } catch (Exception e) { @@ -1037,6 +1132,8 @@ public class FederationClientInterceptor routerMetrics.incrSubmitReservationFailedRetrieved(); String msg = String.format("Reservation %s failed to be submitted.", reservationId); + RouterAuditLogger.logFailure(user.getShortUserName(), SUBMIT_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); throw new YarnException(msg); } @@ -1045,7 +1142,10 @@ public class FederationClientInterceptor ReservationListRequest request) throws YarnException, IOException { if (request == null || request.getReservationId() == null) { routerMetrics.incrListReservationsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing listReservations request.", null); + String msg = "Missing listReservations request."; + RouterAuditLogger.logFailure(user.getShortUserName(), LIST_RESERVATIONS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("listReservations", @@ -1054,12 +1154,16 @@ public class FederationClientInterceptor try { listResponses = invokeConcurrent(remoteMethod, ReservationListResponse.class); } catch (Exception ex) { + String msg = 
"Unable to list reservations node due to exception."; routerMetrics.incrListReservationsFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Unable to list reservations node due to exception.", ex); + RouterAuditLogger.logFailure(user.getShortUserName(), LIST_RESERVATIONS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededListReservationsRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), LIST_RESERVATIONS, + TARGET_CLIENT_RM_SERVICE); // Merge the ReservationListResponse return RouterYarnClientUtils.mergeReservationsList(listResponses); } @@ -1071,8 +1175,10 @@ public class FederationClientInterceptor if (request == null || request.getReservationId() == null || request.getReservationDefinition() == null) { routerMetrics.incrUpdateReservationFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing updateReservation request or reservationId or reservation definition.", null); + String msg = "Missing updateReservation request or reservationId or reservation definition."; + RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); @@ -1085,16 +1191,22 @@ public class FederationClientInterceptor if (response != null) { long stopTime = clock.getTime(); routerMetrics.succeededUpdateReservationRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), UPDATE_RESERVATION, + TARGET_CLIENT_RM_SERVICE); return response; } } catch (Exception ex) { routerMetrics.incrUpdateReservationFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Unable to reservation update due to exception.", ex); + String msg = "Unable to reservation update due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } routerMetrics.incrUpdateReservationFailedRetrieved(); String msg = String.format("Reservation %s failed to be update.", reservationId); + RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); throw new YarnException(msg); } @@ -1103,8 +1215,10 @@ public class FederationClientInterceptor ReservationDeleteRequest request) throws YarnException, IOException { if (request == null || request.getReservationId() == null) { routerMetrics.incrDeleteReservationFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing deleteReservation request or reservationId.", null); + String msg = "Missing deleteReservation request or reservationId."; + RouterServerUtil.logAndThrowException(msg, null); + RouterAuditLogger.logFailure(user.getShortUserName(), DELETE_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); } long startTime = clock.getTime(); @@ -1118,16 +1232,22 @@ public class FederationClientInterceptor federationFacade.deleteReservationHomeSubCluster(reservationId); long stopTime = clock.getTime(); routerMetrics.succeededDeleteReservationRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), DELETE_RESERVATION, + TARGET_CLIENT_RM_SERVICE); return response; } } catch (Exception ex) { routerMetrics.incrUpdateReservationFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Unable to reservation delete due to exception.", ex); + String msg = 
"Unable to reservation delete due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), DELETE_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } routerMetrics.incrDeleteReservationFailedRetrieved(); String msg = String.format("Reservation %s failed to be delete.", reservationId); + RouterAuditLogger.logFailure(user.getShortUserName(), DELETE_RESERVATION, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); throw new YarnException(msg); } @@ -1136,20 +1256,28 @@ public class FederationClientInterceptor GetNodesToLabelsRequest request) throws YarnException, IOException { if (request == null) { routerMetrics.incrNodeToLabelsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getNodesToLabels request.", null); + String msg = "Missing getNodesToLabels request."; + RouterServerUtil.logAndThrowException(msg, null); + RouterAuditLogger.logFailure(user.getShortUserName(), GET_NODETOLABELS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getNodeToLabels", - new Class[] {GetNodesToLabelsRequest.class}, new Object[] {request}); + new Class[] {GetNodesToLabelsRequest.class}, new Object[] {request}); Collection clusterNodes = null; try { clusterNodes = invokeConcurrent(remoteMethod, GetNodesToLabelsResponse.class); } catch (Exception ex) { routerMetrics.incrNodeToLabelsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get node label due to exception.", ex); + String msg = "Unable to get node label due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_NODETOLABELS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetNodeToLabelsRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_NODETOLABELS, + TARGET_CLIENT_RM_SERVICE); // Merge the NodesToLabelsResponse return RouterYarnClientUtils.mergeNodesToLabelsResponse(clusterNodes); } @@ -1159,7 +1287,10 @@ public class FederationClientInterceptor GetLabelsToNodesRequest request) throws YarnException, IOException { if (request == null) { routerMetrics.incrLabelsToNodesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getLabelsToNodes request.", null); + String msg = "Missing getNodesToLabels request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_LABELSTONODES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getLabelsToNodes", @@ -1169,10 +1300,15 @@ public class FederationClientInterceptor labelNodes = invokeConcurrent(remoteMethod, GetLabelsToNodesResponse.class); } catch (Exception ex) { routerMetrics.incrLabelsToNodesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get label node due to exception.", ex); + String msg = "Unable to get label node due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_LABELSTONODES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetLabelsToNodesRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_LABELSTONODES, + TARGET_CLIENT_RM_SERVICE); // Merge the LabelsToNodesResponse return RouterYarnClientUtils.mergeLabelsToNodes(labelNodes); } @@ 
-1182,7 +1318,10 @@ public class FederationClientInterceptor GetClusterNodeLabelsRequest request) throws YarnException, IOException { if (request == null) { routerMetrics.incrClusterNodeLabelsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getClusterNodeLabels request.", null); + String msg = "Missing getClusterNodeLabels request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODELABELS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getClusterNodeLabels", @@ -1192,11 +1331,15 @@ public class FederationClientInterceptor nodeLabels = invokeConcurrent(remoteMethod, GetClusterNodeLabelsResponse.class); } catch (Exception ex) { routerMetrics.incrClusterNodeLabelsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get cluster nodeLabels due to exception.", - ex); + String msg = "Unable to get cluster nodeLabels due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODELABELS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetClusterNodeLabelsRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CLUSTERNODELABELS, + TARGET_CLIENT_RM_SERVICE); // Merge the ClusterNodeLabelsResponse return RouterYarnClientUtils.mergeClusterNodeLabelsResponse(nodeLabels); } @@ -1225,9 +1368,11 @@ public class FederationClientInterceptor if (request == null || request.getApplicationAttemptId() == null || request.getApplicationAttemptId().getApplicationId() == null) { routerMetrics.incrAppAttemptReportFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing getApplicationAttemptReport request or applicationId " + - "or applicationAttemptId information.", null); + String msg = "Missing getApplicationAttemptReport request or applicationId " + + "or applicationAttemptId information."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPT_REPORT, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); @@ -1237,10 +1382,12 @@ public class FederationClientInterceptor subClusterId = getApplicationHomeSubCluster(applicationId); } catch (YarnException e) { routerMetrics.incrAppAttemptReportFailedRetrieved(); - RouterServerUtil.logAndThrowException("ApplicationAttempt " + - request.getApplicationAttemptId() + " belongs to Application " + - request.getApplicationAttemptId().getApplicationId() + - " does not exist in FederationStateStore.", e); + String msgFormat = "ApplicationAttempt %s belongs to " + + "Application %s does not exist in FederationStateStore."; + ApplicationAttemptId applicationAttemptId = request.getApplicationAttemptId(); + RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPT_REPORT, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msgFormat, applicationAttemptId, applicationId); + RouterServerUtil.logAndThrowException(e, msgFormat, applicationAttemptId, applicationId); } ApplicationClientProtocol clientRMProxy = @@ -1254,6 +1401,8 @@ public class FederationClientInterceptor String msg = String.format( "Unable to get the applicationAttempt report for %s to SubCluster %s.", request.getApplicationAttemptId(), subClusterId.getId()); + RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPT_REPORT, UNKNOWN, 
+ TARGET_CLIENT_RM_SERVICE, msg); RouterServerUtil.logAndThrowException(msg, e); } @@ -1265,6 +1414,8 @@ public class FederationClientInterceptor long stopTime = clock.getTime(); routerMetrics.succeededAppAttemptReportRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_APPLICATION_ATTEMPT_REPORT, + TARGET_CLIENT_RM_SERVICE); return response; } @@ -1273,8 +1424,10 @@ public class FederationClientInterceptor GetApplicationAttemptsRequest request) throws YarnException, IOException { if (request == null || request.getApplicationId() == null) { routerMetrics.incrAppAttemptsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getApplicationAttempts " + - "request or application id.", null); + String msg = "Missing getApplicationAttempts request or application id."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPTS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg); } long startTime = clock.getTime(); @@ -1284,8 +1437,10 @@ public class FederationClientInterceptor subClusterId = getApplicationHomeSubCluster(applicationId); } catch (YarnException ex) { routerMetrics.incrAppAttemptsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Application " + applicationId + - " does not exist in FederationStateStore.", ex); + String msg = "Application " + applicationId + " does not exist in FederationStateStore."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPTS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId); @@ -1294,8 +1449,11 @@ public class FederationClientInterceptor response = clientRMProxy.getApplicationAttempts(request); } catch (Exception ex) { routerMetrics.incrAppAttemptsFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get the application attempts for " + - applicationId + " from SubCluster " + subClusterId.getId(), ex); + String msg = "Unable to get the application attempts for " + + applicationId + " from SubCluster " + subClusterId.getId(); + RouterAuditLogger.logFailure(user.getShortUserName(), GET_APPLICATION_ATTEMPTS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } if (response == null) { @@ -1305,6 +1463,8 @@ public class FederationClientInterceptor } long stopTime = clock.getTime(); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_APPLICATION_ATTEMPTS, + TARGET_CLIENT_RM_SERVICE, applicationId); routerMetrics.succeededAppAttemptsRetrieved(stopTime - startTime); return response; } @@ -1314,8 +1474,10 @@ public class FederationClientInterceptor GetContainerReportRequest request) throws YarnException, IOException { if(request == null || request.getContainerId() == null){ routerMetrics.incrGetContainerReportFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getContainerReport request " + - "or containerId", null); + String msg = "Missing getContainerReport request or containerId"; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERREPORT, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); @@ -1326,8 +1488,10 @@ public class FederationClientInterceptor subClusterId = getApplicationHomeSubCluster(applicationId); } catch (YarnException ex) { routerMetrics.incrGetContainerReportFailedRetrieved(); - 
RouterServerUtil.logAndThrowException("Application " + applicationId + - " does not exist in FederationStateStore.", ex); + String msg = "Application " + applicationId + " does not exist in FederationStateStore."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERREPORT, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId); @@ -1348,6 +1512,8 @@ public class FederationClientInterceptor } long stopTime = clock.getTime(); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CONTAINERREPORT, + TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId); routerMetrics.succeededGetContainerReportRetrieved(stopTime - startTime); return response; } @@ -1357,8 +1523,10 @@ public class FederationClientInterceptor throws YarnException, IOException { if (request == null || request.getApplicationAttemptId() == null) { routerMetrics.incrGetContainersFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing getContainers request or ApplicationAttemptId.", null); + String msg = "Missing getContainers request or ApplicationAttemptId."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); @@ -1368,8 +1536,10 @@ public class FederationClientInterceptor subClusterId = getApplicationHomeSubCluster(applicationId); } catch (YarnException ex) { routerMetrics.incrGetContainersFailedRetrieved(); - RouterServerUtil.logAndThrowException("Application " + applicationId + - " does not exist in FederationStateStore.", ex); + String msg = "Application " + applicationId + " does not exist in FederationStateStore."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId); @@ -1379,8 +1549,11 @@ public class FederationClientInterceptor response = clientRMProxy.getContainers(request); } catch (Exception ex) { routerMetrics.incrGetContainersFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get the containers for " + - applicationId + " from SubCluster " + subClusterId.getId(), ex); + String msg = "Unable to get the containers for " + + applicationId + " from SubCluster " + subClusterId.getId(); + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CONTAINERS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } if (response == null) { @@ -1390,6 +1563,8 @@ public class FederationClientInterceptor } long stopTime = clock.getTime(); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CONTAINERS, + TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId); routerMetrics.succeededGetContainersRetrieved(stopTime - startTime); return response; } @@ -1400,16 +1575,20 @@ public class FederationClientInterceptor if (request == null || request.getRenewer() == null) { routerMetrics.incrGetDelegationTokenFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing getDelegationToken request or Renewer.", null); + String msg = "Missing getDelegationToken request or Renewer."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_DELEGATIONTOKEN, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } try { // 
Verify that the connection is kerberos authenticated if (!RouterServerUtil.isAllowedDelegationTokenOp()) { routerMetrics.incrGetDelegationTokenFailedRetrieved(); - throw new IOException( - "Delegation Token can be issued only with kerberos authentication."); + String msg = "Delegation Token can be issued only with kerberos authentication."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_DELEGATIONTOKEN, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + throw new IOException(msg); } long startTime = clock.getTime(); @@ -1432,9 +1611,13 @@ public class FederationClientInterceptor long stopTime = clock.getTime(); routerMetrics.succeededGetDelegationTokenRetrieved((stopTime - startTime)); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_DELEGATIONTOKEN, + TARGET_CLIENT_RM_SERVICE); return GetDelegationTokenResponse.newInstance(routerRMDTToken); } catch(IOException e) { routerMetrics.incrGetDelegationTokenFailedRetrieved(); + RouterAuditLogger.logFailure(user.getShortUserName(), GET_DELEGATIONTOKEN, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, "getDelegationToken error, errMsg = " + e.getMessage()); throw new YarnException(e); } } @@ -1446,8 +1629,10 @@ public class FederationClientInterceptor if (!RouterServerUtil.isAllowedDelegationTokenOp()) { routerMetrics.incrRenewDelegationTokenFailedRetrieved(); - throw new IOException( - "Delegation Token can be renewed only with kerberos authentication"); + String msg = "Delegation Token can be renewed only with kerberos authentication"; + RouterAuditLogger.logFailure(user.getShortUserName(), RENEW_DELEGATIONTOKEN, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + throw new IOException(msg); } long startTime = clock.getTime(); @@ -1455,17 +1640,21 @@ public class FederationClientInterceptor Token token = new Token<>( protoToken.getIdentifier().array(), protoToken.getPassword().array(), new Text(protoToken.getKind()), new Text(protoToken.getService())); - String user = RouterServerUtil.getRenewerForToken(token); - long nextExpTime = this.getTokenSecretManager().renewToken(token, user); + String renewer = RouterServerUtil.getRenewerForToken(token); + long nextExpTime = this.getTokenSecretManager().renewToken(token, renewer); RenewDelegationTokenResponse renewResponse = Records.newRecord(RenewDelegationTokenResponse.class); renewResponse.setNextExpirationTime(nextExpTime); long stopTime = clock.getTime(); routerMetrics.succeededRenewDelegationTokenRetrieved((stopTime - startTime)); + RouterAuditLogger.logSuccess(user.getShortUserName(), RENEW_DELEGATIONTOKEN, + TARGET_CLIENT_RM_SERVICE); return renewResponse; } catch (IOException e) { routerMetrics.incrRenewDelegationTokenFailedRetrieved(); + RouterAuditLogger.logFailure(user.getShortUserName(), RENEW_DELEGATIONTOKEN, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, "renewDelegationToken error, errMsg = " + e.getMessage()); throw new YarnException(e); } } @@ -1476,8 +1665,10 @@ public class FederationClientInterceptor try { if (!RouterServerUtil.isAllowedDelegationTokenOp()) { routerMetrics.incrCancelDelegationTokenFailedRetrieved(); - throw new IOException( - "Delegation Token can be cancelled only with kerberos authentication"); + String msg = "Delegation Token can be cancelled only with kerberos authentication"; + RouterAuditLogger.logFailure(user.getShortUserName(), CANCEL_DELEGATIONTOKEN, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + throw new IOException(msg); } long startTime = clock.getTime(); @@ -1485,13 +1676,17 @@ public class FederationClientInterceptor Token token = new Token<>( 
protoToken.getIdentifier().array(), protoToken.getPassword().array(), new Text(protoToken.getKind()), new Text(protoToken.getService())); - String user = UserGroupInformation.getCurrentUser().getUserName(); - this.getTokenSecretManager().cancelToken(token, user); + String currentUser = UserGroupInformation.getCurrentUser().getUserName(); + this.getTokenSecretManager().cancelToken(token, currentUser); long stopTime = clock.getTime(); routerMetrics.succeededCancelDelegationTokenRetrieved((stopTime - startTime)); + RouterAuditLogger.logSuccess(user.getShortUserName(), CANCEL_DELEGATIONTOKEN, + TARGET_CLIENT_RM_SERVICE); return Records.newRecord(CancelDelegationTokenResponse.class); } catch (IOException e) { routerMetrics.incrCancelDelegationTokenFailedRetrieved(); + RouterAuditLogger.logFailure(user.getShortUserName(), CANCEL_DELEGATIONTOKEN, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, "cancelDelegationToken error, errMsg = " + e.getMessage()); throw new YarnException(e); } } @@ -1502,22 +1697,27 @@ public class FederationClientInterceptor if (request == null || request.getApplicationAttemptId() == null || request.getApplicationAttemptId().getApplicationId() == null) { routerMetrics.incrFailAppAttemptFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing failApplicationAttempt request or applicationId " + - "or applicationAttemptId information.", null); + String msg = "Missing failApplicationAttempt request or applicationId " + + "or applicationAttemptId information."; + RouterAuditLogger.logFailure(user.getShortUserName(), FAIL_APPLICATIONATTEMPT, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); SubClusterId subClusterId = null; - ApplicationId applicationId = request.getApplicationAttemptId().getApplicationId(); + ApplicationAttemptId applicationAttemptId = request.getApplicationAttemptId(); + ApplicationId applicationId = applicationAttemptId.getApplicationId(); try { subClusterId = getApplicationHomeSubCluster(applicationId); } catch (YarnException e) { routerMetrics.incrFailAppAttemptFailedRetrieved(); - RouterServerUtil.logAndThrowException("ApplicationAttempt " + - request.getApplicationAttemptId() + " belongs to Application " + - request.getApplicationAttemptId().getApplicationId() + - " does not exist in FederationStateStore.", e); + String msg = "ApplicationAttempt " + + applicationAttemptId + " belongs to Application " + applicationId + + " does not exist in FederationStateStore."; + RouterAuditLogger.logFailure(user.getShortUserName(), FAIL_APPLICATIONATTEMPT, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, e); } ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId); @@ -1526,8 +1726,11 @@ public class FederationClientInterceptor response = clientRMProxy.failApplicationAttempt(request); } catch (Exception e) { routerMetrics.incrFailAppAttemptFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get the applicationAttempt report for " + - request.getApplicationAttemptId() + " to SubCluster " + subClusterId.getId(), e); + String msg = "Unable to get the applicationAttempt report for " + + applicationAttemptId + " to SubCluster " + subClusterId; + RouterAuditLogger.logFailure(user.getShortUserName(), FAIL_APPLICATIONATTEMPT, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, e); } if (response == null) { @@ -1538,6 +1741,8 @@ public class FederationClientInterceptor long 
stopTime = clock.getTime(); routerMetrics.succeededFailAppAttemptRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), FAIL_APPLICATIONATTEMPT, + TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId); return response; } @@ -1548,9 +1753,11 @@ public class FederationClientInterceptor if (request == null || request.getApplicationId() == null || request.getApplicationPriority() == null) { routerMetrics.incrUpdateAppPriorityFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing updateApplicationPriority request or applicationId " + - "or applicationPriority information.", null); + String msg = "Missing updateApplicationPriority request or applicationId " + + "or applicationPriority information."; + RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONPRIORITY, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); @@ -1561,8 +1768,11 @@ public class FederationClientInterceptor subClusterId = getApplicationHomeSubCluster(applicationId); } catch (YarnException e) { routerMetrics.incrUpdateAppPriorityFailedRetrieved(); - RouterServerUtil.logAndThrowException("Application " + - request.getApplicationId() + " does not exist in FederationStateStore.", e); + String msg = "Application " + + applicationId + " does not exist in FederationStateStore."; + RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONPRIORITY, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, e); } ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId); @@ -1571,8 +1781,11 @@ public class FederationClientInterceptor response = clientRMProxy.updateApplicationPriority(request); } catch (Exception e) { routerMetrics.incrFailAppAttemptFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to update application priority for " + - request.getApplicationId() + " to SubCluster " + subClusterId.getId(), e); + String msg = "Unable to update application priority for " + + applicationId + " to SubCluster " + subClusterId; + RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONPRIORITY, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, e); } if (response == null) { @@ -1583,6 +1796,8 @@ public class FederationClientInterceptor long stopTime = clock.getTime(); routerMetrics.succeededUpdateAppPriorityRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), UPDATE_APPLICATIONPRIORITY, + TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId); return response; } @@ -1592,9 +1807,10 @@ public class FederationClientInterceptor if (request == null || request.getContainerId() == null || request.getCommand() == null) { routerMetrics.incrSignalToContainerFailedRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing signalToContainer request or containerId " + - "or command information.", null); + String msg = "Missing signalToContainer request or containerId or command information."; + RouterAuditLogger.logFailure(user.getShortUserName(), SIGNAL_TOCONTAINER, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); @@ -1605,8 +1821,10 @@ public class FederationClientInterceptor subClusterId = getApplicationHomeSubCluster(applicationId); } catch (YarnException ex) { routerMetrics.incrSignalToContainerFailedRetrieved(); - 
RouterServerUtil.logAndThrowException("Application " + applicationId + - " does not exist in FederationStateStore.", ex); + String msg = "Application " + applicationId + " does not exist in FederationStateStore."; + RouterAuditLogger.logFailure(user.getShortUserName(), SIGNAL_TOCONTAINER, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId); @@ -1614,17 +1832,22 @@ public class FederationClientInterceptor try { response = clientRMProxy.signalToContainer(request); } catch (Exception ex) { - RouterServerUtil.logAndThrowException("Unable to signal to container for " + - applicationId + " from SubCluster " + subClusterId.getId(), ex); + String msg = "Unable to signal to container for " + applicationId + + " from SubCluster " + subClusterId; + RouterAuditLogger.logFailure(user.getShortUserName(), SIGNAL_TOCONTAINER, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } if (response == null) { LOG.error("No response when signal to container of " + - "the applicationId {} to SubCluster {}.", applicationId, subClusterId.getId()); + "the applicationId {} to SubCluster {}.", applicationId, subClusterId); } long stopTime = clock.getTime(); routerMetrics.succeededSignalToContainerRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), SIGNAL_TOCONTAINER, + TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId); return response; } @@ -1635,9 +1858,11 @@ public class FederationClientInterceptor if (request == null || request.getApplicationId() == null || request.getApplicationTimeouts() == null) { routerMetrics.incrUpdateApplicationTimeoutsRetrieved(); - RouterServerUtil.logAndThrowException( - "Missing updateApplicationTimeouts request or applicationId " + - "or applicationTimeouts information.", null); + String msg = "Missing updateApplicationTimeouts request or applicationId or " + + "applicationTimeouts information."; + RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONTIMEOUTS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); @@ -1647,9 +1872,10 @@ public class FederationClientInterceptor subClusterId = getApplicationHomeSubCluster(applicationId); } catch (YarnException e) { routerMetrics.incrFailAppAttemptFailedRetrieved(); - RouterServerUtil.logAndThrowException("Application " + - request.getApplicationId() + - " does not exist in FederationStateStore.", e); + String msg = "Application " + applicationId + " does not exist in FederationStateStore."; + RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONTIMEOUTS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, e); } ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster(subClusterId); @@ -1658,8 +1884,11 @@ public class FederationClientInterceptor response = clientRMProxy.updateApplicationTimeouts(request); } catch (Exception e) { routerMetrics.incrFailAppAttemptFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to update application timeout for " + - request.getApplicationId() + " to SubCluster " + subClusterId.getId(), e); + String msg = "Unable to update application timeout for " + applicationId + + " to SubCluster " + subClusterId; + RouterAuditLogger.logFailure(user.getShortUserName(), UPDATE_APPLICATIONTIMEOUTS, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, 
msg); + RouterServerUtil.logAndThrowException(msg, e); } if (response == null) { @@ -1670,6 +1899,8 @@ public class FederationClientInterceptor long stopTime = clock.getTime(); routerMetrics.succeededUpdateAppTimeoutsRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), UPDATE_APPLICATIONTIMEOUTS, + TARGET_CLIENT_RM_SERVICE, applicationId, subClusterId); return response; } @@ -1678,7 +1909,10 @@ public class FederationClientInterceptor GetAllResourceProfilesRequest request) throws YarnException, IOException { if (request == null) { routerMetrics.incrGetResourceProfilesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getResourceProfiles request.", null); + String msg = "Missing getResourceProfiles request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCEPROFILES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getResourceProfiles", @@ -1688,11 +1922,16 @@ public class FederationClientInterceptor resourceProfiles = invokeConcurrent(remoteMethod, GetAllResourceProfilesResponse.class); } catch (Exception ex) { routerMetrics.incrGetResourceProfilesFailedRetrieved(); + String msg = "Unable to get resource profiles due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCEPROFILES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); RouterServerUtil.logAndThrowException("Unable to get resource profiles due to exception.", ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetResourceProfilesRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_RESOURCEPROFILES, + TARGET_CLIENT_RM_SERVICE); return RouterYarnClientUtils.mergeClusterResourceProfilesResponse(resourceProfiles); } @@ -1701,8 +1940,10 @@ public class FederationClientInterceptor GetResourceProfileRequest request) throws YarnException, IOException { if (request == null || request.getProfileName() == null) { routerMetrics.incrGetResourceProfileFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getResourceProfile request or profileName.", - null); + String msg = "Missing getResourceProfile request or profileName."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCEPROFILE, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getResourceProfile", @@ -1712,11 +1953,15 @@ public class FederationClientInterceptor resourceProfile = invokeConcurrent(remoteMethod, GetResourceProfileResponse.class); } catch (Exception ex) { routerMetrics.incrGetResourceProfileFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get resource profile due to exception.", - ex); + String msg = "Unable to get resource profile due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCEPROFILE, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetResourceProfileRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_RESOURCEPROFILE, + TARGET_CLIENT_RM_SERVICE); return RouterYarnClientUtils.mergeClusterResourceProfileResponse(resourceProfile); } @@ -1725,7 +1970,10 @@ public class FederationClientInterceptor GetAllResourceTypeInfoRequest request) throws 
YarnException, IOException { if (request == null) { routerMetrics.incrResourceTypeInfoFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getResourceTypeInfo request.", null); + String msg = "Missing getResourceTypeInfo request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCETYPEINFO, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getResourceTypeInfo", @@ -1735,11 +1983,16 @@ public class FederationClientInterceptor listResourceTypeInfo = invokeConcurrent(remoteMethod, GetAllResourceTypeInfoResponse.class); } catch (Exception ex) { routerMetrics.incrResourceTypeInfoFailedRetrieved(); - LOG.error("Unable to get all resource type info node due to exception.", ex); + String msg = "Unable to get all resource type info node due to exception."; + LOG.error(msg, ex); + RouterAuditLogger.logFailure(user.getShortUserName(), GET_RESOURCETYPEINFO, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); throw ex; } long stopTime = clock.getTime(); routerMetrics.succeededGetResourceTypeInfoRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_RESOURCETYPEINFO, + TARGET_CLIENT_RM_SERVICE); // Merge the GetAllResourceTypeInfoResponse return RouterYarnClientUtils.mergeResourceTypes(listResourceTypeInfo); } @@ -1755,8 +2008,10 @@ public class FederationClientInterceptor GetAttributesToNodesRequest request) throws YarnException, IOException { if (request == null || request.getNodeAttributes() == null) { routerMetrics.incrGetAttributesToNodesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getAttributesToNodes request " + - "or nodeAttributes.", null); + String msg = "Missing getAttributesToNodes request or nodeAttributes."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_ATTRIBUTESTONODES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getAttributesToNodes", @@ -1767,11 +2022,15 @@ public class FederationClientInterceptor invokeConcurrent(remoteMethod, GetAttributesToNodesResponse.class); } catch (Exception ex) { routerMetrics.incrGetAttributesToNodesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get attributes to nodes due to exception.", - ex); + String msg = "Unable to get attributes to nodes due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_ATTRIBUTESTONODES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetAttributesToNodesRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_ATTRIBUTESTONODES, + TARGET_CLIENT_RM_SERVICE); return RouterYarnClientUtils.mergeAttributesToNodesResponse(attributesToNodesResponses); } @@ -1780,7 +2039,10 @@ public class FederationClientInterceptor GetClusterNodeAttributesRequest request) throws YarnException, IOException { if (request == null) { routerMetrics.incrGetClusterNodeAttributesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getClusterNodeAttributes request.", null); + String msg = "Missing getClusterNodeAttributes request."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODEATTRIBUTES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } 
long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getClusterNodeAttributes", @@ -1791,11 +2053,15 @@ public class FederationClientInterceptor GetClusterNodeAttributesResponse.class); } catch (Exception ex) { routerMetrics.incrGetClusterNodeAttributesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get cluster node attributes due " + - " to exception.", ex); + String msg = "Unable to get cluster node attributes due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_CLUSTERNODEATTRIBUTES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetClusterNodeAttributesRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_CLUSTERNODEATTRIBUTES, + TARGET_CLIENT_RM_SERVICE); return RouterYarnClientUtils.mergeClusterNodeAttributesResponse(clusterNodeAttributesResponses); } @@ -1804,8 +2070,10 @@ public class FederationClientInterceptor GetNodesToAttributesRequest request) throws YarnException, IOException { if (request == null || request.getHostNames() == null) { routerMetrics.incrGetNodesToAttributesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Missing getNodesToAttributes request or " + - "hostNames.", null); + String msg = "Missing getNodesToAttributes request or hostNames."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_NODESTOATTRIBUTES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, null); } long startTime = clock.getTime(); ClientMethod remoteMethod = new ClientMethod("getNodesToAttributes", @@ -1816,11 +2084,15 @@ public class FederationClientInterceptor GetNodesToAttributesResponse.class); } catch (Exception ex) { routerMetrics.incrGetNodesToAttributesFailedRetrieved(); - RouterServerUtil.logAndThrowException("Unable to get nodes to attributes due " + - " to exception.", ex); + String msg = "Unable to get nodes to attributes due to exception."; + RouterAuditLogger.logFailure(user.getShortUserName(), GET_NODESTOATTRIBUTES, UNKNOWN, + TARGET_CLIENT_RM_SERVICE, msg); + RouterServerUtil.logAndThrowException(msg, ex); } long stopTime = clock.getTime(); routerMetrics.succeededGetNodesToAttributesRetrieved(stopTime - startTime); + RouterAuditLogger.logSuccess(user.getShortUserName(), GET_NODESTOATTRIBUTES, + TARGET_CLIENT_RM_SERVICE); return RouterYarnClientUtils.mergeNodesToAttributesResponse(nodesToAttributesResponses); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterAuditLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterAuditLogger.java index 48d3ef6c0fe..287048237ee 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterAuditLogger.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterAuditLogger.java @@ -65,7 +65,7 @@ public class TestRouterAuditLogger { * Test the AuditLog format with key-val pair. 
*/ @Test - public void testKeyValLogFormat() throws Exception { + public void testKeyValLogFormat() { StringBuilder actLog = new StringBuilder(); StringBuilder expLog = new StringBuilder(); @@ -80,7 +80,7 @@ public class TestRouterAuditLogger { assertEquals(expLog.toString(), actLog.toString()); // append another k1=null pair and test - RouterAuditLogger.add(RouterAuditLogger.Keys.APPID, (String) null, actLog); + RouterAuditLogger.add(RouterAuditLogger.Keys.APPID, null, actLog); expLog.append("\tAPPID=null"); assertEquals(expLog.toString(), actLog.toString()); @@ -102,7 +102,10 @@ public class TestRouterAuditLogger { expLog.append("USER=test\t"); if (checkIP) { InetAddress ip = Server.getRemoteIp(); - expLog.append(RouterAuditLogger.Keys.IP.name() + "=" + ip.getHostAddress() + "\t"); + if (ip != null && ip.getHostAddress() != null) { + expLog.append(RouterAuditLogger.Keys.IP.name()) + .append("=").append(ip.getHostAddress()).append("\t"); + } } expLog.append("OPERATION=oper\tTARGET=tgt\tRESULT=SUCCESS"); if (appId != null) { @@ -149,7 +152,11 @@ public class TestRouterAuditLogger { expLog.append("USER=test\t"); if (checkIP) { InetAddress ip = Server.getRemoteIp(); - expLog.append(RouterAuditLogger.Keys.IP.name() + "=" + ip.getHostAddress() + "\t"); + if (ip != null && ip.getHostAddress() != null) { + expLog.append(RouterAuditLogger.Keys.IP.name()) + .append("=") + .append(ip.getHostAddress()).append("\t"); + } } expLog.append("OPERATION=oper\tTARGET=tgt\tRESULT=FAILURE\t"); expLog.append("DESCRIPTION=description of an audit log"); @@ -179,7 +186,7 @@ public class TestRouterAuditLogger { * Test {@link RouterAuditLogger}. */ @Test - public void testRouterAuditLoggerWithOutIP() throws Exception { + public void testRouterAuditLoggerWithOutIP() { testSuccessLogFormat(false); testFailureLogFormat(false); } From 0d1b4a3556d24641c14bbfc7ae1b985d4a998649 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 15 Apr 2023 09:05:43 -0700 Subject: [PATCH 72/97] HADOOP-18590. Publish SBOM artifacts (#5555). Contributed by Dongjoon Hyun. --- pom.xml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pom.xml b/pom.xml index fa768296e37..42a11795274 100644 --- a/pom.xml +++ b/pom.xml @@ -118,6 +118,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x 4.2.0 1.1.1 3.10.1 + 2.7.6 bash @@ -607,6 +608,10 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x com.github.spotbugs spotbugs-maven-plugin + + org.cyclonedx + cyclonedx-maven-plugin + @@ -748,6 +753,26 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + + dist + + + + org.cyclonedx + cyclonedx-maven-plugin + ${cyclonedx.version} + + + package + + makeBom + + + + + + + sign From 2c4d6bf33da56a0538ca95047daeefc918e26c41 Mon Sep 17 00:00:00 2001 From: yl09099 <33595968+yl09099@users.noreply.github.com> Date: Mon, 17 Apr 2023 09:27:52 +0800 Subject: [PATCH 73/97] YARN-11465. 
Improved YarnClient Log Format (#5550) Co-authored-by: yl09099 Reviewed-by: Shilun Fan Signed-off-by: Shilun Fan --- .../client/api/ContainerShellWebSocket.java | 9 ++- .../api/async/impl/NMClientAsyncImpl.java | 50 +++++++-------- .../yarn/client/api/impl/AMRMClientImpl.java | 62 +++++++++---------- .../yarn/client/api/impl/NMClientImpl.java | 10 ++- .../yarn/client/api/impl/YarnClientImpl.java | 31 +++++----- .../hadoop/yarn/client/TestGetGroups.java | 2 +- .../client/TestRMFailoverProxyProvider.java | 1 + ...gerAdministrationProtocolPBClientImpl.java | 4 +- .../api/impl/TestSharedCacheClientImpl.java | 2 +- 9 files changed, 82 insertions(+), 89 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/ContainerShellWebSocket.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/ContainerShellWebSocket.java index 66a901fc36a..5656484fca1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/ContainerShellWebSocket.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/ContainerShellWebSocket.java @@ -69,17 +69,16 @@ public class ContainerShellWebSocket { @OnWebSocketConnect public void onConnect(Session s) { initTerminal(s); - LOG.info(s.getRemoteAddress().getHostString() + " connected!"); + LOG.info("{} connected!", s.getRemoteAddress().getHostString()); } @OnWebSocketClose public void onClose(Session session, int status, String reason) { if (status==1000) { - LOG.info(session.getRemoteAddress().getHostString() + - " closed, status: " + status); + LOG.info("{} closed, status: {}", session.getRemoteAddress().getHostString(), status); } else { - LOG.warn(session.getRemoteAddress().getHostString() + - " closed, status: " + status + " Reason: " + reason); + LOG.warn("{} closed, status:" + + " {} Reason: {}.", session.getRemoteAddress().getHostString(), status, reason); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/NMClientAsyncImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/NMClientAsyncImpl.java index eb5b9b227fb..4a4c50607da 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/NMClientAsyncImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/NMClientAsyncImpl.java @@ -133,7 +133,7 @@ public class NMClientAsyncImpl extends NMClientAsync { this.maxThreadPoolSize = conf.getInt( YarnConfiguration.NM_CLIENT_ASYNC_THREAD_POOL_MAX_SIZE, YarnConfiguration.DEFAULT_NM_CLIENT_ASYNC_THREAD_POOL_MAX_SIZE); - LOG.info("Upper bound of the thread pool size is " + maxThreadPoolSize); + LOG.info("Upper bound of the thread pool size is {}.", maxThreadPoolSize); client.init(conf); super.serviceInit(conf); @@ -186,9 +186,8 @@ public class NMClientAsyncImpl extends NMClientAsync { // always increasing the pool-size int newThreadPoolSize = Math.min(maxThreadPoolSize, idealThreadPoolSize + INITIAL_THREAD_POOL_SIZE); - LOG.info("Set NMClientAsync thread pool size to " + - newThreadPoolSize + " as the number of nodes to talk to is " - + nodeNum); + LOG.info("Set NMClientAsync thread pool size to {} " + + "as the number of nodes to talk to is {}.", newThreadPoolSize, nodeNum); 
threadPool.setCorePoolSize(newThreadPoolSize); } } @@ -252,8 +251,7 @@ public class NMClientAsyncImpl extends NMClientAsync { try { events.put(new StartContainerEvent(container, containerLaunchContext)); } catch (InterruptedException e) { - LOG.warn("Exception when scheduling the event of starting Container " + - container.getId()); + LOG.warn("Exception when scheduling the event of starting Container {}", container.getId()); callbackHandler.onStartContainerError(container.getId(), e); } } @@ -276,8 +274,8 @@ public class NMClientAsyncImpl extends NMClientAsync { try { events.put(new UpdateContainerResourceEvent(container, true)); } catch (InterruptedException e) { - LOG.warn("Exception when scheduling the event of increasing resource of " - + "Container " + container.getId()); + LOG.warn("Exception when scheduling the event of increasing " + + "resource of Container {}", container.getId()); handler.onIncreaseContainerResourceError(container.getId(), e); } } @@ -300,8 +298,8 @@ public class NMClientAsyncImpl extends NMClientAsync { try { events.put(new UpdateContainerResourceEvent(container, false)); } catch (InterruptedException e) { - LOG.warn("Exception when scheduling the event of increasing resource of " - + "Container " + container.getId()); + LOG.warn("Exception when scheduling the event of " + + "increasing resource of Container {}.", container.getId()); handler.onUpdateContainerResourceError(container.getId(), e); } } @@ -325,8 +323,8 @@ public class NMClientAsyncImpl extends NMClientAsync { client.getNodeIdOfStartedContainer(containerId), containerLaunchContex, autoCommit)); } catch (InterruptedException e) { - LOG.warn("Exception when scheduling the event of re-initializing of " - + "Container " + containerId); + LOG.warn("Exception when scheduling the event of " + + "re-initializing of Container {}", containerId); handler.onContainerReInitializeError(containerId, e); } } @@ -349,8 +347,7 @@ public class NMClientAsyncImpl extends NMClientAsync { client.getNodeIdOfStartedContainer(containerId), null, ContainerEventType.RESTART_CONTAINER)); } catch (InterruptedException e) { - LOG.warn("Exception when scheduling the event of restart of " - + "Container " + containerId); + LOG.warn("Exception when scheduling the event of restart of Container {}", containerId); handler.onContainerRestartError(containerId, e); } } @@ -373,8 +370,8 @@ public class NMClientAsyncImpl extends NMClientAsync { client.getNodeIdOfStartedContainer(containerId), null, ContainerEventType.ROLLBACK_LAST_REINIT)); } catch (InterruptedException e) { - LOG.warn("Exception when scheduling the event Rollback re-initialization" - + " of Container " + containerId); + LOG.warn("Exception when scheduling the event Rollback " + + "re-initialization of Container {}", containerId); handler.onRollbackLastReInitializationError(containerId, e); } } @@ -397,8 +394,8 @@ public class NMClientAsyncImpl extends NMClientAsync { client.getNodeIdOfStartedContainer(containerId), null, ContainerEventType.COMMIT_LAST_REINT)); } catch (InterruptedException e) { - LOG.warn("Exception when scheduling the event Commit re-initialization" - + " of Container " + containerId); + LOG.warn("Exception when scheduling the event " + + "Commit re-initialization of Container {}", containerId); handler.onCommitLastReInitializationError(containerId, e); } } @@ -413,8 +410,7 @@ public class NMClientAsyncImpl extends NMClientAsync { events.put(new ContainerEvent(containerId, nodeId, null, ContainerEventType.STOP_CONTAINER)); } catch (InterruptedException e) 
{ - LOG.warn("Exception when scheduling the event of stopping Container " + - containerId); + LOG.warn("Exception when scheduling the event of stopping Container {}", containerId); callbackHandler.onStopContainerError(containerId, e); } } @@ -424,8 +420,8 @@ public class NMClientAsyncImpl extends NMClientAsync { events.put(new ContainerEvent(containerId, nodeId, null, ContainerEventType.QUERY_CONTAINER)); } catch (InterruptedException e) { - LOG.warn("Exception when scheduling the event of querying the status" + - " of Container " + containerId); + LOG.warn("Exception when scheduling the event of querying " + + "the status of Container {}", containerId); callbackHandler.onGetContainerStatusError(containerId, e); } } @@ -730,7 +726,7 @@ public class NMClientAsyncImpl extends NMClientAsync { switch(containerEvent.getType()) { case REINITIALIZE_CONTAINER: if (!(containerEvent instanceof ReInitializeContainerEvevnt)) { - LOG.error("Unexpected Event.. [" +containerEvent.getType() + "]"); + LOG.error("Unexpected Event.. [{}]", containerEvent.getType()); return ContainerState.FAILED; } ReInitializeContainerEvevnt rEvent = @@ -771,8 +767,8 @@ public class NMClientAsyncImpl extends NMClientAsync { } break; default: - LOG.warn("Event of type [" + containerEvent.getType() + "] not" + - " expected here.."); + LOG.warn("Event of type [{}] not" + + " expected here..", containerEvent.getType()); break; } if (handlerError != null) { @@ -942,7 +938,7 @@ public class NMClientAsyncImpl extends NMClientAsync { @Override public void run() { ContainerId containerId = event.getContainerId(); - LOG.info("Processing Event " + event + " for Container " + containerId); + LOG.info("Processing Event {} for Container {}", event, containerId); if (event.getType() == ContainerEventType.QUERY_CONTAINER) { try { ContainerStatus containerStatus = client.getContainerStatus( @@ -962,7 +958,7 @@ public class NMClientAsyncImpl extends NMClientAsync { } else { StatefulContainer container = containers.get(containerId); if (container == null) { - LOG.info("Container " + containerId + " is already stopped or failed"); + LOG.info("Container {} is already stopped or failed", containerId); } else { container.handle(event); if (isCompletelyDone(container)) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java index 274920f7e1b..0a450b532af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java @@ -478,11 +478,12 @@ public class AMRMClientImpl extends AMRMClient { continue; } if (LOG.isDebugEnabled()) { - LOG.debug("RM has confirmed changed resource allocation for " - + "container " + containerId + ". Current resource allocation:" - + changedContainer.getContainer().getResource() - + ". Remove pending change request:" - + pendingChange.get(containerId).getValue()); + LOG.debug("RM has confirmed changed resource allocation for container {}. " + + "Current resource allocation:{}. 
" + + "Remove pending change request:{}", + containerId, + changedContainer.getContainer().getResource(), + pendingChange.get(containerId).getValue()); } pendingChange.remove(containerId); } @@ -495,9 +496,9 @@ public class AMRMClientImpl extends AMRMClient { String nodeId = token.getNodeId().toString(); if (LOG.isDebugEnabled()) { if (getNMTokenCache().containsToken(nodeId)) { - LOG.debug("Replacing token for : " + nodeId); + LOG.debug("Replacing token for : {}", nodeId); } else { - LOG.debug("Received new token for : " + nodeId); + LOG.debug("Received new token for : {}", nodeId); } } getNMTokenCache().setToken(nodeId, token.getToken()); @@ -544,8 +545,7 @@ public class AMRMClientImpl extends AMRMClient { dedupedRacks.addAll(req.getRacks()); if(req.getRacks().size() != dedupedRacks.size()) { Joiner joiner = Joiner.on(','); - LOG.warn("ContainerRequest has duplicate racks: " - + joiner.join(req.getRacks())); + LOG.warn("ContainerRequest has duplicate racks: {}", joiner.join(req.getRacks())); } } Set inferredRacks = resolveRacks(req.getNodes()); @@ -573,8 +573,7 @@ public class AMRMClientImpl extends AMRMClient { HashSet dedupedNodes = new HashSet(req.getNodes()); if(dedupedNodes.size() != req.getNodes().size()) { Joiner joiner = Joiner.on(','); - LOG.warn("ContainerRequest has duplicate nodes: " - + joiner.join(req.getNodes())); + LOG.warn("ContainerRequest has duplicate nodes: {}", joiner.join(req.getNodes())); } for (String node : dedupedNodes) { addResourceRequest(req.getPriority(), node, @@ -636,11 +635,12 @@ public class AMRMClientImpl extends AMRMClient { Preconditions.checkNotNull(container, "Container cannot be null!!"); Preconditions.checkNotNull(updateContainerRequest, "UpdateContainerRequest cannot be null!!"); - LOG.info("Requesting Container update : " + - "container=" + container + ", " + - "updateType=" + updateContainerRequest.getContainerUpdateType() + ", " + - "targetCapability=" + updateContainerRequest.getCapability() + ", " + - "targetExecType=" + updateContainerRequest.getExecutionType()); + LOG.info("Requesting Container update : container={}, updateType={}," + + " targetCapability={}, targetExecType={}", + container, + updateContainerRequest.getContainerUpdateType(), + updateContainerRequest.getCapability(), + updateContainerRequest.getExecutionType()); if (updateContainerRequest.getCapability() != null && updateContainerRequest.getExecutionType() == null) { validateContainerResourceChangeRequest( @@ -770,7 +770,7 @@ public class AMRMClientImpl extends AMRMClient { // Ensure node requests are accompanied by requests for // corresponding rack if (rack == null) { - LOG.warn("Failed to resolve rack for node " + node + "."); + LOG.warn("Failed to resolve rack for node {}.", node); } else { racks.add(rack); } @@ -941,12 +941,13 @@ public class AMRMClientImpl extends AMRMClient { addResourceRequestToAsk(resourceRequestInfo.remoteRequest); if (LOG.isDebugEnabled()) { - LOG.debug("Adding request to ask " + resourceRequestInfo.remoteRequest); - LOG.debug("addResourceRequest:" + " applicationId=" - + " priority=" + priority.getPriority() - + " resourceName=" + resourceName + " numContainers=" - + resourceRequestInfo.remoteRequest.getNumContainers() - + " #asks=" + ask.size()); + LOG.debug("Adding request to ask {}", resourceRequestInfo.remoteRequest); + LOG.debug("addResourceRequest: applicationId= priority={}" + + " resourceName={} numContainers={} #asks={}", + priority.getPriority(), + resourceName, + resourceRequestInfo.remoteRequest.getNumContainers(), + ask.size()); } } 
@@ -972,17 +973,16 @@ public class AMRMClientImpl extends AMRMClient { } if (LOG.isDebugEnabled()) { - LOG.debug("AFTER decResourceRequest:" - + " allocationRequestId=" + req.getAllocationRequestId() - + " priority=" + priority.getPriority() - + " resourceName=" + resourceName + " numContainers=" - + resourceRequestInfo.remoteRequest.getNumContainers() - + " #asks=" + ask.size()); + LOG.debug("AFTER decResourceRequest: allocationRequestId={} " + + "priority={} resourceName={} numContainers={} #asks={}", + req.getAllocationRequestId(), priority.getPriority(), + resourceName, + resourceRequestInfo.remoteRequest.getNumContainers(), ask.size()); } } } else { - LOG.info("No remoteRequestTable found with allocationRequestId=" - + req.getAllocationRequestId()); + LOG.info("No remoteRequestTable found with allocationRequestId={}", + req.getAllocationRequestId()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java index 6b2cf46bfa9..e2d7b9f28ea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java @@ -128,13 +128,11 @@ public class NMClientImpl extends NMClient { stopContainer(startedContainer.getContainerId(), startedContainer.getNodeId()); } catch (YarnException e) { - LOG.error("Failed to stop Container " + - startedContainer.getContainerId() + - " when stopping NMClientImpl"); + LOG.error("Failed to stop Container {} when stopping NMClientImpl", + startedContainer.getContainerId()); } catch (IOException e) { - LOG.error("Failed to stop Container " + - startedContainer.getContainerId() + - " when stopping NMClientImpl"); + LOG.error("Failed to stop Container {} when stopping NMClientImpl", + startedContainer.getContainerId()); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index 36a5b04ad11..19d03a7da73 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -353,7 +353,7 @@ public class YarnClientImpl extends YarnClient { throw new YarnException("Failed to submit " + applicationId + " to YARN : " + appReport.getDiagnostics()); } - LOG.info("Submitted application " + applicationId); + LOG.info("Submitted application {}", applicationId); break; } @@ -368,8 +368,9 @@ public class YarnClientImpl extends YarnClient { // is blocked here too long. 
if (++pollCount % 10 == 0) { LOG.info("Application submission is not finished, " + - "submitted application " + applicationId + - " is still in " + state); + "submitted application {} is still in {}", + applicationId, + state); } try { Thread.sleep(submitPollIntervalMillis); @@ -382,8 +383,8 @@ public class YarnClientImpl extends YarnClient { } catch (ApplicationNotFoundException ex) { // FailOver or RM restart happens before RMStateStore saves // ApplicationState - LOG.info("Re-submit application " + applicationId + "with the " + - "same ApplicationSubmissionContext"); + LOG.info("Re-submit application {} with the" + + " same ApplicationSubmissionContext", applicationId); rmClient.submitApplication(request); } } @@ -408,7 +409,7 @@ public class YarnClientImpl extends YarnClient { throw new IOException( "Can't get Master Kerberos principal for use as renewer"); } - LOG.debug("Delegation Token Renewer: " + masterPrincipal); + LOG.debug("Delegation Token Renewer: {}", masterPrincipal); LogAggregationFileControllerFactory factory = new LogAggregationFileControllerFactory(conf); @@ -421,8 +422,7 @@ public class YarnClientImpl extends YarnClient { fs.addDelegationTokens(masterPrincipal, credentials); if (finalTokens != null) { for (org.apache.hadoop.security.token.Token token : finalTokens) { - LOG.info("Added delegation token for log aggregation path " - + remoteRootLogDir + "; "+token); + LOG.info("Added delegation token for log aggregation path {}; {}", remoteRootLogDir, token); } } @@ -485,8 +485,7 @@ public class YarnClientImpl extends YarnClient { return timelineClient.getDelegationToken(timelineDTRenewer); } catch (Exception e) { if (timelineServiceBestEffort) { - LOG.warn("Failed to get delegation token from the timeline server: " - + e.getMessage()); + LOG.warn("Failed to get delegation token from the timeline server: {}", e.getMessage()); return null; } throw new IOException(e); @@ -527,7 +526,7 @@ public class YarnClientImpl extends YarnClient { @Override public void failApplicationAttempt(ApplicationAttemptId attemptId) throws YarnException, IOException { - LOG.info("Failing application attempt " + attemptId); + LOG.info("Failing application attempt {}.", attemptId); FailApplicationAttemptRequest request = Records.newRecord(FailApplicationAttemptRequest.class); request.setApplicationAttemptId(attemptId); @@ -560,7 +559,7 @@ public class YarnClientImpl extends YarnClient { KillApplicationResponse response = rmClient.forceKillApplication(request); if (response.getIsKillCompleted()) { - LOG.info("Killed application " + applicationId); + LOG.info("Killed application {}", applicationId); break; } @@ -573,7 +572,7 @@ public class YarnClientImpl extends YarnClient { if (++pollCount % 10 == 0) { LOG.info( - "Waiting for application " + applicationId + " to be killed."); + "Waiting for application {} to be killed.", applicationId); } Thread.sleep(asyncApiPollIntervalMillis); } @@ -1080,7 +1079,7 @@ public class YarnClientImpl extends YarnClient { public void signalToContainer(ContainerId containerId, SignalContainerCommand command) throws YarnException, IOException { - LOG.info("Signalling container " + containerId + " with command " + command); + LOG.info("Signalling container {} with command {}", containerId, command); SignalContainerRequest request = SignalContainerRequest.newInstance(containerId, command); rmClient.signalToContainer(request); @@ -1186,9 +1185,9 @@ public class YarnClientImpl extends YarnClient { client.stop(); } } catch (WebSocketException e) { - LOG.debug("Websocket 
exception: " + e.getMessage()); + LOG.debug("Websocket exception: {}", e.getMessage()); } catch (Throwable t) { - LOG.error("Fail to shell to container: " + t.getMessage()); + LOG.error("Fail to shell to container: {}", t.getMessage()); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java index 51b522ae392..d11ea78607c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java @@ -79,7 +79,7 @@ public class TestGetGroups extends GetGroupsTestBase { boolean rmStarted = rmStartedSignal.await(60000L, TimeUnit.MILLISECONDS); Assert.assertTrue("ResourceManager failed to start up.", rmStarted); - LOG.info("ResourceManager RMAdmin address: " + + LOG.info("ResourceManager RMAdmin address: {}.", conf.get(YarnConfiguration.RM_ADMIN_ADDRESS)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailoverProxyProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailoverProxyProvider.java index ce9af23744f..b4fd175fae8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailoverProxyProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailoverProxyProvider.java @@ -189,6 +189,7 @@ public class TestRMFailoverProxyProvider { * and {@link AutoRefreshRMFailoverProxyProvider#performFailover(Object)} * gets called. 
*/ + @SuppressWarnings("unchecked") @Test public void testAutoRefreshFailoverChange() throws Exception { conf.setClass(YarnConfiguration.CLIENT_FAILOVER_NO_HA_PROXY_PROVIDER, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java index dfc2a0fc4b7..08a6e0c78ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java @@ -106,8 +106,8 @@ public class TestResourceManagerAdministrationProtocolPBClientImpl { boolean rmStarted = rmStartedSignal.await(60000L, TimeUnit.MILLISECONDS); Assert.assertTrue("ResourceManager failed to start up.", rmStarted); - LOG.info("ResourceManager RMAdmin address: " - + configuration.get(YarnConfiguration.RM_ADMIN_ADDRESS)); + LOG.info("ResourceManager RMAdmin address: {}.", + configuration.get(YarnConfiguration.RM_ADMIN_ADDRESS)); client = new ResourceManagerAdministrationProtocolPBClientImpl(1L, getProtocolAddress(configuration), configuration); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestSharedCacheClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestSharedCacheClientImpl.java index b297d926c05..1b179b138a9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestSharedCacheClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestSharedCacheClientImpl.java @@ -76,7 +76,7 @@ public class TestSharedCacheClientImpl { localFs.close(); } } catch (IOException ioe) { - LOG.info("IO exception in closing file system)"); + LOG.info("IO exception in closing file system"); ioe.printStackTrace(); } } From 6ea10cf41b78bcc54a2b205b2a9f6231f6f574b7 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 17 Apr 2023 10:18:33 +0100 Subject: [PATCH 74/97] HADOOP-18696. ITestS3ABucketExistence arn test failures. (#5557) Explicitly sets the fs.s3a.endpoint.region to eu-west-1 so the ARN-referenced fs creation fails with unknown store rather than IllegalArgumentException. 
Steve Loughran --- .../hadoop/fs/s3a/ITestS3ABucketExistence.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java index fb295f3f09f..9485202f64c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java @@ -34,6 +34,7 @@ import org.apache.hadoop.test.LambdaTestUtils; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; +import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESSPOINT_REQUIRED; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A; import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE; @@ -47,7 +48,7 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase { private FileSystem fs; private final String randomBucket = - "random-bucket-" + UUID.randomUUID().toString(); + "random-bucket-" + UUID.randomUUID(); private final URI uri = URI.create(FS_S3A + "://" + randomBucket + "/"); @@ -163,7 +164,7 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase { @Test public void testAccessPointProbingV2() throws Exception { describe("Test V2 bucket probing using an AccessPoint ARN"); - Configuration configuration = createConfigurationWithProbe(2); + Configuration configuration = createArnConfiguration(); String accessPointArn = "arn:aws:s3:eu-west-1:123456789012:accesspoint/" + randomBucket; configuration.set(String.format(InternalConstants.ARN_BUCKET_OPTION, randomBucket), accessPointArn); @@ -175,7 +176,7 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase { @Test public void testAccessPointRequired() throws Exception { describe("Test V2 bucket probing with 'fs.s3a.accesspoint.required' property."); - Configuration configuration = createConfigurationWithProbe(2); + Configuration configuration = createArnConfiguration(); configuration.set(AWS_S3_ACCESSPOINT_REQUIRED, "true"); intercept(PathIOException.class, InternalConstants.AP_REQUIRED_EXCEPTION, @@ -189,6 +190,17 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase { () -> FileSystem.get(uri, configuration)); } + /** + * Create a configuration which has bucket probe 2 and the endpoint.region + * option set to "eu-west-1" to match that of the ARNs generated. + * @return a configuration for tests which are expected to fail in specific ways. + */ + private Configuration createArnConfiguration() { + Configuration configuration = createConfigurationWithProbe(2); + configuration.set(AWS_REGION, "eu-west-1"); + return configuration; + } + @Override protected Configuration getConfiguration() { Configuration configuration = super.getConfiguration(); From 405ed1dde6bcccca1e07e45a356a89c1b583e236 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 18 Apr 2023 10:12:07 +0100 Subject: [PATCH 75/97] HADOOP-18470. Hadoop 3.3.5 release wrap-up (#5558) Post-release updates of the branches * Add jdiff xml files from 3.3.5 release. * Declare 3.3.5 as the latest stable release. * Copy release notes. 
--- .../jdiff/Apache_Hadoop_Common_3.3.5.xml | 40640 ++++++++++++++++ .../markdown/release/3.3.5/CHANGELOG.3.3.5.md | 359 + .../release/3.3.5/RELEASENOTES.3.3.5.md | 89 + .../jdiff/Apache_Hadoop_HDFS_3.3.5.xml | 835 + .../Apache_Hadoop_MapReduce_Common_3.3.5.xml | 113 + .../Apache_Hadoop_MapReduce_Core_3.3.5.xml | 28963 +++++++++++ ...pache_Hadoop_MapReduce_JobClient_3.3.5.xml | 16 + hadoop-project-dist/pom.xml | 2 +- .../jdiff/Apache_Hadoop_YARN_API_3.3.5.xml | 26420 ++++++++++ .../jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml | 3067 ++ .../jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml | 3982 ++ ...Apache_Hadoop_YARN_Server_Common_3.3.5.xml | 1456 + 12 files changed, 105941 insertions(+), 1 deletion(-) create mode 100644 hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md create mode 100644 hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml create mode 100644 hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.3.5.xml create mode 100644 hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.3.5.xml create mode 100644 hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.3.5.xml create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_API_3.3.5.xml create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.3.5.xml diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml new file mode 100644 index 00000000000..b788b4497fe --- /dev/null +++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml @@ -0,0 +1,40640 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @param customMessage depcrication message + @deprecated use {@link #addDeprecation(String key, String newKey, + String customMessage)} instead]]> + + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key to be deprecated + @param newKey key that take up the values of deprecated key + @param customMessage deprecation message]]> + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. 
+ + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]> + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKey key that takes up the value of deprecated key]]> + + + + + + key is deprecated. + + @param key the parameter which is to be checked for deprecation + @return true if the key is deprecated and + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + WARNING: The contents of the InputStream will be cached, by this method. + So use this sparingly because it does increase the memory consumption. + + @param in InputStream to deserialize the object from. In will be read from + when a get or set is called next. After it is read the stream will be + closed.]]> + + + + + + + + + + + final. + + @param in InputStream to deserialize the object from. + @param name the name of the resource because InputStream.toString is not + very descriptive some times.]]> + + + + + + + + + + + final. + + @param conf Configuration object from which to load properties]]> + + + + + + + + + + + name property, null if + no such property exists. If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null. + + Values are processed for variable expansion + before being returned. + + As a side effect get loads the properties from the sources if called for + the first time as a lazy init. + + @param name the property name, will be trimmed before get value. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + + + + + + + + + name property, but only for + names which have no valid value, usually non-existent or commented + out in XML. + + @param name the property name + @return true if the property name exists without value]]> + + + + + + name property as a trimmed String, + null if no such property exists. + If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + name property as a trimmed String, + defaultValue if no such property exists. + See @{Configuration#getTrimmed} for more details. + + @param name the property name. + @param defaultValue the property default value. 
+ @return the value of the name or defaultValue + if it is not set.]]> + + + + + + name property, without doing + variable expansion.If the key is + deprecated, it returns the value of the first key which replaces + the deprecated key and is not null. + + @param name the property name. + @return the value of the name property or + its replacing property and null if no such property exists.]]> + + + + + + + value of the name property. If + name is deprecated or there is a deprecated name associated to it, + it sets the value to both names. Name will be trimmed before put into + configuration. + + @param name property name. + @param value property value.]]> + + + + + + + + value of the name property. If + name is deprecated, it also sets the value to + the keys that replace the deprecated key. Name will be trimmed before put + into configuration. + + @param name property name. + @param value property value. + @param source the place that this configuration value came from + (For debugging). + @throws IllegalArgumentException when the value or name is null.]]> + + + + + + + + + + + + + + + + + + + + name. If the key is deprecated, + it returns the value of the first key which replaces the deprecated key + and is not null. + If no such property exists, + then defaultValue is returned. + + @param name property name, will be trimmed before get value. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, the provided default value is returned, + or if the specified value is not a valid int, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as an int, + or defaultValue.]]> + + + + + + name property as a set of comma-delimited + int values. + + If no such property exists, an empty array is returned. + + @param name property name + @return property value interpreted as an array of comma-delimited + int values]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid long, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property as a long or + human readable format. If no such property exists, the provided default + value is returned, or if the specified value is not a valid + long or human readable format, then an error is thrown. You + can use the following suffix (case insensitive): k(kilo), m(mega), g(giga), + t(tera), p(peta), e(exa) + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid float, + then an error is thrown. + + @param name property name. + @param defaultValue default value. 
+ @throws NumberFormatException when the value is invalid + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a double. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid double, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a double, + or defaultValue.]]> + + + + + + + name property to a double. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + name property to the given type. This + is equivalent to set(<name>, value.toString()). + @param name property name + @param value new value + @param enumeration type]]> + + + + + + + enumeration type + @throws IllegalArgumentException If mapping is illegal for the type + provided + @return enumeration type]]> + + + + + + + + name to the given time duration. This + is equivalent to set(<name>, value + <time suffix>). + @param name Property name + @param value Time duration + @param unit Unit of time]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name property as a Pattern. + If no such property is specified, or if the specified value is not a valid + Pattern, then DefaultValue is returned. + Note that the returned value is NOT trimmed by this method. + + @param name property name + @param defaultValue default value + @return property value as a compiled Pattern, or defaultValue]]> + + + + + + + Pattern. + If the pattern is passed as null, sets the empty pattern which results in + further calls to getPattern(...) returning the default value. + + @param name property name + @param pattern new value]]> + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

+ This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + name property as + a collection of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then empty Collection is returned. + + @param name property name. + @return property value as a collection of Strings, or empty Collection]]> + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then an empty array is returned. + + @param name property name. + @return property value as an array of trimmed Strings, + or empty array.]]> + + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of trimmed Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostProperty as a + InetSocketAddress. If hostProperty is + null, addressProperty will be used. This + is useful for cases where we want to differentiate between host + bind address and address clients should use to establish connection. + + @param hostProperty bind host property name. + @param addressProperty address property name. + @param defaultAddressValue the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + + name property as a + InetSocketAddress. + @param name property name. + @param defaultAddress the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + name property as + a host:port. + @param name property name. + @param addr inetSocketAddress addr.]]> + + + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. If the host and address + properties are configured the host component of the address will be combined + with the port component of the addr to generate the address. This is to allow + optional control over which host name is used in multi-home bind-host + cases where a host can have multiple names + @param hostProperty the bind-host configuration name + @param addressProperty the service address configuration name + @param defaultAddressValue the service default address configuration value + @param addr InetSocketAddress of the service listener + @return InetSocketAddress for clients to connect]]> + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. + @param name property name. + @param addr InetSocketAddress of a listener to store in the given property + @return InetSocketAddress for clients to connect]]> + + + + + + + + + + + + + + + + + + + + name property + as an array of Class. 
+ The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the conf key name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the conf key name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @param Interface class type. + @return property value as a Class, + or defaultValue.]]> + + + + + + + name property as a List + of objects implementing the interface specified by xface. + + An exception is thrown if any of the classes does not exist, or if it does + not implement the named interface. + + @param name the property name. + @param xface the interface implemented by the classes named by + name. + @param Interface class type. + @return a List of objects implementing xface.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • + When property name is not empty and the property exists in the + configuration, this method writes the property and its attributes + to the {@link Writer}. +
  • + +
  • + When property name is null or empty, this method writes all the + configuration properties and their attributes to the {@link Writer}. +
  • + +
  • + When property name is not empty but the property doesn't exist in + the configuration, this method throws an {@link IllegalArgumentException}. +
  • + + @param propertyName xml property name. + @param out the writer to write to. + @throws IOException raised on errors performing I/O.]]> + +
    + + + + + + + +
  • + When propertyName is not empty, and the property exists + in the configuration, the format of the output would be, +
    +  {
    +    "property": {
    +      "key" : "key1",
    +      "value" : "value1",
    +      "isFinal" : "key1.isFinal",
    +      "resource" : "key1.resource"
    +    }
    +  }
    +  
    +
  • + +
  • + When propertyName is null or empty, it behaves same as + {@link #dumpConfiguration(Configuration, Writer)}, the + output would be, +
    +  { "properties" :
    +      [ { key : "key1",
    +          value : "value1",
    +          isFinal : "key1.isFinal",
    +          resource : "key1.resource" },
    +        { key : "key2",
    +          value : "value2",
    +          isFinal : "ke2.isFinal",
    +          resource : "key2.resource" }
    +       ]
    +   }
    +  
    +
  • + +
  • + When propertyName is not empty, and the property is not + found in the configuration, this method will throw an + {@link IllegalArgumentException}. +
  • + +

    + @param config the configuration + @param propertyName property name + @param out the Writer to write to + @throws IOException raised on errors performing I/O. + @throws IllegalArgumentException when property name is not + empty and the property is not found in configuration]]> + + + + + + + + + { "properties" : + [ { key : "key1", + value : "value1", + isFinal : "key1.isFinal", + resource : "key1.resource" }, + { key : "key2", + value : "value2", + isFinal : "ke2.isFinal", + resource : "key2.resource" } + ] + } + + + It does not output the properties of the configuration object which + is loaded from an input stream. +

    + + @param config the configuration + @param out the Writer to write to + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + + + } with matching keys]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. + + core-default.xml: Read-only defaults for hadoop.
    2. +
    3. core-site.xml: Site-specific configuration for a given hadoop + installation.
    4. +
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    
    +  <property>
    +    <name>dfs.hosts.include</name>
    +    <value>/etc/hadoop/conf/hosts.include</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is + undefined here,
    2. +
    3. Environment variables in {@link System#getenv()} if a name starts with + "env.", or
    4. +
    5. Properties in {@link System#getProperties()}.
    6. +
    + +

    For example, if a configuration resource contains the following property + definitions: +

    
    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    +
    +  <property>
    +    <name>otherdir</name>
    +    <value>${env.BASE_DIR}/other</value>
    +  </property>
    +  
    + +

    When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name. +

    When conf.get("otherdir") is called, then ${env.BASE_DIR} + will be resolved to the value of the ${BASE_DIR} environment variable. + It supports ${env.NAME:-default} and ${env.NAME-default} notations. + The former is resolved to "default" if ${NAME} environment variable is undefined + or its value is empty. + The latter behaves the same way only if ${NAME} is undefined. +

    By default, warnings will be given to any deprecated configuration + parameters and these are suppressible by configuring + log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in + log4j.properties file. + +

    Tags

    + +

    Optionally we can tag related properties together by using tag + attributes. System tags are defined by hadoop.tags.system property. Users + can define there own custom tags in hadoop.tags.custom property. + +

    For example, we can tag existing property as: +

    
    +  <property>
    +    <name>dfs.replication</name>
    +    <value>3</value>
    +    <tag>HDFS,REQUIRED</tag>
    +  </property>
    +
    +  <property>
    +    <name>dfs.data.transfer.protection</name>
    +    <value>3</value>
    +    <tag>HDFS,SECURITY</tag>
    +  </property>
    + 
    +

    Properties marked with tags can be retrieved with conf + .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags + (Arrays.asList("YARN","SECURITY")).

    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #createKey(String, byte[], Options)} method. + + @param name the base name of the key + @param options the options for the new key. + @return the version name of the first version of the key. + @throws IOException raised on errors performing I/O. + @throws NoSuchAlgorithmException no such algorithm exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #rollNewVersion(String, byte[])} method. + + @param name the basename of the key + @return the name of the new version of the key + @throws IOException raised on errors performing I/O. + @throws NoSuchAlgorithmException This exception is thrown when a particular + cryptographic algorithm is requested + but is not available in the environment.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KeyProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + NULL if + a provider for the specified URI scheme could not be found. + @throws IOException thrown if the provider failed to initialize.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + uri has syntax error]]> + + + + + + + + + + + + + + + + uri is + not found]]> + + + + + + + + + + + + + + + + + + + + + + + uri + determines a configuration property name, + fs.AbstractFileSystem.scheme.impl whose value names the + AbstractFileSystem class. + + The entire URI and conf is passed to the AbstractFileSystem factory method. + + @param uri for the file system to be created. + @param conf which is passed to the file system impl. + + @return file system for the given URI. + + @throws UnsupportedFileSystemException if the file system for + uri is not supported.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. + @throws IOException raised on errors performing I/O. 
+ @throws UnsupportedOperationException Unsupported Operation Exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } describing modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + } describing entries to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + } describing modifications, must + include entries for user, group, and others for compatibility with + permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + } which returns each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException raised on errors performing I/O.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BlockLocation(offset: 0, length: BLOCK_SIZE, + hosts: {"host1:9866", "host2:9866, host3:9866"}) + + + And if the file is erasure-coded, each BlockLocation represents a logical + block groups. Value offset is the offset of a block group in the file and + value length is the total length of a block group. 
Hosts of a BlockLocation + are the datanodes that holding all the data blocks and parity blocks of a + block group. + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + A BlockLocation example will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + 
    + + Please refer to + {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or + {@link FileContext#getFileBlockLocations(Path, long, long)} + for more examples.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. +

    + This does not change the current offset of a file, and is thread-safe. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if reached + end-of-stream + @throws IOException if there is some error performing the read]]> + + + + + + + + + This operation provides similar semantics to + {@link #read(long, ByteBuffer)}, the difference is that this method is + guaranteed to read data until the {@link ByteBuffer} is full, or until + the end of the data stream is reached. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @throws IOException if there is some error performing the read + @throws EOFException the end of the data was reached before + the read operation completed + @see #read(long, ByteBuffer)]]> + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. + + @param buf + the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if + reach end-of-stream + @throws IOException + if there is some error performing the read]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException if an I/O error occurs. + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + A higher number here does not necessarily improve performance, especially + for object stores, where multiple threads may be attempting to create an FS + instance for the same URI. +

    + Default value: {@value}.]]> +
    + + + + + Default value: {@value}. +

    ]]> +
    +
    + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
CreateFlag values are combined in an EnumSet, for example
EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND).

Use the CreateFlag as follows:

- CREATE - to create a file if it does not exist,
  else throw FileAlreadyExists.
- APPEND - to append to a file if it exists,
  else throw FileNotFoundException.
- OVERWRITE - to truncate a file if it exists,
  else throw FileNotFoundException.
- CREATE|APPEND - to create a file if it does not exist,
  else append to an existing file.
- CREATE|OVERWRITE - to create a file if it does not exist,
  else overwrite an existing file.
- SYNC_BLOCK - to force closed blocks to the disk device.
  In addition {@link Syncable#hsync()} should be called after each write,
  if true synchronous behavior is required.
- LAZY_PERSIST - create the block on transient storage (RAM) if available.
- APPEND_NEWBLOCK - append data to a new block instead of the end of the
  last partial block.

The following combinations are not valid and will result in
{@link HadoopIllegalArgumentException}:

- APPEND|OVERWRITE
- CREATE|APPEND|OVERWRITE
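As a hedged illustration of how these flags are combined, the following
sketch passes an EnumSet of CreateFlag values to FileContext#create; the
path and the CreateOpts used here are placeholders chosen for the example.

  import java.util.EnumSet;

  import org.apache.hadoop.fs.CreateFlag;
  import org.apache.hadoop.fs.FSDataOutputStream;
  import org.apache.hadoop.fs.FileContext;
  import org.apache.hadoop.fs.Options.CreateOpts;
  import org.apache.hadoop.fs.Path;

  public class CreateFlagSketch {
    public static void main(String[] args) throws Exception {
      FileContext fc = FileContext.getFileContext();
      Path file = new Path("/tmp/createflag-demo.txt");   // placeholder path

      // CREATE|APPEND: create the file if it is missing, otherwise append.
      try (FSDataOutputStream out = fc.create(file,
          EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND),
          CreateOpts.createParent())) {
        out.writeBytes("hello\n");
      }
    }
  }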
FileContext factory methods:

- @throws UnsupportedFileSystemException If the file system for
  absOrFqPath is not supported.
- @throws IOException If the file system for absOrFqPath could
  not be instantiated.
- @throws UnsupportedFileSystemException If the file system of
  defaultFsUri is not supported.

setWorkingDirectory - the new working directory (newWDir) can be one of:

- relative path: "foo/bar";
- absolute without scheme: "/foo/bar"
- fully qualified with scheme: "xx://auth/foo/bar"

Illegal WDs:

- relative with scheme: "xx:foo/bar"
- non existent directory
create - the file creation options (CreateOpts) are:

- Progress - to report progress on the operation - default null.
- Permission - umask is applied against permission: default is
  FsPermissions:getDefault().
- CreateParent - create missing parent path; default is to not
  create parents.
- The defaults for the following are SS defaults of the file server
  implementing the target path. Not all parameters make sense for all
  kinds of file system - e.g. localFS ignores Blocksize, replication,
  checksum:
  - BufferSize - buffer size used in FSDataOutputStream.
  - Blocksize - block size for file blocks.
  - ReplicationFactor - replication for blocks.
  - ChecksumParam - checksum parameters; server default is used
    if not specified.

@return {@link FSDataOutputStream} for the created file

@throws AccessControlException If access is denied
@throws FileAlreadyExistsException If file f already exists
@throws FileNotFoundException If parent of f does not exist
        and createParent is false
@throws ParentNotDirectoryException If parent of f is not a directory.
@throws UnsupportedFileSystemException If file system for f is
        not supported
@throws IOException If an I/O error occurred

Exceptions applicable to file systems accessed over RPC:
@throws RpcClientException If an exception occurred in the RPC client
@throws RpcServerException If an exception occurred in the RPC server
@throws UnexpectedServerException If server implementation throws
        undeclared exception to RPC server

RuntimeExceptions:
@throws InvalidPathException If path f is not valid
truncate(f, newLength) fails if:

- path is a directory,
- path does not exist,
- path is not closed, or
- the new size is greater than the current size.

@param f The path to the file to be truncated
@param newLength The size the file is to be truncated to
@return true if the file has been truncated to the desired newLength
        and is immediately available to be reused for write operations
        such as append, or false if a background process of adjusting
        the length of the last block has been started, and clients
        should wait for it to complete before proceeding with further
        file updates.
@throws AccessControlException If access is denied
@throws FileNotFoundException If file f does not exist
@throws UnsupportedFileSystemException If file system for f is
        not supported
@throws IOException If an I/O error occurred
rename with options fails if:

- src is a file and dst is a directory,
- src is a directory and dst is a file, or
- the parent of dst does not exist or is a file.

If the OVERWRITE option is not passed as an argument, rename fails if the
dst already exists. If the OVERWRITE option is passed as an argument,
rename overwrites the dst if it is a file or an empty directory; rename
fails if dst is a non-empty directory.

Note that atomicity of rename is dependent on the file system
implementation. Please refer to the file system documentation for details.

@param src path to be renamed
@param dst new path after rename
@param options rename options.
@throws FileAlreadyExistsException If dst already exists and
        options has {@link Options.Rename#OVERWRITE} option false.
@throws FileNotFoundException If src does not exist
@throws ParentNotDirectoryException If parent of dst is not a directory
@throws UnsupportedFileSystemException If file system for src
        and dst is not supported
@throws IOException If an I/O error occurred
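A minimal sketch of a rename that opts into overwrite semantics via
Options.Rename; the paths are placeholders chosen for the example.

  import org.apache.hadoop.fs.FileContext;
  import org.apache.hadoop.fs.Options;
  import org.apache.hadoop.fs.Path;

  public class RenameSketch {
    public static void main(String[] args) throws Exception {
      FileContext fc = FileContext.getFileContext();
      Path src = new Path("/data/staging/part-00000");   // placeholder paths
      Path dst = new Path("/data/final/part-00000");

      // Without OVERWRITE the rename fails if dst already exists; with it,
      // an existing file or empty directory at dst is replaced.
      fc.rename(src, dst, Options.Rename.OVERWRITE);
    }
  }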
@throws IOException If the given path does not refer to a symlink
        or an I/O error occurred

createSymlink - given a path referring to a symlink of form:

  <---X--->
  fs://host/A/B/link
  <-----Y----->

X is the scheme and authority that identify the file system, and Y is the
path leading up to the final path component "link". If Y is a symlink
itself then let Y' be the target of Y and X' be the scheme and authority
of Y'. Symlink targets may be:

1. Fully qualified URIs: fs://hostX/A/B/file is resolved according to the
   target file system.
2. Partially qualified URIs (e.g. scheme but no host): fs:///A/B/file is
   resolved according to the target file system. E.g. resolving a symlink
   to hdfs:///A results in an exception because HDFS URIs must be fully
   qualified, while a symlink to file:///A will not, since Hadoop's local
   file systems require partially qualified URIs.
3. Relative paths: path resolves to [Y'][path]. E.g. if Y resolves to
   hdfs://host/A and path is "../B/file" then [Y'][path] is
   hdfs://host/B/file.
4. Absolute paths: path resolves to [X'][path]. E.g. if Y resolves to
   hdfs://host/A/B and path is "/file" then [X][path] is hdfs://host/file.

@param target the target of the symbolic link
@param link the path to be created that points to target
@param createParent if true then missing parent dirs are created;
       if false then the parent must exist
@throws AccessControlException If access is denied
@throws FileAlreadyExistsException If file link already exists
@throws FileNotFoundException If target does not exist
@throws ParentNotDirectoryException If parent of link is not a directory.
@throws UnsupportedFileSystemException If file system for
        target or link is not supported
@throws IOException If an I/O error occurred

Extended attribute operations (refer to the HDFS extended attributes user
documentation for details):

- setXAttr(path, name, value[, flag]) - set an xattr on the given path;
  the optional flag is the xattr set flag.
- getXAttr(path, name) - @return byte[] xattr value.
- getXAttrs(path[, names]) - @return Map{@literal <}String,
  byte[]{@literal >} describing the XAttrs of the file or directory.
- listXAttrs(path) - @return List{@literal <}String{@literal >} of the
  XAttr names of the file or directory.
- removeXAttr(path, name) - remove the named extended attribute.

Each of these throws IOException if an I/O error occurred.
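A small, hedged sketch of the extended attribute calls summarized above,
using FileSystem; the path and attribute name are placeholders, and xattr
names carry a namespace prefix such as "user.".

  import java.nio.charset.StandardCharsets;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class XAttrSketch {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());
      Path p = new Path("/data/report.csv");              // placeholder path

      // Set, read back, list and remove a user-namespace attribute.
      fs.setXAttr(p, "user.origin",
          "etl-job-42".getBytes(StandardCharsets.UTF_8));
      byte[] value = fs.getXAttr(p, "user.origin");
      System.out.println(new String(value, StandardCharsets.UTF_8));
      System.out.println(fs.listXAttrs(p));
      fs.removeXAttr(p, "user.origin");
    }
  }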

Path Names

The Hadoop file system supports a URI namespace and URI names. This
enables multiple types of file systems to be referenced using
fully-qualified URIs. Two common Hadoop file system implementations are:

- the local file system: file:///path
- the HDFS file system: hdfs://nnAddress:nnPort/path

The Hadoop file system also supports additional naming schemes besides
URIs. Hadoop has the concept of a default file system, which implies a
default URI scheme and authority. This enables slash-relative names
relative to the default FS, which are more convenient for users and
application writers. The default FS is typically set by the user's
environment, though it can also be manually specified.

Hadoop also supports working-directory-relative names, which are paths
relative to the current working directory (similar to Unix). The working
directory can be in a different file system than the default FS.

Thus, Hadoop path names can be specified as one of the following:

- a fully-qualified URI: scheme://authority/path
  (e.g. hdfs://nnAddress:nnPort/foo/bar)
- a slash-relative name: path relative to the default file system
  (e.g. /foo/bar)
- a working-directory-relative name: path relative to the working dir
  (e.g. foo/bar)

Relative paths with scheme (scheme:foo/bar) are illegal.

Role of FileContext and Configuration Defaults

The FileContext is the analogue of per-process file-related state in Unix.
It contains two properties:

- the default file system (for resolving slash-relative names)
- the umask (for file permissions)

In general, these properties are obtained from the default configuration
file in the user's environment (see {@link Configuration}).

Further file system properties are specified on the server-side. File
system operations default to using these server-side defaults unless
otherwise specified. The file system related server-side defaults are:

- the home directory (default is "/user/userName")
- the initial wd (only for local fs)
- replication factor
- block size
- buffer size
- encryptDataTransfer
- checksum option (checksumType and bytesPerChecksum)

Example Usage

Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
Unspecified values come from core-defaults.xml in the release jar.

  myFContext = FileContext.getFileContext(); // uses the default config
                                             // which has your default FS
  myFContext.create(path, ...);
  myFContext.setWorkingDir(path);
  myFContext.open(path, ...);
  ...

Example 2: get a FileContext with a specific URI as the default FS

  myFContext = FileContext.getFileContext(URI);
  myFContext.create(path, ...);
  ...

Example 3: FileContext with local file system as the default

  myFContext = FileContext.getLocalFSFileContext();
  myFContext.create(path, ...);
  ...

Example 4: use a specific config, ignoring $HADOOP_CONFIG. Generally you
should not need to use a config unless you are doing so deliberately:

  configX = someConfigSomeOnePassedToYou;
  myFContext = getFileContext(configX); // configX is not changed,
                                        // is passed down
  myFContext.create(path, ...);
  ...
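A compilable sketch of Examples 1 and 3 above; the working directory and
file name are placeholders chosen for illustration.

  import org.apache.hadoop.fs.FSDataInputStream;
  import org.apache.hadoop.fs.FileContext;
  import org.apache.hadoop.fs.Path;

  public class FileContextExamples {
    public static void main(String[] args) throws Exception {
      // Example 1: the default config supplies the default file system.
      FileContext fc = FileContext.getFileContext();
      fc.setWorkingDirectory(new Path("/user/alice"));     // placeholder dir

      // "report.txt" is working-directory-relative: /user/alice/report.txt.
      try (FSDataInputStream in = fc.open(new Path("report.txt"))) {
        System.out.println("first byte: " + in.read());
      }

      // Example 3: local file system as the default.
      FileContext local = FileContext.getLocalFSFileContext();
      System.out.println(local.getWorkingDirectory());
    }
  }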
getScheme: this base implementation throws an UnsupportedOperationException.
@return the protocol scheme for this FileSystem.
@throws UnsupportedOperationException if the operation is unsupported
        (default).

FileSystem.get(uri, conf) caching:

- If the configuration has the property
  {@code "fs.$SCHEME.impl.disable.cache"} set to true, a new instance will
  be created, initialized with the supplied URI and configuration, then
  returned without being cached.
- If there is a cached FS instance matching the same URI, it will be
  returned.
- Otherwise, a new FS instance will be created, initialized with the
  configuration and URI, cached and returned to the caller.

@param uri uri of the filesystem.
@param conf configuration.
@return filesystem instance.
@throws IOException if the FileSystem cannot be instantiated.
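A hedged sketch of the caching behaviour described above; the HDFS URI is
a placeholder, and the disable-cache property name follows the
fs.$SCHEME.impl.disable.cache pattern for the hdfs scheme.

  import java.net.URI;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;

  public class FsCacheSketch {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      URI uri = URI.create("hdfs://nn.example.com:8020/");   // placeholder

      // With caching left enabled (the default), repeated get() calls for
      // the same URI return the same cached instance.
      FileSystem cached = FileSystem.get(uri, conf);

      // Disabling the cache for the scheme forces a fresh, uncached instance.
      conf.setBoolean("fs.hdfs.impl.disable.cache", true);
      FileSystem fresh = FileSystem.get(uri, conf);

      System.out.println("same instance? " + (cached == fresh));
    }
  }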
getFileBlockLocations(file, start, len):

  if f == null :
    result = null
  elif f.getLen() {@literal <=} start:
    result = []
  else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)]

This call is most helpful with a distributed filesystem where the
hostnames of machines that contain blocks of the given file can be
determined.

The default implementation returns an array containing one element:

  BlockLocation( { "localhost:9866" },  { "localhost" }, 0, file.getLen())

In HDFS, if the file is three-replicated, the returned array contains
elements like:

  BlockLocation(offset: 0, length: BLOCK_SIZE,
    hosts: {"host1:9866", "host2:9866, host3:9866"})
  BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
    hosts: {"host2:9866", "host3:9866, host4:9866"})

And if a file is erasure-coded, the returned BlockLocation are logical
block groups. Suppose we have a RS_3_2 coded file (3 data units and 2
parity units):

1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
   there will be one BlockLocation returned, with 0 offset, actual file
   size and 4 hosts (2 data blocks and 2 parity blocks) hosting the
   actual blocks.
2. If the file size is less than one group size but greater than one
   stripe size, then there will be one BlockLocation returned, with 0
   offset, actual file size with 5 hosts (3 data blocks and 2 parity
   blocks) hosting the actual blocks.
3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
   for example, then the result will be like:

  BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    "host2:9866","host3:9866","host4:9866","host5:9866"})
  BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
    "host4:9866", "host5:9866"})

@param file FileStatus to get data from
@param start offset into the given file
@param len length for which to get locations for
@throws IOException IO failure
@return block location array.

createNewFile - important: the default implementation is not atomic.
@param f path to use for create
@throws IOException IO failure
@return true if the file was newly created, false otherwise.
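A short sketch of asking a FileSystem where a file's blocks live, matching
the description above; the path is a placeholder.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.BlockLocation;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class BlockLocationSketch {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());
      FileStatus status = fs.getFileStatus(new Path("/data/part-00000"));

      // Ask for the locations of every block in the file.
      BlockLocation[] locations =
          fs.getFileBlockLocations(status, 0, status.getLen());
      for (BlockLocation loc : locations) {
        System.out.println(loc.getOffset() + "+" + loc.getLength()
            + " -> " + String.join(",", loc.getHosts()));
      }
    }
  }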
FileSystem rename with options fails if:

- src is a file and dst is a directory,
- src is a directory and dst is a file, or
- the parent of dst does not exist or is a file.

If the OVERWRITE option is not passed as an argument, rename fails if the
dst already exists. If the OVERWRITE option is passed as an argument,
rename overwrites the dst if it is a file or an empty directory; rename
fails if dst is a non-empty directory.

Note that atomicity of rename is dependent on the file system
implementation. Please refer to the file system documentation for details.
This default implementation is non atomic.

This method is deprecated since it is a temporary method added to support
the transition from FileSystem to FileContext for user applications.

@param src path to be renamed
@param dst new path after rename
@param options rename options.
@throws FileNotFoundException src path does not exist, or the parent
        path of dst does not exist.
@throws FileAlreadyExistsException dest path exists and is a file
@throws ParentNotDirectoryException if the parent path of dest is not
        a directory
@throws IOException on failure

truncate fails if:

- path is a directory,
- path does not exist,
- path is not closed, or
- the new size is greater than the current size.

The parameters and return value are as described for truncate above.
@throws IOException IO failure
@throws UnsupportedOperationException if the operation is unsupported
        (default).

deleteOnExit:

- Clean shutdown of the JVM cannot be guaranteed.
- The time to shut down a FileSystem will depend on the number of files
  to delete. For filesystems where the cost of checking for the existence
  of a file/directory and the actual delete operation (for example:
  object stores) is high, the time to shutdown the JVM can be
  significantly extended by over-use of this feature.
- Connectivity problems with a remote filesystem may delay shutdown
  further, and may cause the files to not be deleted.

@param f the path to delete.
@return true if deleteOnExit is successful, otherwise false.
@throws IOException IO failure

listStatus does not guarantee to return the List of files/directories
status in a sorted order. It will not return null; expect IOException
upon access error.

@param f given path
@return the statuses of the files/directories in the given path
@throws FileNotFoundException when the path does not exist
@throws IOException see specific implementation

The filtered and multi-path variants accept a user-supplied path filter
and/or a list of paths, and return the statuses of the files under the
given paths after applying the filter.

globStatus - return all the files that match filePattern and are not
checksum files. Results are sorted by their names.

A filename pattern is composed of regular characters and special pattern
matching characters, which are:

  ?              Matches any single character.
  *              Matches zero or more characters.
  [abc]          Matches a single character from character set {a,b,c}.
  [a-b]          Matches a single character from the character range
                 {a...b}. Note that character a must be lexicographically
                 less than or equal to character b.
  [^a]           Matches a single character that is not from character set
                 or range {a}. Note that the ^ character must occur
                 immediately to the right of the opening bracket.
  \c             Removes (escapes) any special meaning of character c.
  {ab,cd}        Matches a string from the string set {ab, cd}.
  {ab,c{de,fh}}  Matches a string from the string set {ab, cde, cfh}.

@param pathPattern a glob specifying a path pattern
@return an array of paths that match the path pattern
@throws IOException IO failure
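A minimal sketch applying the pattern syntax above; the directory layout
is an assumption made for the example.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class GlobSketch {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());

      // Matches e.g. /logs/2023-01-01/part-0 ... part-9 under any dated dir.
      FileStatus[] matches =
          fs.globStatus(new Path("/logs/2023-*/part-[0-9]*"));
      if (matches != null) {
        for (FileStatus st : matches) {
          System.out.println(st.getPath());
        }
      }
    }
  }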
listFiles(f, recursive) - if the path is a directory: when recursive is
false, returns files in the directory; when recursive is true, returns
files in the subtree rooted at the path. If the path is a file, returns
the file's status and block locations.

@param f is the path
@param recursive if the subdirectories need to be traversed recursively
@return an iterator that traverses statuses of the files
@throws FileNotFoundException when the path does not exist
@throws IOException see specific implementation

msync - in some FileSystem implementations such as HDFS, metadata
synchronization is essential to guarantee consistency of read requests,
particularly in an HA setting.
@throws IOException If an I/O error occurred.
@throws UnsupportedOperationException if the operation is unsupported.

The FileSystem extended attribute operations mirror those described above
(refer to the HDFS extended attributes user documentation for details);
each may additionally throw UnsupportedOperationException if the operation
is unsupported (default outcome).

getStorageStatistics - this is a default method which is intended to be
overridden by subclasses. The default implementation returns an empty
storage statistics object.
@return The StorageStatistics for this FileSystem instance. Will never
        be null.
All user code that may potentially use the Hadoop Distributed File System
should be written to use a FileSystem object or its successor,
{@link FileContext}.

The local implementation is {@link LocalFileSystem} and distributed
implementation is DistributedFileSystem. There are other implementations
for object stores and (outside the Apache Hadoop codebase) third party
filesystems.

Notes

1. The behaviour of the filesystem is specified in the Hadoop
   documentation. However, the normative specification of the behavior of
   this class is actually HDFS: if HDFS does not behave the way these
   Javadocs or the specification in the Hadoop documentations define,
   assume that the documentation is incorrect.
2. The term {@code FileSystem} refers to an instance of this class.
3. The acronym "FS" is used as an abbreviation of FileSystem.
4. The term {@code filesystem} refers to the distributed/local filesystem
   itself, rather than the class used to interact with it.
5. The term "file" refers to a file in the remote filesystem, rather than
   instances of {@code java.io.File}.

This is a carefully evolving class. New methods may be marked as Unstable
or Evolving for their initial release, as a warning that they are new and
may change based on the experience of use in applications.

Important note for developers

If you are making changes here to the public API or protected methods,
you must review the following subclasses and make sure that they are
filtering/passing through new methods as appropriate.

{@link FilterFileSystem}: methods are passed through. If not, then
{@code TestFilterFileSystem.MustNotImplement} must be updated with the
unsupported interface. Furthermore, if the new API's support is probed for
via {@link #hasPathCapability(Path, String)} then
{@link FilterFileSystem#hasPathCapability(Path, String)} must return
false, always.

{@link ChecksumFileSystem}: checksums are created and verified.

{@code TestHarFileSystem} will need its {@code MustNotImplement} interface
updated.

There are some external places your changes will break things. Do
co-ordinate changes here.

HBase: HBoss

Hive: HiveShim23
{@code shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java}
copy returns true if the operation succeeded. When deleteSource is true,
this means "after the copy, delete(source) returned true". If the
destination is a directory, and mkdirs(dest) fails, the operation will
return false rather than raise any exception.

The overwrite flag is about overwriting files; it has no effect on
handling an attempt to copy a file atop a directory (expect an
IOException), or a directory over a path which contains a file (mkdir
will fail, so "false").

The operation is recursive, and the deleteSource operation takes place as
each subdirectory is copied. Therefore, if an operation fails partway
through, the source tree may be partially deleted.

@param srcFS source filesystem
@param srcStatus status of source
@param dstFS destination filesystem
@param dst destination path
@param deleteSource delete the source?
@param overwrite overwrite files at destination?
@param conf configuration to use when opening files
@return true if the operation succeeded.
@throws IOException failure
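A hedged sketch of a recursive copy between filesystems using
FileUtil.copy with the parameters listed above; the source and destination
paths are placeholders.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.FileUtil;
  import org.apache.hadoop.fs.Path;

  public class CopySketch {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      FileSystem srcFS = FileSystem.getLocal(conf);
      FileSystem dstFS = FileSystem.get(conf);

      FileStatus srcStatus = srcFS.getFileStatus(new Path("/tmp/export"));
      // deleteSource=false keeps the source tree; overwrite=true replaces
      // files that already exist under the destination.
      boolean ok = FileUtil.copy(srcFS, srcStatus, dstFS,
          new Path("/backups/export"), false, true, conf);
      System.out.println("copy succeeded? " + ok);
    }
  }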
A Map{@literal <}String, String{@literal >} of the caller's environment
variables is used for expansion.
@return String[] with absolute path to new jar in position 0 and
        unexpanded wild card entry path in position 1
@throws IOException if there is an I/O error while writing the jar file

FilterFileSystem contains some other file system, which it uses as its
basic file system, possibly transforming the data along the way or
providing additional functionality. The class FilterFileSystem itself
simply overrides all methods of FileSystem with versions that pass all
requests to the contained file system. Subclasses of FilterFileSystem may
further override some of these methods and may also provide additional
methods and fields.

Builder: the type parameter is the return type on the {@link #build()}
call, i.e. the type of the builder itself.

read returns -1 if there is no more data because the end of the stream
has been reached.

readFully reads until length bytes have been read.
@param position position in the input stream to seek
@param buffer buffer into which data is read
@param offset offset into the buffer in which data is written
@param length the number of bytes to read
@throws IOException IO problems
@throws EOFException If the end of stream is reached while reading. If an
        exception is thrown an undetermined number of bytes in the buffer
        may have been written.

createFile builder usage:

  // Don't
  if (fs instanceof FooFileSystem) {
    FooFileSystem fs = (FooFileSystem) fs;
    OutputStream out = dfs.createFile(path)
      .optionA()
      .optionB("value")
      .cache()
      .build()
  } else if (fs instanceof BarFileSystem) {
    ...
  }

  // Do
  OutputStream out = fs.createFile(path)
    .permission(perm)
    .bufferSize(bufSize)
    .opt("foofs:option.a", true)
    .opt("foofs:option.b", "value")
    .opt("barfs:cache", true)
    .must("foofs:cache", true)
    .must("barfs:cache-size", 256 * 1024 * 1024)
    .build();

If the option is not related to the file system, the option will be
ignored. If the option is must, but not supported by the file system, an
{@link IllegalArgumentException} will be thrown.

The interface extends {@link IOStatisticsSource} so that there is no need
to cast an instance to see if it is a source of statistics. However,
implementations MAY return null for their actual statistics.
PartialListing - a partial listing of the children of a parent directory.
Since it is a partial listing, multiple PartialListing may need to be
combined to obtain the full listing of a parent directory.

ListingBatch behaves similar to a Future, in that getting the result via
{@link #get()} will throw an Exception if there was a failure.
@return whether the path is absolute and the URI has no scheme nor
        authority parts

PathFilter: @return true if and only if pathname should be included.

Positional reads:

Warning: not all filesystems satisfy the thread-safety requirement.
@param position position within file
@param buffer destination buffer
@param offset offset in the buffer
@param length number of bytes to read
@return actual number of bytes read; -1 means "none"
@throws IOException IO problems.
The fully-reading variants throw EOFException if the end of the data was
reached before the read operation completed.

readVectored:

The position returned by getPos() after readVectored() is undefined.

If a file is changed while the readVectored() operation is in progress,
the output is undefined. Some ranges may have old data, some may have
new, and some may have both.

While a readVectored() operation is in progress, normal read API calls
may block.

@param ranges the byte ranges to read
@param allocate the function to allocate ByteBuffer
@throws IOException any IOE.
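A sketch of a vectored read, assuming the FileRange.createFileRange
factory and the CompletableFuture returned by each range's getData();
these names reflect the vectored-read API and may differ across Hadoop
versions, and the path and offsets are placeholders.

  import java.nio.ByteBuffer;
  import java.util.Arrays;
  import java.util.List;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FSDataInputStream;
  import org.apache.hadoop.fs.FileRange;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class VectoredReadSketch {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());
      // Two arbitrary, non-overlapping ranges chosen for illustration.
      List<FileRange> ranges = Arrays.asList(
          FileRange.createFileRange(0, 4096),
          FileRange.createFileRange(1 << 20, 4096));

      try (FSDataInputStream in = fs.open(new Path("/data/part-00000"))) {
        // Hand all ranges to the stream in one call; each range's data
        // arrives asynchronously on its future.
        in.readVectored(ranges, ByteBuffer::allocate);
        for (FileRange range : ranges) {
          ByteBuffer data = range.getData().get();   // waits for completion
          System.out.println(range.getOffset() + " -> "
              + data.remaining() + " bytes");
        }
      }
    }
  }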
Note: the returned list is not sorted in any given order, due to reliance
on Java's {@link File#list()} API.

XAttrCodec - an XAttr value is byte[]; this class converts byte[] to some
kind of string representation and back. The string representation is
convenient for display and input, for example display on screen as a
shell or JSON response, and input as an http or shell parameter.

Values MUST NOT be part of/refer to any object instance of significant
memory size. Applications SHOULD remove references when they are no
longer needed. When logged at TRACE, prints the key and stack trace of
the caller, to allow for debugging of any problems.
@param key key
@param value new value
@return old value or null

A {@link FileSystem} backed by an FTP client provided by Apache Commons
Net.
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is for reporting and testing.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + These are low-cost per-instance statistics provided by any Hadoop + I/O class instance. +

    + Consult the filesystem specification document for the requirements + of an implementation of this interface.]]> + + + + + + + + + + + + + + + + + + + + + + + Exceptions are caught and downgraded to debug logging. + @param source source of statistics. + @return a string for logging.]]> + + + + + + + + + + + + + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is designed to affordable to use in log statements. + @param source source of statistics -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is for use in log statements where for the cost of creation + of this entry is low; it is affordable to use in log statements. + @param statistics statistics to stringify -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It is serializable so that frameworks which can use java serialization + to propagate data (Spark, Flink...) can send the statistics + back. For this reason, TreeMaps are explicitly used as field types, + even though IDEs can recommend use of Map instead. + For security reasons, untrusted java object streams should never be + deserialized. If for some reason this is required, use + {@link #requiredSerializationClasses()} to get the list of classes + used when deserializing instances of this object. +

    +

    + It is annotated for correct serializations with jackson2. +

    ]]> +
    + + + + + + + + + This is not an atomic option. +

    + The instance can be serialized, and its + {@code toString()} method lists all the values. + @param statistics statistics + @return a snapshot of the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used to accrue values so as to dynamically update + the mean. If so, know that there is no synchronization + on the methods. +

    +

    + If a statistic has 0 samples then it is considered to be empty. +

    +

    + All 'empty' statistics are equivalent, independent of the sum value. +

    +

    + For non-empty statistics, sum and sample values must match + for equality. +

    +

    + It is serializable and annotated for correct serializations with jackson2. +

    +

    + Thread safety. The operations to add/copy sample data, are thread safe. +

    +
      +
• {@link #add(MeanStatistic)}
• {@link #addSample(long)}
• {@link #clear()}
• {@link #setSamplesAndSum(long, long)}
• {@link #set(MeanStatistic)}
• {@link #setSamples(long)} and {@link #setSum(long)}
    +

+ So is the {@link #mean()} method. This ensures that when + used to aggregate statistics, the aggregate value and sample + count are set and evaluated consistently. +

    +

+ Other methods are marked as synchronized because Findbugs overreacts + to the idea that some operations which update sum and sample count + are synchronized while things like equals are not. +

    ]]> +
    +
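A minimal usage sketch of the behaviour described above (illustrative only, not part of this patch; it assumes the class is org.apache.hadoop.fs.statistics.MeanStatistic with a public no-arg constructor and a getSamples() accessor, in addition to the methods referenced in the javadoc):

    import org.apache.hadoop.fs.statistics.MeanStatistic;

    public class MeanStatisticSketch {
      public static void main(String[] args) {
        MeanStatistic latency = new MeanStatistic();  // assumed no-arg constructor
        latency.addSample(12);   // addSample(long) is one of the thread-safe operations
        latency.addSample(30);

        MeanStatistic other = new MeanStatistic();
        other.addSample(18);
        latency.add(other);      // add(MeanStatistic) merges another statistic

        // mean() is synchronized, so sum and sample count are read consistently.
        System.out.println("samples=" + latency.getSamples() + ", mean=" + latency.mean());
      }
    }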
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + names)}: {@value}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When adding new common statistic name constants, please make them unique. + By convention: +

    +
      +
• The names of the constants are uppercase, with words separated by underscores.
• The values of the constants are the lowercase of the constant names.
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When adding new common statistic name constants, please make them unique. + By convention, they are implicitly unique: +
      +
• The names of the constants are uppercase, with words separated by underscores.
• The values of the constants are the lowercase of the constant names.
    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Since these methods are often vendor- or device-specific, operators + may implement this interface in order to achieve fencing. +

    + Fencing is configured by the operator as an ordered list of methods to + attempt. Each method will be tried in turn, and the next in the list + will only be attempted if the previous one fails. See {@link NodeFencer} + for more information. +

    + If an implementation also implements {@link Configurable} then its + setConf method will be called upon instantiation.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + state (e.g ACTIVE/STANDBY) as well as + some additional information. + + @throws AccessControlException + if access is denied. + @throws IOException + if other errors happen + @see HAServiceStatus + @return HAServiceStatus.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.http.filter.initializers. + +

      +
    • StaticUserWebFilter - An authorization plugin that makes all +users a static configured user. +
    ]]> +
    +
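As a hedged illustration (not part of this patch) of how the property is typically populated, the fully qualified class name below is an assumption for the StaticUserWebFilter mentioned above:

    import org.apache.hadoop.conf.Configuration;

    public class HttpFilterInitializerSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // hadoop.http.filter.initializers takes a comma-separated list of initializer classes.
        conf.set("hadoop.http.filter.initializers",
            "org.apache.hadoop.http.lib.StaticUserWebFilter");  // assumed fully qualified name
      }
    }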
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value argument is null or + its size is zero, the elementType argument must not be null. If + the argument value's size is bigger than zero, the argument + elementType is not be used. + + @param value enumSet value. + @param elementType elementType.]]> + + + + + value should not be null + or empty. + + @param value enumSet value.]]> + + + + + + + + + + + + + + value and elementType. If the value argument + is null or its size is zero, the elementType argument must not be + null. If the argument value's size is bigger than zero, the + argument elementType is not be used. + + @param value enumSet Value. 
+ @param elementType elementType.]]> + + + + + + + + + + + + + + + + + + + o is an EnumSetWritable with the same value, + or both are null.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more effective, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines + the classes that will be wrapped in GenericObject in the application. + Attention: the classes defined in getTypes() must + implement the Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
+   private static Class<? extends Writable>[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
+   protected Class<? extends Writable>[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close + @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)} + instead]]> + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param logger the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is better than File#listDir because it does not ignore IOExceptions. + + @param dir The directory to list. + @param filter If non-null, the filter to use when listing + this directory. + @return The list of files in the directory. + + @throws IOException On I/O error]]> + + + + + + + + Borrowed from Uwe Schindler in LUCENE-5588 + @param fileToSync the file to fsync + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + className by first finding + it in the specified conf. If the specified conf is null, + try load it directly. + + @param conf configuration. + @param className classname. + @return Class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param generic type. + @see DeserializerComparator]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link SequenceFile.Writer}, + {@link SequenceFile.Reader} and {@link Sorter} classes for writing, + reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
1. Writer : Uncompressed records.
2. RecordCompressWriter : Record-compressed files, only compress values.
3. BlockCompressWriter : Block-compressed files, both keys & values are collected in 'blocks' separately and compressed. The size of the 'block' is configurable.
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

The recommended way is to use the static createWriter methods + provided by the SequenceFile to choose the preferred format.

    + +

    The {@link SequenceFile.Reader} acts as the bridge and can read any of the + above SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
• version - 3 bytes of magic header SEQ, followed by 1 byte of actual version number (e.g. SEQ4 or SEQ6)
• keyClassName - key class
• valueClassName - value class
• compression - A boolean which specifies if compression is turned on for keys/values in this file.
• blockCompression - A boolean which specifies if block-compression is turned on for keys/values in this file.
• compression codec - CompressionCodec class which is used for compression of keys and/or values (if compression is enabled).
• metadata - {@link Metadata} for this file.
• sync - A sync marker to denote end of the header.
    + +
    Uncompressed SequenceFile Format
    +
      +
• Header
• Record
  • Record length
  • Key length
  • Key
  • Value
• A sync-marker every few 100 kilobytes or so.
    + +
    Record-Compressed SequenceFile Format
    +
      +
• Header
• Record
  • Record length
  • Key length
  • Key
  • Compressed Value
• A sync-marker every few 100 kilobytes or so.
    + +
    Block-Compressed SequenceFile Format
    +
      +
• Header
• Record Block
  • Uncompressed number of records in the block
  • Compressed key-lengths block-size
  • Compressed key-lengths block
  • Compressed keys block-size
  • Compressed keys block
  • Compressed value-lengths block-size
  • Compressed value-lengths block
  • Compressed values block-size
  • Compressed values block
• A sync-marker every block.
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
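The formats described above can be exercised with a short sketch (illustrative only, not part of this patch); the path is a placeholder and RECORD compression is chosen to match the record-compressed layout:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SequenceFileSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example.seq");   // placeholder path

        // Write a few key/value pairs; RECORD compression compresses values only.
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(file),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(Text.class),
            SequenceFile.Writer.compression(SequenceFile.CompressionType.RECORD))) {
          for (int i = 0; i < 3; i++) {
            writer.append(new IntWritable(i), new Text("value-" + i));
          }
        }

        // The Reader works out the format (uncompressed/record/block) from the header.
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
            SequenceFile.Reader.file(file))) {
          IntWritable key = new IntWritable();
          Text value = new Text();
          while (reader.next(key, value)) {
            System.out.println(key + "\t" + value);
          }
        }
      }
    }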
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ShortWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instantiation. + + @param position input position. + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte.]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. + + @param what input what. + @param start input start. + @return byte position of the first occurrence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: For performance reasons, this call does not clear the + underlying byte array that is retrievable via {@link #getBytes()}. + In order to free the byte-array memory, call {@link #set(byte[])} + with an empty byte array (For example, new byte[0]).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + + @param utf8 input utf8. + @param start input start. + @param length input length. + @param replace input replace. + @throws CharacterCodingException a character encoding or + decoding error occurs.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + + @param string input string. + @param replace input replace. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit() + @throws CharacterCodingException a character encoding or decoding error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException any other problem for write.]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deseriablize this object from. + @throws IOException any other problem for readFields.]]> +
    +
    + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +
    +     public class MyWritable implements Writable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +
    +       // Default constructor to allow (de)serialization
    +       MyWritable() { }
    +
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 
    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Note that hashCode() is frequently used in Hadoop to partition + keys. It's important that your implementation of hashCode() returns the same + result across different instances of the JVM. Note also that the default + hashCode() implementation in Object does not + satisfy this property.

    + +

    Example:

    +
    +     public class MyWritableComparable implements
+      WritableComparable{@literal <MyWritableComparable>} {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable o) {
+         int thisValue = this.counter;
+         int thatValue = o.counter;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +
    +       public int hashCode() {
    +         final int prime = 31;
    +         int result = 1;
    +         result = prime * result + counter;
    +         result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+         return result;
    +       }
    +     }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implementation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Generics Type T. + @param orig The object to copy + @param conf input Configuration. + @return The copied object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @param conf the Configuration object which contains confs for creating or reinit the compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec object]]> + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + (Both native and non-native versions of various Decompressors require + that the data passed in via b[] remain unmodified until + the caller is explicitly notified--via {@link #needsInput()}--that the + buffer may be safely modified. With this requirement, an extra + buffer-copy can be avoided.) + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called to + provide more input. + + @return true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called in + order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the decompressed + data output stream has been reached. Indicates a concatenated data stream + when finished() returns true and {@link #getRemaining()} + returns a positive value. finished() will be reset with the + {@link #reset()} method. + @return true if the end of the decompressed + data output stream has been reached.]]> + + + + + + + + + + + + + + true and getRemaining() returns a positive value. If + {@link #finished()} returns true and getRemaining() returns + a zero value, indicates that the end of data stream has been reached and + is not a concatenated data stream. + @return The number of bytes remaining in the compressed data buffer.]]> + + + + + true and {@link #getRemaining()} returns a positive value, + reset() is called before processing of the next data stream in the + concatenated data stream. 
{@link #finished()} will be reset and will + return false when reset() is called.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec + io.compress.passthrough.extension = .gz + + + Note: this is not a Splittable codec: it doesn't know the + capabilities of the passed in stream. It should be possible to + extend this in a subclass: the inner classes are marked as protected + to enable this. Do not retrofit splitting to this class..]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks.The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
• tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default + to 1MB. Values of length less than the chunk size are guaranteed to have a + known value length at read time (See + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a setting of minimum block size between + 256KB to 1MB for general usage. Larger block size is preferred if files are + primarily for sequential access. However, it would lead to inefficient random + access (because there are more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in Compression codec, the smallest possible block + size would be around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks. +
• Compression codec. Use "none" if the data is not very compressible (by + compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" offers a slightly better + compression ratio than "lzo" but requires 4x CPU to compress and 2x CPU to + decompress, compared to "lzo". +
    • File system buffering, if the underlying FSDataInputStream and + FSDataOutputStream is already adequately buffered; or if applications + reads/writes keys and values in large buffers, we can reduce the sizes of + input/output buffering in TFile layer by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + Utils#writeVLong(out, n). + + @param out + output stream + @param n + The integer to be encoded + @throws IOException raised on errors performing I/O. + @see Utils#writeVLong(DataOutput, long)]]> + + + + + + + + +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; + byte[3]=n&0xff. + Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; + byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; + byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff; + byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; + byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff; + byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; + byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]= + (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>54)&0xff; byte[2] = (n>>48)&0xff; + byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff; + byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]= + (n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException raised on errors performing I/O.]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException raised on errors performing I/O. + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
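A round-trip sketch of this encoding and of the decoding described next (illustrative only, not part of this patch; it relies on the Utils#writeVLong(DataOutput, long) and Utils#readVLong(DataInput) signatures referenced in the javadoc):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.io.file.tfile.Utils;

    public class VLongSketch {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
          Utils.writeVLong(out, 100L);        // in [-32, 127): encoded in one byte
          Utils.writeVLong(out, 1_000_000L);  // larger values use more bytes
        }
        try (DataInputStream in =
            new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
          System.out.println(Utils.readVLong(in));  // 100
          System.out.println(Utils.readVLong(in));  // 1000000
        }
      }
    }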
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + + (NB[0]&0xff)<<8 + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff) + <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param generic type. + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
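For illustration only (not part of this patch), the property can also be set programmatically; the class names below are assumptions based on the serializations named in this package description and in the Avro section that follows:

    import org.apache.hadoop.conf.Configuration;

    public class SerializationConfigSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Keep WritableSerialization first and append JavaSerialization at the end.
        conf.setStrings("io.serializations",
            "org.apache.hadoop.io.serializer.WritableSerialization",
            "org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization",
            "org.apache.hadoop.io.serializer.avro.AvroReflectSerialization",
            "org.apache.hadoop.io.serializer.JavaSerialization");
      }
    }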
    + + + + + + + + + + + + + + + + + + + avro.reflect.pkgs or implement + {@link AvroReflectSerializable} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + +This package provides Avro serialization in Hadoop. This can be used to +serialize/deserialize Avro types in Hadoop. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations of this interface consume the {@link MetricsRecord} generated + from {@link MetricsSource}. It registers with {@link MetricsSystem} which + periodically pushes the {@link MetricsRecord} to the sink using + {@link #putMetrics(MetricsRecord)} method. If the implementing class also + implements {@link Closeable}, then the MetricsSystem will close the sink when + it is stopped.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the actual type of the source object + @param source object to register + @return the source object + @exception MetricsException Metrics Exception.]]> + + + + + + + + the actual type of the source object + @param source object to register + @param name of the source. Must be unique or null (then extracted from + the annotations of the source object.) + @param desc the description of the source (or null. See above.) + @return the source object + @exception MetricsException Metrics Exception.]]> + + + + + + + + + + + + + + + + + + + + +
• {@link MetricsSource} generates and updates metrics information.
• {@link MetricsSink} consumes the metrics information.
+ {@link MetricsSource} and {@link MetricsSink} register with the metrics + system. Implementations of {@link MetricsSystem} poll the + {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to + {@link MetricsSink}.]]> +
    +
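A rough sketch of the registration flow just described (illustrative only, not part of this patch; it assumes the metrics2 annotation classes and that the mutable counter field is injected on registration, and the names "demo", DemoSource and demoOps are made up):

    import org.apache.hadoop.metrics2.MetricsSystem;
    import org.apache.hadoop.metrics2.annotation.Metric;
    import org.apache.hadoop.metrics2.annotation.Metrics;
    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
    import org.apache.hadoop.metrics2.lib.MutableCounterLong;

    @Metrics(about = "Demo metrics source", context = "demo")
    public class DemoSource {
      @Metric("Number of demo operations")
      MutableCounterLong demoOps;   // assumed to be injected when the source is registered

      public static void main(String[] args) {
        // Sources and sinks register with the metrics system; the system polls
        // sources periodically and pushes the resulting records to the sinks.
        MetricsSystem ms = DefaultMetricsSystem.initialize("demo");
        DemoSource source = ms.register("DemoSource", "A demo metrics source", new DemoSource());
        source.demoOps.incr();
      }
    }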
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } (aggregate). + Filter out entries that don't have at least minSamples. + + @param minSamples input minSamples. + @return a map of peer DataNode Id to the average latency to that + node seen over the measurement period.]]> + + + + + + + + + + + This class maintains a group of rolling average metrics. It implements the + algorithm of rolling average, i.e. a number of sliding windows are kept to + roll over and evict old subsets of samples. Each window has a subset of + samples in a stream, where sub-sum and sub-total are collected. All sub-sums + and sub-totals in all windows will be aggregated to final-sum and final-total + used to compute final average, which is called rolling average. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a metrics sink that uses + {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every + roll interval a new directory will be created under the path specified by the + basepath property. All metrics will be logged to a file in the + current interval's directory in a file named <hostname>.log, where + <hostname> is the name of the host on which the metrics logging + process is running. The base path is set by the + <prefix>.sink.<instance>.basepath property. The + time zone used to create the current interval's directory name is GMT. If + the basepath property isn't specified, it will default to + "/tmp", which is the temp directory on whatever default file + system is configured for the cluster.

    + +

    The <prefix>.sink.<instance>.ignore-error + property controls whether an exception is thrown when an error is encountered + writing a log file. The default value is true. When set to + false, file errors are quietly swallowed.

    + +

    The roll-interval property sets the amount of time before + rolling the directory. The default value is 1 hour. The roll interval may + not be less than 1 minute. The property's value should be given as + number unit, where number is an integer value, and + unit is a valid unit. Valid units are minute, hour, + and day. The units are case insensitive and may be abbreviated or + plural. If no units are specified, hours are assumed. For example, + "2", "2h", "2 hour", and + "2 hours" are all valid ways to specify two hours.

    + +

    The roll-offset-interval-millis property sets the upper + bound on a random time interval (in milliseconds) that is used to delay + before the initial roll. All subsequent rolls will happen an integer + number of roll intervals after the initial roll, hence retaining the original + offset. The purpose of this property is to insert some variance in the roll + times so that large clusters using this sink on every node don't cause a + performance impact on HDFS by rolling simultaneously. The default value is + 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in + millis should be no less than the number of sink instances times 5. + +

    The primary use of this class is for logging to HDFS. As it uses + {@link org.apache.hadoop.fs.FileSystem} to access the target file system, + however, it can be used to write to the local file system, Amazon S3, or any + other supported file system. The base path for the sink will determine the + file system used. An unqualified path will write to the default file system + set by the configuration.

    + +

    Not all file systems support the ability to append to files. In file + systems without the ability to append to files, only one writer can write to + a file at a time. To allow for concurrent writes from multiple daemons on a + single host, the source property is used to set unique headers + for the log files. The property should be set to the name of + the source daemon, e.g. namenode. The value of the + source property should typically be the same as the property's + prefix. If this property is not set, the source is taken to be + unknown.

    + +

    Instead of appending to an existing file, by default the sink + will create a new file with a suffix of ".<n>", where + n is the next lowest integer that isn't already used in a file name, + similar to the Hadoop daemon logs. NOTE: the file with the highest + sequence number is the newest file, unlike the Hadoop daemon logs.

    + +

    For file systems that allow append, the sink supports appending to the + existing file instead. If the allow-append property is set to + true, the sink will instead append to the existing file on file systems that + support appends. By default, the allow-append property is + false.

    + +

    Note that when writing to HDFS with allow-append set to true, + there is a minimum acceptable number of data nodes. If the number of data + nodes drops below that minimum, the append will succeed, but reading the + data will fail with an IOException in the DataStreamer class. The minimum + number of data nodes required for a successful append is generally 2 or + 3.

    + +

    Note also that when writing to HDFS, the file size information is not + updated until the file is closed (at the end of the interval) even though + the data is being written successfully. This is a known HDFS limitation that + exists because of the performance cost of updating the metadata. See + HDFS-5478.

    + +

    When using this sink in a secure (Kerberos) environment, two additional + properties must be set: keytab-key and + principal-key. keytab-key should contain the key by + which the keytab file can be found in the configuration, for example, + yarn.nodemanager.keytab. principal-key should + contain the key by which the principal can be found in the configuration, + for example, yarn.nodemanager.principal.]]> + + + + + + + + + + + + + + + + + + + + + + + + + CollectD StatsD plugin). +
    + To configure this plugin, you will need to add the following + entries to your hadoop-metrics2.properties file: +
    +

    + *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
    + [prefix].sink.statsd.server.host=
    + [prefix].sink.statsd.server.port=
    + [prefix].sink.statsd.skip.hostname=true|false (optional)
    + [prefix].sink.statsd.service.name=NameNode (name you want for service)
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName serviceName. + @param nameName nameName. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName serviceName. + @param nameName nameName. + @param properties - Key value pairs to define additional JMX ObjectName + properties. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @param specs server specs (see description) + @param defaultPort the default port if not specified + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used when parts of Hadoop need know whether to apply + single rack vs multi-rack policies, such as during block placement. + Such algorithms behave differently if they are on multi-switch systems. +

    + + @return true if the mapping thinks that it is on a single switch]]> +
    +
    + + + + + + + + + + + + + + + + + This predicate simply assumes that all mappings not derived from + this class are multi-switch. + @param mapping the mapping to query + @return true if the base class says it is single switch, or the mapping + is not derived from this class.]]> + + + + It is not mandatory to + derive {@link DNSToSwitchMapping} implementations from it, but it is strongly + recommended, as it makes it easy for the Hadoop developers to add new methods + to this base class that are automatically picked up by all implementations. +

    + + This class does not extend the Configured + base class, and should not be changed to do so, as it causes problems + for subclasses. The constructor of the Configured calls + the {@link #setConf(Configuration)} method, which will call into the + subclasses before they have been fully constructed.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If a name cannot be resolved to a rack, the implementation + should return {@link NetworkTopology#DEFAULT_RACK}. This + is what the bundled implementations do, though it is not a formal requirement + + @param names the list of hosts to resolve (can be empty) + @return list of resolved network paths. + If names is empty, the returned list is also empty]]> + + + + + + + + + + + + + + + + + + + + + + + + Calling {@link #setConf(Configuration)} will trigger a + re-evaluation of the configuration settings and so be used to + set up the mapping script.]]> + + + + + + + + + + + + + + + + + + + + + This will get called in the superclass constructor, so a check is needed + to ensure that the raw mapping is defined before trying to relaying a null + configuration. +

    + @param conf input Configuration.]]> +
    + + + + + + + + + It contains a static class RawScriptBasedMapping that performs + the work: reading the configuration parameters, executing any defined + script, handling errors and such like. The outer + class extends {@link CachedDNSToSwitchMapping} to cache the delegated + queries. +

    + This DNS mapper's {@link #isSingleSwitch()} predicate returns + true if and only if a script is defined.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text + file. The columns are separated by whitespace. The first column is a DNS or + IP address and the second column specifies the rack where the address maps. +

    +

    + This class uses the configuration parameter {@code + net.topology.table.file.name} to locate the mapping file. +

    +

    + Calls to {@link #resolve(List)} will look up the address as defined in the + mapping file. If no entry corresponding to the address is found, the value + {@code /default-rack} is returned. +

    ]]> +
    +
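A hedged sketch (not part of this patch) of selecting and querying this mapping directly; the net.topology.node.switch.mapping.impl key and the table file path are assumptions used for illustration:

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.net.DNSToSwitchMapping;
    import org.apache.hadoop.net.TableMapping;

    public class TableMappingSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Assumed key for the mapping implementation; the table file key is documented above.
        conf.setClass("net.topology.node.switch.mapping.impl",
            TableMapping.class, DNSToSwitchMapping.class);
        conf.set("net.topology.table.file.name", "/etc/hadoop/topology.table");  // placeholder

        TableMapping mapping = new TableMapping();
        mapping.setConf(conf);
        // Hosts missing from the table resolve to /default-rack.
        System.out.println(mapping.resolve(Arrays.asList("host1.example.com")));
      }
    }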
    + +
    + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } mapping and {@literal <}groupId, groupName{@literal >} + mapping.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + }/host@realm. + @param principalName principal name of format as described above + @return host name if the the string conforms to the above format, else null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generic type T. + @return generic type T.]]> + + + + + + + Generics Type T. + @return the result of the action + @throws IOException in the event of error]]> + + + + + + + generic type T. + @return the result of the action + @throws IOException in the event of error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } "jack" + + @param userName input userName. + @return userName without login method]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method]]> + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method + @throws IOException if the action throws an IOException + @throws Error if the action throws an Error + @throws RuntimeException if the action throws a RuntimeException + @throws InterruptedException if the action throws an InterruptedException + @throws UndeclaredThrowableException if the action throws something else]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CredentialProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? 
null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A User-Agent String is considered to be a browser if it matches + any of the regex patterns from browser-useragent-regex; the default + behavior is to consider everything a browser that matches the following: + "^Mozilla.*,^Opera.*". Subclasses can optionally override + this method to use different behavior. + + @param userAgent The User-Agent String, or null if there isn't one + @return true if the User-Agent String refers to a browser, false if not]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The type of the token identifier]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + T extends TokenIdentifier]]> + + + + + + + + + + DelegationTokenAuthenticatedURL. +

    + An instance of the default {@link DelegationTokenAuthenticator} will be + used.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used.]]> + + + + + DelegationTokenAuthenticatedURL using the default + {@link DelegationTokenAuthenticator} class. + + @param connConfigurator a connection configurator.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used. + @param connConfigurator a connection configurator.]]> + + + + + + + + + + + + The default class is {@link KerberosDelegationTokenAuthenticator} + + @return the delegation token authenticator class to use as default.]]> + + + + + + + This method is provided to enable WebHDFS backwards compatibility. + + @param useQueryString TRUE if the token is transmitted in the + URL query string, FALSE if the delegation token is transmitted + using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP + header.]]> + + + + + TRUE if the token is transmitted in the URL query + string, FALSE if the delegation token is transmitted using the + {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]> + + + + + + + + + + + + + + + + + + Authenticator. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator. If the doAs parameter is not NULL, + the request will be done on behalf of the specified doAs user. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. + @param doAs user to do the the request on behalf of, if NULL the request is + as self. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. 
+ @return delegation token long value.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @throws IOException if an IO error occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + DelegationTokenAuthenticatedURL is a + {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token + functionality. +

    + The authentication mechanisms supported by default are Hadoop Simple + authentication (also known as pseudo authentication) and Kerberos SPNEGO + authentication. +

    + Additional authentication mechanisms can be supported via {@link + DelegationTokenAuthenticator} implementations. +

    + The default {@link DelegationTokenAuthenticator} is the {@link + KerberosDelegationTokenAuthenticator} class which supports + automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via + the {@link PseudoDelegationTokenAuthenticator} class. +

    + AuthenticatedURL instances are not thread-safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return abstract delegation token identifier.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return abstract delegation token identifier.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param dToken abstract delegation token identifier. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @param dToken abstract delegation token identifier. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param dToken abstract delegation token identifier. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param dToken abstract delegation token identifier. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KerberosDelegationTokenAuthenticator provides support for + Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation + Token operations. +

    + It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP + endpoint does not trigger a SPNEGO authentication]]> + + + + + + + + + PseudoDelegationTokenAuthenticator provides support for + Hadoop's pseudo authentication mechanism that accepts + the user name specified as a query string parameter and support for Hadoop + Delegation Token operations. +

    + This mimics the model of Hadoop Simple authentication trusting the + {@link UserGroupInformation#getCurrentUser()} value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + live. + @return a (snapshotted) map of blocker name->description values]]> + + + + + + + + + + + + + Do nothing if the service is null or not + in a state in which it can be/needs to be stopped. +

    + The service state is checked before the operation begins. + This process is not thread safe. + @param service a service or null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Any long-lived operation here will prevent the service state + change from completing in a timely manner.
  • +
  • If another thread is somehow invoked from the listener, and + that thread invokes the methods of the service (including + subclass-specific methods), there is a risk of a deadlock.
  • + + + + @param service the service that has changed.]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + The base implementation logs all arguments at the debug level, + then returns the passed in config unchanged.]]> + + + + + + + The action is to signal success by returning the exit code 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is called before {@link #init(Configuration)}; + Any non-null configuration that is returned from this operation + becomes the one that is passed on to that {@link #init(Configuration)} + operation. +

    + This permits implementations to change the configuration before + the init operation. As the ServiceLauncher only creates + an instance of the base {@link Configuration} class, it is + recommended to instantiate any subclass (such as YarnConfiguration) + that injects new resources. +

    + @param config the initial configuration build up by the + service launcher. + @param args list of arguments passed to the command line + after any launcher-specific commands have been stripped. + @return the configuration to init the service with. + Recommended: pass down the config parameter with any changes + @throws Exception any problem]]> + + + + + + + The return value becomes the exit code of the launched process. +

    + If an exception is raised, the policy is: +

      +
    1. Any subset of {@link org.apache.hadoop.util.ExitUtil.ExitException}: + the exception is passed up unmodified. +
    2. +
    3. Any exception which implements + {@link org.apache.hadoop.util.ExitCodeProvider}: + A new {@link ServiceLaunchException} is created with the exit code + and message of the thrown exception; the thrown exception becomes the + cause.
    4. +
    5. Any other exception: a new {@link ServiceLaunchException} is created + with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and + the message of the original exception (which becomes the cause).
    6. +
    + @return the exit code + @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed + up as the exit code and error text. + @throws Exception any exception to report. If it provides an exit code + this is used in a wrapping exception.]]> +
    +
    + + + The command line options will be passed down before the + {@link Service#init(Configuration)} operation is invoked via an + invocation of {@link LaunchableService#bindArgs(Configuration, List)} + After the service has been successfully started via {@link Service#start()} + the {@link LaunchableService#execute()} method is called to execute the + service. When this method returns, the service launcher will exit, using + the return code from the method as its exit option.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Approximate HTTP equivalent: {@code 400 Bad Request}]]> + + + + + + approximate HTTP equivalent: Approximate HTTP equivalent: {@code 401 Unauthorized}]]> + + + + + + + + + + + Approximate HTTP equivalent: Approximate HTTP equivalent: {@code 403: Forbidden}]]> + + + + + + Approximate HTTP equivalent: {@code 404: Not Found}]]> + + + + + + Approximate HTTP equivalent: {@code 405: Not allowed}]]> + + + + + + Approximate HTTP equivalent: {@code 406: Not Acceptable}]]> + + + + + + Approximate HTTP equivalent: {@code 408: Request Timeout}]]> + + + + + + Approximate HTTP equivalent: {@code 409: Conflict}]]> + + + + + + Approximate HTTP equivalent: {@code 500 Internal Server Error}]]> + + + + + + Approximate HTTP equivalent: {@code 501: Not Implemented}]]> + + + + + + Approximate HTTP equivalent: {@code 503 Service Unavailable}]]> + + + + + + If raised, this is expected to be raised server-side and likely due + to client/server version incompatibilities. +

    + Approximate HTTP equivalent: {@code 505: Version Not Supported}]]> + + + + + + + + + + + + + + + Codes with a YARN prefix are YARN-related. +

    + Many of the exit codes are designed to resemble HTTP error codes, + squashed into a single byte. e.g 44 , "not found" is the equivalent + of 404. The various 2XX HTTP error codes aren't followed; + the Unix standard of "0" for success is used. +

    +    0-10: general command issues
    +   30-39: equivalent to the 3XX responses, where those responses are
    +          considered errors by the application.
    +   40-49: client-side/CLI/config problems
    +   50-59: service-side problems.
    +   60+  : application specific error codes
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. +

    + If the last argument is a throwable, it becomes the cause of the exception. + It will also be used as a parameter for the format. + @param exitCode exit code + @param format format for message to use in exception + @param args list of arguments]]> + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. + @param exitCode exit code + @param cause inner cause + @param format format for message to use in exception + @param args list of arguments]]> + + + + + When caught by the ServiceLauncher, it will convert that + into a process exit code. + + The {@link #ServiceLaunchException(int, String, Object...)} constructor + generates formatted exceptions.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This will be 0 until a call + to {@link #finished()} has been made. +

    + @return the currently recorded duration.]]> +
    + + + + + + + + + +
    + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take significant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occurred and time-out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Generics Type. + @param theClass class of which an object is created + @param conf Configuration + @return a new object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Generics Type T + @param o object whose correctly-typed Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + Generics Type. + @param conf input Configuration. + @param src the object to copy from + @param dst the object to copy into, which is destroyed + @return dst param (the copy) + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + kill -0 command or equivalent]]> + + + + + + + + + + + + + + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param parent File parent directory + @param basename String script file basename + @return File referencing the script in the directory]]> + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param basename String script file basename + @return String script file name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IOException. + @return the path to {@link #WINUTILS_EXE} + @throws RuntimeException if the path is not resolvable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell. + @return the thread that ran runCommand() that spawned this shell + or null if no thread is waiting for this shell to complete]]> + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @param timeout time in milliseconds after which script should be marked timeout + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + Shell processes. + Iterates through a map of all currently running Shell + processes and destroys them one by one. This method is thread safe]]> + + + + + Shell objects. + + @return all shells set.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CreateProcess synchronization object.]]> + + + + + os.name property.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Important: caller must check for this value being null. + The lack of such checks has led to many support issues being raised. +

    + @deprecated use one of the exception-raising getter methods, + specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]> + + + + + + + + + + + + + + Shell can be used to run shell commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + ShutdownHookManager singleton. + + @return ShutdownHookManager singleton.]]> + + + + + + + Runnable + @param priority priority of the shutdownHook.]]> + + + + + + + + + Runnable + @param priority priority of the shutdownHook + @param timeout timeout of the shutdownHook + @param unit unit of the timeout TimeUnit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShutdownHookManager enables running shutdownHook + in a deterministic order, higher priority first. +

    + The JVM runs ShutdownHooks in a non-deterministic order or in parallel. + This class registers a single JVM shutdownHook and run all the + shutdownHooks registered to it (to this class) in order based on their + priority. + + Unless a hook was registered with a shutdown explicitly set through + {@link #addShutdownHook(Runnable, int, long, TimeUnit)}, + the shutdown time allocated to it is set by the configuration option + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in + {@code core-site.xml}, with a default value of + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT} + seconds.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +
    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyMapper.class);
    +         job.setReducerClass(MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         RunningJob runningJob = JobClient.runJob(job);
    +         if (runningJob.isSuccessful()) {
    +           return 0;
    +         } else {
    +           return 1;
    +         }
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 
    + + @see GenericOptionsParser + @see ToolRunner]]> +
    + + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method. + @throws Exception Exception.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method. + @throws Exception exception.]]> + + + + + + + + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

    + A counting Bloom filter is an improvement to standard a Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr {@literal <=} n + (n is the cardinality of the set A to record in + the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown]]> +
    + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param timeout timeout to wait + @param unit time unit. + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown + @throws TimeoutException the future timed out.]]> +
    +
    + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + +
  • If it is an IOE: Return.
  • +
  • If it is a {@link UncheckedIOException}: return the cause
  • +
  • Completion/Execution Exceptions: extract and repeat
  • +
  • If it is an RTE or Error: throw.
  • +
  • Any other type: wrap in an IOE
  • + + + Recursively handles wrapped Execution and Completion Exceptions in + case something very complicated has happened. + @param e exception. + @return an IOException extracted or built from the cause. + @throws RuntimeException if that is the inner cause. + @throws Error if that is the inner cause.]]> +
    +
    + + + + + + + type of result + @param type of builder + @return the builder passed in.]]> + + + + + + + + + + fs.example.s3a.option becomes "s3a.option" + fs.example.fs.io.policy becomes "fs.io.policy" + fs.example.something becomes "something" + + @param builder builder to modify + @param conf configuration to read + @param prefix prefix to scan/strip + @param mandatory are the options to be mandatory or optional?]]> + + + + + + Return type. + @return the evaluated result. + @throws UnsupportedOperationException fail fast if unsupported + @throws IllegalArgumentException invalid argument]]> + + + + + Contains methods promoted from + {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they + are a key part of integrating async IO in application code. +

    +

    + One key feature is that the {@link #awaitFuture(Future)} and + {@link #awaitFuture(Future, long, TimeUnit)} calls will + extract and rethrow exceptions raised in the future's execution, + including extracting the inner IOException of any + {@code UncheckedIOException} raised in the future. + This makes it somewhat easier to execute IOException-raising + code inside futures. +

    ]]> +
    +
    + + + + + + + type + @return a remote iterator]]> + + + + + + type + @param iterator iterator. + @return a remote iterator]]> + + + + + + type + @param iterable iterable. + @return a remote iterator]]> + + + + + + type + @param array array. + @return a remote iterator]]> + + + + + + + source type + @param result type + @param iterator source + @param mapper transformation + @return a remote iterator]]> + + + + + + source type + @param result type + @param iterator source + @return a remote iterator]]> + + + + + + + + Elements are filtered in the hasNext() method; if not used + the filtering will be done on demand in the {@code next()} + call. +

    + @param type + @param iterator source + @param filter filter + @return a remote iterator]]> +
    +
    + + + + + source type. + @return a new iterator]]> + + + + + + + type + @return a list of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + + type + @return an array of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + + and this classes log is set to DEBUG, + then the statistics of the operation are evaluated and logged at + debug. +

    + The number of entries processed is returned, as it is useful to + know this, especially during tests or when reporting values + to users. +

    + This does not close the iterator afterwards. + @param source iterator source + @param consumer consumer of the values. + @return the number of elements processed + @param type of source + @throws IOException if the source RemoteIterator or the consumer raise one.]]> +
    +
    + + + + type of source]]> + + + + + This aims to make it straightforward to use lambda-expressions to + transform the results of an iterator, without losing the statistics + in the process, and to chain the operations together. +

    + The closeable operation will be passed through RemoteIterators which + wrap other RemoteIterators. This is to support any iterator which + can be closed to release held connections, file handles etc. + Unless client code is written to assume that RemoteIterator instances + may be closed, this is not likely to be broadly used. It is added + to make it possible to adopt this feature in a managed way. +

    + One notable feature is that the + {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will + LOG at debug any IOStatistics provided by the iterator, if such + statistics are provided. There's no attempt at retrieval and logging + if the LOG is not set to debug, so it is a zero cost feature unless + the logger {@code org.apache.hadoop.fs.functional.RemoteIterators} + is at DEBUG. +

    + Based on the S3A Listing code, and some some work on moving other code + to using iterative listings so as to pick up the statistics.]]> +
    +
    + +
    + + + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md new file mode 100644 index 00000000000..0bdd1844b6e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md @@ -0,0 +1,359 @@ + + +# Apache Hadoop Changelog + +## Release 3.3.5 - 2023-03-14 + + + +### IMPORTANT ISSUES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | Replace all default Charset usage with UTF-8 | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | CryptoOutputStream::close leak when encrypted zones + quota exceptions | Critical | fs | Colm Dougan | Colm Dougan | + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-18003](https://issues.apache.org/jira/browse/HADOOP-18003) | Add a method appendIfAbsent for CallerContext | Minor | . | Tao Li | Tao Li | +| [HDFS-16331](https://issues.apache.org/jira/browse/HDFS-16331) | Make dfs.blockreport.intervalMsec reconfigurable | Major | . | Tao Li | Tao Li | +| [HDFS-16371](https://issues.apache.org/jira/browse/HDFS-16371) | Exclude slow disks when choosing volume | Major | . | Tao Li | Tao Li | +| [HDFS-16400](https://issues.apache.org/jira/browse/HDFS-16400) | Reconfig DataXceiver parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16399](https://issues.apache.org/jira/browse/HDFS-16399) | Reconfig cache report parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16398](https://issues.apache.org/jira/browse/HDFS-16398) | Reconfig block report parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16396](https://issues.apache.org/jira/browse/HDFS-16396) | Reconfig slow peer parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16397](https://issues.apache.org/jira/browse/HDFS-16397) | Reconfig slow disk parameters for datanode | Major | . | Tao Li | Tao Li | +| [MAPREDUCE-7341](https://issues.apache.org/jira/browse/MAPREDUCE-7341) | Add a task-manifest output committer for Azure and GCS | Major | client | Steve Loughran | Steve Loughran | +| [HADOOP-18163](https://issues.apache.org/jira/browse/HADOOP-18163) | hadoop-azure support for the Manifest Committer of MAPREDUCE-7341 | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HDFS-16413](https://issues.apache.org/jira/browse/HDFS-16413) | Reconfig dfs usage parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16521](https://issues.apache.org/jira/browse/HDFS-16521) | DFS API to retrieve slow datanodes | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16568](https://issues.apache.org/jira/browse/HDFS-16568) | dfsadmin -reconfig option to start/query reconfig on all live datanodes | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16582](https://issues.apache.org/jira/browse/HDFS-16582) | Expose aggregate latency of slow node as perceived by the reporting node | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | Slow peer metrics - add median, mad and upper latency limits | Major | . 
| Viraj Jasani | Viraj Jasani | +| [YARN-11241](https://issues.apache.org/jira/browse/YARN-11241) | Add uncleaning option for local app log file with log-aggregation enabled | Major | log-aggregation | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18103](https://issues.apache.org/jira/browse/HADOOP-18103) | High performance vectored read API in Hadoop | Major | common, fs, fs/adl, fs/s3 | Mukund Thakur | Mukund Thakur | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17276](https://issues.apache.org/jira/browse/HADOOP-17276) | Extend CallerContext to make it include many items | Major | . | Hui Fei | Hui Fei | +| [HDFS-15745](https://issues.apache.org/jira/browse/HDFS-15745) | Make DataNodePeerMetrics#LOW\_THRESHOLD\_MS and MIN\_OUTLIER\_DETECTION\_NODES configurable | Major | . | Haibin Huang | Haibin Huang | +| [HDFS-16266](https://issues.apache.org/jira/browse/HDFS-16266) | Add remote port information to HDFS audit log | Major | . | Tao Li | Tao Li | +| [YARN-10997](https://issues.apache.org/jira/browse/YARN-10997) | Revisit allocation and reservation logging | Major | . | Andras Gyori | Andras Gyori | +| [HDFS-16310](https://issues.apache.org/jira/browse/HDFS-16310) | RBF: Add client port to CallerContext for Router | Major | . | Tao Li | Tao Li | +| [HDFS-16352](https://issues.apache.org/jira/browse/HDFS-16352) | return the real datanode numBlocks in #getDatanodeStorageReport | Major | . | qinyuren | qinyuren | +| [HDFS-16426](https://issues.apache.org/jira/browse/HDFS-16426) | fix nextBlockReportTime when trigger full block report force | Major | . | qinyuren | qinyuren | +| [HDFS-16430](https://issues.apache.org/jira/browse/HDFS-16430) | Validate maximum blocks in EC group when adding an EC policy | Minor | ec, erasure-coding | daimin | daimin | +| [HDFS-16403](https://issues.apache.org/jira/browse/HDFS-16403) | Improve FUSE IO performance by supporting FUSE parameter max\_background | Minor | fuse-dfs | daimin | daimin | +| [HDFS-16262](https://issues.apache.org/jira/browse/HDFS-16262) | Async refresh of cached locations in DFSInputStream | Major | . | Bryan Beaudreault | Bryan Beaudreault | +| [HADOOP-18093](https://issues.apache.org/jira/browse/HADOOP-18093) | Better exception handling for testFileStatusOnMountLink() in ViewFsBaseTest.java | Trivial | . | Xing Lin | Xing Lin | +| [HDFS-16423](https://issues.apache.org/jira/browse/HDFS-16423) | balancer should not get blocks on stale storages | Major | balancer & mover | qinyuren | qinyuren | +| [HADOOP-18139](https://issues.apache.org/jira/browse/HADOOP-18139) | Allow configuration of zookeeper server principal | Major | auth | Owen O'Malley | Owen O'Malley | +| [YARN-11076](https://issues.apache.org/jira/browse/YARN-11076) | Upgrade jQuery version in Yarn UI2 | Major | yarn-ui-v2 | Tamas Domok | Tamas Domok | +| [HDFS-16495](https://issues.apache.org/jira/browse/HDFS-16495) | RBF should prepend the client ip rather than append it. | Major | . 
| Owen O'Malley | Owen O'Malley | +| [HADOOP-18144](https://issues.apache.org/jira/browse/HADOOP-18144) | getTrashRoot/s in ViewFileSystem should return viewFS path, not targetFS path | Major | common | Xing Lin | Xing Lin | +| [HADOOP-18162](https://issues.apache.org/jira/browse/HADOOP-18162) | hadoop-common enhancements for the Manifest Committer of MAPREDUCE-7341 | Major | fs | Steve Loughran | Steve Loughran | +| [HDFS-16529](https://issues.apache.org/jira/browse/HDFS-16529) | Remove unnecessary setObserverRead in TestConsistentReadsObserver | Trivial | test | Zhaohui Wang | Zhaohui Wang | +| [HDFS-16530](https://issues.apache.org/jira/browse/HDFS-16530) | setReplication debug log creates a new string even if debug is disabled | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16457](https://issues.apache.org/jira/browse/HDFS-16457) | Make fs.getspaceused.classname reconfigurable | Major | namenode | yanbin.zhang | yanbin.zhang | +| [HDFS-16427](https://issues.apache.org/jira/browse/HDFS-16427) | Add debug log for BlockManager#chooseExcessRedundancyStriped | Minor | erasure-coding | Tao Li | Tao Li | +| [HDFS-16497](https://issues.apache.org/jira/browse/HDFS-16497) | EC: Add param comment for liveBusyBlockIndices with HDFS-14768 | Minor | erasure-coding, namanode | caozhiqiang | caozhiqiang | +| [HDFS-16389](https://issues.apache.org/jira/browse/HDFS-16389) | Improve NNThroughputBenchmark test mkdirs | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-17551](https://issues.apache.org/jira/browse/HADOOP-17551) | Upgrade maven-site-plugin to 3.11.0 | Major | . | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16519](https://issues.apache.org/jira/browse/HDFS-16519) | Add throttler to EC reconstruction | Minor | datanode, ec | daimin | daimin | +| [HDFS-14478](https://issues.apache.org/jira/browse/HDFS-14478) | Add libhdfs APIs for openFile | Major | hdfs-client, libhdfs, native | Sahil Takiar | Sahil Takiar | +| [HADOOP-16202](https://issues.apache.org/jira/browse/HADOOP-16202) | Enhance openFile() for better read performance against object stores | Major | fs, fs/s3, tools/distcp | Steve Loughran | Steve Loughran | +| [YARN-11116](https://issues.apache.org/jira/browse/YARN-11116) | Migrate Times util from SimpleDateFormat to thread-safe DateTimeFormatter class | Minor | . | Jonathan Turner Eagles | Jonathan Turner Eagles | +| [HDFS-16520](https://issues.apache.org/jira/browse/HDFS-16520) | Improve EC pread: avoid potential reading whole block | Major | dfsclient, ec, erasure-coding | daimin | daimin | +| [HADOOP-18167](https://issues.apache.org/jira/browse/HADOOP-18167) | Add metrics to track delegation token secret manager operations | Major | . | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [YARN-10080](https://issues.apache.org/jira/browse/YARN-10080) | Support show app id on localizer thread pool | Major | nodemanager | zhoukang | Ashutosh Gupta | +| [HADOOP-18172](https://issues.apache.org/jira/browse/HADOOP-18172) | Change scope of getRootFallbackLink for InodeTree to make them accessible from outside package | Minor | . | Xing Lin | Xing Lin | +| [HDFS-16588](https://issues.apache.org/jira/browse/HDFS-16588) | Backport HDFS-16584 to branch-3.3. 
| Major | balancer & mover, namenode | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-18240](https://issues.apache.org/jira/browse/HADOOP-18240) | Upgrade Yetus to 0.14.0 | Major | build | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16585](https://issues.apache.org/jira/browse/HDFS-16585) | Add @VisibleForTesting in Dispatcher.java after HDFS-16268 | Trivial | . | Wei-Chiu Chuang | Ashutosh Gupta | +| [HADOOP-18244](https://issues.apache.org/jira/browse/HADOOP-18244) | Fix Hadoop-Common JavaDoc Error on branch-3.3 | Major | common | Shilun Fan | Shilun Fan | +| [HADOOP-18269](https://issues.apache.org/jira/browse/HADOOP-18269) | Misleading method name in DistCpOptions | Minor | tools/distcp | guophilipse | guophilipse | +| [HADOOP-18275](https://issues.apache.org/jira/browse/HADOOP-18275) | update os-maven-plugin to 1.7.0 | Minor | build | Steve Loughran | Steve Loughran | +| [HDFS-16610](https://issues.apache.org/jira/browse/HDFS-16610) | Make fsck read timeout configurable | Major | hdfs-client | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16576](https://issues.apache.org/jira/browse/HDFS-16576) | Remove unused imports in HDFS project | Minor | . | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16629](https://issues.apache.org/jira/browse/HDFS-16629) | [JDK 11] Fix javadoc warnings in hadoop-hdfs module | Minor | hdfs | Shilun Fan | Shilun Fan | +| [YARN-11172](https://issues.apache.org/jira/browse/YARN-11172) | Fix testDelegationToken | Major | test | zhengchenyu | zhengchenyu | +| [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | Improve Magic Committer Performance | Minor | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18288](https://issues.apache.org/jira/browse/HADOOP-18288) | Total requests and total requests per sec served by RPC servers | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18336](https://issues.apache.org/jira/browse/HADOOP-18336) | tag FSDataInputStream.getWrappedStream() @Public/@Stable | Minor | fs | Steve Loughran | Ashutosh Gupta | +| [HADOOP-13144](https://issues.apache.org/jira/browse/HADOOP-13144) | Enhancing IPC client throughput via multiple connections per user | Minor | ipc | Jason Kace | Íñigo Goiri | +| [HDFS-16712](https://issues.apache.org/jira/browse/HDFS-16712) | Fix incorrect placeholder in DataNode.java | Major | . | ZanderXu | ZanderXu | +| [HDFS-16702](https://issues.apache.org/jira/browse/HDFS-16702) | MiniDFSCluster should report cause of exception in assertion error | Minor | hdfs | Steve Vaughan | Steve Vaughan | +| [HADOOP-18365](https://issues.apache.org/jira/browse/HADOOP-18365) | Updated addresses are still accessed using the old IP address | Major | common | Steve Vaughan | Steve Vaughan | +| [HDFS-16687](https://issues.apache.org/jira/browse/HDFS-16687) | RouterFsckServlet replicates code from DfsServlet base class | Major | federation | Steve Vaughan | Steve Vaughan | +| [HADOOP-18333](https://issues.apache.org/jira/browse/HADOOP-18333) | hadoop-client-runtime impact by CVE-2022-2047 CVE-2022-2048 due to shaded jetty | Major | build | phoebe chen | Ashutosh Gupta | +| [HADOOP-18406](https://issues.apache.org/jira/browse/HADOOP-18406) | Adds alignment context to call path for creating RPC proxy with multiple connections per user. 
| Major | ipc | Simbarashe Dzinamarira | Simbarashe Dzinamarira | +| [HDFS-16684](https://issues.apache.org/jira/browse/HDFS-16684) | Exclude self from JournalNodeSyncer when using a bind host | Major | journal-node | Steve Vaughan | Steve Vaughan | +| [HDFS-16686](https://issues.apache.org/jira/browse/HDFS-16686) | GetJournalEditServlet fails to authorize valid Kerberos request | Major | journal-node | Steve Vaughan | Steve Vaughan | +| [YARN-11303](https://issues.apache.org/jira/browse/YARN-11303) | Upgrade jquery ui to 1.13.2 | Major | security | D M Murali Krishna Reddy | Ashutosh Gupta | +| [HADOOP-16769](https://issues.apache.org/jira/browse/HADOOP-16769) | LocalDirAllocator to provide diagnostics when file creation fails | Minor | util | Ramesh Kumar Thangarajan | Ashutosh Gupta | +| [HADOOP-18341](https://issues.apache.org/jira/browse/HADOOP-18341) | upgrade commons-configuration2 to 2.8.0 and commons-text to 1.9 | Major | . | PJ Fanning | PJ Fanning | +| [HDFS-16776](https://issues.apache.org/jira/browse/HDFS-16776) | Erasure Coding: The length of targets should be checked when DN gets a reconstruction task | Major | . | Kidd5368 | Kidd5368 | +| [HADOOP-18469](https://issues.apache.org/jira/browse/HADOOP-18469) | Add XMLUtils methods to centralise code that creates secure XML parsers | Major | . | PJ Fanning | PJ Fanning | +| [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | Remove the hadoop-openstack module | Major | build, fs, fs/swift | Steve Loughran | Steve Loughran | +| [HADOOP-18468](https://issues.apache.org/jira/browse/HADOOP-18468) | upgrade jettison json jar due to fix CVE-2022-40149 | Major | build | PJ Fanning | PJ Fanning | +| [HADOOP-17779](https://issues.apache.org/jira/browse/HADOOP-17779) | Lock File System Creator Semaphore Uninterruptibly | Minor | fs | David Mollitor | David Mollitor | +| [HADOOP-18360](https://issues.apache.org/jira/browse/HADOOP-18360) | Update commons-csv from 1.0 to 1.9.0. | Minor | common | Shilun Fan | Shilun Fan | +| [HADOOP-18493](https://issues.apache.org/jira/browse/HADOOP-18493) | update jackson-databind 2.12.7.1 due to CVE fixes | Major | . | PJ Fanning | PJ Fanning | +| [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | Update Bouncy Castle to 1.68 or later | Major | build | Takanobu Asanuma | PJ Fanning | +| [HADOOP-18497](https://issues.apache.org/jira/browse/HADOOP-18497) | Upgrade commons-text version to fix CVE-2022-42889 | Major | build | Xiaoqiao He | PJ Fanning | +| [HDFS-16795](https://issues.apache.org/jira/browse/HDFS-16795) | Use secure XML parser utils in hdfs classes | Major | . | PJ Fanning | PJ Fanning | +| [YARN-11330](https://issues.apache.org/jira/browse/YARN-11330) | Use secure XML parser utils in YARN | Major | . | PJ Fanning | PJ Fanning | +| [MAPREDUCE-7411](https://issues.apache.org/jira/browse/MAPREDUCE-7411) | Use secure XML parser utils in MapReduce | Major | . 
| PJ Fanning | PJ Fanning | +| [HADOOP-18512](https://issues.apache.org/jira/browse/HADOOP-18512) | upgrade woodstox-core to 5.4.0 for security fix | Major | common | phoebe chen | PJ Fanning | +| [YARN-11363](https://issues.apache.org/jira/browse/YARN-11363) | Remove unused TimelineVersionWatcher and TimelineVersion from hadoop-yarn-server-tests | Major | test, yarn | Ashutosh Gupta | Ashutosh Gupta | +| [YARN-11364](https://issues.apache.org/jira/browse/YARN-11364) | Docker Container to accept docker Image name with sha256 digest | Major | yarn | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18517](https://issues.apache.org/jira/browse/HADOOP-18517) | ABFS: Add fs.azure.enable.readahead option to disable readahead | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-18484](https://issues.apache.org/jira/browse/HADOOP-18484) | upgrade hsqldb to v2.7.1 due to CVE | Major | . | PJ Fanning | Ashutosh Gupta | +| [HDFS-16844](https://issues.apache.org/jira/browse/HDFS-16844) | [RBF] The routers should be resiliant against exceptions from StateStore | Major | rbf | Owen O'Malley | Owen O'Malley | +| [HADOOP-18573](https://issues.apache.org/jira/browse/HADOOP-18573) | Improve error reporting on non-standard kerberos names | Blocker | security | Steve Loughran | Steve Loughran | +| [HADOOP-18561](https://issues.apache.org/jira/browse/HADOOP-18561) | CVE-2021-37533 on commons-net is included in hadoop common and hadoop-client-runtime | Blocker | build | phoebe chen | Steve Loughran | +| [HADOOP-18067](https://issues.apache.org/jira/browse/HADOOP-18067) | Über-jira: S3A Hadoop 3.3.5 features | Major | fs/s3 | Steve Loughran | Mukund Thakur | +| [YARN-10444](https://issues.apache.org/jira/browse/YARN-10444) | Node Manager to use openFile() with whole-file read policy for localizing files. | Minor | nodemanager | Steve Loughran | Steve Loughran | +| [HADOOP-18661](https://issues.apache.org/jira/browse/HADOOP-18661) | Fix bin/hadoop usage script terminology | Blocker | scripts | Steve Loughran | Steve Loughran | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17116](https://issues.apache.org/jira/browse/HADOOP-17116) | Skip Retry INFO logging on first failover from a proxy | Major | ha | Hanisha Koneru | Hanisha Koneru | +| [YARN-10553](https://issues.apache.org/jira/browse/YARN-10553) | Refactor TestDistributedShell | Major | distributed-shell, test | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15839](https://issues.apache.org/jira/browse/HDFS-15839) | RBF: Cannot get method setBalancerBandwidth on Router Client | Major | rbf | Yang Yun | Yang Yun | +| [HADOOP-17588](https://issues.apache.org/jira/browse/HADOOP-17588) | CryptoInputStream#close() should be synchronized | Major | . 
| Renukaprasad C | Renukaprasad C | +| [HADOOP-17836](https://issues.apache.org/jira/browse/HADOOP-17836) | Improve logging on ABFS error reporting | Minor | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-17989](https://issues.apache.org/jira/browse/HADOOP-17989) | ITestAzureBlobFileSystemDelete failing "Operations has null HTTP response" | Major | fs/azure, test | Steve Loughran | Steve Loughran | +| [YARN-11055](https://issues.apache.org/jira/browse/YARN-11055) | In cgroups-operations.c some fprintf format strings don't end with "\\n" | Minor | nodemanager | Gera Shegalov | Gera Shegalov | +| [YARN-11065](https://issues.apache.org/jira/browse/YARN-11065) | Bump follow-redirects from 1.13.3 to 1.14.7 in hadoop-yarn-ui | Major | yarn-ui-v2 | Akira Ajisaka | | +| [HDFS-16303](https://issues.apache.org/jira/browse/HDFS-16303) | Losing over 100 datanodes in state decommissioning results in full blockage of all datanode decommissioning | Major | . | Kevin Wikant | Kevin Wikant | +| [HDFS-16443](https://issues.apache.org/jira/browse/HDFS-16443) | Fix edge case where DatanodeAdminDefaultMonitor doubly enqueues a DatanodeDescriptor on exception | Major | hdfs | Kevin Wikant | Kevin Wikant | +| [HDFS-16449](https://issues.apache.org/jira/browse/HDFS-16449) | Fix hadoop web site release notes and changelog not available | Minor | documentation | guophilipse | guophilipse | +| [YARN-10788](https://issues.apache.org/jira/browse/YARN-10788) | TestCsiClient fails | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-18126](https://issues.apache.org/jira/browse/HADOOP-18126) | Update junit 5 version due to build issues | Major | bulid | PJ Fanning | PJ Fanning | +| [YARN-11033](https://issues.apache.org/jira/browse/YARN-11033) | isAbsoluteResource is not correct for dynamically created queues | Minor | yarn | Tamas Domok | Tamas Domok | +| [YARN-10894](https://issues.apache.org/jira/browse/YARN-10894) | Follow up YARN-10237: fix the new test case in TestRMWebServicesCapacitySched | Major | . | Tamas Domok | Tamas Domok | +| [YARN-11022](https://issues.apache.org/jira/browse/YARN-11022) | Fix the documentation for max-parallel-apps in CS | Major | capacity scheduler | Tamas Domok | Tamas Domok | +| [HADOOP-18150](https://issues.apache.org/jira/browse/HADOOP-18150) | Fix ITestAuditManagerDisabled after S3A audit logging was enabled in HADOOP-18091 | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17976](https://issues.apache.org/jira/browse/HADOOP-17976) | abfs etag extraction inconsistent between LIST and HEAD calls | Minor | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-18129](https://issues.apache.org/jira/browse/HADOOP-18129) | Change URI[] in INodeLink to String[] to reduce memory footprint of ViewFileSystem | Major | . | Abhishek Das | Abhishek Das | +| [HADOOP-18145](https://issues.apache.org/jira/browse/HADOOP-18145) | Fileutil's unzip method causes unzipped files to lose their original permissions | Major | common | jingxiong zhong | jingxiong zhong | +| [HDFS-16518](https://issues.apache.org/jira/browse/HDFS-16518) | KeyProviderCache close cached KeyProvider with Hadoop ShutdownHookManager | Major | hdfs | Lei Yang | Lei Yang | +| [HADOOP-18169](https://issues.apache.org/jira/browse/HADOOP-18169) | getDelegationTokens in ViewFs should also fetch the token from the fallback FS | Major | . 
| Xing Lin | Xing Lin | +| [HDFS-16479](https://issues.apache.org/jira/browse/HDFS-16479) | EC: NameNode should not send a reconstruction work when the source datanodes are insufficient | Critical | ec, erasure-coding | Yuanbo Liu | Takanobu Asanuma | +| [HDFS-16509](https://issues.apache.org/jira/browse/HDFS-16509) | Fix decommission UnsupportedOperationException: Remove unsupported | Major | namenode | daimin | daimin | +| [HDFS-16456](https://issues.apache.org/jira/browse/HDFS-16456) | EC: Decommission a rack with only on dn will fail when the rack number is equal with replication | Critical | ec, namenode | caozhiqiang | caozhiqiang | +| [HADOOP-18201](https://issues.apache.org/jira/browse/HADOOP-18201) | Remove base and bucket overrides for endpoint in ITestS3ARequesterPays.java | Major | fs/s3 | Mehakmeet Singh | Daniel Carl Jones | +| [HDFS-16536](https://issues.apache.org/jira/browse/HDFS-16536) | TestOfflineImageViewer fails on branch-3.3 | Major | test | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16538](https://issues.apache.org/jira/browse/HDFS-16538) | EC decoding failed due to not enough valid inputs | Major | erasure-coding | qinyuren | qinyuren | +| [HDFS-16544](https://issues.apache.org/jira/browse/HDFS-16544) | EC decoding failed due to invalid buffer | Major | erasure-coding | qinyuren | qinyuren | +| [HADOOP-17564](https://issues.apache.org/jira/browse/HADOOP-17564) | Fix typo in UnixShellGuide.html | Trivial | . | Takanobu Asanuma | Ashutosh Gupta | +| [HDFS-16552](https://issues.apache.org/jira/browse/HDFS-16552) | Fix NPE for TestBlockManager | Major | . | Tao Li | Tao Li | +| [MAPREDUCE-7246](https://issues.apache.org/jira/browse/MAPREDUCE-7246) | In MapredAppMasterRest#Mapreduce\_Application\_Master\_Info\_API, the datatype of appId should be "string". | Major | documentation | jenny | Ashutosh Gupta | +| [YARN-10187](https://issues.apache.org/jira/browse/YARN-10187) | Removing hadoop-yarn-project/hadoop-yarn/README as it is no longer maintained. | Minor | documentation | N Sanketh Reddy | Ashutosh Gupta | +| [HADOOP-16515](https://issues.apache.org/jira/browse/HADOOP-16515) | Update the link to compatibility guide | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16185](https://issues.apache.org/jira/browse/HDFS-16185) | Fix comment in LowRedundancyBlocks.java | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HADOOP-17479](https://issues.apache.org/jira/browse/HADOOP-17479) | Fix the examples of hadoop config prefix | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HADOOP-18222](https://issues.apache.org/jira/browse/HADOOP-18222) | Prevent DelegationTokenSecretManagerMetrics from registering multiple times | Major | . 
| Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-16540](https://issues.apache.org/jira/browse/HDFS-16540) | Data locality is lost when DataNode pod restarts in kubernetes | Major | namenode | Huaxiang Sun | Huaxiang Sun | +| [YARN-11133](https://issues.apache.org/jira/browse/YARN-11133) | YarnClient gets the wrong EffectiveMinCapacity value | Major | api | Zilong Zhu | Zilong Zhu | +| [YARN-10850](https://issues.apache.org/jira/browse/YARN-10850) | TimelineService v2 lists containers for all attempts when filtering for one | Major | timelinereader | Benjamin Teke | Benjamin Teke | +| [YARN-11141](https://issues.apache.org/jira/browse/YARN-11141) | Capacity Scheduler does not support ambiguous queue names when moving application across queues | Major | capacity scheduler | András Győri | András Győri | +| [HDFS-16586](https://issues.apache.org/jira/browse/HDFS-16586) | Purge FsDatasetAsyncDiskService threadgroup; it causes BPServiceActor$CommandProcessingThread IllegalThreadStateException 'fatal exception and exit' | Major | datanode | Michael Stack | Michael Stack | +| [HADOOP-18251](https://issues.apache.org/jira/browse/HADOOP-18251) | Fix failure of extracting JIRA id from commit message in git\_jira\_fix\_version\_check.py | Minor | build | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-11128](https://issues.apache.org/jira/browse/YARN-11128) | Fix comments in TestProportionalCapacityPreemptionPolicy\* | Minor | capacityscheduler, documentation | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18234](https://issues.apache.org/jira/browse/HADOOP-18234) | s3a access point xml examples are wrong | Minor | documentation, fs/s3 | Steve Loughran | Ashutosh Gupta | +| [HADOOP-18238](https://issues.apache.org/jira/browse/HADOOP-18238) | Fix reentrancy check in SFTPFileSystem.close() | Major | common | yi liu | Ashutosh Gupta | +| [HDFS-16583](https://issues.apache.org/jira/browse/HDFS-16583) | DatanodeAdminDefaultMonitor can get stuck in an infinite loop | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16608](https://issues.apache.org/jira/browse/HDFS-16608) | Fix the link in TestClientProtocolForPipelineRecovery | Minor | documentation | Samrat Deb | Samrat Deb | +| [HDFS-16563](https://issues.apache.org/jira/browse/HDFS-16563) | Namenode WebUI prints sensitive information on Token Expiry | Major | namanode, security, webhdfs | Renukaprasad C | Renukaprasad C | +| [HDFS-16623](https://issues.apache.org/jira/browse/HDFS-16623) | IllegalArgumentException in LifelineSender | Major | . | ZanderXu | ZanderXu | +| [HDFS-16064](https://issues.apache.org/jira/browse/HDFS-16064) | Determine when to invalidate corrupt replicas based on number of usable replicas | Major | datanode, namenode | Kevin Wikant | Kevin Wikant | +| [HADOOP-18255](https://issues.apache.org/jira/browse/HADOOP-18255) | fsdatainputstreambuilder.md refers to hadoop 3.3.3, when it shouldn't | Minor | documentation | Steve Loughran | Ashutosh Gupta | +| [MAPREDUCE-7387](https://issues.apache.org/jira/browse/MAPREDUCE-7387) | Fix TestJHSSecurity#testDelegationToken AssertionError due to HDFS-16563 | Major | . | Shilun Fan | Shilun Fan | +| [MAPREDUCE-7369](https://issues.apache.org/jira/browse/MAPREDUCE-7369) | MapReduce tasks timing out when spends more time on MultipleOutputs#close | Major | . 
| Prabhu Joseph | Ashutosh Gupta | +| [MAPREDUCE-7391](https://issues.apache.org/jira/browse/MAPREDUCE-7391) | TestLocalDistributedCacheManager failing after HADOOP-16202 | Major | test | Steve Loughran | Steve Loughran | +| [HDFS-16591](https://issues.apache.org/jira/browse/HDFS-16591) | StateStoreZooKeeper fails to initialize | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HADOOP-18321](https://issues.apache.org/jira/browse/HADOOP-18321) | Fix when to read an additional record from a BZip2 text file split | Critical | io | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18100](https://issues.apache.org/jira/browse/HADOOP-18100) | Change scope of inner classes in InodeTree to make them accessible outside package | Major | . | Abhishek Das | Abhishek Das | +| [HADOOP-18217](https://issues.apache.org/jira/browse/HADOOP-18217) | shutdownhookmanager should not be multithreaded (deadlock possible) | Minor | util | Catherinot Remi | | +| [MAPREDUCE-7372](https://issues.apache.org/jira/browse/MAPREDUCE-7372) | MapReduce set permission too late in copyJar method | Major | mrv2 | Zhang Dongsheng | | +| [HADOOP-18330](https://issues.apache.org/jira/browse/HADOOP-18330) | S3AFileSystem removes Path when calling createS3Client | Minor | fs/s3 | Ashutosh Pant | Ashutosh Pant | +| [HADOOP-18390](https://issues.apache.org/jira/browse/HADOOP-18390) | Fix out of sync import for HADOOP-18321 | Minor | . | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18340](https://issues.apache.org/jira/browse/HADOOP-18340) | deleteOnExit does not work with S3AFileSystem | Minor | fs/s3 | Huaxiang Sun | Huaxiang Sun | +| [HADOOP-18383](https://issues.apache.org/jira/browse/HADOOP-18383) | Codecs with @DoNotPool annotation are not closed causing memory leak | Major | common | Kevin Sewell | Kevin Sewell | +| [HDFS-16729](https://issues.apache.org/jira/browse/HDFS-16729) | RBF: fix some unreasonably annotated docs | Major | documentation, rbf | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-18398](https://issues.apache.org/jira/browse/HADOOP-18398) | Prevent AvroRecord\*.class from being included non-test jar | Major | common | YUBI LEE | YUBI LEE | +| [HDFS-4043](https://issues.apache.org/jira/browse/HDFS-4043) | Namenode Kerberos Login does not use proper hostname for host qualified hdfs principal name. | Major | security | Ahad Rana | Steve Vaughan | +| [MAPREDUCE-7403](https://issues.apache.org/jira/browse/MAPREDUCE-7403) | Support spark dynamic partitioning in the Manifest Committer | Major | mrv2 | Steve Loughran | Steve Loughran | +| [HDFS-16732](https://issues.apache.org/jira/browse/HDFS-16732) | [SBN READ] Avoid get location from observer when the block report is delayed. | Critical | hdfs | zhengchenyu | zhengchenyu | +| [HADOOP-18375](https://issues.apache.org/jira/browse/HADOOP-18375) | Fix failure of shelltest for hadoop\_add\_ldlibpath | Minor | test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-16755](https://issues.apache.org/jira/browse/HDFS-16755) | TestQJMWithFaults.testUnresolvableHostName() can fail due to unexpected host resolution | Minor | test | Steve Vaughan | Steve Vaughan | +| [HADOOP-18400](https://issues.apache.org/jira/browse/HADOOP-18400) | Fix file split duplicating records from a succeeding split when reading BZip2 text files | Critical | . 
| Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18242](https://issues.apache.org/jira/browse/HADOOP-18242) | ABFS Rename Failure when tracking metadata is in incomplete state | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-18456](https://issues.apache.org/jira/browse/HADOOP-18456) | NullPointerException in ObjectListingIterator's constructor | Blocker | fs/s3 | Quanlong Huang | Steve Loughran | +| [HADOOP-18444](https://issues.apache.org/jira/browse/HADOOP-18444) | Add Support for localized trash for ViewFileSystem in Trash.moveToAppropriateTrash | Major | . | Xing Lin | Xing Lin | +| [HADOOP-18443](https://issues.apache.org/jira/browse/HADOOP-18443) | Upgrade snakeyaml to 1.32 | Major | security | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16766](https://issues.apache.org/jira/browse/HDFS-16766) | hdfs ec command loads (administrator provided) erasure code policy files without disabling xml entity expansion | Major | security | Jing | Ashutosh Gupta | +| [HDFS-13369](https://issues.apache.org/jira/browse/HDFS-13369) | FSCK Report broken with RequestHedgingProxyProvider | Major | hdfs | Harshakiran Reddy | Ranith Sardar | +| [YARN-11039](https://issues.apache.org/jira/browse/YARN-11039) | LogAggregationFileControllerFactory::getFileControllerForRead can leak threads | Blocker | log-aggregation | Rajesh Balamohan | Steve Loughran | +| [HADOOP-18499](https://issues.apache.org/jira/browse/HADOOP-18499) | S3A to support HTTPS web proxies | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-18233](https://issues.apache.org/jira/browse/HADOOP-18233) | Possible race condition with TemporaryAWSCredentialsProvider | Major | auth, fs/s3 | Jason Sleight | Jimmy Wong | +| [MAPREDUCE-7425](https://issues.apache.org/jira/browse/MAPREDUCE-7425) | Document Fix for yarn.app.mapreduce.client-am.ipc.max-retries | Major | yarn | teng wang | teng wang | +| [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | Disable abfs prefetching by default | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HDFS-16836](https://issues.apache.org/jira/browse/HDFS-16836) | StandbyCheckpointer can still trigger rollback fs image after RU is finalized | Major | hdfs | Lei Yang | Lei Yang | +| [HADOOP-18324](https://issues.apache.org/jira/browse/HADOOP-18324) | Interrupting RPC Client calls can lead to thread exhaustion | Critical | ipc | Owen O'Malley | Owen O'Malley | +| [HDFS-16832](https://issues.apache.org/jira/browse/HDFS-16832) | [SBN READ] Fix NPE when check the block location of empty directory | Major | . | zhengchenyu | zhengchenyu | +| [HADOOP-18498](https://issues.apache.org/jira/browse/HADOOP-18498) | [ABFS]: Error introduced when SAS Token containing '?' prefix is passed | Minor | fs/azure | Sree Bhattacharyya | Sree Bhattacharyya | +| [HDFS-16847](https://issues.apache.org/jira/browse/HDFS-16847) | RBF: StateStore writer should not commit tmp fail if there was an error in writing the file. 
| Critical | hdfs, rbf | Simbarashe Dzinamarira | Simbarashe Dzinamarira | +| [HADOOP-18401](https://issues.apache.org/jira/browse/HADOOP-18401) | No ARM binaries in branch-3.3.x releases | Minor | build | Ling Xu | | +| [HADOOP-18408](https://issues.apache.org/jira/browse/HADOOP-18408) | [ABFS]: ITestAbfsManifestCommitProtocol fails on nonHNS configuration | Minor | fs/azure, test | Pranav Saxena | Sree Bhattacharyya | +| [HADOOP-18402](https://issues.apache.org/jira/browse/HADOOP-18402) | S3A committer NPE in spark job abort | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18569](https://issues.apache.org/jira/browse/HADOOP-18569) | NFS Gateway may release buffer too early | Blocker | nfs | Attila Doroszlai | Attila Doroszlai | +| [HADOOP-18574](https://issues.apache.org/jira/browse/HADOOP-18574) | Changing log level of IOStatistics increment to make the DEBUG logs less noisy | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-18521](https://issues.apache.org/jira/browse/HADOOP-18521) | ABFS ReadBufferManager buffer sharing across concurrent HTTP requests | Critical | fs/azure | Steve Loughran | Steve Loughran | +| [MAPREDUCE-7375](https://issues.apache.org/jira/browse/MAPREDUCE-7375) | JobSubmissionFiles don't set right permission after mkdirs | Major | mrv2 | Zhang Dongsheng | | +| [HADOOP-17717](https://issues.apache.org/jira/browse/HADOOP-17717) | Update wildfly openssl to 1.1.3.Final | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-18598](https://issues.apache.org/jira/browse/HADOOP-18598) | maven site generation doesn't include javadocs | Blocker | site | Steve Loughran | Steve Loughran | +| [HDFS-16895](https://issues.apache.org/jira/browse/HDFS-16895) | NamenodeHeartbeatService should use credentials of logged in user | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-16853](https://issues.apache.org/jira/browse/HDFS-16853) | The UT TestLeaseRecovery2#testHardLeaseRecoveryAfterNameNodeRestart failed because HADOOP-18324 | Blocker | . | ZanderXu | ZanderXu | +| [HADOOP-18641](https://issues.apache.org/jira/browse/HADOOP-18641) | cyclonedx maven plugin breaks builds on recent maven releases (3.9.0) | Major | build | Steve Loughran | Steve Loughran | +| [HDFS-16923](https://issues.apache.org/jira/browse/HDFS-16923) | The getListing RPC will throw NPE if the path does not exist | Critical | . 
| ZanderXu | ZanderXu | +| [HDFS-16896](https://issues.apache.org/jira/browse/HDFS-16896) | HDFS Client hedged read has increased failure rate than without hedged read | Major | hdfs-client | Tom McCormick | Tom McCormick | +| [YARN-11383](https://issues.apache.org/jira/browse/YARN-11383) | Workflow priority mappings is case sensitive | Major | yarn | Aparajita Choudhary | Aparajita Choudhary | +| [HDFS-16939](https://issues.apache.org/jira/browse/HDFS-16939) | Fix the thread safety bug in LowRedundancyBlocks | Major | namanode | Shuyan Zhang | Shuyan Zhang | +| [HDFS-16934](https://issues.apache.org/jira/browse/HDFS-16934) | org.apache.hadoop.hdfs.tools.TestDFSAdmin#testAllDatanodesReconfig regression | Minor | dfsadmin, test | Steve Loughran | Shilun Fan | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-16573](https://issues.apache.org/jira/browse/HDFS-16573) | Fix test TestDFSStripedInputStreamWithRandomECPolicy | Minor | test | daimin | daimin | +| [HDFS-16637](https://issues.apache.org/jira/browse/HDFS-16637) | TestHDFSCLI#testAll consistently failing | Major | . | Viraj Jasani | Viraj Jasani | +| [YARN-11248](https://issues.apache.org/jira/browse/YARN-11248) | Add unit test for FINISHED\_CONTAINERS\_PULLED\_BY\_AM event on DECOMMISSIONING | Major | test | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16625](https://issues.apache.org/jira/browse/HDFS-16625) | Unit tests aren't checking for PMDK availability | Major | test | Steve Vaughan | Steve Vaughan | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13293](https://issues.apache.org/jira/browse/HDFS-13293) | RBF: The RouterRPCServer should transfer client IP via CallerContext to NamenodeRpcServer | Major | rbf | Baolong Mao | Hui Fei | +| [HDFS-15630](https://issues.apache.org/jira/browse/HDFS-15630) | RBF: Fix wrong client IP info in CallerContext when requests mount points with multi-destinations. 
| Major | rbf | Chengwei Wang | Chengwei Wang | +| [HADOOP-17152](https://issues.apache.org/jira/browse/HADOOP-17152) | Implement wrapper for guava newArrayList and newLinkedList | Major | common | Ahmed Hussein | Viraj Jasani | +| [HADOOP-17851](https://issues.apache.org/jira/browse/HADOOP-17851) | S3A to support user-specified content encoding | Minor | fs/s3 | Holden Karau | Holden Karau | +| [HADOOP-17492](https://issues.apache.org/jira/browse/HADOOP-17492) | abfs listLocatedStatus to support incremental/async page fetching | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-17409](https://issues.apache.org/jira/browse/HADOOP-17409) | Remove S3Guard - no longer needed | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18084](https://issues.apache.org/jira/browse/HADOOP-18084) | ABFS: Add testfilePath while verifying test contents are read correctly | Minor | fs/azure, test | Anmol Asrani | Anmol Asrani | +| [HDFS-16169](https://issues.apache.org/jira/browse/HDFS-16169) | Fix TestBlockTokenWithDFSStriped#testEnd2End failure | Major | test | Hui Fei | secfree | +| [HADOOP-18091](https://issues.apache.org/jira/browse/HADOOP-18091) | S3A auditing leaks memory through ThreadLocal references | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18071](https://issues.apache.org/jira/browse/HADOOP-18071) | ABFS: Set driver global timeout for ITestAzureBlobFileSystemBasics | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17765](https://issues.apache.org/jira/browse/HADOOP-17765) | ABFS: Use Unique File Paths in Tests | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17862](https://issues.apache.org/jira/browse/HADOOP-17862) | ABFS: Fix unchecked cast compiler warning for AbfsListStatusRemoteIterator | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-18075](https://issues.apache.org/jira/browse/HADOOP-18075) | ABFS: Fix failure caused by listFiles() in ITestAbfsRestOperationException | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-18112](https://issues.apache.org/jira/browse/HADOOP-18112) | Implement paging during S3 multi object delete. | Critical | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-16204](https://issues.apache.org/jira/browse/HADOOP-16204) | ABFS tests to include terasort | Minor | fs/azure, test | Steve Loughran | Steve Loughran | +| [HDFS-13248](https://issues.apache.org/jira/browse/HDFS-13248) | RBF: Namenode need to choose block location for the client | Major | . | Wu Weiwei | Owen O'Malley | +| [HADOOP-13704](https://issues.apache.org/jira/browse/HADOOP-13704) | S3A getContentSummary() to move to listFiles(recursive) to count children; instrument use | Minor | fs/s3 | Steve Loughran | Ahmar Suhail | +| [HADOOP-14661](https://issues.apache.org/jira/browse/HADOOP-14661) | S3A to support Requester Pays Buckets | Minor | common, util | Mandus Momberg | Daniel Carl Jones | +| [HDFS-16484](https://issues.apache.org/jira/browse/HDFS-16484) | [SPS]: Fix an infinite loop bug in SPSPathIdProcessor thread | Major | . 
| qinyuren | qinyuren | +| [HADOOP-17682](https://issues.apache.org/jira/browse/HADOOP-17682) | ABFS: Support FileStatus input to OpenFileWithOptions() via OpenFileParameters | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | Use jersey-json that is built to use jackson2 | Major | build | Akira Ajisaka | PJ Fanning | +| [HADOOP-18104](https://issues.apache.org/jira/browse/HADOOP-18104) | Add configs to configure minSeekForVectorReads and maxReadSizeForVectorReads | Major | common, fs | Mukund Thakur | Mukund Thakur | +| [HADOOP-18168](https://issues.apache.org/jira/browse/HADOOP-18168) | ITestMarkerTool.testRunLimitedLandsatAudit failing due to most of bucket content purged | Minor | fs/s3, test | Steve Loughran | Daniel Carl Jones | +| [HADOOP-12020](https://issues.apache.org/jira/browse/HADOOP-12020) | Support configuration of different S3 storage classes | Major | fs/s3 | Yann Landrin-Schweitzer | Monthon Klongklaew | +| [HADOOP-18105](https://issues.apache.org/jira/browse/HADOOP-18105) | Implement a variant of ElasticByteBufferPool which uses weak references for garbage collection. | Major | common, fs | Mukund Thakur | Mukund Thakur | +| [HADOOP-18107](https://issues.apache.org/jira/browse/HADOOP-18107) | Vectored IO support for large S3 files. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18106](https://issues.apache.org/jira/browse/HADOOP-18106) | Handle memory fragmentation in S3 Vectored IO implementation. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-17461](https://issues.apache.org/jira/browse/HADOOP-17461) | Add thread-level IOStatistics Context | Major | fs, fs/azure, fs/s3 | Steve Loughran | Mehakmeet Singh | +| [HADOOP-18372](https://issues.apache.org/jira/browse/HADOOP-18372) | ILoadTestS3ABulkDeleteThrottling failing | Minor | fs/s3, test | Steve Loughran | Ahmar Suhail | +| [HADOOP-18368](https://issues.apache.org/jira/browse/HADOOP-18368) | ITestCustomSigner fails when access point name has '-' | Minor | . | Ahmar Suhail | Ahmar Suhail | +| [HADOOP-15964](https://issues.apache.org/jira/browse/HADOOP-15964) | Add S3A support for Async Scatter/Gather IO | Major | fs/s3 | Steve Loughran | Mukund Thakur | +| [HADOOP-18366](https://issues.apache.org/jira/browse/HADOOP-18366) | ITestS3Select.testSelectSeekFullLandsat is timing out | Minor | . | Ahmar Suhail | Ahmar Suhail | +| [HADOOP-18373](https://issues.apache.org/jira/browse/HADOOP-18373) | IOStatisticsContext tuning | Minor | fs/s3, test | Steve Loughran | Viraj Jasani | +| [HADOOP-18227](https://issues.apache.org/jira/browse/HADOOP-18227) | Add input stream IOstats for vectored IO api in S3A. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18392](https://issues.apache.org/jira/browse/HADOOP-18392) | Propagate vectored s3a input stream stats to file system stats. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18355](https://issues.apache.org/jira/browse/HADOOP-18355) | Update previous index properly while validating overlapping ranges. 
| Major | common, fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18371](https://issues.apache.org/jira/browse/HADOOP-18371) | s3a FS init logs at warn if fs.s3a.create.storage.class is unset | Blocker | fs/s3 | Steve Loughran | Viraj Jasani | +| [HADOOP-18385](https://issues.apache.org/jira/browse/HADOOP-18385) | ITestS3ACannedACLs failure; not in a span | Major | fs/s3, test | Steve Loughran | Ashutosh Gupta | +| [HADOOP-18403](https://issues.apache.org/jira/browse/HADOOP-18403) | Fix FileSystem leak in ITestS3AAWSCredentialsProvider | Minor | fs/s3 | Viraj Jasani | Viraj Jasani | +| [HADOOP-17882](https://issues.apache.org/jira/browse/HADOOP-17882) | distcp to use openFile() with sequential IO; ranges of reads | Major | tools/distcp | Steve Loughran | Steve Loughran | +| [HADOOP-18391](https://issues.apache.org/jira/browse/HADOOP-18391) | Improve VectoredReadUtils#readVectored() for direct buffers | Major | fs | Steve Loughran | Mukund Thakur | +| [HADOOP-18407](https://issues.apache.org/jira/browse/HADOOP-18407) | Improve vectored IO api spec. | Minor | fs, fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18339](https://issues.apache.org/jira/browse/HADOOP-18339) | S3A storage class option only picked up when buffering writes to disk | Major | fs/s3 | Steve Loughran | Monthon Klongklaew | +| [HADOOP-18410](https://issues.apache.org/jira/browse/HADOOP-18410) | S3AInputStream.unbuffer() async drain not releasing http connections | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18439](https://issues.apache.org/jira/browse/HADOOP-18439) | Fix VectoredIO for LocalFileSystem when checksum is enabled. | Major | common | Mukund Thakur | Mukund Thakur | +| [HADOOP-18416](https://issues.apache.org/jira/browse/HADOOP-18416) | ITestS3AIOStatisticsContext failure | Major | fs/s3, test | Steve Loughran | Mehakmeet Singh | +| [HADOOP-18347](https://issues.apache.org/jira/browse/HADOOP-18347) | Restrict vectoredIO threadpool to reduce memory pressure | Major | common, fs, fs/adl, fs/s3 | Rajesh Balamohan | Mukund Thakur | +| [HADOOP-18463](https://issues.apache.org/jira/browse/HADOOP-18463) | Add an integration test to process data asynchronously during vectored read. | Major | . | Mukund Thakur | Mukund Thakur | +| [HADOOP-15460](https://issues.apache.org/jira/browse/HADOOP-15460) | S3A FS to add "fs.s3a.create.performance" to the builder file creation option set | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | Upgrade AWS SDK to V2 - Prerequisites | Minor | . | Ahmar Suhail | Ahmar Suhail | +| [HADOOP-18480](https://issues.apache.org/jira/browse/HADOOP-18480) | upgrade AWS SDK to 1.12.316 | Major | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18460](https://issues.apache.org/jira/browse/HADOOP-18460) | ITestS3AContractVectoredRead.testStopVectoredIoOperationsUnbuffer failing | Minor | fs/s3, test | Steve Loughran | Mukund Thakur | +| [HADOOP-18488](https://issues.apache.org/jira/browse/HADOOP-18488) | Cherrypick HADOOP-11245 to branch-3.3 | Major | . 
| Wei-Chiu Chuang | Ashutosh Gupta | +| [HADOOP-18481](https://issues.apache.org/jira/browse/HADOOP-18481) | AWS v2 SDK upgrade log to not warn of use standard AWS Credential Providers | Major | fs/s3 | Steve Loughran | Ahmar Suhail | +| [HADOOP-18476](https://issues.apache.org/jira/browse/HADOOP-18476) | Abfs and S3A FileContext bindings to close wrapped filesystems in finalizer | Blocker | fs/azure, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18304](https://issues.apache.org/jira/browse/HADOOP-18304) | Improve S3A committers documentation clarity | Trivial | documentation | Daniel Carl Jones | Daniel Carl Jones | +| [HADOOP-18465](https://issues.apache.org/jira/browse/HADOOP-18465) | S3A server-side encryption tests fail before checking encryption tests should skip | Minor | fs/s3, test | Daniel Carl Jones | Daniel Carl Jones | +| [HADOOP-18530](https://issues.apache.org/jira/browse/HADOOP-18530) | ChecksumFileSystem::readVectored might return byte buffers not positioned at 0 | Blocker | fs | Harshit Gupta | Harshit Gupta | +| [HADOOP-18457](https://issues.apache.org/jira/browse/HADOOP-18457) | ABFS: Support for account level throttling | Major | . | Anmol Asrani | Anmol Asrani | +| [HADOOP-18560](https://issues.apache.org/jira/browse/HADOOP-18560) | AvroFSInput opens a stream twice and discards the second one without closing | Blocker | fs | Steve Loughran | Steve Loughran | +| [HADOOP-18526](https://issues.apache.org/jira/browse/HADOOP-18526) | Leak of S3AInstrumentation instances via hadoop Metrics references | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18546](https://issues.apache.org/jira/browse/HADOOP-18546) | disable purging list of in progress reads in abfs stream closed | Blocker | fs/azure | Steve Loughran | Pranav Saxena | +| [HADOOP-18577](https://issues.apache.org/jira/browse/HADOOP-18577) | ABFS: add probes of readahead fix | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-11867](https://issues.apache.org/jira/browse/HADOOP-11867) | Add a high-performance vectored read API. | Major | fs, fs/azure, fs/s3, hdfs-client | Gopal Vijayaraghavan | Mukund Thakur | +| [HADOOP-18507](https://issues.apache.org/jira/browse/HADOOP-18507) | VectorIO FileRange type to support a "reference" field | Major | fs | Steve Loughran | Steve Loughran | +| [HADOOP-18627](https://issues.apache.org/jira/browse/HADOOP-18627) | site intro docs to make clear Kerberos is mandatory for secure clusters | Major | site | Steve Loughran | Arnout Engelen | +| [HADOOP-17584](https://issues.apache.org/jira/browse/HADOOP-17584) | s3a magic committer may commit more data | Major | fs/s3 | yinan zhan | Steve Loughran | +| [HADOOP-18642](https://issues.apache.org/jira/browse/HADOOP-18642) | Cut excess dependencies from hadoop-azure, hadoop-aliyun transitive imports; fix LICENSE-binary | Blocker | build, fs/azure, fs/oss | Steve Loughran | Steve Loughran | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15854](https://issues.apache.org/jira/browse/HDFS-15854) | Make some parameters configurable for SlowDiskTracker and SlowPeerTracker | Major | . | Tao Li | Tao Li | +| [YARN-10747](https://issues.apache.org/jira/browse/YARN-10747) | Bump YARN CSI protobuf version to 3.7.1 | Major | . | Siyao Meng | Siyao Meng | +| [HDFS-16139](https://issues.apache.org/jira/browse/HDFS-16139) | Update BPServiceActor Scheduler's nextBlockReportTime atomically | Major | . 
| Viraj Jasani | Viraj Jasani | +| [HADOOP-18014](https://issues.apache.org/jira/browse/HADOOP-18014) | CallerContext should not include some characters | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [MAPREDUCE-7371](https://issues.apache.org/jira/browse/MAPREDUCE-7371) | DistributedCache alternative APIs should not use DistributedCache APIs internally | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18114](https://issues.apache.org/jira/browse/HADOOP-18114) | Documentation Syntax Error Fix \> AWS Assumed Roles | Trivial | documentation, fs/s3 | Joey Krabacher | Joey Krabacher | +| [HDFS-16481](https://issues.apache.org/jira/browse/HDFS-16481) | Provide support to set Http and Rpc ports in MiniJournalCluster | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16502](https://issues.apache.org/jira/browse/HDFS-16502) | Reconfigure Block Invalidate limit | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16522](https://issues.apache.org/jira/browse/HDFS-16522) | Set Http and Ipc ports for Datanodes in MiniDFSCluster | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18191](https://issues.apache.org/jira/browse/HADOOP-18191) | Log retry count while handling exceptions in RetryInvocationHandler | Minor | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16551](https://issues.apache.org/jira/browse/HDFS-16551) | Backport HADOOP-17588 to 3.3 and other active old branches. | Major | . | Renukaprasad C | Renukaprasad C | +| [HDFS-16618](https://issues.apache.org/jira/browse/HDFS-16618) | sync\_file\_range error should include more volume and file info | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18300](https://issues.apache.org/jira/browse/HADOOP-18300) | Update google-gson to 2.9.0 | Minor | build | Igor Dvorzhak | Igor Dvorzhak | +| [HADOOP-18397](https://issues.apache.org/jira/browse/HADOOP-18397) | Shutdown AWSSecurityTokenService when its resources are no longer in use | Major | fs/s3 | Viraj Jasani | Viraj Jasani | +| [HADOOP-18575](https://issues.apache.org/jira/browse/HADOOP-18575) | Make XML transformer factory more lenient | Major | common | PJ Fanning | PJ Fanning | +| [HADOOP-18586](https://issues.apache.org/jira/browse/HADOOP-18586) | Update the year to 2023 | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-18587](https://issues.apache.org/jira/browse/HADOOP-18587) | upgrade to jettison 1.5.3 to fix CVE-2022-40150 | Major | common | PJ Fanning | PJ Fanning | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md new file mode 100644 index 00000000000..b2357e827d2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md @@ -0,0 +1,89 @@ + + +# Apache Hadoop 3.3.5 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | *Major* | **Replace all default Charset usage with UTF-8** + +All of the default charset usages have been replaced to UTF-8. If the default charset of your environment is not UTF-8, the behavior can be different. + + +--- + +* [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | *Major* | **Use jersey-json that is built to use jackson2** + +Use modified jersey-json 1.20 in https://github.com/pjfanning/jersey-1.x/tree/v1.20 that uses Jackson 2.x. 
By this change, Jackson 1.x dependency has been removed from Hadoop. +downstream applications which explicitly exclude jersey from transitive dependencies must now exclude com.github.pjfanning:jersey-json + + +--- + +* [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | *Major* | **Slow peer metrics - add median, mad and upper latency limits** + +Namenode metrics that represent Slownode Json now include three important factors (median, median absolute deviation, upper latency limit) that can help user determine how urgently a given slownode requires manual intervention. + + +--- + +* [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | *Minor* | **Improve Magic Committer Performance** + +S3A filesytem's createFile() operation supports an option to disable all safety checks when creating a file. Consult the documentation and use with care + + +--- + +* [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | *Minor* | **Upgrade AWS SDK to V2 - Prerequisites** + +In preparation for an (incompatible but necessary) move to the AWS SDK v2, some uses of internal/deprecated uses of AWS classes/interfaces are logged as warnings, though only once during the life of a JVM. Set the log "org.apache.hadoop.fs.s3a.SDKV2Upgrade" to only log at INFO to hide these. + + +--- + +* [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | *Major* | **Remove the hadoop-openstack module** + +The swift:// connector for openstack support has been removed. It had fundamental problems (swift's handling of files \> 4GB). A subset of the S3 protocol is now exported by almost all object store services -please use that through the s3a connector instead. The hadoop-openstack jar remains, only now it is empty of code. This is to ensure that projects which declare the JAR a dependency will still have successful builds. + + +--- + +* [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | *Major* | **Update Bouncy Castle to 1.68 or later** + +bouncy castle 1.68+ is a multirelease JAR containing java classes compiled for different target JREs. older versions of asm.jar and maven shade plugin may have problems with these. fix: upgrade the dependencies + + +--- + +* [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | *Major* | **Disable abfs prefetching by default** + +ABFS block prefetching has been disabled to avoid HADOOP-18521 and buffer sharing on multithreaded processes (Hive, Spark etc). This will have little/no performance impact on queries against Parquet or ORC data, but can slow down sequential stream processing, including CSV files -however, the read data will be correct. +It may slow down distcp downloads, where the race condition does not arise. For maximum distcp performance re-enable the readahead by setting fs.abfs.enable.readahead to true. + + +--- + +* [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | *Critical* | **CryptoOutputStream::close leak when encrypted zones + quota exceptions** + +**WARNING: No release note provided for this change.** + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml new file mode 100644 index 00000000000..399b62b3010 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml @@ -0,0 +1,835 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. 
This is loosely modelled after +Google's GFS.

    + +

    The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
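The HDFS package notes above describe a strictly single-writer, append-only write model. A minimal sketch of what that looks like against the public FileSystem API may help; it is not part of this patch, and the path and payloads are hypothetical placeholders.

```java
// Hedged sketch: one writer at a time, bytes stored in the order written.
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AppendOnlyExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path file = new Path("/tmp/append-only-example.txt"); // hypothetical path

    // The single writer creates the file and emits a byte stream.
    try (FSDataOutputStream out = fs.create(file, true)) {
      out.write("first batch\n".getBytes(StandardCharsets.UTF_8));
    }
    // A later writer may only append to the end of the existing stream
    // (supported by HDFS; other FileSystem implementations may refuse append).
    try (FSDataOutputStream out = fs.append(file)) {
      out.write("second batch\n".getBytes(StandardCharsets.UTF_8));
    }
  }
}
```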
    diff --git a/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.3.5.xml b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.3.5.xml new file mode 100644 index 00000000000..2c67744dfbe --- /dev/null +++ b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.3.5.xml @@ -0,0 +1,113 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.3.5.xml b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.3.5.xml new file mode 100644 index 00000000000..9c9eeee9637 --- /dev/null +++ b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.3.5.xml @@ -0,0 +1,28963 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileStatus of a given cache file on hdfs + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

    Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link org.apache.hadoop.mapred.JobConf}. The + DistributedCache assumes that the files specified via urls are + already present on the {@link FileSystem} at the path specified by the url + and are accessible by every machine in the cluster.

    + +

    The framework will copy the necessary files on to the worker node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the workers.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + In older version of Hadoop Map/Reduce users could optionally ask for symlinks + to be created in the working directory of the child task. In the current + version symlinks are always created. If the URL does not have a fragment + the name of the file or directory will be used. If multiple files or + directories map to the same link name, the last one added, will be used. All + others will not even be downloaded.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +
    +     1. Copy the requisite files to the FileSystem:
    +
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +
    +     2. Setup the application's JobConf:
    +
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
    +                                   job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +
    +     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
    +     or {@link org.apache.hadoop.mapred.Reducer}:
    +
    +     public static class MapClass extends MapReduceBase
    +     implements Mapper<K, V, K, V> {
    +
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         File f = new File("./map.zip/some/file/in/zip.txt");
    +       }
    +
    +       public void map(K key, V value,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +
    + 
    + + It is also very common to use the DistributedCache by using + {@link org.apache.hadoop.util.GenericOptionsParser}. + + This class includes methods that should be used by users + (specifically those mentioned in the example above, as well + as {@link DistributedCache#addArchiveToClassPath(Path, Configuration)}), + as well as methods intended for use by the MapReduce framework + (e.g., {@link org.apache.hadoop.mapred.JobClient}). + + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient + @see org.apache.hadoop.mapreduce.Job]]> +
    +
    + +
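The worked example above uses the older org.apache.hadoop.mapred API. For reference, a hedged sketch of the same cache setup through the org.apache.hadoop.mapreduce.Job API mentioned in the @see tag; the /myapp paths are the same illustrative placeholders as above, and the job name is hypothetical.

```java
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class CacheSetupSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "cache-example");
    // The files and archives must already exist on the FileSystem.
    job.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"));
    job.addCacheArchive(new URI("/myapp/map.zip"));
    job.addFileToClassPath(new Path("/myapp/mylib.jar"));
    // ... configure mapper/reducer and input/output, then submit the job.
  }
}
```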
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + {@link JobTracker.State} should no longer be used on M/R 2.x. The function + is kept to be compatible with M/R 1.x applications. + + @return the invalid state of the JobTracker.]]> + + + + + + + + + + + + + + ClusterStatus provides clients with information such as: +
      +
1. Size of the cluster.
2. Name of the trackers.
3. Task capacity of the cluster.
4. The number of currently running map and reduce tasks.
5. State of the JobTracker.
6. Details regarding black listed trackers.
    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> +
    +
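A hedged sketch of the query path described above, assuming a reachable cluster and a default-constructed JobConf; it is not part of this patch.

```java
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class ClusterStatusProbe {
  public static void main(String[] args) throws Exception {
    JobClient client = new JobClient(new JobConf());
    ClusterStatus status = client.getClusterStatus();
    System.out.println("task trackers: " + status.getTaskTrackers());
    System.out.println("map tasks running: " + status.getMapTasks());
    System.out.println("reduce tasks running: " + status.getReduceTasks());
    System.out.println("max map capacity: " + status.getMaxMapTasks());
  }
}
```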
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

    Grouphandles localization of the class name and the + counter names.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat always returns + true. Implementations that may deal with non-splittable files must + override this method. + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + + Implementations of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to prevent input files + from being split-up in certain situations. Implementations that may + deal with non-splittable files must override this method, since + the default implementation assumes splitting is always possible.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +
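A hedged sketch of the Enum-backed counters described above; the mapper, enum, and counter names are hypothetical and not part of this patch.

```java
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class CountingMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, LongWritable, Text> {

  // Each enum class becomes one counter Group.
  enum MyCounters { RECORDS_SEEN, EMPTY_RECORDS }

  public void map(LongWritable key, Text value,
                  OutputCollector<LongWritable, Text> output, Reporter reporter)
      throws IOException {
    reporter.incrCounter(MyCounters.RECORDS_SEEN, 1);
    if (value.getLength() == 0) {
      reporter.incrCounter(MyCounters.EMPTY_RECORDS, 1);
    }
    output.collect(key, value);
  }
}
```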

    Note: The following is valid only if the {@link OutputCommitter} + is {@link FileOutputCommitter}. If OutputCommitter is not + a FileOutputCommitter, the task's temporary output + directory is same as {@link #getOutputPath(JobConf)} i.e. + ${mapreduce.output.fileoutputformat.outputdir}$

    + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only) + are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in ${mapreduce.task.output.dir} during execution + of his reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus she doesn't have to pick + unique paths per task-attempt.

    + +

    Note: the value of ${mapreduce.task.output.dir} during + execution of a particular task-attempt is actually + ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    +
    + + + + + + + + + + + + + The generated name can be used to create custom files from within the + different tasks for the job, the names for different tasks will not collide + with each other.

    + +

    The given name is postfixed with the task type, 'm' for maps, 'r' for + reduces and the task partition number. For example, give a name 'test' + running on the first map o the job the generated name will be + 'test-m-00000'.

    + + @param conf the configuration for the job. + @param name the name to make unique. + @return a unique name accross all tasks of the job.]]> +
    +
    + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

    ls + +

    This method uses the {@link #getUniqueName} method to make the file name + unique for the task.

    + + @param conf the configuration for the job. + @param name the name for the file. + @return a unique path accross all tasks of the job.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    or + conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, recordLength); +

    + @see FixedLengthRecordReader]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.
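A hedged sketch wiring the record-length setting shown above into an old-API job; the 128-byte record length is a placeholder, and the helper class is hypothetical.

```java
import org.apache.hadoop.mapred.FixedLengthInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class FixedLengthJobSetup {
  static void configure(JobConf conf) {
    int recordLength = 128; // hypothetical fixed record size in bytes
    conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, recordLength);
    conf.setInputFormat(FixedLengthInputFormat.class);
  }
}
```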

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + It is the responsibility of the RecordReader to respect + record boundaries while processing the logical split to present a + record-oriented view to the individual task.

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
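The splitting contract discussed above can be switched off per input format. A hedged sketch (not part of this patch; the class name is hypothetical) that keeps each input file in a single split, so one Mapper processes the whole file:

```java
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.TextInputFormat;

public class WholeFileTextInputFormat extends TextInputFormat {
  @Override
  protected boolean isSplitable(FileSystem fs, Path file) {
    return false; // never split: one InputSplit per input file
  }
}
```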
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
1. Validate the input-specification of the job.
2. Split-up the input file(s) into logical {@link InputSplit}s, each of which is then assigned to an individual {@link Mapper}.
3. Provide the {@link RecordReader} implementation to be used to glean input records from the logical InputSplit for processing by the {@link Mapper}.

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapreduce.input.fileinputformat.split.minsize.

    + +

    Clearly, logical splits based on input-size is insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibilty to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + SplitLocationInfos describing how the split + data is stored at each location. A null value indicates that all the + locations have the data stored on disk. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the cluster. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. Checking the input and output specifications of the job.
    2. Computing the {@link InputSplit}s for the job.
    3. Setting up the requisite accounting information for the {@link DistributedCache} of the job, if necessary.
    4. Copying the job's jar and configuration to the map-reduce system directory on the distributed file-system.
    5. Submitting the job to the cluster and optionally monitoring its status.
    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 
    + + Job Control + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. {@link #runJob(JobConf)}: submits the job and returns only after the job has completed.
    2. {@link #submitJob(JobConf)}: only submits the job; the client can then poll the returned handle to the {@link RunningJob} to query status and make scheduling decisions.
    3. {@link JobConf#setJobEndNotificationURI(String)}: sets up a notification on job-completion, thus avoiding polling.
    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
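    A minimal sketch of the submit-then-poll option (#2 above); the polling interval and error handling are illustrative, not prescriptive:

    import java.io.IOException;

    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RunningJob;

    public class SubmitAndPoll {
      public static void run(JobConf job) throws IOException, InterruptedException {
        JobClient client = new JobClient(job);
        RunningJob running = client.submitJob(job);   // returns immediately
        while (!running.isComplete()) {
          System.out.printf("map %.0f%% reduce %.0f%%%n",
              running.mapProgress() * 100, running.reduceProgress() * 100);
          Thread.sleep(5000);
        }
        if (!running.isSuccessful()) {
          throw new IOException("Job " + running.getID() + " failed");
        }
      }
    }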
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the parameter {@code loadDefaults} is false, the new instance + will not load resources from the default files. + + @param loadDefaults specifies whether to load from the default files]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the combiner sort being + stable in any sense. (In any case, with the order of available + map-outputs to the combiner being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys for the + combiner. It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
    +
    + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class) + @see #setCombinerKeyGroupingComparator(Class)]]> +
    +
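    A hedged sketch of how the two comparators plus a partitioner are wired together for a secondary sort; the concrete comparator and partitioner classes are whatever the application supplies:

    import org.apache.hadoop.io.RawComparator;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.Partitioner;

    public class SecondarySortSetup {
      public static void configure(JobConf job,
          Class<? extends Partitioner> partitioner,
          Class<? extends RawComparator> sortComparator,
          Class<? extends RawComparator> groupingComparator) {
        job.setPartitionerClass(partitioner);                       // which reduce a key goes to
        job.setOutputKeyComparatorClass(sortComparator);            // full sort order of keys
        job.setOutputValueGroupingComparator(groupingComparator);   // which keys share one reduce() call
      }
    }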
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is an application-specified aggregation operation, which + can help cut down the amount of data transferred between the + {@link Mapper} and the {@link Reducer}, leading to better performance.

    + +

    The framework may invoke the combiner 0, 1, or multiple times, in both + the mapper and reducer tasks. In general, the combiner is called as the + sort/merge result is written to disk. The combiner must: +

      +
    • be side-effect free
    • have the same input and output key types and the same input and output value types
    + +

    Typically the combiner is the same as the Reducer for the job, i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
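    A small example of the "combiner = reducer" case described above, using the stock LongSumReducer; summation keeps the same key/value types and is side-effect free, so the framework may run it any number of times:

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.lib.LongSumReducer;

    public class SumJobSetup {
      public static void configure(JobConf job) {
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // The reducer doubles as the combiner and may safely run zero or more times.
        job.setCombinerClass(LongSumReducer.class);
        job.setReducerClass(LongSumReducer.class);
      }
    }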
    + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of map tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + + How many maps? + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

    The right level of parallelism for maps seems to be around 10-100 maps per node, although it has been set as high as 300 or so for very CPU-light map tasks. Task setup takes a while, so it is best if the maps take at least a minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapreduce.input.fileinputformat.split.minsize.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
    +
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by ( + available memory for reduce tasks + (The value of this should be smaller than + numNodes * yarn.nodemanager.resource.memory-mb + since the resource of memory is shared by map tasks and other + applications) / + + mapreduce.reduce.memory.mb). +

    + +

    With 0.95 all of the reduces can launch immediately and start transferring map outputs as the maps finish. With 1.75 the faster nodes will finish their first round of reduces and launch a second wave of reduces, doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but + increases load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + + Reducer NONE + +

    It is legal to set the number of reduce-tasks to zero.

    + +

    In this case the output of the map-tasks goes directly to the distributed file-system, to the path set by {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the framework doesn't sort the map-outputs before writing them out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
    +
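    A sketch of the 0.95 heuristic above expressed against the cluster's reduce capacity; treat the helper class name as illustrative:

    import java.io.IOException;

    import org.apache.hadoop.mapred.ClusterStatus;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;

    public class ReducerCount {
      public static void configure(JobConf job) throws IOException {
        ClusterStatus cluster = new JobClient(job).getClusterStatus();
        // 0.95 x the cluster's reduce capacity: all reduces can start as the maps
        // finish, while leaving headroom for failures and speculative tasks.
        job.setNumReduceTasks((int) (cluster.getMaxReduceTasks() * 0.95));
      }
    }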
    + + + mapreduce.map.maxattempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapreduce.reduce.maxattempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf.
    + 
    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 
    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the reduce failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf.
    + 
    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 
    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus]]> +
    +
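    For example (the host and path below are placeholders):

    import org.apache.hadoop.mapred.JobConf;

    public class EndNotificationSetup {
      public static void configure(JobConf job) {
        // $jobId and $jobStatus are expanded by the framework when the notification fires.
        job.setJobEndNotificationURI(
            "http://workflow.example.com/notify?jobid=$jobId&status=$jobStatus");
      }
    }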
    + + + + + + + + + + + + + + + When a job starts, a shared directory is created at location + + ${mapreduce.cluster.local.dir}/taskTracker/$user/jobcache/$jobid/work/ . + This directory is exposed to the users through + mapreduce.job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    This value is also available as a system property. @return The localized job-specific shared directory]]>
    +
    + + + + For backward compatibility, if the job configuration sets the + key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different + from {@link #DISABLED_MEMORY_LIMIT}, that value will be used + after converting it from bytes to MB. + @return memory required to run a map task of the job, in MB,]]> + + + + + + + + + For backward compatibility, if the job configuration sets the + key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different + from {@link #DISABLED_MEMORY_LIMIT}, that value will be used + after converting it from bytes to MB. + @return memory required to run a reduce task of the job, in MB.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is deprecated. Now, different memory limits can be + set for map and reduce tasks of a job, in MB. +

    + For backward compatibility, if the job configuration sets the + key {@link #MAPRED_TASK_MAXVMEM_PROPERTY}, that value is returned. + Otherwise, this method will return the larger of the values returned by + {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()} + after converting them into bytes. + + @return Memory required to run a task of this job, in bytes. + @see #setMaxVirtualMemoryForTask(long) + @deprecated Use {@link #getMemoryForMapTask()} and + {@link #getMemoryForReduceTask()}]]> + + + + + + + mapred.task.maxvmem is split into + mapreduce.map.memory.mb + and mapreduce.map.memory.mb,mapred + each of the new key are set + as mapred.task.maxvmem / 1024 + as new values are in MB + + @param vmem Maximum amount of virtual memory in bytes any task of this job + can use. + @see #getMaxVirtualMemoryForTask() + @deprecated + Use {@link #setMemoryForMapTask(long mem)} and + Use {@link #setMemoryForReduceTask(long mem)}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + k1=v1,k2=v2. Further it can + reference existing environment variables via $key on + Linux or %key% on Windows. + + Example: +

      +
    • A=foo - This will set the env variable A to foo.
    • +
    + + @deprecated Use {@link #MAPRED_MAP_TASK_ENV} or + {@link #MAPRED_REDUCE_TASK_ENV}]]> +
    + + + + k1=v1,k2=v2. Further it can + reference existing environment variables via $key on + Linux or %key% on Windows. + + Example: +
      +
    • A=foo - This will set the env variable A to foo.
    • +
    + + You can also add environment variables individually by appending + .VARNAME to this configuration key, where VARNAME is + the name of the environment variable. + + Example: +
      +
    • mapreduce.map.env.VARNAME=value
    • +
    ]]> +
    +
    + + + k1=v1,k2=v2. Further it can + reference existing environment variables via $key on + Linux or %key% on Windows. + + Example: +
      +
    • A=foo - This will set the env variable A to foo.
    • +
    + + You can also add environment variables individually by appending + .VARNAME to this configuration key, where VARNAME is + the name of the environment variable. + + Example: +
      +
    • mapreduce.reduce.env.VARNAME=value
    • +
    ]]> +
    +
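    A brief sketch of both forms; the variable names and values are placeholders:

    import org.apache.hadoop.mapred.JobConf;

    public class TaskEnvSetup {
      public static void configure(JobConf job) {
        // Comma-separated form, as described above.
        job.set(JobConf.MAPRED_MAP_TASK_ENV, "A=foo,B=bar");
        // Per-variable form, useful when a value itself contains commas.
        job.set("mapreduce.reduce.env.LD_LIBRARY_PATH", "/opt/native/lib");
      }
    }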
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
      +
    1. Some configuration parameters might have been marked as final by administrators and hence cannot be altered.
    2. While some job parameters are straightforward to set (e.g. {@link #setNumReduceTasks(int)}), other parameters interact subtly with the rest of the framework and/or job-configuration and are relatively more complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}).
    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

    Optionally JobConf is used to specify other advanced facets of the job such as the Comparators to be used, files to be put in the {@link DistributedCache}, whether or not intermediate and/or job outputs are to be compressed (and how), and debuggability via user-provided scripts ({@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}) for doing post-processing on task logs, the task's stdout, stderr, syslog, etc.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 
    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes significant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapreduce.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
    + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
    +         inputFile = job.get(JobContext.MAP_INPUT_FILE);
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 
    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

    + + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the outputrecords. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper]]> +
    +
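    A minimal, single-threaded MapRunnable sketch in the spirit of the framework's default runner; the class name SimpleMapRunner is ours, and multi-threaded variants would look different:

    import java.io.IOException;

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapRunnable;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.RecordReader;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.util.ReflectionUtils;

    /** Pulls records from the RecordReader and feeds them to the configured Mapper. */
    public class SimpleMapRunner<K1, V1, K2, V2> implements MapRunnable<K1, V1, K2, V2> {

      private Mapper<K1, V1, K2, V2> mapper;

      @Override
      @SuppressWarnings("unchecked")
      public void configure(JobConf job) {
        mapper = ReflectionUtils.newInstance(job.getMapperClass(), job);
      }

      @Override
      public void run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
                      Reporter reporter) throws IOException {
        try {
          K1 key = input.createKey();
          V1 value = input.createValue();
          while (input.next(key, value)) {
            mapper.map(key, value, output, reporter);
          }
        } finally {
          mapper.close();
        }
      }
    }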
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit]]> +
    +
    + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if task output recovery is supported, + false otherwise + @throws IOException + @see #recoverTask(TaskAttemptContext)]]> + + + + + + + true repeatable job commit is supported, + false otherwise + @throws IOException]]> + + + + + + + + + + + OutputCommitter. This is called from the application master + process, but it is called individually for each task. + + If an exception is thrown the task will be attempted again. + + @param taskContext Context of the task whose output is being recovered + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    1. Setup the job during initialization. For example, create the temporary output directory for the job during the initialization of the job.
    2. Cleanup the job after the job completion. For example, remove the temporary output directory after the job completion.
    3. Setup the task temporary output.
    4. Check whether a task needs a commit. This is to avoid the commit procedure if a task does not need a commit.
    5. Commit of the task output.
    6. Discard the task commit.
    + The methods in this class can be called from several different processes and + from several different contexts. It is important to know which process and + which context each is called from. Each method should be marked accordingly + in its documentation. It is also important to note that not all methods are + guaranteed to be called once and only once. If a method is not guaranteed to + have this property the output committer needs to handle this appropriately. + Also note it will only be in rare situations where they may be called + multiple times for the same task. + + @see FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
    +
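    A hedged sketch of a committer whose output needs no promotion step, showing which hooks the framework expects an implementation to provide; whether a no-op committer is appropriate depends entirely on where the job writes its output:

    import java.io.IOException;

    import org.apache.hadoop.mapred.JobContext;
    import org.apache.hadoop.mapred.OutputCommitter;
    import org.apache.hadoop.mapred.TaskAttemptContext;

    /** Every hook listed above becomes a no-op; nothing is promoted at commit time. */
    public class NoOpOutputCommitter extends OutputCommitter {
      @Override public void setupJob(JobContext jobContext) throws IOException { }
      @Override public void setupTask(TaskAttemptContext taskContext) throws IOException { }
      @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
        return false;   // nothing to commit, so commitTask is never invoked
      }
      @Override public void commitTask(TaskAttemptContext taskContext) throws IOException { }
      @Override public void abortTask(TaskAttemptContext taskContext) throws IOException { }
    }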
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + Implementations which write to filesystems which support delegation + tokens usually collect the tokens for the destination path(s) + and attach them to the job configuration. + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. Validate the output-specification of the job. For example, check that the output directory doesn't already exist.
    2. Provide the {@link RecordWriter} implementation to be used to write out the output files of the job. Output files are stored in a {@link FileSystem}.
    + + @see RecordWriter + @see JobConf]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

    @param key the key to be partitioned. @param value the entry value. @param numPartitions the total number of partitions. @return the partition number for the key.]]>
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + +

    Note: A Partitioner is created only when there are multiple + reducers.

    + + @see Reducer]]> +
    +
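    A minimal Partitioner sketch, essentially what a plain hash partitioner does; the class name is illustrative:

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.Partitioner;

    /** Routes records by the key's hash, masking the sign bit so the index is non-negative. */
    public class HashKeyPartitioner<V> implements Partitioner<Text, V> {

      @Override
      public void configure(JobConf job) {
        // No per-job state is needed for a plain hash partitioner.
      }

      @Override
      public int getPartition(Text key, V value, int numPartitions) {
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
      }
    }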
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} and {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes a significant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapreduce.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. Shuffle

    Reducer is input the grouped output of a {@link Mapper}. In this phase the framework, for each Reducer, fetches the relevant partition of the output of all the Mappers, via HTTP.

      +
    2. Sort

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + + SecondarySort + +

    If equivalence rules for keys while grouping the intermediates are different from those for grouping keys before reduction, then one may specify a Comparator via {@link JobConf#setOutputValueGroupingComparator(Class)}. Since {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to control how intermediate keys are sorted, these can be used in conjunction to simulate a secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • Map Input Value: document
      • Map Output Key: document checksum, url pagerank
      • Map Output Value: url
      • Partitioner: by checksum
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • OutputValueGroupingComparator: by checksum
      +
    3. Reduce

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 
    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase]]> +
    +
    + + + + + + + + + + + + + + Counter of the given group/name.]]> + + + + + + + Counter of the given group/name.]]> + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes significant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job retired, else false. + @throws IOException]]> + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. + false otherwise.]]> + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop provides an optional mode of execution in which the bad records + are detected and skipped in further attempts. + +

    This feature can be used when map/reduce tasks crash deterministically on certain input. This happens due to bugs in the map/reduce function. The usual course would be to fix these bugs. But sometimes this is not possible; perhaps the bug is in third-party libraries for which the source code is not available. Due to this, the task never reaches completion even with multiple attempts, and the complete data for that task is lost.

    + +

    With this feature, only a small portion of data surrounding the bad record is lost, which may be acceptable for some applications; see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}.

    + +

    The skipping mode gets kicked off after a certain number of failures; see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}.

    + +

    In skipping mode, the map/reduce task maintains the record range which is being processed at all times. Before giving the input to the map/reduce function, it sends this record range to the task tracker. If the task crashes, the task tracker knows which range was last reported. On further attempts that range gets skipped.

    ]]> +
    +
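    A short sketch using the two setters cited above; the thresholds are arbitrary examples:

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.SkipBadRecords;

    public class SkipModeSetup {
      public static void configure(JobConf job) {
        // Start skipping after two failed attempts of the same task...
        SkipBadRecords.setAttemptsToStartSkipping(job, 2);
        // ...and tolerate losing at most one record around each bad map input.
        SkipBadRecords.setMapperMaxSkipRecords(job, 1L);
      }
    }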
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, TaskType.MAP, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param type the {@link TaskType} + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs + @deprecated Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType, + Integer)}]]> +
    + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +
     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param type the {@link TaskType}, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    +
    + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see #setFormat + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    For the added Mapper, the configuration given for it, mapperConf, has precedence over the job's JobConf. This precedence is in effect when the task is running.

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain +

    + + @param job job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + The Mapper classes are invoked in a chained (or piped) fashion, the output of + the first becomes the input of the second, and so on until the last Mapper, + the output of the last Mapper will be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

    Special care has to be taken when creating chains to ensure that the key/values output by a Mapper are valid for the following Mapper in the chain. It is assumed all Mappers and the Reducer in the chain use matching output and input key and value classes, as no conversion is done by the chaining code.

    Using the ChainMapper and the ChainReducer classes it is possible to compose Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An immediate benefit of this pattern is a dramatic reduction in disk IO.

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    +
    + JobConf mapAConf = new JobConf(false);
    + ...
    + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, mapAConf);
    +
    + JobConf mapBConf = new JobConf(false);
    + ...
    + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, mapBConf);
    +
    + JobConf reduceConf = new JobConf(false);
    + ...
    + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, reduceConf);
    +
    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, null);
    +
    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
    +   LongWritable.class, LongWritable.class, true, null);
    +
    + FileInputFormat.setInputPaths(conf, inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    + ...
    +
    + JobClient jc = new JobClient(conf);
    + RunningJob job = jc.submitJob(conf);
    + ...
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Reducer leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Reducer does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    For the added Reducer, the configuration given for it, reducerConf, has precedence over the job's JobConf. This precedence is in effect when the task is running.

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. + + @param job job's JobConf to add the Reducer class. + @param klass the Reducer class to add. + @param inputKeyClass reducer input key class. + @param inputValueClass reducer input value class. + @param outputKeyClass reducer output key class. + @param outputValueClass reducer output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param reducerConf a JobConf with the configuration for the Reducer + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

+ For the added Mapper the configuration given for it, + mapperConf, has precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain + . + + @param job chain job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + reduce(...) method of the Reducer with the + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion, the output of the first becomes the input of the + second, and so on until the last Mapper, the output of the last Mapper will + be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. + This enables having reusable specialized Mappers that can be combined to + perform composite operations within a single task. +

+ Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reducer in the chain use matching output and input key and + value classes as no conversion is done by the chaining code. +

+ Using the ChainMapper and the ChainReducer classes it is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. +

    + ChainReducer usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    +
    + JobConf mapAConf = new JobConf(false);
    + ...
    + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, mapAConf);
    +
    + JobConf mapBConf = new JobConf(false);
    + ...
    + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, mapBConf);
    +
    + JobConf reduceConf = new JobConf(false);
    + ...
    + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, reduceConf);
    +
    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, null);
    +
    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
    +   LongWritable.class, LongWritable.class, true, null);
    +
    + FileInputFormat.setInputPaths(conf, inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    + ...
    +
    + JobClient jc = new JobClient(conf);
    + RunningJob job = jc.submitJob(conf);
    + ...
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RecordReader's for CombineFileSplit's. + @see CombineFileSplit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileRecordReader. + + Subclassing is needed to get a concrete record reader wrapper because of the + constructor requirement. + + @see CombineFileRecordReader + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileInputFormat-equivalent for + SequenceFileInputFormat. + + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + CombineFileInputFormat-equivalent for + TextInputFormat. + + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the name output is multi, false + if it is single. If the name output is not defined it returns + false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters, by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. + + @param conf job conf in which to enable or disable the counters. + @param enabled indicates if the counters will be enabled or not.]]> +
    +
    + + + + + By default these counters are disabled. +

+ MultipleOutputs supports counters, by default they are disabled. + The counters group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. + + + @param conf job conf from which the counters setting is read. + @return TRUE if the counters are enabled, FALSE if they are disabled.]]> +
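As a hedged illustration of the counters switch documented above, a minimal sketch against the old mapred API (imports from org.apache.hadoop.mapred and org.apache.hadoop.mapred.lib are assumed):

  JobConf conf = new JobConf();
  // Enable the per-named-output counters; they are off by default.
  MultipleOutputs.setCountersEnabled(conf, true);
  // The setting can later be read back, e.g. in a tool or a test.
  boolean countersOn = MultipleOutputs.getCountersEnabled(conf);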
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + If overriden subclasses must invoke super.close() at the + end of their close() + + @throws java.io.IOException thrown if any of the MultipleOutput files + could not be closed properly.]]> + + + + OutputCollector passed to + the map() and reduce() methods of the + Mapper and Reducer implementations. +

    + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

+ A named output can be a single file or a multi file. The latter is referred to as + a multi named output. +

+ A multi named output is an unbounded set of files all sharing the same + OutputFormat, key class and value class configuration. +

+ When named outputs are used within a Mapper implementation, + key/values written to a named output are not part of the reduce phase, only + key/values written to the job OutputCollector are part of the + reduce phase. +

+ MultipleOutputs supports counters, by default they are disabled. The counters + group is the {@link MultipleOutputs} class name. +

+ The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, an underscore '_' and the multiname. +

    + Job configuration usage pattern is: +

    +
    + JobConf conf = new JobConf();
    +
+ FileInputFormat.setInputPaths(conf, inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    +
    + conf.setMapperClass(MOMap.class);
    + conf.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional multi sequencefile based output 'sequence' for the
    + // job
    + MultipleOutputs.addMultiNamedOutput(conf, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + JobClient jc = new JobClient();
    + RunningJob job = jc.submitJob(conf);
    +
    + ...
    + 
    +

+ Usage pattern in a Reducer is: +

    +
    + public class MOReduce implements
+   Reducer<WritableComparable, Writable, WritableComparable, Writable> {
    + private MultipleOutputs mos;
    +
    + public void configure(JobConf conf) {
    + ...
    + mos = new MultipleOutputs(conf);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + OutputCollector output, Reporter reporter)
    + throws IOException {
    + ...
    + mos.getCollector("text", reporter).collect(key, new Text("Hello"));
    + mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
    + mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
    + ...
    + }
    +
    + public void close() throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + of {@link org.apache.hadoop.mapred.MapRunner}, when the Map + operation is not CPU bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured to use this MapRunnable class (using + the JobConf.setMapRunnerClass method) and + the number of threads the thread-pool can use with the + mapred.map.multithreadedrunner.threads property; its default + value is 10 threads. +
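A minimal configuration sketch for the setup described above, assuming the old mapred API; MyJob and MyMapper are hypothetical, thread-safe classes:

  JobConf conf = new JobConf(MyJob.class);      // MyJob is a hypothetical driver class
  conf.setMapperClass(MyMapper.class);          // MyMapper must be thread-safe
  // Run map() calls through a thread pool instead of the default MapRunner.
  conf.setMapRunnerClass(MultithreadedMapRunner.class);
  // Override the default pool size of 10 threads.
  conf.setInt("mapred.map.multithreadedrunner.threads", 20);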

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile. + @deprecated Use + {@link #setPartitionFile(Configuration, Path)} + instead]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Cluster. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ClusterMetrics provides clients with information such as: +

      +
1. + Size of the cluster. +
2. + Number of blacklisted and decommissioned trackers. +
3. + Slot capacity of the cluster. +
4. + The number of currently occupied/reserved map and reduce slots. +
5. + The number of currently running map and reduce tasks. +
6. + The number of job submissions. +
    + +

    Clients can query for the latest ClusterMetrics, via + {@link Cluster#getClusterStatus()}.

    + + @see Cluster]]> +
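A short sketch of the query path mentioned above, assuming a reachable cluster and a default Configuration (imports from org.apache.hadoop.conf and org.apache.hadoop.mapreduce are assumed):

  Cluster cluster = new Cluster(new Configuration());
  ClusterMetrics metrics = cluster.getClusterStatus();
  System.out.println("Trackers: " + metrics.getTaskTrackerCount());
  System.out.println("Occupied map slots: " + metrics.getOccupiedMapSlots());
  System.out.println("Running maps: " + metrics.getRunningMaps());
  System.out.println("Job submissions: " + metrics.getTotalJobSubmissions());
  cluster.close();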
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter is named by + an {@link Enum} and has a long for the value.

    + +

    Counters are bunched into Groups, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + the type of counter + @param the type of counter group + @param counters the old counters object]]> + + + + Counters holds per job/task counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.
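As a hedged sketch of how an application-defined counter is typically used; the enum and mapper names below are hypothetical:

  public class RecordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    enum RecordStats { TOTAL_RECORDS, EMPTY_RECORDS }   // hypothetical counter enum

    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      // Counters are looked up by enum constant and incremented as records flow through.
      context.getCounter(RecordStats.TOTAL_RECORDS).increment(1);
      if (value.getLength() == 0) {
        context.getCounter(RecordStats.EMPTY_RECORDS).increment(1);
      }
    }
  }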

    + +

    Counters are bunched into {@link CounterGroup}s, each + comprising of counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. The InputFormat + also creates the {@link RecordReader} to read the {@link InputSplit}. + + @param context job configuration. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + + + + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
1. + Validate the input-specification of the job. +
2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
3. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapreduce.input.fileinputformat.split.minsize.
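A minimal sketch of the lower-bound knob mentioned above (the 64 MB figure is only an example; imports from org.apache.hadoop.mapreduce.lib.input are assumed):

  Job job = Job.getInstance(new Configuration());
  job.setInputFormatClass(TextInputFormat.class);
  // Ask for splits of at least 64 MB; the file system block size still caps the upper bound.
  FileInputFormat.setMinInputSplitSize(job, 64L * 1024 * 1024);
  // Equivalent property form:
  // job.getConfiguration().setLong("mapreduce.input.fileinputformat.split.minsize", 64L * 1024 * 1024);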

    + +

Clearly, logical splits based on input-size are insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see FileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + SplitLocationInfos describing how the split + data is stored at each location. A null value indicates that all the + locations have the data stored on disk. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + A Cluster will be created from the conf parameter only when it's needed. + + @param conf the configuration + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException]]> + + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + @param conf the configuration + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException]]> + + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + @param status job status + @param conf job configuration + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException]]> + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + @param ignored + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException + @deprecated Use {@link #getInstance()}]]> + + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + @param ignored + @param conf job configuration + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException + @deprecated Use {@link #getInstance(Configuration)}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. 
+ @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + InputFormat to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + OutputFormat to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + Mapper to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Reducer to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + Partitioner to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + + true, job-setup and job-cleanup will be + considered from {@link OutputCommitter} + else ignored.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The access permissions of the file will determine whether the localized + file will be shared across jobs. If the file is not readable by other or + if any of its parent directories is not executable by other, then the + file will not be shared. In the case of a path that ends in "/*", + sharing of the localized files will be determined solely from the + access permissions of the parent directories. The access permissions of + the individual files will be ignored. + + @param uri The uri of the cache to be localized. + @param conf Configuration to add the cache to.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker is lost]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Job. + @throws IOException if fail to close.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + It allows the user to configure the + job, submit it, control its execution, and query the state. The set methods + only work until the job is submitted, afterwards they will throw an + IllegalStateException.

    + +

+ Normally the user creates the application, describes various facets of the + job via {@link Job} and then submits the job and monitors its progress.

    + +

    Here is an example on how to submit a job:

    +

    +     // Create a new Job
    +     Job job = Job.getInstance();
    +     job.setJarByClass(MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
+     FileInputFormat.addInputPath(job, new Path("in"));
+     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     job.waitForCompletion(true);
    + 
    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + 1. + @return the number of reduce tasks for this job.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the key input type to the Mapper + @param the value input type to the Mapper + @param the key output type from the Mapper + @param the value output type from the Mapper]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link Configuration} for + the job via the {@link JobContext#getConfiguration()}. + +

    The framework first calls + {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by + {@link #map(Object, Object, org.apache.hadoop.mapreduce.Mapper.Context)} + for each key/value pair in the InputSplit. Finally + {@link #cleanup(org.apache.hadoop.mapreduce.Mapper.Context)} is called.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the sorting and grouping by + specifying two key {@link RawComparator} classes.

    + +

    The Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link Job#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the Configuration.
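A short sketch of the compression settings referred to above; the property names follow the current mapreduce configuration keys and should be treated as an assumption for older releases:

  Configuration conf = job.getConfiguration();
  // Compress the intermediate map output.
  conf.setBoolean("mapreduce.map.output.compress", true);
  conf.setClass("mapreduce.map.output.compress.codec", DefaultCodec.class, CompressionCodec.class);
  // Compress the final job output as well.
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);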

    + +

    If the job has zero + reduces then the output of the Mapper is directly written + to the {@link OutputFormat} without sorting by keys.

    + +

    Example:

    +

    + public class TokenCounterMapper 
    +     extends Mapper<Object, Text, Text, IntWritable>{
    +    
    +   private final static IntWritable one = new IntWritable(1);
    +   private Text word = new Text();
    +   
    +   public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    +     StringTokenizer itr = new StringTokenizer(value.toString());
    +     while (itr.hasMoreTokens()) {
    +       word.set(itr.nextToken());
    +       context.write(word, one);
    +     }
    +   }
    + }
    + 
    + +

    Applications may override the + {@link #run(org.apache.hadoop.mapreduce.Mapper.Context)} method to exert + greater control on map processing e.g. multi-threaded Mappers + etc.
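A sketch of the run(Context) override mentioned above; it mirrors the default single-threaded loop and is only a starting point (the mapper class and its key/value types are hypothetical):

  public class PassThroughMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    public void run(Context context) throws IOException, InterruptedException {
      setup(context);
      try {
        // Pull key/value pairs from the split and hand each one to map().
        while (context.nextKeyValue()) {
          map(context.getCurrentKey(), context.getCurrentValue(), context);
        }
      } finally {
        cleanup(context);
      }
    }
  }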

    + + @see InputFormat + @see JobContext + @see Partitioner + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + MarkableIterator is a wrapper iterator class that + implements the {@link MarkableIteratorInterface}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if task output recovery is supported, + false otherwise + @see #recoverTask(TaskAttemptContext) + @deprecated Use {@link #isRecoverySupported(JobContext)} instead.]]> + + + + + + + true repeatable job commit is supported, + false otherwise + @throws IOException]]> + + + + + + + true if task output recovery is supported, + false otherwise + @throws IOException + @see #recoverTask(TaskAttemptContext)]]> + + + + + + + OutputCommitter. This is called from the application master + process, but it is called individually for each task. + + If an exception is thrown the task will be attempted again. + + This may be called multiple times for the same task. But from different + application attempts. + + @param taskContext Context of the task whose output is being recovered + @throws IOException]]> + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
1. + Setup the job during initialization. For example, create the temporary + output directory for the job during the initialization of the job. +
2. + Cleanup the job after the job completion. For example, remove the + temporary output directory after the job completion. +
3. + Setup the task temporary output. +
4. + Check whether a task needs a commit. This is to avoid the commit + procedure if a task does not need commit. +
5. + Commit of the task output. +
6. + Discard the task commit. +
    + The methods in this class can be called from several different processes and + from several different contexts. It is important to know which process and + which context each is called from. Each method should be marked accordingly + in its documentation. It is also important to note that not all methods are + guaranteed to be called once and only once. If a method is not guaranteed to + have this property the output committer needs to handle this appropriately. + Also note it will only be in rare situations where they may be called + multiple times for the same task. + + @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + Implementations which write to filesystems which support delegation + tokens usually collect the tokens for the destination path(s) + and attach them to the job context's JobConf. + @param context information about the job + @throws IOException when output should not be attempted]]> +
    +
    + + + + + + + + + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
1. + Validate the output-specification of the job. For example, check that the + output directory doesn't already exist. +
2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    + + @see RecordWriter]]> +
    +
    + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

    + + @param key the key to be partioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + +

    Note: A Partitioner is created only when there are multiple + reducers.

    + +

    Note: If you require your Partitioner class to obtain the Job's + configuration object, implement the {@link Configurable} interface.
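A small sketch of a custom Partitioner for the contract described above; the class name and key/value types are illustrative only:

  public class FirstCharPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
      // Route keys by their first character; the mask keeps the result non-negative.
      int hash = key.getLength() == 0 ? 0 : key.charAt(0);
      return (hash & Integer.MAX_VALUE) % numPartitions;
    }
  }

It would typically be wired in with job.setPartitionerClass(FirstCharPartitioner.class).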

    + + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "N/A" + + @return Scheduling information associated to particular Job Queue]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param ]]> + + + + + + + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param context the context of the task + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + + + + + + + + the class of the input keys + @param the class of the input values + @param the class of the output keys + @param the class of the output values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Reducer implementations + can access the {@link Configuration} for the job via the + {@link JobContext#getConfiguration()} method.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + + Shuffle + +

      The Reducer copies the sorted output from each + {@link Mapper} using HTTP across the network.

      +
2. + Sort + 

      The framework merge sorts Reducer inputs by + keys + (since different Mappers may have output the same key).

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + + SecondarySort + +

  To achieve a secondary sort on the values returned by the value + iterator, the application should extend the key with the secondary + key and define a grouping comparator. The keys will be sorted using the + entire key, but will be grouped using the grouping comparator to decide + which keys and values are sent in the same call to reduce. The grouping + comparator is specified via + {@link Job#setGroupingComparatorClass(Class)}. The sort order is + controlled by + {@link Job#setSortComparatorClass(Class)}. A job configuration sketch for this setup is shown right after this list.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • Map Input Value: document
      • Map Output Key: document checksum, url pagerank
      • Map Output Value: url
      • Partitioner: by checksum
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • OutputValueGroupingComparator: by checksum
      +
3. + Reduce + 

      In this phase the + {@link #reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)} + method is called for each <key, (collection of values)> in + the sorted inputs.

      +

      The output of the reduce task is typically written to a + {@link RecordWriter} via + {@link Context#write(Object, Object)}.

      +
    + +

    The output of the Reducer is not re-sorted.
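As referenced in the SecondarySort item above, a hedged configuration sketch for the duplicate-page example; the partitioner and comparator classes are hypothetical stand-ins the application would supply:

  Job job = Job.getInstance(new Configuration(), "dedup-pages");
  job.setPartitionerClass(ChecksumPartitioner.class);                // hypothetical: partition by checksum
  job.setSortComparatorClass(ChecksumThenPagerankComparator.class);  // hypothetical: checksum, then decreasing pagerank
  job.setGroupingComparatorClass(ChecksumGroupingComparator.class);  // hypothetical: group reduce calls by checksum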

    + +

    Example:

    +

    + public class IntSumReducer<Key> extends Reducer<Key,IntWritable,
    +                                                 Key,IntWritable> {
    +   private IntWritable result = new IntWritable();
    + 
    +   public void reduce(Key key, Iterable<IntWritable> values,
    +                      Context context) throws IOException, InterruptedException {
    +     int sum = 0;
    +     for (IntWritable val : values) {
    +       sum += val.get();
    +     }
    +     result.set(sum);
    +     context.write(key, result);
    +   }
    + }
    + 
    + + @see Mapper + @see Partitioner]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + counterName. + @param counterName counter name + @return the Counter for the given counterName]]> + + + + + + + groupName and + counterName. + @param counterName counter name + @return the Counter for the given groupName and + counterName]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter for the task-attempt]]> + + + + the input key type for the task + @param the input value type for the task + @param the output key type for the task + @param the output value type for the task]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + type of the other counter + @param type of the other counter group + @param counters the counters object to copy + @param groupFactory the factory for new groups]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + type of counter inside the counters + @param type of group inside the counters]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + type of the counter for the group]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The key and values are passed from one element of the chain to the next, by + value. For the added Mapper the configuration given for it, + mapperConf, have precedence over the job's Configuration. This + precedence is in effect when the task is running. +

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain +

    + + @param job + The job. + @param klass + the Mapper class to add. + @param inputKeyClass + mapper input key class. + @param inputValueClass + mapper input value class. + @param outputKeyClass + mapper output key class. + @param outputValueClass + mapper output value class. + @param mapperConf + a configuration for the Mapper class. It is recommended to use a + Configuration without default values using the + Configuration(boolean loadDefaults) constructor with + FALSE.]]> +
    + + + + + + + + + + + + The Mapper classes are invoked in a chained (or piped) fashion, the output of + the first becomes the input of the second, and so on until the last Mapper, + the output of the last Mapper will be written to the task's output. +

    +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

    +

    + Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reduce in the chain use matching output and input key and + value classes as no conversion is done by the chaining code. +

    +

+ Using the ChainMapper and the ChainReducer classes it is possible to + compose Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    + +

    + ...
+ Job job = new Job(conf);
    +
    + Configuration mapAConf = new Configuration(false);
    + ...
    + ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, mapAConf);
    +
    + Configuration mapBConf = new Configuration(false);
    + ...
    + ChainMapper.addMapper(job, BMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, mapBConf);
    +
    + ...
    +
+ job.waitForCompletion(true);
    + ...
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + The key and values are passed from one element of the chain to the next, by + value. For the added Reducer the configuration given for it, + reducerConf, have precedence over the job's Configuration. + This precedence is in effect when the task is running. +

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. +

    + + @param job + the job + @param klass + the Reducer class to add. + @param inputKeyClass + reducer input key class. + @param inputValueClass + reducer input value class. + @param outputKeyClass + reducer output key class. + @param outputValueClass + reducer output value class. + @param reducerConf + a configuration for the Reducer class. It is recommended to use a + Configuration without default values using the + Configuration(boolean loadDefaults) constructor with + FALSE.]]> +
    +
    + + + + + + + + + + + + The key and values are passed from one element of the chain to the next, by + value For the added Mapper the configuration given for it, + mapperConf, have precedence over the job's Configuration. This + precedence is in effect when the task is running. +

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the + chain. +

    + + @param job + The job. + @param klass + the Mapper class to add. + @param inputKeyClass + mapper input key class. + @param inputValueClass + mapper input value class. + @param outputKeyClass + mapper output key class. + @param outputValueClass + mapper output value class. + @param mapperConf + a configuration for the Mapper class. It is recommended to use a + Configuration without default values using the + Configuration(boolean loadDefaults) constructor with + FALSE.]]> +
    +
    + + + + + + + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion. The output of the reducer becomes the input of + the first mapper and output of first becomes the input of the second, and so + on until the last Mapper, the output of the last Mapper will be written to + the task's output. +

    +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. This + enables having reusable specialized Mappers that can be combined to perform + composite operations within a single task. +

    +

    + Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reduce in the chain use matching output and input key and + value classes as no conversion is done by the chaining code. +

    +

Using the ChainMapper and the ChainReducer classes it is possible to + compose Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. An + immediate benefit of this pattern is a dramatic reduction in disk IO.

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. +

    + ChainReducer usage pattern: +

    + +

    + ...
+ Job job = new Job(conf);
    + ....
    +
    + Configuration reduceConf = new Configuration(false);
    + ...
    + ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, reduceConf);
    +
    + ChainReducer.addMapper(job, CMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, null);
    +
    + ChainReducer.addMapper(job, DMap.class, LongWritable.class, Text.class,
    +   LongWritable.class, LongWritable.class, true, null);
    +
    + ...
    +
    + job.waitForCompletion(true);
    + ...
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBInputFormat emits LongWritables containing the record number as + key and DBWritables as value. + + The SQL query, and input class can be using one of the two + setInput methods.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link DBOutputFormat} accepts <key,value> pairs, where + key has a type extending DBWritable. Returned {@link RecordWriter} + writes only the key to the database with a batch SQL query.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBWritable. DBWritable, is similar to {@link Writable} + except that the {@link #write(PreparedStatement)} method takes a + {@link PreparedStatement}, and {@link #readFields(ResultSet)} + takes a {@link ResultSet}. +

    + Implementations are responsible for writing the fields of the object + to PreparedStatement, and reading the fields of the object from the + ResultSet. + +

    Example:

    + If we have the following table in the database : +
    + CREATE TABLE MyTable (
    +   counter        INTEGER NOT NULL,
+   timestamp      BIGINT  NOT NULL
    + );
    + 
    + then we can read/write the tuples from/to the table with : +

    + public class MyWritable implements Writable, DBWritable {
    +   // Some data     
    +   private int counter;
    +   private long timestamp;
    +       
    +   //Writable#write() implementation
    +   public void write(DataOutput out) throws IOException {
    +     out.writeInt(counter);
    +     out.writeLong(timestamp);
    +   }
    +       
    +   //Writable#readFields() implementation
    +   public void readFields(DataInput in) throws IOException {
    +     counter = in.readInt();
    +     timestamp = in.readLong();
    +   }
    +       
    +   public void write(PreparedStatement statement) throws SQLException {
    +     statement.setInt(1, counter);
    +     statement.setLong(2, timestamp);
    +   }
    +       
    +   public void readFields(ResultSet resultSet) throws SQLException {
    +     counter = resultSet.getInt(1);
    +     timestamp = resultSet.getLong(2);
    +   } 
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RecordReader's for + CombineFileSplit's. + + @see CombineFileSplit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileRecordReader. + + Subclassing is needed to get a concrete record reader wrapper because of the + constructor requirement. + + @see CombineFileRecordReader + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + th Path]]> + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileSplit can be used to implement {@link RecordReader}'s, + with reading one record per file. + + @see FileSplit + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + CombineFileInputFormat-equivalent for + SequenceFileInputFormat. + + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + CombineFileInputFormat-equivalent for + TextInputFormat. + + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat always returns + true. Implementations that may deal with non-splittable files must + override this method. + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param context the job context + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobContext)}. + + Implementations of FileInputFormat can also override the + {@link #isSplitable(JobContext, Path)} method to prevent input files + from being split-up in certain situations. Implementations that may + deal with non-splittable files must override this method, since + the default implementation assumes splitting is always possible.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    or + conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, recordLength); +

    + @see FixedLengthRecordReader]]> +
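A minimal sketch of wiring the record length shown above into a job; the 32-byte length is only an example and conf is an existing Configuration:

  Job job = Job.getInstance(conf);
  job.setInputFormatClass(FixedLengthInputFormat.class);
  // Same effect as setting FIXED_RECORD_LENGTH directly on the Configuration.
  FixedLengthInputFormat.setRecordLength(job.getConfiguration(), 32);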
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapreduce.join.expr property and + user-supplied join types from mapreduce.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + mapreduce.join.define.<ident> to a classname. + In the expression mapreduce.join.expr, the identifier will be + assumed to be a ComposableRecordReader. + mapreduce.join.keycomparator can be a classname used to compare + keys in the join. + @see #setFormat + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [<child1>,<child2>,...,<childn>]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the map's input key type + @param the map's input value type + @param the map's output key type + @param the map's output value type + @param job the job + @return the mapper class to run]]> + + + + + + + the map input key type + @param the map input value type + @param the map output key type + @param the map output value type + @param job the job to modify + @param cls the class to use as the mapper]]> + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + {@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU + bound in order to improve throughput. +

    + Mapper implementations using this MapRunnable must be thread-safe. +

+ The Map-Reduce job has to be configured with the mapper to use via + {@link #setMapperClass(Job, Class)} and + the number of threads the thread-pool can use with the + {@link #getNumberOfThreads(JobContext)} method. The default + value is 10 threads. +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MapContext to be wrapped + @return a wrapped Mapper.Context for custom implementations]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
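A minimal sketch of the multithreaded runner described above; the LineLengthMapper class is hypothetical, and only the MultithreadedMapper static setters come from the documented API:

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper;

    public class MultithreadedMapperSetup {

      // Hypothetical mapper: thread-safe because it keeps no shared mutable state.
      public static class LineLengthMapper
          extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
          context.write(new Text("length"), new IntWritable(value.getLength()));
        }
      }

      public static void configure(Job job) {
        // The job runs MultithreadedMapper, which fans map() calls out to
        // LineLengthMapper on a pool of 8 threads (the default is 10).
        job.setMapperClass(MultithreadedMapper.class);
        MultithreadedMapper.setMapperClass(job, LineLengthMapper.class);
        MultithreadedMapper.setNumberOfThreads(job, 8);
      }
    }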

  • + In applications which take a classname of committer in + a configuration option, set it to the canonical name of this class + (see {@link #NAME}). When this class is instantiated, it will + use the factory mechanism to locate the configured committer for the + destination. +
  • +
  • + In code, explicitly create an instance of this committer through + its constructor, then invoke commit lifecycle operations on it. + The dynamically configured committer will be created in the constructor + and have the lifecycle operations relayed to it. +
  • + ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only) + are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in a work directory during execution + of his task i.e. via + {@link #getWorkOutputPath(TaskInputOutputContext)}, and + the framework will move them out similarly - thus she doesn't have to pick + unique paths per task-attempt.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    +
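A sketch of the side-file pattern described above; the reducer and file name are made up for illustration, and only getWorkOutputPath is from the documented API. The task writes an extra file under its work directory and lets the committer promote it on success:

    import java.io.IOException;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class SideFileReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

      private FSDataOutputStream sideFile;

      @Override
      protected void setup(Context context) throws IOException {
        // Per-task-attempt file in the temporary work directory; it is promoted
        // to the job output directory only if this attempt succeeds.
        Path workDir = FileOutputFormat.getWorkOutputPath(context);
        Path path = new Path(workDir, "summary-" + context.getTaskAttemptID());
        sideFile = path.getFileSystem(context.getConfiguration()).create(path, false);
      }

      @Override
      protected void reduce(Text key, Iterable<IntWritable> values, Context context)
          throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
          sum += v.get();
        }
        context.write(key, new IntWritable(sum));
        sideFile.writeBytes(key + "\t" + sum + "\n");
      }

      @Override
      protected void cleanup(Context context) throws IOException {
        sideFile.close();
      }
    }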
    + + + + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

+ +

    This method uses the {@link #getUniqueFile} method to make the file name + unique for the task.

+ + @param context the context for the task. + @param name the name for the file. + @param extension the extension for the file + @return a unique path across all tasks of the job.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Warning: when the baseOutputPath is a path that resolves + outside of the final job output directory, the directory is created + immediately and then persists through subsequent task retries, breaking + the concept of output committing.]]> + + + + + + + + + + Warning: when the baseOutputPath is a path that resolves + outside of the final job output directory, the directory is created + immediately and then persists through subsequent task retries, breaking + the concept of output committing.]]> + + + + + + + super.close() at the + end of their close()]]> + + + + + Case one: writing to additional outputs other than the job default output. + + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

    + +

    + Case two: to write data to different files provided by user +

    + +

    + MultipleOutputs supports counters, by default they are disabled. The + counters group is the {@link MultipleOutputs} class name. The names of the + counters are the same as the output name. These count the number records + written to each output name. +

    + + Usage pattern for job submission: +
    +
    + Job job = new Job();
    +
    + FileInputFormat.setInputPath(job, inDir);
    + FileOutputFormat.setOutputPath(job, outDir);
    +
    + job.setMapperClass(MOMap.class);
    + job.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional sequence-file based output 'sequence' for the job
    + MultipleOutputs.addNamedOutput(job, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + job.waitForCompletion(true);
    + ...
    + 
    +

    + Usage in Reducer: +

    + <K, V> String generateFileName(K k, V v) {
    +   return k.toString() + "_" + v.toString();
    + }
    + 
    + public class MOReduce extends
    +   Reducer<WritableComparable, Writable,WritableComparable, Writable> {
    + private MultipleOutputs mos;
    + public void setup(Context context) {
    + ...
    + mos = new MultipleOutputs(context);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + Context context)
    + throws IOException {
    + ...
    + mos.write("text", , key, new Text("Hello"));
    + mos.write("seq", LongWritable(1), new Text("Bye"), "seq_a");
    + mos.write("seq", LongWritable(2), key, new Text("Chau"), "seq_b");
    + mos.write(key, new Text("value"), generateFileName(key, new Text("value")));
    + ...
    + }
    +
    + public void cleanup(Context) throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    + +

+ When used in conjunction with org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat, + MultipleOutputs can mimic the behaviour of MultipleTextOutputFormat and MultipleSequenceFileOutputFormat + from the old Hadoop API - i.e., output can be written from the Reducer to more than one location. +

    + +

    + Use MultipleOutputs.write(KEYOUT key, VALUEOUT value, String baseOutputPath) to write key and + value to a path specified by baseOutputPath, with no need to specify a named output. + Warning: when the baseOutputPath passed to MultipleOutputs.write + is a path that resolves outside of the final job output directory, the + directory is created immediately and then persists through subsequent + task retries, breaking the concept of output committing: +

    + +
    + private MultipleOutputs<Text, Text> out;
    + 
    + public void setup(Context context) {
    +   out = new MultipleOutputs<Text, Text>(context);
    +   ...
    + }
    + 
    + public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    + for (Text t : values) {
    +   out.write(key, t, generateFileName(<parameter list...>));
    +   }
    + }
    + 
    + protected void cleanup(Context context) throws IOException, InterruptedException {
    +   out.close();
    + }
    + 
    + +

    + Use your own code in generateFileName() to create a custom path to your results. + '/' characters in baseOutputPath will be translated into directory levels in your file system. + Also, append your custom-generated path with "part" or similar, otherwise your output will be -00000, -00001 etc. + No call to context.write() is necessary. See example generateFileName() code below. +

    + +
    + private String generateFileName(Text k) {
    +   // expect Text k in format "Surname|Forename"
    +   String[] kStr = k.toString().split("\\|");
    +   
    +   String sName = kStr[0];
    +   String fName = kStr[1];
    +
    +   // example for k = Smith|John
    +   // output written to /user/hadoop/path/to/output/Smith/John-r-00000 (etc)
    +   return sName + "/" + fName;
    + }
    + 
    + +

    + Using MultipleOutputs in this way will still create zero-sized default output, eg part-00000. + To prevent this use LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); + instead of job.setOutputFormatClass(TextOutputFormat.class); in your Hadoop job configuration. +

    ]]> +
    +
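A driver-side sketch tying the above together; the paths and output names are placeholders. The named outputs are declared up front, and LazyOutputFormat suppresses the empty default part files mentioned in the last paragraph:

    import java.io.IOException;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
    import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    public final class MultipleOutputsDriver {

      public static void configure(Job job, Path inDir, Path outDir) throws IOException {
        FileInputFormat.setInputPaths(job, inDir);
        FileOutputFormat.setOutputPath(job, outDir);

        // The default output is only materialized if something is written to it.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

        // Two named outputs, each with its own format and key/value classes.
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class,
            LongWritable.class, Text.class);
        MultipleOutputs.addNamedOutput(job, "seq", SequenceFileOutputFormat.class,
            LongWritable.class, Text.class);
        MultipleOutputs.setCountersEnabled(job, true);
      }
    }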
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • If an explicit committer factory is named, it is used.
  • +
  • The output path is examined. + If is non null and there is an explicit schema for that filesystem, + its factory is instantiated.
  • +
  • Otherwise, an instance of {@link FileOutputCommitter} is + created.
  • + + + In {@link FileOutputFormat}, the created factory has its method + {@link #createOutputCommitter(Path, TaskAttemptContext)} with a task + attempt context and a possibly null path.]]> +
    +
    + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param job the {@link Job} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

+ + @param job the {@link Job} to modify + @param theClass the SequenceFile output value class.]]> +
    +
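A short sketch of the two setters above, assuming the tasks emit raw BytesWritable pairs that logically encode Text keys and IntWritable values (the classes chosen here are illustrative):

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat;

    public final class BinarySequenceOutputSetup {

      public static void configure(Job job) {
        job.setOutputFormatClass(SequenceFileAsBinaryOutputFormat.class);
        // Tasks write raw bytes, but the SequenceFile header records the logical
        // key/value classes so readers know how to decode them.
        SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, Text.class);
        SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, IntWritable.class);
      }
    }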
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + bytes[left:(right+1)] in Python syntax. + + @param conf configuration object + @param left left Python-style offset + @param right right Python-style offset]]> + + + + + + + bytes[offset:] in Python syntax. + + @param conf configuration object + @param offset left Python-style offset]]> + + + + + + + bytes[:(offset+1)] in Python syntax. + + @param conf configuration object + @param offset right Python-style offset]]> + + + + + + + + + + + + + + + + + + + + + Partition {@link BinaryComparable} keys using a configurable part of + the bytes array returned by {@link BinaryComparable#getBytes()}.

    + +

    The subarray to be used for the partitioning can be defined by means + of the following properties: +

      +
    • + mapreduce.partition.binarypartitioner.left.offset: + left offset in array (0 by default) +
    • +
    • + mapreduce.partition.binarypartitioner.right.offset: + right offset in array (-1 by default) +
    • +
    + Like in Python, both negative and positive offsets are allowed, but + the meaning is slightly different. In case of an array of length 5, + for instance, the possible offsets are: +
    
    +  +---+---+---+---+---+
    +  | B | B | B | B | B |
    +  +---+---+---+---+---+
    +    0   1   2   3   4
    +   -5  -4  -3  -2  -1
    + 
+ The first row of numbers gives the position of the offsets 0...5 in + the array; the second row gives the corresponding negative offsets. + Contrary to Python, the specified subarray has byte i + and j as first and last element, respectively, when + i and j are the left and right offset. + +

    For Hadoop programs written in Java, it is advisable to use one of + the following static convenience methods for setting the offsets: +

      +
    • {@link #setOffsets}
    • +
    • {@link #setLeftOffset}
    • +
    • {@link #setRightOffset}
    • +
    ]]> +
    +
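For instance (a sketch; the offsets are arbitrary), a job partitioning on the first four key bytes, or equivalently on the last four, would be set up as:

    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner;

    public final class BinaryPartitionerSetup {

      public static void configure(Job job) {
        job.setPartitionerClass(BinaryPartitioner.class);
        // Partition on bytes 0..3 of each key; both offsets are inclusive, Python-style.
        BinaryPartitioner.setOffsets(job.getConfiguration(), 0, 3);
        // The last four bytes instead would be: setOffsets(conf, -4, -1).
      }
    }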
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + total.order.partitioner.natural.order is not false, a trie + of the first total.order.partitioner.max.trie.depth(2) + 1 bytes + will be built. Otherwise, keys will be located using a binary search of + the partition keyset using the {@link org.apache.hadoop.io.RawComparator} + defined for this job. The input file must be sorted with the same + comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.]]> + + + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile.]]> + + + + + + + + + + + + + + + + + + + + + +
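A sketch of the usual setup for the total-order partitioner documented here, assuming Text keys and a random sampler; the reducer count and sampling parameters are placeholders:

    import java.io.IOException;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
    import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

    public final class TotalOrderSetup {

      public static void configure(Job job, Path partitionFile)
          throws IOException, ClassNotFoundException, InterruptedException {
        job.setNumReduceTasks(8);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
        // Sample the job input to pick getNumReduceTasks() - 1 = 7 split points,
        // sorted with the job's key comparator.
        InputSampler.writePartitionFile(job,
            new InputSampler.RandomSampler<Text, Text>(0.1, 10000, 10));
      }
    }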
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ReduceContext to be wrapped + @return a wrapped Reducer.Context for custom implementations]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    diff --git a/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.3.5.xml b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.3.5.xml new file mode 100644 index 00000000000..f2650d508e8 --- /dev/null +++ b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.3.5.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index 53e2de63c4a..5cec569dfa8 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -134,7 +134,7 @@ false - 3.3.4 + 3.3.5 -unstable diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_API_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_API_3.3.5.xml new file mode 100644 index 00000000000..9e31a72ac07 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_API_3.3.5.xml @@ -0,0 +1,26420 @@ + + + + + + + + + + + + + + + + + + + + The interface used by clients to obtain a new {@link ApplicationId} for + submitting new applications.

    + +

    The ResourceManager responds with a new, monotonically + increasing, {@link ApplicationId} which is used by the client to submit + a new application.

    + +

    The ResourceManager also responds with details such + as maximum resource capabilities in the cluster as specified in + {@link GetNewApplicationResponse}.

    + + @param request request to get a new ApplicationId + @return response containing the new ApplicationId to be used + to submit an application + @throws YarnException + @throws IOException + @see #submitApplication(SubmitApplicationRequest)]]> +
    +
    + + + + + + The interface used by clients to submit a new application to the + ResourceManager.

    + +

    The client is required to provide details such as queue, + {@link Resource} required to run the ApplicationMaster, + the equivalent of {@link ContainerLaunchContext} for launching + the ApplicationMaster etc. via the + {@link SubmitApplicationRequest}.

    + +

    Currently the ResourceManager sends an immediate (empty) + {@link SubmitApplicationResponse} on accepting the submission and throws + an exception if it rejects the submission. However, this call needs to be + followed by {@link #getApplicationReport(GetApplicationReportRequest)} + to make sure that the application gets properly submitted - obtaining a + {@link SubmitApplicationResponse} from ResourceManager doesn't guarantee + that RM 'remembers' this application beyond failover or restart. If RM + failover or RM restart happens before ResourceManager saves the + application's state successfully, the subsequent + {@link #getApplicationReport(GetApplicationReportRequest)} will throw + a {@link ApplicationNotFoundException}. The Clients need to re-submit + the application with the same {@link ApplicationSubmissionContext} when + it encounters the {@link ApplicationNotFoundException} on the + {@link #getApplicationReport(GetApplicationReportRequest)} call.

    + +

    During the submission process, it checks whether the application + already exists. If the application exists, it will simply return + SubmitApplicationResponse

    + +

In secure mode, the ResourceManager verifies access to + queues etc. before accepting the application submission.

    + + @param request request to submit a new application + @return (empty) response on accepting the submission + @throws YarnException + @throws IOException + @see #getNewApplication(GetNewApplicationRequest)]]> +
    +
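The submission flow described above is usually driven through the YarnClient library rather than the raw protocol; a rough sketch, with placeholder application name, queue, resources and launch command:

    import java.util.Collections;
    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.ApplicationReport;
    import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.client.api.YarnClient;
    import org.apache.hadoop.yarn.client.api.YarnClientApplication;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public final class SubmitSketch {

      public static ApplicationId submit() throws Exception {
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(new YarnConfiguration());
        yarnClient.start();
        try {
          // getNewApplication under the hood: a fresh ApplicationId plus cluster limits.
          YarnClientApplication app = yarnClient.createApplication();
          ApplicationSubmissionContext ctx = app.getApplicationSubmissionContext();
          ctx.setApplicationName("sleep-test");
          ctx.setQueue("default");
          ctx.setResource(Resource.newInstance(1024, 1));
          ctx.setAMContainerSpec(ContainerLaunchContext.newInstance(
              Collections.emptyMap(), Collections.emptyMap(),
              Collections.singletonList("sleep 60"), null, null, null));

          // submitApplication, then getApplicationReport to confirm the RM kept it.
          ApplicationId appId = yarnClient.submitApplication(ctx);
          ApplicationReport report = yarnClient.getApplicationReport(appId);
          System.out.println(appId + " is " + report.getYarnApplicationState());
          return appId;
        } finally {
          yarnClient.stop();
        }
      }
    }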
    + + + + + + The interface used by clients to request the + ResourceManager to fail an application attempt.

    + +

    The client, via {@link FailApplicationAttemptRequest} provides the + {@link ApplicationAttemptId} of the attempt to be failed.

    + +

In secure mode, the ResourceManager verifies access to the + application, queue etc. before failing the attempt.

    + +

    Currently, the ResourceManager returns an empty response + on success and throws an exception on rejecting the request.

    + + @param request request to fail an attempt + @return ResourceManager returns an empty response + on success and throws an exception on rejecting the request + @throws YarnException + @throws IOException + @see #getQueueUserAcls(GetQueueUserAclsInfoRequest)]]> +
    +
    + + + + + + The interface used by clients to request the + ResourceManager to abort submitted application.

    + +

    The client, via {@link KillApplicationRequest} provides the + {@link ApplicationId} of the application to be aborted.

    + +

In secure mode, the ResourceManager verifies access to the + application, queue etc. before terminating the application.

    + +

    Currently, the ResourceManager returns an empty response + on success and throws an exception on rejecting the request.

    + + @param request request to abort a submitted application + @return ResourceManager returns an empty response + on success and throws an exception on rejecting the request + @throws YarnException + @throws IOException + @see #getQueueUserAcls(GetQueueUserAclsInfoRequest)]]> +
    +
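On the client side this is again typically a one-liner through YarnClient; a sketch in which the ApplicationId is assumed to come from an earlier submission:

    import java.io.IOException;
    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.client.api.YarnClient;
    import org.apache.hadoop.yarn.exceptions.YarnException;

    public final class KillSketch {

      // Sends the KillApplicationRequest described above; a thrown exception means
      // the RM rejected the request (unknown application, missing permissions, ...).
      public static void kill(YarnClient yarnClient, ApplicationId appId)
          throws YarnException, IOException {
        yarnClient.killApplication(appId);
      }
    }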
    + + + + + + The interface used by clients to get metrics about the cluster from + the ResourceManager.

    + +

    The ResourceManager responds with a + {@link GetClusterMetricsResponse} which includes the + {@link YarnClusterMetrics} with details such as number of current + nodes in the cluster.

    + + @param request request for cluster metrics + @return cluster metrics + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by clients to get a report of all nodes + in the cluster from the ResourceManager.

    + +

    The ResourceManager responds with a + {@link GetClusterNodesResponse} which includes the + {@link NodeReport} for all the nodes in the cluster.

    + + @param request request for report on all nodes + @return report on all nodes + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by clients to get information about queues + from the ResourceManager.

    + +

    The client, via {@link GetQueueInfoRequest}, can ask for details such + as used/total resources, child queues, running applications etc.

    + +

In secure mode, the ResourceManager verifies access before + providing the information.

    + + @param request request to get queue information + @return queue information + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by clients to get information about queue + acls for current user from the ResourceManager. +

    + +

    The ResourceManager responds with queue acls for all + existing queues.

    + + @param request request to get queue acls for current user + @return queue acls for current user + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + The interface used by clients to obtain a new {@link ReservationId} for + submitting new reservations.

    + +

    The ResourceManager responds with a new, unique, + {@link ReservationId} which is used by the client to submit + a new reservation.

    + + @param request to get a new ReservationId + @return response containing the new ReservationId to be used + to submit a new reservation + @throws YarnException if the reservation system is not enabled. + @throws IOException on IO failures. + @see #submitReservation(ReservationSubmissionRequest)]]> +
    +
    + + + + + + + The interface used by clients to submit a new reservation to the + {@code ResourceManager}. +

    + +

    + The client packages all details of its request in a + {@link ReservationSubmissionRequest} object. This contains information + about the amount of capacity, temporal constraints, and concurrency needs. + Furthermore, the reservation might be composed of multiple stages, with + ordering dependencies among them. +

    + +

+ In order to respond, a new admission control component in the + {@code ResourceManager} performs an analysis of the resources that have + been committed over the period of time the user is requesting, verifies that + the user requests can be fulfilled, and that they respect a sharing policy + (e.g., {@code CapacityOverTimePolicy}). Once it has positively determined + that the ReservationSubmissionRequest is satisfiable, the + {@code ResourceManager} answers with a + {@link ReservationSubmissionResponse} that includes a non-null + {@link ReservationId}. Upon failure to find a valid allocation the response + is an exception with the reason. + + On application submission the client can use this {@link ReservationId} to + obtain access to the reserved resources. +

    + +

+ The system guarantees that during the time-range specified by the user, the + reservationID will correspond to a valid reservation. The amount of + capacity dedicated to such a queue can vary over time, depending on the + allocation that has been determined. But it is guaranteed to satisfy all + the constraints expressed by the user in the + {@link ReservationSubmissionRequest}. +

    + + @param request the request to submit a new Reservation + @return response the {@link ReservationId} on accepting the submission + @throws YarnException if the request is invalid or reservation cannot be + created successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to update an existing Reservation. This is + referred to as a re-negotiation process, in which a user that has + previously submitted a Reservation. +

    + +

    + The allocation is attempted by virtually substituting all previous + allocations related to this Reservation with new ones, that satisfy the new + {@link ReservationUpdateRequest}. Upon success the previous allocation is + substituted by the new one, and on failure (i.e., if the system cannot find + a valid allocation for the updated request), the previous allocation + remains valid. + + The {@link ReservationId} is not changed, and applications currently + running within this reservation will automatically receive the resources + based on the new allocation. +

    + + @param request to update an existing Reservation (the ReservationRequest + should refer to an existing valid {@link ReservationId}) + @return response empty on successfully updating the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + updated successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to remove an existing Reservation. + + Upon deletion of a reservation applications running with this reservation, + are automatically downgraded to normal jobs running without any dedicated + reservation. +

    + + @param request to remove an existing Reservation (the ReservationRequest + should refer to an existing valid {@link ReservationId}) + @return response empty on successfully deleting the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + deleted successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to get the list of reservations in a plan. + The reservationId will be used to search for reservations to list if it is + provided. Otherwise, it will select active reservations within the + startTime and endTime (inclusive). +

    + + @param request to list reservations in a plan. Contains fields to select + String queue, ReservationId reservationId, long startTime, + long endTime, and a bool includeReservationAllocations. + + queue: Required. Cannot be null or empty. Refers to the + reservable queue in the scheduler that was selected when + creating a reservation submission + {@link ReservationSubmissionRequest}. + + reservationId: Optional. If provided, other fields will + be ignored. + + startTime: Optional. If provided, only reservations that + end after the startTime will be selected. This defaults + to 0 if an invalid number is used. + + endTime: Optional. If provided, only reservations that + start on or before endTime will be selected. This defaults + to Long.MAX_VALUE if an invalid number is used. + + includeReservationAllocations: Optional. Flag that + determines whether the entire reservation allocations are + to be returned. Reservation allocations are subject to + change in the event of re-planning as described by + {@code ReservationDefinition}. + + @return response that contains information about reservations that are + being searched for. + @throws YarnException if the request is invalid + @throws IOException on IO failures]]> +
    +
    + + + + + + + The interface used by client to get node to labels mappings in existing cluster +

    + + @param request + @return node to labels mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get labels to nodes mappings + in existing cluster +

    + + @param request + @return labels to nodes mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get node labels in the cluster +

    + + @param request to get node labels collection of this cluster + @return node labels collection of this cluster + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to set priority of an application. +

    + @param request to set priority of an application + @return an empty response + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by clients to request the + ResourceManager to signal a container. For example, + the client can send command OUTPUT_THREAD_DUMP to dump threads of the + container.

    + +

    The client, via {@link SignalContainerRequest} provides the + id of the container and the signal command.

    + +

In secure mode, the ResourceManager verifies access to the + application before signaling the container. + The user needs to have MODIFY_APP permission.

    + +

    Currently, the ResourceManager returns an empty response + on success and throws an exception on rejecting the request.

    + + @param request request to signal a container + @return ResourceManager returns an empty response + on success and throws an exception on rejecting the request + @throws YarnException + @throws IOException]]> +
    +
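Through the YarnClient wrapper the thread-dump example given above looks roughly like this; the container id is assumed to be known from an application or container report:

    import java.io.IOException;
    import org.apache.hadoop.yarn.api.records.ContainerId;
    import org.apache.hadoop.yarn.api.records.SignalContainerCommand;
    import org.apache.hadoop.yarn.client.api.YarnClient;
    import org.apache.hadoop.yarn.exceptions.YarnException;

    public final class SignalSketch {

      // Asks the RM to deliver OUTPUT_THREAD_DUMP to the given running container.
      public static void threadDump(YarnClient yarnClient, ContainerId containerId)
          throws YarnException, IOException {
        yarnClient.signalToContainer(containerId, SignalContainerCommand.OUTPUT_THREAD_DUMP);
      }
    }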
    + + + + + + + The interface used by client to set ApplicationTimeouts of an application. + The UpdateApplicationTimeoutsRequest should have timeout value with + absolute time with ISO8601 format yyyy-MM-dd'T'HH:mm:ss.SSSZ. +

    + Note: If application timeout value is less than or equal to current + time then update application throws YarnException. + @param request to set ApplicationTimeouts of an application + @return a response with updated timeouts. + @throws YarnException if update request has empty values or application is + in completing states. + @throws IOException on IO failures]]> +
    +
    + + + + + + + The interface used by clients to get all the resource profiles that are + available on the ResourceManager. +

    + @param request request to get all the resource profiles + @return Response containing a map of the profile name to Resource + capabilities + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + + The interface to get the details for a specific resource profile. +

    + @param request request to get the details of a resource profile + @return Response containing the details for a particular resource profile + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + + The interface to get the details for a specific resource profile. +

    + @param request request to get the details of a resource profile + @return Response containing the details for a particular resource profile + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + + The interface used by client to get attributes to nodes mappings + available in ResourceManager. +

+ + @param request request to get details of attributes to nodes mapping. + @return Response containing the details of attributes to nodes mappings. + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + + The interface used by client to get node attributes available in + ResourceManager. +

+ + @param request request to get node attributes collection of this cluster. + @return Response containing node attributes collection. + @throws YarnException if any error happens inside YARN. + @throws IOException in case of other errors.]]> +
    +
    + + + + + + + The interface used by client to get node to attributes mappings. + in existing cluster. +

    + + @param request request to get nodes to attributes mapping. + @return nodes to attributes mappings. + @throws YarnException if any error happens inside YARN. + @throws IOException]]> +
    +
    + + The protocol between clients and the ResourceManager + to submit/abort jobs and to get information on applications, cluster metrics, + nodes, queues and ACLs.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The protocol between clients and the ApplicationHistoryServer to + get the information of completed applications etc. +

    ]]> +
    +
    + + + + + + + + + + The interface used by a new ApplicationMaster to register with + the ResourceManager. +

    + +

    + The ApplicationMaster needs to provide details such as RPC + Port, HTTP tracking url etc. as specified in + {@link RegisterApplicationMasterRequest}. +

    + +

    + The ResourceManager responds with critical details such as + maximum resource capabilities in the cluster as specified in + {@link RegisterApplicationMasterResponse}. +

    + +

    + Re-register is only allowed for Unmanaged Application Master + (UAM) HA, with + {@link org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext#getKeepContainersAcrossApplicationAttempts()} + set to true. +

+ + @param request registration request + @return registration response + @throws YarnException + @throws IOException + @throws InvalidApplicationMasterRequestException The exception is thrown + when an ApplicationMaster tries to register more than once. + @see RegisterApplicationMasterRequest + @see RegisterApplicationMasterResponse]]> +
    +
    + + + + + + The interface used by an ApplicationMaster to notify the + ResourceManager about its completion (success or failed).

    + +

    The ApplicationMaster has to provide details such as + final state, diagnostics (in case of failures) etc. as specified in + {@link FinishApplicationMasterRequest}.

    + +

    The ResourceManager responds with + {@link FinishApplicationMasterResponse}.

    + + @param request completion request + @return completion response + @throws YarnException + @throws IOException + @see FinishApplicationMasterRequest + @see FinishApplicationMasterResponse]]> +
    +
    + + + + + + + The main interface between an ApplicationMaster and the + ResourceManager. +

    + +

    + The ApplicationMaster uses this interface to provide a list of + {@link ResourceRequest} and returns unused {@link Container} allocated to + it via {@link AllocateRequest}. Optionally, the + ApplicationMaster can also blacklist resources which + it doesn't want to use. +

    + +

    + This also doubles up as a heartbeat to let the + ResourceManager know that the ApplicationMaster + is alive. Thus, applications should periodically make this call to be kept + alive. The frequency depends on + {@link YarnConfiguration#RM_AM_EXPIRY_INTERVAL_MS} which defaults to + {@link YarnConfiguration#DEFAULT_RM_AM_EXPIRY_INTERVAL_MS}. +

    + +

    + The ResourceManager responds with list of allocated + {@link Container}, status of completed containers and headroom information + for the application. +

    + +

+ The ApplicationMaster can use the available headroom + (resources) to decide how to utilize allocated resources and make informed + decisions about future resource requests. +

    + + @param request + allocation request + @return allocation response + @throws YarnException + @throws IOException + @throws InvalidApplicationMasterRequestException + This exception is thrown when an ApplicationMaster calls allocate + without registering first. + @throws InvalidResourceBlacklistRequestException + This exception is thrown when an application provides an invalid + specification for blacklist of resources. + @throws InvalidResourceRequestException + This exception is thrown when a {@link ResourceRequest} is out of + the range of the configured lower and upper limits on the + resources. + @see AllocateRequest + @see AllocateResponse]]> +
    +
    + + The protocol between a live instance of ApplicationMaster + and the ResourceManager.

    + +

    This is used by the ApplicationMaster to register/unregister + and to request and obtain resources in the cluster from the + ResourceManager.

    ]]> +
    +
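The register/allocate/finish lifecycle above is normally exercised through the AMRMClient library rather than the raw protocol; a compressed sketch with placeholder host, resources and priority:

    import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
    import org.apache.hadoop.yarn.api.records.Priority;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.client.api.AMRMClient;
    import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public final class AmLifecycleSketch {

      public static void run() throws Exception {
        AMRMClient<ContainerRequest> rm = AMRMClient.createAMRMClient();
        rm.init(new YarnConfiguration());
        rm.start();
        try {
          // registerApplicationMaster: RPC host/port and tracking URL of this AM.
          rm.registerApplicationMaster("am-host.example.com", 0, "");

          // Ask for one 1 GB / 1 vcore container anywhere in the cluster.
          rm.addContainerRequest(new ContainerRequest(
              Resource.newInstance(1024, 1), null, null, Priority.newInstance(0)));

          // allocate() doubles as the heartbeat; real AMs call it periodically.
          AllocateResponse response = rm.allocate(0.0f);
          for (Container c : response.getAllocatedContainers()) {
            System.out.println("Allocated " + c.getId() + " on " + c.getNodeId());
          }

          // finishApplicationMaster: final status, diagnostics, optional new URL.
          rm.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "done", null);
        } finally {
          rm.stop();
        }
      }
    }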
    + + + + + + + + + + The interface used by clients to claim a resource with the + SharedCacheManager. The client uses a checksum to identify the + resource and an {@link ApplicationId} to identify which application will be + using the resource. +

    + +

    + The SharedCacheManager responds with whether or not the + resource exists in the cache. If the resource exists, a Path + to the resource in the shared cache is returned. If the resource does not + exist, the response is empty. +

    + + @param request request to claim a resource in the shared cache + @return response indicating if the resource is already in the cache + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to release a resource with the + SharedCacheManager. This method is called once an application + is no longer using a claimed resource in the shared cache. The client uses + a checksum to identify the resource and an {@link ApplicationId} to + identify which application is releasing the resource. +

    + +

    + Note: This method is an optimization and the client is not required to call + it for correctness. +

    + +

    + Currently the SharedCacheManager sends an empty response. +

    + + @param request request to release a resource in the shared cache + @return (empty) response on releasing the resource + @throws YarnException + @throws IOException]]> +
    +
    + + + The protocol between clients and the SharedCacheManager to claim + and release resources in the shared cache. +

    ]]> +
    +
    + + + + + + + + + + The ApplicationMaster provides a list of + {@link StartContainerRequest}s to a NodeManager to + start {@link Container}s allocated to it using this interface. +

    + +

    + The ApplicationMaster has to provide details such as allocated + resource capability, security tokens (if enabled), command to be executed + to start the container, environment for the process, necessary + binaries/jar/shared-objects etc. via the {@link ContainerLaunchContext} in + the {@link StartContainerRequest}. +

    + +

+ The NodeManager sends a response via + {@link StartContainersResponse} which includes a list of + {@link Container}s of successfully launched {@link Container}s, a + containerId-to-exception map for each failed {@link StartContainerRequest} in + which the exception indicates the per-container error, and an + allServicesMetaData map between the names of auxiliary services and their + corresponding meta-data. Note: Non-container-specific exceptions will + still be thrown by the API method itself. +

    +

    + The ApplicationMaster can use + {@link #getContainerStatuses(GetContainerStatusesRequest)} to get updated + statuses of the to-be-launched or launched containers. +

+ + @param request + request to start a list of containers + @return response including containerIds of all successfully launched + containers, a containerId-to-exception map for failed requests and + an allServicesMetaData map. + @throws YarnException + @throws IOException]]> +
    +
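On the AM side these calls are usually made through NMClient; a sketch assuming the container was just allocated by the RM, with a placeholder launch command:

    import java.util.Collections;
    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.api.records.ContainerStatus;
    import org.apache.hadoop.yarn.client.api.NMClient;

    public final class ContainerLaunchSketch {

      public static void launch(NMClient nmClient, Container container) throws Exception {
        ContainerLaunchContext clc = ContainerLaunchContext.newInstance(
            Collections.emptyMap(), Collections.emptyMap(),
            Collections.singletonList("sleep 30"), null, null, null);

        // startContainers under the hood; the NM validates the container token.
        nmClient.startContainer(container, clc);

        // getContainerStatuses / stopContainers for the same container.
        ContainerStatus status =
            nmClient.getContainerStatus(container.getId(), container.getNodeId());
        System.out.println(container.getId() + " -> " + status.getState());
        nmClient.stopContainer(container.getId(), container.getNodeId());
      }
    }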
    + + + + + + + The ApplicationMaster requests a NodeManager to + stop a list of {@link Container}s allocated to it using this + interface. +

    + +

    + The ApplicationMaster sends a {@link StopContainersRequest} + which includes the {@link ContainerId}s of the containers to be stopped. +

    + +

    + The NodeManager sends a response via + {@link StopContainersResponse} which includes a list of {@link ContainerId} + s of successfully stopped containers, a containerId-to-exception map for + each failed request in which the exception indicates errors from per + container. Note: None-container-specific exceptions will still be thrown by + the API method itself. ApplicationMaster can use + {@link #getContainerStatuses(GetContainerStatusesRequest)} to get updated + statuses of the containers. +

    + + @param request + request to stop a list of containers + @return response which includes a list of containerIds of successfully + stopped containers, a containerId-to-exception map for failed + requests. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The API used by the ApplicationMaster to request for current + statuses of Containers from the NodeManager. +

    + +

    + The ApplicationMaster sends a + {@link GetContainerStatusesRequest} which includes the {@link ContainerId}s + of all containers whose statuses are needed. +

    + +

    + The NodeManager responds with + {@link GetContainerStatusesResponse} which includes a list of + {@link ContainerStatus} of the successfully queried containers and a + containerId-to-exception map for each failed request in which the exception + indicates errors from per container. Note: None-container-specific + exceptions will still be thrown by the API method itself. +

    + + @param request + request to get ContainerStatuses of containers with + the specified ContainerIds + @return response containing the list of ContainerStatus of the + successfully queried containers and a containerId-to-exception map + for failed requests. + + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The API used by the ApplicationMaster to request for + resource increase of running containers on the NodeManager. +

    + + @param request + request to increase resource of a list of containers + @return response which includes a list of containerIds of containers + whose resource has been successfully increased and a + containerId-to-exception map for failed requests. + + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The API used by the ApplicationMaster to request for + resource update of running containers on the NodeManager. +

    + + @param request + request to update resource of a list of containers + @return response which includes a list of containerIds of containers + whose resource has been successfully updated and a + containerId-to-exception map for failed requests. + + @throws YarnException Exception specific to YARN + @throws IOException IOException thrown from NodeManager]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The protocol between an ApplicationMaster and a + NodeManager to start/stop and increase resource of containers + and to get status of running containers.

    + +

    If security is enabled the NodeManager verifies that the + ApplicationMaster has truly been allocated the container + by the ResourceManager and also verifies all interactions such + as stopping the container or obtaining status information for the container. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + response id used to track duplicate responses. + @return response id]]> + + + + + + response id used to track duplicate responses. + @param id response id]]> + + + + + current progress of application. + @return current progress of application]]> + + + + + + current progress of application + @param progress current progress of application]]> + + + + + ResourceRequest to update the + ResourceManager about the application's resource requirements. + @return the list of ResourceRequest + @see ResourceRequest]]> + + + + + + ResourceRequest to update the + ResourceManager about the application's resource requirements. + @param resourceRequests list of ResourceRequest to update the + ResourceManager about the application's + resource requirements + @see ResourceRequest]]> + + + + + ContainerId of containers being + released by the ApplicationMaster. + @return list of ContainerId of containers being + released by the ApplicationMaster]]> + + + + + + ContainerId of containers being + released by the ApplicationMaster + @param releaseContainers list of ContainerId of + containers being released by the + ApplicationMaster]]> + + + + + ResourceBlacklistRequest being sent by the + ApplicationMaster. + @return the ResourceBlacklistRequest being sent by the + ApplicationMaster + @see ResourceBlacklistRequest]]> + + + + + + ResourceBlacklistRequest to inform the + ResourceManager about the blacklist additions and removals + per the ApplicationMaster. + + @param resourceBlacklistRequest the ResourceBlacklistRequest + to inform the ResourceManager about + the blacklist additions and removals + per the ApplicationMaster + @see ResourceBlacklistRequest]]> + + + + + ApplicationMaster. + @return list of {@link UpdateContainerRequest} + being sent by the + ApplicationMaster.]]> + + + + + + ResourceManager about the containers that need to be + updated. + @param updateRequests list of UpdateContainerRequest for + containers to be updated]]> + + + + + ApplicationMaster. + @return list of {@link SchedulingRequest} being sent by the + ApplicationMaster.]]> + + + + + + ResourceManager about the application's resource requirements + (potentially including allocation tags and placement constraints). + @param schedulingRequests list of {@link SchedulingRequest} to update + the ResourceManager about the application's resource + requirements.]]> + + + + + + + + + + + + + + + + + The core request sent by the ApplicationMaster to the + ResourceManager to obtain resources in the cluster.

    + +

    The request includes: +

      +
    • A response id to track duplicate responses.
    • +
    • Progress information.
    • +
    • + A list of {@link ResourceRequest} to inform the + ResourceManager about the application's + resource requirements. +
    • +
    • + A list of unused {@link Container} which are being returned. +
    • +
    • + A list of {@link UpdateContainerRequest} to inform + the ResourceManager about the change in + requirements of running containers. +
    • +
    + + @see ApplicationMasterProtocol#allocate(AllocateRequest)]]> +
    +
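A sketch of assembling such a request directly with the builder described below; the container sizes, priority and use of the "*" resource name are illustrative:

    import java.util.Collections;
    import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
    import org.apache.hadoop.yarn.api.records.Priority;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.api.records.ResourceRequest;

    public final class AllocateRequestSketch {

      public static AllocateRequest build(int lastResponseId, float progress) {
        // Ask for two 2 GB / 1 vcore containers anywhere in the cluster.
        ResourceRequest ask = ResourceRequest.newInstance(
            Priority.newInstance(0), ResourceRequest.ANY,
            Resource.newInstance(2048, 1), 2);
        return AllocateRequest.newBuilder()
            .responseId(lastResponseId)
            .progress(progress)
            .askList(Collections.singletonList(ask))
            .releaseList(Collections.emptyList())
            .build();
      }
    }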
    + + + + + + + responseId of the request. + @see AllocateRequest#setResponseId(int) + @param responseId responseId of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + progress of the request. + @see AllocateRequest#setProgress(float) + @param progress progress of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + askList of the request. + @see AllocateRequest#setAskList(List) + @param askList askList of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + releaseList of the request. + @see AllocateRequest#setReleaseList(List) + @param releaseList releaseList of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + resourceBlacklistRequest of the request. + @see AllocateRequest#setResourceBlacklistRequest( + ResourceBlacklistRequest) + @param resourceBlacklistRequest + resourceBlacklistRequest of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + updateRequests of the request. + @see AllocateRequest#setUpdateRequests(List) + @param updateRequests updateRequests of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + schedulingRequests of the request. + @see AllocateRequest#setSchedulingRequests(List) + @param schedulingRequests SchedulingRequest of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + trackingUrl of the request. + @see AllocateRequest#setTrackingUrl(String) + @param trackingUrl new tracking url + @return {@link AllocateRequestBuilder}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ResourceManager needs the + ApplicationMaster to take some action then it will send an + AMCommand to the ApplicationMaster. See AMCommand + for details on commands and actions for them. + @return AMCommand if the ApplicationMaster should + take action, null otherwise + @see AMCommand]]> + + + + + last response id. + @return last response id]]> + + + + + newly allocated Container by the + ResourceManager. + @return list of newly allocated Container]]> + + + + + available headroom for resources in the cluster for the + application. + @return limit of available headroom for resources in the cluster for the + application]]> + + + + + completed containers' statuses. + @return the list of completed containers' statuses]]> + + + + + updated NodeReports. Updates could + be changes in health, availability etc of the nodes. + @return The delta of updated nodes since the last response]]> + + + + + + + + + + + The message is a snapshot of the resources the RM wants back from the AM. + While demand persists, the RM will repeat its request; applications should + not interpret each message as a request for additional + resources on top of previous messages. Resources requested consistently + over some duration may be forcibly killed by the RM. + + @return A specification of the resources to reclaim from this AM.]]> + + + + + + 1) AM is receiving first container on underlying NodeManager.
    + OR
    + 2) NMToken master key rolled over in ResourceManager and AM is getting new + container on the same underlying NodeManager. +

    + AM will receive one NMToken per NM irrespective of the number of containers + issued on same NM. AM is expected to store these tokens until issued a + new token for the same NM. + @return list of NMTokens required for communicating with NM]]> + + + + + ResourceManager. + @return list of newly increased containers]]> + + + + + + + + + + + + + + + + + + + + + + + + + + UpdateContainerError for + containers updates requests that were in error]]> + + + + + ResourceManager from previous application attempts which + have not been reported to the Application Master yet. +
    + These containers were recovered by the RM after the application master + had already registered. This may happen after RM restart when some NMs get + delayed in connecting to the RM and reporting the active containers. + Since they were not reported in the registration + response, they are reported in the response to the AM heartbeat. + + @return the list of running containers as viewed by + ResourceManager from previous application attempts.]]> +
    +
    + + + + + + + ResourceManager the + ApplicationMaster during resource negotiation. +

    + The response, includes: +

      +
    • Response ID to track duplicate responses.
    • +
    • + An AMCommand sent by ResourceManager to let the + {@code ApplicationMaster} take some actions (resync, shutdown etc.). +
    • +
    • A list of newly allocated {@link Container}.
    • +
    • A list of completed {@link Container}s' statuses.
    • +
    • + The available headroom for resources in the cluster for the + application. +
    • +
    • A list of nodes whose status has been updated.
    • +
    • The number of available nodes in a cluster.
    • +
    • A description of resources requested back by the cluster
    • +
    • AMRMToken, if AMRMToken has been rolled over
    • +
    • + A list of {@link Container} representing the containers + whose resource has been increased. +
    • +
    • + A list of {@link Container} representing the containers + whose resource has been decreased. +
    • +
    + + @see ApplicationMasterProtocol#allocate(AllocateRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: {@link NMToken} will be used for authenticating communication with + {@code NodeManager}. + @return the list of container tokens to be used for authorization during + container resource update. + @see NMToken]]> + + + + + + AllocateResponse.getUpdatedContainers. + The token contains the container id and resource capability required for + container resource update. + @param containersToUpdate the list of container tokens to be used + for container resource increase.]]> + + + + The request sent by Application Master to the + Node Manager to change the resource quota of a container.

    + + @see ContainerManagementProtocol#updateContainer(ContainerUpdateRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + The response sent by the NodeManager to the + ApplicationMaster when asked to update container resource. +

    + + @see ContainerManagementProtocol#updateContainer(ContainerUpdateRequest)]]> +
    +
    + + + + + + + + + + + ApplicationAttemptId of the attempt to be failed. + @return ApplicationAttemptId of the attempt.]]> + + + + + + + The request sent by the client to the ResourceManager + to fail an application attempt.

    + +

    The request includes the {@link ApplicationAttemptId} of the attempt to + be failed.

    + + @see ApplicationClientProtocol#failApplicationAttempt(FailApplicationAttemptRequest)]]> +
    +
    + + + + + + + The response sent by the ResourceManager to the client + failing an application attempt.

    + +

    Currently it's empty.

    + + @see ApplicationClientProtocol#failApplicationAttempt(FailApplicationAttemptRequest)]]> +
    +
    + + + + + + + + + + + + + final state of the ApplicationMaster. + @return final state of the ApplicationMaster]]> + + + + + + final state of the ApplicationMaster + @param finalState final state of the ApplicationMaster]]> + + + + + diagnostic information on application failure. + @return diagnostic information on application failure]]> + + + + + + diagnostic information on application failure. + @param diagnostics diagnostic information on application failure]]> + + + + + tracking URL for the ApplicationMaster. + This url if contains scheme then that will be used by resource manager + web application proxy otherwise it will default to http. + @return tracking URLfor the ApplicationMaster]]> + + + + + + final tracking URLfor the ApplicationMaster. + This is the web-URL to which ResourceManager or web-application proxy will + redirect client/users once the application is finished and the + ApplicationMaster is gone. +

    + If the passed url has a scheme then that will be used by the + ResourceManager and web-application proxy, otherwise the scheme will + default to http. +

    +

    + Empty, null, "N/A" strings are all valid besides a real URL. In case an url + isn't explicitly passed, it defaults to "N/A" on the ResourceManager. +

    + + @param url + tracking URLfor the ApplicationMaster]]> + + + + + The final request includes details such: +

      +
    • Final state of the {@code ApplicationMaster}
    • +
    • + Diagnostic information in case of failure of the + {@code ApplicationMaster} +
    • +
    • Tracking URL
    • +
    + + @see ApplicationMasterProtocol#finishApplicationMaster(FinishApplicationMasterRequest)]]> +
    +
FinishApplicationMasterResponse is the response sent by the ResourceManager to an
ApplicationMaster on its completion. The response includes:
• A flag which indicates that the application has successfully unregistered with the RM
  and can safely stop.
Note: the application may stop once the flag is true; if it stops before the flag is true,
the RM may retry the application.
@see ApplicationMasterProtocol#finishApplicationMaster(FinishApplicationMasterRequest)

GetApplicationAttemptReportRequest is the request sent by a client to the ResourceManager
to get an {@link ApplicationAttemptReport} for an application attempt. The request should
include the {@link ApplicationAttemptId} of the application attempt.
GetApplicationAttemptReportResponse is the corresponding response; it includes an
{@link ApplicationAttemptReport} which has the details about the particular application
attempt.
@see ApplicationHistoryProtocol#getApplicationAttemptReport(GetApplicationAttemptReportRequest)

GetApplicationAttemptsRequest is the request from clients to get a list of application
attempt reports of an application (identified by its ApplicationId) from the
ResourceManager. GetApplicationAttemptsResponse returns a list of
{@link ApplicationAttemptReport}, one per attempt of the application.
@see ApplicationHistoryProtocol#getApplicationAttempts(GetApplicationAttemptsRequest)
GetApplicationReportRequest is the request sent by a client to the ResourceManager to get
an {@link ApplicationReport} for an application. The request should include the
{@link ApplicationId} of the application.
GetApplicationReportResponse is the corresponding response; it includes an
{@link ApplicationReport} which has details such as user, queue, name, host on which the
ApplicationMaster is running, RPC port, tracking URL, diagnostics, start time etc.
@see ApplicationClientProtocol#getApplicationReport(GetApplicationReportRequest)
@see ApplicationReport
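For orientation, a client normally reaches this protocol through the YarnClient convenience
library rather than building the request record by hand. A minimal sketch, assuming the
cluster configuration is on the classpath; the application id below is a made-up
placeholder:

  import org.apache.hadoop.yarn.api.records.ApplicationId;
  import org.apache.hadoop.yarn.api.records.ApplicationReport;
  import org.apache.hadoop.yarn.client.api.YarnClient;
  import org.apache.hadoop.yarn.conf.YarnConfiguration;

  // Fragment: place inside a method that may throw IOException/YarnException.
  YarnClient yarnClient = YarnClient.createYarnClient();
  yarnClient.init(new YarnConfiguration());
  yarnClient.start();

  ApplicationId appId = ApplicationId.newInstance(1690000000000L, 42);  // placeholder id
  ApplicationReport report = yarnClient.getApplicationReport(appId);
  System.out.println(report.getYarnApplicationState() + " " + report.getTrackingUrl());

  yarnClient.stop();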
GetApplicationsRequest is the request from clients to get a report of applications matching
the given filters in the cluster from the ResourceManager. Factory methods accept an
{@link ApplicationsRequestScope}, lists of users, queues, application types, application
tags and application states, ranges of start and finish times, and a limit on the number of
applications returned; setting any of the parameters to null simply disables that filter.
GetApplicationsResponse is the corresponding response. The ApplicationReport for each
application includes details such as user, queue, name, host on which the
ApplicationMaster is running, RPC port, tracking URL, diagnostics, start time etc.
@see ApplicationReport
@see ApplicationClientProtocol#getApplications(GetApplicationsRequest)
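A short sketch of the same query through YarnClient, filtering by application type and
state; the type string is illustrative and yarnClient is the started client from the
previous sketch:

  import java.util.Collections;
  import java.util.EnumSet;
  import java.util.List;
  import org.apache.hadoop.yarn.api.records.ApplicationReport;
  import org.apache.hadoop.yarn.api.records.YarnApplicationState;

  List<ApplicationReport> running = yarnClient.getApplications(
      Collections.singleton("MAPREDUCE"),            // application-type filter (example)
      EnumSet.of(YarnApplicationState.RUNNING));     // application-state filter
  for (ApplicationReport app : running) {
    System.out.println(app.getApplicationId() + " queue=" + app.getQueue());
  }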
GetAttributesToNodesRequest / GetAttributesToNodesResponse carry the node-to-attribute-value
mapping for all, or a given set of, node attribute keys in the cluster.
@see ApplicationClientProtocol#getAttributesToNodes(GetAttributesToNodesRequest)

GetClusterMetricsRequest is the request sent by clients to get cluster metrics from the
ResourceManager; currently it is empty. GetClusterMetricsResponse returns the
{@link YarnClusterMetrics} for the cluster.
@see ApplicationClientProtocol#getClusterMetrics(GetClusterMetricsRequest)

GetClusterNodeAttributesRequest / GetClusterNodeAttributesResponse carry the node attributes
available in the cluster.
@see ApplicationClientProtocol#getClusterNodeAttributes(GetClusterNodeAttributesRequest)

GetClusterNodesRequest is the request from clients to get a report of all nodes in the
cluster, asking for all nodes in the given {@link NodeState}s. GetClusterNodesResponse
returns a {@link NodeReport} for each node, with per-node information such as available
resources, number of containers, tracking URL, rack name, health status etc.
@see ApplicationClientProtocol#getClusterNodes(GetClusterNodesRequest)

GetContainerReportRequest is the request sent by a client to the ResourceManager to get a
{@link ContainerReport} for a container, identified by its ContainerId.
GetContainerReportResponse includes the {@link ContainerReport} with the container details.

GetContainersRequest is the request from clients to get a list of container reports that
belong to an application attempt, identified by its ApplicationAttemptId.
GetContainersResponse returns a list of {@link ContainerReport}, one per container of the
attempt.
@see ApplicationHistoryProtocol#getContainers(GetContainersRequest)
GetContainerStatusesRequest is sent by the ApplicationMaster to the NodeManager to obtain
the {@link ContainerStatus} of a list of containers, identified by their ContainerIds;
GetContainerStatusesResponse returns the ContainerStatuses of the requested containers.
@see ContainerManagementProtocol#getContainerStatuses(GetContainerStatusesRequest)

GetNewApplicationRequest is the request sent by clients to get a new {@link ApplicationId}
for submitting an application; currently it is empty. GetNewApplicationResponse carries the
new ApplicationId allocated by the ResourceManager together with the maximum capability of
allocated resources in the cluster. Clients can submit an application with the returned
{@link ApplicationId}.
@see ApplicationClientProtocol#getNewApplication(GetNewApplicationRequest)

GetNewReservationRequest is the request sent by clients to get a new {@code ReservationId}
for submitting a reservation; GetNewReservationResponse carries the new
{@link ReservationId}, with which clients can submit a reservation.
{@code ApplicationClientProtocol#getNewReservation(GetNewReservationRequest)}

GetNodesToAttributesRequest / GetNodesToAttributesResponse carry the nodes-to-attributes
mapping in the cluster.
@see ApplicationClientProtocol#getNodesToAttributes(GetNodesToAttributesRequest)

GetQueueInfoRequest is the request sent by clients to get queue information from the
ResourceManager. It names the queue to query and whether to include information about
active applications, child queues, and the entire child queue hierarchy.
@see ApplicationClientProtocol#getQueueInfo(GetQueueInfoRequest)
GetQueueInfoResponse includes a {@link QueueInfo} which has details such as queue name,
used/total capacities, running applications, child queues etc.
@see QueueInfo
@see ApplicationClientProtocol#getQueueInfo(GetQueueInfoRequest)

GetQueueUserAclsInfoRequest is the request sent by clients to the ResourceManager to get
queue ACLs for the current user; currently it is empty. GetQueueUserAclsInfoResponse
contains a list of {@link QueueUserACLInfo} which provides information about
{@link QueueACL} per queue.
@see QueueACL
@see QueueUserACLInfo
@see ApplicationClientProtocol#getQueueUserAcls(GetQueueUserAclsInfoRequest)
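As a rough illustration of both calls through YarnClient (the queue name "default" is just
an example; yarnClient is a started client as in the earlier sketches):

  import org.apache.hadoop.yarn.api.records.QueueInfo;
  import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;

  QueueInfo queue = yarnClient.getQueueInfo("default");
  System.out.println(queue.getQueueName() + " capacity=" + queue.getCapacity()
      + " runningApps=" + queue.getApplications().size());

  for (QueueUserACLInfo aclInfo : yarnClient.getQueueAclsInfo()) {
    System.out.println(aclInfo.getQueueName() + " -> " + aclInfo.getUserAcls());
  }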
IncreaseContainersResourceRequest is sent by the ApplicationMaster to the NodeManager to
change the resource quota of running containers. It carries the list of container tokens
(obtained from AllocateResponse.getIncreasedContainers) used for authorization during the
resource increase; each token contains the container id and the required resource
capability. Note: the {@link NMToken} is used for authenticating communication with the
NodeManager. IncreaseContainersResourceResponse is the NodeManager's reply.
@see ContainerManagementProtocol#increaseContainersResource(IncreaseContainersResourceRequest)

KillApplicationRequest is the request sent by the client to the ResourceManager to abort a
submitted application. It includes the {@link ApplicationId} of the application to be
aborted and optional diagnostics explaining why the application is being killed.
KillApplicationResponse includes:
• A flag which indicates whether the process of killing the application is completed.
Note: the user is recommended to wait until this flag becomes true; otherwise, if the
ResourceManager crashes before the kill completes, the ResourceManager may retry this
application on recovery.
@see ApplicationClientProtocol#forceKillApplication(KillApplicationRequest)
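Through YarnClient this boils down to a single call (placeholder id again; the client issues
forceKillApplication underneath):

  import org.apache.hadoop.yarn.api.records.ApplicationId;

  ApplicationId doomed = ApplicationId.newInstance(1690000000000L, 42);  // placeholder
  yarnClient.killApplication(doomed);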
MoveApplicationAcrossQueuesRequest is the request sent by the client to the ResourceManager
to move a submitted application to a different queue. It includes the {@link ApplicationId}
of the application to be moved and the queue to place it in.
MoveApplicationAcrossQueuesResponse is the corresponding response; a response without
exception means that the move has completed successfully.
@see ApplicationClientProtocol#moveApplicationAcrossQueues(MoveApplicationAcrossQueuesRequest)
RegisterApplicationMasterRequest.newInstance builds the registration record. If the port or
tracking URL is not used, the defaults are:
• port: -1
• trackingUrl: null
The port is allowed to be any integer larger than or equal to -1.

The request carries the host on which the ApplicationMaster is running, the RPC port on
which the {@code ApplicationMaster} is responding, and the tracking URL for the
ApplicationMaster while it is running: the web-URL to which the ResourceManager or
web-application proxy will redirect clients/users while the application and the
ApplicationMaster are still running. If the passed URL has a scheme, that scheme is used;
otherwise it defaults to http. Empty, null and "N/A" strings are all valid besides a real
URL; if a URL isn't explicitly passed, it defaults to "N/A" on the ResourceManager.

The request can also carry placement constraints: a mapping from sets of allocation tags to
a PlacementConstraint, i.e. each {@link org.apache.hadoop.yarn.api.records.SchedulingRequest}
that has those tags will be placed taking the corresponding constraint into account.
For example:
Map <
 <hb_regionserver> -> node_anti_affinity,
 <hb_regionserver, hb_master> -> rack_affinity,
 ...
>

The registration includes details such as:
• Hostname on which the AM is running
• RPC Port
• Tracking URL
@see ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)
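The AMRMClient convenience library assembles this registration record on the caller's
behalf; a minimal sketch (empty host/URL and port -1 simply fall back to the defaults
described above):

  import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
  import org.apache.hadoop.yarn.client.api.AMRMClient;
  import org.apache.hadoop.yarn.conf.YarnConfiguration;

  // Fragment: place inside a method that may throw IOException/YarnException.
  AMRMClient<AMRMClient.ContainerRequest> amRmClient = AMRMClient.createAMRMClient();
  amRmClient.init(new YarnConfiguration());
  amRmClient.start();

  RegisterApplicationMasterResponse registration =
      amRmClient.registerApplicationMaster("", -1, "");  // host, RPC port, tracking URL
  System.out.println("max capability: " + registration.getMaximumResourceCapability());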
RegisterApplicationMasterResponse returns, among other things, the maximum capability of
allocated resources in the cluster, the ApplicationACLs for the application, and the
ClientToAMToken master key. The ClientToAMToken master key is sent to the ApplicationMaster
by the ResourceManager via {@link RegisterApplicationMasterResponse} and is used to verify
the corresponding ClientToAMToken. The response also reports the queue that the application
was placed in and, for work-preserving restarts, the list of containers still running from
previous application attempts as viewed by the ResourceManager.
@see RegisterApplicationMasterResponse#getNMTokensFromPreviousAttempts()

The response contains critical details such as:
• Maximum capability for allocated resources in the cluster
• {@code ApplicationACL}s for the application
• ClientToAMToken master key
@see ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)
ReInitializeContainerRequest names the ContainerId of the container to re-initialize and the
ContainerLaunchContext to re-initialize it with.

ReleaseSharedCacheResourceRequest is the request from clients to release a resource in the
shared cache, identified by the ApplicationId and the resource key;
ReleaseSharedCacheResourceResponse is the SharedCacheManager's reply and is currently empty.

ReservationSubmissionResponse is the response sent by the ResourceManager to a client on
reservation submission; currently it is empty.
{@code ApplicationClientProtocol#submitReservation(ReservationSubmissionRequest)}

ResourceLocalizationRequest names the ContainerId of the container for which to localize
resources and the {@link LocalResource}s required by the container.

SignalContainerRequest is the request sent by the client to the ResourceManager, or by the
ApplicationMaster to the NodeManager, to signal a container; it carries the ContainerId of
the container to signal and the SignalContainerCommand of the signal request.
SignalContainerResponse is the corresponding response and is currently empty.
@see SignalContainerCommand
@see ApplicationClientProtocol#signalToContainer(SignalContainerRequest)
StartContainerRequest is sent by the ApplicationMaster to the NodeManager to start a
container. It carries the {@link ContainerLaunchContext} for the container to be started and
the container token to be used for authorization (note: the {@link NMToken} is used for
authenticating communication with the NodeManager). Via the ContainerLaunchContext the
ApplicationMaster provides details such as the allocated resource capability, security
tokens (if enabled), the command to be executed to start the container, the environment for
the process, and the necessary binaries/jars/shared-objects.

StartContainersRequest bundles a list of {@link StartContainerRequest} sent by the
ApplicationMaster to the NodeManager to start containers; each entry provides the same
details through its {@link ContainerLaunchContext}.
@see ContainerManagementProtocol#startContainers(StartContainersRequest)
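A hedged sketch of assembling such a request; "allocated" stands for a Container previously
handed out by the scheduler, and localResources/env are illustrative maps prepared
elsewhere:

  import java.util.Collections;
  import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
  import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
  import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;

  ContainerLaunchContext launchContext = ContainerLaunchContext.newInstance(
      localResources,                                               // binaries/jars to localize
      env,                                                          // environment variables
      Collections.singletonList("./my_task.sh 1>stdout 2>stderr"),  // launch command (example)
      null,                                                         // auxiliary service data
      null,                                                         // security tokens
      null);                                                        // application ACLs
  StartContainerRequest start =
      StartContainerRequest.newInstance(launchContext, allocated.getContainerToken());
  StartContainersRequest batch =
      StartContainersRequest.newInstance(Collections.singletonList(start));
  // containerManager.startContainers(batch);  // assumed ContainerManagementProtocol proxy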
StartContainersResponse is sent by the NodeManager to the ApplicationMaster when asked to
start allocated containers. It lists the ContainerIds of the containers that were started
successfully and exposes the meta-data of all auxiliary services running on the NodeManager
as a map from auxiliary service name to an opaque ByteBuffer blob; to interpret the
per-service meta-data, consult the documentation of the auxiliary service configured on the
NodeManager.

StopContainersRequest is sent by the ApplicationMaster to the NodeManager to stop
containers, identified by their ContainerIds; StopContainersResponse is the NodeManager's
reply.
@see ContainerManagementProtocol#startContainers(StartContainersRequest)
@see ContainerManagementProtocol#stopContainers(StopContainersRequest)

SubmitApplicationRequest is the request sent by a client to submit an application to the
ResourceManager. The request, via {@link ApplicationSubmissionContext}, contains details
such as the queue, the {@link Resource} required to run the ApplicationMaster, and the
equivalent of {@link ContainerLaunchContext} for launching the ApplicationMaster.
SubmitApplicationResponse is the corresponding response and is currently empty.
@see ApplicationClientProtocol#submitApplication(SubmitApplicationRequest)
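Tying GetNewApplication and SubmitApplication together, a minimal client-side submission
sketch via YarnClient; amLocalResources and amEnv are illustrative maps and the launch
command is a placeholder:

  import java.util.Collections;
  import org.apache.hadoop.yarn.api.records.ApplicationId;
  import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
  import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
  import org.apache.hadoop.yarn.api.records.Resource;
  import org.apache.hadoop.yarn.client.api.YarnClientApplication;

  YarnClientApplication newApp = yarnClient.createApplication();  // new ApplicationId under the hood
  ApplicationSubmissionContext submission = newApp.getApplicationSubmissionContext();
  submission.setApplicationName("example-app");
  submission.setQueue("default");
  submission.setResource(Resource.newInstance(1024, 1));          // AM memory (MB) and vcores
  submission.setAMContainerSpec(ContainerLaunchContext.newInstance(
      amLocalResources, amEnv,
      Collections.singletonList("./launch_am.sh 1>stdout 2>stderr"),
      null, null, null));
  ApplicationId submittedId = yarnClient.submitApplication(submission);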
UpdateApplicationPriorityRequest is the request sent by the client to the ResourceManager to
set or update the application priority; it includes the {@link ApplicationId} of the
application and the {@link Priority} to be set. UpdateApplicationPriorityResponse reports
the updated Priority; a response without exception means that the update has completed
successfully.
@see ApplicationClientProtocol#updateApplicationPriority(UpdateApplicationPriorityRequest)

UpdateApplicationTimeoutsRequest is the request sent by the client to the ResourceManager to
set or update application timeouts; it includes the {@link ApplicationId} of the application
and the timeouts to be set. Timeout values are absolute and must follow the ISO8601 format
yyyy-MM-dd'T'HH:mm:ss.SSSZ; all pre-existing map entries are cleared before the new map is
added. UpdateApplicationTimeoutsResponse returns the resulting ApplicationTimeouts; a
response without exception means that the update has completed successfully.
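A sketch of extending an application's LIFETIME timeout by one hour; it assumes the
UpdateApplicationTimeoutsRequest.newInstance(ApplicationId, Map) factory and an
ApplicationClientProtocol proxy named clientRmProtocol, and the application id is a
placeholder:

  import java.text.SimpleDateFormat;
  import java.util.Collections;
  import java.util.Date;
  import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest;
  import org.apache.hadoop.yarn.api.records.ApplicationId;
  import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;

  // Absolute ISO8601 expiry, one hour from now, in the format described above.
  SimpleDateFormat iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ");
  String expiry = iso8601.format(new Date(System.currentTimeMillis() + 60L * 60 * 1000));

  ApplicationId appId = ApplicationId.newInstance(1690000000000L, 42);  // placeholder
  UpdateApplicationTimeoutsRequest timeoutRequest =
      UpdateApplicationTimeoutsRequest.newInstance(
          appId, Collections.singletonMap(ApplicationTimeoutType.LIFETIME, expiry));
  // clientRmProtocol.updateApplicationTimeouts(timeoutRequest);  // assumed proxy, see lead-in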
UseSharedCacheResourceRequest is the request from clients to the SharedCacheManager that
claims a resource in the shared cache, identified by the ApplicationId and the resource key.
UseSharedCacheResourceResponse indicates whether the requested resource exists in the cache:
it carries the Path to the resource in the shared cache, or null otherwise.

ApplicationAttemptId denotes the particular attempt of an ApplicationMaster for a given
{@link ApplicationId}; it combines the ApplicationId with the attempt id of the application.
Multiple attempts might be needed to run an application to completion due to temporal
failures of the ApplicationMaster, such as hardware failures or connectivity issues on the
node on which it was scheduled.

ApplicationAttemptReport describes one attempt. It includes details such as:
• {@link ApplicationAttemptId} of the application
• Host on which the ApplicationMaster of this attempt is running
• RPC port of the ApplicationMaster of this attempt
• Tracking URL (and the original tracking URL)
• Diagnostic information in case of errors
• {@link YarnApplicationAttemptState} of the application attempt
• {@link ContainerId} of the master Container
• Finish time of the attempt
ApplicationId represents the globally unique identifier for an application. The globally
unique nature of the identifier is achieved by using the cluster timestamp, i.e. the
start-time of the ResourceManager, along with a monotonically increasing counter for the
application; the short integer identifier is unique among all applications started by a
particular instance of the ResourceManager.

ApplicationReport exposes, per application: the ApplicationId and the ApplicationAttemptId
of the current attempt, the user who submitted the application, the queue, the application
name, the host on which the ApplicationMaster is running and its RPC port, the
YarnApplicationState, diagnostic information in case of errors, the tracking URL, and the
start time, finish time and final finish status of the application.

The client token is used for communicating with the ApplicationMaster: the ClientToAMToken
is the security token used by the AMs to verify the authenticity of any client. The
ResourceManager provides a secure token (via {@link ApplicationReport#getClientToAMToken()})
which is verified by the ApplicationMaster when the client talks directly to an AM.

The AMRM token is required for AM-to-RM scheduling operations. For managed Application
Masters YARN takes care of injecting it; for unmanaged Application Masters the token must be
obtained via this report and set in the {@link org.apache.hadoop.security.UserGroupInformation}
of the current user. The AMRM token is returned only if all the following conditions are met:
• the requester is the owner of the ApplicationMaster
• the application master is an unmanaged ApplicationMaster
• the application master is in ACCEPTED state
Otherwise NULL is returned.
ApplicationReport includes details such as:
• {@link ApplicationId} of the application
• Application user
• Application queue
• Application name
• Host on which the ApplicationMaster is running
• RPC port of the ApplicationMaster
• Tracking URL
• {@link YarnApplicationState} of the application
• Diagnostic information in case of errors
• Start time of the application
• Client {@link Token} of the application (if security is enabled)
@see ApplicationClientProtocol#getApplicationReport(org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest)

ApplicationResourceUsageReport reports the used, reserved and needed {@link Resource};
values are -1 for invalid or inaccessible reports.

ApplicationSubmissionContext carries, per submission: the ApplicationId, application name,
queue, {@link Priority}, and the {@link ContainerLaunchContext} describing the Container in
which the ApplicationMaster is launched; whether the AM is unmanaged (such apps will not be
retried by the RM on app attempt failure; the default value is false); the resource required
by the ApplicationMaster (to be deprecated in favour of getResource on the AM container
ResourceRequest, where the number of containers and Priority are ignored); the
{@link LogAggregationContext} of the application; and the ApplicationTimeouts in seconds
(all pre-existing map entries are cleared before adding a new map; note: if an application
timeout value is less than or equal to zero, application submission will throw an
exception).

keepContainersAcrossApplicationAttempts: for a managed AM, if the flag is true, running
containers are not killed when the application attempt fails, and these containers are
retrieved by the new application attempt on registration via
{@link ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)};
for an unmanaged AM, if the flag is true, the RM allows re-registration and returns the
running containers in the same attempt back to the UAM for HA.
ApplicationSubmissionContext includes details such as:
• {@link ApplicationId} of the application
• Application user
• Application name
• {@link Priority} of the application
• {@link ContainerLaunchContext} of the container in which the ApplicationMaster is executed
• maxAppAttempts: the maximum number of application attempts; it should be no larger than
  the global number of max attempts in the YARN configuration
• attemptFailuresValidityInterval: the default value is -1; when set to a value
  {@literal >} 0 in milliseconds, failures that happen outside the validity interval are not
  taken into the failure count; if the failure count reaches maxAppAttempts, the application
  is failed
• Optional, application-specific {@link LogAggregationContext}
@see ContainerLaunchContext
@see ApplicationClientProtocol#submitApplication(org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest)

ApplicationTimeout reports, for a given timeout type, the expiry time and the remaining time
of an application:
• {@link ApplicationTimeoutType} of the timeout type
• Expiry time in ISO8601 standard with format yyyy-MM-dd'T'HH:mm:ss.SSSZ, or "UNLIMITED"
• Remaining time in seconds
The possible values for {ExpiryTime, RemainingTimeInSeconds} are:
• {UNLIMITED, -1}: the timeout is not configured for the given timeout type (LIFETIME)
• {ISO8601 date string, 0}: the timeout is configured and the application has completed
• {ISO8601 date string, greater than zero}: the timeout is configured and the application is
  RUNNING; it will be timed out after the configured value
Container carries the {@link Resource} allocated to the container and the {@link Priority}
at which it was allocated. The ContainerToken is the security token used by the framework to
verify the authenticity of any Container: the ResourceManager, on container allocation,
provides a secure token which is verified by the NodeManager on container launch.
Applications do not need to care about the ContainerToken; it is handled transparently by
the framework, and the allocated Container includes it. The container also reports the ID of
the original {@code ResourceRequest#getAllocationRequestId()} that is satisfied by this
allocation; the scheduler may return multiple {@code AllocateResponse}s for the same ID as
it allocates containers, and applications may ignore the returned ID and use the allocation
for any of their outstanding requests.

The {@code ResourceManager} is the sole authority to allocate any {@code Container} to
applications. The allocated {@code Container} is always on a single node, has a unique
{@link ContainerId}, and has a specific amount of {@link Resource} allocated. It includes
details such as:
• {@link ContainerId} for the container, which is globally unique
• {@link NodeId} of the node on which it is allocated
• HTTP uri of the node
• {@link Resource} allocated to the container
• {@link Priority} at which the container was allocated
• Container {@link Token} of the container, used to securely verify the authenticity of the
  allocation
Typically, an {@code ApplicationMaster} receives the {@code Container} from the
{@code ResourceManager} during resource-negotiation and then talks to the
{@code NodeManager} to start/stop containers.
@see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)
@see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)
ContainerId represents a globally unique identifier for a {@link Container} in the cluster.
It exposes the ApplicationAttemptId of the application to which the Container was assigned.
Note: if containers are kept alive across application attempts via
{@link ApplicationSubmissionContext#setKeepContainersAcrossApplicationAttempts(boolean)},
the ContainerId does not necessarily contain the currently running application attempt's
ApplicationAttemptId; the container may have been allocated by a previously exited attempt
and be managed by the current attempt, and thus carry the previous attempt's
ApplicationAttemptId. getId returns the lower 32 bits of the identifier, which do not
include the epoch; it will be marked deprecated, so use getContainerId instead, whose upper
24 bits are reserved as the epoch of the cluster and lower 40 bits as the sequential number
of containers.
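For reference, the identifiers nest as follows; all numeric values below are made up:

  import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
  import org.apache.hadoop.yarn.api.records.ApplicationId;
  import org.apache.hadoop.yarn.api.records.ContainerId;

  ApplicationId appId = ApplicationId.newInstance(1690000000000L, 42);   // cluster ts, sequence
  ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
  ContainerId containerId = ContainerId.newContainerId(attemptId, 7L);

  System.out.println(containerId);                   // e.g. container_1690000000000_0042_01_000007
  System.out.println(containerId.getContainerId());  // 64-bit identifier (epoch + sequence)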
ContainerLaunchContext carries the {@link LocalResource}s required by the container (setting
them clears all pre-existing map entries), the environment variables for the container, the
commands for launching it, the ApplicationACLs, and the ContainerRetryContext used to
relaunch the container. It also carries application-specific binary service data: a map
keyed by the name of each {@link AuxiliaryService} configured on a NodeManager, whose values
are the application-specific data targeted at that service; this is used to initialize the
application on the given {@link AuxiliaryService} by calling
{@link AuxiliaryService#initializeApplication(ApplicationInitializationContext)} (when
setting, pre-existing map entries are preserved).

It includes details such as:
• {@link ContainerId} of the container
• {@link Resource} allocated to the container
• User to whom the container is allocated
• Security tokens (if security is enabled)
• {@link LocalResource} necessary for running the container, such as binaries, jars,
  shared-objects, side-files etc.
• Optional, application-specific binary service data
• Environment variables for the launched process
• Command to launch the container
• Retry strategy when the container exits with failure
@see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)
ContainerReport includes details such as:
• {@link ContainerId} of the container
• Allocated Resources to the container
• Assigned Node id
• Assigned Priority
• Creation Time
• Finish Time
• Container Exit Status
• {@link ContainerState} of the container
• Diagnostic information in case of errors
• Log URL
• nodeHttpAddress
ContainerRetryContext provides details such as:
• {@link ContainerRetryPolicy}:
  - NEVER_RETRY (default value): whatever the error code is when the container fails to run,
    do not retry;
  - RETRY_ON_ALL_ERRORS: whatever the error code is, retry when the container fails to run;
  - RETRY_ON_SPECIFIC_ERROR_CODES: when the container fails to run, retry only if the error
    code is one of errorCodes.
  Note: if the error code is 137 (SIGKILL) or 143 (SIGTERM), the container is not retried,
  because it is usually killed on purpose.
• maxRetries specifies how many times to retry; -1 means retry forever.
• retryInterval specifies the delay before relaunching the container, in milliseconds.
• failuresValidityInterval: default value is -1; when set to a value {@literal >} 0 in
  milliseconds, failures that happen outside the interval are not taken into the failure
  count; if the failure count reaches maxRetries, the container is failed.
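A small sketch of wiring a retry context into a launch context; the exit code 10 is an
illustrative application-defined value, and launchContext is the ContainerLaunchContext
built earlier in this section:

  import java.util.Collections;
  import org.apache.hadoop.yarn.api.records.ContainerRetryContext;
  import org.apache.hadoop.yarn.api.records.ContainerRetryPolicy;

  // Retry the container up to 3 times, 5 seconds apart, only on exit code 10.
  ContainerRetryContext retryContext = ContainerRetryContext.newInstance(
      ContainerRetryPolicy.RETRY_ON_SPECIFIC_ERROR_CODES,
      Collections.singleton(10),   // errorCodes that trigger a relaunch
      3,                           // maxRetries
      5000);                       // retryInterval in milliseconds

  launchContext.setContainerRetryContext(retryContext);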
    + + + + + + + + + + Retry policy for relaunching a Container.

    ]]> +
    +
    + + + + + + + + + + State of a Container.

    ]]> +
    +
    + + + + + + + + ContainerId of the container. + @return ContainerId of the container]]> + + + + + ExecutionType of the container. + @return ExecutionType of the container]]> + + + + + ContainerState of the container. + @return ContainerState of the container]]> + + + + + Get the exit status for the container.

    + +

    Note: This is valid only for completed containers i.e. containers + with state {@link ContainerState#COMPLETE}. + Otherwise, it returns an ContainerExitStatus.INVALID. +

    + +

    Containers killed by the framework, either due to being released by + the application or being 'lost' due to node failures etc. have a special + exit code of ContainerExitStatus.ABORTED.

    + +

When a threshold number of the nodemanager-local-directories or a + threshold number of the nodemanager-log-directories become bad, the + container is not launched and exits with ContainerExitStatus.DISKS_FAILED. +

    + + @return exit status for the container]]> +
    +
    + + + diagnostic messages for failed containers. + @return diagnostic messages for failed containers]]> + + + + + Resource allocated to the container. + @return Resource allocated to the container]]> + + + + + + + + + + + + + + + + + + + + It provides details such as: +
      +
    • {@code ContainerId} of the container.
    • +
    • {@code ExecutionType} of the container.
    • +
    • {@code ContainerState} of the container.
    • +
    • Exit status of a completed container.
    • +
    • Diagnostic message for a failed container.
    • +
    • {@link Resource} allocated to the container.
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The execution types are the following: +
      +
    • {@link #GUARANTEED} - this container is guaranteed to start its + execution, once the corresponding start container request is received by + an NM. +
    • {@link #OPPORTUNISTIC} - the execution of this container may not start + immediately at the NM that receives the corresponding start container + request (depending on the NM's available resources). Moreover, it may be + preempted if it blocks a GUARANTEED container from being executed. +
    ]]> +
    +
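To make the distinction concrete, the sketch below requests a single OPPORTUNISTIC container and asks the scheduler to honor that type strictly. It assumes the ResourceRequest builder and ExecutionTypeRequest.newInstance(ExecutionType, boolean) described elsewhere in this API; the priority and sizing are arbitrary.

  import org.apache.hadoop.yarn.api.records.ExecutionType;
  import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
  import org.apache.hadoop.yarn.api.records.Priority;
  import org.apache.hadoop.yarn.api.records.Resource;
  import org.apache.hadoop.yarn.api.records.ResourceRequest;

  public class OpportunisticRequestSketch {
    public static ResourceRequest oneOpportunisticContainer() {
      return ResourceRequest.newBuilder()
          .priority(Priority.newInstance(1))
          .resourceName(ResourceRequest.ANY)
          .capability(Resource.newInstance(1024, 1))
          .numContainers(1)
          // 'true' asks the scheduler to return exactly this execution type.
          .executionTypeRequest(
              ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))
          .build();
    }
  }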
    + + + + + + + + + + + + + + + + + + + ExecutionType of the requested container. + + @param execType + ExecutionType of the requested container]]> + + + + + ExecutionType. + + @return ExecutionType.]]> + + + + + + + + + + + ResourceRequest. + Defaults to false. + @return whether ExecutionType request should be strictly honored]]> + + + + + + + + + ExecutionType as well as flag that explicitly asks the + configuredScheduler to return Containers of exactly the Execution Type + requested.]]> + + + + + + + + + + + + Application.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • resource key
  • +
  • {@link LocalizationState} of the resource
  • + ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + location of the resource to be localized. + @return location of the resource to be localized]]> + + + + + + location of the resource to be localized. + @param resource location of the resource to be localized]]> + + + + + size of the resource to be localized. + @return size of the resource to be localized]]> + + + + + + size of the resource to be localized. + @param size size of the resource to be localized]]> + + + + + timestamp of the resource to be localized, used + for verification. + @return timestamp of the resource to be localized]]> + + + + + + timestamp of the resource to be localized, used + for verification. + @param timestamp timestamp of the resource to be localized]]> + + + + + LocalResourceType of the resource to be localized. + @return LocalResourceType of the resource to be localized]]> + + + + + + LocalResourceType of the resource to be localized. + @param type LocalResourceType of the resource to be localized]]> + + + + + LocalResourceVisibility of the resource to be + localized. + @return LocalResourceVisibility of the resource to be + localized]]> + + + + + + LocalResourceVisibility of the resource to be + localized. + @param visibility LocalResourceVisibility of the resource to be + localized]]> + + + + + pattern that should be used to extract entries from the + archive (only used when type is PATTERN). + @return pattern that should be used to extract entries from the + archive.]]> + + + + + + pattern that should be used to extract entries from the + archive (only used when type is PATTERN). + @param pattern pattern that should be used to extract entries + from the archive.]]> + + + + + + + + + + + shouldBeUploadedToSharedCache + of this request]]> + + + + LocalResource represents a local resource required to + run a container.

    + +

    The NodeManager is responsible for localizing the resource + prior to launching the container.

    + +

    Applications can specify {@link LocalResourceType} and + {@link LocalResourceVisibility}.

    + + @see LocalResourceType + @see LocalResourceVisibility + @see ContainerLaunchContext + @see ApplicationSubmissionContext + @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]> +
    +
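For illustration, a hedged sketch of building a LocalResource for a jar that already sits in HDFS: the path is hypothetical, and it assumes URL.fromPath and the LocalResource.newInstance factory. The size and timestamp come from the file status so the NodeManager can verify the localized copy.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.yarn.api.records.LocalResource;
  import org.apache.hadoop.yarn.api.records.LocalResourceType;
  import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
  import org.apache.hadoop.yarn.api.records.URL;

  public class LocalResourceSketch {
    public static LocalResource appJar(Configuration conf) throws Exception {
      Path jar = new Path("hdfs:///apps/myapp/app.jar"); // hypothetical location
      FileStatus status = FileSystem.get(conf).getFileStatus(jar);
      // The declared size and timestamp must match the file for localization to succeed.
      return LocalResource.newInstance(URL.fromPath(jar),
          LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
          status.getLen(), status.getModificationTime());
    }
  }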
    + + + + + + + + + + type + of a resource localized by the {@code NodeManager}. +

    + The type can be one of: +

      +
    • + {@link #FILE} - Regular file i.e. uninterpreted bytes. +
    • +
    • + {@link #ARCHIVE} - Archive, which is automatically unarchived by the + NodeManager. +
    • +
    • + {@link #PATTERN} - A hybrid between {@link #ARCHIVE} and {@link #FILE}. +
    • +
    + + @see LocalResource + @see ContainerLaunchContext + @see ApplicationSubmissionContext + @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]> +
    +
    + + + + + + + + + + visibility + of a resource localized by the {@code NodeManager}. +

    + The visibility can be one of: +

      +
    • {@link #PUBLIC} - Shared by all users on the node.
    • +
    • + {@link #PRIVATE} - Shared among all applications of the + same user on the node. +
    • +
    • + {@link #APPLICATION} - Shared only among containers of the + same application on the node. +
    • +
    + + @see LocalResource + @see ContainerLaunchContext + @see ApplicationSubmissionContext + @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes details such as: +
      +
    • + includePattern. It uses Java Regex to filter the log files + which match the defined include pattern and those log files + will be uploaded when the application finishes. +
    • +
• + excludePattern. It uses Java Regex to filter the log files + which match the defined exclude pattern and those log files + will not be uploaded when the application finishes. If the log file + name matches both the include and the exclude pattern, this file + will be excluded eventually. +
    • +
    • + rolledLogsIncludePattern. It uses Java Regex to filter the log files + which match the defined include pattern and those log files + will be aggregated in a rolling fashion. +
    • +
    • + rolledLogsExcludePattern. It uses Java Regex to filter the log files + which match the defined exclude pattern and those log files + will not be aggregated in a rolling fashion. If the log file + name matches both the include and the exclude pattern, this file + will be excluded eventually. +
    • +
• + policyClassName. The policy class name that implements + ContainerLogAggregationPolicy. At runtime, the nodemanager will use the policy to decide + if a given container's log should be aggregated based on the + ContainerType and other runtime state such as exit code, by calling + ContainerLogAggregationPolicy#shouldDoLogAggregation. + This is useful when the app only wants to aggregate logs of a subset of + containers. Here are the available policies. Please make sure to specify + the canonical name by prefixing org.apache.hadoop.yarn.server. + nodemanager.containermanager.logaggregation. + to the class simple name below. + NoneContainerLogAggregationPolicy: skip aggregation for all containers. + AllContainerLogAggregationPolicy: aggregate all containers. + AMOrFailedContainerLogAggregationPolicy: aggregate application master + or failed containers. + FailedOrKilledContainerLogAggregationPolicy: aggregate failed or killed + containers. + FailedContainerLogAggregationPolicy: aggregate failed containers. + AMOnlyLogAggregationPolicy: aggregate application master containers. + SampleContainerLogAggregationPolicy: sample logs of successful worker + containers, in addition to application master and failed/killed + containers. + LimitSizeContainerLogAggregationPolicy: skip aggregation for killed + containers whose log size exceeds the limit of container log size. + If it isn't specified, it will use the cluster-wide default policy + defined by configuration yarn.nodemanager.log-aggregation.policy.class. + The default value of yarn.nodemanager.log-aggregation.policy.class is + AllContainerLogAggregationPolicy. +
    • +
    • + policyParameters. The parameters passed to the policy class via + ContainerLogAggregationPolicy#parseParameters during the policy object + initialization. This is optional. Some policy class might use parameters + to adjust its settings. It is up to policy class to define the scheme of + parameters. + For example, SampleContainerLogAggregationPolicy supports the format of + "SR:0.5,MIN:50", which means sample rate of 50% beyond the first 50 + successful worker containers. +
    • +
    + + @see ApplicationSubmissionContext]]> +
    +
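As a small, hedged example of the include/exclude patterns above, the sketch below keeps only stdout/stderr style files and skips temporary files when the application finishes. It assumes the two-argument LogAggregationContext.newInstance overload and ApplicationSubmissionContext.setLogAggregationContext; the regular expressions are illustrative.

  import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
  import org.apache.hadoop.yarn.api.records.LogAggregationContext;

  public class LogAggregationSketch {
    public static void configure(ApplicationSubmissionContext appContext) {
      // Include files whose names start with "std", exclude anything ending in ".tmp".
      LogAggregationContext logContext =
          LogAggregationContext.newInstance("std.*", ".*\\.tmp");
      appContext.setLogAggregationContext(logContext);
    }
  }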
    + + + + + + + + NodeManager for which the NMToken + is used to authenticate. + @return the {@link NodeId} of the NodeManager for which the + NMToken is used to authenticate.]]> + + + + + + + + NodeManager + @return the {@link Token} used for authenticating with NodeManager]]> + + + + + + + + + + + + The NMToken is used for authenticating communication with + NodeManager

    +

    It is issued by ResourceMananger when ApplicationMaster + negotiates resource with ResourceManager and + validated on NodeManager side.

    + @see AllocateResponse#getNMTokens()]]> +
    +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Node Attribute is a kind of label which represents one of the + attributes/features of a Node. It differs from a node partition label in that + resource guarantees across the queues will not be maintained for this type + of label. +

    +

+ A given Node can be mapped with any kind of attribute; a few examples are + HAS_SSD=true, JAVA_VERSION=JDK1.8, OS_TYPE=WINDOWS. +

    +

+ It is not compulsory for all the attributes to have a value; the empty string is the + default value for NodeAttributeType.STRING. +

    +

    + Node Attribute Prefix is used as namespace to segregate the attributes. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + Node Attribute Info describes a NodeAttribute. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + Node AttributeKey uniquely identifies a given Node Attribute. Node Attribute + is identified based on attribute prefix and name. +

    +

    + Node Attribute Prefix is used as namespace to segregate the attributes. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Type of a node Attribute. +

    + Based on this attribute expressions and values will be evaluated.]]> +
    +
    + + + + + + + + + + + + + hostname of the node. + @return hostname of the node]]> + + + + + port for communicating with the node. + @return port for communicating with the node]]> + + + + + + + + + + + + + + + + + + + NodeId is the unique identifier for a node.

    + +

    It includes the hostname and port to uniquely + identify the node. Thus, it is unique across restarts of any + NodeManager.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NodeId of the node. + @return NodeId of the node]]> + + + + + NodeState of the node. + @return NodeState of the node]]> + + + + + http address of the node. + @return http address of the node]]> + + + + + rack name for the node. + @return rack name for the node]]> + + + + + used Resource on the node. + @return used Resource on the node]]> + + + + + total Resource on the node. + @return total Resource on the node]]> + + + + + diagnostic health report of the node. + @return diagnostic health report of the node]]> + + + + + last timestamp at which the health report was received. + @return last timestamp at which the health report was received]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes details such as: +
      +
    • {@link NodeId} of the node.
    • +
    • HTTP Tracking URL of the node.
    • +
    • Rack name for the node.
    • +
    • Used {@link Resource} on the node.
    • +
    • Total available {@link Resource} of the node.
    • +
    • Number of running containers on the node.
    • +
    + + @see ApplicationClientProtocol#getClusterNodes(org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest)]]> +
    +
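For illustration, a minimal sketch that prints a NodeReport for every running node, using the YarnClient from the client module; the configuration is assumed to point at the target cluster.

  import java.util.List;
  import org.apache.hadoop.yarn.api.records.NodeReport;
  import org.apache.hadoop.yarn.api.records.NodeState;
  import org.apache.hadoop.yarn.client.api.YarnClient;
  import org.apache.hadoop.yarn.conf.YarnConfiguration;

  public class NodeReportSketch {
    public static void main(String[] args) throws Exception {
      YarnClient yarn = YarnClient.createYarnClient();
      yarn.init(new YarnConfiguration());
      yarn.start();
      try {
        List<NodeReport> nodes = yarn.getNodeReports(NodeState.RUNNING);
        for (NodeReport node : nodes) {
          // Used vs. total capability, plus the number of running containers.
          System.out.println(node.getNodeId() + " used=" + node.getUsed()
              + " total=" + node.getCapability()
              + " containers=" + node.getNumContainers());
        }
      } finally {
        yarn.stop();
      }
    }
  }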
    + + + + + + + + + + + + + + + + State of a Node.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + Mapping of Attribute Value to a Node. +

    ]]> +
    +
    + + + + + + + + + + + + ResourceManager. + @see PreemptionContract + @see StrictPreemptionContract]]> + + + + + + + + + + ApplicationMaster about resources requested back by the + ResourceManager. + @see AllocateRequest#setAskList(List)]]> + + + + + ApplicationMaster that may be reclaimed by the + ResourceManager. If the AM prefers a different set of + containers, then it may checkpoint or kill containers matching the + description in {@link #getResourceRequest}. + @return Set of containers at risk if the contract is not met.]]> + + + + ResourceManager. + The ApplicationMaster (AM) can satisfy this request according + to its own priorities to prevent containers from being forcibly killed by + the platform. + @see PreemptionMessage]]> + + + + + + + + + + ResourceManager]]> + + + + + + + + + + The AM should decode both parts of the message. The {@link + StrictPreemptionContract} specifies particular allocations that the RM + requires back. The AM can checkpoint containers' state, adjust its execution + plan to move the computation, or take no action and hope that conditions that + caused the RM to ask for the container will change. +

    + In contrast, the {@link PreemptionContract} also includes a description of + resources with a set of containers. If the AM releases containers matching + that profile, then the containers enumerated in {@link + PreemptionContract#getContainers()} may not be killed. +

    + Each preemption message reflects the RM's current understanding of the + cluster state, so a request to return N containers may not + reflect containers the AM is releasing, recently exited containers the RM has + yet to learn about, or new containers allocated before the message was + generated. Conversely, an RM may request a different profile of containers in + subsequent requests. +

    + The policy enforced by the RM is part of the scheduler. Generally, only + containers that have been requested consistently should be killed, but the + details are not specified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The ACL is one of: +

      +
    • + {@link #SUBMIT_APPLICATIONS} - ACL to submit applications to the queue. +
    • +
    • {@link #ADMINISTER_QUEUE} - ACL to administer the queue.
    • +
    + + @see QueueInfo + @see ApplicationClientProtocol#getQueueUserAcls(org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest)]]> +
    +
    + + + + + + + + name of the queue. + @return name of the queue]]> + + + + + configured capacity of the queue. + @return configured capacity of the queue]]> + + + + + maximum capacity of the queue. + @return maximum capacity of the queue]]> + + + + + current capacity of the queue. + @return current capacity of the queue]]> + + + + + child queues of the queue. + @return child queues of the queue]]> + + + + + running applications of the queue. + @return running applications of the queue]]> + + + + + QueueState of the queue. + @return QueueState of the queue]]> + + + + + accessible node labels of the queue. + @return accessible node labels of the queue]]> + + + + + default node label expression of the queue, this takes + affect only when the ApplicationSubmissionContext and + ResourceRequest don't specify their + NodeLabelExpression. + + @return default node label expression of the queue]]> + + + + + + + + queue stats for the queue + + @return queue stats of the queue]]> + + + + + + + + + + + preemption status of the queue. + @return if property is not in proto, return null; + otherwise, return preemption status of the queue]]> + + + + + + + + + + + + + + + It includes information such as: +
      +
    • Queue name.
    • +
    • Capacity of the queue.
    • +
    • Maximum capacity of the queue.
    • +
    • Current capacity of the queue.
    • +
    • Child queues.
    • +
    • Running applications.
    • +
    • {@link QueueState} of the queue.
    • +
    • {@link QueueConfigurations} of the queue.
    • +
    + + @see QueueState + @see QueueConfigurations + @see ApplicationClientProtocol#getQueueInfo(org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest)]]> +
    +
    + + + + + + + + + + + A queue is in one of: +
      +
    • {@link #RUNNING} - normal state.
    • +
    • {@link #STOPPED} - not accepting new application submissions.
    • +
• + {@link #DRAINING} - not accepting new application submissions + and waiting for applications to finish. +
    • +
    + + @see QueueInfo + @see ApplicationClientProtocol#getQueueInfo(org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + queue name of the queue. + @return queue name of the queue]]> + + + + + QueueACL for the given user. + @return list of QueueACL for the given user]]> + + + + QueueUserACLInfo provides information {@link QueueACL} for + the given user.

    + + @see QueueACL + @see ApplicationClientProtocol#getQueueUserAcls(org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The ACL is one of: +
      +
    • + {@link #ADMINISTER_RESERVATIONS} - ACL to create, list, update and + delete reservations. +
    • +
    • {@link #LIST_RESERVATIONS} - ACL to list reservations.
    • +
    • {@link #SUBMIT_RESERVATIONS} - ACL to create reservations.
    • +
    + Users can always list, update and delete their own reservations.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes: +
      +
    • Duration of the reservation.
    • +
    • Acceptance time of the duration.
    • +
    • + List of {@link ResourceAllocationRequest}, which includes the time + interval, and capability of the allocation. + {@code ResourceAllocationRequest} represents an allocation + made for a reservation for the current state of the queue. This can be + changed for reasons such as re-planning, but will always be subject to + the constraints of the user contract as described by + {@link ReservationDefinition} +
    • +
    • {@link ReservationId} of the reservation.
    • +
    • {@link ReservationDefinition} used to make the reservation.
    • +
    + + @see ResourceAllocationRequest + @see ReservationId + @see ReservationDefinition]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + start time of the {@code ResourceManager} which is used to + generate globally unique {@link ReservationId}. + + @return start time of the {@code ResourceManager}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link ReservationId} represents the globally unique identifier for + a reservation. +

    + +

    + The globally unique nature of the identifier is achieved by using the + cluster timestamp i.e. start-time of the {@code ResourceManager} + along with a monotonically increasing counter for the reservation. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes: +
      +
    • {@link Resource} required for each request.
    • +
    • + Number of containers, of above specifications, which are required by the + application. +
    • +
    • Concurrency that indicates the gang size of the request.
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + memory of the resource. Note - while memory has + never had a unit specified, all YARN configurations have specified memory + in MB. The assumption has been that the daemons and applications are always + using the same units. With the introduction of the ResourceInformation + class we have support for units - so this function will continue to return + memory but in the units of MB + + @return memory(in MB) of the resource]]> + + + + + memory of the resource. Note - while memory has + never had a unit specified, all YARN configurations have specified memory + in MB. The assumption has been that the daemons and applications are always + using the same units. With the introduction of the ResourceInformation + class we have support for units - so this function will continue to return + memory but in the units of MB + + @return memory of the resource]]> + + + + + + memory of the resource. Note - while memory has + never had a unit specified, all YARN configurations have specified memory + in MB. The assumption has been that the daemons and applications are always + using the same units. With the introduction of the ResourceInformation + class we have support for units - so this function will continue to set + memory but the assumption is that the value passed is in units of MB. + + @param memory memory(in MB) of the resource]]> + + + + + + memory of the resource. + @param memory memory of the resource]]> + + + + + number of virtual cpu cores of the resource. + + Virtual cores are a unit for expressing CPU parallelism. A node's capacity + should be configured with virtual cores equal to its number of physical + cores. A container should be requested with the number of cores it can + saturate, i.e. the average number of threads it expects to have runnable + at a time. + + @return num of virtual cpu cores of the resource]]> + + + + + + number of virtual cpu cores of the resource. + + Virtual cores are a unit for expressing CPU parallelism. A node's capacity + should be configured with virtual cores equal to its number of physical + cores. A container should be requested with the number of cores it can + saturate, i.e. the average number of threads it expects to have runnable + at a time. + + @param vCores number of virtual cpu cores of the resource]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resource models a set of computer resources in the + cluster.

    + +

    Currently it models both memory and CPU.

    + +

    The unit for memory is megabytes. CPU is modeled with virtual cores + (vcores), a unit for expressing parallelism. A node's capacity should + be configured with virtual cores equal to its number of physical cores. A + container should be requested with the number of cores it can saturate, i.e. + the average number of threads it expects to have runnable at a time.

    + +

    Virtual cores take integer values and thus currently CPU-scheduling is + very coarse. A complementary axis for CPU requests that represents + processing power will likely be added in the future to enable finer-grained + resource configuration.

    + +

    Typically, applications request Resource of suitable + capability to run their component tasks.

    + + @see ResourceRequest + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]> +
    +
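A tiny, hedged example of the units described above: memory is given in megabytes and CPU in virtual cores, so a 4 GiB / 2 vcore capability looks like this.

  import org.apache.hadoop.yarn.api.records.Resource;

  public class ResourceSketch {
    public static Resource fourGigTwoCores() {
      // 4096 MB of memory and 2 virtual cores.
      return Resource.newInstance(4096, 2);
    }
  }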
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes: +
      +
    • StartTime of the allocation.
    • +
    • EndTime of the allocation.
    • +
    • {@link Resource} reserved for the allocation.
    • +
    + + @see Resource]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blacklist of resources + for the application. + + @see ResourceRequest + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + host/rack string represents an arbitrary + host name. + + @param hostName host/rack on which the allocation is desired + @return whether the given host/rack string represents an arbitrary + host name]]> + + + + + Priority of the request. + @return Priority of the request]]> + + + + + + Priority of the request + @param priority Priority of the request]]> + + + + + host/rack) on which the allocation + is desired. + + A special value of * signifies that any resource + (host/rack) is acceptable. + + @return resource (e.g. host/rack) on which the allocation + is desired]]> + + + + + + host/rack) on which the allocation + is desired. + + A special value of * signifies that any resource name + (e.g. host/rack) is acceptable. + + @param resourceName (e.g. host/rack) on which the + allocation is desired]]> + + + + + + + + + + + + + + + + ResourceRequest. Defaults to true. + + @return whether locality relaxation is enabled with this + ResourceRequest.]]> + + + + + + ExecutionTypeRequest of the requested container. + + @param execSpec + ExecutionTypeRequest of the requested container]]> + + + + + ResourceRequest. Defaults to true. + + @return whether locality relaxation is enabled with this + ResourceRequest.]]> + + + + + + For a request at a network hierarchy level, set whether locality can be relaxed + to that level and beyond.

    + +

    If the flag is off on a rack-level ResourceRequest, + containers at that request's priority will not be assigned to nodes on that + request's rack unless requests specifically for those nodes have also been + submitted.

    + +

    If the flag is off on an {@link ResourceRequest#ANY}-level + ResourceRequest, containers at that request's priority will + only be assigned on racks for which specific requests have also been + submitted.

    + +

    For example, to request a container strictly on a specific node, the + corresponding rack-level and any-level requests should have locality + relaxation set to false. Similarly, to request a container strictly on a + specific rack, the corresponding any-level request should have locality + relaxation set to false.

    + + @param relaxLocality whether locality relaxation is enabled with this + ResourceRequest.]]> + + + + + + + + + + + + + + + + ID corresponding to this allocation request. This + ID is an identifier for different {@code ResourceRequest}s from the same + application. The allocated {@code Container}(s) received as part of the + {@code AllocateResponse} response will have the ID corresponding to the + original {@code ResourceRequest} for which the RM made the allocation. +

    + The scheduler may return multiple {@code AllocateResponse}s corresponding + to the same ID as and when scheduler allocates {@code Container}(s). + Applications can continue to completely ignore the returned ID in + the response and use the allocation for any of their outstanding requests. +

    + If one wishes to replace an entire {@code ResourceRequest} corresponding to + a specific ID, they can simply cancel the corresponding {@code + ResourceRequest} and submit a new one afresh. + + @return the ID corresponding to this allocation request.]]> + + + + + + ID corresponding to this allocation request. This + ID is an identifier for different {@code ResourceRequest}s from the same + application. The allocated {@code Container}(s) received as part of the + {@code AllocateResponse} response will have the ID corresponding to the + original {@code ResourceRequest} for which the RM made the allocation. +

    + The scheduler may return multiple {@code AllocateResponse}s corresponding + to the same ID as and when scheduler allocates {@code Container}(s). + Applications can continue to completely ignore the returned ID in + the response and use the allocation for any of their outstanding requests. +

    + If one wishes to replace an entire {@code ResourceRequest} corresponding to + a specific ID, they can simply cancel the corresponding {@code + ResourceRequest} and submit a new one afresh. +

    + If the ID is not set, scheduler will continue to work as previously and all + allocated {@code Container}(s) will have the default ID, -1. + + @param allocationRequestID the ID corresponding to this allocation + request.]]> + + + + + + Resource capability of the request. + @param capability Resource capability of the request]]> + + + + + Resource capability of the request. + @return Resource capability of the request]]> + + + + + + + + + + + + + + + + + + It includes: +

      +
    • {@link Priority} of the request.
    • +
    • + The name of the host or rack on which the allocation is + desired. A special value of * signifies that + any host/rack is acceptable to the application. +
    • +
    • {@link Resource} required for each request.
    • +
    • + Number of containers, of above specifications, which are required + by the application. +
    • +
    • + A boolean relaxLocality flag, defaulting to {@code true}, + which tells the {@code ResourceManager} if the application wants + locality to be loose (i.e. allows fall-through to rack or any) + or strict (i.e. specify hard constraint on resource allocation). +
    • +
    + + @see Resource + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]> +
    +
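Following the locality-relaxation rules above, here is a hedged sketch of asking for a container strictly on one node: the node-level request stays relaxed (the default), while the matching rack-level and ANY-level requests turn relaxation off. The host and rack names are hypothetical.

  import java.util.Arrays;
  import java.util.List;
  import org.apache.hadoop.yarn.api.records.Priority;
  import org.apache.hadoop.yarn.api.records.Resource;
  import org.apache.hadoop.yarn.api.records.ResourceRequest;

  public class StrictNodeRequestSketch {
    public static List<ResourceRequest> onOneNodeOnly() {
      Priority priority = Priority.newInstance(1);
      Resource capability = Resource.newInstance(2048, 1);
      // Node-level request for the desired host.
      ResourceRequest node = ResourceRequest.newInstance(
          priority, "host-17.example.com", capability, 1);
      // Rack-level and ANY-level requests with relaxLocality=false, so the
      // scheduler cannot fall through to other nodes or racks.
      ResourceRequest rack = ResourceRequest.newInstance(
          priority, "/rack-3", capability, 1, false);
      ResourceRequest any = ResourceRequest.newInstance(
          priority, ResourceRequest.ANY, capability, 1, false);
      return Arrays.asList(node, rack, any);
    }
  }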
    + + + + + + + priority of the request. + @see ResourceRequest#setPriority(Priority) + @param priority priority of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + resourceName of the request. + @see ResourceRequest#setResourceName(String) + @param resourceName resourceName of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + capability of the request. + @see ResourceRequest#setCapability(Resource) + @param capability capability of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + numContainers of the request. + @see ResourceRequest#setNumContainers(int) + @param numContainers numContainers of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + relaxLocality of the request. + @see ResourceRequest#setRelaxLocality(boolean) + @param relaxLocality relaxLocality of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + nodeLabelExpression of the request. + @see ResourceRequest#setNodeLabelExpression(String) + @param nodeLabelExpression + nodeLabelExpression of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + executionTypeRequest of the request. + @see ResourceRequest#setExecutionTypeRequest( + ExecutionTypeRequest) + @param executionTypeRequest + executionTypeRequest of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + executionTypeRequest of the request with 'ensure + execution type' flag set to true. + @see ResourceRequest#setExecutionTypeRequest( + ExecutionTypeRequest) + @param executionType executionType of the request. + @return {@link ResourceRequestBuilder}]]> + + + + + + allocationRequestId of the request. + @see ResourceRequest#setAllocationRequestId(long) + @param allocationRequestId + allocationRequestId of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + virtual memory. + + @return virtual memory in MB]]> + + + + + + virtual memory. + + @param vmem virtual memory in MB]]> + + + + + physical memory. + + @return physical memory in MB]]> + + + + + + physical memory. + + @param pmem physical memory in MB]]> + + + + + CPU utilization (The amount of vcores used). + + @return CPU utilization]]> + + + + + + CPU utilization (The amount of vcores used). + + @param cpu CPU utilization]]> + + + + + + custom resource utilization + (The amount of custom resource used). + + @param resourceName resourceName of custom resource + @return resourceName utilization]]> + + + + + + + + + + + + custom resource utilization + (The amount of custom resource used). + @param resourceName resourceName + @param utilization utilization of custom resource]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ResourceUtilization models the utilization of a set of computer + resources in the cluster. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + allocationRequestId of the request. + + @see SchedulingRequest#setAllocationRequestId(long) + @param allocationRequestId allocationRequestId of the + request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + priority of the request. + + @param priority priority of the request + @return {@link SchedulingRequest.SchedulingRequestBuilder} + @see SchedulingRequest#setPriority(Priority)]]> + + + + + + executionType of the request. + + @see SchedulingRequest#setExecutionType(ExecutionTypeRequest) + @param executionType executionType of the request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + allocationTags of the request. + + @see SchedulingRequest#setAllocationTags(Set) + @param allocationTags allocationsTags of the request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + executionType of the request. + + @see SchedulingRequest#setResourceSizing(ResourceSizing) + @param resourceSizing resourceSizing of the request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + placementConstraintExpression of the request. + + @see SchedulingRequest#setPlacementConstraint( + PlacementConstraint) + @param placementConstraintExpression placementConstraints of + the request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ApplicationMaster that may be reclaimed by the + ResourceManager. + @return the set of {@link ContainerId} to be preempted.]]> + + + + ApplicationMaster (AM) + may attempt to checkpoint work or adjust its execution plan to accommodate + it. In contrast to {@link PreemptionContract}, the AM has no flexibility in + selecting which resources to return to the cluster. + @see PreemptionMessage]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Token is the security entity used by the framework + to verify authenticity of any resource.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ContainerId of the container. + @return ContainerId of the container]]> + + + + + + + + + + + ContainerUpdateType of the container. + @return ContainerUpdateType of the container.]]> + + + + + + ContainerUpdateType of the container. + @param updateType of the Container]]> + + + + + ContainerId of the container. + @return ContainerId of the container]]> + + + + + + ContainerId of the container. + @param containerId ContainerId of the container]]> + + + + + ExecutionType of the container. + @return ExecutionType of the container]]> + + + + + + ExecutionType of the container. + @param executionType ExecutionType of the container]]> + + + + + + Resource capability of the request. + @param capability Resource capability of the request]]> + + + + + Resource capability of the request. + @return Resource capability of the request]]> + + + + + + + + + + + + It includes: +
      +
    • version for the container.
    • +
    • {@link ContainerId} for the container.
    • +
    • + {@link Resource} capability of the container after the update request + is completed. +
    • +
    • + {@link ExecutionType} of the container after the update request is + completed. +
    • +
    + + Update rules: +
      +
    • + Currently only ONE aspect of the container can be updated per request + (user can either update Capability OR ExecutionType in one request.. + not both). +
    • +
    • + There must be only 1 update request per container in an allocate call. +
    • +
    • + If a new update request is sent for a container (in a subsequent allocate + call) before the first one is satisfied by the Scheduler, it will + overwrite the previous request. +
    • +
    + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]> +
    +
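A hedged sketch of the update rules above, growing a running container's capability through the allocate call; it assumes the five-argument UpdateContainerRequest.newInstance factory and Container.getVersion(), and leaves the target ExecutionType null because only one aspect may change per request.

  import org.apache.hadoop.yarn.api.records.Container;
  import org.apache.hadoop.yarn.api.records.ContainerUpdateType;
  import org.apache.hadoop.yarn.api.records.Resource;
  import org.apache.hadoop.yarn.api.records.UpdateContainerRequest;

  public class ContainerUpdateSketch {
    public static UpdateContainerRequest increaseTo(Container container, Resource target) {
      // Only the capability changes here, so no target execution type is given.
      return UpdateContainerRequest.newInstance(
          container.getVersion(), container.getId(),
          ContainerUpdateType.INCREASE_RESOURCE, target, null);
    }
  }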
    + + + + + + + + + + + + + + + ContainerUpdateType. + @return ContainerUpdateType]]> + + + + + + ContainerUpdateType. + @param updateType ContainerUpdateType]]> + + + + + Container. + @return Container]]> + + + + + + Container. + @param container Container]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + URL represents a serializable {@link java.net.URL}.

    ]]> +
    +
    + + + + + + + + + + RMAppAttempt.]]> + + + + + + + + + + + + ApplicationMaster.]]> + + + + + + + + + + NodeManagers in the cluster. + @return number of NodeManagers in the cluster]]> + + + + + DecommissionedNodeManagers in the cluster. + + @return number of DecommissionedNodeManagers in the cluster]]> + + + + + ActiveNodeManagers in the cluster. + + @return number of ActiveNodeManagers in the cluster]]> + + + + + LostNodeManagers in the cluster. + + @return number of LostNodeManagers in the cluster]]> + + + + + UnhealthyNodeManagers in the cluster. + + @return number of UnhealthyNodeManagers in the cluster]]> + + + + + RebootedNodeManagers in the cluster. + + @return number of RebootedNodeManagers in the cluster]]> + + + + YarnClusterMetrics represents cluster metrics.

    + +

    Currently only number of NodeManagers is provided.

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class contains the information about a timeline domain, which is used + to a user to host a number of timeline entities, isolating them from others'. + The user can also define the reader and writer users/groups for the the + domain, which is used to control the access to its entities. +

    + +

    + The reader and writer users/groups pattern that the user can supply is the + same as what AccessControlList takes. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The class that contains the the meta information of some conceptual entity + and its related events. The entity can be an application, an application + attempt, a container or whatever the user-defined object. +

    + +

    + Primary filters will be used to index the entities in + TimelineStore, such that users should carefully choose the + information they want to store as the primary filters. The remaining can be + stored as other information. +

    ]]> +
    +
    + + + + + + + + + + + + + ApplicationId of the + TimelineEntityGroupId. + + @return ApplicationId of the + TimelineEntityGroupId]]> + + + + + + + + timelineEntityGroupId. + + @return timelineEntityGroupId]]> + + + + + + + + + + + + + + + + + + + TimelineEntityGroupId is an abstract way for + timeline service users to represent #a group of related timeline data. + For example, all entities that represents one data flow DAG execution + can be grouped into one timeline entity group.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class contains the information about a timeline service domain, which is + used to a user to host a number of timeline entities, isolating them from + others'. The user can also define the reader and writer users/groups for + the domain, which is used to control the access to its entities. +

    +

    + The reader and writer users/groups pattern that the user can supply is the + same as what AccessControlList takes. +

    ]]> +
    +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The constructor is used to construct a proxy {@link TimelineEntity} or its + subclass object from the real entity object that carries information.

    + +

    + It is usually used in the case where we want to recover class polymorphism + after deserializing the entity from its JSON form. +

    + @param entity the real entity that carries information]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: Entities will be stored in the order of idPrefix specified. + If users decide to set idPrefix for an entity, they MUST provide + the same prefix for every update of this entity. +

    + Example:
    + TimelineEntity entity = new TimelineEntity();
    + entity.setIdPrefix(value);
    + 
    + Users can use {@link TimelineServiceHelper#invertLong(long)} to invert + the prefix if necessary. + + @param entityIdPrefix prefix for an entity.]]> +
    +
    + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name property as a + InetSocketAddress. On an HA cluster, + this fetches the address corresponding to the RM identified by + {@link #RM_HA_ID}. + @param name property name. + @param defaultAddress the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + yarn.resourcemanager.scheduler.class + cannot handle placement constraints, the corresponding SchedulingRequests + will be rejected. As of now, only the capacity scheduler supports + SchedulingRequests. In particular, it currently supports anti-affinity + constraints (no affinity or cardinality) and places one container at a + time. 
The advantage of this handler compared to the placement-processor is + that it follows the same ordering rules for queues (sorted by utilization, + priority) and apps (sorted by FIFO/fairness/priority) as the ones followed + by the main scheduler.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OPPORTUNISTIC containers on the NM.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • default
  • +
  • docker
  • +
  • javasandbox
  • +
  • runc
  • + ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • NONE - the RM will do nothing special.
  • +
  • LENIENT - the RM will generate and provide a keystore and truststore + to the AM, which it is free to use for HTTPS in its tracking URL web + server. The RM proxy will still allow HTTP connections to AMs that opt + not to use HTTPS.
  • +
  • STRICT - this is the same as LENIENT, except that the RM proxy will + only allow HTTPS connections to AMs; HTTP connections will be blocked + and result in a warning page to the user.
  • + ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Default platform-specific CLASSPATH for YARN applications. A + comma-separated list of CLASSPATH entries constructed based on the client + OS environment expansion syntax. +

    +

    + Note: Use {@link #DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH} for + cross-platform practice i.e. submit an application from a Windows client to + a Linux/Unix server or vice versa. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The information is passed along to applications via + {@link StartContainersResponse#getAllServicesMetaData()} that is returned by + {@link ContainerManagementProtocol#startContainers(StartContainersRequest)} +

    + + @return meta-data for this service that should be made available to + applications.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The method used by the NodeManager log aggregation service + to initial the policy object with parameters specified by the application + or the cluster-wide setting. +

    + + @param parameters parameters with scheme defined by the policy class.]]> +
    +
    + + + + + The method used by the NodeManager log aggregation service + to ask the policy object if a given container's logs should be aggregated. +

    + + @param logContext ContainerLogContext + @return Whether or not the container's logs should be aggregated.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The method used by administrators to ask SCM to run cleaner task right away +

    + + @param request request SharedCacheManager to run a cleaner task + @return SharedCacheManager returns an empty response + on success and throws an exception on rejecting the request + @throws YarnException + @throws IOException]]> +
    +
    + + + The protocol between administrators and the SharedCacheManager +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + Tag1(N1),P1:Tag2(N2),P2:...:TagN(Nn),Pn

    + + where TagN(Nn) is a key value pair to determine the source + allocation tag and the number of allocations, such as: + +

    foo(3)

    + + Optional when using NodeAttribute Constraint. + + and where Pn can be any form of a valid constraint expression, + such as: + +
      +
    • in,node,foo,bar
    • +
    • notin,node,foo,bar,1,2
    • +
    • and(notin,node,foo:notin,node,bar)
    • +
    + + and NodeAttribute Constraint such as + +
      +
    • yarn.rm.io/foo=true
    • +
    • java=1.7,1.8
    • +
    + @param expression expression string. + @return a map of source tags to placement constraint mapping. + @throws PlacementConstraintParseException]]> +
    +
    + + + + + +
    + +
    + + + + + +
diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml
new file mode 100644
index 00000000000..a2b0cd041fc
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml
@@ -0,0 +1,3067 @@
+ In secure mode, YARN verifies access to the application, queue
+ etc. before accepting the request.

    + If the user does not have VIEW_APP access then the following + fields in the report will be set to stubbed values: +

      +
    • host - set to "N/A"
    • +
    • RPC port - set to -1
    • +
    • client token - set to "N/A"
    • +
    • diagnostics - set to "N/A"
    • +
    • tracking URL - set to "N/A"
    • +
    • original tracking URL - set to "N/A"
    • +
    • resource usage report - all values are -1
    • +
    + + @param appId + {@link ApplicationId} of the application that needs a report + @return application report + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Get a report (ApplicationReport) of all Applications in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @return a list of reports for all applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report of the given ApplicationAttempt. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + + @param applicationAttemptId + {@link ApplicationAttemptId} of the application attempt that needs + a report + @return application attempt report + @throws YarnException + @throws ApplicationAttemptNotFoundException if application attempt + not found + @throws IOException]]> +
    +
    + + + + + + + Get a report of all (ApplicationAttempts) of Application in the cluster. +

    + + @param applicationId + @return a list of reports for all application attempts for specified + application + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report of the given Container. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + + @param containerId + {@link ContainerId} of the container that needs a report + @return container report + @throws YarnException + @throws ContainerNotFoundException if container not found + @throws IOException]]> +
    +
    + + + + + + + Get a report of all (Containers) of ApplicationAttempt in the cluster. +

    + + @param applicationAttemptId + @return a list of reports of all containers for specified application + attempt + @throws YarnException + @throws IOException]]> +
    +
    +
    + + + + + + + + + {@code + AMRMClient.createAMRMClientContainerRequest() + } + @return the newly create AMRMClient instance.]]> + + + + + + + + + + + + + + + + RegisterApplicationMasterResponse + @throws YarnException + @throws IOException]]> + + + + + + + + + + + RegisterApplicationMasterResponse + @throws YarnException + @throws IOException]]> + + + + + + + + addContainerRequest are sent to the + ResourceManager. New containers assigned to the master are + retrieved. Status of completed containers and node health updates are also + retrieved. This also doubles up as a heartbeat to the ResourceManager and + must be made periodically. The call may not always return any new + allocations of containers. App should not make concurrent allocate + requests. May cause request loss. + +

+ Note: If the user has not removed container requests that have already + been satisfied, then the re-register may end up resending all of the + container requests to the RM (including matched requests), which could + result in the RM allocating a large number of new containers. +

    + + @param progressIndicator Indicates progress made by the master + @return the response of the allocate request + @throws YarnException + @throws IOException]]> +
    +
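Because allocate() doubles as the AM heartbeat, a typical ApplicationMaster drives it from a single loop. A condensed sketch follows; the hostname, container size, and completion condition are placeholder assumptions, not values from the patch.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class AllocateLoopSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new YarnConfiguration();
    AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
    rmClient.init(conf);
    rmClient.start();

    // Hostname, port, and tracking URL are placeholders.
    rmClient.registerApplicationMaster("am-host.example.com", 0, "");

    // Ask for one 1 GB / 1 vcore container anywhere in the cluster.
    Resource capability = Resource.newInstance(1024, 1);
    rmClient.addContainerRequest(
        new ContainerRequest(capability, null, null, Priority.newInstance(0)));

    boolean done = false;
    while (!done) {
      AllocateResponse response = rmClient.allocate(0.1f); // heartbeat + progress
      List<Container> allocated = response.getAllocatedContainers();
      List<ContainerStatus> completed = response.getCompletedContainersStatuses();
      System.out.println("newly allocated: " + allocated.size()
          + ", completed: " + completed.size());
      // Launch work on allocated containers and track completions here;
      // this sketch simply finishes once anything has completed.
      done = !completed.isEmpty();
      Thread.sleep(1000); // keep heartbeats periodic
    }

    rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", "");
    rmClient.stop();
  }
}

The single loop keeps the heartbeat periodic from one thread, which is exactly what the warning above about concurrent allocate requests is guarding against.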
    + + + + + + + + + + + + + + allocate + @param req Resource request]]> + + + + + + + + + + + + + allocate. + Any previous pending resource change request of the same container will be + removed. + + Application that calls this method is expected to maintain the + Containers that are returned from previous successful + allocations or resource changes. By passing in the existing container and a + target resource capability to this method, the application requests the + ResourceManager to change the existing resource allocation to the target + resource allocation. + + @deprecated use + {@link #requestContainerUpdate(Container, UpdateContainerRequest)} + + @param container The container returned from the last successful resource + allocation or resource change + @param capability The target resource capability of the container]]> + + + + + + + allocate. + Any previous pending update request of the same container will be + removed. + + @param container The container returned from the last successful resource + allocation or update + @param updateContainerRequest The UpdateContainerRequest.]]> + + + + + + + + + + + + + + + + + + + + + + + + ContainerRequests matching the given + parameters. These ContainerRequests should have been added via + addContainerRequest earlier in the lifecycle. For performance, + the AMRMClient may return its internal collection directly without creating + a copy. Users should not perform mutable operations on the return value. + Each collection in the list contains requests with identical + Resource size that fit in the given capability. In a + collection, requests will be returned in the same order as they were added. + + NOTE: This API only matches Container requests that were created by the + client WITHOUT the allocationRequestId being set. + + @return Collection of request matching the parameters]]> + + + + + + + + + ContainerRequests matching the given + parameters. These ContainerRequests should have been added via + addContainerRequest earlier in the lifecycle. For performance, + the AMRMClient may return its internal collection directly without creating + a copy. Users should not perform mutable operations on the return value. + Each collection in the list contains requests with identical + Resource size that fit in the given capability. In a + collection, requests will be returned in the same order as they were added. + specify an ExecutionType. + + NOTE: This API only matches Container requests that were created by the + client WITHOUT the allocationRequestId being set. + + @param priority Priority + @param resourceName Location + @param executionType ExecutionType + @param capability Capability + @return Collection of request matching the parameters]]> + + + + + + + + + + + + + ContainerRequests matching the given + allocationRequestId. These ContainerRequests should have been added via + addContainerRequest earlier in the lifecycle. For performance, + the AMRMClient may return its internal collection directly without creating + a copy. Users should not perform mutable operations on the return value. + + NOTE: This API only matches Container requests that were created by the + client WITH the allocationRequestId being set to a non-default value. + + @param allocationRequestId Allocation Request Id + @return Collection of request matching the parameters]]> + + + + + + + + + + + + + AMRMClient. This cache must + be shared with the {@link NMClient} used to manage containers for the + AMRMClient +

    + If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @param nmTokenCache the NM token cache to use.]]> + + + + + AMRMClient. This cache must be + shared with the {@link NMClient} used to manage containers for the + AMRMClient. +

    + If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @return the NM token cache.]]> + + + + + + + + + + + + + + + + + + + + + + + + check to return true for each 1000 ms. + See also {@link #waitFor(java.util.function.Supplier, int)} + and {@link #waitFor(java.util.function.Supplier, int, int)} + @param check the condition for which it should wait]]> + + + + + + + + check to return true for each + checkEveryMillis ms. + See also {@link #waitFor(java.util.function.Supplier, int, int)} + @param check user defined checker + @param checkEveryMillis interval to call check]]> + + + + + + + + + check to return true for each + checkEveryMillis ms. In the main loop, this method will log + the message "waiting in main loop" for each logInterval times + iteration to confirm the thread is alive. + @param check user defined checker + @param checkEveryMillis interval to call check + @param logInterval interval to log for each]]> + + + + + + + + + + + + + + + + + + + + + + + + + + Start an allocated container.

    + +

    The ApplicationMaster or other applications that use the + client must provide the details of the allocated container, including the + Id, the assigned node's Id and the token via {@link Container}. In + addition, the AM needs to provide the {@link ContainerLaunchContext} as + well.

    + + @param container the allocated container + @param containerLaunchContext the context information needed by the + NodeManager to launch the + container + @return a map between the auxiliary service names and their outputs + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Increase the resource of a container.

    + +

    The ApplicationMaster or other applications that use the + client must provide the details of the container, including the Id and + the target resource encapsulated in the updated container token via + {@link Container}. +

    + + @param container the container with updated token. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Update the resources of a container.

    + +

    The ApplicationMaster or other applications that use the + client must provide the details of the container, including the Id and + the target resource encapsulated in the updated container token via + {@link Container}. +

    + + @param container the container with updated token. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + + Stop an started container.

    + + @param containerId the Id of the started container + @param nodeId the Id of the NodeManager + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + + Query the status of a container.

    + + @param containerId the Id of the started container + @param nodeId the Id of the NodeManager + + @return the status of a container. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
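Putting the startContainer, stopContainer, and getContainerStatus calls described above together, an AM-side helper might look like the following sketch. The launch command is a placeholder, and the Container is assumed to have just been allocated through AMRMClient.

import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.client.api.NMClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class NMClientSketch {

  /** Launches a shell command in an already-allocated container. */
  static void runInContainer(Container container) throws Exception {
    Configuration conf = new YarnConfiguration();
    NMClient nmClient = NMClient.createNMClient();
    nmClient.init(conf);
    nmClient.start();
    try {
      ContainerLaunchContext ctx = ContainerLaunchContext.newInstance(
          Collections.emptyMap(),                // local resources
          Collections.emptyMap(),                // environment
          Collections.singletonList("sleep 30"), // commands (placeholder)
          null,                                  // service data
          null,                                  // tokens
          Collections.emptyMap());               // ACLs

      // Start the container; the returned map carries auxiliary-service meta-data.
      Map<String, ByteBuffer> serviceMeta =
          nmClient.startContainer(container, ctx);

      // Query its status once, then stop it.
      ContainerStatus status =
          nmClient.getContainerStatus(container.getId(), container.getNodeId());
      System.out.println("Container state: " + status.getState()
          + ", aux services: " + serviceMeta.keySet());

      nmClient.stopContainer(container.getId(), container.getNodeId());
    } finally {
      nmClient.stop();
    }
  }
}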
    + + + + + + + + Re-Initialize the Container.

    + + @param containerId the Id of the container to Re-Initialize. + @param containerLaunchContex the updated ContainerLaunchContext. + @param autoCommit commit re-initialization automatically ? + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Restart the specified container.

    + + @param containerId the Id of the container to restart. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Rollback last reInitialization of the specified container.

    + + @param containerId the Id of the container to restart. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Commit last reInitialization of the specified container.

    + + @param containerId the Id of the container to commit reInitialize. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + Set whether the containers that are started by this client, and are + still running should be stopped when the client stops. By default, the + feature should be enabled.

    However, containers will be stopped only + when service is stopped. i.e. after {@link NMClient#stop()}. + + @param enabled whether the feature is enabled or not]]> +
    +
    + + + + NMClient. This cache must be + shared with the {@link AMRMClient} that requested the containers managed + by this NMClient +

    + If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @param nmTokenCache the NM token cache to use.]]> + + + + + NMClient. This cache must be + shared with the {@link AMRMClient} that requested the containers managed + by this NMClient +

    + If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @return the NM token cache]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + By default YARN client libraries {@link AMRMClient} and {@link NMClient} use + {@link #getSingleton()} instance of the cache. +

      +
    • + Using the singleton instance of the cache is appropriate when running a + single ApplicationMaster in the same JVM. +
    • +
    • + When using the singleton, users don't need to do anything special, + {@link AMRMClient} and {@link NMClient} are already set up to use the + default singleton {@link NMTokenCache} +
    • +
    + If running multiple Application Masters in the same JVM, a different cache + instance should be used for each Application Master. +
      +
    • + If using the {@link AMRMClient} and the {@link NMClient}, setting up + and using an instance cache is as follows: +
      +   NMTokenCache nmTokenCache = new NMTokenCache();
      +   AMRMClient rmClient = AMRMClient.createAMRMClient();
      +   NMClient nmClient = NMClient.createNMClient();
      +   nmClient.setNMTokenCache(nmTokenCache);
      +   ...
      + 
      +
    • +
    • + If using the {@link AMRMClientAsync} and the {@link NMClientAsync}, + setting up and using an instance cache is as follows: +
      +   NMTokenCache nmTokenCache = new NMTokenCache();
      +   AMRMClient rmClient = AMRMClient.createAMRMClient();
      +   NMClient nmClient = NMClient.createNMClient();
      +   nmClient.setNMTokenCache(nmTokenCache);
      +   AMRMClientAsync rmClientAsync = new AMRMClientAsync(rmClient, 1000, [AMRM_CALLBACK]);
      +   NMClientAsync nmClientAsync = new NMClientAsync("nmClient", nmClient, [NM_CALLBACK]);
      +   ...
      + 
      +
    • +
    • + If using {@link ApplicationMasterProtocol} and + {@link ContainerManagementProtocol} directly, setting up and using an + instance cache is as follows: +
      +   NMTokenCache nmTokenCache = new NMTokenCache();
      +   ...
      +   ApplicationMasterProtocol amPro = ClientRMProxy.createRMProxy(conf, ApplicationMasterProtocol.class);
      +   ...
      +   AllocateRequest allocateRequest = ...
      +   ...
      +   AllocateResponse allocateResponse = rmClient.allocate(allocateRequest);
      +   for (NMToken token : allocateResponse.getNMTokens()) {
      +     nmTokenCache.setToken(token.getNodeId().toString(), token.getToken());
      +   }
      +   ...
      +   ContainerManagementProtocolProxy nmPro = ContainerManagementProtocolProxy(conf, nmTokenCache);
      +   ...
      +   nmPro.startContainer(container, containerContext);
      +   ...
      + 
      +
    • +
    + It is also possible to mix the usage of a client ({@code AMRMClient} or + {@code NMClient}, or the async versions of them) with a protocol proxy + ({@code ContainerManagementProtocolProxy} or + {@code ApplicationMasterProtocol}).]]> +
    +
    + + + + + + + + + + + + + + The method to claim a resource with the SharedCacheManager. + The client uses a checksum to identify the resource and an + {@link ApplicationId} to identify which application will be using the + resource. +

    + +

    + The SharedCacheManager responds with whether or not the + resource exists in the cache. If the resource exists, a URL to + the resource in the shared cache is returned. If the resource does not + exist, null is returned instead. +

    + +

    + Once a URL has been returned for a resource, that URL is safe to use for + the lifetime of the application that corresponds to the provided + ApplicationId. +

    + + @param applicationId ApplicationId of the application using the resource + @param resourceKey the key (i.e. checksum) that identifies the resource + @return URL to the resource, or null if it does not exist]]> +
    +
    + + + + + + + The method to release a resource with the SharedCacheManager. + This method is called once an application is no longer using a claimed + resource in the shared cache. The client uses a checksum to identify the + resource and an {@link ApplicationId} to identify which application is + releasing the resource. +

    + +

    + Note: This method is an optimization and the client is not required to call + it for correctness. +

    + + @param applicationId ApplicationId of the application releasing the + resource + @param resourceKey the key (i.e. checksum) that identifies the resource]]> +
    +
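A minimal client-side sketch of the use/release cycle described above. The file path is a placeholder, the checksum is obtained through the client's own getFileChecksum helper, and the ApplicationId would normally come from the submitted application rather than being fabricated as here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.client.api.SharedCacheClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class SharedCacheSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new YarnConfiguration();
    SharedCacheClient scClient = SharedCacheClient.createSharedCacheClient();
    scClient.init(conf);
    scClient.start();
    try {
      ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
      // The checksum of the local file is the resource key in the shared cache.
      String resourceKey = scClient.getFileChecksum(new Path("/tmp/job.jar"));

      URL cached = scClient.use(appId, resourceKey);
      if (cached != null) {
        System.out.println("Resource already cached at " + cached);
      } else {
        System.out.println("Not in the cache; upload it and retry later.");
      }

      // Optional: tell the SharedCacheManager we are done with the resource.
      scClient.release(appId, resourceKey);
    } finally {
      scClient.stop();
    }
  }
}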
    + + + + + + + + + + +
    + + + + + + + + + + + + + + + + Obtain a {@link YarnClientApplication} for a new application, + which in turn contains the {@link ApplicationSubmissionContext} and + {@link org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse} + objects. +

    + + @return {@link YarnClientApplication} built for a new application + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Submit a new application to YARN. It is a blocking call - it + will not return {@link ApplicationId} until the submitted application is + submitted successfully and accepted by the ResourceManager. +

    + +

    + Users should provide an {@link ApplicationId} as part of the parameter + {@link ApplicationSubmissionContext} when submitting a new application, + otherwise it will throw the {@link ApplicationIdNotProvidedException}. +

    + +

    This internally calls {@link ApplicationClientProtocol#submitApplication + (SubmitApplicationRequest)}, and after that, it internally invokes + {@link ApplicationClientProtocol#getApplicationReport + (GetApplicationReportRequest)} and waits till it can make sure that the + application gets properly submitted. If RM fails over or RM restart + happens before ResourceManager saves the application's state, + {@link ApplicationClientProtocol + #getApplicationReport(GetApplicationReportRequest)} will throw + the {@link ApplicationNotFoundException}. This API automatically resubmits + the application with the same {@link ApplicationSubmissionContext} when it + catches the {@link ApplicationNotFoundException}

    + + @param appContext + {@link ApplicationSubmissionContext} containing all the details + needed to submit a new application + @return {@link ApplicationId} of the accepted application + @throws YarnException + @throws IOException + @see #createApplication()]]> +
    +
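A condensed flow for the createApplication/submitApplication pair described above; the application name, AM resource size, and launch command are placeholder assumptions.

import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class SubmitSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    try {
      // createApplication() hands back the pre-filled submission context.
      YarnClientApplication app = yarnClient.createApplication();
      ApplicationSubmissionContext ctx = app.getApplicationSubmissionContext();
      ctx.setApplicationName("sketch-app");
      ctx.setResource(Resource.newInstance(1024, 1)); // AM container size
      ctx.setAMContainerSpec(ContainerLaunchContext.newInstance(
          Collections.emptyMap(), Collections.emptyMap(),
          Collections.singletonList("sleep 60"), null, null,
          Collections.emptyMap()));

      // Blocking call: returns once the RM has accepted the submission.
      ApplicationId appId = yarnClient.submitApplication(ctx);

      // Poll the report until the application reaches a terminal state.
      ApplicationReport report = yarnClient.getApplicationReport(appId);
      while (report.getYarnApplicationState() != YarnApplicationState.FINISHED
          && report.getYarnApplicationState() != YarnApplicationState.FAILED
          && report.getYarnApplicationState() != YarnApplicationState.KILLED) {
        Thread.sleep(1000);
        report = yarnClient.getApplicationReport(appId);
      }
      System.out.println("Final status: " + report.getFinalApplicationStatus());
    } finally {
      yarnClient.stop();
    }
  }
}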
    + + + + + + + Fail an application attempt identified by given ID. +

    + + @param applicationAttemptId + {@link ApplicationAttemptId} of the attempt to fail. + @throws YarnException + in case of errors or if YARN rejects the request due to + access-control restrictions. + @throws IOException + @see #getQueueAclsInfo()]]> +
    +
    + + + + + + + Kill an application identified by given ID. +

    + + @param applicationId + {@link ApplicationId} of the application that needs to be killed + @throws YarnException + in case of errors or if YARN rejects the request due to + access-control restrictions. + @throws IOException + @see #getQueueAclsInfo()]]> +
    +
    + + + + + + + + Kill an application identified by given ID. +

    + @param applicationId {@link ApplicationId} of the application that needs to + be killed + @param diagnostics for killing an application. + @throws YarnException in case of errors or if YARN rejects the request due + to access-control restrictions. + @throws IOException]]> +
    +
    + + + + + + + Get a report of the given Application. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + +

    + If the user does not have VIEW_APP access then the following + fields in the report will be set to stubbed values: +

      +
    • host - set to "N/A"
    • +
    • RPC port - set to -1
    • +
    • client token - set to "N/A"
    • +
    • diagnostics - set to "N/A"
    • +
    • tracking URL - set to "N/A"
    • +
    • original tracking URL - set to "N/A"
    • +
    • resource usage report - all values are -1
    • +
    + + @param appId + {@link ApplicationId} of the application that needs a report + @return application report + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The AMRM token is required for AM to RM scheduling operations. For + managed Application Masters YARN takes care of injecting it. For unmanaged + Applications Masters, the token must be obtained via this method and set + in the {@link org.apache.hadoop.security.UserGroupInformation} of the + current user. +

    + The AMRM token will be returned only if all the following conditions are + met: +

      +
    • the requester is the owner of the ApplicationMaster
    • +
    • the application master is an unmanaged ApplicationMaster
    • +
    • the application master is in ACCEPTED state
    • +
    + Else this method returns NULL. + + @param appId {@link ApplicationId} of the application to get the AMRM token + @return the AMRM token if available + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Get a report (ApplicationReport) of all Applications in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @return a list of reports of all running applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report (ApplicationReport) of Applications + matching the given application types in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param applicationTypes set of application types you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report (ApplicationReport) of Applications matching the given + application states in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param applicationStates set of application states you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + Get a report (ApplicationReport) of Applications matching the given + application types and application states in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param applicationTypes set of application types you are interested in + @param applicationStates set of application states you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
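The filtered variants above compose naturally; here is a short sketch using an application-type plus application-state filter (both filter values are arbitrary examples).

import java.util.Collections;
import java.util.EnumSet;
import java.util.List;

import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ListAppsSketch {
  public static void main(String[] args) throws Exception {
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(new YarnConfiguration());
    yarnClient.start();
    try {
      // Only MAPREDUCE applications that are currently RUNNING.
      List<ApplicationReport> running = yarnClient.getApplications(
          Collections.singleton("MAPREDUCE"),
          EnumSet.of(YarnApplicationState.RUNNING));
      for (ApplicationReport report : running) {
        System.out.println(report.getApplicationId() + " "
            + report.getName() + " " + report.getProgress());
      }
    } finally {
      yarnClient.stop();
    }
  }
}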
    + + + + + + + + + Get a report (ApplicationReport) of Applications matching the given + application types, application states and application tags in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param applicationTypes set of application types you are interested in + @param applicationStates set of application states you are interested in + @param applicationTags set of application tags you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + + + Get a report (ApplicationReport) of Applications matching the given users, + queues, application types and application states in the cluster. If any of + the params is set to null, it is not used when filtering. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param queues set of queues you are interested in + @param users set of users you are interested in + @param applicationTypes set of application types you are interested in + @param applicationStates set of application states you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a list of ApplicationReports that match the given + {@link GetApplicationsRequest}. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param request the request object to get the list of applications. + @return The list of ApplicationReports that match the request + @throws YarnException Exception specific to YARN. + @throws IOException Exception mostly related to connection errors.]]> +
    +
    + + + + + + Get metrics ({@link YarnClusterMetrics}) about the cluster. +

    + + @return cluster metrics + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report of nodes ({@link NodeReport}) in the cluster. +

    + + @param states The {@link NodeState}s to filter on. If no filter states are + given, nodes in all states will be returned. + @return A list of node reports + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a delegation token so as to be able to talk to YARN using those tokens. + + @param renewer + Address of the renewer who can renew these tokens when needed by + securely talking to YARN. + @return a delegation token ({@link Token}) that can be used to + talk to YARN + @throws YarnException + @throws IOException]]> + + + + + + + + + Get information ({@link QueueInfo}) about a given queue. +

    + + @param queueName + Name of the queue whose information is needed + @return queue information + @throws YarnException + in case of errors or if YARN rejects the request due to + access-control restrictions. + @throws IOException]]> +
    +
    + + + + + + Get information ({@link QueueInfo}) about all queues, recursively if there + is a hierarchy +

    + + @return a list of queue-information for all queues + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Get information ({@link QueueInfo}) about top level queues. +

    + + @return a list of queue-information for all the top-level queues + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get information ({@link QueueInfo}) about all the immediate children queues + of the given queue +

    + + @param parent + Name of the queue whose child-queues' information is needed + @return a list of queue-information for all queues who are direct children + of the given parent queue. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Get information about acls for current user on all the + existing queues. +

    + + @return a list of queue acls ({@link QueueUserACLInfo}) for + current user + @throws YarnException + @throws IOException]]> +
    +
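A small sketch tying together the queue and node inspection calls described above; the queue name "default" is only an example.

import java.util.List;

import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ClusterInfoSketch {
  public static void main(String[] args) throws Exception {
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(new YarnConfiguration());
    yarnClient.start();
    try {
      QueueInfo defaultQueue = yarnClient.getQueueInfo("default");
      System.out.println("default queue capacity: " + defaultQueue.getCapacity());

      for (QueueUserACLInfo acl : yarnClient.getQueueAclsInfo()) {
        System.out.println(acl.getQueueName() + " -> " + acl.getUserAcls());
      }

      // Only nodes currently in the RUNNING state.
      List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
      System.out.println("running nodes: " + nodes.size());
    } finally {
      yarnClient.stop();
    }
  }
}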
    + + + + + + + Get a report of the given ApplicationAttempt. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + + @param applicationAttemptId + {@link ApplicationAttemptId} of the application attempt that needs + a report + @return application attempt report + @throws YarnException + @throws ApplicationAttemptNotFoundException if application attempt + not found + @throws IOException]]> +
    +
    + + + + + + + Get a report of all (ApplicationAttempts) of Application in the cluster. +

    + + @param applicationId application id of the app + @return a list of reports for all application attempts for specified + application. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report of the given Container. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + + @param containerId + {@link ContainerId} of the container that needs a report + @return container report + @throws YarnException + @throws ContainerNotFoundException if container not found. + @throws IOException]]> +
    +
    + + + + + + + Get a report of all (Containers) of ApplicationAttempt in the cluster. +

    + + @param applicationAttemptId application attempt id + @return a list of reports of all containers for specified application + attempts + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + Attempts to move the given application to the given queue. +

    + + @param appId + Application to move. + @param queue + Queue to place it in to. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Obtain a {@link GetNewReservationResponse} for a new reservation, + which contains the {@link ReservationId} object. +

    + + @return The {@link GetNewReservationResponse} containing a new + {@link ReservationId} object. + @throws YarnException if reservation cannot be created. + @throws IOException if reservation cannot be created.]]> +
    +
    + + + + + + + The interface used by clients to submit a new reservation to the + {@code ResourceManager}. +

    + +

    + The client packages all details of its request in a + {@link ReservationSubmissionRequest} object. This contains information + about the amount of capacity, temporal constraints, and gang needs. + Furthermore, the reservation might be composed of multiple stages, with + ordering dependencies among them. +

    + +

+ In order to respond, a new admission control component in the + {@code ResourceManager} performs an analysis of the resources that have + been committed over the period of time the user is requesting, verifies that + the user's requests can be fulfilled, and that they respect the sharing policy + (e.g., {@code CapacityOverTimePolicy}). Once it has positively determined + that the ReservationRequest is satisfiable, the {@code ResourceManager} + answers with a {@link ReservationSubmissionResponse} that includes a + {@link ReservationId}. Upon failure to find a valid allocation, the response + is an exception with a message detailing the reason for the failure. +

    + +

+ The semantics guarantee that the {@link ReservationId} returned + corresponds to a valid reservation existing in the time range requested by + the user. The amount of capacity dedicated to such a reservation can vary + over time, depending on the allocation that has been determined, but it is + guaranteed to satisfy all the constraints expressed by the user in the + {@link ReservationDefinition}. +

    + + @param request request to submit a new Reservation + @return response contains the {@link ReservationId} on accepting the + submission + @throws YarnException if the reservation cannot be created successfully + @throws IOException]]> +
    +
+ + + + + + + The interface used by clients to update an existing Reservation. This is + referred to as a re-negotiation process, in which a user modifies a + Reservation that was previously submitted. +

    + +

    + The allocation is attempted by virtually substituting all previous + allocations related to this Reservation with new ones, that satisfy the new + {@link ReservationDefinition}. Upon success the previous allocation is + atomically substituted by the new one, and on failure (i.e., if the system + cannot find a valid allocation for the updated request), the previous + allocation remains valid. +

    + + @param request to update an existing Reservation (the + {@link ReservationUpdateRequest} should refer to an existing valid + {@link ReservationId}) + @return response empty on successfully updating the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + updated successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to remove an existing Reservation. +

    + + @param request to remove an existing Reservation (the + {@link ReservationDeleteRequest} should refer to an existing valid + {@link ReservationId}) + @return response empty on successfully deleting the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + deleted successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to get the list of reservations in a plan. + The reservationId will be used to search for reservations to list if it is + provided. Otherwise, it will select active reservations within the + startTime and endTime (inclusive). +

    + + @param request to list reservations in a plan. Contains fields to select + String queue, ReservationId reservationId, long startTime, + long endTime, and a bool includeReservationAllocations. + + queue: Required. Cannot be null or empty. Refers to the + reservable queue in the scheduler that was selected when + creating a reservation submission + {@link ReservationSubmissionRequest}. + + reservationId: Optional. If provided, other fields will + be ignored. + + startTime: Optional. If provided, only reservations that + end after the startTime will be selected. This defaults + to 0 if an invalid number is used. + + endTime: Optional. If provided, only reservations that + start on or before endTime will be selected. This defaults + to Long.MAX_VALUE if an invalid number is used. + + includeReservationAllocations: Optional. Flag that + determines whether the entire reservation allocations are + to be returned. Reservation allocations are subject to + change in the event of re-planning as described by + {@link ReservationDefinition}. + + @return response that contains information about reservations that are + being searched for. + @throws YarnException if the request is invalid + @throws IOException if the request failed otherwise]]> +
    +
    + + + + + + The interface used by client to get node to labels mappings in existing cluster +

    + + @return node to labels mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by client to get labels to nodes mapping + in existing cluster +

    + + @return node to labels mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get labels to nodes mapping + for specified labels in existing cluster +

    + + @param labels labels for which labels to nodes mapping has to be retrieved + @return labels to nodes mappings for specific labels + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by client to get node labels in the cluster +

    + + @return cluster node labels collection + @throws YarnException when there is a failure in + {@link ApplicationClientProtocol} + @throws IOException when there is a failure in + {@link ApplicationClientProtocol}]]> +
    +
    + + + + + + + + The interface used by client to set priority of an application +

    + @param applicationId + @param priority + @return updated priority of an application. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + Signal a container identified by given ID. +

    + + @param containerId + {@link ContainerId} of the container that needs to be signaled + @param command the signal container command + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + + + + Get the resource profiles available in the RM. +

    + @return a Map of the resource profile names to their capabilities + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + + Get the details of a specific resource profile from the RM. +

+ @param profile the profile name + @return resource profile name with its capabilities + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + Get available resource types supported by RM. +

+ @return list of supported resource types with detailed information + @throws YarnException if any issue happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + The interface used by client to get node attributes in the cluster. +

    + + @return cluster node attributes collection + @throws YarnException when there is a failure in + {@link ApplicationClientProtocol} + @throws IOException when there is a failure in + {@link ApplicationClientProtocol}]]> +
    +
    + + + + + + + The interface used by client to get mapping of AttributeKey to associated + NodeToAttributeValue list for specified node attributeKeys in the cluster. +

+ + @param attributes AttributeKeys for which the associated NodeToAttributeValue + mapping has to be retrieved. If empty or null, the mapping for all + attribute keys in the cluster is returned. + @return mapping of AttributeKey to List of associated + NodeToAttributeValue's. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get all node to attribute mapping in + existing cluster. +

+ + @param hostNames HostNames for which the host-to-attributes mapping has to + be retrieved. If empty or null, the node-to-attributes mapping for all + nodes in the cluster is returned. + @return Node to attribute mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get a shell to a container. +

    + + @param containerId Container ID + @param command Shell type + @throws IOException if connection fails.]]> +
    +
    +
    + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + Create a new instance of AMRMClientAsync.

    + + @param intervalMs heartbeat interval in milliseconds between AM and RM + @param callbackHandler callback handler that processes responses from + the ResourceManager]]> +
    +
    + + + + + + Create a new instance of AMRMClientAsync.

    + + @param client the AMRMClient instance + @param intervalMs heartbeat interval in milliseconds between AM and RM + @param callbackHandler callback handler that processes responses from + the ResourceManager]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RegisterApplicationMasterResponse + @throws YarnException + @throws IOException]]> + + + + + + + + + + + + + + + + allocate + @param req Resource request]]> + + + + + + + + + + + + + allocate. + Any previous pending resource change request of the same container will be + removed. + + Application that calls this method is expected to maintain the + Containers that are returned from previous successful + allocations or resource changes. By passing in the existing container and a + target resource capability to this method, the application requests the + ResourceManager to change the existing resource allocation to the target + resource allocation. + + @deprecated use + {@link #requestContainerUpdate(Container, UpdateContainerRequest)} + + @param container The container returned from the last successful resource + allocation or resource change + @param capability The target resource capability of the container]]> + + + + + + + allocate. + Any previous pending update request of the same container will be + removed. + + @param container The container returned from the last successful resource + allocation or update + @param updateContainerRequest The UpdateContainerRequest.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + check to return true for each 1000 ms. + See also {@link #waitFor(java.util.function.Supplier, int)} + and {@link #waitFor(java.util.function.Supplier, int, int)} + @param check the condition for which it should wait]]> + + + + + + + + check to return true for each + checkEveryMillis ms. + See also {@link #waitFor(java.util.function.Supplier, int, int)} + @param check user defined checker + @param checkEveryMillis interval to call check]]> + + + + + + + + + check to return true for each + checkEveryMillis ms. In the main loop, this method will log + the message "waiting in main loop" for each logInterval times + iteration to confirm the thread is alive. + @param check user defined checker + @param checkEveryMillis interval to call check + @param logInterval interval to log for each]]> + + + + + + + + + + AMRMClientAsync handles communication with the ResourceManager + and provides asynchronous updates on events such as container allocations and + completions. It contains a thread that sends periodic heartbeats to the + ResourceManager. + + It should be used by implementing a CallbackHandler: +
    + {@code
    + class MyCallbackHandler extends AMRMClientAsync.AbstractCallbackHandler {
    +   public void onContainersAllocated(List containers) {
    +     [run tasks on the containers]
    +   }
    +
    +   public void onContainersUpdated(List containers) {
    +     [determine if resource allocation of containers have been increased in
    +      the ResourceManager, and if so, inform the NodeManagers to increase the
    +      resource monitor/enforcement on the containers]
    +   }
    +
    +   public void onContainersCompleted(List statuses) {
    +     [update progress, check whether app is done]
    +   }
    +   
    +   public void onNodesUpdated(List updated) {}
    +   
    +   public void onReboot() {}
    + }
    + }
    + 
    + + The client's lifecycle should be managed similarly to the following: + +
    + {@code
    + AMRMClientAsync asyncClient = 
    +     createAMRMClientAsync(appAttId, 1000, new MyCallbackhandler());
    + asyncClient.init(conf);
    + asyncClient.start();
    + RegisterApplicationMasterResponse response = asyncClient
    +    .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
    +       appMasterTrackingUrl);
    + asyncClient.addContainerRequest(containerRequest);
    + [... wait for application to complete]
    + asyncClient.unregisterApplicationMaster(status, appMsg, trackingUrl);
    + asyncClient.stop();
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Update the resources of a container.

    + +

    The ApplicationMaster or other applications that use the + client must provide the details of the container, including the Id and + the target resource encapsulated in the updated container token via + {@link Container}. +

    + + @param container the container with updated token.]]> +
    +
    + + + + + + Re-Initialize the Container.

    + + @param containerId the Id of the container to Re-Initialize. + @param containerLaunchContex the updated ContainerLaunchContext. + @param autoCommit commit re-initialization automatically ?]]> +
    +
    + + + + Restart the specified container.

    + + @param containerId the Id of the container to restart.]]> +
    +
    + + + + Rollback last reInitialization of the specified container.

    + + @param containerId the Id of the container to restart.]]> +
    +
    + + + + Commit last reInitialization of the specified container.

    + + @param containerId the Id of the container to commit reInitialize.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + NMClientAsync handles communication with all the NodeManagers + and provides asynchronous updates on getting responses from them. It + maintains a thread pool to communicate with individual NMs where a number of + worker threads process requests to NMs by using {@link NMClientImpl}. The max + size of the thread pool is configurable through + {@link YarnConfiguration#NM_CLIENT_ASYNC_THREAD_POOL_MAX_SIZE}. + + It should be used in conjunction with a CallbackHandler. For example + +
    + {@code
    + class MyCallbackHandler extends NMClientAsync.AbstractCallbackHandler {
    +   public void onContainerStarted(ContainerId containerId,
    +       Map allServiceResponse) {
    +     [post process after the container is started, process the response]
    +   }
    +
    +   public void onContainerResourceIncreased(ContainerId containerId,
    +       Resource resource) {
    +     [post process after the container resource is increased]
    +   }
    +
    +   public void onContainerStatusReceived(ContainerId containerId,
    +       ContainerStatus containerStatus) {
    +     [make use of the status of the container]
    +   }
    +
    +   public void onContainerStopped(ContainerId containerId) {
    +     [post process after the container is stopped]
    +   }
    +
    +   public void onStartContainerError(
    +       ContainerId containerId, Throwable t) {
    +     [handle the raised exception]
    +   }
    +
    +   public void onGetContainerStatusError(
    +       ContainerId containerId, Throwable t) {
    +     [handle the raised exception]
    +   }
    +
    +   public void onStopContainerError(
    +       ContainerId containerId, Throwable t) {
    +     [handle the raised exception]
    +   }
    + }
    + }
    + 
    + + The client's life-cycle should be managed like the following: + +
    + {@code
    + NMClientAsync asyncClient = 
    +     NMClientAsync.createNMClientAsync(new MyCallbackhandler());
    + asyncClient.init(conf);
    + asyncClient.start();
    + asyncClient.startContainer(container, containerLaunchContext);
    + [... wait for container being started]
    + asyncClient.getContainerStatus(container.getId(), container.getNodeId(),
    +     container.getContainerToken());
    + [... handle the status in the callback instance]
    + asyncClient.stopContainer(container.getId(), container.getNodeId(),
    +     container.getContainerToken());
    + [... wait for container being stopped]
    + asyncClient.stop();
    + }
    + 
    ]]> +
    +
    + +
diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml
new file mode 100644
index 00000000000..311a793df5a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml
@@ -0,0 +1,3982 @@
+ Type of proxy.
+ @return Proxy to the ResourceManager for the specified client protocol.
+ @throws IOException]]>

    + + @param appType application type + @param conf configuration + @return app admin client]]> +
    +
    + + + + + + + + + + Launch a new YARN application. +

    + + @param fileName specification of application + @param appName name of the application + @param lifetime lifetime of the application + @param queue queue of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + Stop a YARN application (attempt to stop gracefully before killing the + application). In the case of a long-running service, the service may be + restarted later. +

    + + @param appName the name of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + Start a YARN application from a previously saved specification. In the + case of a long-running service, the service must have been previously + launched/started and then stopped, or previously saved but not started. +

    + + @param appName the name of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + + + + Save the specification for a YARN application / long-running service. + The application may be started later. +

    + + @param fileName specification of application to save + @param appName name of the application + @param lifetime lifetime of the application + @param queue queue of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + Remove the specification and all application data for a YARN application. + The application cannot be running. +

    + + @param appName the name of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + + Change the number of running containers for a component of a YARN + application / long-running service. +

    + + @param appName the name of the application + @param componentCounts map of component name to new component count or + amount to change existing component count (e.g. + 5, +5, -5) + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + Upload AM dependencies to HDFS. This makes future application launches + faster since the dependencies do not have to be uploaded on each launch. +

    + + @param destinationFolder + an optional HDFS folder where dependency tarball will be uploaded + @return exit code + @throws IOException + IOException + @throws YarnException + exception in client or server]]> +
    +
    + + + + + + + Get detailed app specific status string for a YARN application. +

    + + @param appIdOrName appId or appName + @return status string + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
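A rough sketch of driving these operations programmatically. The "yarn-service" application type, the spec file path, the service name, and the assumption that createAppAdminClient returns a ready-to-use client are all assumptions in this sketch, not guarantees from the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.client.api.AppAdminClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ServiceAdminSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new YarnConfiguration();
    // "yarn-service" selects the built-in service framework implementation
    // (an assumption here; check AppAdminClient for the exact type constant).
    AppAdminClient client = AppAdminClient.createAppAdminClient("yarn-service", conf);

    // Launch from a spec file, query its status, then stop and destroy it.
    int rc = client.actionLaunch("/tmp/sleeper.json", "sleeper-service", null, null);
    System.out.println("launch exit code: " + rc);
    System.out.println(client.getStatusString("sleeper-service"));
    client.actionStop("sleeper-service");
    client.actionDestroy("sleeper-service");
  }
}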
    + + + + + + + + + + + + + + + + + + Send the information of a number of conceptual entities to the timeline + server. It is a blocking API. The method will not return until it gets the + response from the timeline server. +

    + + @param entities + the collection of {@link TimelineEntity} + @return the error information if the sent entities are not correctly stored + @throws IOException if there are I/O errors + @throws YarnException if entities are incomplete/invalid]]> +
    +
    + + + + + + + + + Send the information of a number of conceptual entities to the timeline + server. It is a blocking API. The method will not return until it gets the + response from the timeline server. + + This API is only for timeline service v1.5 +

    + + @param appAttemptId {@link ApplicationAttemptId} + @param groupId {@link TimelineEntityGroupId} + @param entities + the collection of {@link TimelineEntity} + @return the error information if the sent entities are not correctly stored + @throws IOException if there are I/O errors + @throws YarnException if entities are incomplete/invalid]]> +
    +
    + + + + + + + Send the information of a domain to the timeline server. It is a + blocking API. The method will not return until it gets the response from + the timeline server. +

    + + @param domain + an {@link TimelineDomain} object + @throws IOException + @throws YarnException]]> +
    +
    + + + + + + + + Send the information of a domain to the timeline server. It is a + blocking API. The method will not return until it gets the response from + the timeline server. + + This API is only for timeline service v1.5 +

    + + @param domain + an {@link TimelineDomain} object + @param appAttemptId {@link ApplicationAttemptId} + @throws IOException + @throws YarnException]]> +
    +
    + + + + + + + Get a delegation token so as to be able to talk to the timeline server in a + secure way. +

    + + @param renewer + Address of the renewer who can renew these tokens when needed by + securely talking to the timeline server + @return a delegation token ({@link Token}) that can be used to talk to the + timeline server + @throws IOException + @throws YarnException]]> +
    +
    + + + + + + + Renew a timeline delegation token. +

    + + @param timelineDT + the delegation token to renew + @return the new expiration time + @throws IOException + @throws YarnException]]> +
    +
    + + + + + + + Cancel a timeline delegation token. +

    + + @param timelineDT + the delegation token to cancel + @throws IOException + @throws YarnException]]> +
    +
    + + + +
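A minimal publisher for the v1 timeline calls described above; the entity type, id, and event name are illustrative values.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent;
import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
import org.apache.hadoop.yarn.client.api.TimelineClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class TimelinePublishSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new YarnConfiguration();
    TimelineClient timelineClient = TimelineClient.createTimelineClient();
    timelineClient.init(conf);
    timelineClient.start();
    try {
      TimelineEntity entity = new TimelineEntity();
      entity.setEntityType("SKETCH_JOB");   // illustrative type
      entity.setEntityId("job-0001");       // illustrative id
      entity.setStartTime(System.currentTimeMillis());

      TimelineEvent started = new TimelineEvent();
      started.setEventType("JOB_STARTED");
      started.setTimestamp(System.currentTimeMillis());
      entity.addEvent(started);

      // Blocking call; the response reports entities that were rejected.
      TimelinePutResponse response = timelineClient.putEntities(entity);
      System.out.println("errors: " + response.getErrors().size());
    } finally {
      timelineClient.stop();
    }
  }
}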
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + parameterized event of type T]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + InputStream to be checksumed + @return the message digest of the input stream + @throws IOException]]> + + + + + + + + + + + + SharedCacheChecksum object based on the configurable + algorithm implementation + (see yarn.sharedcache.checksum.algo.impl) + + @return SharedCacheChecksum object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The object type on which this state machine operates. + @param The state of the entity. + @param The external eventType to be handled. + @param The event object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When {@link #limit} would be reached on append, past messages will be + truncated from head, and a header telling the user about truncation will be + prepended, with ellipses in between header and messages. +

    + Note that header and ellipses are not counted against {@link #limit}. +

    + An example: + +

    + {@code
    +   // At the beginning it's an empty string
    +   final Appendable shortAppender = new BoundedAppender(80);
    +   // The whole message fits into limit
    +   shortAppender.append(
    +       "message1 this is a very long message but fitting into limit\n");
    +   // The first message is truncated, the second not
    +   shortAppender.append("message2 this is shorter than the previous one\n");
    +   // The first message is deleted, the second truncated, the third
    +   // preserved
    +   shortAppender.append("message3 this is even shorter message, maybe.\n");
    +   // The first two are deleted, the third one truncated, the last preserved
    +   shortAppender.append("message4 the shortest one, yet the greatest :)");
    +   // Current contents are like this:
    +   // Diagnostic messages truncated, showing last 80 chars out of 199:
    +   // ...s is even shorter message, maybe.
    +   // message4 the shortest one, yet the greatest :)
    + }
    + 
    +

    + Note that null values are {@link #append(CharSequence) append}ed + just like in {@link StringBuilder#append(CharSequence) original + implementation}. +

    + Note that this class is not thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.3.5.xml new file mode 100644 index 00000000000..123217545fe --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.3.5.xml @@ -0,0 +1,1456 @@ + + + + + + + + + + + + + + + + + + + + + + + + true if the node is healthy, else false]]> + + + + + diagnostic health report of the node. + @return diagnostic health report of the node]]> + + + + + last timestamp at which the health report was received. + @return last timestamp at which the health report was received]]> + + + + + It includes information such as: +

      +
    • + An indicator of whether the node is healthy, as determined by the + health-check script. +
    • +
    • The previous time at which the health status was reported.
    • +
    • A diagnostic report on the health status.
    • +
    + + @see NodeReport + @see ApplicationClientProtocol#getClusterNodes(org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest)]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + type of the proxy + @return the proxy instance + @throws IOException if fails to create the proxy]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the iteration has more elements.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
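The truncation contract summarised above can be illustrated with a small, self-contained
sketch. This is not the Hadoop BoundedAppender itself; the class name TruncatingDiagnostics
and the exact header wording are illustrative assumptions.

final class TruncatingDiagnostics {
  private final int limit;
  private final StringBuilder messages = new StringBuilder();
  private long totalChars;

  TruncatingDiagnostics(int limit) {
    this.limit = limit;
  }

  TruncatingDiagnostics append(CharSequence csq) {
    // null is appended as the four characters "null", matching StringBuilder semantics
    messages.append(csq);
    totalChars += (csq == null) ? 4 : csq.length();
    if (messages.length() > limit) {
      // drop the oldest characters so only the last 'limit' characters survive
      messages.delete(0, messages.length() - limit);
    }
    return this;
  }

  @Override
  public String toString() {
    if (totalChars <= limit) {
      return messages.toString();
    }
    // the header and the ellipsis are not counted against the limit
    return "Diagnostic messages truncated, showing last " + limit
        + " chars out of " + totalChars + ":\n..." + messages;
  }
}

With a limit of 80 and the four example messages from the javadoc, this sketch keeps
roughly the same tail as the listing shown above.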
    From 0e3aafe6c08ada5cbc48053e800f141be6890423 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Tue, 18 Apr 2023 08:37:48 -0700 Subject: [PATCH 76/97] HADOOP-18399. S3A Prefetch - SingleFilePerBlockCache to use LocalDirAllocator (#5054) Contributed by Viraj Jasani --- .../hadoop/fs/impl/prefetch/BlockCache.java | 8 +- .../fs/impl/prefetch/CachingBlockManager.java | 20 ++- .../prefetch/SingleFilePerBlockCache.java | 87 ++++++++--- .../fs/impl/prefetch/TestBlockCache.java | 11 +- hadoop-tools/hadoop-aws/pom.xml | 5 + .../apache/hadoop/fs/s3a/S3AFileSystem.java | 27 +++- .../s3a/prefetch/S3ACachingBlockManager.java | 12 +- .../s3a/prefetch/S3ACachingInputStream.java | 25 ++- .../prefetch/S3APrefetchingInputStream.java | 13 +- .../fs/s3a/ITestS3APrefetchingCacheFiles.java | 144 ++++++++++++++++++ .../fs/s3a/prefetch/S3APrefetchFakes.java | 27 +++- .../prefetch/TestS3ACachingBlockManager.java | 34 +++-- .../prefetch/TestS3ARemoteInputStream.java | 13 +- 13 files changed, 356 insertions(+), 70 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3APrefetchingCacheFiles.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java index c18dc519188..2990696ee1b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java @@ -23,6 +23,9 @@ import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; + /** * Provides functionality necessary for caching blocks of data read from FileSystem. */ @@ -64,7 +67,10 @@ public interface BlockCache extends Closeable { * * @param blockNumber the id of the given block. * @param buffer contents of the given block to be added to this cache. + * @param conf the configuration. + * @param localDirAllocator the local dir allocator instance. * @throws IOException if there is an error writing the given block. 
*/ - void put(int blockNumber, ByteBuffer buffer) throws IOException; + void put(int blockNumber, ByteBuffer buffer, Configuration conf, + LocalDirAllocator localDirAllocator) throws IOException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java index a0db4b308b6..e43b176d0bf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java @@ -33,6 +33,8 @@ import java.util.function.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.statistics.DurationTracker; import static java.util.Objects.requireNonNull; @@ -95,6 +97,10 @@ public abstract class CachingBlockManager extends BlockManager { private final PrefetchingStatistics prefetchingStatistics; + private final Configuration conf; + + private final LocalDirAllocator localDirAllocator; + /** * Constructs an instance of a {@code CachingBlockManager}. * @@ -102,14 +108,17 @@ public abstract class CachingBlockManager extends BlockManager { * @param blockData information about each block of the underlying file. * @param bufferPoolSize size of the in-memory cache in terms of number of blocks. * @param prefetchingStatistics statistics for this stream. - * + * @param conf the configuration. + * @param localDirAllocator the local dir allocator instance. * @throws IllegalArgumentException if bufferPoolSize is zero or negative. 
*/ public CachingBlockManager( ExecutorServiceFuturePool futurePool, BlockData blockData, int bufferPoolSize, - PrefetchingStatistics prefetchingStatistics) { + PrefetchingStatistics prefetchingStatistics, + Configuration conf, + LocalDirAllocator localDirAllocator) { super(blockData); Validate.checkPositiveInteger(bufferPoolSize, "bufferPoolSize"); @@ -129,6 +138,8 @@ public abstract class CachingBlockManager extends BlockManager { this.ops = new BlockOperations(); this.ops.setDebug(false); + this.conf = requireNonNull(conf); + this.localDirAllocator = localDirAllocator; } /** @@ -468,7 +479,8 @@ public abstract class CachingBlockManager extends BlockManager { blockFuture = cf; } - CachePutTask task = new CachePutTask(data, blockFuture, this, Instant.now()); + CachePutTask task = + new CachePutTask(data, blockFuture, this, Instant.now()); Future actionFuture = futurePool.executeFunction(task); data.setCaching(actionFuture); ops.end(op); @@ -554,7 +566,7 @@ public abstract class CachingBlockManager extends BlockManager { return; } - cache.put(blockNumber, buffer); + cache.put(blockNumber, buffer, conf, localDirAllocator); } private static class CachePutTask implements Supplier { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java index c84335a763e..11416032651 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java @@ -27,10 +27,9 @@ import java.nio.channels.WritableByteChannel; import java.nio.file.Files; import java.nio.file.OpenOption; import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.StandardOpenOption; -import java.nio.file.attribute.FileAttribute; import java.nio.file.attribute.PosixFilePermission; -import java.nio.file.attribute.PosixFilePermissions; import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; @@ -39,9 +38,13 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; + import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull; @@ -67,6 +70,12 @@ public class SingleFilePerBlockCache implements BlockCache { private final PrefetchingStatistics prefetchingStatistics; + /** + * File attributes attached to any intermediate temporary file created during index creation. + */ + private static final Set TEMP_FILE_ATTRS = + ImmutableSet.of(PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE); + /** * Cache entry. * Each block is stored as a separate file. @@ -172,11 +181,17 @@ public class SingleFilePerBlockCache implements BlockCache { /** * Puts the given block in this cache. * - * @throws IllegalArgumentException if buffer is null. - * @throws IllegalArgumentException if buffer.limit() is zero or negative. + * @param blockNumber the block number, used as a key for blocks map. + * @param buffer buffer contents of the given block to be added to this cache. + * @param conf the configuration. 
+ * @param localDirAllocator the local dir allocator instance. + * @throws IOException if either local dir allocator fails to allocate file or if IO error + * occurs while writing the buffer content to the file. + * @throws IllegalArgumentException if buffer is null, or if buffer.limit() is zero or negative. */ @Override - public void put(int blockNumber, ByteBuffer buffer) throws IOException { + public void put(int blockNumber, ByteBuffer buffer, Configuration conf, + LocalDirAllocator localDirAllocator) throws IOException { if (closed) { return; } @@ -191,7 +206,7 @@ public class SingleFilePerBlockCache implements BlockCache { Validate.checkPositiveInteger(buffer.limit(), "buffer.limit()"); - Path blockFilePath = getCacheFilePath(); + Path blockFilePath = getCacheFilePath(conf, localDirAllocator); long size = Files.size(blockFilePath); if (size != 0) { String message = @@ -221,8 +236,19 @@ public class SingleFilePerBlockCache implements BlockCache { writeChannel.close(); } - protected Path getCacheFilePath() throws IOException { - return getTempFilePath(); + /** + * Return temporary file created based on the file path retrieved from local dir allocator. + * + * @param conf The configuration object. + * @param localDirAllocator Local dir allocator instance. + * @return Path of the temporary file created. + * @throws IOException if IO error occurs while local dir allocator tries to retrieve path + * from local FS or file creation fails or permission set fails. + */ + protected Path getCacheFilePath(final Configuration conf, + final LocalDirAllocator localDirAllocator) + throws IOException { + return getTempFilePath(conf, localDirAllocator); } @Override @@ -323,9 +349,19 @@ public class SingleFilePerBlockCache implements BlockCache { private static final String CACHE_FILE_PREFIX = "fs-cache-"; - public static boolean isCacheSpaceAvailable(long fileSize) { + /** + * Determine if the cache space is available on the local FS. + * + * @param fileSize The size of the file. + * @param conf The configuration. + * @param localDirAllocator Local dir allocator instance. + * @return True if the given file size is less than the available free space on local FS, + * False otherwise. + */ + public static boolean isCacheSpaceAvailable(long fileSize, Configuration conf, + LocalDirAllocator localDirAllocator) { try { - Path cacheFilePath = getTempFilePath(); + Path cacheFilePath = getTempFilePath(conf, localDirAllocator); long freeSpace = new File(cacheFilePath.toString()).getUsableSpace(); LOG.info("fileSize = {}, freeSpace = {}", fileSize, freeSpace); Files.deleteIfExists(cacheFilePath); @@ -339,16 +375,25 @@ public class SingleFilePerBlockCache implements BlockCache { // The suffix (file extension) of each serialized index file. private static final String BINARY_FILE_SUFFIX = ".bin"; - // File attributes attached to any intermediate temporary file created during index creation. - private static final FileAttribute> TEMP_FILE_ATTRS = - PosixFilePermissions.asFileAttribute(EnumSet.of(PosixFilePermission.OWNER_READ, - PosixFilePermission.OWNER_WRITE)); - - private static Path getTempFilePath() throws IOException { - return Files.createTempFile( - CACHE_FILE_PREFIX, - BINARY_FILE_SUFFIX, - TEMP_FILE_ATTRS - ); + /** + * Create temporary file based on the file path retrieved from local dir allocator + * instance. The file is created with .bin suffix. The created file has been granted + * posix file permissions available in TEMP_FILE_ATTRS. + * + * @param conf the configuration. 
+ * @param localDirAllocator the local dir allocator instance. + * @return path of the file created. + * @throws IOException if IO error occurs while local dir allocator tries to retrieve path + * from local FS or file creation fails or permission set fails. + */ + private static Path getTempFilePath(final Configuration conf, + final LocalDirAllocator localDirAllocator) throws IOException { + org.apache.hadoop.fs.Path path = + localDirAllocator.getLocalPathForWrite(CACHE_FILE_PREFIX, conf); + File dir = new File(path.getParent().toUri().getPath()); + String prefix = path.getName(); + File tmpFile = File.createTempFile(prefix, BINARY_FILE_SUFFIX, dir); + Path tmpFilePath = Paths.get(tmpFile.toURI()); + return Files.setPosixFilePermissions(tmpFilePath, TEMP_FILE_ATTRS); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockCache.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockCache.java index 2ea041283a7..3b60c1c7953 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockCache.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/TestBlockCache.java @@ -23,8 +23,11 @@ import java.nio.ByteBuffer; import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.test.AbstractHadoopTestBase; +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_TMP_DIR; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -36,6 +39,8 @@ public class TestBlockCache extends AbstractHadoopTestBase { private static final int BUFFER_SIZE = 16; + private static final Configuration CONF = new Configuration(); + @Test public void testArgChecks() throws Exception { // Should not throw. @@ -46,7 +51,7 @@ public class TestBlockCache extends AbstractHadoopTestBase { // Verify it throws correctly. 
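The getTempFilePath change above hands cache-file placement to LocalDirAllocator and then
restricts the created file to its owner. A minimal stand-alone sketch of that pattern,
assuming hadoop-common on the classpath, a POSIX file system, and hadoop.tmp.dir as the
directory key; the class name PrefetchCacheFileDemo is illustrative, and the production
code uses whichever configuration key and allocator the S3A layer passes in.

import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.attribute.PosixFilePermission;
import java.util.EnumSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;

public final class PrefetchCacheFileDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // the allocator picks one of the comma-separated directories configured
    // under this key, creating it if necessary
    LocalDirAllocator allocator = new LocalDirAllocator("hadoop.tmp.dir");
    // ask for a writable location; the parent of the returned path is the chosen directory
    org.apache.hadoop.fs.Path probe = allocator.getLocalPathForWrite("fs-cache-", conf);
    File dir = new File(probe.getParent().toUri().getPath());
    // create the block file in that directory with the ".bin" suffix the cache uses
    File blockFile = File.createTempFile(probe.getName(), ".bin", dir);
    // owner read/write only, as the integration test asserts
    Files.setPosixFilePermissions(Paths.get(blockFile.toURI()),
        EnumSet.of(PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE));
    System.out.println("cache block file: " + blockFile.getAbsolutePath());
  }
}

Routing allocation through LocalDirAllocator is what lets the prefetch cache honour
fs.s3a.buffer.dir (falling back to hadoop.tmp.dir) instead of always writing into the
JVM default temporary directory, which is the point of this patch.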
intercept(IllegalArgumentException.class, "'buffer' must not be null", - () -> cache.put(42, null)); + () -> cache.put(42, null, null, null)); intercept(NullPointerException.class, null, @@ -67,7 +72,7 @@ public class TestBlockCache extends AbstractHadoopTestBase { assertEquals(0, cache.size()); assertFalse(cache.containsBlock(0)); - cache.put(0, buffer1); + cache.put(0, buffer1, CONF, new LocalDirAllocator(HADOOP_TMP_DIR)); assertEquals(1, cache.size()); assertTrue(cache.containsBlock(0)); ByteBuffer buffer2 = ByteBuffer.allocate(BUFFER_SIZE); @@ -77,7 +82,7 @@ public class TestBlockCache extends AbstractHadoopTestBase { assertEquals(1, cache.size()); assertFalse(cache.containsBlock(1)); - cache.put(1, buffer1); + cache.put(1, buffer1, CONF, new LocalDirAllocator(HADOOP_TMP_DIR)); assertEquals(2, cache.size()); assertTrue(cache.containsBlock(1)); ByteBuffer buffer3 = ByteBuffer.allocate(BUFFER_SIZE); diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index ae8db93329d..3bd973567c1 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -200,6 +200,9 @@ **/ITestMarkerToolRootOperations.java **/ITestAggregateIOStatistics.java + + **/ITestS3APrefetchingCacheFiles.java @@ -246,6 +249,8 @@ **/ITestS3AContractRootDir.java **/ITestAggregateIOStatistics.java + + **/ITestS3APrefetchingCacheFiles.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index a73bd55b55e..30b2813caf7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1368,6 +1368,21 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ File createTmpFileForWrite(String pathStr, long size, Configuration conf) throws IOException { + initLocalDirAllocatorIfNotInitialized(conf); + Path path = directoryAllocator.getLocalPathForWrite(pathStr, + size, conf); + File dir = new File(path.getParent().toUri().getPath()); + String prefix = path.getName(); + // create a temp file on this directory + return File.createTempFile(prefix, null, dir); + } + + /** + * Initialize dir allocator if not already initialized. + * + * @param conf The Configuration object. 
+ */ + private void initLocalDirAllocatorIfNotInitialized(Configuration conf) { if (directoryAllocator == null) { synchronized (this) { String bufferDir = conf.get(BUFFER_DIR) != null @@ -1375,12 +1390,6 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, directoryAllocator = new LocalDirAllocator(bufferDir); } } - Path path = directoryAllocator.getLocalPathForWrite(pathStr, - size, conf); - File dir = new File(path.getParent().toUri().getPath()); - String prefix = path.getName(); - // create a temp file on this directory - return File.createTempFile(prefix, null, dir); } /** @@ -1573,12 +1582,16 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, LOG.debug("Opening '{}'", readContext); if (this.prefetchEnabled) { + Configuration configuration = getConf(); + initLocalDirAllocatorIfNotInitialized(configuration); return new FSDataInputStream( new S3APrefetchingInputStream( readContext.build(), createObjectAttributes(path, fileStatus), createInputStreamCallbacks(auditSpan), - inputStreamStats)); + inputStreamStats, + configuration, + directoryAllocator)); } else { return new FSDataInputStream( new S3AInputStream( diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ACachingBlockManager.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ACachingBlockManager.java index f82786659da..c166943c00e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ACachingBlockManager.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ACachingBlockManager.java @@ -25,6 +25,8 @@ import java.nio.ByteBuffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.impl.prefetch.BlockData; import org.apache.hadoop.fs.impl.prefetch.CachingBlockManager; import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool; @@ -52,7 +54,8 @@ public class S3ACachingBlockManager extends CachingBlockManager { * @param blockData information about each block of the S3 file. * @param bufferPoolSize size of the in-memory cache in terms of number of blocks. * @param streamStatistics statistics for this stream. - * + * @param conf the configuration. + * @param localDirAllocator the local dir allocator instance. * @throws IllegalArgumentException if reader is null. 
*/ public S3ACachingBlockManager( @@ -60,8 +63,11 @@ public class S3ACachingBlockManager extends CachingBlockManager { S3ARemoteObjectReader reader, BlockData blockData, int bufferPoolSize, - S3AInputStreamStatistics streamStatistics) { - super(futurePool, blockData, bufferPoolSize, streamStatistics); + S3AInputStreamStatistics streamStatistics, + Configuration conf, + LocalDirAllocator localDirAllocator) { + + super(futurePool, blockData, bufferPoolSize, streamStatistics, conf, localDirAllocator); Validate.checkNotNull(reader, "reader"); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ACachingInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ACachingInputStream.java index f9ee4e412fc..fe950486480 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ACachingInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ACachingInputStream.java @@ -24,6 +24,8 @@ import java.io.IOException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.impl.prefetch.BlockData; import org.apache.hadoop.fs.impl.prefetch.BlockManager; import org.apache.hadoop.fs.impl.prefetch.BufferData; @@ -61,7 +63,8 @@ public class S3ACachingInputStream extends S3ARemoteInputStream { * @param s3Attributes attributes of the S3 object being read. * @param client callbacks used for interacting with the underlying S3 client. * @param streamStatistics statistics for this stream. - * + * @param conf the configuration. + * @param localDirAllocator the local dir allocator instance. * @throws IllegalArgumentException if context is null. * @throws IllegalArgumentException if s3Attributes is null. * @throws IllegalArgumentException if client is null. 
@@ -70,7 +73,9 @@ public class S3ACachingInputStream extends S3ARemoteInputStream { S3AReadOpContext context, S3ObjectAttributes s3Attributes, S3AInputStream.InputStreamCallbacks client, - S3AInputStreamStatistics streamStatistics) { + S3AInputStreamStatistics streamStatistics, + Configuration conf, + LocalDirAllocator localDirAllocator) { super(context, s3Attributes, client, streamStatistics); this.numBlocksToPrefetch = this.getContext().getPrefetchBlockCount(); @@ -79,7 +84,9 @@ public class S3ACachingInputStream extends S3ARemoteInputStream { this.getContext().getFuturePool(), this.getReader(), this.getBlockData(), - bufferPoolSize); + bufferPoolSize, + conf, + localDirAllocator); int fileSize = (int) s3Attributes.getLen(); LOG.debug("Created caching input stream for {} (size = {})", this.getName(), fileSize); @@ -176,9 +183,15 @@ public class S3ACachingInputStream extends S3ARemoteInputStream { ExecutorServiceFuturePool futurePool, S3ARemoteObjectReader reader, BlockData blockData, - int bufferPoolSize) { - return new S3ACachingBlockManager(futurePool, reader, blockData, + int bufferPoolSize, + Configuration conf, + LocalDirAllocator localDirAllocator) { + return new S3ACachingBlockManager(futurePool, + reader, + blockData, bufferPoolSize, - getS3AStreamStatistics()); + getS3AStreamStatistics(), + conf, + localDirAllocator); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchingInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchingInputStream.java index f778f40b74c..9b9ee12ad75 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchingInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchingInputStream.java @@ -27,9 +27,11 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CanSetReadahead; import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.impl.prefetch.Validate; import org.apache.hadoop.fs.s3a.S3AInputStream; @@ -79,7 +81,8 @@ public class S3APrefetchingInputStream * @param s3Attributes attributes of the S3 object being read. * @param client callbacks used for interacting with the underlying S3 client. * @param streamStatistics statistics for this stream. - * + * @param conf the configuration. + * @param localDirAllocator the local dir allocator instance retrieved from S3A FS. * @throws IllegalArgumentException if context is null. * @throws IllegalArgumentException if s3Attributes is null. * @throws IllegalArgumentException if client is null. 
@@ -88,7 +91,9 @@ public class S3APrefetchingInputStream S3AReadOpContext context, S3ObjectAttributes s3Attributes, S3AInputStream.InputStreamCallbacks client, - S3AInputStreamStatistics streamStatistics) { + S3AInputStreamStatistics streamStatistics, + Configuration conf, + LocalDirAllocator localDirAllocator) { Validate.checkNotNull(context, "context"); Validate.checkNotNull(s3Attributes, "s3Attributes"); @@ -114,7 +119,9 @@ public class S3APrefetchingInputStream context, s3Attributes, client, - streamStatistics); + streamStatistics, + conf, + localDirAllocator); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3APrefetchingCacheFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3APrefetchingCacheFiles.java new file mode 100644 index 00000000000..6ad8ef58a7f --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3APrefetchingCacheFiles.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.File; +import java.net.URI; + +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.s3a.performance.AbstractS3ACostTest; + +import static org.apache.hadoop.fs.s3a.Constants.BUFFER_DIR; +import static org.apache.hadoop.fs.s3a.Constants.PREFETCH_BLOCK_DEFAULT_SIZE; +import static org.apache.hadoop.fs.s3a.Constants.PREFETCH_BLOCK_SIZE_KEY; +import static org.apache.hadoop.fs.s3a.Constants.PREFETCH_ENABLED_KEY; +import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; + +/** + * Test the cache file behaviour with prefetching input stream. 
+ */ +public class ITestS3APrefetchingCacheFiles extends AbstractS3ACostTest { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestS3APrefetchingCacheFiles.class); + + private Path testFile; + private FileSystem fs; + private int prefetchBlockSize; + private Configuration conf; + + public ITestS3APrefetchingCacheFiles() { + super(true); + } + + @Before + public void setUp() throws Exception { + super.setup(); + // Sets BUFFER_DIR by calling S3ATestUtils#prepareTestConfiguration + conf = createConfiguration(); + String testFileUri = S3ATestUtils.getCSVTestFile(conf); + + testFile = new Path(testFileUri); + prefetchBlockSize = conf.getInt(PREFETCH_BLOCK_SIZE_KEY, PREFETCH_BLOCK_DEFAULT_SIZE); + fs = getFileSystem(); + fs.initialize(new URI(testFileUri), conf); + } + + @Override + public Configuration createConfiguration() { + Configuration configuration = super.createConfiguration(); + S3ATestUtils.removeBaseAndBucketOverrides(configuration, PREFETCH_ENABLED_KEY); + configuration.setBoolean(PREFETCH_ENABLED_KEY, true); + return configuration; + } + + @Override + public synchronized void teardown() throws Exception { + super.teardown(); + File tmpFileDir = new File(conf.get(BUFFER_DIR)); + File[] tmpFiles = tmpFileDir.listFiles(); + if (tmpFiles != null) { + for (File filePath : tmpFiles) { + String path = filePath.getPath(); + if (path.endsWith(".bin") && path.contains("fs-cache-")) { + filePath.delete(); + } + } + } + cleanupWithLogger(LOG, fs); + fs = null; + testFile = null; + } + + /** + * Test to verify the existence of the cache file. + * Tries to perform inputStream read and seek ops to make the prefetching take place and + * asserts whether file with .bin suffix is present. It also verifies certain file stats. + */ + @Test + public void testCacheFileExistence() throws Throwable { + describe("Verify that FS cache files exist on local FS"); + + try (FSDataInputStream in = fs.open(testFile)) { + byte[] buffer = new byte[prefetchBlockSize]; + + in.read(buffer, 0, prefetchBlockSize - 10240); + in.seek(prefetchBlockSize * 2); + in.read(buffer, 0, prefetchBlockSize); + + File tmpFileDir = new File(conf.get(BUFFER_DIR)); + assertTrue("The dir to keep cache files must exist", tmpFileDir.exists()); + File[] tmpFiles = tmpFileDir + .listFiles((dir, name) -> name.endsWith(".bin") && name.contains("fs-cache-")); + boolean isCacheFileForBlockFound = tmpFiles != null && tmpFiles.length > 0; + if (!isCacheFileForBlockFound) { + LOG.warn("No cache files found under " + tmpFileDir); + } + assertTrue("File to cache block data must exist", isCacheFileForBlockFound); + + for (File tmpFile : tmpFiles) { + Path path = new Path(tmpFile.getAbsolutePath()); + try (FileSystem localFs = FileSystem.getLocal(conf)) { + FileStatus stat = localFs.getFileStatus(path); + ContractTestUtils.assertIsFile(path, stat); + assertEquals("File length not matching with prefetchBlockSize", prefetchBlockSize, + stat.getLen()); + assertEquals("User permissions should be RW", FsAction.READ_WRITE, + stat.getPermission().getUserAction()); + assertEquals("Group permissions should be NONE", FsAction.NONE, + stat.getPermission().getGroupAction()); + assertEquals("Other permissions should be NONE", FsAction.NONE, + stat.getPermission().getOtherAction()); + } + } + } + } + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java index bab07f4f9ec..cf6aa7ba1aa 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java @@ -36,7 +36,9 @@ import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectInputStream; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.impl.prefetch.BlockCache; import org.apache.hadoop.fs.impl.prefetch.BlockData; import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool; @@ -60,6 +62,8 @@ import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.util.functional.CallableRaisingIOE; +import static org.apache.hadoop.fs.s3a.Constants.BUFFER_DIR; +import static org.apache.hadoop.fs.s3a.Constants.HADOOP_TMP_DIR; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.emptyStatisticsStore; /** @@ -86,6 +90,8 @@ public final class S3APrefetchFakes { public static final long MODIFICATION_TIME = 0L; + private static final Configuration CONF = new Configuration(); + public static final ChangeDetectionPolicy CHANGE_POLICY = ChangeDetectionPolicy.createPolicy( ChangeDetectionPolicy.Mode.None, @@ -335,7 +341,9 @@ public final class S3APrefetchFakes { private long fileCount = 0; @Override - protected Path getCacheFilePath() throws IOException { + protected Path getCacheFilePath(final Configuration conf, + final LocalDirAllocator localDirAllocator) + throws IOException { fileCount++; return Paths.get(Long.toString(fileCount)); } @@ -363,9 +371,12 @@ public final class S3APrefetchFakes { ExecutorServiceFuturePool futurePool, S3ARemoteObjectReader reader, BlockData blockData, - int bufferPoolSize) { + int bufferPoolSize, + Configuration conf, + LocalDirAllocator localDirAllocator) { super(futurePool, reader, blockData, bufferPoolSize, - new EmptyS3AStatisticsContext().newInputStreamStatistics()); + new EmptyS3AStatisticsContext().newInputStreamStatistics(), + conf, localDirAllocator); } @Override @@ -390,7 +401,9 @@ public final class S3APrefetchFakes { S3ObjectAttributes s3Attributes, S3AInputStream.InputStreamCallbacks client, S3AInputStreamStatistics streamStatistics) { - super(context, s3Attributes, client, streamStatistics); + super(context, s3Attributes, client, streamStatistics, CONF, + new LocalDirAllocator( + CONF.get(BUFFER_DIR) != null ? 
BUFFER_DIR : HADOOP_TMP_DIR)); } @Override @@ -405,9 +418,11 @@ public final class S3APrefetchFakes { ExecutorServiceFuturePool futurePool, S3ARemoteObjectReader reader, BlockData blockData, - int bufferPoolSize) { + int bufferPoolSize, + Configuration conf, + LocalDirAllocator localDirAllocator) { return new FakeS3ACachingBlockManager(futurePool, reader, blockData, - bufferPoolSize); + bufferPoolSize, conf, localDirAllocator); } } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/TestS3ACachingBlockManager.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/TestS3ACachingBlockManager.java index aecf8802beb..cbfa643ee53 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/TestS3ACachingBlockManager.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/TestS3ACachingBlockManager.java @@ -26,13 +26,18 @@ import java.util.concurrent.Executors; import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.impl.prefetch.BlockData; import org.apache.hadoop.fs.impl.prefetch.BufferData; import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool; +import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import org.apache.hadoop.test.AbstractHadoopTestBase; +import static org.apache.hadoop.fs.s3a.Constants.BUFFER_DIR; +import static org.apache.hadoop.fs.s3a.Constants.HADOOP_TMP_DIR; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.junit.Assert.assertEquals; @@ -59,44 +64,45 @@ public class TestS3ACachingBlockManager extends AbstractHadoopTestBase { MockS3ARemoteObject s3File = new MockS3ARemoteObject(FILE_SIZE, false); S3ARemoteObjectReader reader = new S3ARemoteObjectReader(s3File); + Configuration conf = new Configuration(); // Should not throw. S3ACachingBlockManager blockManager = new S3ACachingBlockManager(futurePool, reader, blockData, POOL_SIZE, - streamStatistics); + streamStatistics, conf, null); // Verify it throws correctly. 
intercept( NullPointerException.class, () -> new S3ACachingBlockManager(null, reader, blockData, POOL_SIZE, - streamStatistics)); + streamStatistics, conf, null)); intercept( IllegalArgumentException.class, "'reader' must not be null", () -> new S3ACachingBlockManager(futurePool, null, blockData, POOL_SIZE, - streamStatistics)); + streamStatistics, conf, null)); intercept( IllegalArgumentException.class, "'blockData' must not be null", () -> new S3ACachingBlockManager(futurePool, reader, null, POOL_SIZE, - streamStatistics)); + streamStatistics, conf, null)); intercept( IllegalArgumentException.class, "'bufferPoolSize' must be a positive integer", () -> new S3ACachingBlockManager(futurePool, reader, blockData, 0, - streamStatistics)); + streamStatistics, conf, null)); intercept( IllegalArgumentException.class, "'bufferPoolSize' must be a positive integer", () -> new S3ACachingBlockManager(futurePool, reader, blockData, -1, - streamStatistics)); + streamStatistics, conf, null)); intercept(NullPointerException.class, () -> new S3ACachingBlockManager(futurePool, reader, blockData, - POOL_SIZE, null)); + POOL_SIZE, null, conf, null)); intercept( IllegalArgumentException.class, @@ -125,13 +131,17 @@ public class TestS3ACachingBlockManager extends AbstractHadoopTestBase { private static final class BlockManagerForTesting extends S3ACachingBlockManager { + private static final Configuration CONF = + S3ATestUtils.prepareTestConfiguration(new Configuration()); + BlockManagerForTesting( ExecutorServiceFuturePool futurePool, S3ARemoteObjectReader reader, BlockData blockData, int bufferPoolSize, S3AInputStreamStatistics streamStatistics) { - super(futurePool, reader, blockData, bufferPoolSize, streamStatistics); + super(futurePool, reader, blockData, bufferPoolSize, streamStatistics, CONF, + new LocalDirAllocator(HADOOP_TMP_DIR)); } // If true, forces the next read operation to fail. @@ -154,8 +164,8 @@ public class TestS3ACachingBlockManager extends AbstractHadoopTestBase { private boolean forceNextCachePutToFail; @Override - protected void cachePut(int blockNumber, ByteBuffer buffer) - throws IOException { + protected void cachePut(int blockNumber, + ByteBuffer buffer) throws IOException { if (forceNextCachePutToFail) { forceNextCachePutToFail = false; throw new RuntimeException("bar"); @@ -262,9 +272,11 @@ public class TestS3ACachingBlockManager extends AbstractHadoopTestBase { throws IOException, InterruptedException { MockS3ARemoteObject s3File = new MockS3ARemoteObject(FILE_SIZE, false); S3ARemoteObjectReader reader = new S3ARemoteObjectReader(s3File); + Configuration conf = new Configuration(); S3ACachingBlockManager blockManager = new S3ACachingBlockManager(futurePool, reader, blockData, POOL_SIZE, - streamStatistics); + streamStatistics, conf, new LocalDirAllocator( + conf.get(BUFFER_DIR) != null ? 
BUFFER_DIR : HADOOP_TMP_DIR)); assertInitialState(blockManager); for (int b = 0; b < blockData.getNumBlocks(); b++) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/TestS3ARemoteInputStream.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/TestS3ARemoteInputStream.java index d449a79a5a8..8ce26033c11 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/TestS3ARemoteInputStream.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/TestS3ARemoteInputStream.java @@ -27,11 +27,13 @@ import java.util.concurrent.Executors; import org.junit.Test; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.impl.prefetch.ExceptionAsserts; import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool; import org.apache.hadoop.fs.s3a.S3AInputStream; import org.apache.hadoop.fs.s3a.S3AReadOpContext; +import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.test.AbstractHadoopTestBase; @@ -63,24 +65,25 @@ public class TestS3ARemoteInputStream extends AbstractHadoopTestBase { S3AInputStreamStatistics stats = readContext.getS3AStatisticsContext().newInputStreamStatistics(); + Configuration conf = S3ATestUtils.prepareTestConfiguration(new Configuration()); // Should not throw. - new S3ACachingInputStream(readContext, attrs, client, stats); + new S3ACachingInputStream(readContext, attrs, client, stats, conf, null); ExceptionAsserts.assertThrows( NullPointerException.class, - () -> new S3ACachingInputStream(null, attrs, client, stats)); + () -> new S3ACachingInputStream(null, attrs, client, stats, conf, null)); ExceptionAsserts.assertThrows( NullPointerException.class, - () -> new S3ACachingInputStream(readContext, null, client, stats)); + () -> new S3ACachingInputStream(readContext, null, client, stats, conf, null)); ExceptionAsserts.assertThrows( NullPointerException.class, - () -> new S3ACachingInputStream(readContext, attrs, null, stats)); + () -> new S3ACachingInputStream(readContext, attrs, null, stats, conf, null)); ExceptionAsserts.assertThrows( NullPointerException.class, - () -> new S3ACachingInputStream(readContext, attrs, client, null)); + () -> new S3ACachingInputStream(readContext, attrs, client, null, conf, null)); } @Test From 635521db4c6bd8362717b23cf5cb3ab3ce1d2e4f Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Wed, 19 Apr 2023 00:05:52 +0800 Subject: [PATCH 77/97] YARN-11438. [Federation] ZookeeperFederationStateStore Support Version. 
(#5537) --- .../impl/ZookeeperFederationStateStore.java | 46 +++++++++++++++++-- .../TestZookeeperFederationStateStore.java | 46 +++++++++++++++++++ 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java index 536faa31dca..9a49a6d3a17 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java @@ -30,7 +30,6 @@ import java.util.TimeZone; import java.util.Comparator; import java.util.stream.Collectors; -import org.apache.commons.lang3.NotImplementedException; import org.apache.curator.framework.recipes.shared.SharedCount; import org.apache.curator.framework.recipes.shared.VersionedValue; import org.apache.hadoop.classification.VisibleForTesting; @@ -43,8 +42,10 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.SubClusterIdProto; import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.SubClusterInfoProto; import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.SubClusterPolicyConfigurationProto; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto; import org.apache.hadoop.yarn.security.client.YARNDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; +import org.apache.hadoop.yarn.server.federation.store.exception.FederationStateVersionIncompatibleException; import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterRequest; import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterResponse; import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSubCluster; @@ -104,6 +105,7 @@ import org.apache.hadoop.yarn.server.federation.store.utils.FederationReservatio import org.apache.hadoop.yarn.server.federation.store.utils.FederationRouterRMTokenInputValidator; import org.apache.hadoop.yarn.server.records.Version; import org.apache.hadoop.yarn.api.records.ReservationId; +import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl; import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.SystemClock; @@ -154,6 +156,8 @@ public class ZookeeperFederationStateStore implements FederationStateStore { private final static String ROOT_ZNODE_NAME_POLICY = "policies"; private final static String ROOT_ZNODE_NAME_RESERVATION = "reservation"; + protected static final String ROOT_ZNODE_NAME_VERSION = "version"; + /** Store Delegation Token Node. 
*/ private final static String ROUTER_RM_DT_SECRET_MANAGER_ROOT = "router_rm_dt_secret_manager_root"; private static final String ROUTER_RM_DT_MASTER_KEYS_ROOT_ZNODE_NAME = @@ -184,6 +188,7 @@ public class ZookeeperFederationStateStore implements FederationStateStore { private String membershipZNode; private String policiesZNode; private String reservationsZNode; + private String versionNode; private int maxAppsInStateStore; /** Directory to store the delegation token data. **/ @@ -195,6 +200,8 @@ public class ZookeeperFederationStateStore implements FederationStateStore { private volatile Clock clock = SystemClock.getInstance(); + protected static final Version CURRENT_VERSION_INFO = Version.newInstance(1, 1); + @VisibleForTesting private ZKFederationStateStoreOpDurations opDurations = ZKFederationStateStoreOpDurations.getInstance(); @@ -223,6 +230,7 @@ public class ZookeeperFederationStateStore implements FederationStateStore { appsZNode = getNodePath(baseZNode, ROOT_ZNODE_NAME_APPLICATION); policiesZNode = getNodePath(baseZNode, ROOT_ZNODE_NAME_POLICY); reservationsZNode = getNodePath(baseZNode, ROOT_ZNODE_NAME_RESERVATION); + versionNode = getNodePath(baseZNode, ROOT_ZNODE_NAME_VERSION); // delegation token znodes routerRMDTSecretManagerRoot = getNodePath(baseZNode, ROUTER_RM_DT_SECRET_MANAGER_ROOT); @@ -245,6 +253,7 @@ public class ZookeeperFederationStateStore implements FederationStateStore { zkManager.createRootDirRecursively(routerRMDTSecretManagerRoot, zkAcl); zkManager.createRootDirRecursively(routerRMDTMasterKeysRootPath, zkAcl); zkManager.createRootDirRecursively(routerRMDelegationTokensRootPath, zkAcl); + zkManager.createRootDirRecursively(versionNode, zkAcl); } catch (Exception e) { String errMsg = "Cannot create base directories: " + e.getMessage(); FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); @@ -643,22 +652,49 @@ public class ZookeeperFederationStateStore implements FederationStateStore { @Override public Version getCurrentVersion() { - throw new NotImplementedException("Code is not implemented"); + return CURRENT_VERSION_INFO; } @Override public Version loadVersion() throws Exception { - throw new NotImplementedException("Code is not implemented"); + if (exists(versionNode)) { + byte[] data = get(versionNode); + if (data != null) { + return new VersionPBImpl(VersionProto.parseFrom(data)); + } + } + return null; } @Override public void storeVersion() throws Exception { - throw new NotImplementedException("Code is not implemented"); + byte[] data = ((VersionPBImpl) CURRENT_VERSION_INFO).getProto().toByteArray(); + boolean isUpdate = exists(versionNode); + put(versionNode, data, isUpdate); } @Override public void checkVersion() throws Exception { - throw new NotImplementedException("Code is not implemented"); + Version loadedVersion = loadVersion(); + LOG.info("Loaded Router State Version Info = {}.", loadedVersion); + Version currentVersion = getCurrentVersion(); + if (loadedVersion != null && loadedVersion.equals(currentVersion)) { + return; + } + + // if there is no version info, treat it as CURRENT_VERSION_INFO; + if (loadedVersion == null) { + loadedVersion = currentVersion; + } + + if (loadedVersion.isCompatibleTo(currentVersion)) { + LOG.info("Storing Router State Version Info {}.", currentVersion); + storeVersion(); + } else { + throw new FederationStateVersionIncompatibleException( + "Expecting Router state version " + currentVersion + + ", but loading version " + loadedVersion); + } } /** diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestZookeeperFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestZookeeperFederationStateStore.java index ba22a1e1894..739f3b6543c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestZookeeperFederationStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestZookeeperFederationStateStore.java @@ -41,6 +41,7 @@ import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKey; import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKeyRequest; import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKeyResponse; import org.apache.hadoop.yarn.server.federation.store.records.RouterStoreToken; +import org.apache.hadoop.yarn.server.records.Version; import org.apache.hadoop.yarn.util.Records; import org.junit.After; import org.junit.Before; @@ -52,6 +53,7 @@ import static org.apache.hadoop.util.curator.ZKCuratorManager.getNodePath; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; /** * Unit tests for ZookeeperFederationStateStore. @@ -276,4 +278,48 @@ public class TestZookeeperFederationStateStore extends FederationStateStoreBaseT assertNotNull(zkRouterStoreToken); assertEquals(token, zkRouterStoreToken); } + + @Test + public void testGetCurrentVersion() { + ZookeeperFederationStateStore zkFederationStateStore = + ZookeeperFederationStateStore.class.cast(this.getStateStore()); + Version version = zkFederationStateStore.getCurrentVersion(); + assertEquals(1, version.getMajorVersion()); + assertEquals(1, version.getMinorVersion()); + } + + @Test + public void testStoreVersion() throws Exception { + ZookeeperFederationStateStore zkFederationStateStore = + ZookeeperFederationStateStore.class.cast(this.getStateStore()); + zkFederationStateStore.storeVersion(); + Version version = zkFederationStateStore.loadVersion(); + assertEquals(1, version.getMajorVersion()); + assertEquals(1, version.getMinorVersion()); + } + + @Test + public void testLoadVersion() throws Exception { + ZookeeperFederationStateStore zkFederationStateStore = + ZookeeperFederationStateStore.class.cast(this.getStateStore()); + // We don't store version, loadversion directly will get a null value. + Version version = zkFederationStateStore.loadVersion(); + assertNull(version); + + // After storing the version information, we will get the accurate version information. 
+ zkFederationStateStore.storeVersion(); + Version version1 = zkFederationStateStore.loadVersion(); + assertEquals(1, version1.getMajorVersion()); + assertEquals(1, version1.getMinorVersion()); + } + + @Test + public void testCheckVersion() throws Exception { + ZookeeperFederationStateStore zkFederationStateStore = + ZookeeperFederationStateStore.class.cast(this.getStateStore()); + zkFederationStateStore.checkVersion(); + Version version = zkFederationStateStore.loadVersion(); + assertEquals(1, version.getMajorVersion()); + assertEquals(1, version.getMinorVersion()); + } } \ No newline at end of file From a258f1f235b98cdfa6e66d6e525f9124f615f12d Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Wed, 19 Apr 2023 00:13:08 +0800 Subject: [PATCH 78/97] YARN-11326. [Federation] Add RM FederationStateStoreService Metrics. (#4963) --- .../federation/FederationClientMethod.java | 121 +++++++++++ .../FederationStateStoreService.java | 177 ++++++++++++---- .../FederationStateStoreServiceMetrics.java | 196 ++++++++++++++++++ .../TestFederationRMStateStoreService.java | 176 +++++++++++++++- ...estFederationStateStoreServiceMetrics.java | 102 +++++++++ 5 files changed, 731 insertions(+), 41 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationClientMethod.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreServiceMetrics.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationStateStoreServiceMetrics.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationClientMethod.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationClientMethod.java new file mode 100644 index 00000000000..4faa9812f3c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationClientMethod.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.resourcemanager.federation; + +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; +import org.apache.hadoop.yarn.util.Clock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.Method; +import java.util.Arrays; + +/** + * Class to define client method,params and arguments. + */ +public class FederationClientMethod { + + public static final Logger LOG = + LoggerFactory.getLogger(FederationClientMethod.class); + + /** + * List of parameters: static and dynamic values, matchings types. + */ + private final Object[] params; + + /** + * List of method parameters types, matches parameters. + */ + private final Class[] types; + + /** + * String name of the method. + */ + private final String methodName; + + private FederationStateStore stateStoreClient = null; + + private Clock clock = null; + + private Class clazz; + + public FederationClientMethod(String method, Class[] pTypes, Object... pParams) + throws YarnException { + if (pParams.length != pTypes.length) { + throw new YarnException("Invalid parameters for method " + method); + } + + this.params = pParams; + this.types = Arrays.copyOf(pTypes, pTypes.length); + this.methodName = method; + } + + public FederationClientMethod(String method, Class pTypes, Object pParams) + throws YarnException { + this(method, new Class[]{pTypes}, new Object[]{pParams}); + } + + public FederationClientMethod(String method, Class pTypes, Object pParams, Class rTypes, + FederationStateStore fedStateStore, Clock fedClock) throws YarnException { + this(method, pTypes, pParams); + this.stateStoreClient = fedStateStore; + this.clock = fedClock; + this.clazz = rTypes; + } + + public Object[] getParams() { + return Arrays.copyOf(this.params, this.params.length); + } + + public String getMethodName() { + return methodName; + } + + /** + * Get the calling types for this method. + * + * @return An array of calling types. + */ + public Class[] getTypes() { + return Arrays.copyOf(this.types, this.types.length); + } + + /** + * We will use the invoke method to call the method in FederationStateStoreService. + * + * @return The result returned after calling the interface. + * @throws YarnException yarn exception. 
+ */ + protected R invoke() throws YarnException { + try { + long startTime = clock.getTime(); + Method method = FederationStateStore.class.getMethod(methodName, types); + R result = clazz.cast(method.invoke(stateStoreClient, params)); + + long stopTime = clock.getTime(); + FederationStateStoreServiceMetrics.succeededStateStoreServiceCall( + methodName, stopTime - startTime); + return result; + } catch (Exception e) { + LOG.error("stateStoreClient call method {} error.", methodName, e); + FederationStateStoreServiceMetrics.failedStateStoreServiceCall(methodName); + throw new YarnException(e); + } + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreService.java index 90dcadb721e..d71a7f45e03 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreService.java @@ -85,6 +85,8 @@ import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade import org.apache.hadoop.yarn.server.records.Version; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.MonotonicClock; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import org.slf4j.Logger; @@ -110,6 +112,8 @@ public class FederationStateStoreService extends AbstractService private long heartbeatInterval; private long heartbeatInitialDelay; private RMContext rmContext; + private final Clock clock = new MonotonicClock(); + private FederationStateStoreServiceMetrics metrics; private String cleanUpThreadNamePrefix = "FederationStateStoreService-Clean-Thread"; private int cleanUpRetryCountNum; private long cleanUpRetrySleepTime; @@ -171,6 +175,9 @@ public class FederationStateStoreService extends AbstractService LOG.info("Initialized federation membership service."); + this.metrics = FederationStateStoreServiceMetrics.getMetrics(); + LOG.info("Initialized federation statestore service metrics."); + super.serviceInit(conf); } @@ -283,154 +290,251 @@ public class FederationStateStoreService extends AbstractService @Override public GetSubClusterPolicyConfigurationResponse getPolicyConfiguration( GetSubClusterPolicyConfigurationRequest request) throws YarnException { - return stateStoreClient.getPolicyConfiguration(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("getPolicyConfiguration", + GetSubClusterPolicyConfigurationRequest.class, request, + GetSubClusterPolicyConfigurationResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public SetSubClusterPolicyConfigurationResponse setPolicyConfiguration( SetSubClusterPolicyConfigurationRequest request) throws YarnException { - return stateStoreClient.setPolicyConfiguration(request); + FederationClientMethod clientMethod 
= + new FederationClientMethod<>("setPolicyConfiguration", + SetSubClusterPolicyConfigurationRequest.class, request, + SetSubClusterPolicyConfigurationResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public GetSubClusterPoliciesConfigurationsResponse getPoliciesConfigurations( GetSubClusterPoliciesConfigurationsRequest request) throws YarnException { - return stateStoreClient.getPoliciesConfigurations(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("getPoliciesConfigurations", + GetSubClusterPoliciesConfigurationsRequest.class, request, + GetSubClusterPoliciesConfigurationsResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override - public SubClusterRegisterResponse registerSubCluster( - SubClusterRegisterRequest registerSubClusterRequest) + public SubClusterRegisterResponse registerSubCluster(SubClusterRegisterRequest request) throws YarnException { - return stateStoreClient.registerSubCluster(registerSubClusterRequest); + FederationClientMethod clientMethod = + new FederationClientMethod<>("registerSubCluster", + SubClusterRegisterRequest.class, request, + SubClusterRegisterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override - public SubClusterDeregisterResponse deregisterSubCluster( - SubClusterDeregisterRequest subClusterDeregisterRequest) + public SubClusterDeregisterResponse deregisterSubCluster(SubClusterDeregisterRequest request) throws YarnException { - return stateStoreClient.deregisterSubCluster(subClusterDeregisterRequest); + FederationClientMethod clientMethod = + new FederationClientMethod<>("deregisterSubCluster", + SubClusterDeregisterRequest.class, request, + SubClusterDeregisterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override - public SubClusterHeartbeatResponse subClusterHeartbeat( - SubClusterHeartbeatRequest subClusterHeartbeatRequest) + public SubClusterHeartbeatResponse subClusterHeartbeat(SubClusterHeartbeatRequest request) throws YarnException { - return stateStoreClient.subClusterHeartbeat(subClusterHeartbeatRequest); + FederationClientMethod clientMethod = + new FederationClientMethod<>("subClusterHeartbeat", + SubClusterHeartbeatRequest.class, request, + SubClusterHeartbeatResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override - public GetSubClusterInfoResponse getSubCluster( - GetSubClusterInfoRequest subClusterRequest) throws YarnException { - return stateStoreClient.getSubCluster(subClusterRequest); + public GetSubClusterInfoResponse getSubCluster(GetSubClusterInfoRequest request) + throws YarnException { + FederationClientMethod clientMethod = + new FederationClientMethod<>("getSubCluster", + GetSubClusterInfoRequest.class, request, + GetSubClusterInfoResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override - public GetSubClustersInfoResponse getSubClusters( - GetSubClustersInfoRequest subClustersRequest) throws YarnException { - return stateStoreClient.getSubClusters(subClustersRequest); + public GetSubClustersInfoResponse getSubClusters(GetSubClustersInfoRequest request) + throws YarnException { + FederationClientMethod clientMethod = + new FederationClientMethod<>("getSubClusters", + GetSubClustersInfoRequest.class, request, + GetSubClustersInfoResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public AddApplicationHomeSubClusterResponse addApplicationHomeSubCluster( 
AddApplicationHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.addApplicationHomeSubCluster(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("addApplicationHomeSubCluster", + AddApplicationHomeSubClusterRequest.class, request, + AddApplicationHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public UpdateApplicationHomeSubClusterResponse updateApplicationHomeSubCluster( UpdateApplicationHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.updateApplicationHomeSubCluster(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("updateApplicationHomeSubCluster", + AddApplicationHomeSubClusterRequest.class, request, + UpdateApplicationHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public GetApplicationHomeSubClusterResponse getApplicationHomeSubCluster( GetApplicationHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.getApplicationHomeSubCluster(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("getApplicationHomeSubCluster", + GetApplicationHomeSubClusterRequest.class, request, + GetApplicationHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public GetApplicationsHomeSubClusterResponse getApplicationsHomeSubCluster( GetApplicationsHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.getApplicationsHomeSubCluster(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("getApplicationsHomeSubCluster", + GetApplicationsHomeSubClusterRequest.class, request, + GetApplicationsHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public DeleteApplicationHomeSubClusterResponse deleteApplicationHomeSubCluster( DeleteApplicationHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.deleteApplicationHomeSubCluster(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("deleteApplicationHomeSubCluster", + DeleteApplicationHomeSubClusterRequest.class, request, + DeleteApplicationHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public AddReservationHomeSubClusterResponse addReservationHomeSubCluster( AddReservationHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.addReservationHomeSubCluster(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("addReservationHomeSubCluster", + AddReservationHomeSubClusterRequest.class, request, + AddReservationHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public GetReservationHomeSubClusterResponse getReservationHomeSubCluster( GetReservationHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.getReservationHomeSubCluster(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("getReservationHomeSubCluster", + GetReservationHomeSubClusterRequest.class, request, + GetReservationHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public GetReservationsHomeSubClusterResponse getReservationsHomeSubCluster( GetReservationsHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.getReservationsHomeSubCluster(request); + 
FederationClientMethod clientMethod = + new FederationClientMethod<>("getReservationsHomeSubCluster", + GetReservationsHomeSubClusterRequest.class, request, + GetReservationsHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public UpdateReservationHomeSubClusterResponse updateReservationHomeSubCluster( UpdateReservationHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.updateReservationHomeSubCluster(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("updateReservationHomeSubCluster", + GetReservationsHomeSubClusterRequest.class, request, + UpdateReservationHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public DeleteReservationHomeSubClusterResponse deleteReservationHomeSubCluster( DeleteReservationHomeSubClusterRequest request) throws YarnException { - return stateStoreClient.deleteReservationHomeSubCluster(request); + FederationClientMethod clientMethod = + new FederationClientMethod<>("deleteReservationHomeSubCluster", + DeleteReservationHomeSubClusterRequest.class, request, + DeleteReservationHomeSubClusterResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public RouterMasterKeyResponse storeNewMasterKey(RouterMasterKeyRequest request) throws YarnException, IOException { - return stateStoreClient.storeNewMasterKey(request); + FederationClientMethod clientMethod = new FederationClientMethod<>( + "storeNewMasterKey", + RouterMasterKeyRequest.class, request, + RouterMasterKeyResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public RouterMasterKeyResponse removeStoredMasterKey(RouterMasterKeyRequest request) throws YarnException, IOException { - return stateStoreClient.removeStoredMasterKey(request); + FederationClientMethod clientMethod = new FederationClientMethod<>( + "removeStoredMasterKey", + RouterMasterKeyRequest.class, request, + RouterMasterKeyResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public RouterMasterKeyResponse getMasterKeyByDelegationKey(RouterMasterKeyRequest request) throws YarnException, IOException { - return stateStoreClient.getMasterKeyByDelegationKey(request); + FederationClientMethod clientMethod = new FederationClientMethod<>( + "getMasterKeyByDelegationKey", + RouterMasterKeyRequest.class, request, + RouterMasterKeyResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public RouterRMTokenResponse storeNewToken(RouterRMTokenRequest request) throws YarnException, IOException { - return stateStoreClient.storeNewToken(request); + FederationClientMethod clientMethod = new FederationClientMethod<>( + "storeNewToken", + RouterRMTokenRequest.class, request, + RouterRMTokenResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public RouterRMTokenResponse updateStoredToken(RouterRMTokenRequest request) throws YarnException, IOException { - return stateStoreClient.updateStoredToken(request); + FederationClientMethod clientMethod = new FederationClientMethod<>( + "updateStoredToken", + RouterRMTokenRequest.class, request, + RouterRMTokenResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public RouterRMTokenResponse removeStoredToken(RouterRMTokenRequest request) throws YarnException, IOException { - return stateStoreClient.removeStoredToken(request); + FederationClientMethod clientMethod = new 
FederationClientMethod<>( + "removeStoredToken", + RouterRMTokenRequest.class, request, + RouterRMTokenResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override public RouterRMTokenResponse getTokenByRouterStoreToken(RouterRMTokenRequest request) throws YarnException, IOException { - return stateStoreClient.getTokenByRouterStoreToken(request); + FederationClientMethod clientMethod = new FederationClientMethod<>( + "getTokenByRouterStoreToken", + RouterRMTokenRequest.class, request, + RouterRMTokenResponse.class, stateStoreClient, clock); + return clientMethod.invoke(); } @Override @@ -612,5 +716,4 @@ public class FederationStateStoreService extends AbstractService } return true; } - -} +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreServiceMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreServiceMetrics.java new file mode 100644 index 00000000000..f2312fd5e16 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreServiceMetrics.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.resourcemanager.federation; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.metrics2.MetricsInfo; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableQuantiles; +import org.apache.hadoop.metrics2.lib.MutableRate; +import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; + +import static org.apache.hadoop.metrics2.lib.Interns.info; + +@Metrics(about = "Metrics for FederationStateStoreService", context = "fedr") +public final class FederationStateStoreServiceMetrics { + + public static final Logger LOG = + LoggerFactory.getLogger(FederationStateStoreServiceMetrics.class); + + private static final MetricsInfo RECORD_INFO = + info("FederationStateStoreServiceMetrics", "Metrics for the RM FederationStateStoreService"); + + private static volatile FederationStateStoreServiceMetrics instance = null; + private MetricsRegistry registry; + + private final static Method[] STATESTORE_API_METHODS = FederationStateStore.class.getMethods(); + + // Map method names to counter objects + private static final Map FAILED_CALLS = new HashMap<>(); + private static final Map SUCCESSFUL_CALLS = new HashMap<>(); + // Provide quantile latency for each api call. + private static final Map QUANTILE_METRICS = new HashMap<>(); + + // Error string templates for logging calls from methods not in + // FederationStateStore API + private static final String UNKOWN_FAIL_ERROR_MSG = + "Not recording failed call for unknown FederationStateStore method {}"; + private static final String UNKNOWN_SUCCESS_ERROR_MSG = + "Not recording successful call for unknown FederationStateStore method {}"; + + /** + * Initialize the singleton instance. + * + * @return the singleton + */ + public static FederationStateStoreServiceMetrics getMetrics() { + synchronized (FederationStateStoreServiceMetrics.class) { + if (instance == null) { + instance = DefaultMetricsSystem.instance() + .register(new FederationStateStoreServiceMetrics()); + } + } + return instance; + } + + private FederationStateStoreServiceMetrics() { + registry = new MetricsRegistry(RECORD_INFO); + registry.tag(RECORD_INFO, "FederationStateStoreServiceMetrics"); + + // Create the metrics for each method and put them into the map + for (Method m : STATESTORE_API_METHODS) { + String methodName = m.getName(); + LOG.debug("Registering Federation StateStore Service metrics for {}", methodName); + + // This metric only records the number of failed calls; it does not + // capture latency information + FAILED_CALLS.put(methodName, registry.newCounter(methodName + "NumFailedCalls", + "# failed calls to " + methodName, 0L)); + + // This metric records both the number and average latency of successful + // calls. + SUCCESSFUL_CALLS.put(methodName, registry.newRate(methodName + "SuccessfulCalls", + "# successful calls and latency(ms) for" + methodName)); + + // This metric records the quantile-based latency of each successful call, + // re-sampled every 10 seconds. 
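Taken together, the loop above registers a failure counter, a success rate and a latency quantile per state-store API. A short usage sketch, illustrative only and relying solely on the static helpers defined further down in this class, for the getSubCluster method:

    // Record one successful call (12 ms) and one failed call for getSubCluster.
    FederationStateStoreServiceMetrics.getMetrics();
    FederationStateStoreServiceMetrics.succeededStateStoreServiceCall("getSubCluster", 12L);
    FederationStateStoreServiceMetrics.failedStateStoreServiceCall("getSubCluster");

    // Read the per-method counters back through the @VisibleForTesting getters.
    long succeeded =
        FederationStateStoreServiceMetrics.getNumSucceessfulCallsForMethod("getSubCluster");
    long failed =
        FederationStateStoreServiceMetrics.getNumFailedCallsForMethod("getSubCluster");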
+ QUANTILE_METRICS.put(methodName, registry.newQuantiles(methodName + "Latency", + "Quantile latency (ms) for " + methodName, "ops", "latency", 10)); + } + } + + // Aggregate metrics are shared, and don't have to be looked up per call + @Metric("Total number of successful calls and latency(ms)") + private static MutableRate totalSucceededCalls; + + @Metric("Total number of failed StateStore calls") + private static MutableCounterLong totalFailedCalls; + + public static void failedStateStoreServiceCall() { + String methodName = Thread.currentThread().getStackTrace()[2].getMethodName(); + MutableCounterLong methodMetric = FAILED_CALLS.get(methodName); + + if (methodMetric == null) { + LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName); + return; + } + + totalFailedCalls.incr(); + methodMetric.incr(); + } + + public static void failedStateStoreServiceCall(String methodName) { + MutableCounterLong methodMetric = FAILED_CALLS.get(methodName); + if (methodMetric == null) { + LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName); + return; + } + totalFailedCalls.incr(); + methodMetric.incr(); + } + + public static void succeededStateStoreServiceCall(long duration) { + StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); + if (ArrayUtils.isNotEmpty(stackTraceElements) && stackTraceElements.length > 2) { + String methodName = Thread.currentThread().getStackTrace()[2].getMethodName(); + if(SUCCESSFUL_CALLS.containsKey(methodName)) { + succeededStateStoreServiceCall(methodName, duration); + } else { + LOG.error(UNKNOWN_SUCCESS_ERROR_MSG, methodName); + } + } else { + LOG.error("stackTraceElements is empty or length < 2."); + } + } + + public static void succeededStateStoreServiceCall(String methodName, long duration) { + if (SUCCESSFUL_CALLS.containsKey(methodName)) { + MutableRate methodMetric = SUCCESSFUL_CALLS.get(methodName); + MutableQuantiles methodQuantileMetric = QUANTILE_METRICS.get(methodName); + if (methodMetric == null || methodQuantileMetric == null) { + LOG.error(UNKNOWN_SUCCESS_ERROR_MSG, methodName); + return; + } + totalSucceededCalls.add(duration); + methodMetric.add(duration); + methodQuantileMetric.add(duration); + } + } + + // Getters for unit testing + @VisibleForTesting + public static long getNumFailedCallsForMethod(String methodName) { + return FAILED_CALLS.get(methodName).value(); + } + + @VisibleForTesting + public static long getNumSucceessfulCallsForMethod(String methodName) { + return SUCCESSFUL_CALLS.get(methodName).lastStat().numSamples(); + } + + @VisibleForTesting + public static double getLatencySucceessfulCallsForMethod(String methodName) { + return SUCCESSFUL_CALLS.get(methodName).lastStat().mean(); + } + + @VisibleForTesting + public static long getNumFailedCalls() { + return totalFailedCalls.value(); + } + + @VisibleForTesting + public static long getNumSucceededCalls() { + return totalSucceededCalls.lastStat().numSamples(); + } + + @VisibleForTesting + public static double getLatencySucceededCalls() { + return totalSucceededCalls.lastStat().mean(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationRMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationRMStateStoreService.java index b8e2ce6ef32..9a85315628f 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationRMStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationRMStateStoreService.java @@ -38,6 +38,8 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.federation.policies.exceptions.FederationPolicyInitializationException; +import org.apache.hadoop.yarn.server.federation.policies.manager.UniformBroadcastPolicyManager; import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; import org.apache.hadoop.yarn.server.federation.store.exception.FederationStateStoreException; import org.apache.hadoop.yarn.server.federation.store.records.GetSubClusterInfoRequest; @@ -52,6 +54,17 @@ import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationHome import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationHomeSubClusterResponse; import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationsHomeSubClusterRequest; import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationsHomeSubClusterResponse; +import org.apache.hadoop.yarn.server.federation.store.records.SubClusterPolicyConfiguration; +import org.apache.hadoop.yarn.server.federation.store.records.SetSubClusterPolicyConfigurationRequest; +import org.apache.hadoop.yarn.server.federation.store.records.GetSubClusterPolicyConfigurationRequest; +import org.apache.hadoop.yarn.server.federation.store.records.GetSubClusterPolicyConfigurationResponse; +import org.apache.hadoop.yarn.server.federation.store.records.GetSubClusterPoliciesConfigurationsRequest; +import org.apache.hadoop.yarn.server.federation.store.records.GetSubClusterPoliciesConfigurationsResponse; +import org.apache.hadoop.yarn.server.federation.store.records.SubClusterRegisterRequest; +import org.apache.hadoop.yarn.server.federation.store.records.GetSubClustersInfoRequest; +import org.apache.hadoop.yarn.server.federation.store.records.GetSubClustersInfoResponse; +import org.apache.hadoop.yarn.server.federation.store.records.SubClusterHeartbeatRequest; +import org.apache.hadoop.yarn.server.federation.store.records.SubClusterHeartbeatResponse; import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMAppManager; @@ -89,6 +102,7 @@ public class TestFederationRMStateStoreService { private long lastHearbeatTS = 0; private JSONJAXBContext jc; private JSONUnmarshaller unmarshaller; + private MockRM mockRM; @Before public void setUp() throws IOException, YarnException, JAXBException { @@ -97,12 +111,23 @@ public class TestFederationRMStateStoreService { JSONConfiguration.mapped().rootUnwrapping(false).build(), ClusterMetricsInfo.class); unmarshaller = jc.createJSONUnmarshaller(); + + conf.setBoolean(YarnConfiguration.FEDERATION_ENABLED, true); + conf.setInt(YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY, 10); + conf.set(YarnConfiguration.RM_CLUSTER_ID, subClusterId.getId()); + + // set up MockRM + mockRM = new MockRM(conf); + 
mockRM.init(conf); + mockRM.start(); } @After public void tearDown() throws Exception { unmarshaller = null; jc = null; + mockRM.stop(); + mockRM = null; } @Test @@ -250,10 +275,8 @@ public class TestFederationRMStateStoreService { // init subCluster Heartbeat, // and check that the subCluster is in a running state - FederationStateStoreService stateStoreService = - rm.getFederationStateStoreService(); - FederationStateStoreHeartbeat storeHeartbeat = - stateStoreService.getStateStoreHeartbeatThread(); + FederationStateStoreService stateStoreService = rm.getFederationStateStoreService(); + FederationStateStoreHeartbeat storeHeartbeat = stateStoreService.getStateStoreHeartbeatThread(); storeHeartbeat.run(); checkSubClusterInfo(SubClusterState.SC_RUNNING); @@ -482,4 +505,149 @@ public class TestFederationRMStateStoreService { rmAppMaps.putIfAbsent(application.getApplicationId(), application); } + + + @Test + public void testPolicyConfigurationMethod() throws YarnException { + + // This test case tests 3 methods. + // 1.setPolicyConfiguration + // 2.getPolicyConfiguration + // 3.getPolicyConfigurations + FederationStateStoreService stateStoreService = mockRM.getFederationStateStoreService(); + + // set queue basic information (queue1) + String queue1 = "queue1"; + SubClusterPolicyConfiguration requestPolicyConf1 = getUniformPolicy(queue1); + SetSubClusterPolicyConfigurationRequest configurationRequest1 = + SetSubClusterPolicyConfigurationRequest.newInstance(requestPolicyConf1); + // store policy configuration (queue1) + stateStoreService.setPolicyConfiguration(configurationRequest1); + + // set queue basic information (queue2) + String queue2 = "queue2"; + SubClusterPolicyConfiguration requestPolicyConf2 = getUniformPolicy(queue2); + SetSubClusterPolicyConfigurationRequest configurationRequest2 = + SetSubClusterPolicyConfigurationRequest.newInstance(requestPolicyConf2); + // store policy configuration (queue1) + stateStoreService.setPolicyConfiguration(configurationRequest2); + + // get policy configuration + GetSubClusterPolicyConfigurationRequest request1 = + GetSubClusterPolicyConfigurationRequest.newInstance(queue1); + GetSubClusterPolicyConfigurationResponse response = + stateStoreService.getPolicyConfiguration(request1); + Assert.assertNotNull(response); + + SubClusterPolicyConfiguration responsePolicyConf = + response.getPolicyConfiguration(); + Assert.assertNotNull(responsePolicyConf); + Assert.assertEquals(requestPolicyConf1, responsePolicyConf); + + // get policy configurations + GetSubClusterPoliciesConfigurationsRequest policiesRequest1 = + GetSubClusterPoliciesConfigurationsRequest.newInstance(); + GetSubClusterPoliciesConfigurationsResponse policiesResponse1 = + stateStoreService.getPoliciesConfigurations(policiesRequest1); + Assert.assertNotNull(policiesResponse1); + + List policiesConfigs = policiesResponse1.getPoliciesConfigs(); + Assert.assertNotNull(policiesConfigs); + Assert.assertEquals(2, policiesConfigs.size()); + Assert.assertTrue(policiesConfigs.contains(requestPolicyConf1)); + Assert.assertTrue(policiesConfigs.contains(requestPolicyConf2)); + } + + public SubClusterPolicyConfiguration getUniformPolicy(String queue) + throws FederationPolicyInitializationException { + UniformBroadcastPolicyManager wfp = new UniformBroadcastPolicyManager(); + wfp.setQueue(queue); + SubClusterPolicyConfiguration fpc = wfp.serializeConf(); + return fpc; + } + + @Test + public void testSubClusterMethod() throws YarnException { + + // This test case tests 5 methods. 
+ // 1.registerSubCluster + // 2.deregisterSubCluster + // 3.subClusterHeartbeat + // 4.getSubCluster + // 5.getSubClusters + + FederationStateStoreService stateStoreService = + mockRM.getFederationStateStoreService(); + + // registerSubCluster subCluster1 + SubClusterId subClusterId1 = SubClusterId.newInstance("SC1"); + SubClusterInfo subClusterInfo1 = createSubClusterInfo(subClusterId1); + + SubClusterRegisterRequest registerRequest1 = + SubClusterRegisterRequest.newInstance(subClusterInfo1); + stateStoreService.registerSubCluster(registerRequest1); + + // registerSubCluster subCluster2 + SubClusterId subClusterId2 = SubClusterId.newInstance("SC2"); + SubClusterInfo subClusterInfo2 = createSubClusterInfo(subClusterId2); + + SubClusterRegisterRequest registerRequest2 = + SubClusterRegisterRequest.newInstance(subClusterInfo2); + stateStoreService.registerSubCluster(registerRequest2); + + // getSubCluster subCluster1 + GetSubClusterInfoRequest subClusterRequest = + GetSubClusterInfoRequest.newInstance(subClusterId1); + GetSubClusterInfoResponse subClusterResponse = + stateStoreService.getSubCluster(subClusterRequest); + Assert.assertNotNull(subClusterResponse); + + // We query subCluster1, we want to get SubClusterInfo of subCluster1 + SubClusterInfo subClusterInfo1Resp = subClusterResponse.getSubClusterInfo(); + Assert.assertNotNull(subClusterInfo1Resp); + Assert.assertEquals(subClusterInfo1, subClusterInfo1Resp); + + // We call the getSubClusters method and filter the Active SubCluster + // subCluster1 and subCluster2 are just registered, they are in NEW state, + // so we will get 0 active subclusters + GetSubClustersInfoRequest subClustersInfoRequest = + GetSubClustersInfoRequest.newInstance(true); + GetSubClustersInfoResponse subClustersInfoResp = + stateStoreService.getSubClusters(subClustersInfoRequest); + Assert.assertNotNull(subClustersInfoResp); + List subClusterInfos = subClustersInfoResp.getSubClusters(); + Assert.assertNotNull(subClusterInfos); + Assert.assertEquals(0, subClusterInfos.size()); + + // We let subCluster1 heartbeat and set subCluster1 to Running state + SubClusterHeartbeatRequest heartbeatRequest = + SubClusterHeartbeatRequest.newInstance(subClusterId1, SubClusterState.SC_RUNNING, + "capability"); + SubClusterHeartbeatResponse heartbeatResponse = + stateStoreService.subClusterHeartbeat(heartbeatRequest); + Assert.assertNotNull(heartbeatResponse); + + // We call the getSubClusters method again and filter the Active SubCluster + // We want to get 1 active SubCluster + GetSubClustersInfoRequest subClustersInfoRequest1 = + GetSubClustersInfoRequest.newInstance(true); + GetSubClustersInfoResponse subClustersInfoResp1 = + stateStoreService.getSubClusters(subClustersInfoRequest1); + Assert.assertNotNull(subClustersInfoResp1); + List subClusterInfos1 = subClustersInfoResp1.getSubClusters(); + Assert.assertNotNull(subClusterInfos1); + Assert.assertEquals(1, subClusterInfos1.size()); + } + + private SubClusterInfo createSubClusterInfo(SubClusterId clusterId) { + + String amRMAddress = "1.2.3.4:1"; + String clientRMAddress = "1.2.3.4:2"; + String rmAdminAddress = "1.2.3.4:3"; + String webAppAddress = "1.2.3.4:4"; + + return SubClusterInfo.newInstance(clusterId, amRMAddress, + clientRMAddress, rmAdminAddress, webAppAddress, SubClusterState.SC_NEW, + Time.now(), "capability"); + } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationStateStoreServiceMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationStateStoreServiceMetrics.java new file mode 100644 index 00000000000..e7a79b843e9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationStateStoreServiceMetrics.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.federation; + +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.Assert.assertEquals; + +/** + * Unit tests for TestFederationStateStoreServiceMetrics. + */ +public class TestFederationStateStoreServiceMetrics { + + public static final Logger LOG = + LoggerFactory.getLogger(TestFederationStateStoreServiceMetrics.class); + + private static FederationStateStoreServiceMetrics metrics = + FederationStateStoreServiceMetrics.getMetrics(); + + private MockBadFederationStateStoreService badStateStore = + new MockBadFederationStateStoreService(); + private MockGoodFederationStateStoreService goodStateStore = + new MockGoodFederationStateStoreService(); + + // Records failures for all calls + private class MockBadFederationStateStoreService { + public void registerSubCluster() { + LOG.info("Mocked: failed registerSubCluster call"); + FederationStateStoreServiceMetrics.failedStateStoreServiceCall(); + } + } + + // Records successes for all calls + private class MockGoodFederationStateStoreService { + public void registerSubCluster(long duration) { + LOG.info("Mocked: successful registerSubCluster call with duration {}", duration); + FederationStateStoreServiceMetrics.succeededStateStoreServiceCall(duration); + } + } + + @Test + public void testFederationStateStoreServiceMetricInit() { + LOG.info("Test: aggregate metrics are initialized correctly"); + assertEquals(0, FederationStateStoreServiceMetrics.getNumSucceededCalls()); + assertEquals(0, FederationStateStoreServiceMetrics.getNumFailedCalls()); + LOG.info("Test: aggregate metrics are updated correctly"); + } + + @Test + public void testRegisterSubClusterSuccessfulCalls() { + LOG.info("Test: Aggregate and method successful calls updated correctly."); + + long totalGoodBefore = FederationStateStoreServiceMetrics.getNumSucceededCalls(); + long apiGoodBefore = FederationStateStoreServiceMetrics. 
+ getNumSucceessfulCallsForMethod("registerSubCluster"); + + // Call the registerSubCluster method + goodStateStore.registerSubCluster(100); + + assertEquals(totalGoodBefore + 1, + FederationStateStoreServiceMetrics.getNumSucceededCalls()); + assertEquals(100, FederationStateStoreServiceMetrics.getLatencySucceededCalls(), 0); + assertEquals(apiGoodBefore + 1, + FederationStateStoreServiceMetrics.getNumSucceededCalls()); + double latencySucceessfulCalls = + FederationStateStoreServiceMetrics.getLatencySucceessfulCallsForMethod( + "registerSubCluster"); + assertEquals(100, latencySucceessfulCalls, 0); + + LOG.info("Test: Running stats correctly calculated for 2 metrics"); + + // Call the registerSubCluster method + goodStateStore.registerSubCluster(200); + + assertEquals(totalGoodBefore + 2, + FederationStateStoreServiceMetrics.getNumSucceededCalls()); + assertEquals(150, FederationStateStoreServiceMetrics.getLatencySucceededCalls(), 0); + assertEquals(apiGoodBefore + 2, + FederationStateStoreServiceMetrics.getNumSucceededCalls()); + double latencySucceessfulCalls2 = + FederationStateStoreServiceMetrics.getLatencySucceessfulCallsForMethod( + "registerSubCluster"); + assertEquals(150, latencySucceessfulCalls2, 0); + } +} From 5119d0c72fe66da012bab3d8508dfc1b04baba4b Mon Sep 17 00:00:00 2001 From: rdingankar Date: Tue, 18 Apr 2023 10:47:37 -0700 Subject: [PATCH 79/97] HDFS-16982 Use the right Quantiles Array for Inverse Quantiles snapshot (#5556) --- .../metrics2/lib/MutableInverseQuantiles.java | 14 +- .../hadoop/metrics2/lib/MutableQuantiles.java | 32 ++-- .../metrics2/lib/TestMutableMetrics.java | 148 +++++++++++++++--- .../apache/hadoop/test/MetricsAsserts.java | 4 +- .../monitor/ContainerMetrics.java | 4 +- 5 files changed, 163 insertions(+), 39 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java index a3d579cb9e7..f99ff441df6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableInverseQuantiles.java @@ -21,7 +21,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.metrics2.util.Quantile; -import org.apache.hadoop.metrics2.util.SampleQuantiles; import java.text.DecimalFormat; import static org.apache.hadoop.metrics2.lib.Interns.info; @@ -65,7 +64,7 @@ public class MutableInverseQuantiles extends MutableQuantiles{ } /** - * Sets quantileInfo and estimator. + * Sets quantileInfo. 
* * @param ucName capitalized name of the metric * @param uvName capitalized type of the values @@ -74,8 +73,6 @@ public class MutableInverseQuantiles extends MutableQuantiles{ * @param df Number formatter for inverse percentile value */ void setQuantiles(String ucName, String uvName, String desc, String lvName, DecimalFormat df) { - // Construct the MetricsInfos for inverse quantiles, converting to inverse percentiles - setQuantileInfos(INVERSE_QUANTILES.length); for (int i = 0; i < INVERSE_QUANTILES.length; i++) { double inversePercentile = 100 * (1 - INVERSE_QUANTILES[i].quantile); String nameTemplate = ucName + df.format(inversePercentile) + "thInversePercentile" + uvName; @@ -83,7 +80,14 @@ public class MutableInverseQuantiles extends MutableQuantiles{ + " with " + getInterval() + " second interval for " + desc; addQuantileInfo(i, info(nameTemplate, descTemplate)); } + } - setEstimator(new SampleQuantiles(INVERSE_QUANTILES)); + /** + * Returns the array of Inverse Quantiles declared in MutableInverseQuantiles. + * + * @return array of Inverse Quantiles + */ + public synchronized Quantile[] getQuantiles() { + return INVERSE_QUANTILES; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java index edb2159f17b..d4c4c6747b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java @@ -49,9 +49,9 @@ import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFact public class MutableQuantiles extends MutableMetric { @VisibleForTesting - public static final Quantile[] quantiles = { new Quantile(0.50, 0.050), + public static final Quantile[] QUANTILES = {new Quantile(0.50, 0.050), new Quantile(0.75, 0.025), new Quantile(0.90, 0.010), - new Quantile(0.95, 0.005), new Quantile(0.99, 0.001) }; + new Quantile(0.95, 0.005), new Quantile(0.99, 0.001)}; private MetricsInfo numInfo; private MetricsInfo[] quantileInfos; @@ -98,11 +98,15 @@ public class MutableQuantiles extends MutableMetric { "Number of %s for %s with %ds interval", lsName, desc, interval))); scheduledTask = scheduler.scheduleWithFixedDelay(new RolloverSample(this), interval, interval, TimeUnit.SECONDS); + // Construct the MetricsInfos for the quantiles, converting to percentiles + Quantile[] quantilesArray = getQuantiles(); + setQuantileInfos(quantilesArray.length); setQuantiles(ucName, uvName, desc, lvName, decimalFormat); + setEstimator(new SampleQuantiles(quantilesArray)); } /** - * Sets quantileInfo and estimator. + * Sets quantileInfo. 
* * @param ucName capitalized name of the metric * @param uvName capitalized type of the values @@ -111,30 +115,27 @@ public class MutableQuantiles extends MutableMetric { * @param pDecimalFormat Number formatter for percentile value */ void setQuantiles(String ucName, String uvName, String desc, String lvName, DecimalFormat pDecimalFormat) { - // Construct the MetricsInfos for the quantiles, converting to percentiles - setQuantileInfos(quantiles.length); - for (int i = 0; i < quantiles.length; i++) { - double percentile = 100 * quantiles[i].quantile; + for (int i = 0; i < QUANTILES.length; i++) { + double percentile = 100 * QUANTILES[i].quantile; String nameTemplate = ucName + pDecimalFormat.format(percentile) + "thPercentile" + uvName; String descTemplate = pDecimalFormat.format(percentile) + " percentile " + lvName + " with " + getInterval() + " second interval for " + desc; addQuantileInfo(i, info(nameTemplate, descTemplate)); } - - setEstimator(new SampleQuantiles(quantiles)); } public MutableQuantiles() {} @Override public synchronized void snapshot(MetricsRecordBuilder builder, boolean all) { + Quantile[] quantilesArray = getQuantiles(); if (all || changed()) { builder.addGauge(numInfo, previousCount); - for (int i = 0; i < quantiles.length; i++) { + for (int i = 0; i < quantilesArray.length; i++) { long newValue = 0; // If snapshot is null, we failed to update since the window was empty if (previousSnapshot != null) { - newValue = previousSnapshot.get(quantiles[i]); + newValue = previousSnapshot.get(quantilesArray[i]); } builder.addGauge(quantileInfos[i], newValue); } @@ -148,6 +149,15 @@ public class MutableQuantiles extends MutableMetric { estimator.insert(value); } + /** + * Returns the array of Quantiles declared in MutableQuantiles. + * + * @return array of Quantiles + */ + public synchronized Quantile[] getQuantiles() { + return QUANTILES; + } + /** * Set info about the metrics. * diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java index 9984c9b95fb..85635e01e13 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java @@ -52,6 +52,8 @@ public class TestMutableMetrics { private static final Logger LOG = LoggerFactory.getLogger(TestMutableMetrics.class); private static final double EPSILON = 1e-42; + private static final int SLEEP_TIME_MS = 6 * 1000; // 6 seconds. 
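The change above works because snapshot() now resolves the quantile array through the overridable getQuantiles() accessor instead of the static field, so MutableInverseQuantiles reports against its own set. A condensed sketch of that dispatch, using simplified stand-in classes rather than the real ones:

    import org.apache.hadoop.metrics2.MetricsRecordBuilder;
    import org.apache.hadoop.metrics2.lib.MutableInverseQuantiles;
    import org.apache.hadoop.metrics2.lib.MutableQuantiles;
    import org.apache.hadoop.metrics2.util.Quantile;

    // Simplified stand-ins that show only the dispatch.
    class QuantilesSketch {
      public Quantile[] getQuantiles() {
        return MutableQuantiles.QUANTILES;                  // percentile set
      }
      public void snapshotSketch(MetricsRecordBuilder builder) {
        for (Quantile q : getQuantiles()) {                 // virtual call: the subclass array wins
          // add one gauge per entry of the resolved array
        }
      }
    }
    class InverseQuantilesSketch extends QuantilesSketch {
      @Override
      public Quantile[] getQuantiles() {
        return MutableInverseQuantiles.INVERSE_QUANTILES;   // inverse-percentile set
      }
    }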
+ private static final int SAMPLE_COUNT = 1000; /** * Test the snapshot method @@ -395,14 +397,14 @@ public class TestMutableMetrics { MutableQuantiles quantiles = registry.newQuantiles("foo", "stat", "Ops", "Latency", 5); // Push some values in and wait for it to publish - long start = System.nanoTime() / 1000000; - for (long i = 1; i <= 1000; i++) { + long startTimeMS = System.currentTimeMillis(); + for (long i = 1; i <= SAMPLE_COUNT; i++) { quantiles.add(i); quantiles.add(1001 - i); } - long end = System.nanoTime() / 1000000; + long endTimeMS = System.currentTimeMillis(); - Thread.sleep(6000 - (end - start)); + Thread.sleep(SLEEP_TIME_MS - (endTimeMS - startTimeMS)); registry.snapshot(mb, false); @@ -414,10 +416,8 @@ public class TestMutableMetrics { } // Verify the results are within our requirements - verify(mb).addGauge( - info("FooNumOps", "Number of ops for stat with 5s interval"), - (long) 2000); - Quantile[] quants = MutableQuantiles.quantiles; + verify(mb).addGauge(info("FooNumOps", "Number of ops for stat with 5s interval"), 2000L); + Quantile[] quants = MutableQuantiles.QUANTILES; String name = "Foo%dthPercentileLatency"; String desc = "%d percentile latency with 5 second interval for stat"; for (Quantile q : quants) { @@ -431,6 +431,46 @@ public class TestMutableMetrics { } } + /** + * Ensure that quantile estimates from {@link MutableInverseQuantiles} are within + * specified error bounds. + */ + @Test(timeout = 30000) + public void testMutableInverseQuantilesError() throws Exception { + MetricsRecordBuilder mb = mockMetricsRecordBuilder(); + MetricsRegistry registry = new MetricsRegistry("test"); + // Use a 5s rollover period + MutableQuantiles inverseQuantiles = registry.newInverseQuantiles("foo", "stat", "Ops", + "Latency", 5); + // Push some values in and wait for it to publish + long startTimeMS = System.currentTimeMillis(); + for (long i = 1; i <= SAMPLE_COUNT; i++) { + inverseQuantiles.add(i); + inverseQuantiles.add(1001 - i); + } + long endTimeMS = System.currentTimeMillis(); + + Thread.sleep(SLEEP_TIME_MS - (endTimeMS - startTimeMS)); + + registry.snapshot(mb, false); + + // Verify the results are within our requirements + verify(mb).addGauge( + info("FooNumOps", "Number of ops for stat with 5s interval"), 2000L); + Quantile[] inverseQuants = MutableInverseQuantiles.INVERSE_QUANTILES; + String name = "Foo%dthInversePercentileLatency"; + String desc = "%d inverse percentile latency with 5 second interval for stat"; + for (Quantile q : inverseQuants) { + int inversePercentile = (int) (100 * (1 - q.quantile)); + int error = (int) (1000 * q.error); + String n = String.format(name, inversePercentile); + String d = String.format(desc, inversePercentile); + long expected = (long) (q.quantile * 1000); + verify(mb).addGauge(eq(info(n, d)), leq(expected + error)); + verify(mb).addGauge(eq(info(n, d)), geq(expected - error)); + } + } + /** * Test that {@link MutableQuantiles} rolls the window over at the specified * interval. 
@@ -443,21 +483,21 @@ public class TestMutableMetrics { MutableQuantiles quantiles = registry.newQuantiles("foo", "stat", "Ops", "Latency", 5); - Quantile[] quants = MutableQuantiles.quantiles; + Quantile[] quants = MutableQuantiles.QUANTILES; String name = "Foo%dthPercentileLatency"; String desc = "%d percentile latency with 5 second interval for stat"; // Push values for three intervals - long start = System.nanoTime() / 1000000; + long startTimeMS = System.currentTimeMillis(); for (int i = 1; i <= 3; i++) { // Insert the values - for (long j = 1; j <= 1000; j++) { + for (long j = 1; j <= SAMPLE_COUNT; j++) { quantiles.add(i); } // Sleep until 1s after the next 5s interval, to let the metrics // roll over - long sleep = (start + (5000 * i) + 1000) - (System.nanoTime() / 1000000); - Thread.sleep(sleep); + long sleepTimeMS = startTimeMS + (5000L * i) + 1000 - System.currentTimeMillis(); + Thread.sleep(sleepTimeMS); // Verify that the window reset, check it has the values we pushed in registry.snapshot(mb, false); for (Quantile q : quants) { @@ -470,8 +510,7 @@ public class TestMutableMetrics { // Verify the metrics were added the right number of times verify(mb, times(3)).addGauge( - info("FooNumOps", "Number of ops for stat with 5s interval"), - (long) 1000); + info("FooNumOps", "Number of ops for stat with 5s interval"), 1000L); for (Quantile q : quants) { int percentile = (int) (100 * q.quantile); String n = String.format(name, percentile); @@ -481,7 +520,56 @@ public class TestMutableMetrics { } /** - * Test that {@link MutableQuantiles} rolls over correctly even if no items + * Test that {@link MutableInverseQuantiles} rolls the window over at the specified + * interval. + */ + @Test(timeout = 30000) + public void testMutableInverseQuantilesRollover() throws Exception { + MetricsRecordBuilder mb = mockMetricsRecordBuilder(); + MetricsRegistry registry = new MetricsRegistry("test"); + // Use a 5s rollover period + MutableQuantiles inverseQuantiles = registry.newInverseQuantiles("foo", "stat", "Ops", + "Latency", 5); + + Quantile[] quants = MutableInverseQuantiles.INVERSE_QUANTILES; + String name = "Foo%dthInversePercentileLatency"; + String desc = "%d inverse percentile latency with 5 second interval for stat"; + + // Push values for three intervals + long startTimeMS = System.currentTimeMillis(); + for (int i = 1; i <= 3; i++) { + // Insert the values + for (long j = 1; j <= SAMPLE_COUNT; j++) { + inverseQuantiles.add(i); + } + // Sleep until 1s after the next 5s interval, to let the metrics + // roll over + long sleepTimeMS = startTimeMS + (5000L * i) + 1000 - System.currentTimeMillis(); + Thread.sleep(sleepTimeMS); + // Verify that the window reset, check it has the values we pushed in + registry.snapshot(mb, false); + for (Quantile q : quants) { + int inversePercentile = (int) (100 * (1 - q.quantile)); + String n = String.format(name, inversePercentile); + String d = String.format(desc, inversePercentile); + verify(mb).addGauge(info(n, d), (long) i); + } + } + + // Verify the metrics were added the right number of times + verify(mb, times(3)).addGauge( + info("FooNumOps", "Number of ops for stat with 5s interval"), 1000L); + + for (Quantile q : quants) { + int inversePercentile = (int) (100 * (1 - q.quantile)); + String n = String.format(name, inversePercentile); + String d = String.format(desc, inversePercentile); + verify(mb, times(3)).addGauge(eq(info(n, d)), anyLong()); + } + } + + /** + * Test that {@link MutableQuantiles} rolls over correctly even if no items. 
* have been added to the window */ @Test(timeout = 30000) @@ -495,11 +583,33 @@ public class TestMutableMetrics { // Check it initially quantiles.snapshot(mb, true); verify(mb).addGauge( - info("FooNumOps", "Number of ops for stat with 5s interval"), (long) 0); - Thread.sleep(6000); + info("FooNumOps", "Number of ops for stat with 5s interval"), 0L); + Thread.sleep(SLEEP_TIME_MS); quantiles.snapshot(mb, false); verify(mb, times(2)).addGauge( - info("FooNumOps", "Number of ops for stat with 5s interval"), (long) 0); + info("FooNumOps", "Number of ops for stat with 5s interval"), 0L); + } + + /** + * Test that {@link MutableInverseQuantiles} rolls over correctly even if no items + * have been added to the window + */ + @Test(timeout = 30000) + public void testMutableInverseQuantilesEmptyRollover() throws Exception { + MetricsRecordBuilder mb = mockMetricsRecordBuilder(); + MetricsRegistry registry = new MetricsRegistry("test"); + // Use a 5s rollover period + MutableQuantiles inverseQuantiles = registry.newInverseQuantiles("foo", "stat", "Ops", + "Latency", 5); + + // Check it initially + inverseQuantiles.snapshot(mb, true); + verify(mb).addGauge( + info("FooNumOps", "Number of ops for stat with 5s interval"), 0L); + Thread.sleep(SLEEP_TIME_MS); + inverseQuantiles.snapshot(mb, false); + verify(mb, times(2)).addGauge( + info("FooNumOps", "Number of ops for stat with 5s interval"), 0L); } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java index 8210322f8f4..38b475a2776 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java @@ -393,7 +393,7 @@ public class MetricsAsserts { public static void assertQuantileGauges(String prefix, MetricsRecordBuilder rb, String valueName) { verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0L)); - for (Quantile q : MutableQuantiles.quantiles) { + for (Quantile q : MutableQuantiles.QUANTILES) { String nameTemplate = prefix + "%dthPercentile" + valueName; int percentile = (int) (100 * q.quantile); verify(rb).addGauge( @@ -414,7 +414,7 @@ public class MetricsAsserts { public static void assertInverseQuantileGauges(String prefix, MetricsRecordBuilder rb, String valueName) { verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0L)); - for (Quantile q : MutableQuantiles.quantiles) { + for (Quantile q : MutableQuantiles.QUANTILES) { String nameTemplate = prefix + "%dthInversePercentile" + valueName; int percentile = (int) (100 * q.quantile); verify(rb).addGauge( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java index bca7c3fa1b3..7b8198366f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java @@ 
-155,7 +155,7 @@ public class ContainerMetrics implements MetricsSource { .newQuantiles(PMEM_USAGE_QUANTILES_NAME, "Physical memory quantiles", "Usage", "MBs", 1); ContainerMetricsQuantiles memEstimator = - new ContainerMetricsQuantiles(MutableQuantiles.quantiles); + new ContainerMetricsQuantiles(MutableQuantiles.QUANTILES); pMemMBQuantiles.setEstimator(memEstimator); this.cpuCoreUsagePercent = registry.newStat( @@ -166,7 +166,7 @@ public class ContainerMetrics implements MetricsSource { "Physical Cpu core percent usage quantiles", "Usage", "Percents", 1); ContainerMetricsQuantiles cpuEstimator = - new ContainerMetricsQuantiles(MutableQuantiles.quantiles); + new ContainerMetricsQuantiles(MutableQuantiles.QUANTILES); cpuCoreUsagePercentQuantiles.setEstimator(cpuEstimator); this.milliVcoresUsed = registry.newStat( VCORE_USAGE_METRIC_NAME, "1000 times Vcore usage", "Usage", From b6c0ec796ef151e909918a85ece7e4c21f5c7d2c Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Wed, 19 Apr 2023 21:17:22 +0200 Subject: [PATCH 80/97] HADOOP-18687. Remove json-smart dependency. (#5549). Contributed by PJ Fanning. Signed-off-by: Ayush Saxena --- LICENSE-binary | 1 - hadoop-project/pom.xml | 6 ------ 2 files changed, 7 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 101c1f10daa..be2f6d07551 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -299,7 +299,6 @@ javax.inject:javax.inject:1 log4j:log4j:1.2.17 net.java.dev.jna:jna:5.2.0 net.minidev:accessors-smart:1.2 -net.minidev:json-smart:2.4.7 org.apache.avro:avro:1.9.2 org.apache.commons:commons-collections4:4.2 org.apache.commons:commons-compress:1.21 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 4b80849af0a..354c0b0884f 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -210,7 +210,6 @@ 1.1.3.Final 1.0.2 5.4.0 - 2.4.7 9.8.1 v12.22.1 v1.22.5 @@ -1729,11 +1728,6 @@ ${dnsjava.version} - - net.minidev - json-smart - ${json-smart.version} - org.skyscreamer jsonassert From 9e3d5c754b4c576d69058c33342f4706d820716c Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 20 Apr 2023 10:26:08 +0530 Subject: [PATCH 81/97] Revert "HADOOP-18687. Remove json-smart dependency. (#5549). Contributed by PJ Fanning." This reverts commit b6c0ec796ef151e909918a85ece7e4c21f5c7d2c. --- LICENSE-binary | 1 + hadoop-project/pom.xml | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/LICENSE-binary b/LICENSE-binary index be2f6d07551..101c1f10daa 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -299,6 +299,7 @@ javax.inject:javax.inject:1 log4j:log4j:1.2.17 net.java.dev.jna:jna:5.2.0 net.minidev:accessors-smart:1.2 +net.minidev:json-smart:2.4.7 org.apache.avro:avro:1.9.2 org.apache.commons:commons-collections4:4.2 org.apache.commons:commons-compress:1.21 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 354c0b0884f..4b80849af0a 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -210,6 +210,7 @@ 1.1.3.Final 1.0.2 5.4.0 + 2.4.7 9.8.1 v12.22.1 v1.22.5 @@ -1728,6 +1729,11 @@ ${dnsjava.version} + + net.minidev + json-smart + ${json-smart.version} + org.skyscreamer jsonassert From 1ff7a65b9ffdea9652d1b5b6ddc249548f827cea Mon Sep 17 00:00:00 2001 From: Neil Date: Thu, 20 Apr 2023 17:49:18 +0800 Subject: [PATCH 82/97] HDFS-16954. RBF: The operation of renaming a multi-subcluster directory to a single-cluster directory should throw ioexception. (#5483). Contributed by Max Xie. 
Reviewed-by: Inigo Goiri Signed-off-by: Ayush Saxena --- .../router/RouterClientProtocol.java | 10 +++++ ...MultipleDestinationMountTableResolver.java | 41 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java index ee8ae5885a6..34e3666a947 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java @@ -614,6 +614,11 @@ public class RouterClientProtocol implements ClientProtocol { new Class[] {String.class, String.class}, new RemoteParam(), dstParam); if (isMultiDestDirectory(src)) { + if (locs.size() != srcLocations.size()) { + throw new IOException("Rename of " + src + " to " + dst + " is not" + + " allowed. The number of remote locations for both source and" + + " target should be same."); + } return rpcClient.invokeAll(locs, method); } else { return rpcClient.invokeSequential(locs, method, Boolean.class, @@ -641,6 +646,11 @@ public class RouterClientProtocol implements ClientProtocol { new Class[] {String.class, String.class, options.getClass()}, new RemoteParam(), dstParam, options); if (isMultiDestDirectory(src)) { + if (locs.size() != srcLocations.size()) { + throw new IOException("Rename of " + src + " to " + dst + " is not" + + " allowed. The number of remote locations for both source and" + + " target should be same."); + } rpcClient.invokeConcurrent(locs, method); } else { rpcClient.invokeSequential(locs, method, null, null); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCMultipleDestinationMountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCMultipleDestinationMountTableResolver.java index b05337443f6..cbc11b27b2b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCMultipleDestinationMountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCMultipleDestinationMountTableResolver.java @@ -720,6 +720,47 @@ public class TestRouterRPCMultipleDestinationMountTableResolver { } } + /** + * Test rename a dir from src dir (mapped to both ns0 and ns1) to ns0. 
+ */ + @Test + public void testRenameWithMultiDestinations() throws Exception { + //create a mount point with multiple destinations + String srcDir = "/mount-source-dir"; + Path path = new Path(srcDir); + Map destMap = new HashMap<>(); + destMap.put("ns0", srcDir); + destMap.put("ns1", srcDir); + nnFs0.mkdirs(path); + nnFs1.mkdirs(path); + MountTable addEntry = + MountTable.newInstance(srcDir, destMap); + addEntry.setDestOrder(DestinationOrder.RANDOM); + assertTrue(addMountTable(addEntry)); + + //create a mount point with a single destinations ns0 + String targetDir = "/ns0_test"; + nnFs0.mkdirs(new Path(targetDir)); + MountTable addDstEntry = MountTable.newInstance(targetDir, + Collections.singletonMap("ns0", targetDir)); + assertTrue(addMountTable(addDstEntry)); + + //mkdir sub dirs in srcDir mapping ns0 & ns1 + routerFs.mkdirs(new Path(srcDir + "/dir1")); + routerFs.mkdirs(new Path(srcDir + "/dir1/dir_1")); + routerFs.mkdirs(new Path(srcDir + "/dir1/dir_2")); + routerFs.mkdirs(new Path(targetDir)); + + //try to rename sub dir in srcDir (mapping to ns0 & ns1) to targetDir + // (mapping ns0) + LambdaTestUtils.intercept(IOException.class, "The number of" + + " remote locations for both source and target should be same.", + () -> { + routerFs.rename(new Path(srcDir + "/dir1/dir_1"), + new Path(targetDir)); + }); + } + /** * Test to verify rename operation on directories in case of multiple * destinations. From d07356e60e5a46357df0b7d883e89547ccea8f52 Mon Sep 17 00:00:00 2001 From: Nikita Eshkeev Date: Thu, 20 Apr 2023 13:42:44 +0300 Subject: [PATCH 83/97] HADOOP-18597. Simplify single node instructions for creating directories for Map Reduce. (#5305) Signed-off-by: Ayush Saxena --- .../hadoop-common/src/site/markdown/SingleCluster.md.vm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm b/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm index bbea16855e5..8153dce5c3f 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm +++ b/hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm @@ -157,8 +157,7 @@ The following instructions are to run a MapReduce job locally. If you want to ex 4. Make the HDFS directories required to execute MapReduce jobs: - $ bin/hdfs dfs -mkdir /user - $ bin/hdfs dfs -mkdir /user/ + $ bin/hdfs dfs -mkdir -p /user/ 5. Copy the input files into the distributed filesystem: From 0918c87fa2072100c51c774a0b0861cd5d09daaf Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Wed, 19 Apr 2023 21:17:22 +0200 Subject: [PATCH 84/97] HADOOP-18687. Remove json-smart dependency. (#5549). Contributed by PJ Fanning. 
Signed-off-by: Ayush Saxena --- LICENSE-binary | 1 - hadoop-project/pom.xml | 6 ------ 2 files changed, 7 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 101c1f10daa..be2f6d07551 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -299,7 +299,6 @@ javax.inject:javax.inject:1 log4j:log4j:1.2.17 net.java.dev.jna:jna:5.2.0 net.minidev:accessors-smart:1.2 -net.minidev:json-smart:2.4.7 org.apache.avro:avro:1.9.2 org.apache.commons:commons-collections4:4.2 org.apache.commons:commons-compress:1.21 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 4b80849af0a..354c0b0884f 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -210,7 +210,6 @@ 1.1.3.Final 1.0.2 5.4.0 - 2.4.7 9.8.1 v12.22.1 v1.22.5 @@ -1729,11 +1728,6 @@ ${dnsjava.version} - - net.minidev - json-smart - ${json-smart.version} - org.skyscreamer jsonassert From 9e24ed2196e062289e1ad81a90b0cc1a4ad45c32 Mon Sep 17 00:00:00 2001 From: Christos Bisias Date: Thu, 20 Apr 2023 20:11:25 +0300 Subject: [PATCH 85/97] HADOOP-18691. Add a CallerContext getter on the Schedulable interface (#5540) --- .../org/apache/hadoop/ipc/Schedulable.java | 14 ++++++++++++++ .../java/org/apache/hadoop/ipc/Server.java | 5 +++++ .../hadoop/ipc/TestIdentityProviders.java | 19 ++++++++++++++----- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Schedulable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Schedulable.java index 3b28d85428b..00c9994e2a4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Schedulable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Schedulable.java @@ -29,5 +29,19 @@ import org.apache.hadoop.security.UserGroupInformation; public interface Schedulable { public UserGroupInformation getUserGroupInformation(); + /** + * This is overridden only in {@link Server.Call}. + * The CallerContext field will be used to carry information + * about the user in cases where UGI proves insufficient. + * Any other classes that might try to use this method, + * will get an UnsupportedOperationException. 
+ * + * @return an instance of CallerContext if method + * is overridden else get an UnsupportedOperationException + */ + default CallerContext getCallerContext() { + throw new UnsupportedOperationException("Invalid operation."); + } + int getPriorityLevel(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 26f519716fe..9851f9b09fb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -1086,6 +1086,11 @@ public abstract class Server { return getRemoteUser(); } + @Override + public CallerContext getCallerContext() { + return this.callerContext; + } + @Override public int getPriorityLevel() { return this.priorityLevel; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIdentityProviders.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIdentityProviders.java index 263841246bf..b528186ad26 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIdentityProviders.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIdentityProviders.java @@ -20,8 +20,9 @@ package org.apache.hadoop.ipc; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.assertj.core.api.Assertions.assertThat; +import org.apache.hadoop.test.LambdaTestUtils; import org.junit.Test; import java.util.List; @@ -33,7 +34,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.conf.Configuration; public class TestIdentityProviders { - public class FakeSchedulable implements Schedulable { + public static class FakeSchedulable implements Schedulable { public FakeSchedulable() { } @@ -61,7 +62,9 @@ public class TestIdentityProviders { CommonConfigurationKeys.IPC_IDENTITY_PROVIDER_KEY, IdentityProvider.class); - assertTrue(providers.size() == 1); + assertThat(providers) + .describedAs("provider list") + .hasSize(1); IdentityProvider ip = providers.get(0); assertNotNull(ip); @@ -69,14 +72,20 @@ public class TestIdentityProviders { } @Test - public void testUserIdentityProvider() throws IOException { + public void testUserIdentityProvider() throws Exception { UserIdentityProvider uip = new UserIdentityProvider(); - String identity = uip.makeIdentity(new FakeSchedulable()); + FakeSchedulable fakeSchedulable = new FakeSchedulable(); + String identity = uip.makeIdentity(fakeSchedulable); // Get our username UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); String username = ugi.getUserName(); assertEquals(username, identity); + + // FakeSchedulable doesn't override getCallerContext() + // accessing it should throw an UnsupportedOperationException + LambdaTestUtils.intercept(UnsupportedOperationException.class, + "Invalid operation.", fakeSchedulable::getCallerContext); } } From 964c1902c8054dfe13c787222a12fb0daf1aaab9 Mon Sep 17 00:00:00 2001 From: Ashutosh Gupta Date: Fri, 21 Apr 2023 13:03:22 +0100 Subject: [PATCH 86/97] YARN-11463. 
Node Labels root directory creation doesn't have a retry logic (#5562) Co-authored-by: Ashutosh Gupta --- .../hadoop/yarn/conf/YarnConfiguration.java | 10 +++++++ .../nodelabels/store/AbstractFSNodeStore.java | 28 +++++++++++++++++-- .../src/main/resources/yarn-default.xml | 16 +++++++++++ .../TestFileSystemNodeLabelsStore.java | 3 -- 4 files changed, 52 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 6d77eb492dc..a3faec7171b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -217,6 +217,16 @@ public class YarnConfiguration extends Configuration { public static final int DEFAULT_RM_APPLICATION_MAX_TAG_LENGTH = 100; + public static final String NODE_STORE_ROOT_DIR_NUM_RETRIES = + RM_PREFIX + "nodestore-rootdir.num-retries"; + + public static final int NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES = 1000; + + public static final String NODE_STORE_ROOT_DIR_RETRY_INTERVAL = + RM_PREFIX + "nodestore-rootdir.retry-interval-ms"; + + public static final int NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL = 1000; + public static final String RM_APPLICATION_MASTER_SERVICE_PROCESSORS = RM_PREFIX + "application-master-service.processors"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java index 81514942af0..a697be19512 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java @@ -65,8 +65,32 @@ public abstract class AbstractFSNodeStore { this.fsWorkingPath = fsStorePath; this.manager = mgr; initFileSystem(conf); - // mkdir of root dir path - fs.mkdirs(fsWorkingPath); + // mkdir of root dir path with retry logic + int maxRetries = conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_RETRIES, + YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES); + int retryCount = 0; + boolean success = fs.mkdirs(fsWorkingPath); + + while (!success && retryCount < maxRetries) { + try { + if (!fs.exists(fsWorkingPath)) { + success = fs.mkdirs(fsWorkingPath); + } else { + success = true; + } + } catch (IOException e) { + retryCount++; + if (retryCount >= maxRetries) { + throw e; + } + try { + Thread.sleep(conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_INTERVAL, + YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL)); + } catch (InterruptedException ie) { + throw new RuntimeException(ie); + } + } + } this.replication = conf.getInt(YarnConfiguration.FS_STORE_FILE_REPLICATION, YarnConfiguration.DEFAULT_FS_STORE_FILE_REPLICATION); LOG.info("Created store directory :" + fsWorkingPath); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index b9385d1c276..4fc414f0e01 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -5177,4 +5177,20 @@ 1 + + + Number of Retries while trying to make root directory for node store. + + yarn.resourcemanager.nodestore-rootdir.num-retries + 1000 + + + + + Interval in ms between retries while trying to make root directory for node store. + + yarn.resourcemanager.nodestore-rootdir.retry-interval-ms + 1000 + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java index 099684318f4..a861b0654ea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java @@ -359,9 +359,6 @@ public class TestFileSystemNodeLabelsStore extends NodeLabelTestBase { mockStore.setFs(mockFs); verifyMkdirsCount(mockStore, true, 1); - verifyMkdirsCount(mockStore, false, 2); - verifyMkdirsCount(mockStore, true, 3); - verifyMkdirsCount(mockStore, false, 4); } private void verifyMkdirsCount(FileSystemNodeLabelsStore store, From 742e07d9c39eb19dd1bc4fe132c47c82ee7a9f3b Mon Sep 17 00:00:00 2001 From: LiuGuH <444506464@qq.com> Date: Sat, 22 Apr 2023 03:36:08 +0800 Subject: [PATCH 87/97] HADOOP-18710. Add RPC metrics for response time (#5545). Contributed by liuguanghua. Reviewed-by: Inigo Goiri Signed-off-by: Ayush Saxena --- .../main/java/org/apache/hadoop/ipc/Server.java | 3 +++ .../apache/hadoop/ipc/metrics/RpcMetrics.java | 17 +++++++++++++++++ .../java/org/apache/hadoop/ipc/TestRPC.java | 4 ++++ 3 files changed, 24 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 9851f9b09fb..e9a605a0043 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -627,8 +627,11 @@ public abstract class Server { details.get(Timing.PROCESSING, rpcMetrics.getMetricsTimeUnit()); long waitTime = details.get(Timing.LOCKWAIT, rpcMetrics.getMetricsTimeUnit()); + long responseTime = + details.get(Timing.RESPONSE, rpcMetrics.getMetricsTimeUnit()); rpcMetrics.addRpcLockWaitTime(waitTime); rpcMetrics.addRpcProcessingTime(processingTime); + rpcMetrics.addRpcResponseTime(responseTime); // don't include lock wait for detailed metrics. 
processingTime -= waitTime; String name = call.getDetailedMetricsName(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java index 282eca3cf83..c18562441fc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java @@ -75,6 +75,8 @@ public class RpcMetrics { new MutableQuantiles[intervals.length]; rpcProcessingTimeQuantiles = new MutableQuantiles[intervals.length]; + rpcResponseTimeQuantiles = + new MutableQuantiles[intervals.length]; deferredRpcProcessingTimeQuantiles = new MutableQuantiles[intervals.length]; for (int i = 0; i < intervals.length; i++) { @@ -90,6 +92,10 @@ public class RpcMetrics { "rpcProcessingTime" + interval + "s", "rpc processing time in " + metricsTimeUnit, "ops", "latency", interval); + rpcResponseTimeQuantiles[i] = registry.newQuantiles( + "rpcResponseTime" + interval + "s", + "rpc response time in " + metricsTimeUnit, "ops", + "latency", interval); deferredRpcProcessingTimeQuantiles[i] = registry.newQuantiles( "deferredRpcProcessingTime" + interval + "s", "deferred rpc processing time in " + metricsTimeUnit, "ops", @@ -114,6 +120,8 @@ public class RpcMetrics { MutableQuantiles[] rpcLockWaitTimeQuantiles; @Metric("Processing time") MutableRate rpcProcessingTime; MutableQuantiles[] rpcProcessingTimeQuantiles; + @Metric("Response time") MutableRate rpcResponseTime; + MutableQuantiles[] rpcResponseTimeQuantiles; @Metric("Deferred Processing time") MutableRate deferredRpcProcessingTime; MutableQuantiles[] deferredRpcProcessingTimeQuantiles; @Metric("Number of authentication failures") @@ -282,6 +290,15 @@ public class RpcMetrics { } } + public void addRpcResponseTime(long responseTime) { + rpcResponseTime.add(responseTime); + if (rpcQuantileEnable) { + for (MutableQuantiles q : rpcResponseTimeQuantiles) { + q.add(responseTime); + } + } + } + public void addDeferredRpcProcessingTime(long processingTime) { deferredRpcProcessingTime.add(processingTime); if (rpcQuantileEnable) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java index 9126316fca6..bbc241a420e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java @@ -1336,12 +1336,16 @@ public class TestRPC extends TestRpcBase { 3000, getLongCounter("RpcProcessingTimeNumOps", rpcMetrics)); assertEquals("Expected correct rpc lock wait count", 3000, getLongCounter("RpcLockWaitTimeNumOps", rpcMetrics)); + assertEquals("Expected correct rpc response count", + 3000, getLongCounter("RpcResponseTimeNumOps", rpcMetrics)); assertEquals("Expected zero rpc lock wait time", 0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001); MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s", rpcMetrics); MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s", rpcMetrics); + MetricsAsserts.assertQuantileGauges("RpcResponseTime" + interval + "s", + rpcMetrics); String actualUserVsCon = MetricsAsserts .getStringMetric("NumOpenConnectionsPerUser", rpcMetrics); String proxyUser = From ad49ddda0e1d9632c8c9fcdc78fca8244e1248c9 Mon Sep 17 00:00:00 
2001 From: PJ Fanning Date: Sat, 22 Apr 2023 10:31:09 +0200 Subject: [PATCH 88/97] HADOOP-18711. upgrade nimbus jwt jar due to issues in its embedded shaded json-smart code. (#5573). Contributed by PJ Fanning. Signed-off-by: Ayush Saxena --- LICENSE-binary | 2 +- hadoop-project/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index be2f6d07551..72aca9e662f 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -240,7 +240,7 @@ com.google.guava:guava:20.0 com.google.guava:guava:27.0-jre com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava com.microsoft.azure:azure-storage:7.0.0 -com.nimbusds:nimbus-jose-jwt:9.8.1 +com.nimbusds:nimbus-jose-jwt:9.31 com.squareup.okhttp3:okhttp:4.10.0 com.squareup.okio:okio:3.2.0 com.zaxxer:HikariCP:4.0.3 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 354c0b0884f..a11cbe157f2 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -210,7 +210,7 @@ 1.1.3.Final 1.0.2 5.4.0 - 9.8.1 + 9.31 v12.22.1 v1.22.5 1.10.13 From 3b7783c5494496e08ac24122d0faddfdc55467bf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 22 Apr 2023 16:19:21 +0530 Subject: [PATCH 89/97] HADOOP-18689. Bump jettison from 1.5.3 to 1.5.4 in /hadoop-project (#5502) Co-authored-by: Ayush Saxena --- LICENSE-binary | 2 +- hadoop-project/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 72aca9e662f..00d3e1563e9 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -340,7 +340,7 @@ org.apache.kerby:token-provider:2.0.3 org.apache.solr:solr-solrj:8.8.2 org.apache.yetus:audience-annotations:0.5.0 org.apache.zookeeper:zookeeper:3.6.3 -org.codehaus.jettison:jettison:1.5.3 +org.codehaus.jettison:jettison:1.5.4 org.eclipse.jetty:jetty-annotations:9.4.48.v20220622 org.eclipse.jetty:jetty-http:9.4.48.v20220622 org.eclipse.jetty:jetty-io:9.4.48.v20220622 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index a11cbe157f2..62fae1d1822 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -1515,7 +1515,7 @@ org.codehaus.jettison jettison - 1.5.3 + 1.5.4 stax From b683769fc97a00b837a0efd2020984405856e574 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Sun, 23 Apr 2023 21:31:51 +0200 Subject: [PATCH 90/97] HADOOP-18712. Upgrade to jetty 9.4.51 due to cve (#5574). Contributed by PJ Fanning. 
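Referring back to the HADOOP-18710 change above, which adds an rpcResponseTime rate and per-interval quantiles next to the existing queue and processing-time metrics: the new percentile gauges only appear when RPC quantile metrics are switched on. A minimal sketch, assuming the standard rpc.metrics.quantile.enable and rpc.metrics.percentiles.intervals keys; the metric names in the comments mirror those asserted in TestRPC above.

    import org.apache.hadoop.conf.Configuration;

    public final class RpcResponseTimeMetricsSketch {
      private RpcResponseTimeMetricsSketch() { }

      public static Configuration quantileEnabledConf() {
        Configuration conf = new Configuration();
        // Publish percentile (quantile) gauges in addition to plain rates.
        conf.setBoolean("rpc.metrics.quantile.enable", true);
        // One 60-second rollover window; a comma-separated list also works.
        conf.set("rpc.metrics.percentiles.intervals", "60");
        // An ipc.Server built with this Configuration should then expose
        // RpcResponseTimeNumOps / RpcResponseTimeAvgTime plus
        // rpcResponseTime60s percentile gauges from its RpcMetrics source.
        return conf;
      }
    }
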
Signed-off-by: Ayush Saxena --- LICENSE-binary | 28 ++++++++++++++-------------- hadoop-project/pom.xml | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 00d3e1563e9..fa78fa510a3 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -341,20 +341,20 @@ org.apache.solr:solr-solrj:8.8.2 org.apache.yetus:audience-annotations:0.5.0 org.apache.zookeeper:zookeeper:3.6.3 org.codehaus.jettison:jettison:1.5.4 -org.eclipse.jetty:jetty-annotations:9.4.48.v20220622 -org.eclipse.jetty:jetty-http:9.4.48.v20220622 -org.eclipse.jetty:jetty-io:9.4.48.v20220622 -org.eclipse.jetty:jetty-jndi:9.4.48.v20220622 -org.eclipse.jetty:jetty-plus:9.4.48.v20220622 -org.eclipse.jetty:jetty-security:9.4.48.v20220622 -org.eclipse.jetty:jetty-server:9.4.48.v20220622 -org.eclipse.jetty:jetty-servlet:9.4.48.v20220622 -org.eclipse.jetty:jetty-util:9.4.48.v20220622 -org.eclipse.jetty:jetty-util-ajax:9.4.48.v20220622 -org.eclipse.jetty:jetty-webapp:9.4.48.v20220622 -org.eclipse.jetty:jetty-xml:9.4.48.v20220622 -org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.48.v20220622 -org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.48.v20220622 +org.eclipse.jetty:jetty-annotations:9.4.51.v20230217 +org.eclipse.jetty:jetty-http:9.4.51.v20230217 +org.eclipse.jetty:jetty-io:9.4.51.v20230217 +org.eclipse.jetty:jetty-jndi:9.4.51.v20230217 +org.eclipse.jetty:jetty-plus:9.4.51.v20230217 +org.eclipse.jetty:jetty-security:9.4.51.v20230217 +org.eclipse.jetty:jetty-server:9.4.51.v20230217 +org.eclipse.jetty:jetty-servlet:9.4.51.v20230217 +org.eclipse.jetty:jetty-util:9.4.51.v20230217 +org.eclipse.jetty:jetty-util-ajax:9.4.51.v20230217 +org.eclipse.jetty:jetty-webapp:9.4.51.v20230217 +org.eclipse.jetty:jetty-xml:9.4.51.v20230217 +org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.51.v20230217 +org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.51.v20230217 org.ehcache:ehcache:3.3.1 org.ini4j:ini4j:0.5.4 org.jetbrains.kotlin:kotlin-stdlib:1.4.10 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 62fae1d1822..d9c37d9d6f3 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -37,7 +37,7 @@ true true - 9.4.48.v20220622 + 9.4.51.v20230217 _ _ From 51dcbd1d611015a31c9d68b0d3bcf29f4d73db34 Mon Sep 17 00:00:00 2001 From: wangzhaohui <32935220+wzhallright@users.noreply.github.com> Date: Mon, 24 Apr 2023 03:43:02 +0800 Subject: [PATCH 91/97] HDFS-16988. Improve NameServices info at JournalNode web UI (#5584). Contributed by Zhaohui Wang. Signed-off-by: Ayush Saxena --- .../hadoop-hdfs/src/main/webapps/journal/jn.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/jn.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/jn.js index 7be48f1e04d..260615b0e21 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/jn.js +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/jn.js @@ -61,7 +61,9 @@ function workaround(journals) { for (var i in journals){ - journals[i]['NameService']= journals[i]['modelerType'].split("-")[1]; + var str= journals[i]['modelerType']; + var index= str.indexOf("-"); + journals[i]['NameService']= str.substr(index + 1); } return journals; From 5b23224970b48d41adf96b3f5a520411792fe696 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Mon, 24 Apr 2023 09:17:12 +0200 Subject: [PATCH 92/97] HADOOP-18714. 
Wrong StringUtils.join() called in AbstractContractRootDirectoryTest (#5578) --- .../fs/contract/AbstractContractRootDirectoryTest.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java index 4b5af02ecda..924ebd0065d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java @@ -195,10 +195,9 @@ public abstract class AbstractContractRootDirectoryTest extends AbstractFSContra for (FileStatus status : statuses) { ContractTestUtils.assertDeleted(fs, status.getPath(), false, true, false); } - FileStatus[] rootListStatus = fs.listStatus(root); - assertEquals("listStatus on empty root-directory returned found: " - + join("\n", rootListStatus), - 0, rootListStatus.length); + Assertions.assertThat(fs.listStatus(root)) + .describedAs("ls /") + .hasSize(0); assertNoElements("listFiles(/, false)", fs.listFiles(root, false)); assertNoElements("listFiles(/, true)", From 6a23c376c9d0528e0639dcb2c396f8adb95ec164 Mon Sep 17 00:00:00 2001 From: zhangshuyan <81411509+zhangshuyan0@users.noreply.github.com> Date: Mon, 24 Apr 2023 18:53:25 +0800 Subject: [PATCH 93/97] HDFS-16986. EC: Fix locationBudget in getListing(). (#5582). Contributed by Shuyan Zhang. Signed-off-by: Ayush Saxena Signed-off-by: He Xiaoqiao --- .../namenode/FSDirStatAndListingOp.java | 25 +++++++---- .../hdfs/TestDistributedFileSystem.java | 41 +++++++++++++++++++ 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java index 236e308f410..4547228364d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java @@ -262,13 +262,24 @@ class FSDirStatAndListingOp { needLocation, false); listingCnt++; if (listing[i] instanceof HdfsLocatedFileStatus) { - // Once we hit lsLimit locations, stop. - // This helps to prevent excessively large response payloads. - // Approximate #locations with locatedBlockCount() * repl_factor - LocatedBlocks blks = - ((HdfsLocatedFileStatus)listing[i]).getLocatedBlocks(); - locationBudget -= (blks == null) ? 0 : - blks.locatedBlockCount() * listing[i].getReplication(); + // Once we hit lsLimit locations, stop. + // This helps to prevent excessively large response payloads. + LocatedBlocks blks = + ((HdfsLocatedFileStatus) listing[i]).getLocatedBlocks(); + if (blks != null) { + ErasureCodingPolicy ecPolicy = listing[i].getErasureCodingPolicy(); + if (ecPolicy != null && !ecPolicy.isReplicationPolicy()) { + // Approximate #locations with locatedBlockCount() * + // internalBlocksNum. + locationBudget -= blks.locatedBlockCount() * + (ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits()); + } else { + // Approximate #locations with locatedBlockCount() * + // replicationFactor. 
+ locationBudget -= + blks.locatedBlockCount() * listing[i].getReplication(); + } + } } } // truncate return array if necessary diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index 2773214f45d..9e8c11d7b06 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -29,6 +29,9 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; @@ -674,6 +677,44 @@ public class TestDistributedFileSystem { } } + /** + * This is to test that {@link DFSConfigKeys#DFS_LIST_LIMIT} works as + * expected when {@link DistributedFileSystem#listLocatedStatus} is called. + */ + @Test + public void testGetListingLimit() throws Exception { + final Configuration conf = getTestConfiguration(); + conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 9); + try (MiniDFSCluster cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(9).build()) { + cluster.waitActive(); + ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy(); + final DistributedFileSystem fs = cluster.getFileSystem(); + fs.dfs = spy(fs.dfs); + Path dir1 = new Path("/testRep"); + Path dir2 = new Path("/testEC"); + fs.mkdirs(dir1); + fs.mkdirs(dir2); + fs.setErasureCodingPolicy(dir2, ecPolicy.getName()); + for (int i = 0; i < 3; i++) { + DFSTestUtil.createFile(fs, new Path(dir1, String.valueOf(i)), + 20 * 1024L, (short) 3, 1); + DFSTestUtil.createStripedFile(cluster, new Path(dir2, + String.valueOf(i)), dir2, 1, 1, false); + } + + List str = RemoteIterators.toList(fs.listLocatedStatus(dir1)); + assertThat(str).hasSize(3); + Mockito.verify(fs.dfs, Mockito.times(1)).listPaths(anyString(), any(), + anyBoolean()); + + str = RemoteIterators.toList(fs.listLocatedStatus(dir2)); + assertThat(str).hasSize(3); + Mockito.verify(fs.dfs, Mockito.times(4)).listPaths(anyString(), any(), + anyBoolean()); + } + } + @Test public void testStatistics() throws IOException { FileSystem.getStatistics(HdfsConstants.HDFS_URI_SCHEME, From 05e6dc19ea322f474d5d094814873d297475b3fd Mon Sep 17 00:00:00 2001 From: Tamas Domok Date: Mon, 24 Apr 2023 16:46:40 +0200 Subject: [PATCH 94/97] HADOOP-18705. ABFS should exclude incompatible credential providers. (#5560) Contributed by Tamas Domok. 
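The HADOOP-18705 patch that follows filters the credential provider path before AzureBlobFileSystem finishes initializing, so a keystore that itself lives on abfs cannot trigger a recursive lookup of the filesystem being constructed. A minimal sketch of the filtering call; the provider-path value is taken from the patch's own ITestABFSJceksFiltering test, and only the file-backed provider is expected to survive.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
    import org.apache.hadoop.security.ProviderUtils;
    import org.apache.hadoop.security.alias.CredentialProviderFactory;

    public final class AbfsProviderFilteringSketch {
      private AbfsProviderFilteringSketch() { }

      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // One abfs-backed keystore (reading it would require initializing
        // AzureBlobFileSystem) and one local jceks keystore.
        conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH,
            "jceks://abfs@a@b.c.d/tmp/a.jceks,jceks://file/tmp/secret.jceks");
        Configuration filtered = ProviderUtils.excludeIncompatibleCredentialProviders(
            conf, AzureBlobFileSystem.class);
        // Expected to print only jceks://file/tmp/secret.jceks.
        System.out.println(
            filtered.get(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH));
      }
    }
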
--- .../fs/azurebfs/AzureBlobFileSystem.java | 3 ++ .../fs/azurebfs/ITestABFSJceksFiltering.java | 43 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestABFSJceksFiltering.java diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 9c9d6f561d7..bb9ecdd51a6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -46,6 +46,7 @@ import java.util.concurrent.Future; import javax.annotation.Nullable; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -157,6 +158,8 @@ public class AzureBlobFileSystem extends FileSystem @Override public void initialize(URI uri, Configuration configuration) throws IOException { + configuration = ProviderUtils.excludeIncompatibleCredentialProviders( + configuration, AzureBlobFileSystem.class); uri = ensureAuthority(uri, configuration); super.initialize(uri, configuration); setConf(configuration); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestABFSJceksFiltering.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestABFSJceksFiltering.java new file mode 100644 index 00000000000..e1b6b39521a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestABFSJceksFiltering.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import org.junit.Test; + +import org.apache.hadoop.security.alias.CredentialProviderFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; + +public class ITestABFSJceksFiltering extends AbstractAbfsIntegrationTest { + + public ITestABFSJceksFiltering() throws Exception { + } + + @Test + public void testIncompatibleCredentialProviderIsExcluded() throws Exception { + Configuration rawConfig = getRawConfiguration(); + rawConfig.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, + "jceks://abfs@a@b.c.d/tmp/a.jceks,jceks://file/tmp/secret.jceks"); + try (AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.get(rawConfig)) { + assertNotNull("filesystem", fs); + String providers = fs.getConf().get(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH); + assertEquals("jceks://file/tmp/secret.jceks", providers); + } + } +} From dc78849f27da49a78666b86b560c84791cd43487 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Mon, 24 Apr 2023 09:04:28 -0700 Subject: [PATCH 95/97] HDFS-16975. FileWithSnapshotFeature.isCurrentFileDeleted is not reloaded from FSImage. (#5546) --- .../hdfs/server/namenode/FSImageFormat.java | 2 +- .../hadoop/hdfs/server/namenode/INode.java | 18 ++++++++-- .../hdfs/server/namenode/INodeFile.java | 24 ++++++++----- .../snapshot/FSImageFormatPBSnapshot.java | 2 +- .../snapshot/FileWithSnapshotFeature.java | 2 +- .../visitor/NamespacePrintVisitor.java | 35 ++----------------- .../namenode/TestFSImageWithSnapshot.java | 6 ++-- 7 files changed, 40 insertions(+), 49 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index 7e679296e25..41824991a49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -798,7 +798,7 @@ public class FSImageFormat { if (underConstruction) { file.toUnderConstruction(clientName, clientMachine); } - return fileDiffs == null ? file : new INodeFile(file, fileDiffs); + return fileDiffs == null ? file : file.loadSnapshotFeature(fileDiffs); } else if (numBlocks == -1) { //directory diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java index 102ca72f2b8..b902c37109a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java @@ -871,7 +871,14 @@ public abstract class INode implements INodeAttributes, Diff.Element { long id = getId(); return (int)(id^(id>>>32)); } - + + @VisibleForTesting + public final StringBuilder dumpParentINodes() { + final StringBuilder b = parent == null? new StringBuilder() + : parent.dumpParentINodes().append("\n "); + return b.append(toDetailString()); + } + /** * Dump the subtree starting from this inode. * @return a text representation of the tree. 
@@ -896,10 +903,17 @@ public abstract class INode implements INodeAttributes, Diff.Element { @VisibleForTesting public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix, int snapshotId) { + dumpINode(out, prefix, snapshotId); + } + + public void dumpINode(PrintWriter out, StringBuilder prefix, + int snapshotId) { out.print(prefix); out.print(" "); final String name = getLocalName(); - out.print(name.isEmpty()? "/": name); + out.print(name != null && name.isEmpty()? "/": name); + out.print(", isInCurrentState? "); + out.print(isInCurrentState()); out.print(" ("); out.print(getObjectString()); out.print("), "); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java index aa2b95d2ea6..1bd315f1771 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java @@ -283,12 +283,6 @@ public class INodeFile extends INodeWithAdditionalFields setBlocks(that.blocks); } - public INodeFile(INodeFile that, FileDiffList diffs) { - this(that); - Preconditions.checkArgument(!that.isWithSnapshot()); - this.addSnapshotFeature(diffs); - } - /** @return true unconditionally. */ @Override public final boolean isFile() { @@ -458,7 +452,16 @@ public class INodeFile extends INodeWithAdditionalFields this.addFeature(sf); return sf; } - + + /** Used by FSImage. */ + public INodeFile loadSnapshotFeature(FileDiffList diffs) { + final FileWithSnapshotFeature sf = addSnapshotFeature(diffs); + if (!isInCurrentState()) { + sf.deleteCurrentFile(); + } + return this; + } + /** * If feature list contains a {@link FileWithSnapshotFeature}, return it; * otherwise, return null. 
@@ -1092,7 +1095,12 @@ public class INodeFile extends INodeWithAdditionalFields @Override public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix, final int snapshotId) { - super.dumpTreeRecursively(out, prefix, snapshotId); + dumpINodeFile(out, prefix, snapshotId); + } + + public void dumpINodeFile(PrintWriter out, StringBuilder prefix, + final int snapshotId) { + dumpINode(out, prefix, snapshotId); out.print(", fileSize=" + computeFileSize(snapshotId)); // only compare the first block out.print(", blocks="); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java index f1a21cce45a..a9b21919764 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java @@ -269,7 +269,7 @@ public class FSImageFormatPBSnapshot { } diffs.addFirst(diff); } - file.addSnapshotFeature(diffs); + file.loadSnapshotFeature(diffs); short repl = file.getPreferredBlockReplication(); for (BlockInfo b : file.getBlocks()) { if (b.getReplication() < repl) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java index 5263ef357bf..492278391d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java @@ -243,6 +243,6 @@ public class FileWithSnapshotFeature implements INode.Feature { @Override public String toString() { - return "" + diffs; + return "isCurrentFileDeleted? 
" + isCurrentFileDeleted + ", " + diffs; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/visitor/NamespacePrintVisitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/visitor/NamespacePrintVisitor.java index 3dcc9e628dc..67dd89b4152 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/visitor/NamespacePrintVisitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/visitor/NamespacePrintVisitor.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdfs.server.namenode.visitor; import org.apache.hadoop.util.Preconditions; -import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.INode; @@ -29,7 +28,6 @@ import org.apache.hadoop.hdfs.server.namenode.INodeSymlink; import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectorySnapshottableFeature; import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature; import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff; -import org.apache.hadoop.hdfs.server.namenode.snapshot.FileWithSnapshotFeature; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import java.io.PrintWriter; @@ -63,7 +61,7 @@ public final class NamespacePrintVisitor implements NamespaceVisitor { } private final PrintWriter out; - private final StringBuffer prefix = new StringBuffer(); + private final StringBuilder prefix = new StringBuilder(); private NamespacePrintVisitor(PrintWriter out) { this.out = out; @@ -74,39 +72,12 @@ public final class NamespacePrintVisitor implements NamespaceVisitor { } private void printINode(INode iNode, int snapshot) { - out.print(prefix); - out.print(" "); - final String name = iNode.getLocalName(); - out.print(name != null && name.isEmpty()? "/": name); - out.print(" ("); - out.print(iNode.getObjectString()); - out.print("), "); - out.print(iNode.getParentString()); - out.print(", " + iNode.getPermissionStatus(snapshot)); + iNode.dumpINode(out, prefix, snapshot); } @Override public void visitFile(INodeFile file, int snapshot) { - printINode(file, snapshot); - - out.print(", fileSize=" + file.computeFileSize(snapshot)); - // print only the first block, if it exists - out.print(", blocks="); - final BlockInfo[] blocks = file.getBlocks(); - out.print(blocks.length == 0 ? 
null: blocks[0]); - out.println(); - - final FileWithSnapshotFeature snapshotFeature - = file.getFileWithSnapshotFeature(); - if (snapshotFeature != null) { - if (prefix.length() >= 2) { - prefix.setLength(prefix.length() - 2); - prefix.append(" "); - } - out.print(prefix); - out.print(snapshotFeature); - } - out.println(); + file.dumpINodeFile(out, prefix, snapshot); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java index de527f0bff7..ab5ac75deb6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java @@ -196,10 +196,8 @@ public class TestFSImageWithSnapshot { cluster.waitActive(); fsn = cluster.getNamesystem(); hdfs = cluster.getFileSystem(); - - INodeDirectory rootNode = fsn.dir.getINode4Write(root.toString()) - .asDirectory(); - assertTrue("The children list of root should be empty", + final INodeDirectory rootNode = fsn.dir.getRoot(); + assertTrue("The children list of root should be empty", rootNode.getChildrenList(Snapshot.CURRENT_STATE_ID).isEmpty()); // one snapshot on root: s1 DiffList diffList = rootNode.getDiffs().asList(); From c9e0af99617dfd5ff817776635dddb052f9acce9 Mon Sep 17 00:00:00 2001 From: zhtttylz Date: Tue, 25 Apr 2023 06:00:56 +0800 Subject: [PATCH 96/97] HDFS-16981. Support getFileLinkStatus API in WebHDFS (#5572). Contributed by Hualong Zhang. Reviewed-by: Simbarashe Dzinamarira Signed-off-by: Ayush Saxena --- .../hadoop/hdfs/web/WebHdfsFileSystem.java | 18 ++++++++++ .../hadoop/hdfs/web/resources/GetOpParam.java | 1 + .../router/RouterWebHdfsMethods.java | 1 + .../web/resources/NamenodeWebHdfsMethods.java | 8 +++++ .../hadoop-hdfs/src/site/markdown/WebHDFS.md | 34 +++++++++++++++++++ .../apache/hadoop/hdfs/web/TestWebHDFS.java | 25 ++++++++++++++ 6 files changed, 87 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index 615cf3bd7c2..f5a54dd9bef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -2160,6 +2160,24 @@ public class WebHdfsFileSystem extends FileSystem }.run(); } + @Override + public FileStatus getFileLinkStatus(Path f) throws IOException { + statistics.incrementReadOps(1); + storageStatistics.incrementOpCounter(OpType.GET_FILE_LINK_STATUS); + final HttpOpParam.Op op = GetOpParam.Op.GETFILELINKSTATUS; + HdfsFileStatus status = + new FsPathResponseRunner(op, f) { + @Override + HdfsFileStatus decodeResponse(Map json) { + return JsonUtilClient.toFileStatus(json, true); + } + }.run(); + if (status == null) { + throw new FileNotFoundException("File does not exist: " + f); + } + return status.makeQualified(getUri(), f); + } + @VisibleForTesting InetSocketAddress[] getResolvedNNAddr() { return nnAddrs; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java index 89979295c79..c1dcd76b15e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java @@ -65,6 +65,7 @@ public class GetOpParam extends HttpOpParam { GETSNAPSHOTDIFFLISTING(false, HttpURLConnection.HTTP_OK), GETSNAPSHOTTABLEDIRECTORYLIST(false, HttpURLConnection.HTTP_OK), GETLINKTARGET(false, HttpURLConnection.HTTP_OK), + GETFILELINKSTATUS(false, HttpURLConnection.HTTP_OK), GETSNAPSHOTLIST(false, HttpURLConnection.HTTP_OK); final boolean redirect; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java index 477a59941fe..888def5e637 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterWebHdfsMethods.java @@ -386,6 +386,7 @@ public class RouterWebHdfsMethods extends NamenodeWebHdfsMethods { case LISTXATTRS: case CHECKACCESS: case GETLINKTARGET: + case GETFILELINKSTATUS: { return super.get(ugi, delegation, username, doAsUser, fullpath, op, offset, length, renewer, bufferSize, xattrNames, xattrEncoding, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 4b3b53731ee..cd69f4ebd65 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -1388,6 +1388,14 @@ public class NamenodeWebHdfsMethods { final String js = JsonUtil.toJsonString("Path", target); return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); } + case GETFILELINKSTATUS: { + HdfsFileStatus status = cp.getFileLinkInfo(fullpath); + if (status == null) { + throw new FileNotFoundException("File does not exist: " + fullpath); + } + final String js = JsonUtil.toJsonString(status, true); + return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); + } default: throw new UnsupportedOperationException(op + " is not supported"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md index f84018ae821..3d22876946b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md @@ -59,6 +59,7 @@ The HTTP REST API supports the complete [FileSystem](../../api/org/apache/hadoop * [`GETECPOLICY`](#Get_EC_Policy) (see [HDFSErasureCoding](./HDFSErasureCoding.html#Administrative_commands).getErasureCodingPolicy) * [`GETSERVERDEFAULTS`](#Get_Server_Defaults) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServerDefaults) * [`GETLINKTARGET`](#Get_Link_Target) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getLinkTarget) + * 
[`GETFILELINKSTATUS`](#Get_File_Link_Status) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileLinkStatus) * HTTP PUT * [`CREATE`](#Create_and_Write_to_a_File) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).create) * [`MKDIRS`](#Make_a_Directory) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).mkdirs) @@ -1156,6 +1157,39 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getServer See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getLinkTarget +### Get File Link Status + +* Submit a HTTP GET request. + + curl -i "http://:/webhdfs/v1/?op=GETFILELINKSTATUS" + + The client receives a response with a [`FileStatus` JSON object](#FileStatuses_JSON_Schema): + + HTTP/1.1 200 OK + Content-Type: application/json + Transfer-Encoding: chunked + + { + "FileStatus": { + "accessTime": 0, + "blockSize": 0, + "childrenNum":0, + "fileId": 16388, + "group": "supergroup", + "length": 0, + "modificationTime": 1681916788427, + "owner": "hadoop", + "pathSuffix": "", + "permission": "777", + "replication": 0, + "storagePolicy": 0, + "symlink": "/webHdfsTest/file", + "type": "SYMLINK" + } + } + +See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileLinkInfo + Storage Policy Operations ------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java index 8f4759d8e30..aec7545fab7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java @@ -2230,6 +2230,31 @@ public class TestWebHDFS { } } + @Test + public void testFileLinkStatus() throws Exception { + final Configuration conf = WebHdfsTestUtil.createConf(); + try { + cluster = new MiniDFSCluster.Builder(conf).build(); + cluster.waitActive(); + + final WebHdfsFileSystem webHdfs = + WebHdfsTestUtil.getWebHdfsFileSystem(conf, + WebHdfsConstants.WEBHDFS_SCHEME); + // Symbolic link + Path root = new Path("/webHdfsTest/"); + Path file = new Path(root, "file"); + FileSystemTestHelper.createFile(webHdfs, file); + + Path linkToFile = new Path(root, "linkToFile"); + + webHdfs.createSymlink(file, linkToFile, false); + assertFalse(webHdfs.getFileLinkStatus(file).isSymlink()); + assertTrue(webHdfs.getFileLinkStatus(linkToFile).isSymlink()); + } finally { + cluster.shutdown(); + } + } + /** * Get FileStatus JSONObject from ListStatus response. */ From 2f66f0b83a52b2d80f82a21f91afa85565492a85 Mon Sep 17 00:00:00 2001 From: cxzl25 Date: Tue, 25 Apr 2023 06:22:49 +0800 Subject: [PATCH 97/97] HADOOP-18694. Client.Connection#updateAddress needs to ensure that address is resolved before updating (#5542). Contributed by dzcxzl. 
Reviewed-by: Steve Vaughan Reviewed-by: He Xiaoqiao Signed-off-by: Ayush Saxena ) () -> { + client.call(RpcKind.RPC_BUILTIN, new LongWritable(RANDOM.nextLong()), + remoteId, RPC.RPC_SERVICE_CLASS_DEFAULT, null); + return null; + }); + + assertFalse(address.isUnresolved()); + assertFalse(remoteId.getAddress().isUnresolved()); + assertEquals(System.identityHashCode(remoteId.getAddress()), + System.identityHashCode(address)); + + NetUtils.addStaticResolution("localhost", "host.invalid"); + LambdaTestUtils.intercept(IOException.class, (Callable) () -> { + client.call(RpcKind.RPC_BUILTIN, new LongWritable(RANDOM.nextLong()), + remoteId, RPC.RPC_SERVICE_CLASS_DEFAULT, null); + return null; + }); + + assertFalse(remoteId.getAddress().isUnresolved()); + assertEquals(System.identityHashCode(remoteId.getAddress()), + System.identityHashCode(address)); + } finally { + client.stop(); + server.stop(); + } + } + private void checkUserBinding(boolean asProxy) throws Exception { Socket s; // don't attempt bind with no service host.
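
The tail of this last patch shows only the test side of HADOOP-18694: after a successful call the connection's cached address must stay resolved, and a later failed DNS lookup must not replace it with an unresolved address. A minimal, self-contained sketch of that selection rule, using a hypothetical helper rather than the actual Client.Connection#updateAddress body:

    import java.net.InetSocketAddress;

    import org.apache.hadoop.net.NetUtils;

    public final class UpdateAddressSketch {
      private UpdateAddressSketch() { }

      /**
       * Picks the address a connection should keep using: the freshly
       * re-resolved one when resolution succeeded, otherwise the previously
       * resolved address so retries against the old endpoint can continue.
       */
      public static InetSocketAddress chooseAddress(InetSocketAddress oldAddr) {
        InetSocketAddress refreshed = NetUtils.createSocketAddrForHost(
            oldAddr.getHostName(), oldAddr.getPort());
        if (refreshed.isUnresolved()) {
          // Lookup failed (for example, the static resolution now points at
          // host.invalid): keep the old, already-resolved address.
          return oldAddr;
        }
        return refreshed;
      }
    }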