Merge branch 'trunk' into YARN-11444

Author: slfan1989, committed by GitHub on 2023-04-25 16:00:44 +08:00
Commit: da6ccfb6c7 (GPG key ID: 4AEE18F83AFDEB23; no known key found for this signature in database)
360 changed files with 120351 additions and 3294 deletions

View File

@ -210,9 +210,9 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/nvd3-1.8.5.* (css and js
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java
com.aliyun:aliyun-java-sdk-core:3.4.0
com.aliyun:aliyun-java-sdk-ecs:4.2.0
com.aliyun:aliyun-java-sdk-ram:3.0.0
com.aliyun:aliyun-java-sdk-core:4.5.10
com.aliyun:aliyun-java-sdk-kms:2.11.0
com.aliyun:aliyun-java-sdk-ram:3.1.0
com.aliyun:aliyun-java-sdk-sts:3.0.0
com.aliyun.oss:aliyun-sdk-oss:3.13.2
com.amazonaws:aws-java-sdk-bundle:1.12.316
@ -240,7 +240,7 @@ com.google.guava:guava:20.0
com.google.guava:guava:27.0-jre
com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava
com.microsoft.azure:azure-storage:7.0.0
com.nimbusds:nimbus-jose-jwt:9.8.1
com.nimbusds:nimbus-jose-jwt:9.31
com.squareup.okhttp3:okhttp:4.10.0
com.squareup.okio:okio:3.2.0
com.zaxxer:HikariCP:4.0.3
@ -299,7 +299,6 @@ javax.inject:javax.inject:1
log4j:log4j:1.2.17
net.java.dev.jna:jna:5.2.0
net.minidev:accessors-smart:1.2
net.minidev:json-smart:2.4.7
org.apache.avro:avro:1.9.2
org.apache.commons:commons-collections4:4.2
org.apache.commons:commons-compress:1.21
@ -323,44 +322,47 @@ org.apache.htrace:htrace-core4:4.1.0-incubating
org.apache.httpcomponents:httpclient:4.5.6
org.apache.httpcomponents:httpcore:4.4.10
org.apache.kafka:kafka-clients:2.8.2
org.apache.kerby:kerb-admin:2.0.2
org.apache.kerby:kerb-client:2.0.2
org.apache.kerby:kerb-common:2.0.2
org.apache.kerby:kerb-core:2.0.2
org.apache.kerby:kerb-crypto:2.0.2
org.apache.kerby:kerb-identity:2.0.2
org.apache.kerby:kerb-server:2.0.2
org.apache.kerby:kerb-simplekdc:2.0.2
org.apache.kerby:kerb-util:2.0.2
org.apache.kerby:kerby-asn1:2.0.2
org.apache.kerby:kerby-config:2.0.2
org.apache.kerby:kerby-pkix:2.0.2
org.apache.kerby:kerby-util:2.0.2
org.apache.kerby:kerby-xdr:2.0.2
org.apache.kerby:token-provider:2.0.2
org.apache.kerby:kerb-admin:2.0.3
org.apache.kerby:kerb-client:2.0.3
org.apache.kerby:kerb-common:2.0.3
org.apache.kerby:kerb-core:2.0.3
org.apache.kerby:kerb-crypto:2.0.3
org.apache.kerby:kerb-identity:2.0.3
org.apache.kerby:kerb-server:2.0.3
org.apache.kerby:kerb-simplekdc:2.0.3
org.apache.kerby:kerb-util:2.0.3
org.apache.kerby:kerby-asn1:2.0.3
org.apache.kerby:kerby-config:2.0.3
org.apache.kerby:kerby-pkix:2.0.3
org.apache.kerby:kerby-util:2.0.3
org.apache.kerby:kerby-xdr:2.0.3
org.apache.kerby:token-provider:2.0.3
org.apache.solr:solr-solrj:8.8.2
org.apache.yetus:audience-annotations:0.5.0
org.apache.zookeeper:zookeeper:3.6.3
org.codehaus.jettison:jettison:1.5.3
org.eclipse.jetty:jetty-annotations:9.4.48.v20220622
org.eclipse.jetty:jetty-http:9.4.48.v20220622
org.eclipse.jetty:jetty-io:9.4.48.v20220622
org.eclipse.jetty:jetty-jndi:9.4.48.v20220622
org.eclipse.jetty:jetty-plus:9.4.48.v20220622
org.eclipse.jetty:jetty-security:9.4.48.v20220622
org.eclipse.jetty:jetty-server:9.4.48.v20220622
org.eclipse.jetty:jetty-servlet:9.4.48.v20220622
org.eclipse.jetty:jetty-util:9.4.48.v20220622
org.eclipse.jetty:jetty-util-ajax:9.4.48.v20220622
org.eclipse.jetty:jetty-webapp:9.4.48.v20220622
org.eclipse.jetty:jetty-xml:9.4.48.v20220622
org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.48.v20220622
org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.48.v20220622
org.codehaus.jettison:jettison:1.5.4
org.eclipse.jetty:jetty-annotations:9.4.51.v20230217
org.eclipse.jetty:jetty-http:9.4.51.v20230217
org.eclipse.jetty:jetty-io:9.4.51.v20230217
org.eclipse.jetty:jetty-jndi:9.4.51.v20230217
org.eclipse.jetty:jetty-plus:9.4.51.v20230217
org.eclipse.jetty:jetty-security:9.4.51.v20230217
org.eclipse.jetty:jetty-server:9.4.51.v20230217
org.eclipse.jetty:jetty-servlet:9.4.51.v20230217
org.eclipse.jetty:jetty-util:9.4.51.v20230217
org.eclipse.jetty:jetty-util-ajax:9.4.51.v20230217
org.eclipse.jetty:jetty-webapp:9.4.51.v20230217
org.eclipse.jetty:jetty-xml:9.4.51.v20230217
org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.51.v20230217
org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.51.v20230217
org.ehcache:ehcache:3.3.1
org.ini4j:ini4j:0.5.4
org.jetbrains.kotlin:kotlin-stdlib:1.4.10
org.jetbrains.kotlin:kotlin-stdlib-common:1.4.10
org.lz4:lz4-java:1.7.1
org.objenesis:objenesis:2.6
org.xerial.snappy:snappy-java:1.0.5
org.yaml:snakeyaml:1.33
org.yaml:snakeyaml:2.0
org.wildfly.openssl:wildfly-openssl:1.1.3.Final
@ -516,6 +518,8 @@ Eclipse Public License 1.0
--------------------------
junit:junit:4.13.2
org.jacoco:org.jacoco.agent:0.8.5
HSQL License

View File

@ -74,7 +74,7 @@ ENV PATH "${PATH}:/opt/protobuf/bin"
###
# Avoid out of memory errors in builds
###
ENV MAVEN_OPTS -Xms256m -Xmx1536m
ENV MAVEN_OPTS -Xms256m -Xmx3072m
# Skip gpg verification when downloading Yetus via yetus-wrapper
ENV HADOOP_SKIP_YETUS_VERIFICATION true

View File

@ -69,6 +69,10 @@
<groupId>com.github.pjfanning</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
@ -182,6 +186,10 @@
<groupId>com.github.pjfanning</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
@ -233,6 +241,10 @@
<groupId>com.github.pjfanning</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-servlet</artifactId>
@ -290,6 +302,10 @@
<groupId>com.github.pjfanning</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>

View File

@ -110,20 +110,8 @@
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk15on</artifactId>
</exclusion>
<!-- HACK. Transitive dependency for nimbus-jose-jwt. Needed for
packaging. Please re-check this version when updating
nimbus-jose-jwt. Please read HADOOP-14903 for more details.
-->
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>

File diff suppressed because one or more lines are too long

View File

@ -175,6 +175,14 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<!--
adding jettison as direct dependency (as jersey-json's jettison dependency is vulnerable with verison 1.1),
so those who depends on hadoop-common externally will get the non-vulnerable jettison
-->
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>

View File

@ -26,9 +26,9 @@ MYNAME="${BASH_SOURCE-$0}"
function hadoop_usage
{
hadoop_add_option "buildpaths" "attempt to add class files from build tree"
hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in slave mode"
hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in worker mode"
hadoop_add_option "loglevel level" "set the log4j level for this command"
hadoop_add_option "hosts filename" "list of hosts to use in slave mode"
hadoop_add_option "hosts filename" "list of hosts to use in worker mode"
hadoop_add_option "workers" "turn on worker mode"
hadoop_add_subcommand "checknative" client "check native Hadoop and compression libraries availability"

View File

@ -16,7 +16,7 @@
# limitations under the License.
# Run a Hadoop command on all slave hosts.
# Run a Hadoop command on all worker hosts.
function hadoop_usage
{

View File

@ -75,14 +75,6 @@ log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
#
# TaskLog Appender
#
log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
#
# HDFS block state change log from block manager
#

View File

@ -2413,8 +2413,14 @@ public abstract class FileSystem extends Configured
if (stat.isFile()) { // file
curFile = stat;
} else if (recursive) { // directory
itors.push(curItor);
curItor = listLocatedStatus(stat.getPath());
try {
RemoteIterator<LocatedFileStatus> newDirItor = listLocatedStatus(stat.getPath());
itors.push(curItor);
curItor = newDirItor;
} catch (FileNotFoundException ignored) {
LOGGER.debug("Directory {} deleted while attempting for recursive listing",
stat.getPath());
}
}
}
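As a usage sketch of the call path this hunk hardens (assuming an initialized FileSystem and a placeholder path "/data"), the recursive listing below is where the change matters: a subdirectory deleted while iteration is in progress is now skipped and logged at debug level rather than failing the whole listing.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class RecursiveListingSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Recursive listing; directories removed mid-iteration are now tolerated.
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/data"), true);
    while (it.hasNext()) {
      System.out.println(it.next().getPath());
    }
  }
}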
@ -3936,6 +3942,7 @@ public abstract class FileSystem extends Configured
private volatile long bytesReadDistanceOfThreeOrFour;
private volatile long bytesReadDistanceOfFiveOrLarger;
private volatile long bytesReadErasureCoded;
private volatile long remoteReadTimeMS;
/**
* Add another StatisticsData object to this one.
@ -3953,6 +3960,7 @@ public abstract class FileSystem extends Configured
this.bytesReadDistanceOfFiveOrLarger +=
other.bytesReadDistanceOfFiveOrLarger;
this.bytesReadErasureCoded += other.bytesReadErasureCoded;
this.remoteReadTimeMS += other.remoteReadTimeMS;
}
/**
@ -3971,6 +3979,7 @@ public abstract class FileSystem extends Configured
this.bytesReadDistanceOfFiveOrLarger =
-this.bytesReadDistanceOfFiveOrLarger;
this.bytesReadErasureCoded = -this.bytesReadErasureCoded;
this.remoteReadTimeMS = -this.remoteReadTimeMS;
}
@Override
@ -4019,6 +4028,10 @@ public abstract class FileSystem extends Configured
public long getBytesReadErasureCoded() {
return bytesReadErasureCoded;
}
public long getRemoteReadTimeMS() {
return remoteReadTimeMS;
}
}
private interface StatisticsAggregator<T> {
@ -4246,6 +4259,14 @@ public abstract class FileSystem extends Configured
}
}
/**
* Increment the time taken to read bytes from remote in the statistics.
* @param durationMS time taken in ms to read bytes from remote
*/
public void increaseRemoteReadTime(final long durationMS) {
getThreadStatistics().remoteReadTimeMS += durationMS;
}
/**
* Apply the given aggregator to all StatisticsData objects associated with
* this Statistics object.
@ -4393,6 +4414,25 @@ public abstract class FileSystem extends Configured
return bytesRead;
}
/**
* Get total time taken in ms for bytes read from remote.
* @return time taken in ms for remote bytes read.
*/
public long getRemoteReadTime() {
return visitAll(new StatisticsAggregator<Long>() {
private long remoteReadTimeMS = 0;
@Override
public void accept(StatisticsData data) {
remoteReadTimeMS += data.remoteReadTimeMS;
}
public Long aggregate() {
return remoteReadTimeMS;
}
});
}
/**
* Get all statistics data.
* MR or other frameworks can use the method to get all statistics at once.
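A minimal sketch of how the new remoteReadTimeMS statistic could be exercised and read back. The file path is a placeholder, and the timing wrapped around read() merely stands in for wherever a stream implementation measures its remote I/O; this is not the actual DFS client code.

import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RemoteReadTimeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Per-scheme, per-class statistics object that now carries remoteReadTimeMS.
    FileSystem.Statistics stats =
        FileSystem.getStatistics(fs.getUri().getScheme(), fs.getClass());

    byte[] buf = new byte[4096];
    // "/tmp/example.txt" is a placeholder path for this sketch.
    try (InputStream in = fs.open(new Path("/tmp/example.txt"))) {
      long start = System.currentTimeMillis();
      int n = in.read(buf);
      // A stream implementation would record remote read latency like this:
      stats.increaseRemoteReadTime(System.currentTimeMillis() - start);
      System.out.println("read " + n + " bytes");
    }

    // Aggregated across all threads that used this FileSystem class:
    System.out.println("remote read time (ms): " + stats.getRemoteReadTime());
  }
}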

View File

@ -47,7 +47,8 @@ public class FileSystemStorageStatistics extends StorageStatistics {
"bytesReadDistanceOfOneOrTwo",
"bytesReadDistanceOfThreeOrFour",
"bytesReadDistanceOfFiveOrLarger",
"bytesReadErasureCoded"
"bytesReadErasureCoded",
"remoteReadTimeMS"
};
private static class LongStatisticIterator
@ -107,6 +108,8 @@ public class FileSystemStorageStatistics extends StorageStatistics {
return data.getBytesReadDistanceOfFiveOrLarger();
case "bytesReadErasureCoded":
return data.getBytesReadErasureCoded();
case "remoteReadTimeMS":
return data.getRemoteReadTimeMS();
default:
return null;
}
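For completeness, a hedged sketch of reading the same counter through the StorageStatistics view that FileSystemStorageStatistics backs; it assumes fs is an initialized FileSystem whose getStorageStatistics() exposes these per-FS counters.

import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.StorageStatistics.LongStatistic;

public class StorageStatisticsSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    StorageStatistics stats = fs.getStorageStatistics();
    // The new key sits alongside the existing byte counters.
    System.out.println("remoteReadTimeMS = " + stats.getLong("remoteReadTimeMS"));
    for (Iterator<LongStatistic> it = stats.getLongStatistics(); it.hasNext();) {
      LongStatistic s = it.next();
      System.out.println(s.getName() + " = " + s.getValue());
    }
  }
}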

View File

@ -23,6 +23,9 @@ import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
/**
* Provides functionality necessary for caching blocks of data read from FileSystem.
*/
@ -64,7 +67,10 @@ public interface BlockCache extends Closeable {
*
* @param blockNumber the id of the given block.
* @param buffer contents of the given block to be added to this cache.
* @param conf the configuration.
* @param localDirAllocator the local dir allocator instance.
* @throws IOException if there is an error writing the given block.
*/
void put(int blockNumber, ByteBuffer buffer) throws IOException;
void put(int blockNumber, ByteBuffer buffer, Configuration conf,
LocalDirAllocator localDirAllocator) throws IOException;
}

View File

@ -33,6 +33,8 @@ import java.util.function.Supplier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.statistics.DurationTracker;
import static java.util.Objects.requireNonNull;
@ -95,6 +97,10 @@ public abstract class CachingBlockManager extends BlockManager {
private final PrefetchingStatistics prefetchingStatistics;
private final Configuration conf;
private final LocalDirAllocator localDirAllocator;
/**
* Constructs an instance of a {@code CachingBlockManager}.
*
@ -102,14 +108,17 @@ public abstract class CachingBlockManager extends BlockManager {
* @param blockData information about each block of the underlying file.
* @param bufferPoolSize size of the in-memory cache in terms of number of blocks.
* @param prefetchingStatistics statistics for this stream.
*
* @param conf the configuration.
* @param localDirAllocator the local dir allocator instance.
* @throws IllegalArgumentException if bufferPoolSize is zero or negative.
*/
public CachingBlockManager(
ExecutorServiceFuturePool futurePool,
BlockData blockData,
int bufferPoolSize,
PrefetchingStatistics prefetchingStatistics) {
PrefetchingStatistics prefetchingStatistics,
Configuration conf,
LocalDirAllocator localDirAllocator) {
super(blockData);
Validate.checkPositiveInteger(bufferPoolSize, "bufferPoolSize");
@ -129,6 +138,8 @@ public abstract class CachingBlockManager extends BlockManager {
this.ops = new BlockOperations();
this.ops.setDebug(false);
this.conf = requireNonNull(conf);
this.localDirAllocator = localDirAllocator;
}
/**
@ -468,7 +479,8 @@ public abstract class CachingBlockManager extends BlockManager {
blockFuture = cf;
}
CachePutTask task = new CachePutTask(data, blockFuture, this, Instant.now());
CachePutTask task =
new CachePutTask(data, blockFuture, this, Instant.now());
Future<Void> actionFuture = futurePool.executeFunction(task);
data.setCaching(actionFuture);
ops.end(op);
@ -554,7 +566,7 @@ public abstract class CachingBlockManager extends BlockManager {
return;
}
cache.put(blockNumber, buffer);
cache.put(blockNumber, buffer, conf, localDirAllocator);
}
private static class CachePutTask implements Supplier<Void> {

View File

@ -27,10 +27,9 @@ import java.nio.channels.WritableByteChannel;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.PosixFilePermission;
import java.nio.file.attribute.PosixFilePermissions;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
@ -39,9 +38,13 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull;
@ -67,6 +70,12 @@ public class SingleFilePerBlockCache implements BlockCache {
private final PrefetchingStatistics prefetchingStatistics;
/**
* File attributes attached to any intermediate temporary file created during index creation.
*/
private static final Set<PosixFilePermission> TEMP_FILE_ATTRS =
ImmutableSet.of(PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE);
/**
* Cache entry.
* Each block is stored as a separate file.
@ -172,11 +181,17 @@ public class SingleFilePerBlockCache implements BlockCache {
/**
* Puts the given block in this cache.
*
* @throws IllegalArgumentException if buffer is null.
* @throws IllegalArgumentException if buffer.limit() is zero or negative.
* @param blockNumber the block number, used as a key for blocks map.
* @param buffer buffer contents of the given block to be added to this cache.
* @param conf the configuration.
* @param localDirAllocator the local dir allocator instance.
* @throws IOException if either local dir allocator fails to allocate file or if IO error
* occurs while writing the buffer content to the file.
* @throws IllegalArgumentException if buffer is null, or if buffer.limit() is zero or negative.
*/
@Override
public void put(int blockNumber, ByteBuffer buffer) throws IOException {
public void put(int blockNumber, ByteBuffer buffer, Configuration conf,
LocalDirAllocator localDirAllocator) throws IOException {
if (closed) {
return;
}
@ -191,7 +206,7 @@ public class SingleFilePerBlockCache implements BlockCache {
Validate.checkPositiveInteger(buffer.limit(), "buffer.limit()");
Path blockFilePath = getCacheFilePath();
Path blockFilePath = getCacheFilePath(conf, localDirAllocator);
long size = Files.size(blockFilePath);
if (size != 0) {
String message =
@ -221,8 +236,19 @@ public class SingleFilePerBlockCache implements BlockCache {
writeChannel.close();
}
protected Path getCacheFilePath() throws IOException {
return getTempFilePath();
/**
* Return temporary file created based on the file path retrieved from local dir allocator.
*
* @param conf The configuration object.
* @param localDirAllocator Local dir allocator instance.
* @return Path of the temporary file created.
* @throws IOException if IO error occurs while local dir allocator tries to retrieve path
* from local FS or file creation fails or permission set fails.
*/
protected Path getCacheFilePath(final Configuration conf,
final LocalDirAllocator localDirAllocator)
throws IOException {
return getTempFilePath(conf, localDirAllocator);
}
@Override
@ -323,9 +349,19 @@ public class SingleFilePerBlockCache implements BlockCache {
private static final String CACHE_FILE_PREFIX = "fs-cache-";
public static boolean isCacheSpaceAvailable(long fileSize) {
/**
* Determine if the cache space is available on the local FS.
*
* @param fileSize The size of the file.
* @param conf The configuration.
* @param localDirAllocator Local dir allocator instance.
* @return True if the given file size is less than the available free space on local FS,
* False otherwise.
*/
public static boolean isCacheSpaceAvailable(long fileSize, Configuration conf,
LocalDirAllocator localDirAllocator) {
try {
Path cacheFilePath = getTempFilePath();
Path cacheFilePath = getTempFilePath(conf, localDirAllocator);
long freeSpace = new File(cacheFilePath.toString()).getUsableSpace();
LOG.info("fileSize = {}, freeSpace = {}", fileSize, freeSpace);
Files.deleteIfExists(cacheFilePath);
@ -339,16 +375,25 @@ public class SingleFilePerBlockCache implements BlockCache {
// The suffix (file extension) of each serialized index file.
private static final String BINARY_FILE_SUFFIX = ".bin";
// File attributes attached to any intermediate temporary file created during index creation.
private static final FileAttribute<Set<PosixFilePermission>> TEMP_FILE_ATTRS =
PosixFilePermissions.asFileAttribute(EnumSet.of(PosixFilePermission.OWNER_READ,
PosixFilePermission.OWNER_WRITE));
private static Path getTempFilePath() throws IOException {
return Files.createTempFile(
CACHE_FILE_PREFIX,
BINARY_FILE_SUFFIX,
TEMP_FILE_ATTRS
);
/**
* Create temporary file based on the file path retrieved from local dir allocator
* instance. The file is created with .bin suffix. The created file has been granted
* posix file permissions available in TEMP_FILE_ATTRS.
*
* @param conf the configuration.
* @param localDirAllocator the local dir allocator instance.
* @return path of the file created.
* @throws IOException if IO error occurs while local dir allocator tries to retrieve path
* from local FS or file creation fails or permission set fails.
*/
private static Path getTempFilePath(final Configuration conf,
final LocalDirAllocator localDirAllocator) throws IOException {
org.apache.hadoop.fs.Path path =
localDirAllocator.getLocalPathForWrite(CACHE_FILE_PREFIX, conf);
File dir = new File(path.getParent().toUri().getPath());
String prefix = path.getName();
File tmpFile = File.createTempFile(prefix, BINARY_FILE_SUFFIX, dir);
Path tmpFilePath = Paths.get(tmpFile.toURI());
return Files.setPosixFilePermissions(tmpFilePath, TEMP_FILE_ATTRS);
}
}
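An illustrative sketch (not the cache code itself) of the LocalDirAllocator pattern the prefetch cache now relies on instead of Files.createTempFile(); the config key "fs.client.buffer.dir" and the directory values are assumptions made only for this example.

import java.io.File;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;

public class LocalDirAllocatorSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumed config key and directories, purely for illustration.
    conf.set("fs.client.buffer.dir", "/tmp/buffer-a,/tmp/buffer-b");

    LocalDirAllocator allocator = new LocalDirAllocator("fs.client.buffer.dir");
    // Picks a writable directory from the configured list, skipping full ones.
    Path p = allocator.getLocalPathForWrite("fs-cache-", conf);
    File dir = new File(p.getParent().toUri().getPath());
    File tmp = File.createTempFile(p.getName(), ".bin", dir);
    System.out.println("cache block file: " + tmp.getAbsolutePath());
  }
}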

View File

@ -497,7 +497,12 @@ public final class HttpServer2 implements FilterContainer {
prefix -> this.conf.get(prefix + "type")
.equals(PseudoAuthenticationHandler.TYPE))
) {
server.initSpnego(conf, hostName, usernameConfKey, keytabConfKey);
server.initSpnego(
conf,
hostName,
getFilterProperties(conf, authFilterConfigurationPrefixes),
usernameConfKey,
keytabConfKey);
}
for (URI ep : endpoints) {
@ -1340,8 +1345,12 @@ public final class HttpServer2 implements FilterContainer {
}
private void initSpnego(Configuration conf, String hostName,
String usernameConfKey, String keytabConfKey) throws IOException {
Properties authFilterConfigurationPrefixes, String usernameConfKey, String keytabConfKey)
throws IOException {
Map<String, String> params = new HashMap<>();
for (Map.Entry<Object, Object> entry : authFilterConfigurationPrefixes.entrySet()) {
params.put(String.valueOf(entry.getKey()), String.valueOf(entry.getValue()));
}
String principalInConf = conf.get(usernameConfKey);
if (principalInConf != null && !principalInConf.isEmpty()) {
params.put("kerberos.principal", SecurityUtil.getServerPrincipal(

View File

@ -590,9 +590,8 @@ public class Client implements AutoCloseable {
InetSocketAddress currentAddr = NetUtils.createSocketAddrForHost(
server.getHostName(), server.getPort());
if (!server.equals(currentAddr)) {
LOG.warn("Address change detected. Old: " + server.toString() +
" New: " + currentAddr.toString());
if (!currentAddr.isUnresolved() && !server.equals(currentAddr)) {
LOG.warn("Address change detected. Old: {} New: {}", server, currentAddr);
server = currentAddr;
// Update the remote address so that reconnections are with the updated address.
// This avoids thrashing.
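A self-contained sketch of the guard added here, using NetUtils directly; the hostname and port are placeholders. The point is to switch to the freshly resolved address only when resolution actually succeeded and the address really changed.

import java.net.InetSocketAddress;
import org.apache.hadoop.net.NetUtils;

public class AddressChangeCheckSketch {
  public static void main(String[] args) {
    InetSocketAddress server =
        NetUtils.createSocketAddrForHost("namenode.example.com", 8020);
    // Re-resolve the same host:port, as the IPC client does on connect failures.
    InetSocketAddress current =
        NetUtils.createSocketAddrForHost(server.getHostName(), server.getPort());
    if (!current.isUnresolved() && !server.equals(current)) {
      System.out.println("Address change detected. Old: " + server + " New: " + current);
      server = current;
    }
  }
}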

View File

@ -29,5 +29,19 @@ import org.apache.hadoop.security.UserGroupInformation;
public interface Schedulable {
public UserGroupInformation getUserGroupInformation();
/**
* This is overridden only in {@link Server.Call}.
* The CallerContext field will be used to carry information
* about the user in cases where UGI proves insufficient.
* Any other classes that might try to use this method,
* will get an UnsupportedOperationException.
*
* @return an instance of CallerContext if method
* is overridden else get an UnsupportedOperationException
*/
default CallerContext getCallerContext() {
throw new UnsupportedOperationException("Invalid operation.");
}
int getPriorityLevel();
}

View File

@ -627,8 +627,11 @@ public abstract class Server {
details.get(Timing.PROCESSING, rpcMetrics.getMetricsTimeUnit());
long waitTime =
details.get(Timing.LOCKWAIT, rpcMetrics.getMetricsTimeUnit());
long responseTime =
details.get(Timing.RESPONSE, rpcMetrics.getMetricsTimeUnit());
rpcMetrics.addRpcLockWaitTime(waitTime);
rpcMetrics.addRpcProcessingTime(processingTime);
rpcMetrics.addRpcResponseTime(responseTime);
// don't include lock wait for detailed metrics.
processingTime -= waitTime;
String name = call.getDetailedMetricsName();
@ -1086,6 +1089,11 @@ public abstract class Server {
return getRemoteUser();
}
@Override
public CallerContext getCallerContext() {
return this.callerContext;
}
@Override
public int getPriorityLevel() {
return this.priorityLevel;

View File

@ -75,6 +75,8 @@ public class RpcMetrics {
new MutableQuantiles[intervals.length];
rpcProcessingTimeQuantiles =
new MutableQuantiles[intervals.length];
rpcResponseTimeQuantiles =
new MutableQuantiles[intervals.length];
deferredRpcProcessingTimeQuantiles =
new MutableQuantiles[intervals.length];
for (int i = 0; i < intervals.length; i++) {
@ -90,6 +92,10 @@ public class RpcMetrics {
"rpcProcessingTime" + interval + "s",
"rpc processing time in " + metricsTimeUnit, "ops",
"latency", interval);
rpcResponseTimeQuantiles[i] = registry.newQuantiles(
"rpcResponseTime" + interval + "s",
"rpc response time in " + metricsTimeUnit, "ops",
"latency", interval);
deferredRpcProcessingTimeQuantiles[i] = registry.newQuantiles(
"deferredRpcProcessingTime" + interval + "s",
"deferred rpc processing time in " + metricsTimeUnit, "ops",
@ -114,6 +120,8 @@ public class RpcMetrics {
MutableQuantiles[] rpcLockWaitTimeQuantiles;
@Metric("Processing time") MutableRate rpcProcessingTime;
MutableQuantiles[] rpcProcessingTimeQuantiles;
@Metric("Response time") MutableRate rpcResponseTime;
MutableQuantiles[] rpcResponseTimeQuantiles;
@Metric("Deferred Processing time") MutableRate deferredRpcProcessingTime;
MutableQuantiles[] deferredRpcProcessingTimeQuantiles;
@Metric("Number of authentication failures")
@ -282,6 +290,15 @@ public class RpcMetrics {
}
}
public void addRpcResponseTime(long responseTime) {
rpcResponseTime.add(responseTime);
if (rpcQuantileEnable) {
for (MutableQuantiles q : rpcResponseTimeQuantiles) {
q.add(responseTime);
}
}
}
public void addDeferredRpcProcessingTime(long processingTime) {
deferredRpcProcessingTime.add(processingTime);
if (rpcQuantileEnable) {

View File

@ -1,263 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.log;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.MappingJsonFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import com.fasterxml.jackson.databind.node.ContainerNode;
import org.apache.log4j.Layout;
import org.apache.log4j.helpers.ISO8601DateFormat;
import org.apache.log4j.spi.LoggingEvent;
import org.apache.log4j.spi.ThrowableInformation;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.text.DateFormat;
import java.util.Date;
/**
* This offers a log layout for JSON, with some test entry points. It's purpose is
* to allow Log4J to generate events that are easy for other programs to parse, but which are somewhat
* human-readable.
*
* Some features.
*
* <ol>
* <li>Every event is a standalone JSON clause</li>
* <li>Time is published as a time_t event since 1/1/1970
* -this is the fastest to generate.</li>
* <li>An ISO date is generated, but this is cached and will only be accurate to within a second</li>
* <li>the stack trace is included as an array</li>
* </ol>
*
* A simple log event will resemble the following
* <pre>
* {"name":"test","time":1318429136789,"date":"2011-10-12 15:18:56,789","level":"INFO","thread":"main","message":"test message"}
* </pre>
*
* An event with an error will contain data similar to that below (which has been reformatted to be multi-line).
*
* <pre>
* {
* "name":"testException",
* "time":1318429136789,
* "date":"2011-10-12 15:18:56,789",
* "level":"INFO",
* "thread":"quoted\"",
* "message":"new line\n and {}",
* "exceptionclass":"java.net.NoRouteToHostException",
* "stack":[
* "java.net.NoRouteToHostException: that box caught fire 3 years ago",
* "\tat org.apache.hadoop.log.TestLog4Json.testException(TestLog4Json.java:49)",
* "\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)",
* "\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)",
* "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)",
* "\tat java.lang.reflect.Method.invoke(Method.java:597)",
* "\tat junit.framework.TestCase.runTest(TestCase.java:168)",
* "\tat junit.framework.TestCase.runBare(TestCase.java:134)",
* "\tat junit.framework.TestResult$1.protect(TestResult.java:110)",
* "\tat junit.framework.TestResult.runProtected(TestResult.java:128)",
* "\tat junit.framework.TestResult.run(TestResult.java:113)",
* "\tat junit.framework.TestCase.run(TestCase.java:124)",
* "\tat junit.framework.TestSuite.runTest(TestSuite.java:232)",
* "\tat junit.framework.TestSuite.run(TestSuite.java:227)",
* "\tat org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:83)",
* "\tat org.apache.maven.surefire.junit4.JUnit4TestSet.execute(JUnit4TestSet.java:59)",
* "\tat org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.executeTestSet(AbstractDirectoryTestSuite.java:120)",
* "\tat org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.execute(AbstractDirectoryTestSuite.java:145)",
* "\tat org.apache.maven.surefire.Surefire.run(Surefire.java:104)",
* "\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)",
* "\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)",
* "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)",
* "\tat java.lang.reflect.Method.invoke(Method.java:597)",
* "\tat org.apache.maven.surefire.booter.SurefireBooter.runSuitesInProcess(SurefireBooter.java:290)",
* "\tat org.apache.maven.surefire.booter.SurefireBooter.main(SurefireBooter.java:1017)"
* ]
* }
* </pre>
*/
public class Log4Json extends Layout {
/**
* Jackson factories are thread safe when constructing parsers and generators.
* They are not thread safe in configure methods; if there is to be any
* configuration it must be done in a static initializer block.
*/
private static final JsonFactory factory = new MappingJsonFactory();
private static final ObjectReader READER = new ObjectMapper(factory).reader();
public static final String DATE = "date";
public static final String EXCEPTION_CLASS = "exceptionclass";
public static final String LEVEL = "level";
public static final String MESSAGE = "message";
public static final String NAME = "name";
public static final String STACK = "stack";
public static final String THREAD = "thread";
public static final String TIME = "time";
public static final String JSON_TYPE = "application/json";
private final DateFormat dateFormat;
public Log4Json() {
dateFormat = new ISO8601DateFormat();
}
/**
* @return the mime type of JSON
*/
@Override
public String getContentType() {
return JSON_TYPE;
}
@Override
public String format(LoggingEvent event) {
try {
return toJson(event);
} catch (IOException e) {
//this really should not happen, and rather than throw an exception
//which may hide the real problem, the log class is printed
//in JSON format. The classname is used to ensure valid JSON is
//returned without playing escaping games
return "{ \"logfailure\":\"" + e.getClass().toString() + "\"}";
}
}
/**
* Convert an event to JSON
*
* @param event the event -must not be null
* @return a string value
* @throws IOException on problems generating the JSON
*/
public String toJson(LoggingEvent event) throws IOException {
StringWriter writer = new StringWriter();
toJson(writer, event);
return writer.toString();
}
/**
* Convert an event to JSON
*
* @param writer the destination writer
* @param event the event -must not be null
* @return the writer
* @throws IOException on problems generating the JSON
*/
public Writer toJson(final Writer writer, final LoggingEvent event)
throws IOException {
ThrowableInformation ti = event.getThrowableInformation();
toJson(writer,
event.getLoggerName(),
event.getTimeStamp(),
event.getLevel().toString(),
event.getThreadName(),
event.getRenderedMessage(),
ti);
return writer;
}
/**
* Build a JSON entry from the parameters. This is public for testing.
*
* @param writer destination
* @param loggerName logger name
* @param timeStamp time_t value
* @param level level string
* @param threadName name of the thread
* @param message rendered message
* @param ti nullable thrown information
* @return the writer
* @throws IOException on any problem
*/
public Writer toJson(final Writer writer,
final String loggerName,
final long timeStamp,
final String level,
final String threadName,
final String message,
final ThrowableInformation ti) throws IOException {
JsonGenerator json = factory.createGenerator(writer);
json.writeStartObject();
json.writeStringField(NAME, loggerName);
json.writeNumberField(TIME, timeStamp);
Date date = new Date(timeStamp);
json.writeStringField(DATE, dateFormat.format(date));
json.writeStringField(LEVEL, level);
json.writeStringField(THREAD, threadName);
json.writeStringField(MESSAGE, message);
if (ti != null) {
//there is some throwable info, but if the log event has been sent over the wire,
//there may not be a throwable inside it, just a summary.
Throwable thrown = ti.getThrowable();
String eclass = (thrown != null) ?
thrown.getClass().getName()
: "";
json.writeStringField(EXCEPTION_CLASS, eclass);
String[] stackTrace = ti.getThrowableStrRep();
json.writeArrayFieldStart(STACK);
for (String row : stackTrace) {
json.writeString(row);
}
json.writeEndArray();
}
json.writeEndObject();
json.flush();
json.close();
return writer;
}
/**
* This appender does not ignore throwables
*
* @return false, always
*/
@Override
public boolean ignoresThrowable() {
return false;
}
/**
* Do nothing
*/
@Override
public void activateOptions() {
}
/**
* For use in tests
*
* @param json incoming JSON to parse
* @return a node tree
* @throws IOException on any parsing problems
*/
public static ContainerNode parse(String json) throws IOException {
JsonNode jsonNode = READER.readTree(json);
if (!(jsonNode instanceof ContainerNode)) {
throw new IOException("Wrong JSON data: " + json);
}
return (ContainerNode) jsonNode;
}
}

View File

@ -34,6 +34,8 @@ import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.base.Charsets;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -44,6 +46,7 @@ import org.apache.hadoop.security.authentication.client.AuthenticatedURL;
import org.apache.hadoop.security.authentication.client.KerberosAuthenticator;
import org.apache.hadoop.security.ssl.SSLFactory;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.GenericsUtil;
import org.apache.hadoop.util.ServletUtil;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
@ -338,14 +341,18 @@ public class LogLevel {
out.println(MARKER
+ "Submitted Class Name: <b>" + logName + "</b><br />");
Logger log = Logger.getLogger(logName);
org.slf4j.Logger log = LoggerFactory.getLogger(logName);
out.println(MARKER
+ "Log Class: <b>" + log.getClass().getName() +"</b><br />");
if (level != null) {
out.println(MARKER + "Submitted Level: <b>" + level + "</b><br />");
}
process(log, level, out);
if (GenericsUtil.isLog4jLogger(logName)) {
process(Logger.getLogger(logName), level, out);
} else {
out.println("Sorry, setting log level is only supported for log4j loggers.<br />");
}
}
out.println(FORMS);

View File

@ -227,6 +227,29 @@ public class MetricsRegistry {
return ret;
}
/**
* Create a mutable inverse metric that estimates inverse quantiles of a stream of values
* @param name of the metric
* @param desc metric description
* @param sampleName of the metric (e.g., "Ops")
* @param valueName of the metric (e.g., "Rate")
* @param interval rollover interval of estimator in seconds
* @return a new inverse quantile estimator object
* @throws MetricsException if interval is not a positive integer
*/
public synchronized MutableQuantiles newInverseQuantiles(String name, String desc,
String sampleName, String valueName, int interval) {
checkMetricName(name);
if (interval <= 0) {
throw new MetricsException("Interval should be positive. Value passed" +
" is: " + interval);
}
MutableQuantiles ret =
new MutableInverseQuantiles(name, desc, sampleName, valueName, interval);
metricsMap.put(name, ret);
return ret;
}
/**
* Create a mutable metric with stats
* @param name of the metric
@ -278,7 +301,7 @@ public class MetricsRegistry {
}
/**
* Create a mutable rate metric (for throughput measurement)
* Create a mutable rate metric (for throughput measurement).
* @param name of the metric
* @param desc description
* @param extended produce extended stat (stdev/min/max etc.) if true

View File

@ -0,0 +1,93 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metrics2.lib;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.metrics2.util.Quantile;
import java.text.DecimalFormat;
import static org.apache.hadoop.metrics2.lib.Interns.info;
/**
* Watches a stream of long values, maintaining online estimates of specific
* quantiles with provably low error bounds. Inverse quantiles are meant for
* highly accurate low-percentile (e.g. 1st, 5th) metrics.
* InverseQuantiles are used for metrics where higher the value better it is.
* ( eg: data transfer rate ).
* The 1st percentile here corresponds to the 99th inverse percentile metric,
* 5th percentile to 95th and so on.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class MutableInverseQuantiles extends MutableQuantiles{
static class InversePercentile extends Quantile {
InversePercentile(double inversePercentile) {
super(inversePercentile/100, inversePercentile/1000);
}
}
@VisibleForTesting
public static final Quantile[] INVERSE_QUANTILES = {new InversePercentile(50),
new InversePercentile(25), new InversePercentile(10),
new InversePercentile(5), new InversePercentile(1)};
/**
* Instantiates a new {@link MutableInverseQuantiles} for a metric that rolls itself
* over on the specified time interval.
*
* @param name of the metric
* @param description long-form textual description of the metric
* @param sampleName type of items in the stream (e.g., "Ops")
* @param valueName type of the values
* @param intervalSecs rollover interval (in seconds) of the estimator
*/
public MutableInverseQuantiles(String name, String description, String sampleName,
String valueName, int intervalSecs) {
super(name, description, sampleName, valueName, intervalSecs);
}
/**
* Sets quantileInfo.
*
* @param ucName capitalized name of the metric
* @param uvName capitalized type of the values
* @param desc uncapitalized long-form textual description of the metric
* @param lvName uncapitalized type of the values
* @param df Number formatter for inverse percentile value
*/
void setQuantiles(String ucName, String uvName, String desc, String lvName, DecimalFormat df) {
for (int i = 0; i < INVERSE_QUANTILES.length; i++) {
double inversePercentile = 100 * (1 - INVERSE_QUANTILES[i].quantile);
String nameTemplate = ucName + df.format(inversePercentile) + "thInversePercentile" + uvName;
String descTemplate = df.format(inversePercentile) + " inverse percentile " + lvName
+ " with " + getInterval() + " second interval for " + desc;
addQuantileInfo(i, info(nameTemplate, descTemplate));
}
}
/**
* Returns the array of Inverse Quantiles declared in MutableInverseQuantiles.
*
* @return array of Inverse Quantiles
*/
public synchronized Quantile[] getQuantiles() {
return INVERSE_QUANTILES;
}
}
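A hedged usage sketch of the new inverse quantiles together with the MetricsRegistry#newInverseQuantiles factory added earlier in this commit; the source name, metric names, and sample values are made up for illustration.

import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;

public class InverseQuantilesSketch {
  public static void main(String[] args) {
    // "ExampleSource" and the metric names below are invented for this sketch.
    MetricsRegistry registry = new MetricsRegistry("ExampleSource");
    // 5-second rollover; tracks the low percentiles (1st, 5th, ...) that matter
    // for "higher is better" metrics such as a transfer rate.
    MutableQuantiles readRate = registry.newInverseQuantiles(
        "readTransferRate", "rate of bytes read", "Ops", "Rate", 5);
    for (long v = 1; v <= 1000; v++) {
      readRate.add(v);
    }
  }
}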

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.metrics2.lib;
import static org.apache.hadoop.metrics2.lib.Interns.info;
import java.text.DecimalFormat;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
@ -48,13 +49,14 @@ import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFact
public class MutableQuantiles extends MutableMetric {
@VisibleForTesting
public static final Quantile[] quantiles = { new Quantile(0.50, 0.050),
public static final Quantile[] QUANTILES = {new Quantile(0.50, 0.050),
new Quantile(0.75, 0.025), new Quantile(0.90, 0.010),
new Quantile(0.95, 0.005), new Quantile(0.99, 0.001) };
new Quantile(0.95, 0.005), new Quantile(0.99, 0.001)};
private final MetricsInfo numInfo;
private final MetricsInfo[] quantileInfos;
private final int interval;
private MetricsInfo numInfo;
private MetricsInfo[] quantileInfos;
private int intervalSecs;
private static DecimalFormat decimalFormat = new DecimalFormat("###.####");
private QuantileEstimator estimator;
private long previousCount = 0;
@ -91,35 +93,49 @@ public class MutableQuantiles extends MutableMetric {
String lsName = StringUtils.uncapitalize(sampleName);
String lvName = StringUtils.uncapitalize(valueName);
numInfo = info(ucName + "Num" + usName, String.format(
"Number of %s for %s with %ds interval", lsName, desc, interval));
// Construct the MetricsInfos for the quantiles, converting to percentiles
quantileInfos = new MetricsInfo[quantiles.length];
String nameTemplate = ucName + "%dthPercentile" + uvName;
String descTemplate = "%d percentile " + lvName + " with " + interval
+ " second interval for " + desc;
for (int i = 0; i < quantiles.length; i++) {
int percentile = (int) (100 * quantiles[i].quantile);
quantileInfos[i] = info(String.format(nameTemplate, percentile),
String.format(descTemplate, percentile));
}
estimator = new SampleQuantiles(quantiles);
this.interval = interval;
setInterval(interval);
setNumInfo(info(ucName + "Num" + usName, String.format(
"Number of %s for %s with %ds interval", lsName, desc, interval)));
scheduledTask = scheduler.scheduleWithFixedDelay(new RolloverSample(this),
interval, interval, TimeUnit.SECONDS);
// Construct the MetricsInfos for the quantiles, converting to percentiles
Quantile[] quantilesArray = getQuantiles();
setQuantileInfos(quantilesArray.length);
setQuantiles(ucName, uvName, desc, lvName, decimalFormat);
setEstimator(new SampleQuantiles(quantilesArray));
}
/**
* Sets quantileInfo.
*
* @param ucName capitalized name of the metric
* @param uvName capitalized type of the values
* @param desc uncapitalized long-form textual description of the metric
* @param lvName uncapitalized type of the values
* @param pDecimalFormat Number formatter for percentile value
*/
void setQuantiles(String ucName, String uvName, String desc, String lvName, DecimalFormat pDecimalFormat) {
for (int i = 0; i < QUANTILES.length; i++) {
double percentile = 100 * QUANTILES[i].quantile;
String nameTemplate = ucName + pDecimalFormat.format(percentile) + "thPercentile" + uvName;
String descTemplate = pDecimalFormat.format(percentile) + " percentile " + lvName
+ " with " + getInterval() + " second interval for " + desc;
addQuantileInfo(i, info(nameTemplate, descTemplate));
}
}
public MutableQuantiles() {}
@Override
public synchronized void snapshot(MetricsRecordBuilder builder, boolean all) {
Quantile[] quantilesArray = getQuantiles();
if (all || changed()) {
builder.addGauge(numInfo, previousCount);
for (int i = 0; i < quantiles.length; i++) {
for (int i = 0; i < quantilesArray.length; i++) {
long newValue = 0;
// If snapshot is null, we failed to update since the window was empty
if (previousSnapshot != null) {
newValue = previousSnapshot.get(quantiles[i]);
newValue = previousSnapshot.get(quantilesArray[i]);
}
builder.addGauge(quantileInfos[i], newValue);
}
@ -133,8 +149,59 @@ public class MutableQuantiles extends MutableMetric {
estimator.insert(value);
}
public int getInterval() {
return interval;
/**
* Returns the array of Quantiles declared in MutableQuantiles.
*
* @return array of Quantiles
*/
public synchronized Quantile[] getQuantiles() {
return QUANTILES;
}
/**
* Set info about the metrics.
*
* @param pNumInfo info about the metrics.
*/
public synchronized void setNumInfo(MetricsInfo pNumInfo) {
this.numInfo = pNumInfo;
}
/**
* Initialize quantileInfos array.
*
* @param length of the quantileInfos array.
*/
public synchronized void setQuantileInfos(int length) {
this.quantileInfos = new MetricsInfo[length];
}
/**
* Add entry to quantileInfos array.
*
* @param i array index.
* @param info info to be added to quantileInfos array.
*/
public synchronized void addQuantileInfo(int i, MetricsInfo info) {
this.quantileInfos[i] = info;
}
/**
* Set the rollover interval (in seconds) of the estimator.
*
* @param pIntervalSecs of the estimator.
*/
public synchronized void setInterval(int pIntervalSecs) {
this.intervalSecs = pIntervalSecs;
}
/**
* Get the rollover interval (in seconds) of the estimator.
*
* @return intervalSecs of the estimator.
*/
public synchronized int getInterval() {
return intervalSecs;
}
public void stop() {

View File

@ -314,7 +314,8 @@ public final class SecurityUtil {
String keytabFilename = conf.get(keytabFileKey);
if (keytabFilename == null || keytabFilename.length() == 0) {
throw new IOException("Running in secure mode, but config doesn't have a keytab");
throw new IOException(
"Running in secure mode, but config doesn't have a keytab for key: " + keytabFileKey);
}
String principalConfig = conf.get(userNameKey, System

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.util;
import java.lang.reflect.Array;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -33,6 +34,14 @@ import org.slf4j.LoggerFactory;
@InterfaceStability.Unstable
public class GenericsUtil {
private static final String SLF4J_LOG4J_ADAPTER_CLASS = "org.slf4j.impl.Log4jLoggerAdapter";
/**
* Set to false only if log4j adapter class is not found in the classpath. Once set to false,
* the utility method should not bother re-loading class again.
*/
private static final AtomicBoolean IS_LOG4J_LOGGER = new AtomicBoolean(true);
/**
* Returns the Class object (of type <code>Class&lt;T&gt;</code>) of the
* argument of type <code>T</code>.
@ -87,12 +96,27 @@ public class GenericsUtil {
if (clazz == null) {
return false;
}
Logger log = LoggerFactory.getLogger(clazz);
return isLog4jLogger(clazz.getName());
}
/**
* Determine whether the log of the given logger is of Log4J implementation.
*
* @param logger the logger name, usually class name as string.
* @return true if the logger uses Log4J implementation.
*/
public static boolean isLog4jLogger(String logger) {
if (logger == null || !IS_LOG4J_LOGGER.get()) {
return false;
}
Logger log = LoggerFactory.getLogger(logger);
try {
Class log4jClass = Class.forName("org.slf4j.impl.Log4jLoggerAdapter");
Class<?> log4jClass = Class.forName(SLF4J_LOG4J_ADAPTER_CLASS);
return log4jClass.isInstance(log);
} catch (ClassNotFoundException e) {
IS_LOG4J_LOGGER.set(false);
return false;
}
}
}
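A small sketch of the new String overload in use, mirroring what the LogLevel servlet now does before attempting to change a level; the logger name is arbitrary.

import org.apache.hadoop.util.GenericsUtil;

public class Log4jLoggerCheckSketch {
  public static void main(String[] args) {
    // Arbitrary logger name chosen for the sketch.
    String logger = "org.apache.hadoop.fs.FileSystem";
    if (GenericsUtil.isLog4jLogger(logger)) {
      System.out.println(logger + " is backed by log4j; its level can be changed.");
    } else {
      System.out.println("Setting the log level is only supported for log4j loggers.");
    }
  }
}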

View File

@ -370,6 +370,9 @@ Each metrics record contains tags such as SessionId and Hostname as additional i
|:---- |:---- |
| `BytesWritten` | Total number of bytes written to DataNode |
| `BytesRead` | Total number of bytes read from DataNode |
| `ReadTransferRateNumOps` | Total number of data read transfers |
| `ReadTransferRateAvgTime` | Average transfer rate of bytes read from DataNode, measured in bytes per second. |
| `ReadTransferRate`*num*`s(50/75/90/95/99)thPercentileRate` | The 50/75/90/95/99th percentile of the transfer rate of bytes read from DataNode, measured in bytes per second. |
| `BlocksWritten` | Total number of blocks written to DataNode |
| `BlocksRead` | Total number of blocks read from DataNode |
| `BlocksReplicated` | Total number of blocks replicated |
@ -589,17 +592,19 @@ StateStoreMetrics
-----------------
StateStoreMetrics shows the statistics of the State Store component in Router-based federation.
| Name | Description |
|:---- |:---- |
| `ReadsNumOps` | Number of GET transactions for State Store within an interval time of metric |
| `ReadsAvgTime` | Average time of GET transactions for State Store in milliseconds |
| `WritesNumOps` | Number of PUT transactions for State Store within an interval time of metric |
| `WritesAvgTime` | Average time of PUT transactions for State Store in milliseconds |
| `RemovesNumOps` | Number of REMOVE transactions for State Store within an interval time of metric |
| `RemovesAvgTime` | Average time of REMOVE transactions for State Store in milliseconds |
| `FailuresNumOps` | Number of failed transactions for State Store within an interval time of metric |
| `FailuresAvgTime` | Average time of failed transactions for State Store in milliseconds |
| `Cache`*BaseRecord*`Size` | Number of store records to cache in State Store |
| Name | Description |
|:------------------------------------------|:-----------------------------------------------------------------------------------|
| `ReadsNumOps` | Number of GET transactions for State Store within an interval time of metric |
| `ReadsAvgTime` | Average time of GET transactions for State Store in milliseconds |
| `WritesNumOps` | Number of PUT transactions for State Store within an interval time of metric |
| `WritesAvgTime` | Average time of PUT transactions for State Store in milliseconds |
| `RemovesNumOps` | Number of REMOVE transactions for State Store within an interval time of metric |
| `RemovesAvgTime` | Average time of REMOVE transactions for State Store in milliseconds |
| `FailuresNumOps` | Number of failed transactions for State Store within an interval time of metric |
| `FailuresAvgTime` | Average time of failed transactions for State Store in milliseconds |
| `Cache`*BaseRecord*`Size` | Number of store records to cache in State Store |
| `Cache`*BaseRecord*`LoadNumOps` | Number of times store records are loaded in the State Store Cache from State Store |
| `Cache`*BaseRecord*`LoadAvgTime` | Average time of loading State Store Cache from State Store in milliseconds |
yarn context
============

View File

@ -157,8 +157,7 @@ The following instructions are to run a MapReduce job locally. If you want to ex
4. Make the HDFS directories required to execute MapReduce jobs:
$ bin/hdfs dfs -mkdir /user
$ bin/hdfs dfs -mkdir /user/<username>
$ bin/hdfs dfs -mkdir -p /user/<username>
5. Copy the input files into the distributed filesystem:

View File

@ -0,0 +1,359 @@
<!---
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-->
# Apache Hadoop Changelog
## Release 3.3.5 - 2023-03-14
### IMPORTANT ISSUES:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
| [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | Replace all default Charset usage with UTF-8 | Major | . | Viraj Jasani | Viraj Jasani |
| [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | CryptoOutputStream::close leak when encrypted zones + quota exceptions | Critical | fs | Colm Dougan | Colm Dougan |
### NEW FEATURES:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
| [HADOOP-18003](https://issues.apache.org/jira/browse/HADOOP-18003) | Add a method appendIfAbsent for CallerContext | Minor | . | Tao Li | Tao Li |
| [HDFS-16331](https://issues.apache.org/jira/browse/HDFS-16331) | Make dfs.blockreport.intervalMsec reconfigurable | Major | . | Tao Li | Tao Li |
| [HDFS-16371](https://issues.apache.org/jira/browse/HDFS-16371) | Exclude slow disks when choosing volume | Major | . | Tao Li | Tao Li |
| [HDFS-16400](https://issues.apache.org/jira/browse/HDFS-16400) | Reconfig DataXceiver parameters for datanode | Major | . | Tao Li | Tao Li |
| [HDFS-16399](https://issues.apache.org/jira/browse/HDFS-16399) | Reconfig cache report parameters for datanode | Major | . | Tao Li | Tao Li |
| [HDFS-16398](https://issues.apache.org/jira/browse/HDFS-16398) | Reconfig block report parameters for datanode | Major | . | Tao Li | Tao Li |
| [HDFS-16396](https://issues.apache.org/jira/browse/HDFS-16396) | Reconfig slow peer parameters for datanode | Major | . | Tao Li | Tao Li |
| [HDFS-16397](https://issues.apache.org/jira/browse/HDFS-16397) | Reconfig slow disk parameters for datanode | Major | . | Tao Li | Tao Li |
| [MAPREDUCE-7341](https://issues.apache.org/jira/browse/MAPREDUCE-7341) | Add a task-manifest output committer for Azure and GCS | Major | client | Steve Loughran | Steve Loughran |
| [HADOOP-18163](https://issues.apache.org/jira/browse/HADOOP-18163) | hadoop-azure support for the Manifest Committer of MAPREDUCE-7341 | Major | fs/azure | Steve Loughran | Steve Loughran |
| [HDFS-16413](https://issues.apache.org/jira/browse/HDFS-16413) | Reconfig dfs usage parameters for datanode | Major | . | Tao Li | Tao Li |
| [HDFS-16521](https://issues.apache.org/jira/browse/HDFS-16521) | DFS API to retrieve slow datanodes | Major | . | Viraj Jasani | Viraj Jasani |
| [HDFS-16568](https://issues.apache.org/jira/browse/HDFS-16568) | dfsadmin -reconfig option to start/query reconfig on all live datanodes | Major | . | Viraj Jasani | Viraj Jasani |
| [HDFS-16582](https://issues.apache.org/jira/browse/HDFS-16582) | Expose aggregate latency of slow node as perceived by the reporting node | Major | . | Viraj Jasani | Viraj Jasani |
| [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | Slow peer metrics - add median, mad and upper latency limits | Major | . | Viraj Jasani | Viraj Jasani |
| [YARN-11241](https://issues.apache.org/jira/browse/YARN-11241) | Add uncleaning option for local app log file with log-aggregation enabled | Major | log-aggregation | Ashutosh Gupta | Ashutosh Gupta |
| [HADOOP-18103](https://issues.apache.org/jira/browse/HADOOP-18103) | High performance vectored read API in Hadoop | Major | common, fs, fs/adl, fs/s3 | Mukund Thakur | Mukund Thakur |
### IMPROVEMENTS:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
| [HADOOP-17276](https://issues.apache.org/jira/browse/HADOOP-17276) | Extend CallerContext to make it include many items | Major | . | Hui Fei | Hui Fei |
| [HDFS-15745](https://issues.apache.org/jira/browse/HDFS-15745) | Make DataNodePeerMetrics#LOW\_THRESHOLD\_MS and MIN\_OUTLIER\_DETECTION\_NODES configurable | Major | . | Haibin Huang | Haibin Huang |
| [HDFS-16266](https://issues.apache.org/jira/browse/HDFS-16266) | Add remote port information to HDFS audit log | Major | . | Tao Li | Tao Li |
| [YARN-10997](https://issues.apache.org/jira/browse/YARN-10997) | Revisit allocation and reservation logging | Major | . | Andras Gyori | Andras Gyori |
| [HDFS-16310](https://issues.apache.org/jira/browse/HDFS-16310) | RBF: Add client port to CallerContext for Router | Major | . | Tao Li | Tao Li |
| [HDFS-16352](https://issues.apache.org/jira/browse/HDFS-16352) | return the real datanode numBlocks in #getDatanodeStorageReport | Major | . | qinyuren | qinyuren |
| [HDFS-16426](https://issues.apache.org/jira/browse/HDFS-16426) | fix nextBlockReportTime when trigger full block report force | Major | . | qinyuren | qinyuren |
| [HDFS-16430](https://issues.apache.org/jira/browse/HDFS-16430) | Validate maximum blocks in EC group when adding an EC policy | Minor | ec, erasure-coding | daimin | daimin |
| [HDFS-16403](https://issues.apache.org/jira/browse/HDFS-16403) | Improve FUSE IO performance by supporting FUSE parameter max\_background | Minor | fuse-dfs | daimin | daimin |
| [HDFS-16262](https://issues.apache.org/jira/browse/HDFS-16262) | Async refresh of cached locations in DFSInputStream | Major | . | Bryan Beaudreault | Bryan Beaudreault |
| [HADOOP-18093](https://issues.apache.org/jira/browse/HADOOP-18093) | Better exception handling for testFileStatusOnMountLink() in ViewFsBaseTest.java | Trivial | . | Xing Lin | Xing Lin |
| [HDFS-16423](https://issues.apache.org/jira/browse/HDFS-16423) | balancer should not get blocks on stale storages | Major | balancer & mover | qinyuren | qinyuren |
| [HADOOP-18139](https://issues.apache.org/jira/browse/HADOOP-18139) | Allow configuration of zookeeper server principal | Major | auth | Owen O'Malley | Owen O'Malley |
| [YARN-11076](https://issues.apache.org/jira/browse/YARN-11076) | Upgrade jQuery version in Yarn UI2 | Major | yarn-ui-v2 | Tamas Domok | Tamas Domok |
| [HDFS-16495](https://issues.apache.org/jira/browse/HDFS-16495) | RBF should prepend the client ip rather than append it. | Major | . | Owen O'Malley | Owen O'Malley |
| [HADOOP-18144](https://issues.apache.org/jira/browse/HADOOP-18144) | getTrashRoot/s in ViewFileSystem should return viewFS path, not targetFS path | Major | common | Xing Lin | Xing Lin |
| [HADOOP-18162](https://issues.apache.org/jira/browse/HADOOP-18162) | hadoop-common enhancements for the Manifest Committer of MAPREDUCE-7341 | Major | fs | Steve Loughran | Steve Loughran |
| [HDFS-16529](https://issues.apache.org/jira/browse/HDFS-16529) | Remove unnecessary setObserverRead in TestConsistentReadsObserver | Trivial | test | Zhaohui Wang | Zhaohui Wang |
| [HDFS-16530](https://issues.apache.org/jira/browse/HDFS-16530) | setReplication debug log creates a new string even if debug is disabled | Major | namenode | Stephen O'Donnell | Stephen O'Donnell |
| [HDFS-16457](https://issues.apache.org/jira/browse/HDFS-16457) | Make fs.getspaceused.classname reconfigurable | Major | namenode | yanbin.zhang | yanbin.zhang |
| [HDFS-16427](https://issues.apache.org/jira/browse/HDFS-16427) | Add debug log for BlockManager#chooseExcessRedundancyStriped | Minor | erasure-coding | Tao Li | Tao Li |
| [HDFS-16497](https://issues.apache.org/jira/browse/HDFS-16497) | EC: Add param comment for liveBusyBlockIndices with HDFS-14768 | Minor | erasure-coding, namenode | caozhiqiang | caozhiqiang |
| [HDFS-16389](https://issues.apache.org/jira/browse/HDFS-16389) | Improve NNThroughputBenchmark test mkdirs | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu |
| [HADOOP-17551](https://issues.apache.org/jira/browse/HADOOP-17551) | Upgrade maven-site-plugin to 3.11.0 | Major | . | Akira Ajisaka | Ashutosh Gupta |
| [HDFS-16519](https://issues.apache.org/jira/browse/HDFS-16519) | Add throttler to EC reconstruction | Minor | datanode, ec | daimin | daimin |
| [HDFS-14478](https://issues.apache.org/jira/browse/HDFS-14478) | Add libhdfs APIs for openFile | Major | hdfs-client, libhdfs, native | Sahil Takiar | Sahil Takiar |
| [HADOOP-16202](https://issues.apache.org/jira/browse/HADOOP-16202) | Enhance openFile() for better read performance against object stores | Major | fs, fs/s3, tools/distcp | Steve Loughran | Steve Loughran |
| [YARN-11116](https://issues.apache.org/jira/browse/YARN-11116) | Migrate Times util from SimpleDateFormat to thread-safe DateTimeFormatter class | Minor | . | Jonathan Turner Eagles | Jonathan Turner Eagles |
| [HDFS-16520](https://issues.apache.org/jira/browse/HDFS-16520) | Improve EC pread: avoid potential reading whole block | Major | dfsclient, ec, erasure-coding | daimin | daimin |
| [HADOOP-18167](https://issues.apache.org/jira/browse/HADOOP-18167) | Add metrics to track delegation token secret manager operations | Major | . | Hector Sandoval Chaverri | Hector Sandoval Chaverri |
| [YARN-10080](https://issues.apache.org/jira/browse/YARN-10080) | Support show app id on localizer thread pool | Major | nodemanager | zhoukang | Ashutosh Gupta |
| [HADOOP-18172](https://issues.apache.org/jira/browse/HADOOP-18172) | Change scope of getRootFallbackLink for InodeTree to make them accessible from outside package | Minor | . | Xing Lin | Xing Lin |
| [HDFS-16588](https://issues.apache.org/jira/browse/HDFS-16588) | Backport HDFS-16584 to branch-3.3. | Major | balancer & mover, namenode | JiangHua Zhu | JiangHua Zhu |
| [HADOOP-18240](https://issues.apache.org/jira/browse/HADOOP-18240) | Upgrade Yetus to 0.14.0 | Major | build | Akira Ajisaka | Ashutosh Gupta |
| [HDFS-16585](https://issues.apache.org/jira/browse/HDFS-16585) | Add @VisibleForTesting in Dispatcher.java after HDFS-16268 | Trivial | . | Wei-Chiu Chuang | Ashutosh Gupta |
| [HADOOP-18244](https://issues.apache.org/jira/browse/HADOOP-18244) | Fix Hadoop-Common JavaDoc Error on branch-3.3 | Major | common | Shilun Fan | Shilun Fan |
| [HADOOP-18269](https://issues.apache.org/jira/browse/HADOOP-18269) | Misleading method name in DistCpOptions | Minor | tools/distcp | guophilipse | guophilipse |
| [HADOOP-18275](https://issues.apache.org/jira/browse/HADOOP-18275) | update os-maven-plugin to 1.7.0 | Minor | build | Steve Loughran | Steve Loughran |
| [HDFS-16610](https://issues.apache.org/jira/browse/HDFS-16610) | Make fsck read timeout configurable | Major | hdfs-client | Stephen O'Donnell | Stephen O'Donnell |
| [HDFS-16576](https://issues.apache.org/jira/browse/HDFS-16576) | Remove unused imports in HDFS project | Minor | . | Ashutosh Gupta | Ashutosh Gupta |
| [HDFS-16629](https://issues.apache.org/jira/browse/HDFS-16629) | [JDK 11] Fix javadoc warnings in hadoop-hdfs module | Minor | hdfs | Shilun Fan | Shilun Fan |
| [YARN-11172](https://issues.apache.org/jira/browse/YARN-11172) | Fix testDelegationToken | Major | test | zhengchenyu | zhengchenyu |
| [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | Improve Magic Committer Performance | Minor | fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-18288](https://issues.apache.org/jira/browse/HADOOP-18288) | Total requests and total requests per sec served by RPC servers | Major | . | Viraj Jasani | Viraj Jasani |
| [HADOOP-18336](https://issues.apache.org/jira/browse/HADOOP-18336) | tag FSDataInputStream.getWrappedStream() @Public/@Stable | Minor | fs | Steve Loughran | Ashutosh Gupta |
| [HADOOP-13144](https://issues.apache.org/jira/browse/HADOOP-13144) | Enhancing IPC client throughput via multiple connections per user | Minor | ipc | Jason Kace | Íñigo Goiri |
| [HDFS-16712](https://issues.apache.org/jira/browse/HDFS-16712) | Fix incorrect placeholder in DataNode.java | Major | . | ZanderXu | ZanderXu |
| [HDFS-16702](https://issues.apache.org/jira/browse/HDFS-16702) | MiniDFSCluster should report cause of exception in assertion error | Minor | hdfs | Steve Vaughan | Steve Vaughan |
| [HADOOP-18365](https://issues.apache.org/jira/browse/HADOOP-18365) | Updated addresses are still accessed using the old IP address | Major | common | Steve Vaughan | Steve Vaughan |
| [HDFS-16687](https://issues.apache.org/jira/browse/HDFS-16687) | RouterFsckServlet replicates code from DfsServlet base class | Major | federation | Steve Vaughan | Steve Vaughan |
| [HADOOP-18333](https://issues.apache.org/jira/browse/HADOOP-18333) | hadoop-client-runtime impact by CVE-2022-2047 CVE-2022-2048 due to shaded jetty | Major | build | phoebe chen | Ashutosh Gupta |
| [HADOOP-18406](https://issues.apache.org/jira/browse/HADOOP-18406) | Adds alignment context to call path for creating RPC proxy with multiple connections per user. | Major | ipc | Simbarashe Dzinamarira | Simbarashe Dzinamarira |
| [HDFS-16684](https://issues.apache.org/jira/browse/HDFS-16684) | Exclude self from JournalNodeSyncer when using a bind host | Major | journal-node | Steve Vaughan | Steve Vaughan |
| [HDFS-16686](https://issues.apache.org/jira/browse/HDFS-16686) | GetJournalEditServlet fails to authorize valid Kerberos request | Major | journal-node | Steve Vaughan | Steve Vaughan |
| [YARN-11303](https://issues.apache.org/jira/browse/YARN-11303) | Upgrade jquery ui to 1.13.2 | Major | security | D M Murali Krishna Reddy | Ashutosh Gupta |
| [HADOOP-16769](https://issues.apache.org/jira/browse/HADOOP-16769) | LocalDirAllocator to provide diagnostics when file creation fails | Minor | util | Ramesh Kumar Thangarajan | Ashutosh Gupta |
| [HADOOP-18341](https://issues.apache.org/jira/browse/HADOOP-18341) | upgrade commons-configuration2 to 2.8.0 and commons-text to 1.9 | Major | . | PJ Fanning | PJ Fanning |
| [HDFS-16776](https://issues.apache.org/jira/browse/HDFS-16776) | Erasure Coding: The length of targets should be checked when DN gets a reconstruction task | Major | . | Kidd5368 | Kidd5368 |
| [HADOOP-18469](https://issues.apache.org/jira/browse/HADOOP-18469) | Add XMLUtils methods to centralise code that creates secure XML parsers | Major | . | PJ Fanning | PJ Fanning |
| [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | Remove the hadoop-openstack module | Major | build, fs, fs/swift | Steve Loughran | Steve Loughran |
| [HADOOP-18468](https://issues.apache.org/jira/browse/HADOOP-18468) | upgrade jettison json jar due to fix CVE-2022-40149 | Major | build | PJ Fanning | PJ Fanning |
| [HADOOP-17779](https://issues.apache.org/jira/browse/HADOOP-17779) | Lock File System Creator Semaphore Uninterruptibly | Minor | fs | David Mollitor | David Mollitor |
| [HADOOP-18360](https://issues.apache.org/jira/browse/HADOOP-18360) | Update commons-csv from 1.0 to 1.9.0. | Minor | common | Shilun Fan | Shilun Fan |
| [HADOOP-18493](https://issues.apache.org/jira/browse/HADOOP-18493) | update jackson-databind 2.12.7.1 due to CVE fixes | Major | . | PJ Fanning | PJ Fanning |
| [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | Update Bouncy Castle to 1.68 or later | Major | build | Takanobu Asanuma | PJ Fanning |
| [HADOOP-18497](https://issues.apache.org/jira/browse/HADOOP-18497) | Upgrade commons-text version to fix CVE-2022-42889 | Major | build | Xiaoqiao He | PJ Fanning |
| [HDFS-16795](https://issues.apache.org/jira/browse/HDFS-16795) | Use secure XML parser utils in hdfs classes | Major | . | PJ Fanning | PJ Fanning |
| [YARN-11330](https://issues.apache.org/jira/browse/YARN-11330) | Use secure XML parser utils in YARN | Major | . | PJ Fanning | PJ Fanning |
| [MAPREDUCE-7411](https://issues.apache.org/jira/browse/MAPREDUCE-7411) | Use secure XML parser utils in MapReduce | Major | . | PJ Fanning | PJ Fanning |
| [HADOOP-18512](https://issues.apache.org/jira/browse/HADOOP-18512) | upgrade woodstox-core to 5.4.0 for security fix | Major | common | phoebe chen | PJ Fanning |
| [YARN-11363](https://issues.apache.org/jira/browse/YARN-11363) | Remove unused TimelineVersionWatcher and TimelineVersion from hadoop-yarn-server-tests | Major | test, yarn | Ashutosh Gupta | Ashutosh Gupta |
| [YARN-11364](https://issues.apache.org/jira/browse/YARN-11364) | Docker Container to accept docker Image name with sha256 digest | Major | yarn | Ashutosh Gupta | Ashutosh Gupta |
| [HADOOP-18517](https://issues.apache.org/jira/browse/HADOOP-18517) | ABFS: Add fs.azure.enable.readahead option to disable readahead | Major | fs/azure | Steve Loughran | Steve Loughran |
| [HADOOP-18484](https://issues.apache.org/jira/browse/HADOOP-18484) | upgrade hsqldb to v2.7.1 due to CVE | Major | . | PJ Fanning | Ashutosh Gupta |
| [HDFS-16844](https://issues.apache.org/jira/browse/HDFS-16844) | [RBF] The routers should be resilient against exceptions from StateStore | Major | rbf | Owen O'Malley | Owen O'Malley |
| [HADOOP-18573](https://issues.apache.org/jira/browse/HADOOP-18573) | Improve error reporting on non-standard kerberos names | Blocker | security | Steve Loughran | Steve Loughran |
| [HADOOP-18561](https://issues.apache.org/jira/browse/HADOOP-18561) | CVE-2021-37533 on commons-net is included in hadoop common and hadoop-client-runtime | Blocker | build | phoebe chen | Steve Loughran |
| [HADOOP-18067](https://issues.apache.org/jira/browse/HADOOP-18067) | Über-jira: S3A Hadoop 3.3.5 features | Major | fs/s3 | Steve Loughran | Mukund Thakur |
| [YARN-10444](https://issues.apache.org/jira/browse/YARN-10444) | Node Manager to use openFile() with whole-file read policy for localizing files. | Minor | nodemanager | Steve Loughran | Steve Loughran |
| [HADOOP-18661](https://issues.apache.org/jira/browse/HADOOP-18661) | Fix bin/hadoop usage script terminology | Blocker | scripts | Steve Loughran | Steve Loughran |
### BUG FIXES:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
| [HADOOP-17116](https://issues.apache.org/jira/browse/HADOOP-17116) | Skip Retry INFO logging on first failover from a proxy | Major | ha | Hanisha Koneru | Hanisha Koneru |
| [YARN-10553](https://issues.apache.org/jira/browse/YARN-10553) | Refactor TestDistributedShell | Major | distributed-shell, test | Ahmed Hussein | Ahmed Hussein |
| [HDFS-15839](https://issues.apache.org/jira/browse/HDFS-15839) | RBF: Cannot get method setBalancerBandwidth on Router Client | Major | rbf | Yang Yun | Yang Yun |
| [HADOOP-17588](https://issues.apache.org/jira/browse/HADOOP-17588) | CryptoInputStream#close() should be synchronized | Major | . | Renukaprasad C | Renukaprasad C |
| [HADOOP-17836](https://issues.apache.org/jira/browse/HADOOP-17836) | Improve logging on ABFS error reporting | Minor | fs/azure | Steve Loughran | Steve Loughran |
| [HADOOP-17989](https://issues.apache.org/jira/browse/HADOOP-17989) | ITestAzureBlobFileSystemDelete failing "Operations has null HTTP response" | Major | fs/azure, test | Steve Loughran | Steve Loughran |
| [YARN-11055](https://issues.apache.org/jira/browse/YARN-11055) | In cgroups-operations.c some fprintf format strings don't end with "\\n" | Minor | nodemanager | Gera Shegalov | Gera Shegalov |
| [YARN-11065](https://issues.apache.org/jira/browse/YARN-11065) | Bump follow-redirects from 1.13.3 to 1.14.7 in hadoop-yarn-ui | Major | yarn-ui-v2 | Akira Ajisaka | |
| [HDFS-16303](https://issues.apache.org/jira/browse/HDFS-16303) | Losing over 100 datanodes in state decommissioning results in full blockage of all datanode decommissioning | Major | . | Kevin Wikant | Kevin Wikant |
| [HDFS-16443](https://issues.apache.org/jira/browse/HDFS-16443) | Fix edge case where DatanodeAdminDefaultMonitor doubly enqueues a DatanodeDescriptor on exception | Major | hdfs | Kevin Wikant | Kevin Wikant |
| [HDFS-16449](https://issues.apache.org/jira/browse/HDFS-16449) | Fix hadoop web site release notes and changelog not available | Minor | documentation | guophilipse | guophilipse |
| [YARN-10788](https://issues.apache.org/jira/browse/YARN-10788) | TestCsiClient fails | Major | test | Akira Ajisaka | Akira Ajisaka |
| [HADOOP-18126](https://issues.apache.org/jira/browse/HADOOP-18126) | Update junit 5 version due to build issues | Major | build | PJ Fanning | PJ Fanning |
| [YARN-11033](https://issues.apache.org/jira/browse/YARN-11033) | isAbsoluteResource is not correct for dynamically created queues | Minor | yarn | Tamas Domok | Tamas Domok |
| [YARN-10894](https://issues.apache.org/jira/browse/YARN-10894) | Follow up YARN-10237: fix the new test case in TestRMWebServicesCapacitySched | Major | . | Tamas Domok | Tamas Domok |
| [YARN-11022](https://issues.apache.org/jira/browse/YARN-11022) | Fix the documentation for max-parallel-apps in CS | Major | capacity scheduler | Tamas Domok | Tamas Domok |
| [HADOOP-18150](https://issues.apache.org/jira/browse/HADOOP-18150) | Fix ITestAuditManagerDisabled after S3A audit logging was enabled in HADOOP-18091 | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh |
| [HADOOP-17976](https://issues.apache.org/jira/browse/HADOOP-17976) | abfs etag extraction inconsistent between LIST and HEAD calls | Minor | fs/azure | Steve Loughran | Steve Loughran |
| [HADOOP-18129](https://issues.apache.org/jira/browse/HADOOP-18129) | Change URI[] in INodeLink to String[] to reduce memory footprint of ViewFileSystem | Major | . | Abhishek Das | Abhishek Das |
| [HADOOP-18145](https://issues.apache.org/jira/browse/HADOOP-18145) | Fileutil's unzip method causes unzipped files to lose their original permissions | Major | common | jingxiong zhong | jingxiong zhong |
| [HDFS-16518](https://issues.apache.org/jira/browse/HDFS-16518) | KeyProviderCache close cached KeyProvider with Hadoop ShutdownHookManager | Major | hdfs | Lei Yang | Lei Yang |
| [HADOOP-18169](https://issues.apache.org/jira/browse/HADOOP-18169) | getDelegationTokens in ViewFs should also fetch the token from the fallback FS | Major | . | Xing Lin | Xing Lin |
| [HDFS-16479](https://issues.apache.org/jira/browse/HDFS-16479) | EC: NameNode should not send a reconstruction work when the source datanodes are insufficient | Critical | ec, erasure-coding | Yuanbo Liu | Takanobu Asanuma |
| [HDFS-16509](https://issues.apache.org/jira/browse/HDFS-16509) | Fix decommission UnsupportedOperationException: Remove unsupported | Major | namenode | daimin | daimin |
| [HDFS-16456](https://issues.apache.org/jira/browse/HDFS-16456) | EC: Decommission a rack with only one dn will fail when the rack number is equal with replication | Critical | ec, namenode | caozhiqiang | caozhiqiang |
| [HADOOP-18201](https://issues.apache.org/jira/browse/HADOOP-18201) | Remove base and bucket overrides for endpoint in ITestS3ARequesterPays.java | Major | fs/s3 | Mehakmeet Singh | Daniel Carl Jones |
| [HDFS-16536](https://issues.apache.org/jira/browse/HDFS-16536) | TestOfflineImageViewer fails on branch-3.3 | Major | test | Akira Ajisaka | Ashutosh Gupta |
| [HDFS-16538](https://issues.apache.org/jira/browse/HDFS-16538) | EC decoding failed due to not enough valid inputs | Major | erasure-coding | qinyuren | qinyuren |
| [HDFS-16544](https://issues.apache.org/jira/browse/HDFS-16544) | EC decoding failed due to invalid buffer | Major | erasure-coding | qinyuren | qinyuren |
| [HADOOP-17564](https://issues.apache.org/jira/browse/HADOOP-17564) | Fix typo in UnixShellGuide.html | Trivial | . | Takanobu Asanuma | Ashutosh Gupta |
| [HDFS-16552](https://issues.apache.org/jira/browse/HDFS-16552) | Fix NPE for TestBlockManager | Major | . | Tao Li | Tao Li |
| [MAPREDUCE-7246](https://issues.apache.org/jira/browse/MAPREDUCE-7246) | In MapredAppMasterRest#Mapreduce\_Application\_Master\_Info\_API, the datatype of appId should be "string". | Major | documentation | jenny | Ashutosh Gupta |
| [YARN-10187](https://issues.apache.org/jira/browse/YARN-10187) | Removing hadoop-yarn-project/hadoop-yarn/README as it is no longer maintained. | Minor | documentation | N Sanketh Reddy | Ashutosh Gupta |
| [HADOOP-16515](https://issues.apache.org/jira/browse/HADOOP-16515) | Update the link to compatibility guide | Minor | documentation | Akira Ajisaka | Ashutosh Gupta |
| [HDFS-16185](https://issues.apache.org/jira/browse/HDFS-16185) | Fix comment in LowRedundancyBlocks.java | Minor | documentation | Akira Ajisaka | Ashutosh Gupta |
| [HADOOP-17479](https://issues.apache.org/jira/browse/HADOOP-17479) | Fix the examples of hadoop config prefix | Minor | documentation | Akira Ajisaka | Ashutosh Gupta |
| [HADOOP-18222](https://issues.apache.org/jira/browse/HADOOP-18222) | Prevent DelegationTokenSecretManagerMetrics from registering multiple times | Major | . | Hector Sandoval Chaverri | Hector Sandoval Chaverri |
| [HDFS-16540](https://issues.apache.org/jira/browse/HDFS-16540) | Data locality is lost when DataNode pod restarts in kubernetes | Major | namenode | Huaxiang Sun | Huaxiang Sun |
| [YARN-11133](https://issues.apache.org/jira/browse/YARN-11133) | YarnClient gets the wrong EffectiveMinCapacity value | Major | api | Zilong Zhu | Zilong Zhu |
| [YARN-10850](https://issues.apache.org/jira/browse/YARN-10850) | TimelineService v2 lists containers for all attempts when filtering for one | Major | timelinereader | Benjamin Teke | Benjamin Teke |
| [YARN-11141](https://issues.apache.org/jira/browse/YARN-11141) | Capacity Scheduler does not support ambiguous queue names when moving application across queues | Major | capacity scheduler | András Győri | András Győri |
| [HDFS-16586](https://issues.apache.org/jira/browse/HDFS-16586) | Purge FsDatasetAsyncDiskService threadgroup; it causes BPServiceActor$CommandProcessingThread IllegalThreadStateException 'fatal exception and exit' | Major | datanode | Michael Stack | Michael Stack |
| [HADOOP-18251](https://issues.apache.org/jira/browse/HADOOP-18251) | Fix failure of extracting JIRA id from commit message in git\_jira\_fix\_version\_check.py | Minor | build | Masatake Iwasaki | Masatake Iwasaki |
| [YARN-11128](https://issues.apache.org/jira/browse/YARN-11128) | Fix comments in TestProportionalCapacityPreemptionPolicy\* | Minor | capacityscheduler, documentation | Ashutosh Gupta | Ashutosh Gupta |
| [HADOOP-18234](https://issues.apache.org/jira/browse/HADOOP-18234) | s3a access point xml examples are wrong | Minor | documentation, fs/s3 | Steve Loughran | Ashutosh Gupta |
| [HADOOP-18238](https://issues.apache.org/jira/browse/HADOOP-18238) | Fix reentrancy check in SFTPFileSystem.close() | Major | common | yi liu | Ashutosh Gupta |
| [HDFS-16583](https://issues.apache.org/jira/browse/HDFS-16583) | DatanodeAdminDefaultMonitor can get stuck in an infinite loop | Major | . | Stephen O'Donnell | Stephen O'Donnell |
| [HDFS-16608](https://issues.apache.org/jira/browse/HDFS-16608) | Fix the link in TestClientProtocolForPipelineRecovery | Minor | documentation | Samrat Deb | Samrat Deb |
| [HDFS-16563](https://issues.apache.org/jira/browse/HDFS-16563) | Namenode WebUI prints sensitive information on Token Expiry | Major | namenode, security, webhdfs | Renukaprasad C | Renukaprasad C |
| [HDFS-16623](https://issues.apache.org/jira/browse/HDFS-16623) | IllegalArgumentException in LifelineSender | Major | . | ZanderXu | ZanderXu |
| [HDFS-16064](https://issues.apache.org/jira/browse/HDFS-16064) | Determine when to invalidate corrupt replicas based on number of usable replicas | Major | datanode, namenode | Kevin Wikant | Kevin Wikant |
| [HADOOP-18255](https://issues.apache.org/jira/browse/HADOOP-18255) | fsdatainputstreambuilder.md refers to hadoop 3.3.3, when it shouldn't | Minor | documentation | Steve Loughran | Ashutosh Gupta |
| [MAPREDUCE-7387](https://issues.apache.org/jira/browse/MAPREDUCE-7387) | Fix TestJHSSecurity#testDelegationToken AssertionError due to HDFS-16563 | Major | . | Shilun Fan | Shilun Fan |
| [MAPREDUCE-7369](https://issues.apache.org/jira/browse/MAPREDUCE-7369) | MapReduce tasks timing out when spends more time on MultipleOutputs#close | Major | . | Prabhu Joseph | Ashutosh Gupta |
| [MAPREDUCE-7391](https://issues.apache.org/jira/browse/MAPREDUCE-7391) | TestLocalDistributedCacheManager failing after HADOOP-16202 | Major | test | Steve Loughran | Steve Loughran |
| [HDFS-16591](https://issues.apache.org/jira/browse/HDFS-16591) | StateStoreZooKeeper fails to initialize | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri |
| [HADOOP-18321](https://issues.apache.org/jira/browse/HADOOP-18321) | Fix when to read an additional record from a BZip2 text file split | Critical | io | Ashutosh Gupta | Ashutosh Gupta |
| [HADOOP-18100](https://issues.apache.org/jira/browse/HADOOP-18100) | Change scope of inner classes in InodeTree to make them accessible outside package | Major | . | Abhishek Das | Abhishek Das |
| [HADOOP-18217](https://issues.apache.org/jira/browse/HADOOP-18217) | shutdownhookmanager should not be multithreaded (deadlock possible) | Minor | util | Catherinot Remi | |
| [MAPREDUCE-7372](https://issues.apache.org/jira/browse/MAPREDUCE-7372) | MapReduce set permission too late in copyJar method | Major | mrv2 | Zhang Dongsheng | |
| [HADOOP-18330](https://issues.apache.org/jira/browse/HADOOP-18330) | S3AFileSystem removes Path when calling createS3Client | Minor | fs/s3 | Ashutosh Pant | Ashutosh Pant |
| [HADOOP-18390](https://issues.apache.org/jira/browse/HADOOP-18390) | Fix out of sync import for HADOOP-18321 | Minor | . | Ashutosh Gupta | Ashutosh Gupta |
| [HADOOP-18340](https://issues.apache.org/jira/browse/HADOOP-18340) | deleteOnExit does not work with S3AFileSystem | Minor | fs/s3 | Huaxiang Sun | Huaxiang Sun |
| [HADOOP-18383](https://issues.apache.org/jira/browse/HADOOP-18383) | Codecs with @DoNotPool annotation are not closed causing memory leak | Major | common | Kevin Sewell | Kevin Sewell |
| [HDFS-16729](https://issues.apache.org/jira/browse/HDFS-16729) | RBF: fix some unreasonably annotated docs | Major | documentation, rbf | JiangHua Zhu | JiangHua Zhu |
| [HADOOP-18398](https://issues.apache.org/jira/browse/HADOOP-18398) | Prevent AvroRecord\*.class from being included non-test jar | Major | common | YUBI LEE | YUBI LEE |
| [HDFS-4043](https://issues.apache.org/jira/browse/HDFS-4043) | Namenode Kerberos Login does not use proper hostname for host qualified hdfs principal name. | Major | security | Ahad Rana | Steve Vaughan |
| [MAPREDUCE-7403](https://issues.apache.org/jira/browse/MAPREDUCE-7403) | Support spark dynamic partitioning in the Manifest Committer | Major | mrv2 | Steve Loughran | Steve Loughran |
| [HDFS-16732](https://issues.apache.org/jira/browse/HDFS-16732) | [SBN READ] Avoid get location from observer when the block report is delayed. | Critical | hdfs | zhengchenyu | zhengchenyu |
| [HADOOP-18375](https://issues.apache.org/jira/browse/HADOOP-18375) | Fix failure of shelltest for hadoop\_add\_ldlibpath | Minor | test | Masatake Iwasaki | Masatake Iwasaki |
| [HDFS-16755](https://issues.apache.org/jira/browse/HDFS-16755) | TestQJMWithFaults.testUnresolvableHostName() can fail due to unexpected host resolution | Minor | test | Steve Vaughan | Steve Vaughan |
| [HADOOP-18400](https://issues.apache.org/jira/browse/HADOOP-18400) | Fix file split duplicating records from a succeeding split when reading BZip2 text files | Critical | . | Ashutosh Gupta | Ashutosh Gupta |
| [HADOOP-18242](https://issues.apache.org/jira/browse/HADOOP-18242) | ABFS Rename Failure when tracking metadata is in incomplete state | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh |
| [HADOOP-18456](https://issues.apache.org/jira/browse/HADOOP-18456) | NullPointerException in ObjectListingIterator's constructor | Blocker | fs/s3 | Quanlong Huang | Steve Loughran |
| [HADOOP-18444](https://issues.apache.org/jira/browse/HADOOP-18444) | Add Support for localized trash for ViewFileSystem in Trash.moveToAppropriateTrash | Major | . | Xing Lin | Xing Lin |
| [HADOOP-18443](https://issues.apache.org/jira/browse/HADOOP-18443) | Upgrade snakeyaml to 1.32 | Major | security | Ashutosh Gupta | Ashutosh Gupta |
| [HDFS-16766](https://issues.apache.org/jira/browse/HDFS-16766) | hdfs ec command loads (administrator provided) erasure code policy files without disabling xml entity expansion | Major | security | Jing | Ashutosh Gupta |
| [HDFS-13369](https://issues.apache.org/jira/browse/HDFS-13369) | FSCK Report broken with RequestHedgingProxyProvider | Major | hdfs | Harshakiran Reddy | Ranith Sardar |
| [YARN-11039](https://issues.apache.org/jira/browse/YARN-11039) | LogAggregationFileControllerFactory::getFileControllerForRead can leak threads | Blocker | log-aggregation | Rajesh Balamohan | Steve Loughran |
| [HADOOP-18499](https://issues.apache.org/jira/browse/HADOOP-18499) | S3A to support HTTPS web proxies | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh |
| [HADOOP-18233](https://issues.apache.org/jira/browse/HADOOP-18233) | Possible race condition with TemporaryAWSCredentialsProvider | Major | auth, fs/s3 | Jason Sleight | Jimmy Wong |
| [MAPREDUCE-7425](https://issues.apache.org/jira/browse/MAPREDUCE-7425) | Document Fix for yarn.app.mapreduce.client-am.ipc.max-retries | Major | yarn | teng wang | teng wang |
| [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | Disable abfs prefetching by default | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh |
| [HDFS-16836](https://issues.apache.org/jira/browse/HDFS-16836) | StandbyCheckpointer can still trigger rollback fs image after RU is finalized | Major | hdfs | Lei Yang | Lei Yang |
| [HADOOP-18324](https://issues.apache.org/jira/browse/HADOOP-18324) | Interrupting RPC Client calls can lead to thread exhaustion | Critical | ipc | Owen O'Malley | Owen O'Malley |
| [HDFS-16832](https://issues.apache.org/jira/browse/HDFS-16832) | [SBN READ] Fix NPE when check the block location of empty directory | Major | . | zhengchenyu | zhengchenyu |
| [HADOOP-18498](https://issues.apache.org/jira/browse/HADOOP-18498) | [ABFS]: Error introduced when SAS Token containing '?' prefix is passed | Minor | fs/azure | Sree Bhattacharyya | Sree Bhattacharyya |
| [HDFS-16847](https://issues.apache.org/jira/browse/HDFS-16847) | RBF: StateStore writer should not commit tmp file if there was an error in writing the file. | Critical | hdfs, rbf | Simbarashe Dzinamarira | Simbarashe Dzinamarira |
| [HADOOP-18401](https://issues.apache.org/jira/browse/HADOOP-18401) | No ARM binaries in branch-3.3.x releases | Minor | build | Ling Xu | |
| [HADOOP-18408](https://issues.apache.org/jira/browse/HADOOP-18408) | [ABFS]: ITestAbfsManifestCommitProtocol fails on nonHNS configuration | Minor | fs/azure, test | Pranav Saxena | Sree Bhattacharyya |
| [HADOOP-18402](https://issues.apache.org/jira/browse/HADOOP-18402) | S3A committer NPE in spark job abort | Blocker | fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-18569](https://issues.apache.org/jira/browse/HADOOP-18569) | NFS Gateway may release buffer too early | Blocker | nfs | Attila Doroszlai | Attila Doroszlai |
| [HADOOP-18574](https://issues.apache.org/jira/browse/HADOOP-18574) | Changing log level of IOStatistics increment to make the DEBUG logs less noisy | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh |
| [HADOOP-18521](https://issues.apache.org/jira/browse/HADOOP-18521) | ABFS ReadBufferManager buffer sharing across concurrent HTTP requests | Critical | fs/azure | Steve Loughran | Steve Loughran |
| [MAPREDUCE-7375](https://issues.apache.org/jira/browse/MAPREDUCE-7375) | JobSubmissionFiles don't set right permission after mkdirs | Major | mrv2 | Zhang Dongsheng | |
| [HADOOP-17717](https://issues.apache.org/jira/browse/HADOOP-17717) | Update wildfly openssl to 1.1.3.Final | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang |
| [HADOOP-18598](https://issues.apache.org/jira/browse/HADOOP-18598) | maven site generation doesn't include javadocs | Blocker | site | Steve Loughran | Steve Loughran |
| [HDFS-16895](https://issues.apache.org/jira/browse/HDFS-16895) | NamenodeHeartbeatService should use credentials of logged in user | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri |
| [HDFS-16853](https://issues.apache.org/jira/browse/HDFS-16853) | The UT TestLeaseRecovery2#testHardLeaseRecoveryAfterNameNodeRestart failed because HADOOP-18324 | Blocker | . | ZanderXu | ZanderXu |
| [HADOOP-18641](https://issues.apache.org/jira/browse/HADOOP-18641) | cyclonedx maven plugin breaks builds on recent maven releases (3.9.0) | Major | build | Steve Loughran | Steve Loughran |
| [HDFS-16923](https://issues.apache.org/jira/browse/HDFS-16923) | The getListing RPC will throw NPE if the path does not exist | Critical | . | ZanderXu | ZanderXu |
| [HDFS-16896](https://issues.apache.org/jira/browse/HDFS-16896) | HDFS Client hedged read has increased failure rate than without hedged read | Major | hdfs-client | Tom McCormick | Tom McCormick |
| [YARN-11383](https://issues.apache.org/jira/browse/YARN-11383) | Workflow priority mappings is case sensitive | Major | yarn | Aparajita Choudhary | Aparajita Choudhary |
| [HDFS-16939](https://issues.apache.org/jira/browse/HDFS-16939) | Fix the thread safety bug in LowRedundancyBlocks | Major | namenode | Shuyan Zhang | Shuyan Zhang |
| [HDFS-16934](https://issues.apache.org/jira/browse/HDFS-16934) | org.apache.hadoop.hdfs.tools.TestDFSAdmin#testAllDatanodesReconfig regression | Minor | dfsadmin, test | Steve Loughran | Shilun Fan |
### TESTS:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
| [HDFS-16573](https://issues.apache.org/jira/browse/HDFS-16573) | Fix test TestDFSStripedInputStreamWithRandomECPolicy | Minor | test | daimin | daimin |
| [HDFS-16637](https://issues.apache.org/jira/browse/HDFS-16637) | TestHDFSCLI#testAll consistently failing | Major | . | Viraj Jasani | Viraj Jasani |
| [YARN-11248](https://issues.apache.org/jira/browse/YARN-11248) | Add unit test for FINISHED\_CONTAINERS\_PULLED\_BY\_AM event on DECOMMISSIONING | Major | test | Ashutosh Gupta | Ashutosh Gupta |
| [HDFS-16625](https://issues.apache.org/jira/browse/HDFS-16625) | Unit tests aren't checking for PMDK availability | Major | test | Steve Vaughan | Steve Vaughan |
### SUB-TASKS:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
| [HDFS-13293](https://issues.apache.org/jira/browse/HDFS-13293) | RBF: The RouterRPCServer should transfer client IP via CallerContext to NamenodeRpcServer | Major | rbf | Baolong Mao | Hui Fei |
| [HDFS-15630](https://issues.apache.org/jira/browse/HDFS-15630) | RBF: Fix wrong client IP info in CallerContext when requests mount points with multi-destinations. | Major | rbf | Chengwei Wang | Chengwei Wang |
| [HADOOP-17152](https://issues.apache.org/jira/browse/HADOOP-17152) | Implement wrapper for guava newArrayList and newLinkedList | Major | common | Ahmed Hussein | Viraj Jasani |
| [HADOOP-17851](https://issues.apache.org/jira/browse/HADOOP-17851) | S3A to support user-specified content encoding | Minor | fs/s3 | Holden Karau | Holden Karau |
| [HADOOP-17492](https://issues.apache.org/jira/browse/HADOOP-17492) | abfs listLocatedStatus to support incremental/async page fetching | Major | fs/azure | Steve Loughran | Steve Loughran |
| [HADOOP-17409](https://issues.apache.org/jira/browse/HADOOP-17409) | Remove S3Guard - no longer needed | Major | fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-18084](https://issues.apache.org/jira/browse/HADOOP-18084) | ABFS: Add testfilePath while verifying test contents are read correctly | Minor | fs/azure, test | Anmol Asrani | Anmol Asrani |
| [HDFS-16169](https://issues.apache.org/jira/browse/HDFS-16169) | Fix TestBlockTokenWithDFSStriped#testEnd2End failure | Major | test | Hui Fei | secfree |
| [HADOOP-18091](https://issues.apache.org/jira/browse/HADOOP-18091) | S3A auditing leaks memory through ThreadLocal references | Major | fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-18071](https://issues.apache.org/jira/browse/HADOOP-18071) | ABFS: Set driver global timeout for ITestAzureBlobFileSystemBasics | Major | fs/azure | Sumangala Patki | Sumangala Patki |
| [HADOOP-17765](https://issues.apache.org/jira/browse/HADOOP-17765) | ABFS: Use Unique File Paths in Tests | Major | fs/azure | Sumangala Patki | Sumangala Patki |
| [HADOOP-17862](https://issues.apache.org/jira/browse/HADOOP-17862) | ABFS: Fix unchecked cast compiler warning for AbfsListStatusRemoteIterator | Major | fs/azure | Sumangala Patki | Sumangala Patki |
| [HADOOP-18075](https://issues.apache.org/jira/browse/HADOOP-18075) | ABFS: Fix failure caused by listFiles() in ITestAbfsRestOperationException | Major | fs/azure | Sumangala Patki | Sumangala Patki |
| [HADOOP-18112](https://issues.apache.org/jira/browse/HADOOP-18112) | Implement paging during S3 multi object delete. | Critical | fs/s3 | Mukund Thakur | Mukund Thakur |
| [HADOOP-16204](https://issues.apache.org/jira/browse/HADOOP-16204) | ABFS tests to include terasort | Minor | fs/azure, test | Steve Loughran | Steve Loughran |
| [HDFS-13248](https://issues.apache.org/jira/browse/HDFS-13248) | RBF: Namenode need to choose block location for the client | Major | . | Wu Weiwei | Owen O'Malley |
| [HADOOP-13704](https://issues.apache.org/jira/browse/HADOOP-13704) | S3A getContentSummary() to move to listFiles(recursive) to count children; instrument use | Minor | fs/s3 | Steve Loughran | Ahmar Suhail |
| [HADOOP-14661](https://issues.apache.org/jira/browse/HADOOP-14661) | S3A to support Requester Pays Buckets | Minor | common, util | Mandus Momberg | Daniel Carl Jones |
| [HDFS-16484](https://issues.apache.org/jira/browse/HDFS-16484) | [SPS]: Fix an infinite loop bug in SPSPathIdProcessor thread | Major | . | qinyuren | qinyuren |
| [HADOOP-17682](https://issues.apache.org/jira/browse/HADOOP-17682) | ABFS: Support FileStatus input to OpenFileWithOptions() via OpenFileParameters | Major | fs/azure | Sumangala Patki | Sumangala Patki |
| [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | Use jersey-json that is built to use jackson2 | Major | build | Akira Ajisaka | PJ Fanning |
| [HADOOP-18104](https://issues.apache.org/jira/browse/HADOOP-18104) | Add configs to configure minSeekForVectorReads and maxReadSizeForVectorReads | Major | common, fs | Mukund Thakur | Mukund Thakur |
| [HADOOP-18168](https://issues.apache.org/jira/browse/HADOOP-18168) | ITestMarkerTool.testRunLimitedLandsatAudit failing due to most of bucket content purged | Minor | fs/s3, test | Steve Loughran | Daniel Carl Jones |
| [HADOOP-12020](https://issues.apache.org/jira/browse/HADOOP-12020) | Support configuration of different S3 storage classes | Major | fs/s3 | Yann Landrin-Schweitzer | Monthon Klongklaew |
| [HADOOP-18105](https://issues.apache.org/jira/browse/HADOOP-18105) | Implement a variant of ElasticByteBufferPool which uses weak references for garbage collection. | Major | common, fs | Mukund Thakur | Mukund Thakur |
| [HADOOP-18107](https://issues.apache.org/jira/browse/HADOOP-18107) | Vectored IO support for large S3 files. | Major | fs/s3 | Mukund Thakur | Mukund Thakur |
| [HADOOP-18106](https://issues.apache.org/jira/browse/HADOOP-18106) | Handle memory fragmentation in S3 Vectored IO implementation. | Major | fs/s3 | Mukund Thakur | Mukund Thakur |
| [HADOOP-17461](https://issues.apache.org/jira/browse/HADOOP-17461) | Add thread-level IOStatistics Context | Major | fs, fs/azure, fs/s3 | Steve Loughran | Mehakmeet Singh |
| [HADOOP-18372](https://issues.apache.org/jira/browse/HADOOP-18372) | ILoadTestS3ABulkDeleteThrottling failing | Minor | fs/s3, test | Steve Loughran | Ahmar Suhail |
| [HADOOP-18368](https://issues.apache.org/jira/browse/HADOOP-18368) | ITestCustomSigner fails when access point name has '-' | Minor | . | Ahmar Suhail | Ahmar Suhail |
| [HADOOP-15964](https://issues.apache.org/jira/browse/HADOOP-15964) | Add S3A support for Async Scatter/Gather IO | Major | fs/s3 | Steve Loughran | Mukund Thakur |
| [HADOOP-18366](https://issues.apache.org/jira/browse/HADOOP-18366) | ITestS3Select.testSelectSeekFullLandsat is timing out | Minor | . | Ahmar Suhail | Ahmar Suhail |
| [HADOOP-18373](https://issues.apache.org/jira/browse/HADOOP-18373) | IOStatisticsContext tuning | Minor | fs/s3, test | Steve Loughran | Viraj Jasani |
| [HADOOP-18227](https://issues.apache.org/jira/browse/HADOOP-18227) | Add input stream IOstats for vectored IO api in S3A. | Major | fs/s3 | Mukund Thakur | Mukund Thakur |
| [HADOOP-18392](https://issues.apache.org/jira/browse/HADOOP-18392) | Propagate vectored s3a input stream stats to file system stats. | Major | fs/s3 | Mukund Thakur | Mukund Thakur |
| [HADOOP-18355](https://issues.apache.org/jira/browse/HADOOP-18355) | Update previous index properly while validating overlapping ranges. | Major | common, fs/s3 | Mukund Thakur | Mukund Thakur |
| [HADOOP-18371](https://issues.apache.org/jira/browse/HADOOP-18371) | s3a FS init logs at warn if fs.s3a.create.storage.class is unset | Blocker | fs/s3 | Steve Loughran | Viraj Jasani |
| [HADOOP-18385](https://issues.apache.org/jira/browse/HADOOP-18385) | ITestS3ACannedACLs failure; not in a span | Major | fs/s3, test | Steve Loughran | Ashutosh Gupta |
| [HADOOP-18403](https://issues.apache.org/jira/browse/HADOOP-18403) | Fix FileSystem leak in ITestS3AAWSCredentialsProvider | Minor | fs/s3 | Viraj Jasani | Viraj Jasani |
| [HADOOP-17882](https://issues.apache.org/jira/browse/HADOOP-17882) | distcp to use openFile() with sequential IO; ranges of reads | Major | tools/distcp | Steve Loughran | Steve Loughran |
| [HADOOP-18391](https://issues.apache.org/jira/browse/HADOOP-18391) | Improve VectoredReadUtils#readVectored() for direct buffers | Major | fs | Steve Loughran | Mukund Thakur |
| [HADOOP-18407](https://issues.apache.org/jira/browse/HADOOP-18407) | Improve vectored IO api spec. | Minor | fs, fs/s3 | Mukund Thakur | Mukund Thakur |
| [HADOOP-18339](https://issues.apache.org/jira/browse/HADOOP-18339) | S3A storage class option only picked up when buffering writes to disk | Major | fs/s3 | Steve Loughran | Monthon Klongklaew |
| [HADOOP-18410](https://issues.apache.org/jira/browse/HADOOP-18410) | S3AInputStream.unbuffer() async drain not releasing http connections | Blocker | fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-18439](https://issues.apache.org/jira/browse/HADOOP-18439) | Fix VectoredIO for LocalFileSystem when checksum is enabled. | Major | common | Mukund Thakur | Mukund Thakur |
| [HADOOP-18416](https://issues.apache.org/jira/browse/HADOOP-18416) | ITestS3AIOStatisticsContext failure | Major | fs/s3, test | Steve Loughran | Mehakmeet Singh |
| [HADOOP-18347](https://issues.apache.org/jira/browse/HADOOP-18347) | Restrict vectoredIO threadpool to reduce memory pressure | Major | common, fs, fs/adl, fs/s3 | Rajesh Balamohan | Mukund Thakur |
| [HADOOP-18463](https://issues.apache.org/jira/browse/HADOOP-18463) | Add an integration test to process data asynchronously during vectored read. | Major | . | Mukund Thakur | Mukund Thakur |
| [HADOOP-15460](https://issues.apache.org/jira/browse/HADOOP-15460) | S3A FS to add "fs.s3a.create.performance" to the builder file creation option set | Major | fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | Upgrade AWS SDK to V2 - Prerequisites | Minor | . | Ahmar Suhail | Ahmar Suhail |
| [HADOOP-18480](https://issues.apache.org/jira/browse/HADOOP-18480) | upgrade AWS SDK to 1.12.316 | Major | build, fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-18460](https://issues.apache.org/jira/browse/HADOOP-18460) | ITestS3AContractVectoredRead.testStopVectoredIoOperationsUnbuffer failing | Minor | fs/s3, test | Steve Loughran | Mukund Thakur |
| [HADOOP-18488](https://issues.apache.org/jira/browse/HADOOP-18488) | Cherrypick HADOOP-11245 to branch-3.3 | Major | . | Wei-Chiu Chuang | Ashutosh Gupta |
| [HADOOP-18481](https://issues.apache.org/jira/browse/HADOOP-18481) | AWS v2 SDK upgrade log to not warn of use standard AWS Credential Providers | Major | fs/s3 | Steve Loughran | Ahmar Suhail |
| [HADOOP-18476](https://issues.apache.org/jira/browse/HADOOP-18476) | Abfs and S3A FileContext bindings to close wrapped filesystems in finalizer | Blocker | fs/azure, fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-18304](https://issues.apache.org/jira/browse/HADOOP-18304) | Improve S3A committers documentation clarity | Trivial | documentation | Daniel Carl Jones | Daniel Carl Jones |
| [HADOOP-18465](https://issues.apache.org/jira/browse/HADOOP-18465) | S3A server-side encryption tests fail before checking encryption tests should skip | Minor | fs/s3, test | Daniel Carl Jones | Daniel Carl Jones |
| [HADOOP-18530](https://issues.apache.org/jira/browse/HADOOP-18530) | ChecksumFileSystem::readVectored might return byte buffers not positioned at 0 | Blocker | fs | Harshit Gupta | Harshit Gupta |
| [HADOOP-18457](https://issues.apache.org/jira/browse/HADOOP-18457) | ABFS: Support for account level throttling | Major | . | Anmol Asrani | Anmol Asrani |
| [HADOOP-18560](https://issues.apache.org/jira/browse/HADOOP-18560) | AvroFSInput opens a stream twice and discards the second one without closing | Blocker | fs | Steve Loughran | Steve Loughran |
| [HADOOP-18526](https://issues.apache.org/jira/browse/HADOOP-18526) | Leak of S3AInstrumentation instances via hadoop Metrics references | Blocker | fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-18546](https://issues.apache.org/jira/browse/HADOOP-18546) | disable purging list of in progress reads in abfs stream closed | Blocker | fs/azure | Steve Loughran | Pranav Saxena |
| [HADOOP-18577](https://issues.apache.org/jira/browse/HADOOP-18577) | ABFS: add probes of readahead fix | Major | fs/azure | Steve Loughran | Steve Loughran |
| [HADOOP-11867](https://issues.apache.org/jira/browse/HADOOP-11867) | Add a high-performance vectored read API. | Major | fs, fs/azure, fs/s3, hdfs-client | Gopal Vijayaraghavan | Mukund Thakur |
| [HADOOP-18507](https://issues.apache.org/jira/browse/HADOOP-18507) | VectorIO FileRange type to support a "reference" field | Major | fs | Steve Loughran | Steve Loughran |
| [HADOOP-18627](https://issues.apache.org/jira/browse/HADOOP-18627) | site intro docs to make clear Kerberos is mandatory for secure clusters | Major | site | Steve Loughran | Arnout Engelen |
| [HADOOP-17584](https://issues.apache.org/jira/browse/HADOOP-17584) | s3a magic committer may commit more data | Major | fs/s3 | yinan zhan | Steve Loughran |
| [HADOOP-18642](https://issues.apache.org/jira/browse/HADOOP-18642) | Cut excess dependencies from hadoop-azure, hadoop-aliyun transitive imports; fix LICENSE-binary | Blocker | build, fs/azure, fs/oss | Steve Loughran | Steve Loughran |
### OTHER:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
| [HDFS-15854](https://issues.apache.org/jira/browse/HDFS-15854) | Make some parameters configurable for SlowDiskTracker and SlowPeerTracker | Major | . | Tao Li | Tao Li |
| [YARN-10747](https://issues.apache.org/jira/browse/YARN-10747) | Bump YARN CSI protobuf version to 3.7.1 | Major | . | Siyao Meng | Siyao Meng |
| [HDFS-16139](https://issues.apache.org/jira/browse/HDFS-16139) | Update BPServiceActor Scheduler's nextBlockReportTime atomically | Major | . | Viraj Jasani | Viraj Jasani |
| [HADOOP-18014](https://issues.apache.org/jira/browse/HADOOP-18014) | CallerContext should not include some characters | Major | . | Takanobu Asanuma | Takanobu Asanuma |
| [MAPREDUCE-7371](https://issues.apache.org/jira/browse/MAPREDUCE-7371) | DistributedCache alternative APIs should not use DistributedCache APIs internally | Major | . | Viraj Jasani | Viraj Jasani |
| [HADOOP-18114](https://issues.apache.org/jira/browse/HADOOP-18114) | Documentation Syntax Error Fix \> AWS Assumed Roles | Trivial | documentation, fs/s3 | Joey Krabacher | Joey Krabacher |
| [HDFS-16481](https://issues.apache.org/jira/browse/HDFS-16481) | Provide support to set Http and Rpc ports in MiniJournalCluster | Major | . | Viraj Jasani | Viraj Jasani |
| [HDFS-16502](https://issues.apache.org/jira/browse/HDFS-16502) | Reconfigure Block Invalidate limit | Major | . | Viraj Jasani | Viraj Jasani |
| [HDFS-16522](https://issues.apache.org/jira/browse/HDFS-16522) | Set Http and Ipc ports for Datanodes in MiniDFSCluster | Major | . | Viraj Jasani | Viraj Jasani |
| [HADOOP-18191](https://issues.apache.org/jira/browse/HADOOP-18191) | Log retry count while handling exceptions in RetryInvocationHandler | Minor | . | Viraj Jasani | Viraj Jasani |
| [HDFS-16551](https://issues.apache.org/jira/browse/HDFS-16551) | Backport HADOOP-17588 to 3.3 and other active old branches. | Major | . | Renukaprasad C | Renukaprasad C |
| [HDFS-16618](https://issues.apache.org/jira/browse/HDFS-16618) | sync\_file\_range error should include more volume and file info | Minor | . | Viraj Jasani | Viraj Jasani |
| [HADOOP-18300](https://issues.apache.org/jira/browse/HADOOP-18300) | Update google-gson to 2.9.0 | Minor | build | Igor Dvorzhak | Igor Dvorzhak |
| [HADOOP-18397](https://issues.apache.org/jira/browse/HADOOP-18397) | Shutdown AWSSecurityTokenService when its resources are no longer in use | Major | fs/s3 | Viraj Jasani | Viraj Jasani |
| [HADOOP-18575](https://issues.apache.org/jira/browse/HADOOP-18575) | Make XML transformer factory more lenient | Major | common | PJ Fanning | PJ Fanning |
| [HADOOP-18586](https://issues.apache.org/jira/browse/HADOOP-18586) | Update the year to 2023 | Major | . | Ayush Saxena | Ayush Saxena |
| [HADOOP-18587](https://issues.apache.org/jira/browse/HADOOP-18587) | upgrade to jettison 1.5.3 to fix CVE-2022-40150 | Major | common | PJ Fanning | PJ Fanning |

View File

@ -0,0 +1,89 @@
<!---
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-->
# Apache Hadoop 3.3.5 Release Notes
These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements.
---
* [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | *Major* | **Replace all default Charset usage with UTF-8**
All default charset usages have been replaced with UTF-8. If the default charset of your environment is not UTF-8, behavior may differ from previous releases.
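As an illustration only (not code from this release), callers that still depend on a particular encoding can make it explicit rather than relying on the platform default:

```java
import java.nio.charset.StandardCharsets;

public class Utf8Example {
  public static void main(String[] args) {
    // Passing the charset explicitly removes any dependence on the JVM's
    // platform default and matches Hadoop's new UTF-8 behaviour.
    byte[] encoded = "räksmörgås".getBytes(StandardCharsets.UTF_8);
    String decoded = new String(encoded, StandardCharsets.UTF_8);
    System.out.println(decoded);
  }
}
```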
---
* [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | *Major* | **Use jersey-json that is built to use jackson2**
Hadoop now uses the modified jersey-json 1.20 from https://github.com/pjfanning/jersey-1.x/tree/v1.20, which is built against Jackson 2.x. With this change, the Jackson 1.x dependency has been removed from Hadoop.
Downstream applications which explicitly exclude jersey from transitive dependencies must now also exclude com.github.pjfanning:jersey-json.
---
* [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | *Major* | **Slow peer metrics - add median, mad and upper latency limits**
Namenode metrics that represent the slow-node JSON now include three important statistics (median, median absolute deviation, and an upper latency limit) that can help users determine how urgently a given slow node requires manual intervention.
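For reference, the median absolute deviation reported here is the standard robust statistic (a general definition, not Hadoop-specific code):

$$\operatorname{MAD}(x) = \operatorname{median}_i\bigl(\lvert x_i - \operatorname{median}(x)\rvert\bigr)$$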
---
* [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | *Minor* | **Improve Magic Committer Performance**
The S3A filesystem's createFile() operation supports an option to disable all safety checks when creating a file. Consult the documentation and use with care.
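A minimal sketch of how a client might opt in, assuming the `fs.s3a.create.performance` builder option referenced by HADOOP-15460 elsewhere in this changelog; the bucket and path are hypothetical:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3ACreatePerformanceSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path dest = new Path("s3a://example-bucket/tmp/output.txt"); // hypothetical location
    try (FileSystem fs = dest.getFileSystem(conf);
         FSDataOutputStream out = fs.createFile(dest)
             // Skips the safety checks described above; read the S3A docs first.
             .opt("fs.s3a.create.performance", true)
             .build()) {
      out.writeBytes("hello\n");
    }
  }
}
```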
---
* [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | *Minor* | **Upgrade AWS SDK to V2 - Prerequisites**
In preparation for an (incompatible but necessary) move to the AWS SDK v2, some uses of internal/deprecated AWS classes and interfaces are now logged as warnings, though only once during the life of a JVM. Set the log "org.apache.hadoop.fs.s3a.SDKV2Upgrade" to log only at INFO to hide these.
---
* [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | *Major* | **Remove the hadoop-openstack module**
The swift:// connector for OpenStack support has been removed. It had fundamental problems (such as Swift's handling of files \> 4GB). A subset of the S3 protocol is now exported by almost all object store services; please use that through the s3a connector instead. The hadoop-openstack jar remains, but it is now empty of code. This ensures that projects which declare the JAR as a dependency will still build successfully.
---
* [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | *Major* | **Update Bouncy Castle to 1.68 or later**
Bouncy Castle 1.68+ is a multi-release JAR containing Java classes compiled for different target JREs. Older versions of asm.jar and the maven-shade-plugin may have problems with these; the fix is to upgrade those dependencies.
---
* [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | *Major* | **Disable abfs prefetching by default**
ABFS block prefetching has been disabled to avoid HADOOP-18521, a buffer-sharing problem affecting multithreaded processes (Hive, Spark, etc.). This will have little or no performance impact on queries against Parquet or ORC data, but it can slow down sequential stream processing, including CSV files; the data read will, however, be correct.
It may also slow down distcp downloads, even though the race condition does not arise there. For maximum distcp performance, re-enable readahead by setting fs.azure.enable.readahead to true.
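A sketch of switching it back on for a sequential-read workload, assuming the `fs.azure.enable.readahead` key introduced by HADOOP-18517:

```java
import org.apache.hadoop.conf.Configuration;

public class AbfsReadaheadSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Readahead is off by default in this release; switch it back on for
    // sequential-read jobs such as distcp where the race does not arise.
    conf.setBoolean("fs.azure.enable.readahead", true);
    System.out.println("fs.azure.enable.readahead = "
        + conf.getBoolean("fs.azure.enable.readahead", false));
  }
}
```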
---
* [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | *Critical* | **CryptoOutputStream::close leak when encrypted zones + quota exceptions**
**WARNING: No release note provided for this change.**

View File

@ -52,7 +52,8 @@ public class TestFileSystemStorageStatistics {
"bytesReadDistanceOfOneOrTwo",
"bytesReadDistanceOfThreeOrFour",
"bytesReadDistanceOfFiveOrLarger",
"bytesReadErasureCoded"
"bytesReadErasureCoded",
"remoteReadTimeMS"
};
private FileSystem.Statistics statistics =
@ -74,6 +75,7 @@ public class TestFileSystemStorageStatistics {
statistics.incrementBytesReadByDistance(1, RandomUtils.nextInt(0, 100));
statistics.incrementBytesReadByDistance(3, RandomUtils.nextInt(0, 100));
statistics.incrementBytesReadErasureCoded(RandomUtils.nextInt(0, 100));
statistics.increaseRemoteReadTime(RandomUtils.nextInt(0, 100));
}
@Test
@ -128,6 +130,8 @@ public class TestFileSystemStorageStatistics {
return statistics.getBytesReadByDistance(5);
case "bytesReadErasureCoded":
return statistics.getBytesReadErasureCoded();
case "remoteReadTimeMS":
return statistics.getRemoteReadTime();
default:
return 0;
}

View File

@ -195,10 +195,9 @@ public abstract class AbstractContractRootDirectoryTest extends AbstractFSContra
for (FileStatus status : statuses) {
ContractTestUtils.assertDeleted(fs, status.getPath(), false, true, false);
}
FileStatus[] rootListStatus = fs.listStatus(root);
assertEquals("listStatus on empty root-directory returned found: "
+ join("\n", rootListStatus),
0, rootListStatus.length);
Assertions.assertThat(fs.listStatus(root))
.describedAs("ls /")
.hasSize(0);
assertNoElements("listFiles(/, false)",
fs.listFiles(root, false));
assertNoElements("listFiles(/, true)",

View File

@ -23,8 +23,11 @@ import java.nio.ByteBuffer;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.test.AbstractHadoopTestBase;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_TMP_DIR;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@ -36,6 +39,8 @@ public class TestBlockCache extends AbstractHadoopTestBase {
private static final int BUFFER_SIZE = 16;
private static final Configuration CONF = new Configuration();
@Test
public void testArgChecks() throws Exception {
// Should not throw.
@ -46,7 +51,7 @@ public class TestBlockCache extends AbstractHadoopTestBase {
// Verify it throws correctly.
intercept(IllegalArgumentException.class, "'buffer' must not be null",
() -> cache.put(42, null));
() -> cache.put(42, null, null, null));
intercept(NullPointerException.class, null,
@ -67,7 +72,7 @@ public class TestBlockCache extends AbstractHadoopTestBase {
assertEquals(0, cache.size());
assertFalse(cache.containsBlock(0));
cache.put(0, buffer1);
cache.put(0, buffer1, CONF, new LocalDirAllocator(HADOOP_TMP_DIR));
assertEquals(1, cache.size());
assertTrue(cache.containsBlock(0));
ByteBuffer buffer2 = ByteBuffer.allocate(BUFFER_SIZE);
@ -77,7 +82,7 @@ public class TestBlockCache extends AbstractHadoopTestBase {
assertEquals(1, cache.size());
assertFalse(cache.containsBlock(1));
cache.put(1, buffer1);
cache.put(1, buffer1, CONF, new LocalDirAllocator(HADOOP_TMP_DIR));
assertEquals(2, cache.size());
assertTrue(cache.containsBlock(1));
ByteBuffer buffer3 = ByteBuffer.allocate(BUFFER_SIZE);

View File

@ -19,8 +19,10 @@ package org.apache.hadoop.http;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.minikdc.MiniKdc;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AuthenticationFilterInitializer;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authentication.KerberosTestUtils;
import org.apache.hadoop.security.authentication.client.AuthenticatedURL;
@ -104,7 +106,9 @@ public class TestHttpServerWithSpnego {
*/
@Test
public void testAuthenticationWithProxyUser() throws Exception {
Configuration spengoConf = getSpengoConf(new Configuration());
Configuration spnegoConf = getSpnegoConf(new Configuration());
spnegoConf.set(HttpServer2.FILTER_INITIALIZER_PROPERTY,
ProxyUserAuthenticationFilterInitializer.class.getName());
//setup logs dir
System.setProperty("hadoop.log.dir", testRootDir.getAbsolutePath());
@ -118,15 +122,15 @@ public class TestHttpServerWithSpnego {
new String[]{"groupC"});
// Make userA impersonate users in groupB
spengoConf.set("hadoop.proxyuser.userA.hosts", "*");
spengoConf.set("hadoop.proxyuser.userA.groups", "groupB");
ProxyUsers.refreshSuperUserGroupsConfiguration(spengoConf);
spnegoConf.set("hadoop.proxyuser.userA.hosts", "*");
spnegoConf.set("hadoop.proxyuser.userA.groups", "groupB");
ProxyUsers.refreshSuperUserGroupsConfiguration(spnegoConf);
HttpServer2 httpServer = null;
try {
// Create http server to test.
httpServer = getCommonBuilder()
.setConf(spengoConf)
.setConf(spnegoConf)
.setACL(new AccessControlList("userA groupA"))
.build();
httpServer.start();
@ -191,6 +195,48 @@ public class TestHttpServerWithSpnego {
}
}
@Test
public void testAuthenticationToAllowList() throws Exception {
Configuration spnegoConf = getSpnegoConf(new Configuration());
String[] allowList = new String[] {"/jmx", "/prom"};
String[] denyList = new String[] {"/conf", "/stacks", "/logLevel"};
spnegoConf.set(PREFIX + "kerberos.endpoint.whitelist", String.join(",", allowList));
spnegoConf.set(CommonConfigurationKeysPublic.HADOOP_PROMETHEUS_ENABLED, "true");
spnegoConf.set(HttpServer2.FILTER_INITIALIZER_PROPERTY,
AuthenticationFilterInitializer.class.getName());
//setup logs dir
System.setProperty("hadoop.log.dir", testRootDir.getAbsolutePath());
HttpServer2 httpServer = null;
try {
// Create http server to test.
httpServer = getCommonBuilder().setConf(spnegoConf).setSecurityEnabled(true)
.setUsernameConfKey(PREFIX + "kerberos.principal")
.setKeytabConfKey(PREFIX + "kerberos.keytab").build();
httpServer.start();
String serverURL = "http://" + NetUtils.getHostPortString(httpServer.getConnectorAddress(0));
// endpoints in whitelist should not require Kerberos authentication
for (String endpoint : allowList) {
HttpURLConnection conn = (HttpURLConnection) new URL(serverURL + endpoint).openConnection();
Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
}
// endpoints not in whitelist should require Kerberos authentication
for (String endpoint : denyList) {
HttpURLConnection conn = (HttpURLConnection) new URL(serverURL + endpoint).openConnection();
Assert.assertEquals(HttpURLConnection.HTTP_UNAUTHORIZED, conn.getResponseCode());
}
} finally {
if (httpServer != null) {
httpServer.stop();
}
}
}
private AuthenticatedURL.Token getEncryptedAuthToken(Signer signer,
String user) throws Exception {
AuthenticationToken token =
@ -209,10 +255,8 @@ public class TestHttpServerWithSpnego {
return new Signer(secretProvider);
}
private Configuration getSpengoConf(Configuration conf) {
private Configuration getSpnegoConf(Configuration conf) {
conf = new Configuration();
conf.set(HttpServer2.FILTER_INITIALIZER_PROPERTY,
ProxyUserAuthenticationFilterInitializer.class.getName());
conf.set(PREFIX + "type", "kerberos");
conf.setBoolean(PREFIX + "simple.anonymous.allowed", false);
conf.set(PREFIX + "signature.secret.file",

View File

@ -1728,6 +1728,47 @@ public class TestIPC {
checkUserBinding(true);
}
@Test(timeout=60000)
public void testUpdateAddressEnsureResolved() throws Exception {
// start server
Server server = new TestServer(1, false);
server.start();
SocketFactory mockFactory = Mockito.mock(SocketFactory.class);
doThrow(new ConnectTimeoutException("fake")).when(mockFactory)
.createSocket();
Client client = new Client(LongWritable.class, conf, mockFactory);
InetSocketAddress address =
new InetSocketAddress("localhost", NetUtils.getFreeSocketPort());
ConnectionId remoteId = getConnectionId(address, 100, conf);
try {
LambdaTestUtils.intercept(IOException.class, (Callable<Void>) () -> {
client.call(RpcKind.RPC_BUILTIN, new LongWritable(RANDOM.nextLong()),
remoteId, RPC.RPC_SERVICE_CLASS_DEFAULT, null);
return null;
});
assertFalse(address.isUnresolved());
assertFalse(remoteId.getAddress().isUnresolved());
assertEquals(System.identityHashCode(remoteId.getAddress()),
System.identityHashCode(address));
NetUtils.addStaticResolution("localhost", "host.invalid");
LambdaTestUtils.intercept(IOException.class, (Callable<Void>) () -> {
client.call(RpcKind.RPC_BUILTIN, new LongWritable(RANDOM.nextLong()),
remoteId, RPC.RPC_SERVICE_CLASS_DEFAULT, null);
return null;
});
assertFalse(remoteId.getAddress().isUnresolved());
assertEquals(System.identityHashCode(remoteId.getAddress()),
System.identityHashCode(address));
} finally {
client.stop();
server.stop();
}
}
private void checkUserBinding(boolean asProxy) throws Exception {
Socket s;
// don't attempt bind with no service host.

View File

@ -20,8 +20,9 @@ package org.apache.hadoop.ipc;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.assertj.core.api.Assertions.assertThat;
import org.apache.hadoop.test.LambdaTestUtils;
import org.junit.Test;
import java.util.List;
@ -33,7 +34,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.conf.Configuration;
public class TestIdentityProviders {
public class FakeSchedulable implements Schedulable {
public static class FakeSchedulable implements Schedulable {
public FakeSchedulable() {
}
@ -61,7 +62,9 @@ public class TestIdentityProviders {
CommonConfigurationKeys.IPC_IDENTITY_PROVIDER_KEY,
IdentityProvider.class);
assertTrue(providers.size() == 1);
assertThat(providers)
.describedAs("provider list")
.hasSize(1);
IdentityProvider ip = providers.get(0);
assertNotNull(ip);
@ -69,14 +72,20 @@ public class TestIdentityProviders {
}
@Test
public void testUserIdentityProvider() throws IOException {
public void testUserIdentityProvider() throws Exception {
UserIdentityProvider uip = new UserIdentityProvider();
String identity = uip.makeIdentity(new FakeSchedulable());
FakeSchedulable fakeSchedulable = new FakeSchedulable();
String identity = uip.makeIdentity(fakeSchedulable);
// Get our username
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
String username = ugi.getUserName();
assertEquals(username, identity);
// FakeSchedulable doesn't override getCallerContext()
// accessing it should throw an UnsupportedOperationException
LambdaTestUtils.intercept(UnsupportedOperationException.class,
"Invalid operation.", fakeSchedulable::getCallerContext);
}
}

View File

@ -1336,12 +1336,16 @@ public class TestRPC extends TestRpcBase {
3000, getLongCounter("RpcProcessingTimeNumOps", rpcMetrics));
assertEquals("Expected correct rpc lock wait count",
3000, getLongCounter("RpcLockWaitTimeNumOps", rpcMetrics));
assertEquals("Expected correct rpc response count",
3000, getLongCounter("RpcResponseTimeNumOps", rpcMetrics));
assertEquals("Expected zero rpc lock wait time",
0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001);
MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s",
rpcMetrics);
MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s",
rpcMetrics);
MetricsAsserts.assertQuantileGauges("RpcResponseTime" + interval + "s",
rpcMetrics);
String actualUserVsCon = MetricsAsserts
.getStringMetric("NumOpenConnectionsPerUser", rpcMetrics);
String proxyUser =

View File

@ -1,264 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.log;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ContainerNode;
import org.junit.Test;
import static org.junit.Assert.*;
import org.apache.hadoop.util.Time;
import org.apache.log4j.Appender;
import org.apache.log4j.Category;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.WriterAppender;
import org.apache.log4j.spi.HierarchyEventListener;
import org.apache.log4j.spi.LoggerFactory;
import org.apache.log4j.spi.LoggerRepository;
import org.apache.log4j.spi.ThrowableInformation;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.net.NoRouteToHostException;
import java.util.Enumeration;
import java.util.Vector;
public class TestLog4Json {
@Test
public void testConstruction() throws Throwable {
Log4Json l4j = new Log4Json();
String outcome = l4j.toJson(new StringWriter(),
"name", 0, "DEBUG", "thread1",
"hello, world", null).toString();
println("testConstruction", outcome);
}
@Test
public void testException() throws Throwable {
Exception e =
new NoRouteToHostException("that box caught fire 3 years ago");
ThrowableInformation ti = new ThrowableInformation(e);
Log4Json l4j = new Log4Json();
long timeStamp = Time.now();
String outcome = l4j.toJson(new StringWriter(),
"testException",
timeStamp,
"INFO",
"quoted\"",
"new line\n and {}",
ti)
.toString();
println("testException", outcome);
}
@Test
public void testNestedException() throws Throwable {
Exception e =
new NoRouteToHostException("that box caught fire 3 years ago");
Exception ioe = new IOException("Datacenter problems", e);
ThrowableInformation ti = new ThrowableInformation(ioe);
Log4Json l4j = new Log4Json();
long timeStamp = Time.now();
String outcome = l4j.toJson(new StringWriter(),
"testNestedException",
timeStamp,
"INFO",
"quoted\"",
"new line\n and {}",
ti)
.toString();
println("testNestedException", outcome);
ContainerNode rootNode = Log4Json.parse(outcome);
assertEntryEquals(rootNode, Log4Json.LEVEL, "INFO");
assertEntryEquals(rootNode, Log4Json.NAME, "testNestedException");
assertEntryEquals(rootNode, Log4Json.TIME, timeStamp);
assertEntryEquals(rootNode, Log4Json.EXCEPTION_CLASS,
ioe.getClass().getName());
JsonNode node = assertNodeContains(rootNode, Log4Json.STACK);
assertTrue("Not an array: " + node, node.isArray());
node = assertNodeContains(rootNode, Log4Json.DATE);
assertTrue("Not a string: " + node, node.isTextual());
//rather than try and make assertions about the format of the text
//message equalling another ISO date, this test asserts that the hyphen
//and colon characters are in the string.
String dateText = node.textValue();
assertTrue("No '-' in " + dateText, dateText.contains("-"));
assertTrue("No ':' in " + dateText, dateText.contains(":"));
}
/**
* Create a log instance and log to it
* @throws Throwable if it all goes wrong
*/
@Test
public void testLog() throws Throwable {
String message = "test message";
Throwable throwable = null;
String json = logOut(message, throwable);
println("testLog", json);
}
/**
* Create a log instance and log to it
* @throws Throwable if it all goes wrong
*/
@Test
public void testLogExceptions() throws Throwable {
String message = "test message";
Throwable inner = new IOException("Directory / not found");
Throwable throwable = new IOException("startup failure", inner);
String json = logOut(message, throwable);
println("testLogExceptions", json);
}
void assertEntryEquals(ContainerNode rootNode, String key, String value) {
JsonNode node = assertNodeContains(rootNode, key);
assertEquals(value, node.textValue());
}
private JsonNode assertNodeContains(ContainerNode rootNode, String key) {
JsonNode node = rootNode.get(key);
if (node == null) {
fail("No entry of name \"" + key + "\" found in " + rootNode.toString());
}
return node;
}
void assertEntryEquals(ContainerNode rootNode, String key, long value) {
JsonNode node = assertNodeContains(rootNode, key);
assertEquals(value, node.numberValue());
}
/**
* Print out what's going on. The logging APIs aren't used and the text
* is delimited for easier reading.
*
* @param name name of operation
* @param text text to print
*/
private void println(String name, String text) {
System.out.println(name + ": #" + text + "#");
}
private String logOut(String message, Throwable throwable) {
StringWriter writer = new StringWriter();
Logger logger = createLogger(writer);
logger.info(message, throwable);
//remove and close the appender
logger.removeAllAppenders();
return writer.toString();
}
public Logger createLogger(Writer writer) {
TestLoggerRepository repo = new TestLoggerRepository();
Logger logger = repo.getLogger("test");
Log4Json layout = new Log4Json();
WriterAppender appender = new WriterAppender(layout, writer);
logger.addAppender(appender);
return logger;
}
/**
* This test logger avoids integrating with the main runtime's Logger hierarchy
* in ways the reader does not want to know.
*/
private static class TestLogger extends Logger {
private TestLogger(String name, LoggerRepository repo) {
super(name);
repository = repo;
setLevel(Level.INFO);
}
}
public static class TestLoggerRepository implements LoggerRepository {
@Override
public void addHierarchyEventListener(HierarchyEventListener listener) {
}
@Override
public boolean isDisabled(int level) {
return false;
}
@Override
public void setThreshold(Level level) {
}
@Override
public void setThreshold(String val) {
}
@Override
public void emitNoAppenderWarning(Category cat) {
}
@Override
public Level getThreshold() {
return Level.ALL;
}
@Override
public Logger getLogger(String name) {
return new TestLogger(name, this);
}
@Override
public Logger getLogger(String name, LoggerFactory factory) {
return new TestLogger(name, this);
}
@Override
public Logger getRootLogger() {
return new TestLogger("root", this);
}
@Override
public Logger exists(String name) {
return null;
}
@Override
public void shutdown() {
}
@Override
public Enumeration getCurrentLoggers() {
return new Vector().elements();
}
@Override
public Enumeration getCurrentCategories() {
return new Vector().elements();
}
@Override
public void fireAddAppenderEvent(Category logger, Appender appender) {
}
@Override
public void resetConfiguration() {
}
}
}

View File

@ -52,6 +52,8 @@ public class TestMutableMetrics {
private static final Logger LOG =
LoggerFactory.getLogger(TestMutableMetrics.class);
private static final double EPSILON = 1e-42;
private static final int SLEEP_TIME_MS = 6 * 1000; // 6 seconds.
private static final int SAMPLE_COUNT = 1000;
/**
* Test the snapshot method
@ -395,14 +397,14 @@ public class TestMutableMetrics {
MutableQuantiles quantiles = registry.newQuantiles("foo", "stat", "Ops",
"Latency", 5);
// Push some values in and wait for it to publish
long start = System.nanoTime() / 1000000;
for (long i = 1; i <= 1000; i++) {
long startTimeMS = System.currentTimeMillis();
for (long i = 1; i <= SAMPLE_COUNT; i++) {
quantiles.add(i);
quantiles.add(1001 - i);
}
long end = System.nanoTime() / 1000000;
long endTimeMS = System.currentTimeMillis();
Thread.sleep(6000 - (end - start));
Thread.sleep(SLEEP_TIME_MS - (endTimeMS - startTimeMS));
registry.snapshot(mb, false);
@ -414,10 +416,8 @@ public class TestMutableMetrics {
}
// Verify the results are within our requirements
verify(mb).addGauge(
info("FooNumOps", "Number of ops for stat with 5s interval"),
(long) 2000);
Quantile[] quants = MutableQuantiles.quantiles;
verify(mb).addGauge(info("FooNumOps", "Number of ops for stat with 5s interval"), 2000L);
Quantile[] quants = MutableQuantiles.QUANTILES;
String name = "Foo%dthPercentileLatency";
String desc = "%d percentile latency with 5 second interval for stat";
for (Quantile q : quants) {
@ -431,6 +431,46 @@ public class TestMutableMetrics {
}
}
/**
* Ensure that quantile estimates from {@link MutableInverseQuantiles} are within
* specified error bounds.
*/
@Test(timeout = 30000)
public void testMutableInverseQuantilesError() throws Exception {
MetricsRecordBuilder mb = mockMetricsRecordBuilder();
MetricsRegistry registry = new MetricsRegistry("test");
// Use a 5s rollover period
MutableQuantiles inverseQuantiles = registry.newInverseQuantiles("foo", "stat", "Ops",
"Latency", 5);
// Push some values in and wait for it to publish
long startTimeMS = System.currentTimeMillis();
for (long i = 1; i <= SAMPLE_COUNT; i++) {
inverseQuantiles.add(i);
inverseQuantiles.add(1001 - i);
}
long endTimeMS = System.currentTimeMillis();
Thread.sleep(SLEEP_TIME_MS - (endTimeMS - startTimeMS));
registry.snapshot(mb, false);
// Verify the results are within our requirements
verify(mb).addGauge(
info("FooNumOps", "Number of ops for stat with 5s interval"), 2000L);
Quantile[] inverseQuants = MutableInverseQuantiles.INVERSE_QUANTILES;
String name = "Foo%dthInversePercentileLatency";
String desc = "%d inverse percentile latency with 5 second interval for stat";
for (Quantile q : inverseQuants) {
int inversePercentile = (int) (100 * (1 - q.quantile));
int error = (int) (1000 * q.error);
String n = String.format(name, inversePercentile);
String d = String.format(desc, inversePercentile);
long expected = (long) (q.quantile * 1000);
verify(mb).addGauge(eq(info(n, d)), leq(expected + error));
verify(mb).addGauge(eq(info(n, d)), geq(expected - error));
}
}
/**
* Test that {@link MutableQuantiles} rolls the window over at the specified
* interval.
@ -443,21 +483,21 @@ public class TestMutableMetrics {
MutableQuantiles quantiles = registry.newQuantiles("foo", "stat", "Ops",
"Latency", 5);
Quantile[] quants = MutableQuantiles.quantiles;
Quantile[] quants = MutableQuantiles.QUANTILES;
String name = "Foo%dthPercentileLatency";
String desc = "%d percentile latency with 5 second interval for stat";
// Push values for three intervals
long start = System.nanoTime() / 1000000;
long startTimeMS = System.currentTimeMillis();
for (int i = 1; i <= 3; i++) {
// Insert the values
for (long j = 1; j <= 1000; j++) {
for (long j = 1; j <= SAMPLE_COUNT; j++) {
quantiles.add(i);
}
// Sleep until 1s after the next 5s interval, to let the metrics
// roll over
long sleep = (start + (5000 * i) + 1000) - (System.nanoTime() / 1000000);
Thread.sleep(sleep);
long sleepTimeMS = startTimeMS + (5000L * i) + 1000 - System.currentTimeMillis();
Thread.sleep(sleepTimeMS);
// Verify that the window reset, check it has the values we pushed in
registry.snapshot(mb, false);
for (Quantile q : quants) {
@ -470,8 +510,7 @@ public class TestMutableMetrics {
// Verify the metrics were added the right number of times
verify(mb, times(3)).addGauge(
info("FooNumOps", "Number of ops for stat with 5s interval"),
(long) 1000);
info("FooNumOps", "Number of ops for stat with 5s interval"), 1000L);
for (Quantile q : quants) {
int percentile = (int) (100 * q.quantile);
String n = String.format(name, percentile);
@ -481,7 +520,56 @@ public class TestMutableMetrics {
}
/**
* Test that {@link MutableQuantiles} rolls over correctly even if no items
* Test that {@link MutableInverseQuantiles} rolls the window over at the specified
* interval.
*/
@Test(timeout = 30000)
public void testMutableInverseQuantilesRollover() throws Exception {
MetricsRecordBuilder mb = mockMetricsRecordBuilder();
MetricsRegistry registry = new MetricsRegistry("test");
// Use a 5s rollover period
MutableQuantiles inverseQuantiles = registry.newInverseQuantiles("foo", "stat", "Ops",
"Latency", 5);
Quantile[] quants = MutableInverseQuantiles.INVERSE_QUANTILES;
String name = "Foo%dthInversePercentileLatency";
String desc = "%d inverse percentile latency with 5 second interval for stat";
// Push values for three intervals
long startTimeMS = System.currentTimeMillis();
for (int i = 1; i <= 3; i++) {
// Insert the values
for (long j = 1; j <= SAMPLE_COUNT; j++) {
inverseQuantiles.add(i);
}
// Sleep until 1s after the next 5s interval, to let the metrics
// roll over
long sleepTimeMS = startTimeMS + (5000L * i) + 1000 - System.currentTimeMillis();
Thread.sleep(sleepTimeMS);
// Verify that the window reset, check it has the values we pushed in
registry.snapshot(mb, false);
for (Quantile q : quants) {
int inversePercentile = (int) (100 * (1 - q.quantile));
String n = String.format(name, inversePercentile);
String d = String.format(desc, inversePercentile);
verify(mb).addGauge(info(n, d), (long) i);
}
}
// Verify the metrics were added the right number of times
verify(mb, times(3)).addGauge(
info("FooNumOps", "Number of ops for stat with 5s interval"), 1000L);
for (Quantile q : quants) {
int inversePercentile = (int) (100 * (1 - q.quantile));
String n = String.format(name, inversePercentile);
String d = String.format(desc, inversePercentile);
verify(mb, times(3)).addGauge(eq(info(n, d)), anyLong());
}
}
/**
* Test that {@link MutableQuantiles} rolls over correctly even if no items
* have been added to the window.
*/
@Test(timeout = 30000)
@ -495,11 +583,33 @@ public class TestMutableMetrics {
// Check it initially
quantiles.snapshot(mb, true);
verify(mb).addGauge(
info("FooNumOps", "Number of ops for stat with 5s interval"), (long) 0);
Thread.sleep(6000);
info("FooNumOps", "Number of ops for stat with 5s interval"), 0L);
Thread.sleep(SLEEP_TIME_MS);
quantiles.snapshot(mb, false);
verify(mb, times(2)).addGauge(
info("FooNumOps", "Number of ops for stat with 5s interval"), (long) 0);
info("FooNumOps", "Number of ops for stat with 5s interval"), 0L);
}
/**
* Test that {@link MutableInverseQuantiles} rolls over correctly even if no items
* have been added to the window
*/
@Test(timeout = 30000)
public void testMutableInverseQuantilesEmptyRollover() throws Exception {
MetricsRecordBuilder mb = mockMetricsRecordBuilder();
MetricsRegistry registry = new MetricsRegistry("test");
// Use a 5s rollover period
MutableQuantiles inverseQuantiles = registry.newInverseQuantiles("foo", "stat", "Ops",
"Latency", 5);
// Check it initially
inverseQuantiles.snapshot(mb, true);
verify(mb).addGauge(
info("FooNumOps", "Number of ops for stat with 5s interval"), 0L);
Thread.sleep(SLEEP_TIME_MS);
inverseQuantiles.snapshot(mb, false);
verify(mb, times(2)).addGauge(
info("FooNumOps", "Number of ops for stat with 5s interval"), 0L);
}
/**

View File

@ -24,6 +24,7 @@ import java.util.Collections;
import java.util.Map;
import java.util.Random;
import org.apache.hadoop.metrics2.lib.MutableInverseQuantiles;
import org.junit.Before;
import org.junit.Test;
@ -36,6 +37,7 @@ public class TestSampleQuantiles {
new Quantile(0.95, 0.005), new Quantile(0.99, 0.001) };
SampleQuantiles estimator;
final static int NUM_REPEATS = 10;
@Before
public void init() {
@ -91,28 +93,70 @@ public class TestSampleQuantiles {
@Test
public void testQuantileError() throws IOException {
final int count = 100000;
Random r = new Random(0xDEADDEAD);
Long[] values = new Long[count];
Random rnd = new Random(0xDEADDEAD);
int[] values = new int[count];
for (int i = 0; i < count; i++) {
values[i] = (long) (i + 1);
values[i] = i + 1;
}
// Do 10 shuffle/insert/check cycles
for (int i = 0; i < 10; i++) {
System.out.println("Starting run " + i);
Collections.shuffle(Arrays.asList(values), r);
// Repeat shuffle/insert/check cycles 10 times
for (int i = 0; i < NUM_REPEATS; i++) {
// Shuffle
Collections.shuffle(Arrays.asList(values), rnd);
estimator.clear();
for (int j = 0; j < count; j++) {
estimator.insert(values[j]);
// Insert
for (int value : values) {
estimator.insert(value);
}
Map<Quantile, Long> snapshot;
snapshot = estimator.snapshot();
// Check
for (Quantile q : quantiles) {
long actual = (long) (q.quantile * count);
long error = (long) (q.error * count);
long estimate = snapshot.get(q);
System.out
.println(String.format("Expected %d with error %d, estimated %d",
actual, error, estimate));
assertThat(estimate <= actual + error).isTrue();
assertThat(estimate >= actual - error).isTrue();
}
}
}
/**
* Correctness test that checks that absolute error of the estimate for inverse quantiles
* is within specified error bounds for some randomly permuted streams of items.
*/
@Test
public void testInverseQuantiles() throws IOException {
SampleQuantiles inverseQuantilesEstimator =
new SampleQuantiles(MutableInverseQuantiles.INVERSE_QUANTILES);
final int count = 100000;
Random rnd = new Random(0xDEADDEAD);
int[] values = new int[count];
for (int i = 0; i < count; i++) {
values[i] = i + 1;
}
// Repeat shuffle/insert/check cycles 10 times
for (int i = 0; i < NUM_REPEATS; i++) {
// Shuffle
Collections.shuffle(Arrays.asList(values), rnd);
inverseQuantilesEstimator.clear();
// Insert
for (int value : values) {
inverseQuantilesEstimator.insert(value);
}
Map<Quantile, Long> snapshot;
snapshot = inverseQuantilesEstimator.snapshot();
// Check
for (Quantile q : MutableInverseQuantiles.INVERSE_QUANTILES) {
long actual = (long) (q.quantile * count);
long error = (long) (q.error * count);
long estimate = snapshot.get(q);
assertThat(estimate <= actual + error).isTrue();
assertThat(estimate >= actual - error).isTrue();
}

View File

@ -392,13 +392,34 @@ public class MetricsAsserts {
*/
public static void assertQuantileGauges(String prefix,
MetricsRecordBuilder rb, String valueName) {
verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0l));
for (Quantile q : MutableQuantiles.quantiles) {
verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0L));
for (Quantile q : MutableQuantiles.QUANTILES) {
String nameTemplate = prefix + "%dthPercentile" + valueName;
int percentile = (int) (100 * q.quantile);
verify(rb).addGauge(
eqName(info(String.format(nameTemplate, percentile), "")),
geq(0l));
geq(0L));
}
}
/**
* Asserts that the NumOps and inverse quantiles for a metric have been changed at
* some point to a non-zero value, for the specified value name of the
* metrics (e.g., "Rate").
*
* @param prefix of the metric
* @param rb MetricsRecordBuilder with the metric
* @param valueName the value name for the metric
*/
public static void assertInverseQuantileGauges(String prefix,
MetricsRecordBuilder rb, String valueName) {
verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0L));
for (Quantile q : MutableQuantiles.QUANTILES) {
String nameTemplate = prefix + "%dthInversePercentile" + valueName;
int percentile = (int) (100 * q.quantile);
verify(rb).addGauge(
eqName(info(String.format(nameTemplate, percentile), "")),
geq(0L));
}
}
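As context for the inverse-quantile metrics introduced in the preceding diffs, a minimal Java sketch of how a caller might register and feed such a metric, using only the MetricsRegistry and MutableQuantiles calls visible in these hunks; the metric names are illustrative:

```java
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;

public class InverseQuantilesExample {
  public static void main(String[] args) throws Exception {
    MetricsRegistry registry = new MetricsRegistry("example");
    // 5-second rollover window, as in the tests above.
    MutableQuantiles inverseLatency =
        registry.newInverseQuantiles("readLatency", "read latency", "Ops", "Latency", 5);
    // Record latency samples; on rollover the registry publishes gauges named
    // like <Name><N>thInversePercentileLatency, per the test expectations above.
    for (long i = 1; i <= 1000; i++) {
      inverseLatency.add(i);
    }
    Thread.sleep(6000); // let the 5s window roll over before snapshotting elsewhere
  }
}
```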

View File

@ -140,7 +140,7 @@ public class TestGenericsUtil {
@Test
public void testIsLog4jLogger() throws Exception {
assertFalse("False if clazz is null", GenericsUtil.isLog4jLogger(null));
assertFalse("False if clazz is null", GenericsUtil.isLog4jLogger((Class<?>) null));
assertTrue("The implementation is Log4j",
GenericsUtil.isLog4jLogger(TestGenericsUtil.class));
}

View File

@ -20,7 +20,7 @@ package org.apache.hadoop.crypto.key.kms.server;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.PropertyConfigurator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -104,8 +104,6 @@ public class KMSConfiguration {
public static final boolean KEY_AUTHORIZATION_ENABLE_DEFAULT = true;
private static final String LOG4J_PROPERTIES = "kms-log4j.properties";
static {
Configuration.addDefaultResource(KMS_DEFAULT_XML);
Configuration.addDefaultResource(KMS_SITE_XML);
@ -163,31 +161,20 @@ public class KMSConfiguration {
return newer;
}
public static void initLogging() {
String confDir = System.getProperty(KMS_CONFIG_DIR);
if (confDir == null) {
throw new RuntimeException("System property '" +
KMSConfiguration.KMS_CONFIG_DIR + "' not defined");
/**
* Validate whether "kms.config.dir" and "log4j.configuration" are defined in the System
* properties. If not, abort the KMS WebServer.
*/
public static void validateSystemProps() {
if (System.getProperty(KMS_CONFIG_DIR) == null) {
String errorMsg = "System property '" + KMS_CONFIG_DIR + "' not defined";
System.err.println("Aborting KMSWebServer because " + errorMsg);
throw new RuntimeException(errorMsg);
}
if (System.getProperty("log4j.configuration") == null) {
System.setProperty("log4j.defaultInitOverride", "true");
boolean fromClasspath = true;
File log4jConf = new File(confDir, LOG4J_PROPERTIES).getAbsoluteFile();
if (log4jConf.exists()) {
PropertyConfigurator.configureAndWatch(log4jConf.getPath(), 1000);
fromClasspath = false;
} else {
ClassLoader cl = Thread.currentThread().getContextClassLoader();
URL log4jUrl = cl.getResource(LOG4J_PROPERTIES);
if (log4jUrl != null) {
PropertyConfigurator.configure(log4jUrl);
}
}
LOG.debug("KMS log starting");
if (fromClasspath) {
LOG.warn("Log4j configuration file '{}' not found", LOG4J_PROPERTIES);
LOG.warn("Logging with INFO level to standard output");
}
String errorMsg = "System property 'log4j.configuration' not defined";
System.err.println("Aborting KMSWebServer because " + errorMsg);
throw new RuntimeException(errorMsg);
}
}
}

View File

@ -185,7 +185,7 @@ public class KMSWebServer {
}
public static void main(String[] args) throws Exception {
KMSConfiguration.initLogging();
KMSConfiguration.validateSystemProps();
StringUtils.startupShutdownMessage(KMSWebServer.class, args, LOG);
Configuration conf = KMSConfiguration.getKMSConf();
Configuration sslConf = SSLFactory.readSSLConfiguration(conf, SSLFactory.Mode.SERVER);

View File

@ -49,6 +49,8 @@ function hadoop_subcommand_kms
"-Dkms.config.dir=${HADOOP_CONF_DIR}"
hadoop_add_param HADOOP_OPTS "-Dkms.log.dir=" \
"-Dkms.log.dir=${HADOOP_LOG_DIR}"
hadoop_add_param HADOOP_OPTS "-Dlog4j.configuration=" \
"-Dlog4j.configuration=file:${HADOOP_CONF_DIR}/kms-log4j.properties"
if [[ "${HADOOP_DAEMON_MODE}" == "default" ]] ||
[[ "${HADOOP_DAEMON_MODE}" == "start" ]]; then

View File

@ -3090,10 +3090,14 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
}
}
void updateFileSystemReadStats(int distance, int nRead) {
void updateFileSystemReadStats(int distance, int readBytes, long readTimeMS) {
if (stats != null) {
stats.incrementBytesRead(nRead);
stats.incrementBytesReadByDistance(distance, nRead);
stats.incrementBytesRead(readBytes);
stats.incrementBytesReadByDistance(distance, readBytes);
if (distance > 0) {
//remote read
stats.increaseRemoteReadTime(readTimeMS);
}
}
}

View File

@ -224,7 +224,7 @@ public class DFSInputStream extends FSInputStream
}
/**
* Grab the open-file info from namenode
* Grab the open-file info from namenode.
* @param refreshLocatedBlocks whether to re-fetch locatedblocks
*/
void openInfo(boolean refreshLocatedBlocks) throws IOException {
@ -851,8 +851,9 @@ public class DFSInputStream extends FSInputStream
locatedBlocks.getFileLength() - pos);
}
}
long beginReadMS = Time.monotonicNow();
int result = readBuffer(strategy, realLen, corruptedBlocks);
long readTimeMS = Time.monotonicNow() - beginReadMS;
if (result >= 0) {
pos += result;
} else {
@ -861,7 +862,7 @@ public class DFSInputStream extends FSInputStream
}
updateReadStatistics(readStatistics, result, blockReader);
dfsClient.updateFileSystemReadStats(blockReader.getNetworkDistance(),
result);
result, readTimeMS);
if (readStatistics.getBlockType() == BlockType.STRIPED) {
dfsClient.updateFileSystemECReadStats(result);
}
@ -940,7 +941,8 @@ public class DFSInputStream extends FSInputStream
* @return Returns chosen DNAddrPair; Can be null if refetchIfRequired is
* false.
*/
private DNAddrPair chooseDataNode(LocatedBlock block,
@VisibleForTesting
DNAddrPair chooseDataNode(LocatedBlock block,
Collection<DatanodeInfo> ignoredNodes, boolean refetchIfRequired)
throws IOException {
while (true) {
@ -955,6 +957,14 @@ public class DFSInputStream extends FSInputStream
}
}
/**
* RefetchLocations should only be called when there are no active requests
* to datanodes. In the hedged read case this means futures should be empty.
* @param block The locatedBlock to get new datanode locations for.
* @param ignoredNodes A list of ignored nodes. This list can be null and can be cleared.
* @return the locatedBlock with updated datanode locations.
* @throws IOException
*/
private LocatedBlock refetchLocations(LocatedBlock block,
Collection<DatanodeInfo> ignoredNodes) throws IOException {
String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(),
@ -999,13 +1009,24 @@ public class DFSInputStream extends FSInputStream
throw new InterruptedIOException(
"Interrupted while choosing DataNode for read.");
}
clearLocalDeadNodes(); //2nd option is to remove only nodes[blockId]
clearCachedNodeState(ignoredNodes);
openInfo(true);
block = refreshLocatedBlock(block);
failures++;
return block;
}
/**
* Clear both the dead nodes and the ignored nodes
* @param ignoredNodes is cleared
*/
private void clearCachedNodeState(Collection<DatanodeInfo> ignoredNodes) {
clearLocalDeadNodes(); //2nd option is to remove only nodes[blockId]
if (ignoredNodes != null) {
ignoredNodes.clear();
}
}
/**
* Get the best node from which to stream the data.
* @param block LocatedBlock, containing nodes in priority order.
@ -1164,6 +1185,7 @@ public class DFSInputStream extends FSInputStream
ByteBuffer tmp = buf.duplicate();
tmp.limit(tmp.position() + len);
tmp = tmp.slice();
long beginReadMS = Time.monotonicNow();
int nread = 0;
int ret;
while (true) {
@ -1173,11 +1195,12 @@ public class DFSInputStream extends FSInputStream
}
nread += ret;
}
long readTimeMS = Time.monotonicNow() - beginReadMS;
buf.position(buf.position() + nread);
IOUtilsClient.updateReadStatistics(readStatistics, nread, reader);
dfsClient.updateFileSystemReadStats(
reader.getNetworkDistance(), nread);
reader.getNetworkDistance(), nread, readTimeMS);
if (readStatistics.getBlockType() == BlockType.STRIPED) {
dfsClient.updateFileSystemECReadStats(nread);
}
@ -1337,8 +1360,12 @@ public class DFSInputStream extends FSInputStream
} catch (InterruptedException ie) {
// Ignore and retry
}
if (refetch) {
refetchLocations(block, ignored);
// If refetch is true, then all nodes are in deadNodes or ignoredNodes.
// We should loop through all futures and remove them, so we do not
// have concurrent requests to the same node.
// Once all futures are cleared, we can clear the ignoredNodes and retry.
if (refetch && futures.isEmpty()) {
block = refetchLocations(block, ignored);
}
// We got here if exception. Ignore this node on next go around IFF
// we found a chosenNode to hedge read against.

View File

@ -331,15 +331,17 @@ public class DFSStripedInputStream extends DFSInputStream {
* its ThreadLocal.
*
* @param stats striped read stats
* @param readTimeMS read time metrics in ms
*
*/
void updateReadStats(final StripedBlockUtil.BlockReadStats stats) {
void updateReadStats(final StripedBlockUtil.BlockReadStats stats, long readTimeMS) {
if (stats == null) {
return;
}
updateReadStatistics(readStatistics, stats.getBytesRead(),
stats.isShortCircuit(), stats.getNetworkDistance());
dfsClient.updateFileSystemReadStats(stats.getNetworkDistance(),
stats.getBytesRead());
stats.getBytesRead(), readTimeMS);
assert readStatistics.getBlockType() == BlockType.STRIPED;
dfsClient.updateFileSystemECReadStats(stats.getBytesRead());
}

View File

@ -351,9 +351,12 @@ abstract class StripeReader {
// first read failure
while (!futures.isEmpty()) {
try {
long beginReadMS = Time.monotonicNow();
StripingChunkReadResult r = StripedBlockUtil
.getNextCompletedStripedRead(service, futures, 0);
dfsStripedInputStream.updateReadStats(r.getReadStats());
long readTimeMS = Time.monotonicNow() - beginReadMS;
dfsStripedInputStream.updateReadStats(r.getReadStats(), readTimeMS);
DFSClient.LOG.debug("Read task returned: {}, for stripe {}",
r, alignedStripe);
StripingChunk returnedChunk = alignedStripe.chunks[r.index];

View File

@ -53,6 +53,7 @@ public final class DfsPathCapabilities {
case CommonPathCapabilities.FS_SNAPSHOTS:
case CommonPathCapabilities.FS_STORAGEPOLICY:
case CommonPathCapabilities.FS_XATTRS:
case CommonPathCapabilities.FS_TRUNCATE:
return Optional.of(true);
case CommonPathCapabilities.FS_SYMLINKS:
return Optional.of(FileSystem.areSymlinksEnabled());

View File

@ -2147,6 +2147,37 @@ public class WebHdfsFileSystem extends FileSystem
}.run();
}
@Override
public Path getLinkTarget(Path f) throws IOException {
statistics.incrementReadOps(1);
storageStatistics.incrementOpCounter(OpType.GET_LINK_TARGET);
final HttpOpParam.Op op = GetOpParam.Op.GETLINKTARGET;
return new FsPathResponseRunner<Path>(op, f) {
@Override
Path decodeResponse(Map<?, ?> json) {
return new Path((String) json.get(Path.class.getSimpleName()));
}
}.run();
}
@Override
public FileStatus getFileLinkStatus(Path f) throws IOException {
statistics.incrementReadOps(1);
storageStatistics.incrementOpCounter(OpType.GET_FILE_LINK_STATUS);
final HttpOpParam.Op op = GetOpParam.Op.GETFILELINKSTATUS;
HdfsFileStatus status =
new FsPathResponseRunner<HdfsFileStatus>(op, f) {
@Override
HdfsFileStatus decodeResponse(Map<?, ?> json) {
return JsonUtilClient.toFileStatus(json, true);
}
}.run();
if (status == null) {
throw new FileNotFoundException("File does not exist: " + f);
}
return status.makeQualified(getUri(), f);
}
@VisibleForTesting
InetSocketAddress[] getResolvedNNAddr() {
return nnAddrs;

View File

@ -64,6 +64,8 @@ public class GetOpParam extends HttpOpParam<GetOpParam.Op> {
GETSNAPSHOTDIFF(false, HttpURLConnection.HTTP_OK),
GETSNAPSHOTDIFFLISTING(false, HttpURLConnection.HTTP_OK),
GETSNAPSHOTTABLEDIRECTORYLIST(false, HttpURLConnection.HTTP_OK),
GETLINKTARGET(false, HttpURLConnection.HTTP_OK),
GETFILELINKSTATUS(false, HttpURLConnection.HTTP_OK),
GETSNAPSHOTLIST(false, HttpURLConnection.HTTP_OK);
final boolean redirect;
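The two diffs above wire symlink lookups (GETLINKTARGET, GETFILELINKSTATUS) into WebHDFS. A minimal Java sketch of calling them through the standard FileSystem API; the webhdfs URI and path are placeholders:

```java
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WebHdfsLinkExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder NameNode HTTP address.
    FileSystem fs = FileSystem.get(URI.create("webhdfs://namenode:9870/"), conf);
    Path link = new Path("/user/example/link");
    // GETLINKTARGET: resolve the symlink target path.
    Path target = fs.getLinkTarget(link);
    // GETFILELINKSTATUS: status of the link itself rather than of its target.
    FileStatus linkStatus = fs.getFileLinkStatus(link);
    System.out.println(linkStatus.getPath() + " -> " + target);
  }
}
```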

View File

@ -1646,6 +1646,7 @@ public class HttpFSFileSystem extends FileSystem
case CommonPathCapabilities.FS_SNAPSHOTS:
case CommonPathCapabilities.FS_STORAGEPOLICY:
case CommonPathCapabilities.FS_XATTRS:
case CommonPathCapabilities.FS_TRUNCATE:
return true;
case CommonPathCapabilities.FS_SYMLINKS:
return false;

View File

@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.BlockStoragePolicySpi;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.CommonPathCapabilities;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileChecksum;
@ -302,9 +303,17 @@ public abstract class BaseTestHttpFSWith extends HFSTestCase {
AppendTestUtil.checkFullFile(fs, file, newLength, data, file.toString());
fs.close();
assertPathCapabilityForTruncate(file);
}
}
private void assertPathCapabilityForTruncate(Path file) throws Exception {
FileSystem fs = this.getHttpFSFileSystem();
assertTrue("HttpFS/WebHdfs/SWebHdfs support truncate",
fs.hasPathCapability(file, CommonPathCapabilities.FS_TRUNCATE));
fs.close();
}
private void testConcat() throws Exception {
Configuration config = getProxiedFSConf();
config.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.federation.metrics;
import static org.apache.hadoop.metrics2.impl.MsInfo.ProcessName;
import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@ -54,6 +55,7 @@ public class StateStoreMetrics implements StateStoreMBean {
private MutableRate failures;
private Map<String, MutableGaugeInt> cacheSizes;
private final Map<String, MutableRate> cacheLoadMetrics = new HashMap<>();
protected StateStoreMetrics() {}
@ -150,6 +152,32 @@ public class StateStoreMetrics implements StateStoreMBean {
counter.set(count);
}
/**
* Set the cache loading metrics for the state store interface.
*
* @param name Name of the record of the cache.
* @param value The time duration interval as the cache value.
*/
public void setCacheLoading(String name, long value) {
String cacheLoad = "Cache" + name + "Load";
MutableRate cacheLoadMetric = cacheLoadMetrics.get(cacheLoad);
if (cacheLoadMetric == null) {
cacheLoadMetric = registry.newRate(cacheLoad, name, false);
cacheLoadMetrics.put(cacheLoad, cacheLoadMetric);
}
cacheLoadMetrics.get(cacheLoad).add(value);
}
/**
* Retrieve unmodifiable map of cache loading metrics.
*
* @return unmodifiable map of cache loading metrics.
*/
@VisibleForTesting
public Map<String, MutableRate> getCacheLoadMetrics() {
return Collections.unmodifiableMap(cacheLoadMetrics);
}
@VisibleForTesting
public void reset() {
reads.resetMinMax();

View File

@ -678,11 +678,16 @@ public class MountTableResolver
* @return Size of the cache.
* @throws IOException If the cache is not initialized.
*/
protected long getCacheSize() throws IOException{
if (this.locationCache != null) {
return this.locationCache.size();
protected long getCacheSize() throws IOException {
this.readLock.lock();
try {
if (this.locationCache != null) {
return this.locationCache.size();
}
throw new IOException("localCache is null");
} finally {
this.readLock.unlock();
}
throw new IOException("localCache is null");
}
@VisibleForTesting

View File

@ -317,11 +317,8 @@ public class NamenodeHeartbeatService extends PeriodicService {
if (!resolver.registerNamenode(report)) {
LOG.warn("Cannot register namenode {}", report);
}
} catch (IOException e) {
LOG.info("Cannot register namenode in the State Store");
} catch (Exception ex) {
LOG.error("Unhandled exception updating NN registration for {}",
getNamenodeDesc(), ex);
} catch (Exception e) {
LOG.error("Cannot register namenode {} in the State Store", getNamenodeDesc(), e);
}
}

View File

@ -201,6 +201,10 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
FEDERATION_ROUTER_PREFIX + "observer.federated.state.propagation.maxsize";
public static final int DFS_ROUTER_OBSERVER_FEDERATED_STATE_PROPAGATION_MAXSIZE_DEFAULT = 5;
public static final String DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY =
FEDERATION_ROUTER_PREFIX + "observer.state.id.refresh.period";
public static final String DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_DEFAULT = "15s";
public static final String FEDERATION_STORE_SERIALIZER_CLASS =
FEDERATION_STORE_PREFIX + "serializer";
public static final Class<StateStoreSerializerPBImpl>
@ -251,6 +255,15 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
public static final int FEDERATION_STORE_ZK_ASYNC_MAX_THREADS_DEFAULT =
-1;
// HDFS Router-based federation File based store implementation specific configs
public static final String FEDERATION_STORE_FILE_ASYNC_THREADS =
FEDERATION_STORE_PREFIX + "driver.file.async.threads";
public static final int FEDERATION_STORE_FILE_ASYNC_THREADS_DEFAULT = 0;
public static final String FEDERATION_STORE_FS_ASYNC_THREADS =
FEDERATION_STORE_PREFIX + "driver.fs.async.threads";
public static final int FEDERATION_STORE_FS_ASYNC_THREADS_DEFAULT = 0;
// HDFS Router safe mode
public static final String DFS_ROUTER_SAFEMODE_ENABLE =
FEDERATION_ROUTER_PREFIX + "safemode.enable";
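A short Java sketch of enabling concurrent access for the file/filesystem state store drivers via the new keys above; the thread count is illustrative, and the default of 0 keeps access serial, as the StateStoreFileBaseImpl diff later in this commit shows. The RBFConfigKeys package path is assumed to be the usual one under hadoop-hdfs-rbf:

```java
import org.apache.hadoop.conf.Configuration;
// Assumed package for RBFConfigKeys in hadoop-hdfs-rbf.
import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys;

public class StateStoreAsyncThreadsExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Read and write state store record files with up to 8 threads instead of serially.
    conf.setInt(RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS, 8);
    conf.setInt(RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS, 8);
  }
}
```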

View File

@ -614,6 +614,11 @@ public class RouterClientProtocol implements ClientProtocol {
new Class<?>[] {String.class, String.class},
new RemoteParam(), dstParam);
if (isMultiDestDirectory(src)) {
if (locs.size() != srcLocations.size()) {
throw new IOException("Rename of " + src + " to " + dst + " is not"
+ " allowed. The number of remote locations for both source and"
+ " target should be the same.");
}
return rpcClient.invokeAll(locs, method);
} else {
return rpcClient.invokeSequential(locs, method, Boolean.class,
@ -641,6 +646,11 @@ public class RouterClientProtocol implements ClientProtocol {
new Class<?>[] {String.class, String.class, options.getClass()},
new RemoteParam(), dstParam, options);
if (isMultiDestDirectory(src)) {
if (locs.size() != srcLocations.size()) {
throw new IOException("Rename of " + src + " to " + dst + " is not"
+ " allowed. The number of remote locations for both source and"
+ " target should be the same.");
}
rpcClient.invokeConcurrent(locs, method);
} else {
rpcClient.invokeSequential(locs, method, null, null);

View File

@ -57,6 +57,7 @@ import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAccumulator;
import java.util.concurrent.atomic.LongAdder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -86,6 +87,7 @@ import org.apache.hadoop.net.ConnectTimeoutException;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.eclipse.jetty.util.ajax.JSON;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -136,6 +138,14 @@ public class RouterRpcClient {
private final boolean observerReadEnabledDefault;
/** Nameservice specific overrides of the default setting for enabling observer reads. */
private HashSet<String> observerReadEnabledOverrides = new HashSet<>();
/**
* Period to refresh namespace stateID using active namenode.
* This ensures the namespace stateID is fresh even when an
* observer is trailing behind.
*/
private long activeNNStateIdRefreshPeriodMs;
/** Last msync times for each namespace. */
private final ConcurrentHashMap<String, LongAccumulator> lastActiveNNRefreshTimes;
/** Pattern to parse a stack trace line. */
private static final Pattern STACK_TRACE_PATTERN =
@ -211,13 +221,25 @@ public class RouterRpcClient {
this.observerReadEnabledDefault = conf.getBoolean(
RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_DEFAULT_KEY,
RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_DEFAULT_VALUE);
String[] observerReadOverrides = conf.getStrings(RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_OVERRIDES);
String[] observerReadOverrides =
conf.getStrings(RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_OVERRIDES);
if (observerReadOverrides != null) {
observerReadEnabledOverrides.addAll(Arrays.asList(observerReadOverrides));
}
if (this.observerReadEnabledDefault) {
LOG.info("Observer read is enabled for router.");
}
this.activeNNStateIdRefreshPeriodMs = conf.getTimeDuration(
RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY,
RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_DEFAULT,
TimeUnit.SECONDS, TimeUnit.MILLISECONDS);
if (activeNNStateIdRefreshPeriodMs < 0) {
LOG.info("Periodic stateId freshness check is disabled"
+ " since '{}' is {}ms, which is less than 0.",
RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY,
activeNNStateIdRefreshPeriodMs);
}
this.lastActiveNNRefreshTimes = new ConcurrentHashMap<>();
}
/**
@ -1707,10 +1729,13 @@ public class RouterRpcClient {
boolean isObserverRead) throws IOException {
final List<? extends FederationNamenodeContext> namenodes;
if (RouterStateIdContext.getClientStateIdFromCurrentCall(nsId) > Long.MIN_VALUE) {
namenodes = namenodeResolver.getNamenodesForNameserviceId(nsId, isObserverRead);
} else {
namenodes = namenodeResolver.getNamenodesForNameserviceId(nsId, false);
boolean listObserverNamenodesFirst = isObserverRead
&& isNamespaceStateIdFresh(nsId)
&& (RouterStateIdContext.getClientStateIdFromCurrentCall(nsId) > Long.MIN_VALUE);
namenodes = namenodeResolver.getNamenodesForNameserviceId(nsId, listObserverNamenodesFirst);
if (!listObserverNamenodesFirst) {
// Refresh time of last call to active NameNode.
getTimeOfLastCallToActive(nsId).accumulate(Time.monotonicNow());
}
if (namenodes == null || namenodes.isEmpty()) {
@ -1721,7 +1746,8 @@ public class RouterRpcClient {
}
private boolean isObserverReadEligible(String nsId, Method method) {
boolean isReadEnabledForNamespace = observerReadEnabledDefault != observerReadEnabledOverrides.contains(nsId);
boolean isReadEnabledForNamespace =
observerReadEnabledDefault != observerReadEnabledOverrides.contains(nsId);
return isReadEnabledForNamespace && isReadCall(method);
}
@ -1735,4 +1761,24 @@ public class RouterRpcClient {
}
return !method.getAnnotationsByType(ReadOnly.class)[0].activeOnly();
}
/**
* Checks and sets last refresh time for a namespace's stateId.
* Returns true if refresh time is newer than threshold.
* Otherwise, return false and call should be handled by active namenode.
* @param nsId namespaceID
*/
@VisibleForTesting
boolean isNamespaceStateIdFresh(String nsId) {
if (activeNNStateIdRefreshPeriodMs < 0) {
return true;
}
long timeSinceRefreshMs = Time.monotonicNow() - getTimeOfLastCallToActive(nsId).get();
return (timeSinceRefreshMs <= activeNNStateIdRefreshPeriodMs);
}
private LongAccumulator getTimeOfLastCallToActive(String namespaceId) {
return lastActiveNNRefreshTimes
.computeIfAbsent(namespaceId, key -> new LongAccumulator(Math::max, 0));
}
}
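A short Java sketch of tuning the refresh period read in the constructor above; per the code, a negative duration disables the periodic freshness check, and the value below is illustrative:

```java
import org.apache.hadoop.conf.Configuration;
// Assumed package for RBFConfigKeys in hadoop-hdfs-rbf.
import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys;

public class ObserverStateIdRefreshExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Route reads to observers only if the active NameNode was contacted
    // within the last 30 seconds (default is 15s).
    conf.set(RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY, "30s");
    // A negative duration such as "-1s" disables the freshness check entirely.
  }
}
```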

View File

@ -385,6 +385,8 @@ public class RouterWebHdfsMethods extends NamenodeWebHdfsMethods {
case GETXATTRS:
case LISTXATTRS:
case CHECKACCESS:
case GETLINKTARGET:
case GETFILELINKSTATUS:
{
return super.get(ugi, delegation, username, doAsUser, fullpath, op,
offset, length, renewer, bufferSize, xattrNames, xattrEncoding,

View File

@ -113,6 +113,7 @@ public abstract class CachedRecordStore<R extends BaseRecord>
if (force || isUpdateTime()) {
List<R> newRecords = null;
long t = -1;
long startTime = Time.monotonicNow();
try {
QueryResult<R> result = getDriver().get(getRecordClass());
newRecords = result.getRecords();
@ -143,6 +144,7 @@ public abstract class CachedRecordStore<R extends BaseRecord>
StateStoreMetrics metrics = getDriver().getMetrics();
if (metrics != null) {
String recordName = getRecordClass().getSimpleName();
metrics.setCacheLoading(recordName, Time.monotonicNow() - startTime);
metrics.setCacheSize(recordName, this.records.size());
}

View File

@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.server.federation.store.driver;
import java.net.InetAddress;
import java.util.Collection;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
import org.apache.hadoop.hdfs.server.federation.store.StateStoreService;
@ -35,6 +37,8 @@ import org.slf4j.LoggerFactory;
* provider. Driver implementations will extend this class and implement some of
* the default methods.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public abstract class StateStoreDriver implements StateStoreRecordOperations {
private static final Logger LOG =

View File

@ -23,6 +23,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils;
import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver;
import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
@ -39,6 +41,8 @@ import org.apache.hadoop.hdfs.server.federation.store.records.QueryResult;
* optimization, such as custom get/put/remove queries, depending on the
* capabilities of the data store.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public abstract class StateStoreBaseImpl extends StateStoreDriver {
@Override

View File

@ -25,14 +25,24 @@ import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
import org.apache.hadoop.hdfs.server.federation.store.StateStoreUnavailableException;
import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils;
@ -69,6 +79,8 @@ public abstract class StateStoreFileBaseImpl
/** If it is initialized. */
private boolean initialized = false;
private ExecutorService concurrentStoreAccessPool;
/**
* Get the reader of a record for the file system.
@ -137,6 +149,8 @@ public abstract class StateStoreFileBaseImpl
*/
protected abstract String getRootDir();
protected abstract int getConcurrentFilesAccessNumThreads();
/**
* Set the driver as initialized.
*
@ -168,9 +182,31 @@ public abstract class StateStoreFileBaseImpl
return false;
}
setInitialized(true);
int threads = getConcurrentFilesAccessNumThreads();
if (threads > 1) {
this.concurrentStoreAccessPool =
new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<>(),
new ThreadFactoryBuilder()
.setNameFormat("state-store-file-based-concurrent-%d")
.setDaemon(true).build());
LOG.info("File based state store will be accessed concurrently with {} max threads", threads);
} else {
LOG.info("File based state store will be accessed serially");
}
return true;
}
@Override
public void close() throws Exception {
if (this.concurrentStoreAccessPool != null) {
this.concurrentStoreAccessPool.shutdown();
boolean isTerminated = this.concurrentStoreAccessPool.awaitTermination(5, TimeUnit.SECONDS);
LOG.info("Concurrent store access pool is terminated: {}", isTerminated);
this.concurrentStoreAccessPool = null;
}
}
@Override
public <T extends BaseRecord> boolean initRecordStorage(
String className, Class<T> recordClass) {
@ -198,22 +234,29 @@ public abstract class StateStoreFileBaseImpl
verifyDriverReady();
long start = monotonicNow();
StateStoreMetrics metrics = getMetrics();
List<T> ret = new ArrayList<>();
List<T> result = Collections.synchronizedList(new ArrayList<>());
try {
String path = getPathForClass(clazz);
List<String> children = getChildren(path);
for (String child : children) {
String pathRecord = path + "/" + child;
if (child.endsWith(TMP_MARK)) {
LOG.debug("There is a temporary file {} in {}", child, path);
if (isOldTempRecord(child)) {
LOG.warn("Removing {} as it's an old temporary record", child);
remove(pathRecord);
}
} else {
T record = getRecord(pathRecord, clazz);
ret.add(record);
List<Callable<Void>> callables = new ArrayList<>();
children.forEach(child -> callables.add(
() -> getRecordsFromFileAndRemoveOldTmpRecords(clazz, result, path, child)));
if (this.concurrentStoreAccessPool != null) {
// Read records concurrently
List<Future<Void>> futures = this.concurrentStoreAccessPool.invokeAll(callables);
for (Future<Void> future : futures) {
future.get();
}
} else {
// Read records serially
callables.forEach(e -> {
try {
e.call();
} catch (Exception ex) {
LOG.error("Failed to retrieve record using file operations.", ex);
throw new RuntimeException(ex);
}
});
}
} catch (Exception e) {
if (metrics != null) {
@ -227,7 +270,37 @@ public abstract class StateStoreFileBaseImpl
if (metrics != null) {
metrics.addRead(monotonicNow() - start);
}
return new QueryResult<T>(ret, getTime());
return new QueryResult<>(result, getTime());
}
/**
* Get the state store record from the given path (path/child) and add the record to the
* result list.
*
* @param clazz Class of the record.
* @param result The list of result records. Each record read is added to it unless the given
* path represents an old temp file.
* @param path The parent path.
* @param child The child path under the parent path. Together, path and child form the file
* location of the given record.
* @param <T> Record class of the records.
* @return Void.
* @throws IOException If the file read operation fails.
*/
private <T extends BaseRecord> Void getRecordsFromFileAndRemoveOldTmpRecords(Class<T> clazz,
List<T> result, String path, String child) throws IOException {
String pathRecord = path + "/" + child;
if (child.endsWith(TMP_MARK)) {
LOG.debug("There is a temporary file {} in {}", child, path);
if (isOldTempRecord(child)) {
LOG.warn("Removing {} as it's an old temporary record", child);
remove(pathRecord);
}
} else {
T record = getRecord(pathRecord, clazz);
result.add(record);
}
return null;
}
/**
@ -260,23 +333,17 @@ public abstract class StateStoreFileBaseImpl
*/
private <T extends BaseRecord> T getRecord(
final String path, final Class<T> clazz) throws IOException {
BufferedReader reader = getReader(path);
try {
try (BufferedReader reader = getReader(path)) {
String line;
while ((line = reader.readLine()) != null) {
if (!line.startsWith("#") && line.length() > 0) {
try {
T record = newRecord(line, clazz, false);
return record;
return newRecord(line, clazz, false);
} catch (Exception ex) {
LOG.error("Cannot parse line {} in file {}", line, path, ex);
}
}
}
} finally {
if (reader != null) {
reader.close();
}
}
throw new IOException("Cannot read " + path + " for record " +
clazz.getSimpleName());
@ -330,13 +397,12 @@ public abstract class StateStoreFileBaseImpl
record.setDateModified(this.getTime());
toWrite.put(recordPath, record);
} else if (errorIfExists) {
LOG.error("Attempt to insert record {} that already exists",
recordPath);
LOG.error("Attempt to insert record {} that already exists", recordPath);
if (metrics != null) {
metrics.addFailure(monotonicNow() - start);
}
return false;
} else {
} else {
LOG.debug("Not updating {}", record);
}
} else {
@ -345,36 +411,81 @@ public abstract class StateStoreFileBaseImpl
}
// Write the records
boolean success = true;
for (Entry<String, T> entry : toWrite.entrySet()) {
String recordPath = entry.getKey();
String recordPathTemp = recordPath + "." + now() + TMP_MARK;
boolean recordWrittenSuccessfully = true;
try (BufferedWriter writer = getWriter(recordPathTemp)) {
T record = entry.getValue();
String line = serializeString(record);
writer.write(line);
} catch (IOException e) {
LOG.error("Cannot write {}", recordPathTemp, e);
recordWrittenSuccessfully = false;
success = false;
final AtomicBoolean success = new AtomicBoolean(true);
final List<Callable<Void>> callables = new ArrayList<>();
toWrite.entrySet().forEach(entry -> callables.add(() -> writeRecordToFile(success, entry)));
if (this.concurrentStoreAccessPool != null) {
// Write records concurrently
List<Future<Void>> futures = null;
try {
futures = this.concurrentStoreAccessPool.invokeAll(callables);
} catch (InterruptedException e) {
success.set(false);
LOG.error("Failed to put record concurrently.", e);
}
// Commit
if (recordWrittenSuccessfully && !rename(recordPathTemp, recordPath)) {
LOG.error("Failed committing record into {}", recordPath);
success = false;
if (futures != null) {
for (Future<Void> future : futures) {
try {
future.get();
} catch (InterruptedException | ExecutionException e) {
success.set(false);
LOG.error("Failed to retrieve results from concurrent record put runs.", e);
}
}
}
} else {
// Write records serially
callables.forEach(callable -> {
try {
callable.call();
} catch (Exception e) {
success.set(false);
LOG.error("Failed to put record.", e);
}
});
}
long end = monotonicNow();
if (metrics != null) {
if (success) {
if (success.get()) {
metrics.addWrite(end - start);
} else {
metrics.addFailure(end - start);
}
}
return success;
return success.get();
}
/**
* Writes the state store record to the file. The record is first written to a temp location
* and then renamed to the final location given by the entry key.
*
* @param success The atomic boolean that gets updated to false if the file write operation fails.
* @param entry The entry of the record path and the state store record to be written to the file
* by first writing to a temp location and then renaming it to the record path.
* @param <T> Record class of the records.
* @return Void.
*/
private <T extends BaseRecord> Void writeRecordToFile(AtomicBoolean success,
Entry<String, T> entry) {
String recordPath = entry.getKey();
String recordPathTemp = recordPath + "." + now() + TMP_MARK;
boolean recordWrittenSuccessfully = true;
try (BufferedWriter writer = getWriter(recordPathTemp)) {
T record = entry.getValue();
String line = serializeString(record);
writer.write(line);
} catch (IOException e) {
LOG.error("Cannot write {}", recordPathTemp, e);
recordWrittenSuccessfully = false;
success.set(false);
}
// Commit
if (recordWrittenSuccessfully && !rename(recordPathTemp, recordPath)) {
LOG.error("Failed committing record into {}", recordPath);
success.set(false);
}
return null;
}
@Override

View File

@ -109,6 +109,12 @@ public class StateStoreFileImpl extends StateStoreFileBaseImpl {
return this.rootDirectory;
}
@Override
protected int getConcurrentFilesAccessNumThreads() {
return getConf().getInt(RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS,
RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS_DEFAULT);
}
@Override
protected <T extends BaseRecord> BufferedReader getReader(String filename) {
BufferedReader reader = null;
@ -144,6 +150,7 @@ public class StateStoreFileImpl extends StateStoreFileBaseImpl {
@Override
public void close() throws Exception {
super.close();
setInitialized(false);
}

View File

@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory;
/**
* {@link StateStoreDriver} implementation based on a filesystem. The common
* implementation uses HDFS as a backend. The path can be specified setting
* dfs.federation.router.driver.fs.path=hdfs://host:port/path/to/store.
* dfs.federation.router.store.driver.fs.path=hdfs://host:port/path/to/store.
*/
public class StateStoreFileSystemImpl extends StateStoreFileBaseImpl {
@ -117,8 +117,15 @@ public class StateStoreFileSystemImpl extends StateStoreFileBaseImpl {
return this.workPath;
}
@Override
protected int getConcurrentFilesAccessNumThreads() {
return getConf().getInt(RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS,
RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS_DEFAULT);
}
@Override
public void close() throws Exception {
super.close();
if (fs != null) {
fs.close();
}

View File

@ -0,0 +1,425 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.federation.store.driver.impl;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
import org.apache.hadoop.hdfs.server.federation.router.security.token.SQLConnectionFactory;
import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils;
import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
import org.apache.hadoop.hdfs.server.federation.store.records.DisabledNameservice;
import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState;
import org.apache.hadoop.hdfs.server.federation.store.records.MountTable;
import org.apache.hadoop.hdfs.server.federation.store.records.Query;
import org.apache.hadoop.hdfs.server.federation.store.records.QueryResult;
import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils.*;
/**
* StateStoreDriver implementation based on MySQL.
* There is a separate table for each record type. Each table has just two
* columns, recordKey and recordValue.
*/
public class StateStoreMySQLImpl extends StateStoreSerializableImpl {
public static final String SQL_STATE_STORE_CONF_PREFIX = "state-store-mysql.";
public static final String CONNECTION_URL =
SQL_STATE_STORE_CONF_PREFIX + "connection.url";
public static final String CONNECTION_USERNAME =
SQL_STATE_STORE_CONF_PREFIX + "connection.username";
public static final String CONNECTION_PASSWORD =
SQL_STATE_STORE_CONF_PREFIX + "connection.password";
public static final String CONNECTION_DRIVER =
SQL_STATE_STORE_CONF_PREFIX + "connection.driver";
private static final Logger LOG =
LoggerFactory.getLogger(StateStoreMySQLImpl.class);
private SQLConnectionFactory connectionFactory;
/** If the driver has been initialized. */
private boolean initialized = false;
private final static Set<String> VALID_TABLES = Collections.unmodifiableSet(
new HashSet<>(Arrays.asList(
MembershipState.class.getSimpleName(),
RouterState.class.getSimpleName(),
MountTable.class.getSimpleName(),
DisabledNameservice.class.getSimpleName()
))
);
@Override
public boolean initDriver() {
Configuration conf = getConf();
connectionFactory = new MySQLStateStoreHikariDataSourceConnectionFactory(conf);
initialized = true;
LOG.info("MySQL state store connection factory initialized");
return true;
}
@Override
public <T extends BaseRecord> boolean initRecordStorage(String className, Class<T> clazz) {
String tableName = getAndValidateTableNameForClass(clazz);
try (Connection connection = connectionFactory.getConnection();
ResultSet resultSet = connection
.getMetaData()
.getTables(null, null, tableName, null)) {
if (resultSet.next()) {
return true;
}
} catch (SQLException e) {
LOG.error("Could not check if table {} able exists", tableName);
}
try (Connection connection = connectionFactory.getConnection();
Statement statement = connection.createStatement()) {
String sql = String.format("CREATE TABLE %s ("
+ "recordKey VARCHAR (255) NOT NULL,"
+ "recordValue VARCHAR (2047) NOT NULL, "
+ "PRIMARY KEY(recordKey))", tableName);
statement.execute(sql);
return true;
} catch (SQLException e) {
LOG.error("Cannot create table {} for record type {}.", tableName, className, e);
return false;
}
}
@Override
public boolean isDriverReady() {
return this.initialized;
}
@Override
public void close() throws Exception {
connectionFactory.shutdown();
}
@Override
public <T extends BaseRecord> QueryResult<T> get(Class<T> clazz)
throws IOException {
String tableName = getAndValidateTableNameForClass(clazz);
verifyDriverReady();
long start = Time.monotonicNow();
StateStoreMetrics metrics = getMetrics();
List<T> ret = new ArrayList<>();
try (Connection connection = connectionFactory.getConnection();
PreparedStatement statement = connection.prepareStatement(
String.format("SELECT * FROM %s", tableName))) {
try (ResultSet result = statement.executeQuery()) {
while(result.next()) {
String recordValue = result.getString("recordValue");
T record = newRecord(recordValue, clazz, false);
ret.add(record);
}
}
} catch (SQLException e) {
if (metrics != null) {
metrics.addFailure(Time.monotonicNow() - start);
}
String msg = "Cannot fetch records for " + clazz.getSimpleName();
LOG.error(msg, e);
throw new IOException(msg, e);
}
if (metrics != null) {
metrics.addRead(Time.monotonicNow() - start);
}
return new QueryResult<>(ret, getTime());
}
@Override
public <T extends BaseRecord> boolean putAll(
List<T> records, boolean allowUpdate, boolean errorIfExists) throws IOException {
if (records.isEmpty()) {
return true;
}
verifyDriverReady();
StateStoreMetrics metrics = getMetrics();
long start = Time.monotonicNow();
boolean success = true;
for (T record : records) {
String tableName = getAndValidateTableNameForClass(record.getClass());
String primaryKey = getPrimaryKey(record);
String data = serializeString(record);
if (recordExists(tableName, primaryKey)) {
if (allowUpdate) {
// Update the mod time stamp. Many backends will use their
// own timestamp for the mod time.
record.setDateModified(this.getTime());
if (!updateRecord(tableName, primaryKey, data)) {
LOG.error("Cannot write {} into table {}", primaryKey, tableName);
success = false;
}
} else {
if (errorIfExists) {
LOG.error("Attempted to insert record {} that already exists "
+ "in table {} and updates are disallowed.", primaryKey, tableName);
if (metrics != null) {
metrics.addFailure(Time.monotonicNow() - start);
}
return false;
} else {
LOG.debug("Not updating {} as updates are not allowed", record);
}
}
} else {
if (!insertRecord(tableName, primaryKey, data)) {
LOG.error("Cannot write {} in table {}", primaryKey, tableName);
success = false;
}
}
}
long end = Time.monotonicNow();
if (metrics != null) {
if (success) {
metrics.addWrite(end - start);
} else {
metrics.addFailure(end - start);
}
}
return success;
}
@Override
public <T extends BaseRecord> boolean removeAll(Class<T> clazz) throws IOException {
verifyDriverReady();
long startTimeMs = Time.monotonicNow();
StateStoreMetrics metrics = getMetrics();
boolean success = true;
String tableName = getAndValidateTableNameForClass(clazz);
try (Connection connection = connectionFactory.getConnection(true);
PreparedStatement truncateTable = connection.prepareStatement(
String.format("TRUNCATE TABLE %s", tableName))){
truncateTable.execute();
} catch (SQLException e) {
LOG.error("Could not remove all records in table {}", tableName, e);
success = false;
}
if (metrics != null) {
long durationMs = Time.monotonicNow() - startTimeMs;
if (success) {
metrics.addRemove(durationMs);
} else {
metrics.addFailure(durationMs);
}
}
return success;
}
@Override
public <T extends BaseRecord> int remove(Class<T> clazz, Query<T> query) throws IOException {
verifyDriverReady();
if (query == null) {
return 0;
}
long startTimeMs = Time.monotonicNow();
StateStoreMetrics metrics = getMetrics();
int removed = 0;
// Get the current records
try {
final QueryResult<T> result = get(clazz);
final List<T> existingRecords = result.getRecords();
// Select the existing records that match the query so they can be removed
final List<T> recordsToRemove = filterMultiple(query, existingRecords);
boolean success = true;
for (T recordToRemove : recordsToRemove) {
String tableName = getAndValidateTableNameForClass(clazz);
String primaryKey = getPrimaryKey(recordToRemove);
if (removeRecord(tableName, primaryKey)) {
removed++;
} else {
LOG.error("Cannot remove record {} from table {}", primaryKey, tableName);
success = false;
}
}
if (!success) {
LOG.error("Cannot remove records {} query {}", clazz, query);
if (metrics != null) {
metrics.addFailure(Time.monotonicNow() - startTimeMs);
}
}
} catch (IOException e) {
LOG.error("Cannot remove records {} query {}", clazz, query, e);
if (metrics != null) {
metrics.addFailure(Time.monotonicNow() - startTimeMs);
}
}
if (removed > 0 && metrics != null) {
metrics.addRemove(Time.monotonicNow() - startTimeMs);
}
return removed;
}
/**
* Insert a record with a given key into the specified table.
* @param tableName Name of table to modify
* @param key Primary key for the record.
* @param data Serialized value of the record.
* @return True if the operation is successful, false otherwise.
*/
protected boolean insertRecord(String tableName, String key, String data) {
try (Connection connection = connectionFactory.getConnection(true);
PreparedStatement statement = connection.prepareStatement(
String.format("INSERT INTO %s (recordKey, recordValue) VALUES (?, ?)", tableName))) {
statement.setString(1, key);
statement.setString(2, data);
statement.execute();
} catch (SQLException e) {
LOG.error("Failed to insert record {} into table {}", key, tableName, e);
return false;
}
return true;
}
/**
* Updates the record with a given key in the specified table.
* @param tableName Name of table to modify
* @param key Primary key for the record.
* @param data New serialized value of the record.
* @return True if the operation is successful, false otherwise.
*/
protected boolean updateRecord(String tableName, String key, String data) {
try (Connection connection = connectionFactory.getConnection(true);
PreparedStatement statement = connection.prepareStatement(
String.format("UPDATE %s SET recordValue = ? WHERE recordKey = ?", tableName))) {
statement.setString(1, data);
statement.setString(2, key);
statement.execute();
} catch (SQLException e){
LOG.error("Failed to update record {} in table {}", key, tableName, e);
return false;
}
return true;
}
/**
* Checks if a record with a given key exists in the specified table.
* @param tableName Name of table to check
* @param key Primary key for the record.
* @return True if the record exists, false otherwise.
*/
protected boolean recordExists(String tableName, String key) {
try (Connection connection = connectionFactory.getConnection(true);
PreparedStatement statement = connection.prepareStatement(
String.format("SELECT * FROM %s WHERE recordKey = ?", tableName))) {
statement.setString(1, key);
try (ResultSet result = statement.executeQuery()) {
return result.next();
}
} catch (SQLException e) {
LOG.error("Failed to check existence of record {} in table {}", key, tableName, e);
return false;
}
}
/**
* Removes the record with a given key from the specified table.
* @param tableName Name of table to modify
* @param key Primary key for the record.
* @return True if the operation is successful, false otherwise.
*/
protected boolean removeRecord(String tableName, String key) {
try (Connection connection = connectionFactory.getConnection(true);
PreparedStatement statement = connection.prepareStatement(
String.format("DELETE FROM %s WHERE recordKey = ?", tableName))) {
statement.setString(1, key);
statement.execute();
return true;
} catch (SQLException e) {
LOG.error("Failed to remove record {} in table {}", key, tableName, e);
return false;
}
}
/**
* Get the table name for a record class and validate that it is one of the supported
* record types.
* @param clazz Class of the record.
* @return Table name for this record class.
*/
private <T extends BaseRecord> String getAndValidateTableNameForClass(final Class<T> clazz) {
String tableName = StateStoreUtils.getRecordName(clazz);
if (VALID_TABLES.contains(tableName)) {
return tableName;
} else {
throw new IllegalArgumentException(tableName + " is not a valid table name");
}
}
/**
* Class that relies on a HikariDataSource to provide SQL connections.
*/
static class MySQLStateStoreHikariDataSourceConnectionFactory
implements SQLConnectionFactory {
protected final static String HIKARI_PROPS = SQL_STATE_STORE_CONF_PREFIX
+ "connection.hikari.";
private final HikariDataSource dataSource;
MySQLStateStoreHikariDataSourceConnectionFactory(Configuration conf) {
Properties properties = new Properties();
properties.setProperty("jdbcUrl", conf.get(StateStoreMySQLImpl.CONNECTION_URL));
properties.setProperty("username", conf.get(StateStoreMySQLImpl.CONNECTION_USERNAME));
properties.setProperty("password", conf.get(StateStoreMySQLImpl.CONNECTION_PASSWORD));
properties.setProperty("driverClassName", conf.get(StateStoreMySQLImpl.CONNECTION_DRIVER));
// Include hikari connection properties
properties.putAll(conf.getPropsWithPrefix(HIKARI_PROPS));
HikariConfig hikariConfig = new HikariConfig(properties);
this.dataSource = new HikariDataSource(hikariConfig);
}
@Override
public Connection getConnection() throws SQLException {
return dataSource.getConnection();
}
@Override
public void shutdown() {
// Close database connections
dataSource.close();
}
}
}

View File

@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.server.federation.store.driver.impl;
import java.io.IOException;
import java.util.Collection;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreSerializer;
@ -29,6 +31,8 @@ import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
* State Store driver that stores a serialization of the records. The serializer
* is pluggable.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public abstract class StateStoreSerializableImpl extends StateStoreBaseImpl {
/** Mark for slashes in path names. */

View File

@ -117,7 +117,9 @@ public class MountTableStoreImpl extends MountTableStore {
AddMountTableEntryResponse response =
AddMountTableEntryResponse.newInstance();
response.setStatus(status);
updateCacheAllRouters();
if (status) {
updateCacheAllRouters();
}
return response;
} else {
AddMountTableEntryResponse response =
@ -139,7 +141,9 @@ public class MountTableStoreImpl extends MountTableStore {
UpdateMountTableEntryResponse response =
UpdateMountTableEntryResponse.newInstance();
response.setStatus(status);
updateCacheAllRouters();
if (status) {
updateCacheAllRouters();
}
return response;
} else {
UpdateMountTableEntryResponse response =
@ -170,7 +174,9 @@ public class MountTableStoreImpl extends MountTableStore {
RemoveMountTableEntryResponse response =
RemoveMountTableEntryResponse.newInstance();
response.setStatus(status);
updateCacheAllRouters();
if (status) {
updateCacheAllRouters();
}
return response;
}

View File

@ -362,7 +362,8 @@
Class to implement the State Store. There are four implementation classes currently
supported:
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl,
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl and
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl,
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreMySQLImpl and
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.
These implementation classes use the local file, filesystem, MySQL and ZooKeeper as a backend respectively.
By default, ZooKeeper is used as the State Store.
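For illustration only, a minimal sketch of selecting one of these drivers programmatically instead of through hdfs-rbf-site.xml; the literal key name dfs.federation.router.store.driver.class and the wrapper class are assumptions of this sketch, not part of the change above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreMySQLImpl;

public class StateStoreDriverSelectionSketch {
  // Build a Configuration that points the Router at the MySQL based State Store driver.
  public static Configuration withMySqlDriver() {
    Configuration conf = new Configuration();
    conf.set("dfs.federation.router.store.driver.class",
        StateStoreMySQLImpl.class.getName());
    return conf;
  }
}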
@ -884,4 +885,42 @@
of namespaces in use and the latency of the msync requests.
</description>
</property>
<property>
<name>dfs.federation.router.observer.state.id.refresh.period</name>
<value>15s</value>
<description>
Period to refresh the namespace stateID using the active namenode. This ensures the
namespace stateID is refreshed even when an observer is trailing behind.
If this is below 0, the auto-refresh is disabled.
</description>
</property>
<property>
<name>dfs.federation.router.store.driver.file.async.threads</name>
<value>0</value>
<description>
Maximum number of threads used by StateStoreFileImpl to access state store files concurrently.
This setting only applies to
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl.
The default value is 0, which means StateStoreFileImpl works in sync mode and
accesses one file at a time.
Use a positive integer value to enable concurrent file access.
</description>
</property>
<property>
<name>dfs.federation.router.store.driver.fs.async.threads</name>
<value>0</value>
<description>
Maximum number of threads used by StateStoreFileSystemImpl to access state store files on the
backing filesystem concurrently.
This setting only applies to
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl.
The default value is 0, which means StateStoreFileSystemImpl works in sync mode and
accesses one file on the filesystem at a time.
Use a positive integer value to enable concurrent file access on the backing filesystem.
</description>
</property>
</configuration>
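As a quick, hedged illustration of the new keys documented above, one way the asynchronous access could be enabled from code; the wrapper class name and the choice of 16 threads are invented for this sketch:

import org.apache.hadoop.conf.Configuration;

public class RouterStoreTuningSketch {
  // Enable concurrent state store file access for both file based drivers.
  // A value of 0 (the default) keeps the serial, one-file-at-a-time behaviour.
  public static Configuration withConcurrentStoreAccess() {
    Configuration conf = new Configuration();
    conf.setInt("dfs.federation.router.store.driver.file.async.threads", 16);
    conf.setInt("dfs.federation.router.store.driver.fs.async.threads", 16);
    return conf;
  }
}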

View File

@ -552,6 +552,16 @@ public class TestMountTableResolver {
assertEquals(100000, mountTable.getMountPoints("/").size());
assertEquals(100000, mountTable.getMounts("/").size());
// test concurrency for mount table cache size when it gets updated frequently
for (int i = 0; i < 20; i++) {
mountTable.getDestinationForPath("/" + i);
if (i >= 10) {
assertEquals(TEST_MAX_CACHE_SIZE, mountTable.getCacheSize());
} else {
assertEquals(i + 1, mountTable.getCacheSize());
}
}
assertEquals(TEST_MAX_CACHE_SIZE, mountTable.getCacheSize());
// Add 1000 entries in deep list
mountTable.refreshEntries(emptyList);

View File

@ -34,9 +34,11 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAccumulator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.ClientGSIContext;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RouterFederatedStateProto;
@ -50,6 +52,7 @@ import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeServi
import org.apache.hadoop.hdfs.server.federation.resolver.MembershipNamenodeResolver;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.AfterEach;
@ -95,7 +98,9 @@ public class TestObserverWithRouter {
conf.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "0ms");
conf.setBoolean(DFS_NAMENODE_STATE_CONTEXT_ENABLED_KEY, true);
if (confOverrides != null) {
conf.addResource(confOverrides);
confOverrides
.iterator()
.forEachRemaining(entry -> conf.set(entry.getKey(), entry.getValue()));
}
cluster = new MiniRouterDFSCluster(true, 2, numberOfNamenode);
cluster.addNamenodeOverrides(conf);
@ -639,4 +644,42 @@ public class TestObserverWithRouter {
assertEquals("ns0", namespace1.get(0));
assertTrue(namespace2.isEmpty());
}
@Test
@Tag(SKIP_BEFORE_EACH_CLUSTER_STARTUP)
public void testPeriodicStateRefreshUsingActiveNamenode() throws Exception {
Path rootPath = new Path("/");
Configuration confOverride = new Configuration(false);
confOverride.set(RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY, "500ms");
confOverride.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "3s");
startUpCluster(1, confOverride);
fileSystem = routerContext.getFileSystem(getConfToEnableObserverReads());
fileSystem.listStatus(rootPath);
int initialLengthOfRootListing = fileSystem.listStatus(rootPath).length;
DFSClient activeClient = cluster.getNamenodes("ns0")
.stream()
.filter(nnContext -> nnContext.getNamenode().isActiveState())
.findFirst().orElseThrow(() -> new IllegalStateException("No active namenode."))
.getClient();
for (int i = 0; i < 10; i++) {
activeClient.mkdirs("/dir" + i, null, false);
}
activeClient.close();
// Wait long enough for state in router to be considered stale.
GenericTestUtils.waitFor(
() -> !routerContext
.getRouterRpcClient()
.isNamespaceStateIdFresh("ns0"),
100,
10000,
"Timeout: Namespace state was never considered stale.");
FileStatus[] rootFolderAfterMkdir = fileSystem.listStatus(rootPath);
assertEquals("List-status should show newly created directories.",
initialLengthOfRootListing + 10, rootFolderAfterMkdir.length);
}
}

View File

@ -167,8 +167,9 @@ public class TestRouterAdminCLI {
assertEquals(0, ToolRunner.run(admin, argv));
assertEquals(-1, ToolRunner.run(admin, argv));
stateStore.loadCache(MountTableStoreImpl.class, true);
verifyMountTableContents(src, dest);
GetMountTableEntriesRequest getRequest = GetMountTableEntriesRequest
.newInstance(src);
GetMountTableEntriesResponse getResponse = client.getMountTableManager()
@ -207,6 +208,15 @@ public class TestRouterAdminCLI {
assertTrue(mountTable.isFaultTolerant());
}
private void verifyMountTableContents(String src, String dest) throws Exception {
String[] argv = new String[] {"-ls", "/"};
System.setOut(new PrintStream(out));
assertEquals(0, ToolRunner.run(admin, argv));
String response = out.toString();
assertTrue("The response should have " + src + ": " + response, response.contains(src));
assertTrue("The response should have " + dest + ": " + response, response.contains(dest));
}
@Test
public void testAddMountTableNotNormalized() throws Exception {
String nsId = "ns0";

View File

@ -720,6 +720,47 @@ public class TestRouterRPCMultipleDestinationMountTableResolver {
}
}
/**
* Test rename a dir from src dir (mapped to both ns0 and ns1) to ns0.
*/
@Test
public void testRenameWithMultiDestinations() throws Exception {
//create a mount point with multiple destinations
String srcDir = "/mount-source-dir";
Path path = new Path(srcDir);
Map<String, String> destMap = new HashMap<>();
destMap.put("ns0", srcDir);
destMap.put("ns1", srcDir);
nnFs0.mkdirs(path);
nnFs1.mkdirs(path);
MountTable addEntry =
MountTable.newInstance(srcDir, destMap);
addEntry.setDestOrder(DestinationOrder.RANDOM);
assertTrue(addMountTable(addEntry));
//create a mount point with a single destinations ns0
String targetDir = "/ns0_test";
nnFs0.mkdirs(new Path(targetDir));
MountTable addDstEntry = MountTable.newInstance(targetDir,
Collections.singletonMap("ns0", targetDir));
assertTrue(addMountTable(addDstEntry));
//mkdir sub dirs in srcDir mapping ns0 & ns1
routerFs.mkdirs(new Path(srcDir + "/dir1"));
routerFs.mkdirs(new Path(srcDir + "/dir1/dir_1"));
routerFs.mkdirs(new Path(srcDir + "/dir1/dir_2"));
routerFs.mkdirs(new Path(targetDir));
//try to rename sub dir in srcDir (mapping to ns0 & ns1) to targetDir
// (mapping ns0)
LambdaTestUtils.intercept(IOException.class, "The number of" +
" remote locations for both source and target should be same.",
() -> {
routerFs.rename(new Path(srcDir + "/dir1/dir_1"),
new Path(targetDir));
});
}
/**
* Test to verify rename operation on directories in case of multiple
* destinations.

View File

@ -56,7 +56,8 @@ public class TestRouterWithSecureStartup {
@Test
public void testStartupWithoutKeytab() throws Exception {
testCluster(DFS_ROUTER_KEYTAB_FILE_KEY,
"Running in secure mode, but config doesn't have a keytab");
"Running in secure mode, but config doesn't have a keytab for "
+ "key: dfs.federation.router.keytab.file");
}
@Test

View File

@ -48,6 +48,8 @@ import org.apache.hadoop.hdfs.server.federation.store.records.Query;
import org.apache.hadoop.hdfs.server.federation.store.records.QueryResult;
import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
import org.apache.hadoop.hdfs.server.federation.store.records.StateStoreVersion;
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.junit.After;
import org.junit.AfterClass;
import org.slf4j.Logger;
@ -76,6 +78,10 @@ public class TestStateStoreDriverBase {
return stateStore.getDriver();
}
protected StateStoreService getStateStoreService() {
return stateStore;
}
@After
public void cleanMetrics() {
if (stateStore != null) {
@ -88,6 +94,7 @@ public class TestStateStoreDriverBase {
public static void tearDownCluster() {
if (stateStore != null) {
stateStore.stop();
stateStore = null;
}
}
@ -574,6 +581,36 @@ public class TestStateStoreDriverBase {
return getters;
}
public long getMountTableCacheLoadSamples(StateStoreDriver driver) throws IOException {
final MutableRate mountTableCache = getMountTableCache(driver);
return mountTableCache.lastStat().numSamples();
}
private static MutableRate getMountTableCache(StateStoreDriver driver) throws IOException {
StateStoreMetrics metrics = stateStore.getMetrics();
final Query<MountTable> query = new Query<>(MountTable.newInstance());
driver.getMultiple(MountTable.class, query);
final Map<String, MutableRate> cacheLoadMetrics = metrics.getCacheLoadMetrics();
final MutableRate mountTableCache = cacheLoadMetrics.get("CacheMountTableLoad");
assertNotNull("CacheMountTableLoad should be present in the state store metrics",
mountTableCache);
return mountTableCache;
}
public void testCacheLoadMetrics(StateStoreDriver driver, long numRefresh,
double expectedHigherThan) throws IOException, IllegalArgumentException {
final MutableRate mountTableCache = getMountTableCache(driver);
// CacheMountTableLoadNumOps
final long mountTableCacheLoadNumOps = getMountTableCacheLoadSamples(driver);
assertEquals("Num of samples collected should match", numRefresh, mountTableCacheLoadNumOps);
// CacheMountTableLoadAvgTime ms
final double mountTableCacheLoadAvgTimeMs = mountTableCache.lastStat().mean();
assertTrue(
"Mean time duration for cache load is expected to be higher than " + expectedHigherThan
+ " ms." + " Actual value: " + mountTableCacheLoadAvgTimeMs,
mountTableCacheLoadAvgTimeMs > expectedHigherThan);
}
/**
* Get the type of field.
*

View File

@ -18,31 +18,55 @@
package org.apache.hadoop.hdfs.server.federation.store.driver;
import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.getStateStoreConfiguration;
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_STORE_FILE_ASYNC_THREADS;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
/**
* Test the file based implementation of the State Store driver.
*/
@RunWith(Parameterized.class)
public class TestStateStoreFile extends TestStateStoreDriverBase {
@BeforeClass
public static void setupCluster() throws Exception {
private final String numFileAsyncThreads;
public TestStateStoreFile(String numFileAsyncThreads) {
this.numFileAsyncThreads = numFileAsyncThreads;
}
@Parameterized.Parameters(name = "numFileAsyncThreads-{0}")
public static List<String[]> data() {
return Arrays.asList(new String[][] {{"20"}, {"0"}});
}
private static void setupCluster(String numFsAsyncThreads) throws Exception {
Configuration conf = getStateStoreConfiguration(StateStoreFileImpl.class);
conf.setInt(FEDERATION_STORE_FILE_ASYNC_THREADS, Integer.parseInt(numFsAsyncThreads));
getStateStore(conf);
}
@Before
public void startup() throws IOException {
public void startup() throws Exception {
setupCluster(numFileAsyncThreads);
removeAll(getStateStoreDriver());
}
@After
public void tearDown() throws Exception {
tearDownCluster();
}
@Test
public void testInsert()
throws IllegalArgumentException, IllegalAccessException, IOException {
@ -73,4 +97,16 @@ public class TestStateStoreFile extends TestStateStoreDriverBase {
throws IllegalArgumentException, IllegalAccessException, IOException {
testMetrics(getStateStoreDriver());
}
@Test
public void testCacheLoadMetrics() throws IOException {
// Inject an initial CacheMountTableLoad sample of -1. If the test later observes a
// CacheMountTableLoadAvgTime of -1 ms, no sample with a value >= 0 was received, and the
// assertion that the mount table average load time is higher than -1 would fail.
getStateStoreService().getMetrics().setCacheLoading("MountTable", -1);
long curMountTableLoadNum = getMountTableCacheLoadSamples(getStateStoreDriver());
getStateStoreService().refreshCaches(true);
testCacheLoadMetrics(getStateStoreDriver(), curMountTableLoadNum + 1, -1);
}
}

View File

@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.federation.store.driver;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
@ -26,12 +28,15 @@ import org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUt
import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileBaseImpl;
import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl;
import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState;
import org.junit.AfterClass;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.mockito.stubbing.Answer;
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_STORE_FS_ASYNC_THREADS;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doThrow;
@ -41,16 +46,22 @@ import static org.mockito.Mockito.spy;
/**
* Test the FileSystem (e.g., HDFS) implementation of the State Store driver.
*/
@RunWith(Parameterized.class)
public class TestStateStoreFileSystem extends TestStateStoreDriverBase {
private static MiniDFSCluster dfsCluster;
@BeforeClass
public static void setupCluster() throws Exception {
Configuration conf = FederationStateStoreTestUtils
.getStateStoreConfiguration(StateStoreFileSystemImpl.class);
conf.set(StateStoreFileSystemImpl.FEDERATION_STORE_FS_PATH,
"/hdfs-federation/");
private final String numFsAsyncThreads;
public TestStateStoreFileSystem(String numFsAsyncThreads) {
this.numFsAsyncThreads = numFsAsyncThreads;
}
private static void setupCluster(String numFsAsyncThreads) throws Exception {
Configuration conf =
FederationStateStoreTestUtils.getStateStoreConfiguration(StateStoreFileSystemImpl.class);
conf.set(StateStoreFileSystemImpl.FEDERATION_STORE_FS_PATH, "/hdfs-federation/");
conf.setInt(FEDERATION_STORE_FS_ASYNC_THREADS, Integer.parseInt(numFsAsyncThreads));
// Create HDFS cluster to back the state store
MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
@ -60,18 +71,26 @@ public class TestStateStoreFileSystem extends TestStateStoreDriverBase {
getStateStore(conf);
}
@AfterClass
public static void tearDownCluster() {
if (dfsCluster != null) {
dfsCluster.shutdown();
}
@Parameterized.Parameters(name = "numFsAsyncThreads-{0}")
public static List<String[]> data() {
return Arrays.asList(new String[][] {{"20"}, {"0"}});
}
@Before
public void startup() throws IOException {
public void startup() throws Exception {
setupCluster(numFsAsyncThreads);
removeAll(getStateStoreDriver());
}
@After
public void tearDown() throws Exception {
tearDownCluster();
if (dfsCluster != null) {
dfsCluster.shutdown();
dfsCluster = null;
}
}
@Test
public void testInsert()
throws IllegalArgumentException, IllegalAccessException, IOException {
@ -115,4 +134,16 @@ public class TestStateStoreFileSystem extends TestStateStoreDriverBase {
testInsertWithErrorDuringWrite(driver, MembershipState.class);
}
@Test
public void testCacheLoadMetrics() throws IOException {
// Inject an initial CacheMountTableLoad sample of -1. If the test later observes a
// CacheMountTableLoadAvgTime of -1 ms, no sample with a value >= 0 was received, and the
// assertion that the mount table average load time is higher than -1 would fail.
getStateStoreService().getMetrics().setCacheLoading("MountTable", -1);
long curMountTableLoadNum = getMountTableCacheLoadSamples(getStateStoreDriver());
getStateStoreService().refreshCaches(true);
getStateStoreService().refreshCaches(true);
testCacheLoadMetrics(getStateStoreDriver(), curMountTableLoadNum + 2, -1);
}
}

View File

@ -0,0 +1,102 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.federation.store.driver;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreMySQLImpl;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.*;
/**
* Test the MySQL implementation of the State Store driver, backed here by an in-memory Derby database.
*/
public class TestStateStoreMySQL extends TestStateStoreDriverBase {
private static final String CONNECTION_URL = "jdbc:derby:memory:StateStore";
@BeforeClass
public static void initDatabase() throws Exception {
Connection connection = DriverManager.getConnection(CONNECTION_URL + ";create=true");
Statement s = connection.createStatement();
s.execute("CREATE SCHEMA TESTUSER");
Configuration conf =
getStateStoreConfiguration(StateStoreMySQLImpl.class);
conf.set(StateStoreMySQLImpl.CONNECTION_URL, CONNECTION_URL);
conf.set(StateStoreMySQLImpl.CONNECTION_USERNAME, "testuser");
conf.set(StateStoreMySQLImpl.CONNECTION_PASSWORD, "testpassword");
conf.set(StateStoreMySQLImpl.CONNECTION_DRIVER, "org.apache.derby.jdbc.EmbeddedDriver");
getStateStore(conf);
}
@Before
public void startup() throws IOException {
removeAll(getStateStoreDriver());
}
@AfterClass
public static void cleanupDatabase() {
try {
DriverManager.getConnection(CONNECTION_URL + ";drop=true");
} catch (SQLException e) {
// SQLException expected when database is dropped
if (!e.getMessage().contains("dropped")) {
throw new RuntimeException(e);
}
}
}
@Test
public void testInsert()
throws IllegalArgumentException, IllegalAccessException, IOException {
testInsert(getStateStoreDriver());
}
@Test
public void testUpdate()
throws IllegalArgumentException, ReflectiveOperationException,
IOException, SecurityException {
testPut(getStateStoreDriver());
}
@Test
public void testDelete()
throws IllegalArgumentException, IllegalAccessException, IOException {
testRemove(getStateStoreDriver());
}
@Test
public void testFetchErrors()
throws IllegalArgumentException, IllegalAccessException, IOException {
testFetchErrors(getStateStoreDriver());
}
@Test
public void testMetrics()
throws IllegalArgumentException, IllegalAccessException, IOException {
testMetrics(getStateStoreDriver());
}
}

View File

@ -206,4 +206,18 @@ public class TestStateStoreZK extends TestStateStoreDriverBase {
stateStoreDriver.setEnableConcurrent(true);
testFetchErrors(stateStoreDriver);
}
@Test
public void testCacheLoadMetrics() throws IOException {
// Inject an initial CacheMountTableLoad sample of -1. If the test later observes a
// CacheMountTableLoadAvgTime of -1 ms, no sample with a value >= 0 was received, and the
// assertion that the mount table average load time is higher than -1 would fail.
getStateStoreService().getMetrics().setCacheLoading("MountTable", -1);
long curMountTableLoadNum = getMountTableCacheLoadSamples(getStateStoreDriver());
getStateStoreService().refreshCaches(true);
getStateStoreService().refreshCaches(true);
getStateStoreService().refreshCaches(true);
testCacheLoadMetrics(getStateStoreDriver(), curMountTableLoadNum + 3, -1);
}
}

View File

@ -310,4 +310,14 @@
<Method name="reconcile" />
<Bug pattern="SWL_SLEEP_WITH_LOCK_HELD" />
</Match>
<!--
conversionPattern is set only once and used to initialize the PatternLayout object
only once. It is set by the log4j framework when configured in the log4j properties and is
accessed only during the first append operation.
-->
<Match>
<Class name="org.apache.hadoop.hdfs.util.AsyncRFAAppender"/>
<Field name="conversionPattern"/>
<Bug pattern="IS2_INCONSISTENT_SYNC"/>
</Match>
</FindBugsFilter>

File diff suppressed because one or more lines are too long

View File

@ -271,6 +271,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.namenode.redundancy.considerLoad.factor";
public static final double
DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR_DEFAULT = 2.0;
public static final String DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY =
"dfs.namenode.redundancy.considerLoadByVolume";
public static final boolean
DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_DEFAULT
= false;
public static final String DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY =
HdfsClientConfigKeys.DeprecatedKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY;
public static final int DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_DEFAULT = 3;
@ -733,12 +738,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME = "default";
public static final String DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY = "dfs.namenode.audit.log.token.tracking.id";
public static final boolean DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT = false;
public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY = "dfs.namenode.audit.log.async";
public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT = false;
public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_KEY = "dfs.namenode.audit.log.async.blocking";
public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_BLOCKING_DEFAULT = true;
public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_KEY = "dfs.namenode.audit.log.async.buffer.size";
public static final int DFS_NAMENODE_AUDIT_LOG_ASYNC_BUFFER_SIZE_DEFAULT = 128;
public static final String DFS_NAMENODE_AUDIT_LOG_DEBUG_CMDLIST = "dfs.namenode.audit.log.debug.cmdlist";
public static final String DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY =
"dfs.namenode.metrics.logger.period.seconds";

View File

@ -69,6 +69,7 @@ import org.apache.commons.cli.PosixParser;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
import org.apache.hadoop.hdfs.server.namenode.INodesInPath;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
@ -1936,4 +1937,18 @@ public class DFSUtil {
return path.charAt(parent.length()) == Path.SEPARATOR_CHAR
|| parent.equals(Path.SEPARATOR);
}
/**
* Add transfer rate metrics for valid data read and duration values.
* @param metrics metrics for datanodes
* @param read bytes read
* @param duration read duration
*/
public static void addTransferRateMetric(final DataNodeMetrics metrics, final long read, final long duration) {
if (read >= 0 && duration > 0) {
metrics.addReadTransferRate(read * 1000 / duration);
} else {
LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration);
}
}
}
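A hedged sketch of a call site for the helper above; the wrapper class, the parameter values, and the assumption that the duration is measured in milliseconds are illustrative only:

import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;

public class TransferRateMetricSketch {
  // Record the transfer rate observed for a single client read.
  static void recordRead(DataNodeMetrics metrics, long bytesRead, long durationMs) {
    // Invalid inputs (read < 0 or duration <= 0) are logged and ignored by the helper;
    // otherwise it records bytesRead * 1000 / durationMs, i.e. bytes per second
    // under the assumption that the duration is in milliseconds.
    DFSUtil.addTransferRateMetric(metrics, bytesRead, durationMs);
  }
}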

View File

@ -3987,17 +3987,11 @@ public class BlockManager implements BlockStatsMXBean {
}
if (shouldProcessExtraRedundancy(num, expectedRedundancy)) {
if (num.replicasOnStaleNodes() > 0) {
// If any of the replicas of this block are on nodes that are
// considered "stale", then these replicas may in fact have
// already been deleted. So, we cannot safely act on the
// over-replication until a later point in time, when
// the "stale" nodes have block reported.
// extra redundancy block
if (!processExtraRedundancyBlockWithoutPostpone(block, expectedRedundancy,
null, null)) {
return MisReplicationResult.POSTPONE;
}
// extra redundancy block
processExtraRedundancyBlock(block, expectedRedundancy, null, null);
return MisReplicationResult.OVER_REPLICATED;
}
@ -4020,12 +4014,26 @@ public class BlockManager implements BlockStatsMXBean {
}
}
/**
* Process blocks with redundant replicas. If there are replicas in
* stale storages, mark them in the postponedMisreplicatedBlocks.
*/
private void processExtraRedundancyBlock(final BlockInfo block,
final short replication, final DatanodeDescriptor addedNode,
DatanodeDescriptor delNodeHint) {
if (!processExtraRedundancyBlockWithoutPostpone(block, replication,
addedNode, delNodeHint)) {
postponeBlock(block);
}
}
/**
* Find how many of the containing nodes are "extra", if any.
* If there are any extras, call chooseExcessRedundancies() to
* mark them in the excessRedundancyMap.
* @return true if all redundancy replicas are removed.
*/
private void processExtraRedundancyBlock(final BlockInfo block,
private boolean processExtraRedundancyBlockWithoutPostpone(final BlockInfo block,
final short replication, final DatanodeDescriptor addedNode,
DatanodeDescriptor delNodeHint) {
assert namesystem.hasWriteLock();
@ -4035,17 +4043,17 @@ public class BlockManager implements BlockStatsMXBean {
Collection<DatanodeStorageInfo> nonExcess = new ArrayList<>();
Collection<DatanodeDescriptor> corruptNodes = corruptReplicas
.getNodes(block);
boolean hasStaleStorage = false;
Set<DatanodeStorageInfo> staleStorages = new HashSet<>();
for (DatanodeStorageInfo storage : blocksMap.getStorages(block)) {
if (storage.getState() != State.NORMAL) {
continue;
}
final DatanodeDescriptor cur = storage.getDatanodeDescriptor();
if (storage.areBlockContentsStale()) {
LOG.trace("BLOCK* processExtraRedundancyBlock: Postponing {}"
+ " since storage {} does not yet have up-to-date information.",
block, storage);
postponeBlock(block);
return;
hasStaleStorage = true;
staleStorages.add(storage);
continue;
}
if (!isExcess(cur, block)) {
if (cur.isInService()) {
@ -4058,6 +4066,13 @@ public class BlockManager implements BlockStatsMXBean {
}
chooseExcessRedundancies(nonExcess, block, replication, addedNode,
delNodeHint);
if (hasStaleStorage) {
LOG.trace("BLOCK* processExtraRedundancyBlockWithoutPostpone: Postponing {}"
+ " since storages {} does not yet have up-to-date information.",
block, staleStorages);
return false;
}
return true;
}
private void chooseExcessRedundancies(
@ -4071,12 +4086,14 @@ public class BlockManager implements BlockStatsMXBean {
if (storedBlock.isStriped()) {
chooseExcessRedundancyStriped(bc, nonExcess, storedBlock, delNodeHint);
} else {
final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy(
bc.getStoragePolicyID());
final List<StorageType> excessTypes = storagePolicy.chooseExcess(
replication, DatanodeStorageInfo.toStorageTypes(nonExcess));
chooseExcessRedundancyContiguous(nonExcess, storedBlock, replication,
addedNode, delNodeHint, excessTypes);
if (nonExcess.size() > replication) {
final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy(
bc.getStoragePolicyID());
final List<StorageType> excessTypes = storagePolicy.chooseExcess(
replication, DatanodeStorageInfo.toStorageTypes(nonExcess));
chooseExcessRedundancyContiguous(nonExcess, storedBlock, replication,
addedNode, delNodeHint, excessTypes);
}
}
}
@ -4128,6 +4145,7 @@ public class BlockManager implements BlockStatsMXBean {
BitSet found = new BitSet(groupSize); //indices found
BitSet duplicated = new BitSet(groupSize); //indices found more than once
HashMap<DatanodeStorageInfo, Integer> storage2index = new HashMap<>();
boolean logEmptyExcessType = true;
for (DatanodeStorageInfo storage : nonExcess) {
int index = sblk.getStorageBlockIndex(storage);
assert index >= 0;
@ -4145,6 +4163,7 @@ public class BlockManager implements BlockStatsMXBean {
Integer index = storage2index.get(delStorageHint);
if (index != null && duplicated.get(index)) {
processChosenExcessRedundancy(nonExcess, delStorageHint, storedBlock);
logEmptyExcessType = false;
}
}
@ -4155,8 +4174,10 @@ public class BlockManager implements BlockStatsMXBean {
final List<StorageType> excessTypes = storagePolicy.chooseExcess(
(short) numOfTarget, DatanodeStorageInfo.toStorageTypes(nonExcess));
if (excessTypes.isEmpty()) {
LOG.warn("excess types chosen for block {} among storages {} is empty",
storedBlock, nonExcess);
if (logEmptyExcessType) {
LOG.warn("excess types chosen for block {} among storages {} is empty",
storedBlock, nonExcess);
}
return;
}

View File

@ -82,6 +82,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
NOT_IN_SERVICE("the node is not in service"),
NODE_STALE("the node is stale"),
NODE_TOO_BUSY("the node is too busy"),
NODE_TOO_BUSY_BY_VOLUME("the node is too busy based on volume load"),
TOO_MANY_NODES_ON_RACK("the rack has too many chosen nodes"),
NOT_ENOUGH_STORAGE_SPACE("not enough storage space to place the block"),
NO_REQUIRED_STORAGE_TYPE("required storage types are unavailable"),
@ -101,6 +102,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
protected boolean considerLoad;
private boolean considerLoadByStorageType;
protected double considerLoadFactor;
private boolean considerLoadByVolume = false;
private boolean preferLocalNode;
private boolean dataNodePeerStatsEnabled;
private volatile boolean excludeSlowNodesEnabled;
@ -131,6 +133,10 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
this.considerLoadFactor = conf.getDouble(
DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR,
DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_FACTOR_DEFAULT);
this.considerLoadByVolume = conf.getBoolean(
DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_KEY,
DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_DEFAULT
);
this.stats = stats;
this.clusterMap = clusterMap;
this.host2datanodeMap = host2datanodeMap;
@ -1007,6 +1013,16 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
"(load: " + nodeLoad + " > " + maxLoad + ")");
return true;
}
if (considerLoadByVolume) {
final int numVolumesAvailable = node.getNumVolumesAvailable();
final double maxLoadForVolumes = considerLoadFactor * numVolumesAvailable *
stats.getInServiceXceiverAverageForVolume();
if (maxLoadForVolumes > 0.0 && nodeLoad > maxLoadForVolumes) {
logNodeIsNotChosen(node, NodeNotChosenReason.NODE_TOO_BUSY_BY_VOLUME,
"(load: " + nodeLoad + " > " + maxLoadForVolumes + ") ");
return true;
}
}
return false;
}
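Worked example for the volume based check above (all numbers invented for illustration): with considerLoadFactor = 2.0, a node reporting 8 writable volumes, and a cluster-wide in-service average of 1.5 xceivers per volume, the threshold is 2.0 * 8 * 1.5 = 24, so the node is rejected with NODE_TOO_BUSY_BY_VOLUME only once its current xceiver count exceeds 24.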

View File

@ -233,6 +233,9 @@ public class DatanodeDescriptor extends DatanodeInfo {
// HB processing can use it to tell if it is the first HB since DN restarted
private boolean heartbeatedSinceRegistration = false;
/** The number of volumes that can be written to. */
private int numVolumesAvailable = 0;
/**
* DatanodeDescriptor constructor
* @param nodeID id of the data node
@ -411,6 +414,7 @@ public class DatanodeDescriptor extends DatanodeInfo {
long totalNonDfsUsed = 0;
Set<String> visitedMount = new HashSet<>();
Set<DatanodeStorageInfo> failedStorageInfos = null;
int volumesAvailable = 0;
// Decide if we should check for any missing StorageReport and mark it as
// failed. There are different scenarios.
@ -489,7 +493,11 @@ public class DatanodeDescriptor extends DatanodeInfo {
visitedMount.add(mount);
}
}
if (report.getRemaining() > 0 && storage.getState() != State.FAILED) {
volumesAvailable += 1;
}
}
this.numVolumesAvailable = volumesAvailable;
// Update total metrics for the node.
setCapacity(totalCapacity);
@ -981,6 +989,14 @@ public class DatanodeDescriptor extends DatanodeInfo {
return volumeFailureSummary;
}
/**
* Return the number of volumes that can be written to.
* @return the number of volumes that can be written to.
*/
public int getNumVolumesAvailable() {
return numVolumesAvailable;
}
/**
* @param nodeReg DatanodeID to update registration for.
*/

View File

@ -2101,6 +2101,17 @@ public class DatanodeManager {
return avgLoad;
}
@Override
public double getInServiceXceiverAverageForVolume() {
double avgLoad = 0;
final int volumes = heartbeatManager.getInServiceAvailableVolumeCount();
if (volumes > 0) {
final long xceivers = heartbeatManager.getInServiceXceiverCount();
avgLoad = (double)xceivers/volumes;
}
return avgLoad;
}
@Override
public Map<StorageType, StorageTypeStats> getStorageTypeStats() {
return heartbeatManager.getStorageTypeStats();

View File

@ -60,7 +60,9 @@ public interface DatanodeStatistics {
/** @return number of non-decommission(ing|ed) nodes */
public int getNumDatanodesInService();
/** @return number of writable volumes on nodes that are in service. */
int getInServiceAvailableVolumeCount();
/**
* @return the total used space by data nodes for non-DFS purposes
* such as storing temporary files on the local file system

View File

@ -44,6 +44,7 @@ class DatanodeStats {
private int nodesInService = 0;
private int nodesInServiceXceiverCount = 0;
private int nodesInServiceAvailableVolumeCount = 0;
private int expiredHeartbeats = 0;
synchronized void add(final DatanodeDescriptor node) {
@ -58,6 +59,7 @@ class DatanodeStats {
capacityRemaining += node.getRemaining();
cacheCapacity += node.getCacheCapacity();
cacheUsed += node.getCacheUsed();
nodesInServiceAvailableVolumeCount += node.getNumVolumesAvailable();
} else if (node.isDecommissionInProgress() ||
node.isEnteringMaintenance()) {
cacheCapacity += node.getCacheCapacity();
@ -87,6 +89,7 @@ class DatanodeStats {
capacityRemaining -= node.getRemaining();
cacheCapacity -= node.getCacheCapacity();
cacheUsed -= node.getCacheUsed();
nodesInServiceAvailableVolumeCount -= node.getNumVolumesAvailable();
} else if (node.isDecommissionInProgress() ||
node.isEnteringMaintenance()) {
cacheCapacity -= node.getCacheCapacity();
@ -149,6 +152,10 @@ class DatanodeStats {
return nodesInServiceXceiverCount;
}
synchronized int getNodesInServiceAvailableVolumeCount() {
return nodesInServiceAvailableVolumeCount;
}
synchronized int getExpiredHeartbeats() {
return expiredHeartbeats;
}

View File

@ -53,14 +53,24 @@ public interface FSClusterStats {
public int getNumDatanodesInService();
/**
* an indication of the average load of non-decommission(ing|ed) nodes
* eligible for block placement
* An indication of the average load of non-decommission(ing|ed) nodes
* eligible for block placement.
*
* @return average of the in service number of block transfers and block
* writes that are currently occurring on the cluster.
*/
public double getInServiceXceiverAverage();
/**
* An indication of the average load of volumes at non-decommission(ing|ed)
* nodes eligible for block placement.
*
* @return average of in service number of block transfers and block
* writes that are currently occurring on the volumes of the
* cluster.
*/
double getInServiceXceiverAverageForVolume();
/**
* Indicates the storage statistics per storage type.
* @return storage statistics per storage type.

View File

@ -183,6 +183,11 @@ class HeartbeatManager implements DatanodeStatistics {
public int getNumDatanodesInService() {
return stats.getNodesInService();
}
@Override
public int getInServiceAvailableVolumeCount() {
return stats.getNodesInServiceAvailableVolumeCount();
}
@Override
public long getCacheCapacity() {

View File

@ -86,10 +86,10 @@ class LowRedundancyBlocks implements Iterable<BlockInfo> {
private final List<LightWeightLinkedSet<BlockInfo>> priorityQueues
= new ArrayList<>(LEVEL);
/** The number of corrupt blocks with replication factor 1 */
private final LongAdder lowRedundancyBlocks = new LongAdder();
private final LongAdder corruptBlocks = new LongAdder();
/** The number of corrupt blocks with replication factor 1 */
private final LongAdder corruptReplicationOneBlocks = new LongAdder();
private final LongAdder lowRedundancyECBlockGroups = new LongAdder();
private final LongAdder corruptECBlockGroups = new LongAdder();
@ -369,11 +369,11 @@ class LowRedundancyBlocks implements Iterable<BlockInfo> {
* @return true if the block was found and removed from one of the priority
* queues
*/
boolean remove(BlockInfo block, int priLevel) {
synchronized boolean remove(BlockInfo block, int priLevel) {
return remove(block, priLevel, block.getReplication());
}
boolean remove(BlockInfo block, int priLevel, int oldExpectedReplicas) {
synchronized boolean remove(BlockInfo block, int priLevel, int oldExpectedReplicas) {
if(priLevel >= 0 && priLevel < LEVEL
&& priorityQueues.get(priLevel).remove(block)) {
NameNode.blockStateChangeLog.debug(

View File

@ -18,9 +18,7 @@
package org.apache.hadoop.hdfs.server.common;
import java.lang.management.ManagementFactory;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.management.Attribute;
@ -34,8 +32,6 @@ import javax.management.ObjectName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.log4j.Appender;
import org.apache.log4j.AsyncAppender;
/**
* MetricsLoggerTask can be used as utility to dump metrics to log.
@ -56,12 +52,12 @@ public class MetricsLoggerTask implements Runnable {
}
}
private org.apache.log4j.Logger metricsLog;
private Logger metricsLog;
private String nodeName;
private short maxLogLineLength;
public MetricsLoggerTask(String metricsLog, String nodeName, short maxLogLineLength) {
this.metricsLog = org.apache.log4j.Logger.getLogger(metricsLog);
this.metricsLog = LoggerFactory.getLogger(metricsLog);
this.nodeName = nodeName;
this.maxLogLineLength = maxLogLineLength;
}
@ -115,8 +111,11 @@ public class MetricsLoggerTask implements Runnable {
.substring(0, maxLogLineLength) + "...");
}
private static boolean hasAppenders(org.apache.log4j.Logger logger) {
return logger.getAllAppenders().hasMoreElements();
// TODO: once a hadoop-logging module hides the log4j implementation details,
// this method can call that module's utility directly.
private static boolean hasAppenders(Logger logger) {
return org.apache.log4j.Logger.getLogger(logger.getName()).getAllAppenders()
.hasMoreElements();
}
/**
@ -138,26 +137,4 @@ public class MetricsLoggerTask implements Runnable {
return attributeNames;
}
/**
* Make the metrics logger async and add all pre-existing appenders to the
* async appender.
*/
public static void makeMetricsLoggerAsync(String metricsLog) {
org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(metricsLog);
logger.setAdditivity(false); // Don't pollute actual logs with metrics dump
@SuppressWarnings("unchecked")
List<Appender> appenders = Collections.list(logger.getAllAppenders());
// failsafe against trying to async it more than once
if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
AsyncAppender asyncAppender = new AsyncAppender();
// change logger to have an async appender containing all the
// previously configured appenders
for (Appender appender : appenders) {
logger.removeAppender(appender);
asyncAppender.addAppender(appender);
}
logger.addAppender(asyncAppender);
}
}
}

View File

@ -63,6 +63,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports;
@ -791,6 +792,9 @@ class BPServiceActor implements Runnable {
shouldServiceRun = false;
return;
}
if (InvalidBlockReportLeaseException.class.getName().equals(reClass)) {
// The NameNode no longer accepts the current lease; reset it so a new
// full block report lease is requested on the next heartbeat.
fullBlockReportLeaseId = 0;
}
LOG.warn("RemoteException in offerService", re);
sleepAfterException();
} catch (IOException e) {

View File

@ -4058,8 +4058,6 @@ public class DataNode extends ReconfigurableBase
return;
}
MetricsLoggerTask.makeMetricsLoggerAsync(METRICS_LOG_NAME);
// Schedule the periodic logging.
metricsLoggerTimer = new ScheduledThreadPoolExecutor(1);
metricsLoggerTimer.setExecuteExistingDelayedTasksAfterShutdownPolicy(false);

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.datanode;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.thirdparty.protobuf.ByteString;
import javax.crypto.SecretKey;
@ -632,6 +633,7 @@ class DataXceiver extends Receiver implements Runnable {
datanode.metrics.incrBytesRead((int) read);
datanode.metrics.incrBlocksRead();
datanode.metrics.incrTotalReadTime(duration);
DFSUtil.addTransferRateMetric(datanode.metrics, read, duration);
} catch ( SocketException ignored ) {
LOG.trace("{}:Ignoring exception while serving {} to {}",
dnR, block, remoteAddress, ignored);
@ -1122,6 +1124,7 @@ class DataXceiver extends Receiver implements Runnable {
datanode.metrics.incrBytesRead((int) read);
datanode.metrics.incrBlocksRead();
datanode.metrics.incrTotalReadTime(duration);
DFSUtil.addTransferRateMetric(datanode.metrics, read, duration);
LOG.info("Copied {} to {}", block, peer.getRemoteAddressString());
} catch (IOException ioe) {
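The diff shows where DFSUtil.addTransferRateMetric is invoked but not what it computes. Below is a hedged, self-contained sketch of one plausible bytes-per-second calculation; the class, the method name, and the assumption that read is in bytes and duration in milliseconds are mine, not taken from the commit.
// Hypothetical sketch only; the real computation lives inside DFSUtil.addTransferRateMetric.
public final class TransferRateSketch {
  static long bytesPerSecond(long bytesRead, long durationMillis) {
    if (durationMillis <= 0) {
      return 0L; // guard against zero or negative elapsed time
    }
    return bytesRead * 1000L / durationMillis;
  }

  public static void main(String[] args) {
    // 134_217_728 bytes (128 MiB) read in 2_000 ms -> 67_108_864 bytes/s
    System.out.println(bytesPerSecond(134_217_728L, 2_000L));
  }
}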

View File

@ -61,6 +61,8 @@ public class DataNodeMetrics {
@Metric MutableCounterLong bytesRead;
@Metric("Milliseconds spent reading")
MutableCounterLong totalReadTime;
@Metric private MutableRate readTransferRate;
final private MutableQuantiles[] readTransferRateQuantiles;
@Metric MutableCounterLong blocksWritten;
@Metric MutableCounterLong blocksRead;
@Metric MutableCounterLong blocksReplicated;
@ -227,6 +229,7 @@ public class DataNodeMetrics {
sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len];
ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len];
ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len];
readTransferRateQuantiles = new MutableQuantiles[len];
for (int i = 0; i < len; i++) {
int interval = intervals[i];
@ -255,6 +258,10 @@ public class DataNodeMetrics {
"ramDiskBlocksLazyPersistWindows" + interval + "s",
"Time between the RamDisk block write and disk persist in ms",
"ops", "latency", interval);
readTransferRateQuantiles[i] = registry.newInverseQuantiles(
"readTransferRate" + interval + "s",
"Rate at which bytes are read from datanode calculated in bytes per second",
"ops", "rate", interval);
}
}
@ -316,6 +323,13 @@ public class DataNodeMetrics {
}
}
public void addReadTransferRate(long readTransferRate) {
this.readTransferRate.add(readTransferRate);
for (MutableQuantiles q : readTransferRateQuantiles) {
q.add(readTransferRate);
}
}
public void addCacheReport(long latency) {
cacheReports.add(latency);
}
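As an illustrative calculation for the new metric (numbers invented): a 67,108,864-byte (64 MiB) read completed in 500 ms corresponds to a sample of 134,217,728 bytes/s, which addReadTransferRate feeds both into the readTransferRate rate metric and into every configured readTransferRate&lt;interval&gt;s quantile window.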

View File

@ -262,13 +262,24 @@ class FSDirStatAndListingOp {
needLocation, false);
listingCnt++;
if (listing[i] instanceof HdfsLocatedFileStatus) {
// Once we hit lsLimit locations, stop.
// This helps to prevent excessively large response payloads.
// Approximate #locations with locatedBlockCount() * repl_factor
LocatedBlocks blks =
((HdfsLocatedFileStatus)listing[i]).getLocatedBlocks();
locationBudget -= (blks == null) ? 0 :
blks.locatedBlockCount() * listing[i].getReplication();
// Once we hit lsLimit locations, stop.
// This helps to prevent excessively large response payloads.
LocatedBlocks blks =
((HdfsLocatedFileStatus) listing[i]).getLocatedBlocks();
if (blks != null) {
ErasureCodingPolicy ecPolicy = listing[i].getErasureCodingPolicy();
if (ecPolicy != null && !ecPolicy.isReplicationPolicy()) {
// Approximate #locations with locatedBlockCount() *
// internalBlocksNum.
locationBudget -= blks.locatedBlockCount() *
(ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits());
} else {
// Approximate #locations with locatedBlockCount() *
// replicationFactor.
locationBudget -=
blks.locatedBlockCount() * listing[i].getReplication();
}
}
}
}
// truncate return array if necessary
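A worked example of the budget accounting above (figures invented): a file stored with the RS-6-3 erasure coding policy that returns 4 located block groups charges 4 * (6 + 3) = 36 locations against the budget, while a 3x-replicated file with 4 located blocks charges only 4 * 3 = 12; as the comment notes, the listing stops attaching locations once the lsLimit budget is spent.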

Some files were not shown because too many files have changed in this diff.