Merge branch 'trunk' into HDFS-7240

hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
Anu Engineer 2017-09-18 15:11:06 -07:00
commit 19f7f87517
634 changed files with 26589 additions and 11182 deletions

View File

@ -359,7 +359,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The binary distribution of this product bundles these dependencies under the
following license:
re2j 1.0
re2j 1.1
---------------------------------------------------------------------
(GO license)
This is a work derived from Russ Cox's RE2 in Go, whose license

View File

@ -8,19 +8,10 @@ following notices:
* Copyright 2011 FuseSource Corp. http://fusesource.com
The binary distribution of this product bundles binaries of
org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni),
which has the following notices:
* This product includes software developed by FuseSource Corp.
http://fusesource.com
* This product includes software developed at
Progress Software Corporation and/or its subsidiaries or affiliates.
* This product includes software developed by IBM Corporation and others.
The binary distribution of this product bundles binaries of
AWS SDK for Java - Core 1.11.45,
AWS Java SDK for AWS KMS 1.11.45,
AWS Java SDK for Amazon S3 1.11.45,
AWS Java SDK for AWS STS 1.11.45,
AWS SDK for Java - Bundle 1.11.134,
AWS Java SDK for AWS KMS 1.11.134,
AWS Java SDK for Amazon S3 1.11.134,
AWS Java SDK for AWS STS 1.11.134,
JMES Path Query library 1.0,
which has the following notices:
* This software includes third party software subject to the following
@ -303,15 +294,15 @@ which has the following notices:
notice.
The binary distribution of this product bundles binaries of
Jetty :: Http Utility 9.3.11.,
Jetty :: IO Utility 9.3.11.,
Jetty :: Security 9.3.11.,
Jetty :: Server Core 9.3.11.,
Jetty :: Servlet Handling 9.3.11.,
Jetty :: Utilities 9.3.11.,
Jetty :: Http Utility 9.3.19.,
Jetty :: IO Utility 9.3.19.,
Jetty :: Security 9.3.19.,
Jetty :: Server Core 9.3.19.,
Jetty :: Servlet Handling 9.3.19.,
Jetty :: Utilities 9.3.19.,
Jetty :: Utilities :: Ajax,
Jetty :: Webapp Application Support 9.3.11.,
Jetty :: XML utilities 9.3.11.,
Jetty :: Webapp Application Support 9.3.19.,
Jetty :: XML utilities 9.3.19.,
which has the following notices:
* ==============================================================
Jetty Web Container
@ -481,15 +472,15 @@ which has the following notices:
Copyright (C) 1999-2012, QOS.ch. All rights reserved.
The binary distribution of this product bundles binaries of
Apache HBase - Annotations 1.2.4,
Apache HBase - Client 1.2.4,
Apache HBase - Common 1.2.4,
Apache HBase - Hadoop Compatibility 1.2.4,
Apache HBase - Hadoop Two Compatibility 1.2.4,
Apache HBase - Prefix Tree 1.2.4,
Apache HBase - Procedure 1.2.4,
Apache HBase - Protocol 1.2.4,
Apache HBase - Server 1.2.4,
Apache HBase - Annotations 1.2.6,
Apache HBase - Client 1.2.6,
Apache HBase - Common 1.2.6,
Apache HBase - Hadoop Compatibility 1.2.6,
Apache HBase - Hadoop Two Compatibility 1.2.6,
Apache HBase - Prefix Tree 1.2.6,
Apache HBase - Procedure 1.2.6,
Apache HBase - Protocol 1.2.6,
Apache HBase - Server 1.2.6,
which has the following notices:
* Apache HBase
Copyright 2007-2015 The Apache Software Foundation
@ -576,3 +567,17 @@ The binary distribution of this product bundles binaries of
software.amazon.ion:ion-java 1.0.1,
which has the following notices:
* Amazon Ion Java Copyright 2007-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
The binary distribution of this product bundles binaries of
joda-time:joda-time:2.9.9
which has the following notices:
* =============================================================================
= NOTICE file corresponding to section 4d of the Apache License Version 2.0 =
=============================================================================
This product includes software developed by
Joda.org (http://www.joda.org/).
The binary distribution of this product bundles binaries of
Ehcache 3.3.1,
which has the following notices:
* Ehcache V3 Copyright 2014-2016 Terracotta, Inc.

View File

@ -182,6 +182,21 @@
<exclude>io/serializations</exclude>
</excludes>
</relocation>
<!-- JSRs that haven't made it to inclusion in J2SE -->
<relocation>
<pattern>javax/el/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.el.</shadedPattern>
<excludes>
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>javax/cache/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.cache.</shadedPattern>
<excludes>
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>javax/servlet/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.servlet.</shadedPattern>
@ -189,6 +204,13 @@
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>javax/ws/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.ws.</shadedPattern>
<excludes>
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>net/</pattern>
<shadedPattern>${shaded.dependency.prefix}.net.</shadedPattern>
@ -199,6 +221,11 @@
<exclude>net/topology/**/*</exclude>
</excludes>
</relocation>
<!-- okio declares a top level package instead of nested -->
<relocation>
<pattern>okio/</pattern>
<shadedPattern>${shaded.dependency.prefix}.okio.</shadedPattern>
</relocation>
</relocations>
<transformers>
<!-- Needed until MSHADE-182 -->

View File

@ -25,7 +25,13 @@
<version>3.1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<description>Enforces our invariants for the api and runtime client modules.</description>
<description>
Enforces our invariants for the api and runtime client modules.
E.g. that modules have a specific set of transitive dependencies
and shaded artifacts only contain classes that are in particular
packages. Does the enforcement through the maven-enforcer-plugin
and an integration test.
</description>
<name>Apache Hadoop Client Packaging Invariants</name>
<properties>
@ -82,6 +88,8 @@
<exclude>commons-logging:commons-logging</exclude>
<!-- Leave log4j unshaded so downstream users can configure logging. -->
<exclude>log4j:log4j</exclude>
<!-- Leave javax annotations we need exposed -->
<exclude>com.google.code.findbugs:jsr305</exclude>
</excludes>
</banTransitiveDependencies>
<banDuplicateClasses>
@ -97,7 +105,6 @@
</dependencies>
</banDuplicateClasses>
</rules>
<!-- TODO we need a rule for "we don't have classes that are outside of the org.apache.hadoop package" -->
<!-- TODO we need a rule for "the constants in this set of classes haven't been shaded / don't have this prefix"
Manually checking the set of Keys that look like packages we relocate:
@ -116,6 +123,69 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>test-resources</id>
<phase>pre-integration-test</phase>
<goals>
<goal>testResources</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<!-- create a maven pom property that has all of our dependencies.
below in the integration-test phase we'll pass this list
of paths to our jar checker script.
-->
<execution>
<id>put-client-artifacts-in-a-property</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build-classpath</goal>
</goals>
<configuration>
<excludeTransitive>true</excludeTransitive>
<outputProperty>hadoop-client-artifacts</outputProperty>
</configuration>
</execution>
</executions>
</plugin>
<!--
Check that we actually relocated everything we included.
It's critical that we don't ship third party dependencies that haven't
been relocated under our package space, since this will lead to
difficult-to-debug classpath errors for downstream users. Unfortunately, that
means inspecting all the jars.
-->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<executions>
<execution>
<id>check-jar-contents</id>
<phase>integration-test</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>${shell-executable}</executable>
<workingDirectory>${project.build.testOutputDirectory}</workingDirectory>
<requiresOnline>false</requiresOnline>
<arguments>
<argument>ensure-jars-have-correct-contents.sh</argument>
<argument>${hadoop-client-artifacts}</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

View File

@ -0,0 +1,82 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Usage: $0 [/path/to/some/example.jar:/path/to/another/example/created.jar]
#
# accepts a single command line argument with a colon separated list of
# paths to jars to check. Iterates through each such passed jar and checks
# all the contained paths to make sure they follow the below constructed
# safe list.
# we have to allow the directories that lead to the org/apache/hadoop dir
allowed_expr="(^org/$|^org/apache/$"
# We allow the following things to exist in our client artifacts:
# * classes in packages that start with org.apache.hadoop, which by
# convention should be in a path that looks like org/apache/hadoop
allowed_expr+="|^org/apache/hadoop/"
# * whatever in the "META-INF" directory
allowed_expr+="|^META-INF/"
# * whatever under the "webapps" directory; for things shipped by yarn
allowed_expr+="|^webapps/"
# * Hadoop's default configuration files, which have the form
# "_module_-default.xml"
allowed_expr+="|^[^-]*-default.xml$"
# * Hadoop's versioning properties files, which have the form
# "_module_-version-info.properties"
allowed_expr+="|^[^-]*-version-info.properties$"
# * Hadoop's application classloader properties file.
allowed_expr+="|^org.apache.hadoop.application-classloader.properties$"
# public suffix list used by httpcomponents
allowed_expr+="|^mozilla/$"
allowed_expr+="|^mozilla/public-suffix-list.txt$"
# Comes from commons-configuration, not sure if relocatable.
allowed_expr+="|^properties.dtd$"
allowed_expr+="|^PropertyList-1.0.dtd$"
# Comes from Ehcache, not relocatable at top level due to limitation
# of shade plugin AFAICT
allowed_expr+="|^ehcache-core.xsd$"
allowed_expr+="|^ehcache-107ext.xsd$"
# Comes from kerby's kerb-simplekdc, not relocatable since at top level
allowed_expr+="|^krb5-template.conf$"
allowed_expr+="|^krb5_udp-template.conf$"
# Jetty uses this style sheet for directory listings. TODO ensure our
# internal use of jetty disallows directory listings and remove this.
allowed_expr+="|^jetty-dir.css$"
allowed_expr+=")"
declare -i bad_artifacts=0
declare -a bad_contents
IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1")
for artifact in "${artifact_list[@]}"; do
bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}"))
if [ ${#bad_contents[@]} -gt 0 ]; then
echo "[ERROR] Found artifact with unexpected contents: '${artifact}'"
echo " Please check the following and either correct the build or update"
echo " the allowed list with reasoning."
echo ""
for bad_line in "${bad_contents[@]}"; do
echo " ${bad_line}"
done
bad_artifacts=${bad_artifacts}+1
else
echo "[INFO] Artifact looks correct: '$(basename "${artifact}")'"
fi
done
if [ "${bad_artifacts}" -gt 0 ]; then
exit 1
fi

View File

@ -25,7 +25,13 @@
<version>3.1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<description>Enforces our invariants for the testing client modules.</description>
<description>
Enforces our invariants for the test client modules.
E.g. that modules have a specific set of transitive dependencies
and shaded artifacts only contain classes that are in particular
packages. Does the enforcement through the maven-enforcer-plugin
and an integration test.
</description>
<name>Apache Hadoop Client Packaging Invariants for Test</name>
<properties>
@ -90,6 +96,8 @@
<exclude>junit:junit</exclude>
<!-- JUnit brings in hamcrest -->
<exclude> org.hamcrest:hamcrest-core</exclude>
<!-- Leave javax annotations we need exposed -->
<exclude>com.google.code.findbugs:jsr305</exclude>
</excludes>
</banTransitiveDependencies>
<banDuplicateClasses>
@ -105,7 +113,6 @@
</dependencies>
</banDuplicateClasses>
</rules>
<!-- TODO we need a rule for "we don't have classes that are outside of the org.apache.hadoop package" -->
<!-- TODO we need a rule for "the constants in this set of classes haven't been shaded / don't have this prefix"
Manually checking the set of Keys that look like packages we relocate:
@ -124,6 +131,71 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>test-resources</id>
<phase>pre-integration-test</phase>
<goals>
<goal>testResources</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- create a maven pom property that has all of our dependencies.
below in the integration-test phase we'll pass this list
of paths to our jar checker script.
-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>put-client-artifacts-in-a-property</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build-classpath</goal>
</goals>
<configuration>
<!-- these two get covered in our non-test invariant check -->
<excludeArtifactIds>hadoop-client-api,hadoop-client-runtime</excludeArtifactIds>
<excludeTransitive>true</excludeTransitive>
<outputProperty>hadoop-client-artifacts</outputProperty>
</configuration>
</execution>
</executions>
</plugin>
<!--
Check that we actually relocated everything we included.
It's critical that we don't ship third party dependencies that haven't
been relocated under our package space, since this will lead to
difficult-to-debug classpath errors for downstream users. Unfortunately, that
means inspecting all the jars.
-->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<executions>
<execution>
<id>check-jar-contents</id>
<phase>integration-test</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>${shell-executable}</executable>
<workingDirectory>${project.build.testOutputDirectory}</workingDirectory>
<requiresOnline>false</requiresOnline>
<arguments>
<argument>ensure-jars-have-correct-contents.sh</argument>
<argument>${hadoop-client-artifacts}</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

View File

@ -0,0 +1,70 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Usage: $0 [/path/to/some/example.jar:/path/to/another/example/created.jar]
#
# accepts a single command line argument with a colon separated list of
# paths to jars to check. Iterates through each such passed jar and checks
# all the contained paths to make sure they follow the below constructed
# safe list.
# we have to allow the directories that lead to the org/apache/hadoop dir
allowed_expr="(^org/$|^org/apache/$"
# We allow the following things to exist in our client artifacts:
# * classes in packages that start with org.apache.hadoop, which by
# convention should be in a path that looks like org/apache/hadoop
allowed_expr+="|^org/apache/hadoop/"
# * whatever in the "META-INF" directory
allowed_expr+="|^META-INF/"
# * whatever under the "webapps" directory; for minicluster UIs
allowed_expr+="|^webapps/"
# * Hadoop's default configuration files, which have the form
# "_module_-default.xml"
allowed_expr+="|^[^-]*-default.xml$"
# * Hadoop's versioning properties files, which have the form
# "_module_-version-info.properties"
allowed_expr+="|^[^-]*-version-info.properties$"
# * Hadoop's application classloader properties file.
allowed_expr+="|^org.apache.hadoop.application-classloader.properties$"
# * Used by JavaSandboxLinuxContainerRuntime as a default, loaded
# from root, so can't relocate. :(
allowed_expr+="|^java.policy$"
allowed_expr+=")"
declare -i bad_artifacts=0
declare -a bad_contents
IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1")
for artifact in "${artifact_list[@]}"; do
bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}"))
if [ ${#bad_contents[@]} -gt 0 ]; then
echo "[ERROR] Found artifact with unexpected contents: '${artifact}'"
echo " Please check the following and either correct the build or update"
echo " the allowed list with reasoning."
echo ""
for bad_line in "${bad_contents[@]}"; do
echo " ${bad_line}"
done
bad_artifacts=${bad_artifacts}+1
else
echo "[INFO] Artifact looks correct: '$(basename "${artifact}")'"
fi
done
if [ "${bad_artifacts}" -gt 0 ]; then
exit 1
fi

View File

@ -77,7 +77,9 @@ public void clusterUp() throws IOException {
@After
public void clusterDown() {
cluster.close();
if (cluster != null) {
cluster.close();
}
}
@Test

View File

@ -75,6 +75,9 @@
<scope>provided</scope>
</dependency>
-->
<!-- Anything we're going to include in the relocated jar we list as optional
in order to work around MNG-5899
-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
@ -282,6 +285,12 @@
</exclusions>
</dependency>
<!-- Add back in Mockito since the hadoop-hdfs test jar needs it. -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<optional>true</optional>
</dependency>
<!-- Add back in the transitive dependencies excluded from hadoop-common in client TODO remove once we have a filter for "is in these artifacts" -->
<!-- skip javax.servlet:servlet-api because it's in client -->
<!-- Skip commons-logging:commons-logging-api because it looks like nothing actually included it -->
@ -339,40 +348,12 @@
<artifactId>jersey-servlet</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.eclipse.jdt</groupId>
<artifactId>core</artifactId>
<optional>true</optional>
</dependency>
<!-- skip org.apache.avro:avro-ipc because it doesn't look like hadoop-common actually uses it -->
<dependency>
<groupId>net.sf.kosmosfs</groupId>
<artifactId>kfs</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
<optional>true</optional>
<exclusions>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
@ -586,6 +567,7 @@
<exclude>org.slf4j:slf4j-api</exclude>
<exclude>commons-logging:commons-logging</exclude>
<exclude>junit:junit</exclude>
<exclude>com.google.code.findbugs:jsr305</exclude>
<!-- Keep optional runtime deps out of the shading -->
<exclude>org.apache.hadoop:hadoop-yarn-server-timelineservice</exclude>
<exclude>log4j:log4j</exclude>
@ -635,6 +617,53 @@
<exclude>testshell/*</exclude>
</excludes>
</filter>
<!-- Mockito tries to include its own unrelocated copy of hamcrest. :( -->
<filter>
<artifact>org.mockito:mockito-all</artifact>
<excludes>
<exclude>asm-license.txt</exclude>
<exclude>cglib-license.txt</exclude>
<exclude>hamcrest-license.txt</exclude>
<exclude>objenesis-license.txt</exclude>
<exclude>org/hamcrest/**/*.class</exclude>
<exclude>org/hamcrest/*.class</exclude>
</excludes>
</filter>
<!-- skip grizzly internals we don't need to run. -->
<filter>
<artifact>org.glassfish.grizzly:grizzly-http-servlet</artifact>
<excludes>
<exclude>catalog.cat</exclude>
<exclude>javaee_5.xsd</exclude>
<exclude>javaee_6.xsd</exclude>
<exclude>javaee_web_services_client_1_2.xsd</exclude>
<exclude>javaee_web_services_client_1_3.xsd</exclude>
<exclude>jsp_2_1.xsd</exclude>
<exclude>jsp_2_2.xsd</exclude>
<exclude>web-app_2_5.xsd</exclude>
<exclude>web-app_3_0.xsd</exclude>
<exclude>web-common_3_0.xsd</exclude>
<exclude>xml.xsd</exclude>
</excludes>
</filter>
<filter>
<!-- skip jetty license info already incorporated into LICENSE/NOTICE -->
<artifact>org.eclipse.jetty:*</artifact>
<excludes>
<exclude>about.html</exclude>
</excludes>
</filter>
<filter>
<artifact>org.apache.hadoop:*</artifact>
<excludes>
<!-- No shipping log4j configs in a downstream facing library -->
<exclude>log4j.properties</exclude>
<exclude>container-log4j.properties</exclude>
<!-- keep optional runtime configuration out of the jar; downstream can provide -->
<exclude>capacity-scheduler.xml</exclude>
<exclude>krb5.conf</exclude>
</excludes>
</filter>
</filters>
<relocations>
<relocation>
@ -717,6 +746,7 @@
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<!-- JSRs that haven't made it to inclusion in J2SE -->
<relocation>
<pattern>javax/el/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.el.</shadedPattern>
@ -724,6 +754,13 @@
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>javax/cache/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.cache.</shadedPattern>
<excludes>
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>javax/inject/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.inject.</shadedPattern>
@ -738,6 +775,13 @@
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>javax/ws/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.ws.</shadedPattern>
<excludes>
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>jersey/</pattern>
<shadedPattern>${shaded.dependency.prefix}.jersey.</shadedPattern>
@ -755,6 +799,11 @@
<exclude>net/topology/**/*</exclude>
</excludes>
</relocation>
<!-- okio declares a top level package instead of nested -->
<relocation>
<pattern>okio/</pattern>
<shadedPattern>${shaded.dependency.prefix}.okio.</shadedPattern>
</relocation>
</relocations>
<transformers>
<!-- Needed until MSHADE-182 -->

View File

@ -94,6 +94,11 @@
<artifactId>commons-logging</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
<scope>runtime</scope>
</dependency>
<!-- Move log4j to optional, since it is needed for some pieces folks might not use:
* one of the three custom log4j appenders we have
-->
@ -149,6 +154,9 @@
<exclude>commons-logging:commons-logging</exclude>
<!-- Leave log4j unshaded so downstream users can configure logging. -->
<exclude>log4j:log4j</exclude>
<!-- Leave javax APIs that are stable -->
<!-- the jdk ships part of the javax.annotation namespace, so if we want to relocate this we'll have to carve it out by class :( -->
<exclude>com.google.code.findbugs:jsr305</exclude>
</excludes>
</artifactSet>
<filters>
@ -181,6 +189,28 @@
<exclude>META-INF/services/javax.*</exclude>
</excludes>
</filter>
<filter>
<!-- skip french localization -->
<artifact>org.apache.commons:commons-math3</artifact>
<excludes>
<exclude>assets/org/apache/commons/math3/**/*</exclude>
</excludes>
</filter>
<filter>
<!-- skip jetty license info already incorporated into LICENSE/NOTICE -->
<artifact>org.eclipse.jetty:*</artifact>
<excludes>
<exclude>about.html</exclude>
</excludes>
</filter>
<filter>
<!-- skip docs on formats used in kerby -->
<artifact>org.apache.kerby:kerb-util</artifact>
<excludes>
<exclude>keytab.txt</exclude>
<exclude>ccache.txt</exclude>
</excludes>
</filter>
</filters>
<relocations>
<relocation>
@ -245,6 +275,7 @@
<exclude>io/serializations</exclude>
</excludes>
</relocation>
<!-- JSRs that haven't made it to inclusion in J2SE -->
<relocation>
<pattern>javax/el/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.el.</shadedPattern>
@ -252,6 +283,13 @@
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>javax/cache/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.cache.</shadedPattern>
<excludes>
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>javax/servlet/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.servlet.</shadedPattern>
@ -259,6 +297,13 @@
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>javax/ws/</pattern>
<shadedPattern>${shaded.dependency.prefix}.javax.ws.</shadedPattern>
<excludes>
<exclude>**/pom.xml</exclude>
</excludes>
</relocation>
<relocation>
<pattern>net/</pattern>
<shadedPattern>${shaded.dependency.prefix}.net.</shadedPattern>
@ -269,6 +314,11 @@
<exclude>net/topology/**/*</exclude>
</excludes>
</relocation>
<!-- okio declares a top level package instead of nested -->
<relocation>
<pattern>okio/</pattern>
<shadedPattern>${shaded.dependency.prefix}.okio.</shadedPattern>
</relocation>
<!-- probably not. -->
<!--
<relocation>

View File

@ -129,55 +129,6 @@
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-app</artifactId>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-nodemanager</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-servlet</artifactId>
</exclusion>
<exclusion>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<!-- No slf4j backends for downstream clients -->
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
@ -226,6 +177,37 @@
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<scope>compile</scope>
<exclusions>
<!-- Excluding hadoop-yarn-api and hadoop-annotations as they are already
included as direct dependencies. Guava, commons-cli and log4j are
transitive dependencies. -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>

View File

@ -28,6 +28,7 @@
import java.security.interfaces.RSAPublicKey;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.security.authentication.util.CertificateUtil;
import org.slf4j.Logger;
@ -216,7 +217,8 @@ protected String getJWTFromCookie(HttpServletRequest req) {
* @param request for getting the original request URL
* @return url to use as login url for redirect
*/
protected String constructLoginURL(HttpServletRequest request) {
@VisibleForTesting
String constructLoginURL(HttpServletRequest request) {
String delimiter = "?";
if (authenticationProviderUrl.contains("?")) {
delimiter = "&";

View File

@ -47,7 +47,7 @@
import com.nimbusds.jwt.SignedJWT;
import com.nimbusds.jose.crypto.RSASSASigner;
public class TestJWTRedirectAuthentictionHandler extends
public class TestJWTRedirectAuthenticationHandler extends
KerberosSecurityTestcase {
private static final String SERVICE_URL = "https://localhost:8888/resource";
private static final String REDIRECT_LOCATION =
@ -392,7 +392,7 @@ public void testOrigURLWithQueryString() throws Exception {
new StringBuffer(SERVICE_URL));
Mockito.when(request.getQueryString()).thenReturn("name=value");
String loginURL = ((TestJWTRedirectAuthenticationHandler)handler).testConstructLoginURL(request);
String loginURL = handler.constructLoginURL(request);
Assert.assertNotNull("loginURL should not be null.", loginURL);
Assert.assertEquals("https://localhost:8443/authserver?originalUrl=" + SERVICE_URL + "?name=value", loginURL);
}
@ -409,7 +409,7 @@ public void testOrigURLNoQueryString() throws Exception {
new StringBuffer(SERVICE_URL));
Mockito.when(request.getQueryString()).thenReturn(null);
String loginURL = ((TestJWTRedirectAuthenticationHandler)handler).testConstructLoginURL(request);
String loginURL = handler.constructLoginURL(request);
Assert.assertNotNull("LoginURL should not be null.", loginURL);
Assert.assertEquals("https://localhost:8443/authserver?originalUrl=" + SERVICE_URL, loginURL);
}
@ -425,7 +425,7 @@ public void setup() throws Exception, NoSuchAlgorithmException {
publicKey = (RSAPublicKey) kp.getPublic();
privateKey = (RSAPrivateKey) kp.getPrivate();
handler = new TestJWTRedirectAuthenticationHandler();
handler = new JWTRedirectAuthenticationHandler();
}
protected void setupKerberosRequirements() throws Exception {
@ -453,15 +453,16 @@ protected Properties getProperties() {
protected SignedJWT getJWT(String sub, Date expires, RSAPrivateKey privateKey)
throws Exception {
JWTClaimsSet claimsSet = new JWTClaimsSet();
claimsSet.setSubject(sub);
claimsSet.setIssueTime(new Date(new Date().getTime()));
claimsSet.setIssuer("https://c2id.com");
claimsSet.setCustomClaim("scope", "openid");
claimsSet.setExpirationTime(expires);
JWTClaimsSet claimsSet = new JWTClaimsSet.Builder()
.subject(sub)
.issueTime(new Date(new Date().getTime()))
.issuer("https://c2id.com")
.claim("scope", "openid")
.audience("bar")
.expirationTime(expires)
.build();
List<String> aud = new ArrayList<String>();
aud.add("bar");
claimsSet.setAudience("bar");
JWSHeader header = new JWSHeader.Builder(JWSAlgorithm.RS256).build();
@ -472,10 +473,4 @@ protected SignedJWT getJWT(String sub, Date expires, RSAPrivateKey privateKey)
return signedJWT;
}
class TestJWTRedirectAuthenticationHandler extends JWTRedirectAuthenticationHandler {
public String testConstructLoginURL(HttpServletRequest req) {
return constructLoginURL(req);
}
};
}

View File

@ -182,8 +182,6 @@ log4j.appender.DNMETRICSRFA.MaxFileSize=64MB
#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
# Jets3t library
log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
# AWS SDK & S3A FileSystem
log4j.logger.com.amazonaws=ERROR

View File

@ -554,6 +554,16 @@ public EncryptedKeyVersion reencryptEncryptedKey(EncryptedKeyVersion ekv)
return getExtension().reencryptEncryptedKey(ekv);
}
/**
* Calls {@link CryptoExtension#drain(String)} for the given key name on the
* underlying {@link CryptoExtension}.
*
* @param keyName the name of the key whose cached encrypted key versions should be drained
*/
public void drain(String keyName) {
getExtension().drain(keyName);
}
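A hypothetical usage sketch, not part of the patch: after a key is rolled to a new version, drain() can be used to discard the encrypted key versions cached for that key so that later generateEncryptedKey() calls pick up the new key material. The helper and the key name below are illustrative.
import java.io.IOException;
import java.security.NoSuchAlgorithmException;
import org.apache.hadoop.crypto.key.KeyProvider;
import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
public class RollAndDrainSketch {
  // Roll the named key, then drop its cached encrypted key versions so that
  // subsequent generateEncryptedKey() calls use the new key material.
  static void rollAndDrain(KeyProvider provider, String keyName)
      throws IOException, NoSuchAlgorithmException {
    KeyProviderCryptoExtension kpce =
        KeyProviderCryptoExtension.createKeyProviderCryptoExtension(provider);
    kpce.rollNewVersion(keyName);
    kpce.drain(keyName);
  }
}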
/**
* Batched version of {@link #reencryptEncryptedKey(EncryptedKeyVersion)}.
* <p>

View File

@ -247,8 +247,9 @@ public static class Factory extends KeyProviderFactory {
* - HOSTNAME = string
* - PORT = integer
*
* This will always create a {@link LoadBalancingKMSClientProvider}
* if the uri is correct.
* If multiple hosts are provided, the Factory will create a
* {@link LoadBalancingKMSClientProvider} that round-robins requests
* across the provided list of hosts.
*/
@Override
public KeyProvider createProvider(URI providerUri, Configuration conf)
@ -275,26 +276,30 @@ public KeyProvider createProvider(URI providerUri, Configuration conf)
}
hostsPart = t[0];
}
return createProvider(conf, origUrl, port, hostsPart);
return createProvider(providerUri, conf, origUrl, port, hostsPart);
}
return null;
}
private KeyProvider createProvider(Configuration conf,
private KeyProvider createProvider(URI providerUri, Configuration conf,
URL origUrl, int port, String hostsPart) throws IOException {
String[] hosts = hostsPart.split(";");
KMSClientProvider[] providers = new KMSClientProvider[hosts.length];
for (int i = 0; i < hosts.length; i++) {
try {
providers[i] =
new KMSClientProvider(
new URI("kms", origUrl.getProtocol(), hosts[i], port,
origUrl.getPath(), null, null), conf);
} catch (URISyntaxException e) {
throw new IOException("Could not instantiate KMSProvider.", e);
if (hosts.length == 1) {
return new KMSClientProvider(providerUri, conf);
} else {
KMSClientProvider[] providers = new KMSClientProvider[hosts.length];
for (int i = 0; i < hosts.length; i++) {
try {
providers[i] =
new KMSClientProvider(
new URI("kms", origUrl.getProtocol(), hosts[i], port,
origUrl.getPath(), null, null), conf);
} catch (URISyntaxException e) {
throw new IOException("Could not instantiate KMSProvider..", e);
}
}
return new LoadBalancingKMSClientProvider(providers, conf);
}
return new LoadBalancingKMSClientProvider(providers, conf);
}
}
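As an aside (not part of the patch), the standalone sketch below replays the host-splitting and round-robin index arithmetic that the factory and LoadBalancingKMSClientProvider use, with made-up host names; it only illustrates how requests rotate across the configured KMS instances.
public class KmsHostSelectionSketch {
  public static void main(String[] args) {
    // Semicolon-separated hosts, as in a kms://http@host1;host2;host3:9600/kms URI.
    String hostsPart = "kms01.example.com;kms02.example.com;kms03.example.com";
    String[] hosts = hostsPart.split(";");   // more than one host => load balancing
    int currentIdx = 1;                      // seeded starting index, as in the provider
    for (int i = 0; i < hosts.length; i++) {
      // Each successive attempt moves to the next provider in the ring.
      System.out.println("attempt " + i + " -> " + hosts[(currentIdx + i) % hosts.length]);
    }
  }
}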
@ -1023,11 +1028,7 @@ public Token<?> run() throws Exception {
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
} catch (Exception e) {
if (e instanceof IOException) {
throw (IOException) e;
} else {
throw new IOException(e);
}
throw new IOException(e);
}
}
return tokens;

View File

@ -19,7 +19,6 @@
package org.apache.hadoop.crypto.key.kms;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.security.GeneralSecurityException;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
@ -32,13 +31,9 @@
import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.CryptoExtension;
import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion;
import org.apache.hadoop.crypto.key.KeyProviderDelegationTokenExtension;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicy.RetryAction;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -74,8 +69,6 @@ public WrapperException(Throwable cause) {
private final KMSClientProvider[] providers;
private final AtomicInteger currentIdx;
private RetryPolicy retryPolicy = null;
public LoadBalancingKMSClientProvider(KMSClientProvider[] providers,
Configuration conf) {
this(shuffle(providers), Time.monotonicNow(), conf);
@ -87,79 +80,24 @@ public LoadBalancingKMSClientProvider(KMSClientProvider[] providers,
super(conf);
this.providers = providers;
this.currentIdx = new AtomicInteger((int)(seed % providers.length));
int maxNumRetries = conf.getInt(CommonConfigurationKeysPublic.
KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, providers.length);
int sleepBaseMillis = conf.getInt(CommonConfigurationKeysPublic.
KMS_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY,
CommonConfigurationKeysPublic.
KMS_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_DEFAULT);
int sleepMaxMillis = conf.getInt(CommonConfigurationKeysPublic.
KMS_CLIENT_FAILOVER_SLEEP_MAX_MILLIS_KEY,
CommonConfigurationKeysPublic.
KMS_CLIENT_FAILOVER_SLEEP_MAX_MILLIS_DEFAULT);
Preconditions.checkState(maxNumRetries >= 0);
Preconditions.checkState(sleepBaseMillis >= 0);
Preconditions.checkState(sleepMaxMillis >= 0);
this.retryPolicy = RetryPolicies.failoverOnNetworkException(
RetryPolicies.TRY_ONCE_THEN_FAIL, maxNumRetries, 0, sleepBaseMillis,
sleepMaxMillis);
}
@VisibleForTesting
public KMSClientProvider[] getProviders() {
KMSClientProvider[] getProviders() {
return providers;
}
private <T> T doOp(ProviderCallable<T> op, int currPos)
throws IOException {
if (providers.length == 0) {
throw new IOException("No providers configured !");
}
IOException ex = null;
int numFailovers = 0;
for (int i = 0;; i++, numFailovers++) {
for (int i = 0; i < providers.length; i++) {
KMSClientProvider provider = providers[(currPos + i) % providers.length];
try {
return op.call(provider);
} catch (AccessControlException ace) {
// No need to retry on AccessControlException
// and AuthorizationException.
// This assumes all the servers are configured with identical
// permissions and identical key acls.
throw ace;
} catch (IOException ioe) {
LOG.warn("KMS provider at [{}] threw an IOException: ",
provider.getKMSUrl(), ioe);
LOG.warn("KMS provider at [{}] threw an IOException!! {}",
provider.getKMSUrl(), StringUtils.stringifyException(ioe));
ex = ioe;
RetryAction action = null;
try {
action = retryPolicy.shouldRetry(ioe, 0, numFailovers, false);
} catch (Exception e) {
if (e instanceof IOException) {
throw (IOException)e;
}
throw new IOException(e);
}
if (action.action == RetryAction.RetryDecision.FAIL) {
LOG.warn("Aborting since the Request has failed with all KMS"
+ " providers(depending on {}={} setting and numProviders={})"
+ " in the group OR the exception is not recoverable",
CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY,
getConf().getInt(
CommonConfigurationKeysPublic.
KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, providers.length),
providers.length);
throw ex;
}
if (((numFailovers + 1) % providers.length) == 0) {
// Sleep only after we try all the providers for every cycle.
try {
Thread.sleep(action.delayMillis);
} catch (InterruptedException e) {
throw new InterruptedIOException("Thread Interrupted");
}
}
} catch (Exception e) {
if (e instanceof RuntimeException) {
throw (RuntimeException)e;
@ -168,6 +106,12 @@ private <T> T doOp(ProviderCallable<T> op, int currPos)
}
}
}
if (ex != null) {
LOG.warn("Aborting since the Request has failed with all KMS"
+ " providers in the group. !!");
throw ex;
}
throw new IOException("No providers configured !!");
}
private int nextIdx() {

View File

@ -928,6 +928,11 @@ public FileStatus next() {
* The specification of this method matches that of
* {@link FileContext#listLocatedStatus(Path)} except that Path f
* must be for this file system.
*
* In the HDFS implementation, the BlockLocation of a returned LocatedFileStatus
* will have different formats for replicated and erasure-coded files. Please
* refer to {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
* for more details.
*/
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f)
throws AccessControlException, FileNotFoundException,

View File

@ -28,6 +28,34 @@
* Represents the network location of a block, information about the hosts
* that contain block replicas, and other block metadata (E.g. the file
* offset associated with the block, length, whether it is corrupt, etc).
*
* A single BlockLocation will have different meanings for replicated
* and erasure-coded files.
*
* If the file is 3-replicated, the offset and length of a BlockLocation
* are absolute values within the file, and the hosts are the 3 datanodes
* holding the replicas. Here is an example:
* <pre>
* BlockLocation(offset: 0, length: BLOCK_SIZE,
* hosts: {"host1:9866", "host2:9866", "host3:9866"})
* </pre>
*
* If the file is erasure-coded, each BlockLocation represents a logical
* block group. The offset is the offset of the block group in the file and
* the length is the total length of the block group. The hosts of a
* BlockLocation are the datanodes that hold all the data blocks and parity
* blocks of the block group.
* Suppose we have an RS_3_2 coded file (3 data units and 2 parity units).
* A BlockLocation example looks like:
* <pre>
* BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
* "host2:9866","host3:9866","host4:9866","host5:9866"})
* </pre>
*
* Please refer to
* {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or
* {@link FileContext#getFileBlockLocations(Path, long, long)}
* for more examples.
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
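To make the offset/length/hosts semantics above concrete, here is a minimal, hypothetical client sketch (not part of the patch) that prints a file's block locations through the existing FileSystem API; the path argument is illustrative.
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class PrintBlockLocations {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(new Path(args[0]));  // e.g. /data/example.bin
    // For a replicated file each entry is a single block; for an erasure-coded
    // file each entry is a block group, as described in the javadoc above.
    for (BlockLocation loc : fs.getFileBlockLocations(status, 0, status.getLen())) {
      System.out.println("offset=" + loc.getOffset()
          + " length=" + loc.getLength()
          + " hosts=" + Arrays.toString(loc.getHosts()));
    }
  }
}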

View File

@ -721,35 +721,6 @@ public class CommonConfigurationKeysPublic {
/** Default value for KMS_CLIENT_ENC_KEY_CACHE_EXPIRY (12 hrs)*/
public static final int KMS_CLIENT_ENC_KEY_CACHE_EXPIRY_DEFAULT = 43200000;
/**
* @see
* <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">
* core-default.xml</a>
*/
/** Default value is the number of providers specified. */
public static final String KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY =
"hadoop.security.kms.client.failover.max.retries";
/**
* @see
* <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">
* core-default.xml</a>
*/
public static final String KMS_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY =
"hadoop.security.kms.client.failover.sleep.base.millis";
/** Default value is 100 ms. */
public static final int KMS_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_DEFAULT = 100;
/**
* @see
* <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">
* core-default.xml</a>
*/
public static final String KMS_CLIENT_FAILOVER_SLEEP_MAX_MILLIS_KEY =
"hadoop.security.kms.client.failover.sleep.max.millis";
/** Default value is 2 secs. */
public static final int KMS_CLIENT_FAILOVER_SLEEP_MAX_MILLIS_DEFAULT = 2000;
/**
* @see
* <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">

View File

@ -230,8 +230,8 @@ public void unbuffer() {
try {
((CanUnbuffer)in).unbuffer();
} catch (ClassCastException e) {
throw new UnsupportedOperationException("this stream does not " +
"support unbuffering.");
throw new UnsupportedOperationException("this stream " +
in.getClass().getName() + " does not " + "support unbuffering.");
}
}

View File

@ -1293,7 +1293,36 @@ public Path next(final AbstractFileSystem fs, final Path p)
*
* This call is most helpful with DFS, where it returns
* hostnames of machines that contain the given file.
*
*
* In HDFS, if the file is three-replicated, the returned array contains
* elements like:
* <pre>
* BlockLocation(offset: 0, length: BLOCK_SIZE,
* hosts: {"host1:9866", "host2:9866", "host3:9866"})
* BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
* hosts: {"host2:9866", "host3:9866", "host4:9866"})
* </pre>
*
* And if a file is erasure-coded, the returned BlockLocations are logical
* block groups.
*
* Suppose we have an RS_3_2 coded file (3 data units and 2 parity units).
* 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
* there will be one BlockLocation returned, with 0 offset, actual file size
* and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
* 2. If the file size is less than one group size but greater than one
* stripe size, then there will be one BlockLocation returned, with 0 offset,
* actual file size and 5 hosts (3 data blocks and 2 parity blocks) hosting
* the actual blocks.
* 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
* for example, then the result looks like:
* <pre>
* BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
* "host2:9866","host3:9866","host4:9866","host5:9866"})
* BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
* "host4:9866", "host5:9866"})
* </pre>
*
* @param f - get blocklocations of this file
* @param start position (byte offset)
* @param len (in bytes)
@ -1527,7 +1556,7 @@ public RemoteIterator<Path> next(final AbstractFileSystem fs,
* Return the file's status and block locations if the path is a file.
*
* If a returned status is a file, it contains the file's block locations.
*
*
* @param f is the path
*
* @return an iterator that traverses statuses of the files/directories

View File

@ -799,7 +799,36 @@ protected void checkPath(Path path) {
* The default implementation returns an array containing one element:
* <pre>
* BlockLocation( { "localhost:9866" }, { "localhost" }, 0, file.getLen())
* </pre>>
* </pre>
*
* In HDFS, if the file is three-replicated, the returned array contains
* elements like:
* <pre>
* BlockLocation(offset: 0, length: BLOCK_SIZE,
* hosts: {"host1:9866", "host2:9866", "host3:9866"})
* BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
* hosts: {"host2:9866", "host3:9866", "host4:9866"})
* </pre>
*
* And if a file is erasure-coded, the returned BlockLocations are logical
* block groups.
*
* Suppose we have an RS_3_2 coded file (3 data units and 2 parity units).
* 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
* there will be one BlockLocation returned, with 0 offset, actual file size
* and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
* 2. If the file size is less than one group size but greater than one
* stripe size, then there will be one BlockLocation returned, with 0 offset,
* actual file size and 5 hosts (3 data blocks and 2 parity blocks) hosting
* the actual blocks.
* 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
* for example, then the result looks like:
* <pre>
* BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
* "host2:9866","host3:9866","host4:9866","host5:9866"})
* BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
* "host4:9866", "host5:9866"})
* </pre>
*
* @param file FileStatus to get data from
* @param start offset into the given file
@ -2115,6 +2144,7 @@ public RemoteIterator<FileStatus> listStatusIterator(final Path p)
* List the statuses and block locations of the files in the given path.
* Does not guarantee to return the iterator that traverses statuses
* of the files in a sorted order.
*
* <pre>
* If the path is a directory,
* if recursive is false, returns files in the directory;

View File

@ -123,6 +123,13 @@ public LocatedFileStatus(long length, boolean isdir,
/**
* Get the file's block locations
*
* In HDFS, the returned BlockLocation will have different formats for
* replicated and erasure-coded files.
* Please refer to
* {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
* for more details.
*
* @return the file's block locations
*/
public BlockLocation[] getBlockLocations() {

View File

@ -27,6 +27,7 @@
import java.io.PrintStream;
import java.net.BindException;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
@ -993,14 +994,31 @@ public WebAppContext getWebAppContext(){
* Get the pathname to the webapps files.
* @param appName eg "secondary" or "datanode"
* @return the pathname as a URL
* @throws FileNotFoundException if 'webapps' directory cannot be found on CLASSPATH.
* @throws FileNotFoundException if 'webapps' directory cannot be found
* on CLASSPATH or in the development location.
*/
protected String getWebAppsPath(String appName) throws FileNotFoundException {
URL url = getClass().getClassLoader().getResource("webapps/" + appName);
if (url == null)
throw new FileNotFoundException("webapps/" + appName
+ " not found in CLASSPATH");
String urlString = url.toString();
URL resourceUrl = null;
File webResourceDevLocation = new File("src/main/webapps", appName);
if (webResourceDevLocation.exists()) {
LOG.info("Web server is in development mode. Resources "
+ "will be read from the source tree.");
try {
resourceUrl = webResourceDevLocation.getParentFile().toURI().toURL();
} catch (MalformedURLException e) {
throw new FileNotFoundException("Mailformed URL while finding the "
+ "web resource dir:" + e.getMessage());
}
} else {
resourceUrl =
getClass().getClassLoader().getResource("webapps/" + appName);
if (resourceUrl == null) {
throw new FileNotFoundException("webapps/" + appName +
" not found in CLASSPATH");
}
}
String urlString = resourceUrl.toString();
return urlString.substring(0, urlString.lastIndexOf('/'));
}
@ -1200,6 +1218,7 @@ private void bindForPortRange(ServerConnector listener, int startPort)
* @throws Exception
*/
void openListeners() throws Exception {
LOG.debug("opening listeners: {}", listeners);
for (ServerConnector listener : listeners) {
if (listener.getLocalPort() != -1 && listener.getLocalPort() != -2) {
// This listener is either started externally or has been bound or was

View File

@ -28,7 +28,7 @@
* BoundedRangeFileInputStream on top of the same FSDataInputStream and they
* would not interfere with each other.
*/
class BoundedRangeFileInputStream extends InputStream {
public class BoundedRangeFileInputStream extends InputStream {
private FSDataInputStream in;
private long pos;

View File

@ -43,7 +43,7 @@
/**
* Compression related stuff.
*/
final class Compression {
public final class Compression {
static final Logger LOG = LoggerFactory.getLogger(Compression.class);
/**
@ -75,7 +75,7 @@ public void flush() throws IOException {
/**
* Compression algorithms.
*/
enum Algorithm {
public enum Algorithm {
LZO(TFile.COMPRESSION_LZO) {
private transient boolean checked = false;
private static final String defaultClazz =
@ -348,7 +348,7 @@ public String getName() {
}
}
static Algorithm getCompressionAlgorithmByName(String compressName) {
public static Algorithm getCompressionAlgorithmByName(String compressName) {
Algorithm[] algos = Algorithm.class.getEnumConstants();
for (Algorithm a : algos) {

View File

@ -25,7 +25,7 @@
* A simplified BufferedOutputStream with a borrowed buffer that allows users
* to see how much data has been buffered.
*/
class SimpleBufferedOutputStream extends FilterOutputStream {
public class SimpleBufferedOutputStream extends FilterOutputStream {
protected byte buf[]; // the borrowed buffer
protected int count = 0; // bytes used in buffer.

View File

@ -97,7 +97,9 @@ public long getCumulativeCpuTime() {
* @param newTime new sample time
*/
public void updateElapsedJiffies(BigInteger elapsedJiffies, long newTime) {
cumulativeCpuTime = elapsedJiffies.multiply(jiffyLengthInMillis);
BigInteger newValue = elapsedJiffies.multiply(jiffyLengthInMillis);
cumulativeCpuTime = newValue.compareTo(cumulativeCpuTime) >= 0 ?
newValue : cumulativeCpuTime;
sampleTime = newTime;
}
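The guard above only matters when a later /proc sample reports fewer jiffies than an earlier one; the self-contained sketch below (illustrative only, with an assumed 10 ms jiffy length) shows the same max-style guard keeping the reported cumulative CPU time monotonic.
import java.math.BigInteger;
public class MonotonicCpuTimeSketch {
  private static final BigInteger JIFFY_MILLIS = BigInteger.TEN;  // assumed 10 ms per jiffy
  private static BigInteger cumulativeCpuTime = BigInteger.ZERO;
  static void update(BigInteger elapsedJiffies) {
    BigInteger newValue = elapsedJiffies.multiply(JIFFY_MILLIS);
    // Never let a smaller or stale sample move the cumulative time backwards.
    cumulativeCpuTime = newValue.compareTo(cumulativeCpuTime) >= 0
        ? newValue : cumulativeCpuTime;
  }
  public static void main(String[] args) {
    update(BigInteger.valueOf(500));       // 5000 ms
    update(BigInteger.valueOf(480));       // smaller sample is ignored
    System.out.println(cumulativeCpuTime); // prints 5000
  }
}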

View File

@ -871,30 +871,6 @@
<description>File space usage statistics refresh interval in msec.</description>
</property>
<property>
<name>fs.s3n.buffer.dir</name>
<value>${hadoop.tmp.dir}/s3n</value>
<description>Determines where on the local filesystem the s3n:// filesystem
should store files before sending them to S3
(or after retrieving them from S3).
</description>
</property>
<property>
<name>fs.s3n.maxRetries</name>
<value>4</value>
<description>The maximum number of retries for reading or writing files to S3,
before we signal failure to the application.
</description>
</property>
<property>
<name>fs.s3n.sleepTimeSeconds</name>
<value>10</value>
<description>The number of seconds to sleep between each S3 retry.
</description>
</property>
<property>
<name>fs.swift.impl</name>
<value>org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem</value>
@ -911,56 +887,6 @@
</description>
</property>
<property>
<name>fs.s3n.awsAccessKeyId</name>
<description>AWS access key ID used by S3 native file system.</description>
</property>
<property>
<name>fs.s3n.awsSecretAccessKey</name>
<description>AWS secret key used by S3 native file system.</description>
</property>
<property>
<name>fs.s3n.block.size</name>
<value>67108864</value>
<description>Block size to use when reading files using the native S3
filesystem (s3n: URIs).</description>
</property>
<property>
<name>fs.s3n.multipart.uploads.enabled</name>
<value>false</value>
<description>Setting this property to true enables multiple uploads to
native S3 filesystem. When uploading a file, it is split into blocks
if the size is larger than fs.s3n.multipart.uploads.block.size.
</description>
</property>
<property>
<name>fs.s3n.multipart.uploads.block.size</name>
<value>67108864</value>
<description>The block size for multipart uploads to native S3 filesystem.
Default size is 64MB.
</description>
</property>
<property>
<name>fs.s3n.multipart.copy.block.size</name>
<value>5368709120</value>
<description>The block size for multipart copy in native S3 filesystem.
Default size is 5GB.
</description>
</property>
<property>
<name>fs.s3n.server-side-encryption-algorithm</name>
<value></value>
<description>Specify a server-side encryption algorithm for S3.
Unset by default, and the only other currently allowable value is AES256.
</description>
</property>
<property>
<name>fs.s3a.access.key</name>
<description>AWS access key ID used by S3A file system. Omit for IAM role-based or provider-based authentication.</description>
@ -1234,22 +1160,12 @@
uploads to.</description>
</property>
<property>
<name>fs.s3a.fast.upload</name>
<value>false</value>
<description>
Use the incremental block-based fast upload mechanism with
the buffering mechanism set in fs.s3a.fast.upload.buffer.
</description>
</property>
<property>
<name>fs.s3a.fast.upload.buffer</name>
<value>disk</value>
<description>
The buffering mechanism to use when using S3A fast upload
(fs.s3a.fast.upload=true). Values: disk, array, bytebuffer.
This configuration option has no effect if fs.s3a.fast.upload is false.
The buffering mechanism to use for data being written.
Values: disk, array, bytebuffer.
"disk" will use the directories listed in fs.s3a.buffer.dir as
the location(s) to save data prior to being uploaded.
@ -1428,20 +1344,16 @@
<description>The implementation class of the S3A AbstractFileSystem.</description>
</property>
<!-- Ozone file system properties -->
<property>
<name>fs.ozfs.impl</name>
<value>org.apache.hadoop.fs.ozone.OzoneFileSystem</value>
<description>The implementation class of the Ozone FileSystem.</description>
</property>
<property>
<name>fs.s3a.list.version</name>
<value>2</value>
<description>
Select which version of the S3 SDK's List Objects API to use. Currently
support 2 (default) and 1 (older API).
</description>
</property>
<property>
<name>fs.AbstractFileSystem.ozfs.impl</name>
<value>org.apache.hadoop.fs.ozone.OzFs</value>
<description>The implementation class of the OzFs AbstractFileSystem.</description>
</property>
<!-- Azure file system properties -->
<!-- Azure file system properties -->
<property>
<name>fs.wasb.impl</name>
<value>org.apache.hadoop.fs.azure.NativeAzureFileSystem</value>
@ -1547,7 +1459,21 @@
</property>
<!-- ipc properties -->
<!-- Ozone file system properties -->
<property>
<name>fs.ozfs.impl</name>
<value>org.apache.hadoop.fs.ozone.OzoneFileSystem</value>
<description>The implementation class of the Ozone FileSystem.</description>
</property>
<property>
<name>fs.AbstractFileSystem.ozfs.impl</name>
<value>org.apache.hadoop.fs.ozone.OzFs</value>
<description>The implementation class of the OzFs AbstractFileSystem.</description>
</property>
<!-- ipc properties -->
<property>
<name>ipc.client.idlethreshold</name>
@ -1807,42 +1733,6 @@
<description>Replication factor</description>
</property>
<!-- s3native File System -->
<property>
<name>s3native.stream-buffer-size</name>
<value>4096</value>
<description>The size of buffer to stream files.
The size of this buffer should probably be a multiple of hardware
page size (4096 on Intel x86), and it determines how much data is
buffered during read and write operations.</description>
</property>
<property>
<name>s3native.bytes-per-checksum</name>
<value>512</value>
<description>The number of bytes per checksum. Must not be larger than
s3native.stream-buffer-size</description>
</property>
<property>
<name>s3native.client-write-packet-size</name>
<value>65536</value>
<description>Packet size for clients to write</description>
</property>
<property>
<name>s3native.blocksize</name>
<value>67108864</value>
<description>Block size</description>
</property>
<property>
<name>s3native.replication</name>
<value>3</value>
<description>Replication factor</description>
</property>
<!-- FTP file system -->
<property>
<name>ftp.stream-buffer-size</name>
@ -1977,38 +1867,38 @@
<!-- HTTP CORS support -->
<property>
<name>hadoop.http.cross-origin.enabled</name>
<value>false</value>
<description>Enable/disable the cross-origin (CORS) filter.</description>
</property>
<property>
<name>hadoop.http.cross-origin.allowed-origins</name>
<value>*</value>
<description>Comma separated list of origins that are allowed for web
services needing cross-origin (CORS) support. Wildcards (*) and patterns
allowed</description>
</property>
<property>
<name>hadoop.http.cross-origin.allowed-methods</name>
<value>GET,POST,HEAD</value>
<description>Comma separated list of methods that are allowed for web
services needing cross-origin (CORS) support.</description>
</property>
<property>
<name>hadoop.http.cross-origin.allowed-headers</name>
<value>X-Requested-With,Content-Type,Accept,Origin</value>
<description>Comma separated list of headers that are allowed for web
services needing cross-origin (CORS) support.</description>
</property>
<property>
<name>hadoop.http.cross-origin.max-age</name>
<value>1800</value>
<description>The number of seconds a pre-flighted request can be cached
for web services needing cross-origin (CORS) support.</description>
</property>
<property>
@ -2099,13 +1989,13 @@
<!-- Static Web User Filter properties. -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>dr.who</value>
<description>
The user name to filter as, on static web filters
while rendering content. An example use is the HDFS
web UI (user to be used for browsing files).
</description>
</property>
<!-- SSLFactory configuration -->
@ -2453,34 +2343,6 @@
</description>
</property>
<property>
<name>hadoop.security.kms.client.failover.sleep.base.millis</name>
<value>100</value>
<description>
Expert only. The time to wait, in milliseconds, between failover
attempts increases exponentially as a function of the number of
attempts made so far, with a random factor of +/- 50%. This option
specifies the base value used in the failover calculation. The
first failover will retry immediately. The 2nd failover attempt
will delay at least hadoop.security.client.failover.sleep.base.millis
milliseconds. And so on.
</description>
</property>
<property>
<name>hadoop.security.kms.client.failover.sleep.max.millis</name>
<value>2000</value>
<description>
Expert only. The time to wait, in milliseconds, between failover
attempts increases exponentially as a function of the number of
attempts made so far, with a random factor of +/- 50%. This option
specifies the maximum value to wait between failovers.
Specifically, the time between two failover attempts will not
exceed +/- 50% of hadoop.security.client.failover.sleep.max.millis
milliseconds.
</description>
</property>
<property>
<name>ipc.server.max.connections</name>
<value>0</value>
@ -2496,6 +2358,8 @@
<!-- YARN registry -->
<property>
<name>hadoop.registry.rm.enabled</name>
<value>false</value>
<description>
Is the registry enabled in the YARN Resource Manager?
@ -2507,50 +2371,50 @@
If false, the paths must be created by other means,
and no automatic cleanup of service records will take place.
</description>
</property>
<property>
<name>hadoop.registry.zk.root</name>
<value>/registry</value>
<description>
The root zookeeper node for the registry
</description>
</property>
<property>
<name>hadoop.registry.zk.session.timeout.ms</name>
<value>60000</value>
<description>
Zookeeper session timeout in milliseconds
</description>
</property>
<property>
<name>hadoop.registry.zk.connection.timeout.ms</name>
<value>15000</value>
<description>
Zookeeper connection timeout in milliseconds
</description>
</property>
<property>
<name>hadoop.registry.zk.retry.times</name>
<value>5</value>
<description>
Zookeeper connection retry count before failing
</description>
</property>
<property>
<name>hadoop.registry.zk.retry.interval.ms</name>
<value>1000</value>
<description>
</description>
</property>
<property>
<name>hadoop.registry.zk.retry.ceiling.ms</name>
<value>60000</value>
<description>
Zookeeper retry limit in milliseconds, during
exponential backoff.
@ -2560,20 +2424,20 @@
with the backoff policy, result in a long retry
period
</description>
</property>
<property>
<name>hadoop.registry.zk.quorum</name>
<value>localhost:2181</value>
<description>
List of hostname:port pairs defining the
zookeeper quorum binding for the registry
</description>
</property>
<property>
<name>hadoop.registry.secure</name>
<value>false</value>
<description>
Key to set if the registry is secure. Turning it on
changes the permissions policy from "open access"
@ -2581,11 +2445,11 @@
a user adding one or more auth key pairs down their
own tree.
</description>
</property>
<property>
<name>hadoop.registry.system.acls</name>
<value>sasl:yarn@, sasl:mapred@, sasl:hdfs@</value>
<description>
A comma separated list of Zookeeper ACL identifiers with
system access to the registry in a secure cluster.
@ -2595,11 +2459,11 @@
If there is an "@" at the end of a SASL entry it
instructs the registry client to append the default kerberos domain.
</description>
</property>
<property>
<name>hadoop.registry.kerberos.realm</name>
<value></value>
<description>
The kerberos realm: used to set the realm of
system principals which do not declare their realm,
@ -2611,26 +2475,24 @@
If neither are known and the realm is needed, then the registry
service/client will fail.
</description>
</property>
<property>
<name>hadoop.registry.jaas.context</name>
<value>Client</value>
<description>
Key to define the JAAS context. Used in secure
mode
</description>
</property>
<property>
<name>hadoop.shell.missing.defaultFs.warning</name>
<value>false</value>
<description>
Enable hdfs shell commands to display warnings if (fs.defaultFS) property
is not set.
</description>
</property>
<property>
@ -2660,13 +2522,13 @@
</property>
<property>
<name>hadoop.http.logs.enabled</name>
<value>true</value>
<description>
Enable the "/logs" endpoint on all Hadoop daemons, which serves local
logs, but may be considered a security risk due to it listing the contents
of a directory.
</description>
</property>
<property>
@ -2721,8 +2583,7 @@
fs.adl.oauth2.credential, and fs.adl.oauth2.refresh.url.
The RefreshToken type requires property fs.adl.oauth2.client.id and
fs.adl.oauth2.refresh.token.
The MSI type reads optional property fs.adl.oauth2.msi.port, if specified.
The DeviceCode type requires property
fs.adl.oauth2.devicecode.clientapp.id.
The Custom type requires property fs.adl.oauth2.access.token.provider.
@ -2766,17 +2627,8 @@
<value></value>
<description>
The localhost port for the MSI token service. This is the port specified
      when creating the Azure VM. The default, if this setting is not specified,
      is 50342.
Used by MSI token provider.
</description>
</property>
@ -2841,48 +2693,48 @@
</property>
<property>
<name>hadoop.zk.address</name>
<!--value>127.0.0.1:2181</value-->
<description>Host:Port of the ZooKeeper server to be used.
</description>
</property>
<property>
<name>hadoop.zk.num-retries</name>
<value>1000</value>
<description>Number of tries to connect to ZooKeeper.</description>
</property>
<property>
<name>hadoop.zk.retry-interval-ms</name>
<value>1000</value>
<description>Retry interval in milliseconds when connecting to ZooKeeper.
</description>
</property>
<property>
<name>hadoop.zk.timeout-ms</name>
<value>10000</value>
<description>ZooKeeper session timeout in milliseconds. Session expiration
is managed by the ZooKeeper cluster itself, not by the client. This value is
used by the cluster to determine when the client's session expires.
    Expiration happens when the cluster does not hear from the client within
the specified session timeout period (i.e. no heartbeat).</description>
</property>
<property>
<name>hadoop.zk.acl</name>
<value>world:anyone:rwcda</value>
<description>ACL's to be used for ZooKeeper znodes.</description>
</property>
<property>
<name>hadoop.zk.auth</name>
<description>
Specify the auths to be used for the ACL's specified in hadoop.zk.acl.
This takes a comma-separated list of authentication mechanisms, each of the
form 'scheme:auth' (the same syntax used for the 'addAuth' command in
the ZK CLI).
</description>
</property>
</configuration>
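As a minimal sketch of how the settings above are consumed, the snippet below reads a few of them through org.apache.hadoop.conf.Configuration, which layers core-site.xml over these defaults. The fallback values passed to the getters are illustrative assumptions, not authoritative defaults.
```java
import org.apache.hadoop.conf.Configuration;

public class CoreDefaultLookup {
  public static void main(String[] args) {
    // Loads core-default.xml and core-site.xml from the classpath.
    Configuration conf = new Configuration();

    // String-valued property with an explicit fallback.
    String buffer = conf.get("fs.s3a.fast.upload.buffer", "disk");

    // Numeric property: the string value in the file is parsed on demand.
    int listVersion = conf.getInt("fs.s3a.list.version", 2);

    // A property whose value is commented out above resolves to the fallback.
    String zkQuorum = conf.get("hadoop.zk.address", "localhost:2181");

    System.out.printf("buffer=%s listVersion=%d zk=%s%n",
        buffer, listVersion, zkQuorum);
  }
}
```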
View File
@ -20,109 +20,276 @@ Apache Hadoop Compatibility
Purpose
-------
This document captures the compatibility goals of the Apache Hadoop project.
The different types of compatibility between Hadoop releases that affect
Hadoop developers, downstream projects, and end-users are enumerated. For each
type of compatibility this document will:
* describe the impact on downstream projects or end-users
* where applicable, call out the policy adopted by the Hadoop developers when incompatible changes are permitted.
All Hadoop interfaces are classified according to the intended audience and
stability in order to maintain compatibility with previous releases. See the
[Hadoop Interface Taxonomy](./InterfaceClassification.html) for details
about the classifications.
### Target Audience
This document is intended for consumption by the Hadoop developer community.
It describes the lens through which changes to the Hadoop project
should be viewed. In order for end users and third party developers to have
confidence about cross-release compatibility, the developer community must
ensure that development efforts adhere to these policies. It is the
responsibility of the project committers to validate that all changes either
maintain compatibility or are explicitly marked as incompatible.
Within a component Hadoop developers are free to use Private and Limited Private
APIs, but when using components from a different module Hadoop developers
should follow the same guidelines as third-party developers: do not
use Private or Limited Private (unless explicitly allowed) interfaces and
prefer instead Stable interfaces to Evolving or Unstable interfaces where
possible. Where not possible, the preferred solution is to expand the audience
of the API rather than introducing or perpetuating an exception to these
compatibility guidelines. When working within a Maven module Hadoop developers
should observe where possible the same level of restraint with regard to
using components located in other Maven modules.
Above all, Hadoop developers must be mindful of the impact of their changes.
Stable interfaces must not change between major releases. Evolving interfaces
must not change between minor releases. New classes and components must be
labeled appropriately for audience and stability. See the
[Hadoop Interface Taxonomy](./InterfaceClassification.html) for details about
when the various labels are appropriate. As a general rule, all new interfaces
and APIs should have the most limited labels (e.g. Private Unstable) that will
not inhibit the intent of the interface or API.
### Notational Conventions
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" are to be interpreted as
described in [RFC 2119](http://tools.ietf.org/html/rfc2119).
Deprecation
-----------
The Java API provides a @Deprecated annotation to mark an API element as
flagged for removal. The standard meaning of the annotation is that the
API element should not be used and may be removed in a later version.
In all cases removing an element from an API is an incompatible
change. In the case of [Stable](./InterfaceClassification.html#Stable) APIs,
the change cannot be made between minor releases within the same major
version. In addition, to allow consumers of the API time to adapt to the change,
the API element to be removed should be marked as deprecated for a full major
release before it is removed. For example, if a method is marked as deprecated
in Hadoop 2.8, it cannot be removed until Hadoop 4.0.
### Policy
[Stable](./InterfaceClassification.html#Stable) API elements MUST NOT be removed
until they have been marked as deprecated (through the @Deprecated annotation or
other appropriate documentation) for a full major release. In the case that an
API element was introduced as deprecated (to indicate that it is a temporary
measure that is intended to be removed) the API element MAY be removed in the
following major release. When modifying a
[Stable](./InterfaceClassification.html#Stable) API, developers SHOULD prefer
introducing a new method or endpoint and deprecating the existing one to making
incompatible changes to the method or endpoint.
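The following is a sketch of that preferred pattern; the class and method names are hypothetical. The deprecated element survives for a full major release, delegates to its replacement, and documents where callers should move.
```java
/**
 * Hypothetical sketch of evolving an API: the old method is retained and
 * marked deprecated, delegating to its replacement so existing callers keep
 * working for at least one full major release before removal.
 */
public class QuotaAdmin {

  /**
   * @deprecated use {@link #setQuota(String, long)} instead.
   */
  @Deprecated
  public void setLimit(String path, int limit) {
    setQuota(path, limit);
  }

  /** Replacement method introduced alongside the deprecated one. */
  public void setQuota(String path, long quota) {
    // implementation elided in this sketch
  }
}
```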
Compatibility types
-------------------
### Java API
Developers SHOULD annotate all Hadoop interfaces and classes with the
@InterfaceAudience and @InterfaceStability annotations to describe the
intended audience and stability. Annotations may be at the package, class, or
member variable or method level. Member variable and method annotations SHALL
override class annotations, and class annotations SHALL override package
annotations. A package, class, or member variable or method that is not
annotated SHALL be interpreted as implicitly
[Private](./InterfaceClassification.html#Private) and
[Unstable](./InterfaceClassification.html#Unstable).
* @InterfaceAudience captures the intended audience. Possible values are
[Public](./InterfaceClassification.html#Public) (for end users and external
projects), Limited[Private](./InterfaceClassification.html#Private) (for other
Hadoop components, and closely related projects like YARN, MapReduce, HBase
etc.), and [Private](./InterfaceClassification.html#Private)
(for intra component use).
* @InterfaceStability describes what types of interface changes are permitted. Possible values are [Stable](./InterfaceClassification.html#Stable), [Evolving](./InterfaceClassification.html#Evolving), and [Unstable](./InterfaceClassification.html#Unstable).
* @Deprecated notes that the package, class, or member variable or method could potentially be removed in the future and should not be used.
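The sketch below shows how these annotations, which live in the hadoop-annotations artifact under org.apache.hadoop.classification, might be combined; the class and method names are hypothetical.
```java
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * Hypothetical example of audience and stability labeling: the class is
 * visible to end users and stable, while one method is still evolving and
 * only intended for a closely related project.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class ExampleRecordReader {

  /** Public and Stable by inheritance from the class-level annotations. */
  public long getRecordCount() {
    return 0L;
  }

  /** Member-level annotations override the class-level ones. */
  @InterfaceAudience.LimitedPrivate({"MapReduce"})
  @InterfaceStability.Evolving
  public void resetInternalCounters() {
    // intentionally empty in this sketch
  }
}
```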
#### Use Cases
* [Public](./InterfaceClassification.html#Public)-[Stable](./InterfaceClassification.html#Stable) API compatibility is required to ensure end-user programs and downstream projects continue to work without modification.
* [Public](./InterfaceClassification.html#Public)-[Evolving](./InterfaceClassification.html#Evolving) API compatibility is useful to make functionality available for consumption before it is fully baked.
* Limited Private-[Stable](./InterfaceClassification.html#Stable) API compatibility is required to allow upgrade of individual components across minor releases.
* [Private](./InterfaceClassification.html#Private)-[Stable](./InterfaceClassification.html#Stable) API compatibility is required for rolling upgrades.
* [Private](./InterfaceClassification.html#Private)-[Unstable](./InterfaceClassification.html#Unstable) API compatibility allows internal components to evolve rapidly without concern for downstream consumers, and is how most interfaces should be labeled.
#### Policy
The compatibility policy SHALL be determined by the relevant package, class, or
member variable or method annotations.
Note: APIs generated from the proto files MUST be compatible for rolling
upgrades. See the section on wire protocol compatibility for more details. The
compatibility policies for APIs and wire protocols must therefore go hand
in hand.
#### Semantic compatibility
Apache Hadoop strives to ensure that the behavior of APIs remains consistent
over versions, though changes for correctness may result in changes in
behavior. API behavior SHALL be specified by the JavaDoc API documentation
where present and complete. When JavaDoc API documentation is not available,
behavior SHALL be specified by the behavior expected by the related unit tests.
In cases with no JavaDoc API documentation or unit test coverage, the expected
behavior is presumed to be obvious and SHOULD be assumed to be the minimum
functionality implied by the interface naming. The community is in the process
of specifying some APIs more rigorously and enhancing test suites to verify
compliance with the specification, effectively creating a formal specification
for the subset of behaviors that can be easily tested.
The behavior of any API MAY be changed to fix incorrect behavior according to
the stability of the API, with such a change to be accompanied by updating
existing documentation and tests and/or adding new documentation or tests.
#### Java Binary compatibility for end-user applications i.e. Apache Hadoop ABI
Apache Hadoop revisions SHOULD retain binary compatibility such that end-user
applications continue to work without any modifications. Minor Apache Hadoop
revisions within the same major revision MUST retain compatibility such that
existing MapReduce applications (e.g. end-user applications and projects such
as Apache Pig, Apache Hive, et al), existing YARN applications (e.g.
end-user applications and projects such as Apache Spark, Apache Tez et al),
and applications that access HDFS directly (e.g. end-user applications and
projects such as Apache HBase, Apache Flume, et al) work unmodified and without
recompilation when used with any Apache Hadoop cluster within the same major
release as the original build target.
For MapReduce applications in particular, i.e. applications using the
org.apache.hadoop.mapred and/or org.apache.hadoop.mapreduce APIs, the developer
community SHALL support binary compatibility across major releases. The
MapReduce APIs SHALL be supported compatibly across major releases. See
[Compatibility for MapReduce applications between hadoop-1.x and hadoop-2.x](../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html) for more details.
Some applications may be affected by changes to disk layouts or other internal
changes. See the sections that follow for policies on how incompatible
changes to non-API interfaces are handled.
### Native Dependencies
Hadoop includes several native components, including compression, the
container executor binary, and various native integrations. These native
components introduce a set of native dependencies for Hadoop, both at compile
time and at runtime, such as cmake, gcc, zlib, etc. This set of native
dependencies is part of the Hadoop ABI.
#### Policy
The minimum required versions of the native components on which Hadoop depends
at compile time and/or runtime SHALL be considered
[Stable](./InterfaceClassification.html#Stable). Changes to the minimum
required versions MUST NOT increase between minor releases within a major
version.
### Wire Protocols
Wire compatibility concerns data being transmitted "over the wire" between
Hadoop processes. Hadoop uses
[Protocol Buffers](https://developers.google.com/protocol-buffers/) for most
RPC communication. Preserving compatibility requires prohibiting modification
as described below. Non-RPC communication should be considered as well, for
example using HTTP to transfer an HDFS image as part of snapshotting or
transferring MapReduce map task output. The communications can be categorized as
follows:
* Client-Server: communication between Hadoop clients and servers (e.g., the HDFS client to NameNode protocol, or the YARN client to ResourceManager protocol).
* Client-Server (Admin): It is worth distinguishing a subset of the Client-Server protocols used solely by administrative commands (e.g., the HAAdmin protocol) as these protocols only impact administrators who can tolerate changes that end users (which use general Client-Server protocols) cannot.
* Server-Server: communication between servers (e.g., the protocol between the DataNode and NameNode, or NodeManager and ResourceManager)
#### Protocol Dependencies
The components of Apache Hadoop may have dependencies that include their own
protocols, such as Zookeeper, S3, Kerberos, etc. These protocol dependencies
SHALL be treated as internal protocols and governed by the same policy.
#### Transports
In addition to compatibility of the protocols themselves, maintaining
cross-version communications requires that the transports supported also be
stable. The most likely source of transport changes stems from secure
transports, such as SSL. Upgrading a service from SSLv2 to SSLv3 may break
existing SSLv2 clients. The minimum supported major version of any transport
MUST NOT increase across minor releases within a major version.
Service ports are considered as part of the transport mechanism. Fixed
service port numbers MUST be kept consistent to prevent breaking clients.
#### Policy
Hadoop wire protocols are defined in .proto (ProtocolBuffers) files.
Client-Server and Server-Server protocols SHALL be classified according to the
audience and stability classifications noted in their .proto files. In cases
where no classifications are present, the protocols SHOULD be assumed to be
[Private](./InterfaceClassification.html#Private) and
[Stable](./InterfaceClassification.html#Stable).
The following changes to a .proto file SHALL be considered compatible:
* Add an optional field, with the expectation that the code deals with the field missing due to communication with an older version of the code
* Add a new rpc/method to the service
* Add a new optional request to a Message
* Rename a field
* Rename a .proto file
* Change .proto annotations that affect code generation (e.g. name of java package)
The following changes to a .proto file SHALL be considered incompatible:
* Change an rpc/method name
* Change an rpc/method parameter type or return type
* Remove an rpc/method
* Change the service name
* Change the name of a Message
* Modify a field type in an incompatible way (as defined recursively)
* Change an optional field to required
* Add or delete a required field
* Delete an optional field as long as the optional field has reasonable defaults to allow deletions
The following changes to a .proto file SHALL be considered incompatible and are never allowed:
* Change a field id
* Reuse an old field that was previously deleted.
Hadoop wire protocols that are not defined via .proto files SHOULD be considered
to be [Private](./InterfaceClassification.html#Private) and
[Stable](./InterfaceClassification.html#Stable).
In addition to the limitations imposed by being
[Stable](./InterfaceClassification.html#Stable), Hadoop's wire protocols
MUST also be forward compatible across minor releases within a major version
according to the following:
* Client-Server compatibility MUST be maintained so as to allow users to continue using older clients even after upgrading the server (cluster) to a later version (or vice versa). For example, a Hadoop 2.1.0 client talking to a Hadoop 2.3.0 cluster.
* Client-Server compatibility MUST be maintained so as to allow users to upgrade the client before upgrading the server (cluster). For example, a Hadoop 2.4.0 client talking to a Hadoop 2.3.0 cluster. This allows deployment of client-side bug fixes ahead of full cluster upgrades. Note that new cluster features invoked by new client APIs or shell commands will not be usable. YARN applications that attempt to use new APIs (including new fields in data structures) that have not yet been deployed to the cluster can expect link exceptions.
* Client-Server compatibility MUST be maintained so as to allow upgrading individual components without upgrading others. For example, upgrade HDFS from version 2.1.0 to 2.2.0 without upgrading MapReduce.
* Server-Server compatibility MUST be maintained so as to allow mixed versions within an active cluster so the cluster may be upgraded without downtime in a rolling fashion.
New transport mechanisms MUST only be introduced with minor or major version
changes. Existing transport mechanisms MUST continue to be supported across
minor versions within a major version. Service port numbers MUST remain
consistent across minor version numbers within a major version.
### REST APIs
REST API compatibility applies to the REST endpoints (URLs) and response data
format. Hadoop REST APIs are specifically meant for stable use by clients across
releases, even major ones. The following is a non-exhaustive list of the
exposed REST APIs:
* [WebHDFS](../hadoop-hdfs/WebHDFS.html)
* [ResourceManager](../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html)
* [NodeManager](../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html)
* [MR Application Master](../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html)
@ -130,134 +297,390 @@ REST API compatibility corresponds to both the requests (URLs) and responses to
* [Timeline Server v1 REST API](../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html)
* [Timeline Service v2 REST API](../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html)
Each API has an API-specific version number. Any incompatible changes MUST
increment the API version number.
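For example, the WebHDFS endpoint carries its API version in the URL path (`/webhdfs/v1/`). The following is a hypothetical client sketch; the host, port, and user name are placeholders.
```java
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

/** Hypothetical WebHDFS client; "v1" in the path is the API version. */
public class WebHdfsListing {
  public static void main(String[] args) throws IOException, InterruptedException {
    URI uri = URI.create(
        "http://namenode.example.com:9870/webhdfs/v1/tmp?op=LISTSTATUS&user.name=hdfs");
    HttpClient client = HttpClient.newHttpClient();
    HttpRequest request = HttpRequest.newBuilder(uri).GET().build();
    HttpResponse<String> response =
        client.send(request, HttpResponse.BodyHandlers.ofString());
    // The JSON response shape is part of the compatibility surface,
    // just like the versioned URL above.
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }
}
```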
#### Policy
The Hadoop REST APIs SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Evolving](./InterfaceClassification.html#Evolving). With respect to API version
numbers, the Hadoop REST APIs SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Stable](./InterfaceClassification.html#Stable), i.e. no incompatible changes
are allowed within an API version number.
### Log Output
The Hadoop daemons and CLIs produce log output via Log4j that is intended to
aid administrators and developers in understanding and troubleshooting cluster
behavior. Log messages are intended for human consumption, though automation
use cases are also supported.
#### Policy
All log output SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Evolving](./InterfaceClassification.html#Evolving).
### Audit Log Output
Several components have audit logging systems that record system information in
a machine readable format. Incompatible changes to that data format may break
existing automation utilities. For the audit log, an incompatible change is any
change that alters the format such that existing parsers can no longer parse
the logs.
#### Policy
All audit log output SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Stable](./InterfaceClassification.html#Stable). Any change to the
data format SHALL be considered an incompatible change.
### Metrics/JMX
While the Metrics API compatibility is governed by Java API compatibility, the
Metrics data format exposed by Hadoop MUST be maintained as compatible for
consumers of the data, e.g. for automation tasks.
#### Policy
The data format exposed via Metrics SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Stable](./InterfaceClassification.html#Stable).
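As an illustration of why this matters, automation commonly discovers Hadoop metrics through JMX. The sketch below is hypothetical and assumes it runs inside (or is adapted to attach to) a Hadoop daemon's JVM; the only Hadoop-specific assumption is the "Hadoop" JMX domain used in the object name pattern.
```java
import java.lang.management.ManagementFactory;
import java.util.Set;
import javax.management.MBeanServer;
import javax.management.ObjectName;

/** Lists the Hadoop metrics MBeans visible on the local platform MBean server. */
public class ListHadoopMBeans {
  public static void main(String[] args) throws Exception {
    MBeanServer server = ManagementFactory.getPlatformMBeanServer();
    Set<ObjectName> names = server.queryNames(new ObjectName("Hadoop:*"), null);
    for (ObjectName name : names) {
      // Renaming an MBean or one of its attributes would break scripts like
      // this one, which is why the exposed data format is treated as Stable.
      System.out.println(name.getCanonicalName());
    }
  }
}
```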
### File formats & Metadata
User and system level data (including metadata) is stored in files of various
formats. Changes to the metadata or the file formats used to store
data/metadata can lead to incompatibilities between versions. Each class of file
format is addressed below.
#### User-level file formats
Changes to formats that end users use to store their data can prevent them from
accessing the data in later releases, and hence are important to be compatible.
Examples of these formats include har, war, SequenceFileFormat, etc.
##### Policy
User-level file formats SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Stable](./InterfaceClassification.html#Stable). User-level file
format changes SHOULD be made forward compatible across major releases and MUST
be made forward compatible within a major release. The developer community
SHOULD prefer the creation of a new derivative file format to making
incompatible changes to an existing file format. Such new file formats MUST be
created as opt-in, meaning that users must be able to continue using the
existing compatible format until and unless they explicitly opt in to using
the new file format.
#### System-internal data schemas
Hadoop internal data may also be stored in files or other data stores. Changing
the schemas of these data stores can lead to incompatibilities.
##### MapReduce
MapReduce uses formats like I-File to store MapReduce-specific data.
###### Policy
All MapReduce-internal file formats, such as I-File format or the job history
server's jhist file format, SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Stable](./InterfaceClassification.html#Stable).
##### HDFS Metadata
HDFS persists metadata (the image and edit logs) in a private file format.
Incompatible changes to either the format or the metadata prevent subsequent
releases from reading older metadata. Incompatible changes MUST include a
process by which existing metadata may be upgraded. Changes SHALL be
allowed to require more than one upgrade. Incompatible changes MUST result in
the metadata version number being incremented.
Depending on the degree of incompatibility in the changes, the following
potential scenarios can arise:
* Automatic: The image upgrades automatically, no need for an explicit "upgrade".
* Direct: The image is upgradable, but might require one explicit release "upgrade".
* Indirect: The image is upgradable, but might require upgrading to intermediate release(s) first.
* Not upgradeable: The image is not upgradeable.
HDFS data nodes store data in a private directory structure. The schema of that
directory structure must remain stable to retain compatibility.
###### Policy
The HDFS metadata format SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Evolving](./InterfaceClassification.html#Evolving). Incompatible
changes MUST include a process by which existing metadata may be upgraded. The
upgrade process MUST allow the cluster metadata to be rolled back to the older
version and its older disk format. The rollback MUST restore the original data
but is not REQUIRED to restore the updated data. Any incompatible change
to the format MUST result in the major version number of the schema being
incremented.
The data node directory format SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Evolving](./InterfaceClassification.html#Evolving).
##### AWS S3A Guard Metadata
For each operation in the Hadoop S3 client (s3a) that reads or modifies
file metadata, a shadow copy of that file metadata is stored in a separate
metadata store, which offers HDFS-like consistency for the metadata, and may
also provide faster lookups for things like file status or directory listings.
S3A guard tables are created with a version marker which indicates
compatibility.
###### Policy
The S3A guard metadata schema SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Unstable](./InterfaceClassification.html#Unstable). Any incompatible change
to the schema MUST result in the version number of the schema being incremented.
##### YARN Resource Manager State Store
The YARN resource manager stores information about the cluster state in an
external state store for use in fail over and recovery. If the schema used for
the state store data does not remain compatible, the resource manager will not
be able to recover its state and will fail to start. The state store data
schema includes a version number that indicates compatibility.
###### Policy
The YARN resource manager state store data schema SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Evolving](./InterfaceClassification.html#Evolving). Any incompatible change
to the schema MUST result in the major version number of the schema being
incremented. Any compatible change to the schema MUST result in the minor
version number being incremented.
##### YARN Node Manager State Store
The YARN node manager stores information about the node state in an
external state store for use in recovery. If the schema used for the state
store data does not remain compatible, the node manager will not
be able to recover its state and will fail to start. The state store data
schema includes a version number that indicates compatibility.
###### Policy
The YARN node manager state store data schema SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Evolving](./InterfaceClassification.html#Evolving). Any incompatible change
to the schema MUST result in the major version number of the schema being
incremented. Any compatible change to the schema MUST result in the minor
version number being incremented.
##### YARN Federation State Store
The YARN resource manager federation service stores information about the
federated clusters, running applications, and routing policies in an
external state store for use in replication and recovery. If the schema used
for the state store data does not remain compatible, the federation service
will fail to initialize. The state store data schema includes a version number
that indicates compatibility.
###### Policy
The YARN federation service state store data schema SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Evolving](./InterfaceClassification.html#Evolving). Any incompatible change
to the schema MUST result in the major version number of the schema being
incremented. Any compatible change to the schema MUST result in the minor
version number being incremented.
### Command Line Interface (CLI)
The Hadoop command line programs may be used either directly via the system
shell or via shell scripts. The CLIs include both the user-facing commands, such
as the hdfs command or the yarn command, and the admin-facing commands, such as
the scripts used to start and stop daemons. Changing the path of a command,
removing or renaming command line options, the order of arguments, or the
command return codes and output break compatibility and adversely affect users.
#### Policy
All Hadoop CLI paths, usage, and output SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Stable](./InterfaceClassification.html#Stable).
Note that the CLI output SHALL be considered distinct from the log output
generated by the Hadoop CLIs. The latter SHALL be governed by the policy on log
output. Note also that for CLI output, all changes SHALL be considered
incompatible changes.
### Web UI
Web UI, particularly the content and layout of web pages, changes could
potentially interfere with attempts to screen scrape the web pages for
information. The Hadoop Web UI pages, however, are not meant to be scraped, e.g.
for automation purposes. Users are expected to use REST APIs to programmatically
access cluster information.
#### Policy
The Hadoop Web UI SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Unstable](./InterfaceClassification.html#Unstable).
### Hadoop Configuration Files
Users use Hadoop-defined properties to configure and provide hints to Hadoop and
custom properties to pass information to jobs. Users are encouraged to avoid
using custom configuration property names that conflict with the namespace of
Hadoop-defined properties and should avoid using any prefixes used by Hadoop,
e.g. hadoop, io, ipc, fs, net, file, ftp, s3, kfs, ha, dfs, mapred,
mapreduce, and yarn.
#### Policy
Hadoop-defined properties (names and meanings) SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Stable](./InterfaceClassification.html#Stable). The units implied by a
Hadoop-defined property MUST NOT change, even
across major versions. Default values of Hadoop-defined properties SHALL be
considered [Public](./InterfaceClassification.html#Public) and
[Evolving](./InterfaceClassification.html#Evolving).
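A hypothetical sketch of the recommended naming practice, using the standard org.apache.hadoop.conf.Configuration API; the custom key and paths are made up for illustration.
```java
import org.apache.hadoop.conf.Configuration;

/** Passes an application-owned property through the Hadoop configuration. */
public class CustomPropertyExample {
  // Application-owned key: stays outside Hadoop-reserved prefixes such as
  // "hadoop.", "fs.", or "ipc.".
  private static final String TEMP_DIR_KEY = "com.example.etl.temp.dir";

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set(TEMP_DIR_KEY, "/user/example/tmp");

    // Hadoop-defined property names and meanings are Stable, so reading them
    // by name is safe across releases; their default values are only
    // Evolving, so supply an explicit fallback where it matters.
    String defaultFs = conf.get("fs.defaultFS", "file:///");

    System.out.println(conf.get(TEMP_DIR_KEY) + " on " + defaultFs);
  }
}
```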
### Log4j Configuration Files
The log output produced by Hadoop daemons and CLIs is governed by a set of
configuration files. These files control the minimum level of log message that
will be output by the various components of Hadoop, as well as where and how
those messages are stored.
#### Policy
All Log4j configurations SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Evolving](./InterfaceClassification.html#Evolving).
### Directory Structure
Source code, artifacts (source and tests), user logs, configuration files,
output, and job history are all stored on disk either local file system or HDFS.
Changing the directory structure of these user-accessible files can break
compatibility, even in cases where the original path is preserved via symbolic
links (such as when the path is accessed by a servlet that is configured to
not follow symbolic links).
#### Policy
The layout of source code and build artifacts SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Unstable](./InterfaceClassification.html#Unstable). Within a major version,
the developer community SHOULD preserve the
overall directory structure, though individual files MAY be added, moved, or
deleted with no warning.
The directory structure of configuration files, user logs, and job history SHALL
be considered [Public](./InterfaceClassification.html#Public) and
[Evolving](./InterfaceClassification.html#Evolving).
### Java Classpath
Hadoop provides several client artifacts that applications use to interact
with the system. These artifacts typically have their own dependencies on
common libraries. In the cases where these dependencies are exposed to
end user applications or downstream consumers (i.e. not
[shaded](https://stackoverflow.com/questions/13620281/what-is-the-maven-shade-plugin-used-for-and-why-would-you-want-to-relocate-java))
changes to these dependencies can be disruptive. Developers are strongly
encouraged to avoid exposing dependencies to clients by using techniques
such as
[shading](https://stackoverflow.com/questions/13620281/what-is-the-maven-shade-plugin-used-for-and-why-would-you-want-to-relocate-java).
With regard to dependencies, adding a dependency is an incompatible change,
whereas removing a dependency is a compatible change.
Some user applications built against Hadoop may add all Hadoop JAR files
(including Hadoop's library dependencies) to the application's classpath.
Adding new dependencies or updating the versions of existing dependencies may
interfere with those in applications' classpaths and hence their correct
operation. Users are therefore discouraged from adopting this practice.
#### Policy
The set of dependencies exposed by the Hadoop client artifacts SHALL be
considered [Public](./InterfaceClassification.html#Public) and
[Stable](./InterfaceClassification.html#Stable). Any dependencies that are not
exposed to clients (either because they are shaded or only exist in non-client
artifacts) SHALL be considered [Private](./InterfaceClassification.html#Private)
and [Unstable](./InterfaceClassification.html#Unstable).
### Environment variables
Users and related projects often utilize the environment variables exported by
Hadoop (e.g. HADOOP\_CONF\_DIR). Removing or renaming environment variables can
therefore impact end user applications.
#### Policy
The environment variables consumed by Hadoop and the environment variables made
accessible to applications through YARN SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Evolving](./InterfaceClassification.html#Evolving).
The developer community SHOULD limit changes to major releases.
### Build artifacts
Hadoop uses maven for project management and changing the artifacts can affect existing user workflows.
Hadoop uses Maven for project management. Changes to the contents of
generated artifacts can impact existing user applications.
#### Policy
* Test artifacts: The test jars generated are strictly for internal use and are not expected to be used outside of Hadoop, similar to APIs annotated @Private, @Unstable.
* Built artifacts: The hadoop-client artifact (maven groupId:artifactId) stays compatible within a major release, while the other artifacts can change in incompatible ways.
The contents of Hadoop test artifacts SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Unstable](./InterfaceClassification.html#Unstable). Test artifacts include
all JAR files generated from test source code and all JAR files that include
"tests" in the file name.
The Hadoop client artifacts SHALL be considered
[Public](./InterfaceClassification.html#Public) and
[Stable](./InterfaceClassification.html#Stable). Client artifacts are the
following:
* hadoop-client
* hadoop-client-api
* hadoop-client-minicluster
* hadoop-client-runtime
* hadoop-hdfs-client
* hadoop-hdfs-native-client
* hadoop-mapreduce-client-app
* hadoop-mapreduce-client-common
* hadoop-mapreduce-client-core
* hadoop-mapreduce-client-hs
* hadoop-mapreduce-client-hs-plugins
* hadoop-mapreduce-client-jobclient
* hadoop-mapreduce-client-nativetask
* hadoop-mapreduce-client-shuffle
* hadoop-yarn-client
All other build artifacts SHALL be considered
[Private](./InterfaceClassification.html#Private) and
[Unstable](./InterfaceClassification.html#Unstable).
### Hardware/Software Requirements
To keep up with the latest advances in hardware, operating systems, JVMs, and other software, new Hadoop releases or some of their features might require higher versions of the same. For a specific environment, upgrading Hadoop might require upgrading other dependent software components.
To keep up with the latest advances in hardware, operating systems, JVMs, and
other software, new Hadoop releases may include features that require
newer hardware, operating systems releases, or JVM versions than previous
Hadoop releases. For a specific environment, upgrading Hadoop might require
upgrading other dependent software components.
#### Policies
* Hardware
* Architecture: The community has no plans to restrict Hadoop to specific architectures, but Hadoop MAY include family-specific optimizations.
* Minimum resources: While there are no guarantees on the minimum resources required by Hadoop daemons, the community attempts to not increase requirements within a minor release.
* Operating Systems: The community will attempt to maintain the same OS requirements (OS kernel versions) within a minor release. Currently GNU/Linux and Microsoft Windows are the OSes officially supported by the community while Apache Hadoop is known to work reasonably well on other OSes such as Apple MacOSX, Solaris etc.
* The JVM requirements will not change across point releases within the same minor release except if the JVM version under question becomes unsupported. Minor/major releases might require later versions of JVM for some/all of the supported operating systems.
* Other software: The community tries to maintain the minimum versions of additional software required by Hadoop; for example, ssh, Kerberos, etc.
* Minimum resources: While there are no guarantees on the minimum resources required by Hadoop daemons, the developer community SHOULD avoid increasing requirements within a minor release.
* Operating Systems: The community SHOULD maintain the same minimum OS requirements (OS kernel versions) within a minor release. Currently GNU/Linux and Microsoft Windows are the OSes officially supported by the community, while Apache Hadoop is known to work reasonably well on other OSes such as Apple MacOSX, Solaris, etc.
* The JVM requirements SHALL NOT change across minor releases within the same major release unless the JVM version in question becomes unsupported. The JVM version requirement MAY be different for different operating systems or even operating system releases.
* File systems supported by Hadoop, e.g. through the HDFS FileSystem API, SHOULD NOT become unsupported between minor releases within a major version unless a migration path to an alternate client implementation is available.
References
----------
View File
@ -66,54 +66,103 @@ Hadoop uses the following kinds of audience in order of increasing/wider visibil
#### Private
The interface is for internal use within the project (such as HDFS or MapReduce)
and should not be used by applications or by other projects. It is subject to
change at any time without notice. Most interfaces of a project are Private (also
referred to as project-private).
A Private interface is for internal use within the project (such as HDFS or
MapReduce) and should not be used by applications or by other projects. Most
interfaces of a project are Private (also referred to as project-private).
Unless an interface is intentionally exposed for external consumption, it should
be marked Private.
#### Limited-Private
The interface is used by a specified set of projects or systems (typically
closely related projects). Other projects or systems should not use the
interface. Changes to the interface will be communicated/negotiated with the
A Limited-Private interface is used by a specified set of projects or systems
(typically closely related projects). Other projects or systems should not use
the interface. Changes to the interface will be communicated/negotiated with the
specified projects. For example, in the Hadoop project, some interfaces are
LimitedPrivate{HDFS, MapReduce} in that they are private to the HDFS and
MapReduce projects.
#### Public
The interface is for general use by any application.
A Public interface is for general use by any application.
### Change Compatibility
Changes to an API fall into two broad categories: compatible and incompatible.
A compatible change is a change that meets the following criteria:
* no existing capabilities are removed,
* no existing capabilities are modified in a way that prevents their use by clients that were constructed to use the interface prior to the change, and
* no capabilities are added that require changes to clients that were constructed to use the interface prior to the change.
Any change that does not meet these three criteria is an incompatible change.
Stated simply a compatible change will not break existing clients. These
examples are compatible changes:
* adding a method to a Java class,
* adding an optional parameter to a RESTful web service, or
* adding a tag to an XML document.
* making the audience annotation of an interface more broad (e.g. from Private to Public) or the change compatibility annotation more restrictive (e.g. from Evolving to Stable)
These examples are incompatible changes:
* removing a method from a Java class,
* adding a method to a Java interface,
* adding a required parameter to a RESTful web service, or
* renaming a field in a JSON document.
* making the audience annotation of an interface less broad (e.g. from Public to Limited Private) or the change compatibility annotation less restrictive (e.g. from Evolving to Unstable)
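To make the Java items in the two lists above concrete, here is a small sketch; the class and interface names are invented for this document and do not refer to real Hadoop APIs.

```java
// Hypothetical API, used only to illustrate the compatibility rules above.

// Compatible: adding a method to a class. Existing callers and subclasses
// still compile and run; they simply never invoke the new method.
class RecordReader {
  public String read() { return "record"; }
  // Added in a later release: a compatible change.
  public String read(String encoding) { return "record:" + encoding; }
}

// Incompatible: adding a method to an interface. Absent a default
// implementation, every existing implementation written against the old
// interface fails to compile until it adds close().
interface RecordSource {
  String next();
  // Added in a later release: an incompatible change for implementors.
  void close();
}
```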
### Stability
Stability denotes how stable an interface is, as in when incompatible changes to
the interface are allowed. Hadoop APIs have the following levels of stability.
Stability denotes how stable an interface is and when compatible and
incompatible changes to the interface are allowed. Hadoop APIs have the
following levels of stability.
#### Stable
Can evolve while retaining compatibility for minor release boundaries; in other
words, incompatible changes to APIs marked as Stable are allowed only at major
releases (i.e. at m.0).
A Stable interface is exposed as a preferred means of communication. A Stable
interface is expected not to change incompatibly within a major release and
hence serves as a safe development target. A Stable interface may evolve
compatibly between minor releases.
Incompatible changes allowed: major (X.0.0)
Compatible changes allowed: maintenance (x.Y.0)
#### Evolving
Evolving, but incompatible changes are allowed at minor releases (i.e. m.x)
An Evolving interface is typically exposed so that users or external code can
make use of a feature before it has stabilized. The expectation that an
interface should "eventually" stabilize and be promoted to Stable, however,
is not a requirement for the interface to be labeled as Evolving.
Incompatible changes are allowed for Evolving interfaces only at minor releases.
Incompatible changes allowed: minor (x.Y.0)
Compatible changes allowed: maintenance (x.y.Z)
#### Unstable
Incompatible changes to Unstable APIs are allowed at any time. This usually makes
sense for only private interfaces.
An Unstable interface is one for which no compatibility guarantees are made. An
Unstable interface is not necessarily unstable. An unstable interface is
typically exposed because a user or external code needs to access an interface
that is not intended for consumption. The interface is exposed as an Unstable
interface to state clearly that even though the interface is exposed, it is not
the preferred access path, and no compatibility guarantees are made for it.
However, one may call this out for a supposedly public interface to highlight
that it should not be used as an interface; for public interfaces, labeling it
as Not-an-interface is probably more appropriate than "Unstable".
Unless there is a reason to offer a compatibility guarantee on an interface,
whether it is exposed or not, it should be labeled as Unstable. Private
interfaces also should be Unstable in most cases.
Examples of publicly visible interfaces that are unstable
(i.e. not-an-interface): GUI, CLIs whose output format will change.
Incompatible changes to Unstable interfaces are allowed at any time.
Incompatible changes allowed: maintenance (x.y.Z)
Compatible changes allowed: maintenance (x.y.Z)
#### Deprecated
APIs that could potentially be removed in the future and should not be used.
A Deprecated interface could potentially be removed in the future and should
not be used. Even so, a Deprecated interface will continue to function until
it is removed. When a Deprecated interface can be removed depends on whether
it is also Stable, Evolving, or Unstable.
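As a hedged sketch of what this looks like in code (the class and method names here are invented for illustration), a Deprecated member keeps working but points callers at its replacement:

```java
// Illustrative only: a deprecated method that still functions until removal.
class JobSubmitter {
  /**
   * @deprecated use {@link #submitJob(String)} instead; this method may be
   * removed in a future release, subject to its Stable/Evolving/Unstable label.
   */
  @Deprecated
  public void submit(String jobName) {
    submitJob(jobName);   // continues to function until it is removed
  }

  public void submitJob(String jobName) {
    System.out.println("submitting " + jobName);
  }
}
```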
How are the Classifications Recorded?
-------------------------------------
@ -121,95 +170,101 @@ How are the Classifications Recorded?
How will the classification be recorded for Hadoop APIs?
* Each interface or class will have the audience and stability recorded using
annotations in org.apache.hadoop.classification package.
annotations in the org.apache.hadoop.classification package.
* The javadoc generated by the maven target javadoc:javadoc lists only the public API.
* The javadoc generated by the maven target javadoc:javadoc lists only the
public API.
* One can derive the audience of java classes and java interfaces by the
audience of the package in which they are contained. Hence it is useful to
declare the audience of each java package as public or private (along with the
private audience variations).
How will the classification be recorded for other interfaces, such as CLIs?
* See the [Hadoop Compatibility](Compatibility.html) page for details.
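The annotations mentioned above live in the `org.apache.hadoop.classification` package. A minimal sketch of how a class carries them is shown below; the class itself is invented for illustration, but the two annotations are the real ones.

```java
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

// Illustrative class: Public audience, Evolving stability. A reader (or the
// javadoc tooling) can recover the compatibility promise from the annotations.
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class ExampleMetricsSource {
  public long snapshotCount() {
    return 0L;
  }
}
```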
FAQ
---
* Why aren't the java scopes (private, package private and public) good enough?
* Java's scoping is not very complete. One is often forced to make a class
public in order for other internal components to use it. It does not have
friends or sub-package-private like C++.
public in order for other internal components to use it. It also does not
have friends or sub-package-private like C++.
* But I can easily access a private implementation interface if it is Java public.
Where is the protection and control?
* The purpose of this is not providing absolute access control. Its purpose
is to communicate to users and developers. One can access private
implementation functions in libc; however if they change the internal
implementation details, your application will break and you will have
little sympathy from the folks who are supplying libc. If you use a
non-public interface you understand the risks.
* But I can easily access a Private interface if it is Java public. Where is the
protection and control?
* The purpose of this classification scheme is not providing absolute
access control. Its purpose is to communicate to users and developers.
One can access private implementation functions in libc; however if
they change the internal implementation details, the application will
break and one will receive little sympathy from the folks who are
supplying libc. When using a non-public interface, the risks are
understood.
* Why bother declaring the stability of a private interface?
Aren't private interfaces always unstable?
* Private interfaces are not always unstable. In the cases where they are
stable they capture internal properties of the system and can communicate
* Why bother declaring the stability of a Private interface? Aren't Private
interfaces always Unstable?
* Private interfaces are not always Unstable. In the cases where they are
Stable they capture internal properties of the system and can communicate
these properties to its internal users and to developers of the interface.
* e.g. In HDFS, NN-DN protocol is private but stable and can help
implement rolling upgrades. It communicates that this interface should
not be changed in incompatible ways even though it is private.
* e.g. In HDFS, FSImage stability provides more flexible rollback.
* e.g. In HDFS, NN-DN protocol is Private but Stable and can help
implement rolling upgrades. The stability annotation communicates that
this interface should not be changed in incompatible ways even though
it is Private.
* e.g. In HDFS, the FSImage Stable designation provides more flexible
rollback.
* What is the harm in applications using a private interface that is stable? How
is it different than a public stable interface?
* While a private interface marked as stable is targeted to change only at
* What is the harm in applications using a Private interface that is Stable?
How is it different from a Public Stable interface?
* While a Private interface marked as Stable is targeted to change only at
major releases, it may break at other times if the providers of that
interface are willing to change the internal users of that
interface. Further, a public stable interface is less likely to break even
interface also are willing to change the internal consumers of that
interface. Further, a Public Stable interface is less likely to break even
at major releases (even though it is allowed to break compatibility)
because the impact of the change is larger. If you use a private interface
because the impact of the change is larger. If you use a Private interface
(regardless of its stability) you run the risk of incompatibility.
* Why bother with Limited-private? Isn't it giving special treatment to some projects?
That is not fair.
* First, most interfaces should be public or private; actually let us state
it even stronger: make it private unless you really want to expose it to
public for general use.
* Limited-private is for interfaces that are not intended for general
* Why bother with Limited-Private? Isn't it giving special treatment to some
projects? That is not fair.
* Most interfaces should be Public or Private. An interface should be
Private unless it is explicitly intended for general use.
* Limited-Private is for interfaces that are not intended for general
use. They are exposed to related projects that need special hooks. Such a
classification has a cost to both the supplier and consumer of the limited
classification has a cost to both the supplier and consumer of the
interface. Both will have to work together if ever there is a need to
break the interface in the future; for example the supplier and the
consumers will have to work together to get coordinated releases of their
respective projects. This should not be taken lightly if you can get
away with private then do so; if the interface is really for general use
for all applications then do so. But remember that making an interface
public has huge responsibility. Sometimes Limited-private is just right.
* A good example of a limited-private interface is BlockLocations, This is a
fairly low-level interface that we are willing to expose to MR and perhaps
HBase. We are likely to change it down the road and at that time we will
coordinate release effort with the MR team.
While MR and HDFS are always released in sync today, they may
change down the road.
* If you have a limited-private interface with many projects listed then you
are fooling yourself. It is practically public.
* It might be worth declaring a special audience classification called
Hadoop-Private for the Hadoop family.
respective projects. This contract should not be taken lightly: use
Private if possible; if the interface is really for general use
by all applications, then use Public. Always remember that making an
interface Public comes with a large burden of responsibility. Sometimes
Limited-Private is just right.
* A good example of a Limited-Private interface is BlockLocations. This
interface is a fairly low-level interface that is exposed to MapReduce
and HBase. The interface is likely to change down the road, and at that
time the release effort will have to be coordinated with the
MapReduce development team. While MapReduce and HDFS are always released
in sync today, that policy may change down the road.
* If you have a Limited-Private interface with many projects listed then
the interface is probably a good candidate to be made Public.
* Let's treat all private interfaces as Hadoop-private. What is the harm in
projects in the Hadoop family having access to private classes?
* Do we want MR accessing class files that are implementation details inside
HDFS? There used to be many such layer violations in the code that we have
been cleaning up over the last few years. We don't want such layer
violations to creep back in by not separating between the major components
like HDFS and MR.
* Let's treat all Private interfaces as Limited-Private for all of Hadoop. What
is the harm if projects in the Hadoop family have access to private classes?
* There used to be many cases in the code where one project depended on the
internal implementation details of another. A significant effort went
into cleaning up those issues. Opening up all interfaces as
Limited-Private for all of Hadoop would open the door to reintroducing
such coupling issues.
* Aren't all public interfaces stable?
* One may mark a public interface as evolving in its early days. Here one is
* Aren't all Public interfaces Stable?
* One may mark a Public interface as Evolving in its early days. Here one is
promising to make an effort to make compatible changes but may need to
break it at minor releases.
* One example of a public interface that is unstable is where one is
* One example of a Public interface that is Unstable is where one is
providing an implementation of a standards-body based interface that is
still under development. For example, many companies, in an attempt to be
first to market, have provided implementations of a new NFS protocol even
when the protocol was not fully completed by IETF. The implementor cannot
evolve the interface in a fashion that causes least distruption because
evolve the interface in a fashion that causes least disruption because
the stability is controlled by the standards body. Hence it is appropriate
to label the interface as unstable.
to label the interface as Unstable.
View File
@ -605,7 +605,7 @@ The result is `FSDataOutputStream`, which through its operations may generate ne
clients creating files with `overwrite==true` to fail if the file is created
by another client between the two tests.
* S3N, S3A, Swift and potentially other Object Stores do not currently change the FS state
* S3A, Swift and potentially other Object Stores do not currently change the FS state
until the output stream `close()` operation is completed.
This MAY be a bug, as it allows >1 client to create a file with `overwrite==false`,
and potentially confuse file/directory logic
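A hedged sketch of the sequence involved in that race is shown below; running two copies of it concurrently against the same path is what may expose the behaviour, and whether the losing client fails at `create()` or only at `close()` depends on the store.

```java
// Illustrative only: create a file with overwrite == false, write, then close.
// On HDFS a conflicting create fails immediately; on some object stores the
// conflict may only surface when close() materializes the object.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class OverwriteRaceSketch {
  public static void main(String[] args) throws Exception {
    Path path = new Path(args[0]);                 // a test path of your choosing
    FileSystem fs = path.getFileSystem(new Configuration());
    try (FSDataOutputStream out = fs.create(path, false /* overwrite */)) {
      out.write(1);
    }                                              // some stores only fail here
  }
}
```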
@ -961,7 +961,7 @@ The outcome is no change to FileSystem state, with a return value of false.
FS' = FS; result = False
*Local Filesystem, S3N*
*Local Filesystem*
The outcome is as a normal rename, with the additional (implicit) feature
that the parent directories of the destination also exist.
@ -1262,4 +1262,4 @@ It currently supports to query:
* `StreamCapabilities.HFLUSH` ("*hflush*"): the capability to flush out the data
in client's buffer.
* `StreamCapabilities.HSYNC` ("*hsync*"): capability to flush out the data in
client's buffer and the disk device.
client's buffer and the disk device.
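A minimal sketch of the capability probe described above; it assumes a stream obtained from some FileSystem and relies only on the `StreamCapabilities` interface and the `Syncable` methods on `FSDataOutputStream`.

```java
// Illustrative only: prefer hsync() when the stream advertises the capability,
// otherwise fall back to hflush().
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.StreamCapabilities;

final class HsyncProbe {
  static void syncIfSupported(FSDataOutputStream out) throws IOException {
    if (out instanceof StreamCapabilities
        && ((StreamCapabilities) out).hasCapability("hsync")) {
      out.hsync();   // flush to the disk device where supported
    } else {
      out.hflush();  // flush only to other readers
    }
  }
}
```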
View File
@ -29,11 +29,10 @@ return codes of Unix filesystem actions as a reference. Even so, there
are places where HDFS diverges from the expected behaviour of a POSIX
filesystem.
The behaviour of other Hadoop filesystems are not as rigorously tested.
The bundled S3N and S3A FileSystem clients make Amazon's S3 Object Store ("blobstore")
The bundled S3A FileSystem client makes Amazon's S3 Object Store ("blobstore")
accessible through the FileSystem API. The Swift FileSystem driver provides similar
functionality for the OpenStack Swift blobstore. The Azure object storage
FileSystem talks to Microsoft's Azure equivalent. All of these
functionality for the OpenStack Swift blobstore. The Azure WASB and ADL object
storage FileSystems talk to Microsoft's Azure storage. All of these
bind to object stores, which do have different behaviors, especially regarding
consistency guarantees, and atomicity of operations.
View File
@ -195,21 +195,21 @@ equivalent. Furthermore, the build MUST be configured to never bundle this file
In addition, `src/test/resources/auth-keys.xml` will need to be created. It can be a copy of `contract-test-options.xml`.
The `AbstractFSContract` class automatically loads this resource file if present; specific keys for specific test cases can be added.
As an example, here are what S3N test keys look like:
As an example, here are what S3A test keys look like:
<configuration>
<property>
<name>fs.contract.test.fs.s3n</name>
<value>s3n://tests3contract</value>
<name>fs.contract.test.fs.s3a</name>
<value>s3a://tests3contract</value>
</property>
<property>
<name>fs.s3n.awsAccessKeyId</name>
<name>fs.s3a.access.key</name>
<value>DONOTPCOMMITTHISKEYTOSCM</value>
</property>
<property>
<name>fs.s3n.awsSecretAccessKey</name>
<name>fs.s3a.secret.key</name>
<value>DONOTEVERSHARETHISSECRETKEY!</value>
</property>
</configuration>
View File
@ -94,14 +94,10 @@ public void initializeMemberVariables() {
xmlPropsToSkipCompare.add("hadoop.tmp.dir");
xmlPropsToSkipCompare.add("nfs3.mountd.port");
xmlPropsToSkipCompare.add("nfs3.server.port");
xmlPropsToSkipCompare.add("test.fs.s3n.name");
xmlPropsToSkipCompare.add("fs.viewfs.rename.strategy");
// S3N/S3A properties are in a different subtree.
// - org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys
// S3A properties are in a different subtree.
xmlPrefixToSkipCompare.add("fs.s3a.");
xmlPrefixToSkipCompare.add("fs.s3n.");
xmlPrefixToSkipCompare.add("s3native.");
// WASB properties are in a different subtree.
// - org.apache.hadoop.fs.azure.NativeAzureFileSystem
View File
@ -54,7 +54,6 @@ private void testRedact(Configuration conf) throws Exception {
"fs.s3a.bucket.BUCKET.secret.key",
"fs.s3a.server-side-encryption.key",
"fs.s3a.bucket.engineering.server-side-encryption.key",
"fs.s3n.awsSecretKey",
"fs.azure.account.key.abcdefg.blob.core.windows.net",
"fs.adl.oauth2.refresh.token",
"fs.adl.oauth2.credential",
View File
@ -23,12 +23,9 @@
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.verify;
import java.io.IOException;
import java.net.NoRouteToHostException;
import java.net.URI;
import java.net.UnknownHostException;
import java.security.GeneralSecurityException;
import java.security.NoSuchAlgorithmException;
@ -36,9 +33,6 @@
import org.apache.hadoop.crypto.key.KeyProvider;
import org.apache.hadoop.crypto.key.KeyProvider.Options;
import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.net.ConnectTimeoutException;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.security.authorize.AuthorizationException;
import org.junit.Test;
@ -53,17 +47,14 @@ public void testCreation() throws Exception {
Configuration conf = new Configuration();
KeyProvider kp = new KMSClientProvider.Factory().createProvider(new URI(
"kms://http@host1/kms/foo"), conf);
assertTrue(kp instanceof LoadBalancingKMSClientProvider);
KMSClientProvider[] providers =
((LoadBalancingKMSClientProvider) kp).getProviders();
assertEquals(1, providers.length);
assertEquals(Sets.newHashSet("http://host1/kms/foo/v1/"),
Sets.newHashSet(providers[0].getKMSUrl()));
assertTrue(kp instanceof KMSClientProvider);
assertEquals("http://host1/kms/foo/v1/",
((KMSClientProvider) kp).getKMSUrl());
kp = new KMSClientProvider.Factory().createProvider(new URI(
"kms://http@host1;host2;host3/kms/foo"), conf);
assertTrue(kp instanceof LoadBalancingKMSClientProvider);
providers =
KMSClientProvider[] providers =
((LoadBalancingKMSClientProvider) kp).getProviders();
assertEquals(3, providers.length);
assertEquals(Sets.newHashSet("http://host1/kms/foo/v1/",
@ -131,7 +122,7 @@ public void testLoadBalancingWithFailure() throws Exception {
// This should be retried
KMSClientProvider p4 = mock(KMSClientProvider.class);
when(p4.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new ConnectTimeoutException("p4"));
.thenThrow(new IOException("p4"));
when(p4.getKMSUrl()).thenReturn("p4");
KeyProvider kp = new LoadBalancingKMSClientProvider(
new KMSClientProvider[] { p1, p2, p3, p4 }, 0, conf);
@ -329,298 +320,4 @@ public void testWarmUpEncryptedKeysWhenOneProviderSucceeds()
Mockito.verify(p1, Mockito.times(1)).warmUpEncryptedKeys(keyName);
Mockito.verify(p2, Mockito.times(1)).warmUpEncryptedKeys(keyName);
}
/**
* Tests whether retryPolicy fails immediately on encountering IOException
* which is not SocketException.
* @throws Exception
*/
@Test
public void testClientRetriesWithIOException() throws Exception {
Configuration conf = new Configuration();
// Setting total failover attempts to 10.
conf.setInt(
CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 10);
KMSClientProvider p1 = mock(KMSClientProvider.class);
when(p1.getMetadata(Mockito.anyString()))
.thenThrow(new IOException("p1"));
KMSClientProvider p2 = mock(KMSClientProvider.class);
when(p2.getMetadata(Mockito.anyString()))
.thenThrow(new IOException("p2"));
KMSClientProvider p3 = mock(KMSClientProvider.class);
when(p3.getMetadata(Mockito.anyString()))
.thenThrow(new IOException("p3"));
when(p1.getKMSUrl()).thenReturn("p1");
when(p2.getKMSUrl()).thenReturn("p2");
when(p3.getKMSUrl()).thenReturn("p3");
LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
new KMSClientProvider[] {p1, p2, p3}, 0, conf);
try {
kp.getMetadata("test3");
fail("Should fail since all providers threw an IOException");
} catch (Exception e) {
assertTrue(e instanceof IOException);
}
verify(kp.getProviders()[0], Mockito.times(1))
.getMetadata(Mockito.eq("test3"));
verify(kp.getProviders()[1], Mockito.never())
.getMetadata(Mockito.eq("test3"));
verify(kp.getProviders()[2], Mockito.never())
.getMetadata(Mockito.eq("test3"));
}
/**
* Tests that client doesn't retry once it encounters AccessControlException
* from first provider.
* This assumes all the kms servers are configured with identical access to
* keys.
* @throws Exception
*/
@Test
public void testClientRetriesWithAccessControlException() throws Exception {
Configuration conf = new Configuration();
conf.setInt(
CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3);
KMSClientProvider p1 = mock(KMSClientProvider.class);
when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new AccessControlException("p1"));
KMSClientProvider p2 = mock(KMSClientProvider.class);
when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new IOException("p2"));
KMSClientProvider p3 = mock(KMSClientProvider.class);
when(p3.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new IOException("p3"));
when(p1.getKMSUrl()).thenReturn("p1");
when(p2.getKMSUrl()).thenReturn("p2");
when(p3.getKMSUrl()).thenReturn("p3");
LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
new KMSClientProvider[] {p1, p2, p3}, 0, conf);
try {
kp.createKey("test3", new Options(conf));
fail("Should fail because provider p1 threw an AccessControlException");
} catch (Exception e) {
assertTrue(e instanceof AccessControlException);
}
verify(p1, Mockito.times(1)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p2, Mockito.never()).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p3, Mockito.never()).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
}
/**
* Tests that client doesn't retry once it encounters RuntimeException
* from first provider.
* This assumes all the kms servers are configured with identical access to
* keys.
* @throws Exception
*/
@Test
public void testClientRetriesWithRuntimeException() throws Exception {
Configuration conf = new Configuration();
conf.setInt(
CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3);
KMSClientProvider p1 = mock(KMSClientProvider.class);
when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new RuntimeException("p1"));
KMSClientProvider p2 = mock(KMSClientProvider.class);
when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new IOException("p2"));
when(p1.getKMSUrl()).thenReturn("p1");
when(p2.getKMSUrl()).thenReturn("p2");
LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
new KMSClientProvider[] {p1, p2}, 0, conf);
try {
kp.createKey("test3", new Options(conf));
fail("Should fail since provider p1 threw RuntimeException");
} catch (Exception e) {
assertTrue(e instanceof RuntimeException);
}
verify(p1, Mockito.times(1)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p2, Mockito.never()).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
}
/**
* Tests the client retries until it finds a good provider.
* @throws Exception
*/
@Test
public void testClientRetriesWithTimeoutsException() throws Exception {
Configuration conf = new Configuration();
conf.setInt(
CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 4);
KMSClientProvider p1 = mock(KMSClientProvider.class);
when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new ConnectTimeoutException("p1"));
KMSClientProvider p2 = mock(KMSClientProvider.class);
when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new UnknownHostException("p2"));
KMSClientProvider p3 = mock(KMSClientProvider.class);
when(p3.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new NoRouteToHostException("p3"));
KMSClientProvider p4 = mock(KMSClientProvider.class);
when(p4.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenReturn(
new KMSClientProvider.KMSKeyVersion("test3", "v1", new byte[0]));
when(p1.getKMSUrl()).thenReturn("p1");
when(p2.getKMSUrl()).thenReturn("p2");
when(p3.getKMSUrl()).thenReturn("p3");
when(p4.getKMSUrl()).thenReturn("p4");
LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
new KMSClientProvider[] {p1, p2, p3, p4}, 0, conf);
try {
kp.createKey("test3", new Options(conf));
} catch (Exception e) {
fail("Provider p4 should have answered the request.");
}
verify(p1, Mockito.times(1)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p2, Mockito.times(1)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p3, Mockito.times(1)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p4, Mockito.times(1)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
}
/**
* Tests the operation succeeds second time after ConnectTimeoutException.
* @throws Exception
*/
@Test
public void testClientRetriesSucceedsSecondTime() throws Exception {
Configuration conf = new Configuration();
conf.setInt(
CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3);
KMSClientProvider p1 = mock(KMSClientProvider.class);
when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new ConnectTimeoutException("p1"))
.thenReturn(new KMSClientProvider.KMSKeyVersion("test3", "v1",
new byte[0]));
KMSClientProvider p2 = mock(KMSClientProvider.class);
when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new ConnectTimeoutException("p2"));
when(p1.getKMSUrl()).thenReturn("p1");
when(p2.getKMSUrl()).thenReturn("p2");
LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
new KMSClientProvider[] {p1, p2}, 0, conf);
try {
kp.createKey("test3", new Options(conf));
} catch (Exception e) {
fail("Provider p1 should have answered the request second time.");
}
verify(p1, Mockito.times(2)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p2, Mockito.times(1)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
}
/**
* Tests whether retryPolicy retries specified number of times.
* @throws Exception
*/
@Test
public void testClientRetriesSpecifiedNumberOfTimes() throws Exception {
Configuration conf = new Configuration();
conf.setInt(
CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 10);
KMSClientProvider p1 = mock(KMSClientProvider.class);
when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new ConnectTimeoutException("p1"));
KMSClientProvider p2 = mock(KMSClientProvider.class);
when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new ConnectTimeoutException("p2"));
when(p1.getKMSUrl()).thenReturn("p1");
when(p2.getKMSUrl()).thenReturn("p2");
LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
new KMSClientProvider[] {p1, p2}, 0, conf);
try {
kp.createKey("test3", new Options(conf));
fail("Should fail");
} catch (Exception e) {
assert (e instanceof ConnectTimeoutException);
}
verify(p1, Mockito.times(6)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p2, Mockito.times(5)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
}
/**
* Tests whether retryPolicy retries number of times equals to number of
* providers if conf kms.client.failover.max.attempts is not set.
* @throws Exception
*/
@Test
public void testClientRetriesIfMaxAttemptsNotSet() throws Exception {
Configuration conf = new Configuration();
KMSClientProvider p1 = mock(KMSClientProvider.class);
when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new ConnectTimeoutException("p1"));
KMSClientProvider p2 = mock(KMSClientProvider.class);
when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new ConnectTimeoutException("p2"));
when(p1.getKMSUrl()).thenReturn("p1");
when(p2.getKMSUrl()).thenReturn("p2");
LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
new KMSClientProvider[] {p1, p2}, 0, conf);
try {
kp.createKey("test3", new Options(conf));
fail("Should fail");
} catch (Exception e) {
assert (e instanceof ConnectTimeoutException);
}
verify(p1, Mockito.times(2)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p2, Mockito.times(1)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
}
/**
* Tests that client doesn't retry once it encounters AuthenticationException
* wrapped in an IOException from first provider.
* @throws Exception
*/
@Test
public void testClientRetriesWithAuthenticationExceptionWrappedinIOException()
throws Exception {
Configuration conf = new Configuration();
conf.setInt(
CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3);
KMSClientProvider p1 = mock(KMSClientProvider.class);
when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new IOException(new AuthenticationException("p1")));
KMSClientProvider p2 = mock(KMSClientProvider.class);
when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
.thenThrow(new ConnectTimeoutException("p2"));
when(p1.getKMSUrl()).thenReturn("p1");
when(p2.getKMSUrl()).thenReturn("p2");
LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
new KMSClientProvider[] {p1, p2}, 0, conf);
try {
kp.createKey("test3", new Options(conf));
fail("Should fail since provider p1 threw AuthenticationException");
} catch (Exception e) {
assertTrue(e.getCause() instanceof AuthenticationException);
}
verify(p1, Mockito.times(1)).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
verify(p2, Mockito.never()).createKey(Mockito.eq("test3"),
Mockito.any(Options.class));
}
}
}
View File
@ -61,7 +61,16 @@ public abstract class FileSystemContractBaseTest {
protected byte[] data = dataset(getBlockSize() * 2, 0, 255);
@Rule
public Timeout globalTimeout = new Timeout(30000);
public Timeout globalTimeout = new Timeout(getGlobalTimeout());
/**
* Get the timeout in milliseconds for each test case.
* @return a time in milliseconds.
*/
protected int getGlobalTimeout() {
return 30 * 1000;
}
@Rule
public ExpectedException thrown = ExpectedException.none();
@ -246,39 +255,18 @@ public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception {
@Test
public void testMkdirsWithUmask() throws Exception {
if (!isS3(fs)) {
Configuration conf = fs.getConf();
String oldUmask = conf.get(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY);
try {
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, TEST_UMASK);
final Path dir = path("newDir");
assertTrue(fs.mkdirs(dir, new FsPermission((short) 0777)));
FileStatus status = fs.getFileStatus(dir);
assertTrue(status.isDirectory());
assertEquals((short) 0715, status.getPermission().toShort());
} finally {
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, oldUmask);
}
}
}
/**
* Skip permission tests for S3FileSystem until HDFS-1333 is fixed.
* Classes that do not implement {@link FileSystem#getScheme()} method
* (e.g {@link RawLocalFileSystem}) will throw an
* {@link UnsupportedOperationException}.
* @param fileSystem FileSystem object to determine if it is S3 or not
* @return true if S3 false in any other case
*/
private boolean isS3(FileSystem fileSystem) {
Configuration conf = fs.getConf();
String oldUmask = conf.get(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY);
try {
if (fileSystem.getScheme().equals("s3n")) {
return true;
}
} catch (UnsupportedOperationException e) {
LOG.warn("Unable to determine the schema of filesystem.");
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, TEST_UMASK);
final Path dir = path("newDir");
assertTrue(fs.mkdirs(dir, new FsPermission((short) 0777)));
FileStatus status = fs.getFileStatus(dir);
assertTrue(status.isDirectory());
assertEquals((short) 0715, status.getPermission().toShort());
} finally {
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, oldUmask);
}
return false;
}
@Test
View File
@ -122,7 +122,7 @@ public void testOpenFileTwice() throws Throwable {
Path path = path("testopenfiletwice.txt");
byte[] block = dataset(TEST_FILE_LEN, 0, 255);
//this file now has a simple rule: offset => value
createFile(getFileSystem(), path, false, block);
createFile(getFileSystem(), path, true, block);
//open first
FSDataInputStream instream1 = getFileSystem().open(path);
FSDataInputStream instream2 = null;
@ -150,7 +150,7 @@ public void testSequentialRead() throws Throwable {
int base = 0x40; // 64
byte[] block = dataset(len, base, base + len);
//this file now has a simple rule: offset => (value | 0x40)
createFile(getFileSystem(), path, false, block);
createFile(getFileSystem(), path, true, block);
//open first
instream = getFileSystem().open(path);
assertEquals(base, instream.read());
View File
@ -341,7 +341,7 @@ public void testRandomSeeks() throws Throwable {
int filesize = 10 * 1024;
byte[] buf = dataset(filesize, 0, 255);
Path randomSeekFile = path("testrandomseeks.bin");
createFile(getFileSystem(), randomSeekFile, false, buf);
createFile(getFileSystem(), randomSeekFile, true, buf);
Random r = new Random();
// Record the sequence of seeks and reads which trigger a failure.
View File
@ -138,6 +138,63 @@ public void testFsPermission() {
}
}
@Test
public void testFsSymbolicConstructorWithNormalInput() {
// Test cases for symbolic representation
//Added both Octal and short representation to show with sticky bit
assertEquals(777, new FsPermission("+rwx").toOctal());
assertEquals(0777, new FsPermission("+rwx").toShort());
assertEquals(444, new FsPermission("+r").toOctal());
assertEquals(0444, new FsPermission("+r").toShort());
assertEquals(222, new FsPermission("+w").toOctal());
assertEquals(0222, new FsPermission("+w").toShort());
assertEquals(111, new FsPermission("+x").toOctal());
assertEquals(0111, new FsPermission("+x").toShort());
assertEquals(666, new FsPermission("+rw").toOctal());
assertEquals(0666, new FsPermission("+rw").toShort());
assertEquals(333, new FsPermission("+wx").toOctal());
assertEquals(0333, new FsPermission("+wx").toShort());
assertEquals(555, new FsPermission("+rx").toOctal());
assertEquals(0555, new FsPermission("+rx").toShort());
// Test case is to test with repeated values in mode.
// Repeated value in input will be ignored as duplicate.
assertEquals(666, new FsPermission("+rwr").toOctal());
assertEquals(0666, new FsPermission("+rwr").toShort());
assertEquals(000, new FsPermission("-rwr").toOctal());
assertEquals(0000, new FsPermission("-rwr").toShort());
assertEquals(1666, new FsPermission("+rwrt").toOctal());
assertEquals(01666, new FsPermission("+rwrt").toShort());
assertEquals(000, new FsPermission("-rwrt").toOctal());
assertEquals(0000, new FsPermission("-rwrt").toShort());
assertEquals(1777, new FsPermission("+rwxt").toOctal());
assertEquals(01777, new FsPermission("+rwxt").toShort());
assertEquals(000, new FsPermission("-rt").toOctal());
assertEquals(0000, new FsPermission("-rt").toShort());
assertEquals(000, new FsPermission("-rwx").toOctal());
assertEquals(0000, new FsPermission("-rwx").toShort());
}
@Test
public void testSymbolicPermission() {
for (int i = 0; i < SYMBOLIC.length; ++i) {
View File
@ -34,16 +34,16 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.sshd.SshServer;
import org.apache.sshd.server.SshServer;
import org.apache.sshd.common.NamedFactory;
import org.apache.sshd.server.Command;
import org.apache.sshd.server.PasswordAuthenticator;
import org.apache.sshd.server.UserAuth;
import org.apache.sshd.server.auth.UserAuthPassword;
import org.apache.sshd.server.auth.password.PasswordAuthenticator;
import org.apache.sshd.server.auth.UserAuth;
import org.apache.sshd.server.auth.password.UserAuthPasswordFactory;
import org.apache.sshd.server.keyprovider.SimpleGeneratorHostKeyProvider;
import org.apache.sshd.server.session.ServerSession;
import org.apache.sshd.server.sftp.SftpSubsystem;
import org.apache.sshd.server.subsystem.sftp.SftpSubsystemFactory;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
@ -76,7 +76,7 @@ private static void startSshdServer() throws IOException {
List<NamedFactory<UserAuth>> userAuthFactories =
new ArrayList<NamedFactory<UserAuth>>();
userAuthFactories.add(new UserAuthPassword.Factory());
userAuthFactories.add(new UserAuthPasswordFactory());
sshd.setUserAuthFactories(userAuthFactories);
@ -92,7 +92,7 @@ public boolean authenticate(String username, String password,
});
sshd.setSubsystemFactories(
Arrays.<NamedFactory<Command>>asList(new SftpSubsystem.Factory()));
Arrays.<NamedFactory<Command>>asList(new SftpSubsystemFactory()));
sshd.start();
port = sshd.getPort();
@ -140,7 +140,7 @@ public static void tearDown() {
if (sshd != null) {
try {
sshd.stop(true);
} catch (InterruptedException e) {
} catch (IOException e) {
// ignore
}
}
View File
@ -70,7 +70,7 @@ public interface TimeoutHandler {
* @throws Exception if the handler wishes to raise an exception
* that way.
*/
Exception evaluate(int timeoutMillis, Exception caught) throws Exception;
Throwable evaluate(int timeoutMillis, Throwable caught) throws Throwable;
}
/**
@ -116,7 +116,7 @@ public static int await(int timeoutMillis,
Preconditions.checkNotNull(timeoutHandler);
long endTime = Time.now() + timeoutMillis;
Exception ex = null;
Throwable ex = null;
boolean running = true;
int iterations = 0;
while (running) {
@ -128,9 +128,11 @@ public static int await(int timeoutMillis,
// the probe failed but did not raise an exception. Reset any
// exception raised by a previous probe failure.
ex = null;
} catch (InterruptedException | FailFastException e) {
} catch (InterruptedException
| FailFastException
| VirtualMachineError e) {
throw e;
} catch (Exception e) {
} catch (Throwable e) {
LOG.debug("eventually() iteration {}", iterations, e);
ex = e;
}
@ -145,15 +147,20 @@ public static int await(int timeoutMillis,
}
}
// timeout
Exception evaluate = timeoutHandler.evaluate(timeoutMillis, ex);
if (evaluate == null) {
// bad timeout handler logic; fall back to GenerateTimeout so the
// underlying problem isn't lost.
LOG.error("timeout handler {} did not throw an exception ",
timeoutHandler);
evaluate = new GenerateTimeout().evaluate(timeoutMillis, ex);
Throwable evaluate;
try {
evaluate = timeoutHandler.evaluate(timeoutMillis, ex);
if (evaluate == null) {
// bad timeout handler logic; fall back to GenerateTimeout so the
// underlying problem isn't lost.
LOG.error("timeout handler {} did not throw an exception ",
timeoutHandler);
evaluate = new GenerateTimeout().evaluate(timeoutMillis, ex);
}
} catch (Throwable throwable) {
evaluate = throwable;
}
throw evaluate;
return raise(evaluate);
}
/**
@ -217,6 +224,7 @@ public static int await(int timeoutMillis,
* @throws Exception the last exception thrown before timeout was triggered
* @throws FailFastException if raised -without any retry attempt.
* @throws InterruptedException if interrupted during the sleep operation.
* @throws OutOfMemoryError you've run out of memory.
*/
public static <T> T eventually(int timeoutMillis,
Callable<T> eval,
@ -224,7 +232,7 @@ public static <T> T eventually(int timeoutMillis,
Preconditions.checkArgument(timeoutMillis >= 0,
"timeoutMillis must be >= 0");
long endTime = Time.now() + timeoutMillis;
Exception ex;
Throwable ex;
boolean running;
int sleeptime;
int iterations = 0;
@ -232,10 +240,12 @@ public static <T> T eventually(int timeoutMillis,
iterations++;
try {
return eval.call();
} catch (InterruptedException | FailFastException e) {
} catch (InterruptedException
| FailFastException
| VirtualMachineError e) {
// these two exceptions trigger an immediate exit
throw e;
} catch (Exception e) {
} catch (Throwable e) {
LOG.debug("evaluate() iteration {}", iterations, e);
ex = e;
}
@ -245,7 +255,26 @@ public static <T> T eventually(int timeoutMillis,
}
} while (running);
// timeout. Throw the last exception raised
throw ex;
return raise(ex);
}
/**
* Take the throwable and raise it as an exception or an error, depending
* upon its type. This allows callers to declare that they only throw
* Exception (i.e. can be invoked by Callable) yet still rethrow a
* previously caught Throwable.
* @param throwable Throwable to rethrow
* @param <T> expected return type
* @return never
* @throws Exception if throwable is an Exception
* @throws Error if throwable is not an Exception
*/
private static <T> T raise(Throwable throwable) throws Exception {
if (throwable instanceof Exception) {
throw (Exception) throwable;
} else {
throw (Error) throwable;
}
}
/**
@ -365,6 +394,7 @@ public static <T, E extends Throwable> E intercept(
* @throws Exception any other exception raised
* @throws AssertionError if the evaluation call didn't raise an exception.
*/
@SuppressWarnings("unchecked")
public static <E extends Throwable> E intercept(
Class<E> clazz,
VoidCallable eval)
@ -487,14 +517,14 @@ public GenerateTimeout() {
* @return TimeoutException
*/
@Override
public Exception evaluate(int timeoutMillis, Exception caught)
throws Exception {
public Throwable evaluate(int timeoutMillis, Throwable caught)
throws Throwable {
String s = String.format("%s: after %d millis", message,
timeoutMillis);
String caughtText = caught != null
? ("; " + robustToString(caught)) : "";
return (TimeoutException) (new TimeoutException(s + caughtText)
return (new TimeoutException(s + caughtText)
.initCause(caught));
}
}
View File
@ -25,6 +25,7 @@
import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import static org.apache.hadoop.test.LambdaTestUtils.*;
import static org.apache.hadoop.test.GenericTestUtils.*;
@ -123,6 +124,27 @@ protected void assertMinRetryCount(int minCount) {
minCount <= retry.getInvocationCount());
}
/**
* Raise an exception.
* @param e exception to raise
* @return never
* @throws Exception passed in exception
*/
private boolean r(Exception e) throws Exception {
throw e;
}
/**
* Raise an error.
* @param e error to raise
* @return never
* @throws Exception never
* @throws Error the passed in error
*/
private boolean r(Error e) throws Exception {
throw e;
}
@Test
public void testAwaitAlwaysTrue() throws Throwable {
await(TIMEOUT,
@ -140,7 +162,7 @@ public void testAwaitAlwaysFalse() throws Throwable {
TIMEOUT_FAILURE_HANDLER);
fail("should not have got here");
} catch (TimeoutException e) {
assertTrue(retry.getInvocationCount() > 4);
assertMinRetryCount(1);
}
}
@ -316,9 +338,7 @@ public void testInterceptAwaitLambdaException() throws Throwable {
IOException ioe = intercept(IOException.class,
() -> await(
TIMEOUT,
() -> {
throw new IOException("inner " + ++count);
},
() -> r(new IOException("inner " + ++count)),
retry,
(timeout, ex) -> ex));
assertRetryCount(count - 1);
@ -339,9 +359,7 @@ public void testInterceptAwaitLambdaDiagnostics() throws Throwable {
public void testInterceptAwaitFailFastLambda() throws Throwable {
intercept(FailFastException.class,
() -> await(TIMEOUT,
() -> {
throw new FailFastException("ffe");
},
() -> r(new FailFastException("ffe")),
retry,
(timeout, ex) -> ex));
assertRetryCount(0);
@ -361,14 +379,13 @@ public void testEventuallyLambda() throws Throwable {
assertRetryCount(0);
}
@Test
public void testInterceptEventuallyLambdaFailures() throws Throwable {
intercept(IOException.class,
"oops",
() -> eventually(TIMEOUT,
() -> {
throw new IOException("oops");
},
() -> r(new IOException("oops")),
retry));
assertMinRetryCount(1);
}
@ -385,11 +402,95 @@ public void testInterceptEventuallyLambdaFailFast() throws Throwable {
intercept(FailFastException.class, "oops",
() -> eventually(
TIMEOUT,
() -> {
throw new FailFastException("oops");
},
() -> r(new FailFastException("oops")),
retry));
assertRetryCount(0);
}
/**
* Verify that assertions trigger catch and retry.
* @throws Throwable if the code is broken
*/
@Test
public void testEventuallySpinsOnAssertions() throws Throwable {
AtomicInteger counter = new AtomicInteger(0);
eventually(TIMEOUT,
() -> {
while (counter.incrementAndGet() < 5) {
fail("if you see this, we are in trouble");
}
},
retry);
assertMinRetryCount(4);
}
/**
* Verify that VirtualMachineError errors are immediately rethrown.
* @throws Throwable if the code is broken
*/
@Test
public void testInterceptEventuallyThrowsVMErrors() throws Throwable {
intercept(OutOfMemoryError.class, "OOM",
() -> eventually(
TIMEOUT,
() -> r(new OutOfMemoryError("OOM")),
retry));
assertRetryCount(0);
}
/**
* Verify that you can declare that an intercept will intercept Errors.
* @throws Throwable if the code is broken
*/
@Test
public void testInterceptHandlesErrors() throws Throwable {
intercept(OutOfMemoryError.class, "OOM",
() -> r(new OutOfMemoryError("OOM")));
}
/**
* Verify that if an Error raised is not the one being intercepted,
* it gets rethrown.
* @throws Throwable if the code is broken
*/
@Test
public void testInterceptRethrowsVMErrors() throws Throwable {
intercept(StackOverflowError.class, "",
() -> intercept(OutOfMemoryError.class, "",
() -> r(new StackOverflowError())));
}
@Test
public void testAwaitHandlesAssertions() throws Throwable {
// await a state which is never reached, expect a timeout exception
// with the text "failure" in it
TimeoutException ex = intercept(TimeoutException.class,
"failure",
() -> await(TIMEOUT,
() -> r(new AssertionError("failure")),
retry,
TIMEOUT_FAILURE_HANDLER));
// the retry handler must have been invoked
assertMinRetryCount(1);
// and the nested cause is the raised assertion
if (!(ex.getCause() instanceof AssertionError)) {
throw ex;
}
}
@Test
public void testAwaitRethrowsVMErrors() throws Throwable {
// await a state which is never reached, expect a timeout exception
// with the text "failure" in it
intercept(StackOverflowError.class,
() -> await(TIMEOUT,
() -> r(new StackOverflowError()),
retry,
TIMEOUT_FAILURE_HANDLER));
// the retry handler must not have been invoked
assertMinRetryCount(0);
}
}
View File
@ -0,0 +1,52 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import org.junit.Test;
import java.math.BigInteger;
import static org.junit.Assert.assertTrue;
public class TestCpuTimeTracker {
@Test
public void test() throws InterruptedException {
CpuTimeTracker tracker = new CpuTimeTracker(10);
tracker.updateElapsedJiffies(
BigInteger.valueOf(100),
System.currentTimeMillis());
float val1 = tracker.getCpuTrackerUsagePercent();
assertTrue(
"Not invalid CPU usage",
val1 == -1.0);
Thread.sleep(1000);
tracker.updateElapsedJiffies(
BigInteger.valueOf(200),
System.currentTimeMillis());
float val2 = tracker.getCpuTrackerUsagePercent();
assertTrue(
"Not positive CPU usage",
val2 > 0);
Thread.sleep(1000);
tracker.updateElapsedJiffies(
BigInteger.valueOf(0),
System.currentTimeMillis());
float val3 = tracker.getCpuTrackerUsagePercent();
assertTrue(
"Not positive CPU usage",
val3 == 0.0);
}
}
View File
@ -45,12 +45,6 @@
This is required by FTPFileSystem</description>
</property>
<property>
<name>test.fs.s3n.name</name>
<value>s3n:///</value>
<description>The name of the s3n file system for testing.</description>
</property>
<!-- Turn security off for tests by default -->
<property>
<name>hadoop.security.authentication</name>
View File
@ -1,16 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Speed up the s3native jets3t test
s3service.max-thread-count=10
threaded-service.max-thread-count=10
View File
@ -115,6 +115,14 @@ public boolean delete(Path f, boolean recursive)
return dfs.delete(getUriPath(f), recursive);
}
/**
* The returned BlockLocation will have different formats for replicated
* and erasure coded file.
*
* Please refer to
* {@link FileContext#getFileBlockLocations(Path, long, long)}
* for more details.
*/
@Override
public BlockLocation[] getFileBlockLocations(Path p, long start, long len)
throws IOException, UnresolvedLinkException {
@ -165,6 +173,13 @@ public FsServerDefaults getServerDefaults(final Path f) throws IOException {
return dfs.getServerDefaults();
}
/**
* The BlockLocation of the returned LocatedFileStatus will have different
* formats for replicated and erasure coded files.
* Please refer to
* {@link FileContext#getFileBlockLocations(Path, long, long)} for
* more details.
*/
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(
final Path p)
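A short caller-side sketch of the documented behaviour, assuming an existing HDFS file; the path and range used below are placeholders.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

public class BlockLocationSketch {
  public static void main(String[] args) throws Exception {
    FileContext fc = FileContext.getFileContext(new Configuration());
    Path file = new Path("/tmp/replicated.dat");   // placeholder path

    // For a replicated file each BlockLocation describes one block and its
    // replica hosts; for an erasure coded file it covers a block group, so
    // the hosts/names list the nodes holding the group's data and parity cells.
    for (BlockLocation loc : fc.getFileBlockLocations(file, 0, 1024 * 1024)) {
      System.out.println(loc.getOffset() + "+" + loc.getLength()
          + " on " + String.join(",", loc.getHosts()));
    }
  }
}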

View File

@ -72,6 +72,7 @@
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileEncryptionInfo;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.FsStatus;
@ -866,6 +867,10 @@ boolean recoverLease(String src) throws IOException {
* data-placement when performing operations. For example, the
* MapReduce system tries to schedule tasks on the same machines
* as the data-block the task processes.
*
* Please refer to
* {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
* for more details.
*/
public BlockLocation[] getBlockLocations(String src, long start,
long length) throws IOException {

View File

@ -260,6 +260,7 @@ private void flipDataBuffers() {
private final Coordinator coordinator;
private final CellBuffers cellBuffers;
private final ErasureCodingPolicy ecPolicy;
private final RawErasureEncoder encoder;
private final List<StripedDataStreamer> streamers;
private final DFSPacket[] currentPackets; // current Packet of each streamer
@ -286,7 +287,7 @@ private void flipDataBuffers() {
LOG.debug("Creating DFSStripedOutputStream for " + src);
}
final ErasureCodingPolicy ecPolicy = stat.getErasureCodingPolicy();
ecPolicy = stat.getErasureCodingPolicy();
final int numParityBlocks = ecPolicy.getNumParityUnits();
cellSize = ecPolicy.getCellSize();
numDataBlocks = ecPolicy.getNumDataUnits();
@ -478,11 +479,6 @@ private void allocateNewBlock() throws IOException {
final LocatedBlock lb = addBlock(excludedNodes, dfsClient, src,
currentBlockGroup, fileId, favoredNodes, getAddBlockFlags());
assert lb.isStriped();
if (lb.getLocations().length < numDataBlocks) {
throw new IOException("Failed to get " + numDataBlocks
+ " nodes from namenode: blockGroupSize= " + numAllBlocks
+ ", blocks.length= " + lb.getLocations().length);
}
// assign the new block to the current block group
currentBlockGroup = lb.getBlock();
blockGroupIndex++;
@ -494,11 +490,16 @@ private void allocateNewBlock() throws IOException {
StripedDataStreamer si = getStripedDataStreamer(i);
assert si.isHealthy();
if (blocks[i] == null) {
// allocBlock() should guarantee that all data blocks are successfully
// allocated.
assert i >= numDataBlocks;
// Set exception and close streamer as there are no block locations
// found for the parity block.
LOG.warn("Failed to get block location for parity block, index=" + i);
LOG.warn("Cannot allocate parity block(index={}, policy={}). " +
"Not enough datanodes? Exclude nodes={}", i, ecPolicy.getName(),
excludedNodes);
si.getLastException().set(
new IOException("Failed to get following block, i=" + i));
new IOException("Failed to get parity block, index=" + i));
si.getErrorState().setInternalError();
si.close(true);
} else {

View File

@ -240,6 +240,13 @@ public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
return getFileBlockLocations(file.getPath(), start, len);
}
/**
* The returned BlockLocation will have different formats for replicated
* and erasure coded files.
* Please refer to
* {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
* for more details.
*/
@Override
public BlockLocation[] getFileBlockLocations(Path p,
final long start, final long len) throws IOException {
@ -1040,6 +1047,13 @@ public FileStatus[] next(final FileSystem fs, final Path p)
}.resolve(this, absF);
}
/**
* The BlockLocation of the returned LocatedFileStatus will have different
* formats for replicated and erasure coded files.
* Please refer to
* {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} for
* more details.
*/
@Override
protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path p,
final PathFilter filter)

View File

@ -1,90 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.protocol;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Get statistics pertaining to blocks of type {@link BlockType#CONTIGUOUS}
* in the filesystem.
* <p>
* @see ClientProtocol#getBlocksStats()
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public final class BlocksStats {
private final long lowRedundancyBlocksStat;
private final long corruptBlocksStat;
private final long missingBlocksStat;
private final long missingReplicationOneBlocksStat;
private final long bytesInFutureBlocksStat;
private final long pendingDeletionBlocksStat;
public BlocksStats(long lowRedundancyBlocksStat,
long corruptBlocksStat, long missingBlocksStat,
long missingReplicationOneBlocksStat, long bytesInFutureBlocksStat,
long pendingDeletionBlocksStat) {
this.lowRedundancyBlocksStat = lowRedundancyBlocksStat;
this.corruptBlocksStat = corruptBlocksStat;
this.missingBlocksStat = missingBlocksStat;
this.missingReplicationOneBlocksStat = missingReplicationOneBlocksStat;
this.bytesInFutureBlocksStat = bytesInFutureBlocksStat;
this.pendingDeletionBlocksStat = pendingDeletionBlocksStat;
}
public long getLowRedundancyBlocksStat() {
return lowRedundancyBlocksStat;
}
public long getCorruptBlocksStat() {
return corruptBlocksStat;
}
public long getMissingReplicaBlocksStat() {
return missingBlocksStat;
}
public long getMissingReplicationOneBlocksStat() {
return missingReplicationOneBlocksStat;
}
public long getBytesInFutureBlocksStat() {
return bytesInFutureBlocksStat;
}
public long getPendingDeletionBlocksStat() {
return pendingDeletionBlocksStat;
}
@Override
public String toString() {
StringBuilder statsBuilder = new StringBuilder();
statsBuilder.append("ReplicatedBlocksStats=[")
.append("LowRedundancyBlocks=").append(getLowRedundancyBlocksStat())
.append(", CorruptBlocks=").append(getCorruptBlocksStat())
.append(", MissingReplicaBlocks=").append(getMissingReplicaBlocksStat())
.append(", MissingReplicationOneBlocks=").append(
getMissingReplicationOneBlocksStat())
.append(", BytesInFutureBlocks=").append(getBytesInFutureBlocksStat())
.append(", PendingDeletionBlocks=").append(
getPendingDeletionBlocksStat())
.append("]");
return statsBuilder.toString();
}
}

View File

@ -778,14 +778,14 @@ SnapshottableDirectoryStatus[] getSnapshottableDirListing()
* in the filesystem.
*/
@Idempotent
BlocksStats getBlocksStats() throws IOException;
ReplicatedBlockStats getReplicatedBlockStats() throws IOException;
/**
* Get statistics pertaining to blocks of type {@link BlockType#STRIPED}
* in the filesystem.
*/
@Idempotent
ECBlockGroupsStats getECBlockGroupsStats() throws IOException;
ECBlockGroupStats getECBlockGroupStats() throws IOException;
/**
* Get a report on the system's current datanodes.
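A sketch of how a caller might consume the two renamed calls, assuming an already-constructed ClientProtocol proxy such as the protocol translator shown later in this diff.
import java.io.IOException;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats;
import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;

public class BlockStatsReport {
  /** Print both block statistics views exposed by the NameNode. */
  static void printBlockStats(ClientProtocol namenode) throws IOException {
    ReplicatedBlockStats replicated = namenode.getReplicatedBlockStats();
    ECBlockGroupStats ecGroups = namenode.getECBlockGroupStats();
    // Both classes report their counters through toString().
    System.out.println(replicated);
    System.out.println(ecGroups);
    long missing = replicated.getMissingReplicaBlocks()
        + ecGroups.getMissingBlockGroups();
    System.out.println("total missing blocks/groups: " + missing);
  }
}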

View File

@ -0,0 +1,83 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.protocol;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Get statistics pertaining to blocks of type {@link BlockType#STRIPED}
* in the filesystem.
* <p>
* @see ClientProtocol#getECBlockGroupStats()
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public final class ECBlockGroupStats {
private final long lowRedundancyBlockGroups;
private final long corruptBlockGroups;
private final long missingBlockGroups;
private final long bytesInFutureBlockGroups;
private final long pendingDeletionBlocks;
public ECBlockGroupStats(long lowRedundancyBlockGroups,
long corruptBlockGroups, long missingBlockGroups,
long bytesInFutureBlockGroups, long pendingDeletionBlocks) {
this.lowRedundancyBlockGroups = lowRedundancyBlockGroups;
this.corruptBlockGroups = corruptBlockGroups;
this.missingBlockGroups = missingBlockGroups;
this.bytesInFutureBlockGroups = bytesInFutureBlockGroups;
this.pendingDeletionBlocks = pendingDeletionBlocks;
}
public long getBytesInFutureBlockGroups() {
return bytesInFutureBlockGroups;
}
public long getCorruptBlockGroups() {
return corruptBlockGroups;
}
public long getLowRedundancyBlockGroups() {
return lowRedundancyBlockGroups;
}
public long getMissingBlockGroups() {
return missingBlockGroups;
}
public long getPendingDeletionBlocks() {
return pendingDeletionBlocks;
}
@Override
public String toString() {
StringBuilder statsBuilder = new StringBuilder();
statsBuilder.append("ECBlockGroupStats=[")
.append("LowRedundancyBlockGroups=").append(
getLowRedundancyBlockGroups())
.append(", CorruptBlockGroups=").append(getCorruptBlockGroups())
.append(", MissingBlockGroups=").append(getMissingBlockGroups())
.append(", BytesInFutureBlockGroups=").append(
getBytesInFutureBlockGroups())
.append(", PendingDeletionBlocks=").append(
getPendingDeletionBlocks())
.append("]");
return statsBuilder.toString();
}
}

View File

@ -1,83 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.protocol;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Get statistics pertaining to blocks of type {@link BlockType#STRIPED}
* in the filesystem.
* <p>
* @see ClientProtocol#getECBlockGroupsStats()
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public final class ECBlockGroupsStats {
private final long lowRedundancyBlockGroupsStat;
private final long corruptBlockGroupsStat;
private final long missingBlockGroupsStat;
private final long bytesInFutureBlockGroupsStat;
private final long pendingDeletionBlockGroupsStat;
public ECBlockGroupsStats(long lowRedundancyBlockGroupsStat, long
corruptBlockGroupsStat, long missingBlockGroupsStat, long
bytesInFutureBlockGroupsStat, long pendingDeletionBlockGroupsStat) {
this.lowRedundancyBlockGroupsStat = lowRedundancyBlockGroupsStat;
this.corruptBlockGroupsStat = corruptBlockGroupsStat;
this.missingBlockGroupsStat = missingBlockGroupsStat;
this.bytesInFutureBlockGroupsStat = bytesInFutureBlockGroupsStat;
this.pendingDeletionBlockGroupsStat = pendingDeletionBlockGroupsStat;
}
public long getBytesInFutureBlockGroupsStat() {
return bytesInFutureBlockGroupsStat;
}
public long getCorruptBlockGroupsStat() {
return corruptBlockGroupsStat;
}
public long getLowRedundancyBlockGroupsStat() {
return lowRedundancyBlockGroupsStat;
}
public long getMissingBlockGroupsStat() {
return missingBlockGroupsStat;
}
public long getPendingDeletionBlockGroupsStat() {
return pendingDeletionBlockGroupsStat;
}
@Override
public String toString() {
StringBuilder statsBuilder = new StringBuilder();
statsBuilder.append("ECBlockGroupsStats=[")
.append("LowRedundancyBlockGroups=").append(
getLowRedundancyBlockGroupsStat())
.append(", CorruptBlockGroups=").append(getCorruptBlockGroupsStat())
.append(", MissingBlockGroups=").append(getMissingBlockGroupsStat())
.append(", BytesInFutureBlockGroups=").append(
getBytesInFutureBlockGroupsStat())
.append(", PendingDeletionBlockGroups=").append(
getPendingDeletionBlockGroupsStat())
.append("]");
return statsBuilder.toString();
}
}

View File

@ -78,6 +78,17 @@ public LocatedBlocks getBlockLocations() {
return locations;
}
/**
* This method transforms the underlying HDFS LocatedBlocks into
* BlockLocations.
*
* The returned BlockLocation will have different formats for replicated
* and erasure coded files.
* Please refer to
* {@link org.apache.hadoop.fs.FileSystem#getFileBlockLocations
* (FileStatus, long, long)}
* for examples.
*/
public final LocatedFileStatus makeQualifiedLocated(URI defaultUri,
Path path) {
makeQualified(defaultUri, path);
@ -96,5 +107,4 @@ public int hashCode() {
// satisfy findbugs
return super.hashCode();
}
}

View File

@ -0,0 +1,90 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.protocol;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Get statistics pertaining to blocks of type {@link BlockType#CONTIGUOUS}
* in the filesystem.
* <p>
* @see ClientProtocol#getReplicatedBlockStats()
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public final class ReplicatedBlockStats {
private final long lowRedundancyBlocks;
private final long corruptBlocks;
private final long missingBlocks;
private final long missingReplicationOneBlocks;
private final long bytesInFutureBlocks;
private final long pendingDeletionBlocks;
public ReplicatedBlockStats(long lowRedundancyBlocks,
long corruptBlocks, long missingBlocks,
long missingReplicationOneBlocks, long bytesInFutureBlocks,
long pendingDeletionBlocks) {
this.lowRedundancyBlocks = lowRedundancyBlocks;
this.corruptBlocks = corruptBlocks;
this.missingBlocks = missingBlocks;
this.missingReplicationOneBlocks = missingReplicationOneBlocks;
this.bytesInFutureBlocks = bytesInFutureBlocks;
this.pendingDeletionBlocks = pendingDeletionBlocks;
}
public long getLowRedundancyBlocks() {
return lowRedundancyBlocks;
}
public long getCorruptBlocks() {
return corruptBlocks;
}
public long getMissingReplicaBlocks() {
return missingBlocks;
}
public long getMissingReplicationOneBlocks() {
return missingReplicationOneBlocks;
}
public long getBytesInFutureBlocks() {
return bytesInFutureBlocks;
}
public long getPendingDeletionBlocks() {
return pendingDeletionBlocks;
}
@Override
public String toString() {
StringBuilder statsBuilder = new StringBuilder();
statsBuilder.append("ReplicatedBlockStats=[")
.append("LowRedundancyBlocks=").append(getLowRedundancyBlocks())
.append(", CorruptBlocks=").append(getCorruptBlocks())
.append(", MissingReplicaBlocks=").append(getMissingReplicaBlocks())
.append(", MissingReplicationOneBlocks=").append(
getMissingReplicationOneBlocks())
.append(", BytesInFutureBlocks=").append(getBytesInFutureBlocks())
.append(", PendingDeletionBlocks=").append(
getPendingDeletionBlocks())
.append("]");
return statsBuilder.toString();
}
}

View File

@ -61,7 +61,7 @@
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats;
import org.apache.hadoop.hdfs.protocol.EncryptionZone;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@ -73,7 +73,7 @@
import org.apache.hadoop.hdfs.protocol.LastBlockWithStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.BlocksStats;
import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
@ -120,8 +120,8 @@
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileInfoResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupsStatsRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsBlocksStatsRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupStatsRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsReplicatedBlockStatsRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsStatusRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLinkTargetRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLinkTargetResponseProto;
@ -246,13 +246,13 @@ public class ClientNamenodeProtocolTranslatorPB implements
private final static GetFsStatusRequestProto VOID_GET_FSSTATUS_REQUEST =
GetFsStatusRequestProto.newBuilder().build();
private final static GetFsBlocksStatsRequestProto
VOID_GET_FS_REPLICABLOCKS_STATS_REQUEST =
GetFsBlocksStatsRequestProto.newBuilder().build();
private final static GetFsReplicatedBlockStatsRequestProto
VOID_GET_FS_REPLICATED_BLOCK_STATS_REQUEST =
GetFsReplicatedBlockStatsRequestProto.newBuilder().build();
private final static GetFsECBlockGroupsStatsRequestProto
VOID_GET_FS_ECBLOCKGROUPS_STATS_REQUEST =
GetFsECBlockGroupsStatsRequestProto.newBuilder().build();
private final static GetFsECBlockGroupStatsRequestProto
VOID_GET_FS_ECBLOCKGROUP_STATS_REQUEST =
GetFsECBlockGroupStatsRequestProto.newBuilder().build();
private final static RollEditsRequestProto VOID_ROLLEDITS_REQUEST =
RollEditsRequestProto.getDefaultInstance();
@ -695,20 +695,20 @@ public long[] getStats() throws IOException {
}
@Override
public BlocksStats getBlocksStats() throws IOException {
public ReplicatedBlockStats getReplicatedBlockStats() throws IOException {
try {
return PBHelperClient.convert(rpcProxy.getFsBlocksStats(null,
VOID_GET_FS_REPLICABLOCKS_STATS_REQUEST));
return PBHelperClient.convert(rpcProxy.getFsReplicatedBlockStats(null,
VOID_GET_FS_REPLICATED_BLOCK_STATS_REQUEST));
} catch (ServiceException e) {
throw ProtobufHelper.getRemoteException(e);
}
}
@Override
public ECBlockGroupsStats getECBlockGroupsStats() throws IOException {
public ECBlockGroupStats getECBlockGroupStats() throws IOException {
try {
return PBHelperClient.convert(rpcProxy.getFsECBlockGroupsStats(null,
VOID_GET_FS_ECBLOCKGROUPS_STATS_REQUEST));
return PBHelperClient.convert(rpcProxy.getFsECBlockGroupStats(null,
VOID_GET_FS_ECBLOCKGROUP_STATS_REQUEST));
} catch (ServiceException e) {
throw ProtobufHelper.getRemoteException(e);
}

View File

@ -76,7 +76,7 @@
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
import org.apache.hadoop.hdfs.protocol.DatanodeLocalInfo;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats;
import org.apache.hadoop.hdfs.protocol.EncryptionZone;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyState;
@ -92,7 +92,7 @@
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
import org.apache.hadoop.hdfs.protocol.BlocksStats;
import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeStatus;
@ -122,8 +122,8 @@
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.DatanodeReportTypeProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.DatanodeStorageReportProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetEditsFromTxidResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupsStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsBlocksStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsReplicatedBlockStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.OpenFilesBatchResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.RollingUpgradeActionProto;
@ -1810,17 +1810,17 @@ public static long[] convert(GetFsStatsResponseProto res) {
return result;
}
public static BlocksStats convert(
GetFsBlocksStatsResponseProto res) {
return new BlocksStats(res.getLowRedundancy(),
public static ReplicatedBlockStats convert(
GetFsReplicatedBlockStatsResponseProto res) {
return new ReplicatedBlockStats(res.getLowRedundancy(),
res.getCorruptBlocks(), res.getMissingBlocks(),
res.getMissingReplOneBlocks(), res.getBlocksInFuture(),
res.getPendingDeletionBlocks());
}
public static ECBlockGroupsStats convert(
GetFsECBlockGroupsStatsResponseProto res) {
return new ECBlockGroupsStats(res.getLowRedundancy(),
public static ECBlockGroupStats convert(
GetFsECBlockGroupStatsResponseProto res) {
return new ECBlockGroupStats(res.getLowRedundancy(),
res.getCorruptBlocks(), res.getMissingBlocks(),
res.getBlocksInFuture(), res.getPendingDeletionBlocks());
}
@ -2236,37 +2236,37 @@ public static GetFsStatsResponseProto convert(long[] fsStats) {
return result.build();
}
public static GetFsBlocksStatsResponseProto convert(
BlocksStats blocksStats) {
GetFsBlocksStatsResponseProto.Builder result =
GetFsBlocksStatsResponseProto.newBuilder();
public static GetFsReplicatedBlockStatsResponseProto convert(
ReplicatedBlockStats replicatedBlockStats) {
GetFsReplicatedBlockStatsResponseProto.Builder result =
GetFsReplicatedBlockStatsResponseProto.newBuilder();
result.setLowRedundancy(
blocksStats.getLowRedundancyBlocksStat());
replicatedBlockStats.getLowRedundancyBlocks());
result.setCorruptBlocks(
blocksStats.getCorruptBlocksStat());
replicatedBlockStats.getCorruptBlocks());
result.setMissingBlocks(
blocksStats.getMissingReplicaBlocksStat());
replicatedBlockStats.getMissingReplicaBlocks());
result.setMissingReplOneBlocks(
blocksStats.getMissingReplicationOneBlocksStat());
replicatedBlockStats.getMissingReplicationOneBlocks());
result.setBlocksInFuture(
blocksStats.getBytesInFutureBlocksStat());
replicatedBlockStats.getBytesInFutureBlocks());
result.setPendingDeletionBlocks(
blocksStats.getPendingDeletionBlocksStat());
replicatedBlockStats.getPendingDeletionBlocks());
return result.build();
}
public static GetFsECBlockGroupsStatsResponseProto convert(
ECBlockGroupsStats ecBlockGroupsStats) {
GetFsECBlockGroupsStatsResponseProto.Builder result =
GetFsECBlockGroupsStatsResponseProto.newBuilder();
public static GetFsECBlockGroupStatsResponseProto convert(
ECBlockGroupStats ecBlockGroupStats) {
GetFsECBlockGroupStatsResponseProto.Builder result =
GetFsECBlockGroupStatsResponseProto.newBuilder();
result.setLowRedundancy(
ecBlockGroupsStats.getLowRedundancyBlockGroupsStat());
result.setCorruptBlocks(ecBlockGroupsStats.getCorruptBlockGroupsStat());
result.setMissingBlocks(ecBlockGroupsStats.getMissingBlockGroupsStat());
ecBlockGroupStats.getLowRedundancyBlockGroups());
result.setCorruptBlocks(ecBlockGroupStats.getCorruptBlockGroups());
result.setMissingBlocks(ecBlockGroupStats.getMissingBlockGroups());
result.setBlocksInFuture(
ecBlockGroupsStats.getBytesInFutureBlockGroupsStat());
ecBlockGroupStats.getBytesInFutureBlockGroups());
result.setPendingDeletionBlocks(
ecBlockGroupsStats.getPendingDeletionBlockGroupsStat());
ecBlockGroupStats.getPendingDeletionBlocks());
return result.build();
}
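The two convert() overloads above are inverses of each other; a small round-trip sketch, assuming the generated protobuf classes are on the classpath and using illustrative counter values.
import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsReplicatedBlockStatsResponseProto;

public class ReplicatedBlockStatsRoundTrip {
  public static void main(String[] args) {
    // Server side: wrap the in-memory stats into the RPC response message.
    ReplicatedBlockStats stats =
        new ReplicatedBlockStats(1L, 2L, 3L, 4L, 5L, 6L);
    GetFsReplicatedBlockStatsResponseProto proto = PBHelperClient.convert(stats);

    // Client side: unwrap the response back into ReplicatedBlockStats.
    ReplicatedBlockStats decoded = PBHelperClient.convert(proto);
    System.out.println(decoded);   // the counters survive the round trip
  }
}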

View File

@ -22,7 +22,6 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.ContentSummary.Builder;
import org.apache.hadoop.fs.FileChecksum;
@ -645,56 +644,4 @@ private static StorageType[] toStorageTypes(List<?> list) {
}
}
static BlockLocation[] toBlockLocationArray(Map<?, ?> json)
throws IOException{
final Map<?, ?> rootmap =
(Map<?, ?>)json.get(BlockLocation.class.getSimpleName() + "s");
final List<?> array = JsonUtilClient.getList(rootmap,
BlockLocation.class.getSimpleName());
Preconditions.checkNotNull(array);
final BlockLocation[] locations = new BlockLocation[array.size()];
int i = 0;
for (Object object : array) {
final Map<?, ?> m = (Map<?, ?>) object;
locations[i++] = JsonUtilClient.toBlockLocation(m);
}
return locations;
}
/** Convert a Json map to BlockLocation. **/
static BlockLocation toBlockLocation(Map<?, ?> m)
throws IOException{
if(m == null) {
return null;
}
long length = ((Number) m.get("length")).longValue();
long offset = ((Number) m.get("offset")).longValue();
boolean corrupt = Boolean.
getBoolean(m.get("corrupt").toString());
String[] storageIds = toStringArray(getList(m, "storageIds"));
String[] cachedHosts = toStringArray(getList(m, "cachedHosts"));
String[] hosts = toStringArray(getList(m, "hosts"));
String[] names = toStringArray(getList(m, "names"));
String[] topologyPaths = toStringArray(getList(m, "topologyPaths"));
StorageType[] storageTypes = toStorageTypeArray(
getList(m, "storageTypes"));
return new BlockLocation(names, hosts, cachedHosts,
topologyPaths, storageIds, storageTypes,
offset, length, corrupt);
}
static String[] toStringArray(List<?> list) {
if (list == null) {
return null;
} else {
final String[] array = new String[list.size()];
int i = 0;
for (Object object : list) {
array[i++] = object.toString();
}
return array;
}
}
}

View File

@ -1616,68 +1616,14 @@ public BlockLocation[] getFileBlockLocations(final Path p,
final long offset, final long length) throws IOException {
statistics.incrementReadOps(1);
storageStatistics.incrementOpCounter(OpType.GET_FILE_BLOCK_LOCATIONS);
BlockLocation[] locations = null;
try {
locations = getFileBlockLocations(
GetOpParam.Op.GETFILEBLOCKLOCATIONS,
p, offset, length);
} catch (RemoteException e) {
// See the error message from ExceptionHandle
if(e.getMessage() != null &&
e.getMessage().contains(
"Invalid value for webhdfs parameter") &&
e.getMessage().contains(
GetOpParam.Op.GETFILEBLOCKLOCATIONS.toString())) {
// Old webhdfs server doesn't support GETFILEBLOCKLOCATIONS
// operation, fall back to query again using old API
// GET_BLOCK_LOCATIONS.
LOG.info("Invalid webhdfs operation parameter "
+ GetOpParam.Op.GETFILEBLOCKLOCATIONS + ". Fallback to use "
+ GetOpParam.Op.GET_BLOCK_LOCATIONS + " instead.");
locations = getFileBlockLocations(
GetOpParam.Op.GET_BLOCK_LOCATIONS,
p, offset, length);
}
}
return locations;
}
/**
* Get file block locations implementation. Provide a operation
* parameter to determine how to get block locations from a webhdfs
* server. Older server only supports <b>GET_BLOCK_LOCATIONS</b> but
* not <b>GETFILEBLOCKLOCATIONS</b>.
*
* @param path path to the file
* @param offset start offset in the given file
* @param length of the file to get locations for
* @param operation
* Valid operation is either
* {@link org.apache.hadoop.hdfs.web.resources.GetOpParam.Op
* #GET_BLOCK_LOCATIONS} or
* {@link org.apache.hadoop.hdfs.web.resources.GetOpParam.Op
* #GET_BLOCK_LOCATIONS}
* @throws IOException
* Http connection error, decoding error or given
* operation is not valid
*/
@VisibleForTesting
protected BlockLocation[] getFileBlockLocations(
GetOpParam.Op operation, final Path path,
final long offset, final long length) throws IOException {
return new FsPathResponseRunner<BlockLocation[]>(operation, path,
final HttpOpParam.Op op = GetOpParam.Op.GET_BLOCK_LOCATIONS;
return new FsPathResponseRunner<BlockLocation[]>(op, p,
new OffsetParam(offset), new LengthParam(length)) {
@Override
BlockLocation[] decodeResponse(Map<?,?> json) throws IOException {
switch(operation) {
case GETFILEBLOCKLOCATIONS:
return JsonUtilClient.toBlockLocationArray(json);
case GET_BLOCK_LOCATIONS:
return DFSUtilClient.locatedBlocks2Locations(
JsonUtilClient.toLocatedBlocks(json));
default :
throw new IOException("Unknown operation " + operation.name());
}
return DFSUtilClient.locatedBlocks2Locations(
JsonUtilClient.toLocatedBlocks(json));
}
}.run();
}

View File

@ -33,18 +33,8 @@ public enum Op implements HttpOpParam.Op {
GETHOMEDIRECTORY(false, HttpURLConnection.HTTP_OK),
GETDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK, true),
/**
* GET_BLOCK_LOCATIONS is a private/stable API op. It returns a
* {@link org.apache.hadoop.hdfs.protocol.LocatedBlocks}
* json object.
*/
/** GET_BLOCK_LOCATIONS is a private unstable op. */
GET_BLOCK_LOCATIONS(false, HttpURLConnection.HTTP_OK),
/**
* GETFILEBLOCKLOCATIONS is the public op that complies with
* {@link org.apache.hadoop.fs.FileSystem#getFileBlockLocations}
* interface.
*/
GETFILEBLOCKLOCATIONS(false, HttpURLConnection.HTTP_OK),
GETACLSTATUS(false, HttpURLConnection.HTTP_OK),
GETXATTRS(false, HttpURLConnection.HTTP_OK),
GETTRASHROOT(false, HttpURLConnection.HTTP_OK),

View File

@ -327,10 +327,10 @@ message GetFsStatsResponseProto {
optional uint64 pending_deletion_blocks = 9;
}
message GetFsBlocksStatsRequestProto { // no input parameters
message GetFsReplicatedBlockStatsRequestProto { // no input parameters
}
message GetFsBlocksStatsResponseProto {
message GetFsReplicatedBlockStatsResponseProto {
required uint64 low_redundancy = 1;
required uint64 corrupt_blocks = 2;
required uint64 missing_blocks = 3;
@ -339,10 +339,10 @@ message GetFsBlocksStatsResponseProto {
required uint64 pending_deletion_blocks = 6;
}
message GetFsECBlockGroupsStatsRequestProto { // no input parameters
message GetFsECBlockGroupStatsRequestProto { // no input parameters
}
message GetFsECBlockGroupsStatsResponseProto {
message GetFsECBlockGroupStatsResponseProto {
required uint64 low_redundancy = 1;
required uint64 corrupt_blocks = 2;
required uint64 missing_blocks = 3;
@ -831,10 +831,10 @@ service ClientNamenodeProtocol {
rpc recoverLease(RecoverLeaseRequestProto)
returns(RecoverLeaseResponseProto);
rpc getFsStats(GetFsStatusRequestProto) returns(GetFsStatsResponseProto);
rpc getFsBlocksStats(GetFsBlocksStatsRequestProto)
returns (GetFsBlocksStatsResponseProto);
rpc getFsECBlockGroupsStats(GetFsECBlockGroupsStatsRequestProto)
returns (GetFsECBlockGroupsStatsResponseProto);
rpc getFsReplicatedBlockStats(GetFsReplicatedBlockStatsRequestProto)
returns (GetFsReplicatedBlockStatsResponseProto);
rpc getFsECBlockGroupStats(GetFsECBlockGroupStatsRequestProto)
returns (GetFsECBlockGroupStatsResponseProto);
rpc getDatanodeReport(GetDatanodeReportRequestProto)
returns(GetDatanodeReportResponseProto);
rpc getDatanodeStorageReport(GetDatanodeStorageReportRequestProto)

View File

@ -23,12 +23,9 @@
import java.util.EnumSet;
import java.util.List;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.type.MapType;
import com.google.common.base.Charsets;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.DelegationTokenRenewer;
import org.apache.hadoop.fs.FSDataInputStream;
@ -122,8 +119,6 @@ public class HttpFSFileSystem extends FileSystem
public static final String NEW_LENGTH_PARAM = "newlength";
public static final String START_AFTER_PARAM = "startAfter";
public static final String POLICY_NAME_PARAM = "storagepolicy";
public static final String OFFSET_PARAM = "offset";
public static final String LENGTH_PARAM = "length";
public static final String SNAPSHOT_NAME_PARAM = "snapshotname";
public static final String OLD_SNAPSHOT_NAME_PARAM = "oldsnapshotname";
@ -210,7 +205,6 @@ public static FILE_TYPE getType(FileStatus fileStatus) {
public static final String STORAGE_POLICIES_JSON = "BlockStoragePolicies";
public static final String STORAGE_POLICY_JSON = "BlockStoragePolicy";
public static final String BLOCK_LOCATIONS_JSON = "BlockLocations";
public static final int HTTP_TEMPORARY_REDIRECT = 307;
@ -1359,42 +1353,6 @@ public BlockStoragePolicy getStoragePolicy(Path src) throws IOException {
return createStoragePolicy((JSONObject) json.get(STORAGE_POLICY_JSON));
}
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
long len) throws IOException {
Map<String, String> params = new HashMap<String, String>();
params.put(OP_PARAM, Operation.GETFILEBLOCKLOCATIONS.toString());
params.put(OFFSET_PARAM, Long.toString(start));
params.put(LENGTH_PARAM, Long.toString(len));
HttpURLConnection conn =
getConnection(Operation.GETFILEBLOCKLOCATIONS.getMethod(), params,
file.getPath(), true);
HttpExceptionUtils.validateResponse(conn, HttpURLConnection.HTTP_OK);
JSONObject json = (JSONObject) HttpFSUtils.jsonParse(conn);
return toBlockLocations(json);
}
private BlockLocation[] toBlockLocations(JSONObject json)
throws IOException {
ObjectMapper mapper = new ObjectMapper();
MapType subType = mapper.getTypeFactory().constructMapType(
Map.class,
String.class,
BlockLocation[].class);
MapType rootType = mapper.getTypeFactory().constructMapType(
Map.class,
mapper.constructType(String.class),
mapper.constructType(subType));
Map<String, Map<String, BlockLocation[]>> jsonMap = mapper
.readValue(json.toJSONString(), rootType);
Map<String, BlockLocation[]> locationMap = jsonMap
.get(BLOCK_LOCATIONS_JSON);
BlockLocation[] locationArray = locationMap.get(
BlockLocation.class.getSimpleName());
return locationArray;
}
private BlockStoragePolicy createStoragePolicy(JSONObject policyJson)
throws IOException {
byte id = ((Number) policyJson.get("id")).byteValue();

View File

@ -18,7 +18,6 @@
package org.apache.hadoop.fs.http.server;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.BlockStoragePolicySpi;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileChecksum;
@ -36,7 +35,6 @@
import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
import org.apache.hadoop.hdfs.web.JsonUtil;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.lib.service.FileSystemAccess;
import org.apache.hadoop.util.StringUtils;
@ -1458,41 +1456,6 @@ public Void execute(FileSystem fs) throws IOException {
}
}
/**
* Executor that performs a getFileBlockLocations FileSystemAccess
* file system operation.
*/
@InterfaceAudience.Private
@SuppressWarnings("rawtypes")
public static class FSFileBlockLocations implements
FileSystemAccess.FileSystemExecutor<Map> {
private Path path;
private long offsetValue;
private long lengthValue;
/**
* Creates a file-block-locations executor.
*
* @param path the path to retrieve the location
* @param offsetValue offset into the given file
* @param lengthValue length for which to get locations for
*/
public FSFileBlockLocations(String path, long offsetValue,
long lengthValue) {
this.path = new Path(path);
this.offsetValue = offsetValue;
this.lengthValue = lengthValue;
}
@Override
public Map execute(FileSystem fs) throws IOException {
BlockLocation[] locations =
fs.getFileBlockLocations(this.path, this.offsetValue,
this.lengthValue);
return JsonUtil.toJsonMap(locations);
}
}
/**
* Executor that performs a createSnapshot FileSystemAccess operation.
*/
@ -1596,5 +1559,4 @@ public Void execute(FileSystem fs) throws IOException {
return null;
}
}
}

View File

@ -58,8 +58,7 @@ public class HttpFSParametersProvider extends ParametersProvider {
PARAMS_DEF.put(Operation.GETHOMEDIRECTORY, new Class[]{});
PARAMS_DEF.put(Operation.GETCONTENTSUMMARY, new Class[]{});
PARAMS_DEF.put(Operation.GETFILECHECKSUM, new Class[]{});
PARAMS_DEF.put(Operation.GETFILEBLOCKLOCATIONS,
new Class[] {OffsetParam.class, LenParam.class});
PARAMS_DEF.put(Operation.GETFILEBLOCKLOCATIONS, new Class[]{});
PARAMS_DEF.put(Operation.GETACLSTATUS, new Class[]{});
PARAMS_DEF.put(Operation.GETTRASHROOT, new Class[]{});
PARAMS_DEF.put(Operation.INSTRUMENTATION, new Class[]{});

View File

@ -51,7 +51,6 @@
import org.apache.hadoop.fs.http.server.HttpFSParametersProvider.XAttrNameParam;
import org.apache.hadoop.fs.http.server.HttpFSParametersProvider.XAttrSetFlagParam;
import org.apache.hadoop.fs.http.server.HttpFSParametersProvider.XAttrValueParam;
import org.apache.hadoop.hdfs.web.JsonUtil;
import org.apache.hadoop.http.JettyUtils;
import org.apache.hadoop.lib.service.FileSystemAccess;
import org.apache.hadoop.lib.service.FileSystemAccessException;
@ -299,25 +298,7 @@ public InputStream run() throws Exception {
break;
}
case GETFILEBLOCKLOCATIONS: {
long offset = 0;
// In case length is not given, reset to max long
// in order to retrieve all file block locations
long len = Long.MAX_VALUE;
Long offsetParam = params.get(OffsetParam.NAME, OffsetParam.class);
Long lenParam = params.get(LenParam.NAME, LenParam.class);
AUDIT_LOG.info("[{}] offset [{}] len [{}]",
new Object[] {path, offsetParam, lenParam});
if (offsetParam != null && offsetParam.longValue() > 0) {
offset = offsetParam.longValue();
}
if (lenParam != null && lenParam.longValue() > 0) {
len = lenParam.longValue();
}
FSOperations.FSFileBlockLocations command =
new FSOperations.FSFileBlockLocations(path, offset, len);
@SuppressWarnings("rawtypes") Map locations = fsExecute(user, command);
final String json = JsonUtil.toJsonString("BlockLocations", locations);
response = Response.ok(json).type(MediaType.APPLICATION_JSON).build();
response = Response.status(Response.Status.BAD_REQUEST).build();
break;
}
case GETACLSTATUS: {

View File

@ -20,7 +20,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockStoragePolicySpi;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileChecksum;
@ -1041,7 +1040,7 @@ protected enum Operation {
WORKING_DIRECTORY, MKDIRS, SET_TIMES, SET_PERMISSION, SET_OWNER,
SET_REPLICATION, CHECKSUM, CONTENT_SUMMARY, FILEACLS, DIRACLS, SET_XATTR,
GET_XATTRS, REMOVE_XATTR, LIST_XATTRS, ENCRYPTION, LIST_STATUS_BATCH,
GETTRASHROOT, STORAGEPOLICY, ERASURE_CODING, GETFILEBLOCKLOCATIONS,
GETTRASHROOT, STORAGEPOLICY, ERASURE_CODING,
CREATE_SNAPSHOT, RENAME_SNAPSHOT, DELETE_SNAPSHOT
}
@ -1131,9 +1130,6 @@ private void operation(Operation op) throws Exception {
case ERASURE_CODING:
testErasureCoding();
break;
case GETFILEBLOCKLOCATIONS:
testGetFileBlockLocations();
break;
case CREATE_SNAPSHOT:
testCreateSnapshot();
break;
@ -1189,88 +1185,6 @@ public Void run() throws Exception {
});
}
private void testGetFileBlockLocations() throws Exception {
BlockLocation[] locations1, locations2, locations11, locations21 = null;
Path testFile = null;
// Test single block file block locations.
try (FileSystem fs = FileSystem.get(getProxiedFSConf())) {
testFile = new Path(getProxiedFSTestDir(), "singleBlock.txt");
DFSTestUtil.createFile(fs, testFile, (long) 1, (short) 1, 0L);
locations1 = fs.getFileBlockLocations(testFile, 0, 1);
Assert.assertNotNull(locations1);
}
try (FileSystem fs = getHttpFSFileSystem()) {
locations2 = fs.getFileBlockLocations(testFile, 0, 1);
Assert.assertNotNull(locations2);
}
verifyBlockLocations(locations1, locations2);
// Test multi-block single replica file block locations.
try (FileSystem fs = FileSystem.get(getProxiedFSConf())) {
testFile = new Path(getProxiedFSTestDir(), "multipleBlocks.txt");
DFSTestUtil.createFile(fs, testFile, 512, (short) 2048,
(long) 512, (short) 1, 0L);
locations1 = fs.getFileBlockLocations(testFile, 0, 1024);
locations11 = fs.getFileBlockLocations(testFile, 1024, 2048);
Assert.assertNotNull(locations1);
Assert.assertNotNull(locations11);
}
try (FileSystem fs = getHttpFSFileSystem()) {
locations2 = fs.getFileBlockLocations(testFile, 0, 1024);
locations21 = fs.getFileBlockLocations(testFile, 1024, 2048);
Assert.assertNotNull(locations2);
Assert.assertNotNull(locations21);
}
verifyBlockLocations(locations1, locations2);
verifyBlockLocations(locations11, locations21);
// Test multi-block multi-replica file block locations.
try (FileSystem fs = FileSystem.get(getProxiedFSConf())) {
testFile = new Path(getProxiedFSTestDir(), "multipleBlocks.txt");
DFSTestUtil.createFile(fs, testFile, 512, (short) 2048,
(long) 512, (short) 3, 0L);
locations1 = fs.getFileBlockLocations(testFile, 0, 2048);
Assert.assertNotNull(locations1);
}
try (FileSystem fs = getHttpFSFileSystem()) {
locations2 = fs.getFileBlockLocations(testFile, 0, 2048);
Assert.assertNotNull(locations2);
}
verifyBlockLocations(locations1, locations2);
}
private void verifyBlockLocations(BlockLocation[] locations1,
BlockLocation[] locations2) throws IOException {
Assert.assertEquals(locations1.length, locations2.length);
for (int i = 0; i < locations1.length; i++) {
BlockLocation location1 = locations1[i];
BlockLocation location2 = locations2[i];
Assert.assertEquals(location1.isCorrupt(), location2.isCorrupt());
Assert.assertEquals(location1.getOffset(), location2.getOffset());
Assert.assertEquals(location1.getLength(), location2.getLength());
Arrays.sort(location1.getHosts());
Arrays.sort(location2.getHosts());
Arrays.sort(location1.getNames());
Arrays.sort(location2.getNames());
Arrays.sort(location1.getTopologyPaths());
Arrays.sort(location2.getTopologyPaths());
Assert.assertArrayEquals(location1.getHosts(), location2.getHosts());
Assert.assertArrayEquals(location1.getNames(), location2.getNames());
Assert.assertArrayEquals(location1.getTopologyPaths(),
location2.getTopologyPaths());
}
}
private void testCreateSnapshot(String snapshotName) throws Exception {
if (!this.isLocalFS()) {
Path snapshottablePath = new Path("/tmp/tmp-snap-test");
@ -1363,5 +1277,4 @@ private void testDeleteSnapshot() throws Exception {
fs.delete(snapshottablePath, true);
}
}
}

View File

@ -167,8 +167,6 @@ private static synchronized MiniDFSCluster startMiniHdfs(Configuration conf) thr
new Path(helper.getTestRootDir(), "test.jks").toUri();
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_PROVIDER_PATH,
jceksPath);
conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
ERASURE_CODING_POLICY.getName());
MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
int totalDataNodes = ERASURE_CODING_POLICY.getNumDataUnits() +
ERASURE_CODING_POLICY.getNumParityUnits();
@ -178,6 +176,7 @@ private static synchronized MiniDFSCluster startMiniHdfs(Configuration conf) thr
DFSTestUtil.createKey(testkey, miniHdfs, conf);
DistributedFileSystem fileSystem = miniHdfs.getFileSystem();
fileSystem.enableErasureCodingPolicy(ERASURE_CODING_POLICY.getName());
fileSystem.getClient().setKeyProvider(miniHdfs.getNameNode()
.getNamesystem().getProvider());

View File

@ -563,22 +563,18 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_DATANODE_DISK_CHECK_TIMEOUT_DEFAULT =
"10m";
public static final String DFS_NAMENODE_EC_POLICIES_ENABLED_KEY = "dfs.namenode.ec.policies.enabled";
public static final String DFS_NAMENODE_EC_POLICIES_ENABLED_DEFAULT = "";
public static final String DFS_NAMENODE_EC_POLICIES_MAX_CELLSIZE_KEY = "dfs.namenode.ec.policies.max.cellsize";
public static final int DFS_NAMENODE_EC_POLICIES_MAX_CELLSIZE_DEFAULT = 4 * 1024 * 1024;
public static final String DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY =
"dfs.namenode.ec.system.default.policy";
public static final String DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY_DEFAULT =
"RS-6-3-1024k";
public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_KEY = "dfs.datanode.ec.reconstruction.stripedread.threads";
public static final int DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_DEFAULT = 20;
public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_KEY = "dfs.datanode.ec.reconstruction.stripedread.buffer.size";
public static final int DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_DEFAULT = 64 * 1024;
public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_TIMEOUT_MILLIS_KEY = "dfs.datanode.ec.reconstruction.stripedread.timeout.millis";
public static final int DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_TIMEOUT_MILLIS_DEFAULT = 5000; //5s
public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_BLK_THREADS_KEY = "dfs.datanode.ec.reconstruction.stripedblock.threads.size";
public static final int DFS_DN_EC_RECONSTRUCTION_STRIPED_BLK_THREADS_DEFAULT = 8;
public static final String DFS_DN_EC_RECONSTRUCTION_THREADS_KEY = "dfs.datanode.ec.reconstruction.threads";
public static final int DFS_DN_EC_RECONSTRUCTION_THREADS_DEFAULT = 8;
public static final String
DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY =

View File

@ -124,12 +124,12 @@
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileInfoResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupStatsRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsReplicatedBlockStatsRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsReplicatedBlockStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsBlocksStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupsStatsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsStatusRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsBlocksStatsRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupsStatsRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLinkTargetRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLinkTargetResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetListingRequestProto;
@ -763,22 +763,22 @@ public GetFsStatsResponseProto getFsStats(RpcController controller,
}
@Override
public GetFsBlocksStatsResponseProto getFsBlocksStats(
RpcController controller, GetFsBlocksStatsRequestProto request)
public GetFsReplicatedBlockStatsResponseProto getFsReplicatedBlockStats(
RpcController controller, GetFsReplicatedBlockStatsRequestProto request)
throws ServiceException {
try {
return PBHelperClient.convert(server.getBlocksStats());
return PBHelperClient.convert(server.getReplicatedBlockStats());
} catch (IOException e) {
throw new ServiceException(e);
}
}
@Override
public GetFsECBlockGroupsStatsResponseProto getFsECBlockGroupsStats(
RpcController controller, GetFsECBlockGroupsStatsRequestProto request)
public GetFsECBlockGroupStatsResponseProto getFsECBlockGroupStats(
RpcController controller, GetFsECBlockGroupStatsRequestProto request)
throws ServiceException {
try {
return PBHelperClient.convert(server.getECBlockGroupsStats());
return PBHelperClient.convert(server.getECBlockGroupStats());
} catch (IOException e) {
throw new ServiceException(e);
}

View File

@ -24,7 +24,7 @@
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.util.StopWatch;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.Timer;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
@ -35,6 +35,7 @@
import com.google.protobuf.Message;
import com.google.protobuf.TextFormat;
/**
* Represents a set of calls for which a quorum of results is needed.
* @param <KEY> a key used to identify each of the outgoing calls
@ -60,11 +61,12 @@ class QuorumCall<KEY, RESULT> {
* fraction of the configured timeout for any call.
*/
private static final float WAIT_PROGRESS_WARN_THRESHOLD = 0.7f;
private final StopWatch quorumStopWatch = new StopWatch();
private final StopWatch quorumStopWatch;
private final Timer timer;
static <KEY, RESULT> QuorumCall<KEY, RESULT> create(
Map<KEY, ? extends ListenableFuture<RESULT>> calls) {
final QuorumCall<KEY, RESULT> qr = new QuorumCall<KEY, RESULT>();
Map<KEY, ? extends ListenableFuture<RESULT>> calls, Timer timer) {
final QuorumCall<KEY, RESULT> qr = new QuorumCall<KEY, RESULT>(timer);
for (final Entry<KEY, ? extends ListenableFuture<RESULT>> e : calls.entrySet()) {
Preconditions.checkArgument(e.getValue() != null,
"null future for key: " + e.getKey());
@ -82,18 +84,53 @@ public void onSuccess(RESULT res) {
}
return qr;
}
private QuorumCall() {
// Only instantiated from factory method above
static <KEY, RESULT> QuorumCall<KEY, RESULT> create(
Map<KEY, ? extends ListenableFuture<RESULT>> calls) {
return create(calls, new Timer());
}
/**
* Not intended for outside use.
*/
private QuorumCall() {
this(new Timer());
}
private QuorumCall(Timer timer) {
// Only instantiated from factory method above
this.timer = timer;
this.quorumStopWatch = new StopWatch(timer);
}
/**
* Used in conjunction with {@link #getQuorumTimeoutIncreaseMillis(long, int)}
* to check for pauses.
*/
private void restartQuorumStopWatch() {
quorumStopWatch.reset().start();
}
private boolean shouldIncreaseQuorumTimeout(long offset, int millis) {
/**
* Check for a pause (e.g. GC) since the last time
* {@link #restartQuorumStopWatch()} was called. If detected, return the
* length of the pause; else, -1.
* @param offset Offset the elapsed time by this amount; use if some amount
* of pause was expected
* @param millis Total length of timeout in milliseconds
* @return Length of pause, if detected, else -1
*/
private long getQuorumTimeoutIncreaseMillis(long offset, int millis) {
long elapsed = quorumStopWatch.now(TimeUnit.MILLISECONDS);
return elapsed + offset > (millis * WAIT_PROGRESS_INFO_THRESHOLD);
long pauseTime = elapsed + offset;
if (pauseTime > (millis * WAIT_PROGRESS_INFO_THRESHOLD)) {
QuorumJournalManager.LOG.info("Pause detected while waiting for " +
"QuorumCall response; increasing timeout threshold by pause time " +
"of " + pauseTime + " ms.");
return pauseTime;
} else {
return -1;
}
}
@ -119,7 +156,7 @@ public synchronized void waitFor(
int minResponses, int minSuccesses, int maxExceptions,
int millis, String operationName)
throws InterruptedException, TimeoutException {
long st = Time.monotonicNow();
long st = timer.monotonicNow();
long nextLogTime = st + (long)(millis * WAIT_PROGRESS_INFO_THRESHOLD);
long et = st + millis;
while (true) {
@ -128,7 +165,7 @@ public synchronized void waitFor(
if (minResponses > 0 && countResponses() >= minResponses) return;
if (minSuccesses > 0 && countSuccesses() >= minSuccesses) return;
if (maxExceptions >= 0 && countExceptions() > maxExceptions) return;
long now = Time.monotonicNow();
long now = timer.monotonicNow();
if (now > nextLogTime) {
long waited = now - st;
@ -154,8 +191,9 @@ public synchronized void waitFor(
long rem = et - now;
if (rem <= 0) {
// Increase timeout if a full GC occurred after restarting stopWatch
if (shouldIncreaseQuorumTimeout(0, millis)) {
et = et + millis;
long timeoutIncrease = getQuorumTimeoutIncreaseMillis(0, millis);
if (timeoutIncrease > 0) {
et += timeoutIncrease;
} else {
throw new TimeoutException();
}
@ -165,8 +203,9 @@ public synchronized void waitFor(
rem = Math.max(rem, 1);
wait(rem);
// Increase timeout if a full GC occurred after restarting stopWatch
if (shouldIncreaseQuorumTimeout(-rem, millis)) {
et = et + millis;
long timeoutIncrease = getQuorumTimeoutIncreaseMillis(-rem, millis);
if (timeoutIncrease > 0) {
et += timeoutIncrease;
}
}
}
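The pause handling above can be reduced to a small standalone sketch built on Hadoop's StopWatch and Timer utilities; the 0.3f factor is a placeholder standing in for WAIT_PROGRESS_INFO_THRESHOLD, and the method names are illustrative.
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.util.StopWatch;
import org.apache.hadoop.util.Timer;

public class PauseAwareDeadline {
  private static final float PROGRESS_THRESHOLD = 0.3f; // placeholder factor
  private final Timer timer = new Timer();
  private final StopWatch watch = new StopWatch(timer);

  /** Call just before blocking; pairs with pauseMillis() after waking up. */
  void beforeWait() {
    watch.reset().start();
  }

  /**
   * Report how much longer the wait took than expected. A value above the
   * threshold suggests a GC-style pause, so the caller should extend its
   * deadline by that amount instead of declaring a timeout; -1 means no
   * significant pause was detected.
   */
  long pauseMillis(long expectedSleepMs, int timeoutMs) {
    long elapsed = watch.now(TimeUnit.MILLISECONDS) - expectedSleepMs;
    return elapsed > timeoutMs * PROGRESS_THRESHOLD ? elapsed : -1;
  }
}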

View File

@ -157,27 +157,36 @@ public int run(String[] args) throws Exception {
*/
public void start() throws IOException {
Preconditions.checkState(!isStarted(), "JN already running");
validateAndCreateJournalDir(localDir);
DefaultMetricsSystem.initialize("JournalNode");
JvmMetrics.create("JournalNode",
conf.get(DFSConfigKeys.DFS_METRICS_SESSION_ID_KEY),
DefaultMetricsSystem.instance());
InetSocketAddress socAddr = JournalNodeRpcServer.getAddress(conf);
SecurityUtil.login(conf, DFSConfigKeys.DFS_JOURNALNODE_KEYTAB_FILE_KEY,
DFSConfigKeys.DFS_JOURNALNODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
registerJNMXBean();
httpServer = new JournalNodeHttpServer(conf, this);
httpServer.start();
try {
httpServerURI = httpServer.getServerURI().toString();
validateAndCreateJournalDir(localDir);
rpcServer = new JournalNodeRpcServer(conf, this);
rpcServer.start();
DefaultMetricsSystem.initialize("JournalNode");
JvmMetrics.create("JournalNode",
conf.get(DFSConfigKeys.DFS_METRICS_SESSION_ID_KEY),
DefaultMetricsSystem.instance());
InetSocketAddress socAddr = JournalNodeRpcServer.getAddress(conf);
SecurityUtil.login(conf, DFSConfigKeys.DFS_JOURNALNODE_KEYTAB_FILE_KEY,
DFSConfigKeys.DFS_JOURNALNODE_KERBEROS_PRINCIPAL_KEY,
socAddr.getHostName());
registerJNMXBean();
httpServer = new JournalNodeHttpServer(conf, this);
httpServer.start();
httpServerURI = httpServer.getServerURI().toString();
rpcServer = new JournalNodeRpcServer(conf, this);
rpcServer.start();
} catch (IOException ioe) {
// Shutdown the JournalNode if the JournalNodeRpcServer fails to start
LOG.error("Failed to start JournalNode.", ioe);
this.stop(1);
throw ioe;
}
}
public boolean isStarted() {
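The restructuring above follows a start-or-clean-up pattern; a generic sketch of that pattern, with Service as a hypothetical stand-in for the HTTP and RPC servers started here.
import java.io.IOException;

public class StartOrCleanup {
  /** Hypothetical stand-in for a startable/stoppable server. */
  interface Service {
    void start() throws IOException;
    void stop();
  }

  private Service httpServer;
  private Service rpcServer;

  /** Start both servers; if either fails, stop whatever already started and rethrow. */
  public void start(Service http, Service rpc) throws IOException {
    try {
      httpServer = http;
      httpServer.start();
      rpcServer = rpc;
      rpcServer.start();
    } catch (IOException ioe) {
      stop();          // release partially acquired resources
      throw ioe;       // surface the original failure to the caller
    }
  }

  public void stop() {
    if (rpcServer != null) {
      rpcServer.stop();
    }
    if (httpServer != null) {
      httpServer.stop();
    }
  }
}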

View File

@ -233,47 +233,47 @@ public long getNumTimedOutPendingReconstructions() {
/** Used by metrics. */
public long getLowRedundancyBlocks() {
return neededReconstruction.getLowRedundancyBlocksStat();
return neededReconstruction.getLowRedundancyBlocks();
}
/** Used by metrics. */
public long getCorruptBlocks() {
return corruptReplicas.getCorruptBlocksStat();
return corruptReplicas.getCorruptBlocks();
}
/** Used by metrics. */
public long getMissingBlocks() {
return neededReconstruction.getCorruptBlocksStat();
return neededReconstruction.getCorruptBlocks();
}
/** Used by metrics. */
public long getMissingReplicationOneBlocks() {
return neededReconstruction.getCorruptReplicationOneBlocksStat();
return neededReconstruction.getCorruptReplicationOneBlocks();
}
/** Used by metrics. */
public long getPendingDeletionReplicatedBlocks() {
return invalidateBlocks.getBlocksStat();
return invalidateBlocks.getBlocks();
}
/** Used by metrics. */
public long getLowRedundancyECBlockGroups() {
return neededReconstruction.getLowRedundancyECBlockGroupsStat();
return neededReconstruction.getLowRedundancyECBlockGroups();
}
/** Used by metrics. */
public long getCorruptECBlockGroups() {
return corruptReplicas.getCorruptECBlockGroupsStat();
return corruptReplicas.getCorruptECBlockGroups();
}
/** Used by metrics. */
public long getMissingECBlockGroups() {
return neededReconstruction.getCorruptECBlockGroupsStat();
return neededReconstruction.getCorruptECBlockGroups();
}
/** Used by metrics. */
public long getPendingDeletionECBlockGroups() {
return invalidateBlocks.getECBlockGroupsStat();
public long getPendingDeletionECBlocks() {
return invalidateBlocks.getECBlocks();
}
/**
@ -748,7 +748,7 @@ public void metaSave(PrintWriter out) {
invalidateBlocks.dump(out);
//Dump corrupt blocks and their storageIDs
Set<Block> corruptBlocks = corruptReplicas.getCorruptBlocks();
Set<Block> corruptBlocks = corruptReplicas.getCorruptBlocksSet();
out.println("Corrupt Blocks:");
for(Block block : corruptBlocks) {
Collection<DatanodeDescriptor> corruptNodes =
@ -2057,6 +2057,7 @@ public DatanodeStorageInfo[] chooseTarget4NewBlock(final String src,
final List<String> favoredNodes,
final byte storagePolicyID,
final BlockType blockType,
final ErasureCodingPolicy ecPolicy,
final EnumSet<AddBlockFlag> flags) throws IOException {
List<DatanodeDescriptor> favoredDatanodeDescriptors =
getDatanodeDescriptors(favoredNodes);
@ -2067,14 +2068,23 @@ public DatanodeStorageInfo[] chooseTarget4NewBlock(final String src,
final DatanodeStorageInfo[] targets = blockplacement.chooseTarget(src,
numOfReplicas, client, excludedNodes, blocksize,
favoredDatanodeDescriptors, storagePolicy, flags);
if (targets.length < minReplication) {
throw new IOException("File " + src + " could only be replicated to "
+ targets.length + " nodes instead of minReplication (="
+ minReplication + "). There are "
+ getDatanodeManager().getNetworkTopology().getNumOfLeaves()
+ " datanode(s) running and "
+ (excludedNodes == null? "no": excludedNodes.size())
+ " node(s) are excluded in this operation.");
final String errorMessage = "File %s could only be written to %d of " +
"the %d %s. There are %d datanode(s) running and %s "
+ "node(s) are excluded in this operation.";
if (blockType == BlockType.CONTIGUOUS && targets.length < minReplication) {
throw new IOException(String.format(errorMessage, src,
targets.length, minReplication, "minReplication nodes",
getDatanodeManager().getNetworkTopology().getNumOfLeaves(),
(excludedNodes == null? "no": excludedNodes.size())));
} else if (blockType == BlockType.STRIPED &&
targets.length < ecPolicy.getNumDataUnits()) {
throw new IOException(
String.format(errorMessage, src, targets.length,
ecPolicy.getNumDataUnits(),
String.format("required nodes for %s", ecPolicy.getName()),
getDatanodeManager().getNetworkTopology().getNumOfLeaves(),
(excludedNodes == null ? "no" : excludedNodes.size())));
}
return targets;
}
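
The rewritten check shares one String.format template between the replicated and striped cases, differing only in the required count (minReplication versus the policy's data units). A standalone sketch of that branching is below; the numbers and policy name in main() are arbitrary.

```java
import java.io.IOException;

public class PlacementCheckSketch {
  enum BlockType { CONTIGUOUS, STRIPED }

  static void checkTargets(String src, int chosen, BlockType type,
      int minReplication, int ecDataUnits, String ecPolicyName,
      int liveNodes, Integer excluded) throws IOException {
    final String error = "File %s could only be written to %d of the %d %s. "
        + "There are %d datanode(s) running and %s node(s) are excluded in "
        + "this operation.";
    if (type == BlockType.CONTIGUOUS && chosen < minReplication) {
      throw new IOException(String.format(error, src, chosen, minReplication,
          "minReplication nodes", liveNodes, excluded == null ? "no" : excluded));
    } else if (type == BlockType.STRIPED && chosen < ecDataUnits) {
      throw new IOException(String.format(error, src, chosen, ecDataUnits,
          String.format("required nodes for %s", ecPolicyName),
          liveNodes, excluded == null ? "no" : excluded));
    }
  }

  public static void main(String[] args) {
    try {
      checkTargets("/data/file", 5, BlockType.STRIPED, 3, 6, "RS-6-3-1024k", 8, 2);
    } catch (IOException e) {
      System.out.println(e.getMessage());
    }
  }
}
```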

View File

@ -240,7 +240,7 @@ long[] getCorruptBlockIdsForTesting(BlockType blockType,
* method to get the set of corrupt blocks in corruptReplicasMap.
* @return Set of Block objects
*/
Set<Block> getCorruptBlocks() {
Set<Block> getCorruptBlocksSet() {
Set<Block> corruptBlocks = new HashSet<Block>();
corruptBlocks.addAll(corruptReplicasMap.keySet());
return corruptBlocks;
@ -267,11 +267,11 @@ String getCorruptReason(Block block, DatanodeDescriptor node) {
}
}
long getCorruptBlocksStat() {
long getCorruptBlocks() {
return totalCorruptBlocks.longValue();
}
long getCorruptECBlockGroupsStat() {
long getCorruptECBlockGroups() {
return totalCorruptECBlockGroups.longValue();
}
}

View File

@ -53,9 +53,9 @@ class InvalidateBlocks {
private final Map<DatanodeInfo, LightWeightHashSet<Block>>
nodeToBlocks = new HashMap<>();
private final Map<DatanodeInfo, LightWeightHashSet<Block>>
nodeToECBlockGroups = new HashMap<>();
nodeToECBlocks = new HashMap<>();
private final LongAdder numBlocks = new LongAdder();
private final LongAdder numECBlockGroups = new LongAdder();
private final LongAdder numECBlocks = new LongAdder();
private final int blockInvalidateLimit;
/**
@ -87,7 +87,7 @@ private void printBlockDeletionTime(final Logger log) {
* @return The total number of blocks to be invalidated.
*/
long numBlocks() {
return getECBlockGroupsStat() + getBlocksStat();
return getECBlocks() + getBlocks();
}
/**
@ -95,7 +95,7 @@ long numBlocks() {
* {@link org.apache.hadoop.hdfs.protocol.BlockType#CONTIGUOUS}
* to be invalidated.
*/
long getBlocksStat() {
long getBlocks() {
return numBlocks.longValue();
}
@ -104,8 +104,8 @@ long getBlocksStat() {
* {@link org.apache.hadoop.hdfs.protocol.BlockType#STRIPED}
* to be invalidated.
*/
long getECBlockGroupsStat() {
return numECBlockGroups.longValue();
long getECBlocks() {
return numECBlocks.longValue();
}
private LightWeightHashSet<Block> getBlocksSet(final DatanodeInfo dn) {
@ -115,9 +115,9 @@ private LightWeightHashSet<Block> getBlocksSet(final DatanodeInfo dn) {
return null;
}
private LightWeightHashSet<Block> getECBlockGroupsSet(final DatanodeInfo dn) {
if (nodeToECBlockGroups.containsKey(dn)) {
return nodeToECBlockGroups.get(dn);
private LightWeightHashSet<Block> getECBlocksSet(final DatanodeInfo dn) {
if (nodeToECBlocks.containsKey(dn)) {
return nodeToECBlocks.get(dn);
}
return null;
}
@ -125,7 +125,7 @@ private LightWeightHashSet<Block> getECBlockGroupsSet(final DatanodeInfo dn) {
private LightWeightHashSet<Block> getBlocksSet(final DatanodeInfo dn,
final Block block) {
if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
return getECBlockGroupsSet(dn);
return getECBlocksSet(dn);
} else {
return getBlocksSet(dn);
}
@ -134,8 +134,8 @@ private LightWeightHashSet<Block> getBlocksSet(final DatanodeInfo dn,
private void putBlocksSet(final DatanodeInfo dn, final Block block,
final LightWeightHashSet set) {
if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
assert getECBlockGroupsSet(dn) == null;
nodeToECBlockGroups.put(dn, set);
assert getECBlocksSet(dn) == null;
nodeToECBlocks.put(dn, set);
} else {
assert getBlocksSet(dn) == null;
nodeToBlocks.put(dn, set);
@ -144,7 +144,7 @@ private void putBlocksSet(final DatanodeInfo dn, final Block block,
private long getBlockSetsSize(final DatanodeInfo dn) {
LightWeightHashSet<Block> replicaBlocks = getBlocksSet(dn);
LightWeightHashSet<Block> stripedBlocks = getECBlockGroupsSet(dn);
LightWeightHashSet<Block> stripedBlocks = getECBlocksSet(dn);
return ((replicaBlocks == null ? 0 : replicaBlocks.size()) +
(stripedBlocks == null ? 0 : stripedBlocks.size()));
}
@ -179,7 +179,7 @@ synchronized void add(final Block block, final DatanodeInfo datanode,
}
if (set.add(block)) {
if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
numECBlockGroups.increment();
numECBlocks.increment();
} else {
numBlocks.increment();
}
@ -196,9 +196,9 @@ synchronized void remove(final DatanodeInfo dn) {
if (replicaBlockSets != null) {
numBlocks.add(replicaBlockSets.size() * -1);
}
LightWeightHashSet<Block> blockGroupSets = nodeToECBlockGroups.remove(dn);
if (blockGroupSets != null) {
numECBlockGroups.add(blockGroupSets.size() * -1);
LightWeightHashSet<Block> ecBlocksSet = nodeToECBlocks.remove(dn);
if (ecBlocksSet != null) {
numECBlocks.add(ecBlocksSet.size() * -1);
}
}
@ -207,7 +207,7 @@ synchronized void remove(final DatanodeInfo dn, final Block block) {
final LightWeightHashSet<Block> v = getBlocksSet(dn, block);
if (v != null && v.remove(block)) {
if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
numECBlockGroups.decrement();
numECBlocks.decrement();
} else {
numBlocks.decrement();
}
@ -231,21 +231,21 @@ private void dumpBlockSet(final Map<DatanodeInfo,
/** Print the contents to out. */
synchronized void dump(final PrintWriter out) {
final int size = nodeToBlocks.values().size() +
nodeToECBlockGroups.values().size();
nodeToECBlocks.values().size();
out.println("Metasave: Blocks " + numBlocks()
+ " waiting deletion from " + size + " datanodes.");
if (size == 0) {
return;
}
dumpBlockSet(nodeToBlocks, out);
dumpBlockSet(nodeToECBlockGroups, out);
dumpBlockSet(nodeToECBlocks, out);
}
/** @return a list of the storage IDs. */
synchronized List<DatanodeInfo> getDatanodes() {
HashSet<DatanodeInfo> set = new HashSet<>();
set.addAll(nodeToBlocks.keySet());
set.addAll(nodeToECBlockGroups.keySet());
set.addAll(nodeToECBlocks.keySet());
return new ArrayList<>(set);
}
@ -289,9 +289,9 @@ synchronized List<Block> invalidateWork(final DatanodeDescriptor dn) {
remainingLimit = getBlocksToInvalidateByLimit(nodeToBlocks.get(dn),
toInvalidate, numBlocks, remainingLimit);
}
if ((remainingLimit > 0) && (nodeToECBlockGroups.get(dn) != null)) {
getBlocksToInvalidateByLimit(nodeToECBlockGroups.get(dn),
toInvalidate, numECBlockGroups, remainingLimit);
if ((remainingLimit > 0) && (nodeToECBlocks.get(dn) != null)) {
getBlocksToInvalidateByLimit(nodeToECBlocks.get(dn),
toInvalidate, numECBlocks, remainingLimit);
}
if (toInvalidate.size() > 0 && getBlockSetsSize(dn) == 0) {
remove(dn);
@ -302,8 +302,8 @@ synchronized List<Block> invalidateWork(final DatanodeDescriptor dn) {
synchronized void clear() {
nodeToBlocks.clear();
nodeToECBlockGroups.clear();
nodeToECBlocks.clear();
numBlocks.reset();
numECBlockGroups.reset();
numECBlocks.reset();
}
}
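
The renamed fields keep the same bookkeeping: one per-datanode map for replicated blocks, one for striped (EC) blocks, with LongAdder counters adjusted on every add and remove so numBlocks() stays a cheap sum. A self-contained sketch of that pattern follows; the negative-id convention for striped blocks and the String keys are simplifications for illustration, not the HDFS types.

```java
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.LongAdder;

public class InvalidateSetsSketch {
  private final Map<String, Set<Long>> nodeToBlocks = new HashMap<>();
  private final Map<String, Set<Long>> nodeToEcBlocks = new HashMap<>();
  private final LongAdder numBlocks = new LongAdder();
  private final LongAdder numEcBlocks = new LongAdder();

  /** Convention for this sketch: negative ids are striped blocks. */
  private static boolean isStriped(long blockId) {
    return blockId < 0;
  }

  synchronized void add(String node, long blockId) {
    Map<String, Set<Long>> map = isStriped(blockId) ? nodeToEcBlocks : nodeToBlocks;
    if (map.computeIfAbsent(node, n -> new HashSet<>()).add(blockId)) {
      (isStriped(blockId) ? numEcBlocks : numBlocks).increment();
    }
  }

  synchronized void remove(String node) {
    Set<Long> replicated = nodeToBlocks.remove(node);
    if (replicated != null) {
      numBlocks.add(-replicated.size());
    }
    Set<Long> striped = nodeToEcBlocks.remove(node);
    if (striped != null) {
      numEcBlocks.add(-striped.size());
    }
  }

  long totalPending() {
    return numBlocks.longValue() + numEcBlocks.longValue();
  }

  public static void main(String[] args) {
    InvalidateSetsSketch s = new InvalidateSetsSketch();
    s.add("dn1", 100L);    // replicated
    s.add("dn1", -200L);   // striped
    s.add("dn2", 101L);
    System.out.println("pending=" + s.totalPending()); // 3
    s.remove("dn1");
    System.out.println("pending=" + s.totalPending()); // 1
  }
}
```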

View File

@ -144,33 +144,33 @@ synchronized int getCorruptBlockSize() {
/** Return the number of corrupt blocks with replication factor 1 */
long getCorruptReplicationOneBlockSize() {
return getCorruptReplicationOneBlocksStat();
return getCorruptReplicationOneBlocks();
}
/**
* Return under replicated block count excluding corrupt replicas.
*/
long getLowRedundancyBlocksStat() {
return lowRedundancyBlocks.longValue() - getCorruptBlocksStat();
long getLowRedundancyBlocks() {
return lowRedundancyBlocks.longValue() - getCorruptBlocks();
}
long getCorruptBlocksStat() {
long getCorruptBlocks() {
return corruptBlocks.longValue();
}
long getCorruptReplicationOneBlocksStat() {
long getCorruptReplicationOneBlocks() {
return corruptReplicationOneBlocks.longValue();
}
/**
* Return low redundancy striped blocks excluding corrupt blocks.
*/
long getLowRedundancyECBlockGroupsStat() {
long getLowRedundancyECBlockGroups() {
return lowRedundancyECBlockGroups.longValue() -
getCorruptECBlockGroupsStat();
getCorruptECBlockGroups();
}
long getCorruptECBlockGroupsStat() {
long getCorruptECBlockGroups() {
return corruptECBlockGroups.longValue();
}

View File

@ -3054,8 +3054,16 @@ void transferReplicaForPipelineRecovery(final ExtendedBlock b,
b.setNumBytes(visible);
if (targets.length > 0) {
new Daemon(new DataTransfer(targets, targetStorageTypes,
targetStorageIds, b, stage, client)).start();
Daemon daemon = new Daemon(threadGroup,
new DataTransfer(targets, targetStorageTypes, targetStorageIds, b,
stage, client));
daemon.start();
try {
daemon.join();
} catch (InterruptedException e) {
throw new IOException(
"Pipeline recovery for " + b + " is interrupted.", e);
}
}
}
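
The change makes pipeline recovery run the transfer in the DataNode's thread group and block until it finishes, translating interruption into an IOException. A minimal stand-alone equivalent using a plain Thread in place of Hadoop's Daemon and DataTransfer:

```java
import java.io.IOException;

public class TransferJoinSketch {
  public static void main(String[] args) throws IOException {
    ThreadGroup group = new ThreadGroup("dataXceiverServer");
    Runnable transfer = () -> System.out.println("replica transferred");

    Thread daemon = new Thread(group, transfer, "DataTransfer");
    daemon.setDaemon(true);
    daemon.start();
    try {
      daemon.join();   // block the caller until the transfer finishes
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new IOException("Pipeline recovery is interrupted.", e);
    }
  }
}
```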

View File

@ -53,19 +53,19 @@ public ErasureCodingWorker(Configuration conf, DataNode datanode) {
this.datanode = datanode;
this.conf = conf;
initializeStripedReadThreadPool(conf.getInt(
DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_KEY,
DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_DEFAULT));
initializeStripedReadThreadPool();
initializeStripedBlkReconstructionThreadPool(conf.getInt(
DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_BLK_THREADS_KEY,
DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_BLK_THREADS_DEFAULT));
DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_THREADS_KEY,
DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_THREADS_DEFAULT));
}
private void initializeStripedReadThreadPool(int num) {
LOG.debug("Using striped reads; pool threads={}", num);
private void initializeStripedReadThreadPool() {
LOG.debug("Using striped reads");
stripedReadPool = new ThreadPoolExecutor(1, num, 60, TimeUnit.SECONDS,
new SynchronousQueue<Runnable>(),
// Essentially, this is a cachedThreadPool.
stripedReadPool = new ThreadPoolExecutor(0, Integer.MAX_VALUE,
60, TimeUnit.SECONDS,
new SynchronousQueue<>(),
new Daemon.DaemonFactory() {
private final AtomicInteger threadIndex = new AtomicInteger(0);
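
The new pool drops the configurable thread bound: core size 0, unbounded maximum, 60-second keep-alive, and a SynchronousQueue, which is effectively a cached thread pool with named daemon threads. A runnable equivalent of just that construction:

```java
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

public class StripedReadPoolSketch {
  public static void main(String[] args) throws InterruptedException {
    AtomicInteger threadIndex = new AtomicInteger(0);

    // Core size 0, unbounded max, 60s keep-alive, SynchronousQueue:
    // the same shape as a cached thread pool, but with named daemon threads.
    ThreadPoolExecutor stripedReadPool = new ThreadPoolExecutor(
        0, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
        new SynchronousQueue<>(),
        r -> {
          Thread t = new Thread(r, "stripedRead-" + threadIndex.getAndIncrement());
          t.setDaemon(true);   // do not keep the JVM alive for idle readers
          return t;
        });

    for (int i = 0; i < 3; i++) {
      final int chunk = i;
      stripedReadPool.execute(() ->
          System.out.println(Thread.currentThread().getName() + " read chunk " + chunk));
    }
    stripedReadPool.shutdown();
    stripedReadPool.awaitTermination(5, TimeUnit.SECONDS);
  }
}
```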

View File

@ -22,6 +22,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
import org.apache.hadoop.util.Time;
/**
* StripedBlockReconstructor reconstructs one or more missing striped blocks in
@ -83,18 +84,28 @@ void reconstruct() throws IOException {
final int toReconstructLen =
(int) Math.min(getStripedReader().getBufferSize(), remaining);
long start = Time.monotonicNow();
// step1: read from minimum source DNs required for reconstruction.
// The returned success list is the source DNs we do real read from
getStripedReader().readMinimumSources(toReconstructLen);
long readEnd = Time.monotonicNow();
// step2: decode to reconstruct targets
reconstructTargets(toReconstructLen);
long decodeEnd = Time.monotonicNow();
// step3: transfer data
if (stripedWriter.transferData2Targets() == 0) {
String error = "Transfer failed for all targets.";
throw new IOException(error);
}
long writeEnd = Time.monotonicNow();
// Only the successful reconstructions are recorded.
final DataNodeMetrics metrics = getDatanode().getMetrics();
metrics.incrECReconstructionReadTime(readEnd - start);
metrics.incrECReconstructionDecodingTime(decodeEnd - readEnd);
metrics.incrECReconstructionWriteTime(writeEnd - decodeEnd);
updatePositionInBlock(toReconstructLen);
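
The added calls bracket each reconstruction phase with a monotonic clock and feed the read, decode, and write deltas into counters once the whole stripe has succeeded. A compact sketch of that instrumentation, with LongAdder counters and Thread.sleep() standing in for the metrics registry and the real phases:

```java
import java.util.concurrent.atomic.LongAdder;

public class PhaseTimingSketch {
  static final LongAdder readMillis = new LongAdder();
  static final LongAdder decodeMillis = new LongAdder();
  static final LongAdder writeMillis = new LongAdder();

  static long monotonicNow() {
    return System.nanoTime() / 1_000_000;
  }

  static void reconstructOneStripe() throws InterruptedException {
    long start = monotonicNow();
    Thread.sleep(5);                       // stand-in for readMinimumSources()
    long readEnd = monotonicNow();
    Thread.sleep(2);                       // stand-in for reconstructTargets()
    long decodeEnd = monotonicNow();
    Thread.sleep(3);                       // stand-in for transferData2Targets()
    long writeEnd = monotonicNow();

    // Record the phase timings only after the whole stripe succeeded.
    readMillis.add(readEnd - start);
    decodeMillis.add(decodeEnd - readEnd);
    writeMillis.add(writeEnd - decodeEnd);
  }

  public static void main(String[] args) throws InterruptedException {
    reconstructOneStripe();
    System.out.printf("read=%dms decode=%dms write=%dms%n",
        readMillis.longValue(), decodeMillis.longValue(), writeMillis.longValue());
  }
}
```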

View File

@ -151,6 +151,12 @@ public class DataNodeMetrics {
MutableCounterLong ecReconstructionBytesWritten;
@Metric("Bytes remote read by erasure coding worker")
MutableCounterLong ecReconstructionRemoteBytesRead;
@Metric("Milliseconds spent on read by erasure coding worker")
private MutableCounterLong ecReconstructionReadTimeMillis;
@Metric("Milliseconds spent on decoding by erasure coding worker")
private MutableCounterLong ecReconstructionDecodingTimeMillis;
@Metric("Milliseconds spent on write by erasure coding worker")
private MutableCounterLong ecReconstructionWriteTimeMillis;
final MetricsRegistry registry = new MetricsRegistry("datanode");
final String name;
@ -503,4 +509,16 @@ public void incrECReconstructionRemoteBytesRead(long bytes) {
public void incrECReconstructionBytesWritten(long bytes) {
ecReconstructionBytesWritten.incr(bytes);
}
public void incrECReconstructionReadTime(long millis) {
ecReconstructionReadTimeMillis.incr(millis);
}
public void incrECReconstructionWriteTime(long millis) {
ecReconstructionWriteTimeMillis.incr(millis);
}
public void incrECReconstructionDecodingTime(long millis) {
ecReconstructionDecodingTimeMillis.incr(millis);
}
}

View File

@ -18,7 +18,7 @@
package org.apache.hadoop.hdfs.server.namenode;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang.ArrayUtils;
import com.google.common.base.Preconditions;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
@ -101,15 +101,10 @@ public static ErasureCodingPolicyManager getInstance() {
private ErasureCodingPolicyManager() {}
public void init(Configuration conf) {
// Populate the list of enabled policies from configuration
final String[] enablePolicyNames = conf.getTrimmedStrings(
DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_DEFAULT);
// Load erasure coding default policy
final String defaultPolicyName = conf.getTrimmed(
DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY,
DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY_DEFAULT);
final String[] policyNames =
(String[]) ArrayUtils.add(enablePolicyNames, defaultPolicyName);
this.policiesByName = new TreeMap<>();
this.policiesByID = new TreeMap<>();
this.enabledPoliciesByName = new TreeMap<>();
@ -129,11 +124,8 @@ public void init(Configuration conf) {
policiesByID.put(policy.getId(), policy);
}
for (String policyName : policyNames) {
if (policyName.trim().isEmpty()) {
continue;
}
ErasureCodingPolicy ecPolicy = policiesByName.get(policyName);
if (!defaultPolicyName.trim().isEmpty()) {
ErasureCodingPolicy ecPolicy = policiesByName.get(defaultPolicyName);
if (ecPolicy == null) {
String names = policiesByName.values()
.stream().map(ErasureCodingPolicy::getName)
@ -141,8 +133,8 @@ public void init(Configuration conf) {
String msg = String.format("EC policy '%s' specified at %s is not a "
+ "valid policy. Please choose from list of available "
+ "policies: [%s]",
policyName,
DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
defaultPolicyName,
DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY,
names);
throw new HadoopIllegalArgumentException(msg);
}
@ -250,14 +242,15 @@ public synchronized ErasureCodingPolicy addPolicy(
policy.getSchema(), policy.getCellSize());
for (ErasureCodingPolicy p : getPolicies()) {
if (p.getName().equals(assignedNewName)) {
throw new HadoopIllegalArgumentException("The policy name " +
assignedNewName + " already exists");
LOG.info("The policy name " + assignedNewName + " already exists");
return p;
}
if (p.getSchema().equals(policy.getSchema()) &&
p.getCellSize() == policy.getCellSize()) {
throw new HadoopIllegalArgumentException("A policy with same schema "
LOG.info("A policy with same schema "
+ policy.getSchema().toString() + " and cell size "
+ p.getCellSize() + " already exists");
return p;
}
}
policy.setName(assignedNewName);
@ -298,6 +291,11 @@ public synchronized void removePolicy(String name) {
}
ecPolicy.setState(ErasureCodingPolicyState.REMOVED);
LOG.info("Remove erasure coding policy " + name);
/*
 * TODO HDFS-12405: postpone deleting the removed policy until NameNode
 * restart time.
 */
}
@VisibleForTesting
@ -347,4 +345,36 @@ public synchronized void enablePolicy(String name) {
enabledPoliciesByName.values().toArray(new ErasureCodingPolicy[0]);
LOG.info("Enable the erasure coding policy " + name);
}
/**
* Load an erasure coding policy into erasure coding manager.
*/
private void loadPolicy(ErasureCodingPolicy policy) {
if (!CodecUtil.hasCodec(policy.getCodecName()) ||
policy.getCellSize() > maxCellSize) {
// If the policy is not supported by the current system, set its state to
// DISABLED.
policy.setState(ErasureCodingPolicyState.DISABLED);
}
this.policiesByName.put(policy.getName(), policy);
this.policiesByID.put(policy.getId(), policy);
if (policy.isEnabled()) {
enablePolicy(policy.getName());
}
}
/**
* Reload erasure coding policies from fsImage.
*
* @param ecPolicies contains ErasureCodingPolicy list
*
*/
public synchronized void loadPolicies(List<ErasureCodingPolicy> ecPolicies) {
Preconditions.checkNotNull(ecPolicies);
for (ErasureCodingPolicy p : ecPolicies) {
loadPolicy(p);
}
allPolicies = policiesByName.values().toArray(new ErasureCodingPolicy[0]);
}
}
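
loadPolicies() re-indexes the policies recovered from the fsimage by name and by id, forcing any policy whose codec or cell size the running system cannot support into the disabled state before enabling the rest. A simplified sketch with an invented Policy class and codec whitelist; none of these types are the Hadoop classes.

```java
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

public class PolicyLoadSketch {
  static class Policy {
    final String name; final byte id; final String codec;
    final int cellSize; boolean enabled;
    Policy(String name, byte id, String codec, int cellSize, boolean enabled) {
      this.name = name; this.id = id; this.codec = codec;
      this.cellSize = cellSize; this.enabled = enabled;
    }
  }

  static final Set<String> SUPPORTED_CODECS = new TreeSet<>(Arrays.asList("rs", "xor"));
  static final int MAX_CELL_SIZE = 4 * 1024 * 1024;

  final TreeMap<String, Policy> byName = new TreeMap<>();
  final TreeMap<Byte, Policy> byId = new TreeMap<>();
  final TreeMap<String, Policy> enabledByName = new TreeMap<>();

  void loadPolicies(List<Policy> fromImage) {
    for (Policy p : fromImage) {
      if (!SUPPORTED_CODECS.contains(p.codec) || p.cellSize > MAX_CELL_SIZE) {
        p.enabled = false;          // keep the policy, but force it disabled
      }
      byName.put(p.name, p);
      byId.put(p.id, p);
      if (p.enabled) {
        enabledByName.put(p.name, p);
      }
    }
  }

  public static void main(String[] args) {
    PolicyLoadSketch m = new PolicyLoadSketch();
    m.loadPolicies(Arrays.asList(
        new Policy("RS-6-3-1024k", (byte) 1, "rs", 1024 * 1024, true),
        new Policy("FOO-10-4-1024k", (byte) 64, "foo", 1024 * 1024, true)));
    System.out.println("enabled: " + m.enabledByName.keySet());
  }
}
```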

View File

@ -32,7 +32,6 @@
import org.apache.hadoop.crypto.CryptoProtocolVersion;
import org.apache.hadoop.crypto.key.KeyProvider;
import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.CryptoExtension;
import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion;
import org.apache.hadoop.fs.FileEncryptionInfo;
import org.apache.hadoop.fs.FileStatus;
@ -698,9 +697,7 @@ static String getCurrentKeyVersion(final FSDirectory dir, final String zone)
// drain the local cache of the key provider.
// Do not invalidateCache on the server, since that's the responsibility
// when rolling the key version.
if (dir.getProvider() instanceof CryptoExtension) {
((CryptoExtension) dir.getProvider()).drain(keyName);
}
dir.getProvider().drain(keyName);
final EncryptedKeyVersion edek;
try {
edek = dir.getProvider().generateEncryptedKey(keyName);

View File

@ -24,7 +24,6 @@
import org.apache.hadoop.fs.XAttr;
import org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.XAttrHelper;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp;
@ -80,11 +79,10 @@ static ErasureCodingPolicy getErasureCodingPolicyByName(
.collect(Collectors.joining(", "));
final String message = String.format("Policy '%s' does not match any " +
"enabled erasure" +
" coding policies: [%s]. The set of enabled erasure coding " +
"policies can be configured at '%s'.",
" coding policies: [%s]. An erasure coding policy can be" +
" enabled by enableErasureCodingPolicy API.",
ecPolicyName,
sysPolicies,
DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY
sysPolicies
);
throw new HadoopIllegalArgumentException(message);
}
@ -210,10 +208,22 @@ static FileStatus unsetErasureCodingPolicy(final FSNamesystem fsn,
return fsd.getAuditFileInfo(iip);
}
static ErasureCodingPolicy addErasureCodePolicy(final FSNamesystem fsn,
ErasureCodingPolicy policy) {
/**
* Add an erasure coding policy.
*
* @param fsn namespace
* @param policy the new policy to be added into system
* @param logRetryCache whether to record RPC ids in editlog for retry cache
* rebuilding
* @throws IOException
*/
static ErasureCodingPolicy addErasureCodingPolicy(final FSNamesystem fsn,
ErasureCodingPolicy policy, final boolean logRetryCache) {
Preconditions.checkNotNull(policy);
return fsn.getErasureCodingPolicyManager().addPolicy(policy);
ErasureCodingPolicy retPolicy =
fsn.getErasureCodingPolicyManager().addPolicy(policy);
fsn.getEditLog().logAddErasureCodingPolicy(policy, logRetryCache);
return retPolicy;
}
/**
@ -221,24 +231,47 @@ static ErasureCodingPolicy addErasureCodePolicy(final FSNamesystem fsn,
*
* @param fsn namespace
* @param ecPolicyName the name of the policy to be removed
* @param logRetryCache whether to record RPC ids in editlog for retry cache
* rebuilding
* @throws IOException
*/
static void removeErasureCodePolicy(final FSNamesystem fsn,
String ecPolicyName) throws IOException {
static void removeErasureCodingPolicy(final FSNamesystem fsn,
String ecPolicyName, final boolean logRetryCache) throws IOException {
Preconditions.checkNotNull(ecPolicyName);
fsn.getErasureCodingPolicyManager().removePolicy(ecPolicyName);
fsn.getEditLog().logRemoveErasureCodingPolicy(ecPolicyName, logRetryCache);
}
static void enableErasureCodePolicy(final FSNamesystem fsn,
String ecPolicyName) throws IOException {
/**
* Enable an erasure coding policy.
*
* @param fsn namespace
* @param ecPolicyName the name of the policy to be enabled
* @param logRetryCache whether to record RPC ids in editlog for retry cache
* rebuilding
* @throws IOException
*/
static void enableErasureCodingPolicy(final FSNamesystem fsn,
String ecPolicyName, final boolean logRetryCache) throws IOException {
Preconditions.checkNotNull(ecPolicyName);
fsn.getErasureCodingPolicyManager().enablePolicy(ecPolicyName);
fsn.getEditLog().logEnableErasureCodingPolicy(ecPolicyName, logRetryCache);
}
static void disableErasureCodePolicy(final FSNamesystem fsn,
String ecPolicyName) throws IOException {
/**
* Disable an erasure coding policy.
*
* @param fsn namespace
* @param ecPolicyName the name of the policy to be disabled
* @param logRetryCache whether to record RPC ids in editlog for retry cache
* rebuilding
* @throws IOException
*/
static void disableErasureCodingPolicy(final FSNamesystem fsn,
String ecPolicyName, final boolean logRetryCache) throws IOException {
Preconditions.checkNotNull(ecPolicyName);
fsn.getErasureCodingPolicyManager().disablePolicy(ecPolicyName);
fsn.getEditLog().logDisableErasureCodingPolicy(ecPolicyName, logRetryCache);
}
private static List<XAttr> removeErasureCodingPolicyXAttr(

View File

@ -201,7 +201,7 @@ static ValidateAddBlockResult validateAddBlock(
}
storagePolicyID = pendingFile.getStoragePolicyID();
return new ValidateAddBlockResult(blockSize, numTargets, storagePolicyID,
clientMachine, blockType);
clientMachine, blockType, ecPolicy);
}
static LocatedBlock makeLocatedBlock(FSNamesystem fsn, BlockInfo blk,
@ -286,7 +286,7 @@ static DatanodeStorageInfo[] chooseTargetForNewBlock(
return bm.chooseTarget4NewBlock(src, r.numTargets, clientNode,
excludedNodesSet, r.blockSize,
favoredNodesList, r.storagePolicyID,
r.blockType, flags);
r.blockType, r.ecPolicy, flags);
}
/**
@ -831,20 +831,28 @@ private static class FileState {
}
static class ValidateAddBlockResult {
final long blockSize;
final int numTargets;
final byte storagePolicyID;
final String clientMachine;
final BlockType blockType;
private final long blockSize;
private final int numTargets;
private final byte storagePolicyID;
private final String clientMachine;
private final BlockType blockType;
private final ErasureCodingPolicy ecPolicy;
ValidateAddBlockResult(
long blockSize, int numTargets, byte storagePolicyID,
String clientMachine, BlockType blockType) {
String clientMachine, BlockType blockType,
ErasureCodingPolicy ecPolicy) {
this.blockSize = blockSize;
this.numTargets = numTargets;
this.storagePolicyID = storagePolicyID;
this.clientMachine = clientMachine;
this.blockType = blockType;
this.ecPolicy = ecPolicy;
if (blockType == BlockType.STRIPED) {
Preconditions.checkArgument(ecPolicy != null,
"ecPolicy is not specified for striped block");
}
}
}
}
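
ValidateAddBlockResult now refuses a striped request that arrives without an erasure coding policy, failing fast in the constructor. A tiny sketch of that constructor-level validation; Guava's Preconditions.checkArgument is replaced by a plain check so the snippet stands alone.

```java
public class AddBlockResultSketch {
  enum BlockType { CONTIGUOUS, STRIPED }

  static class ValidateAddBlockResult {
    private final BlockType blockType;
    private final String ecPolicyName;   // null for contiguous blocks

    ValidateAddBlockResult(BlockType blockType, String ecPolicyName) {
      if (blockType == BlockType.STRIPED && ecPolicyName == null) {
        throw new IllegalArgumentException(
            "ecPolicy is not specified for striped block");
      }
      this.blockType = blockType;
      this.ecPolicyName = ecPolicyName;
    }

    @Override public String toString() {
      return blockType + (ecPolicyName == null ? "" : " " + ecPolicyName);
    }
  }

  public static void main(String[] args) {
    System.out.println(new ValidateAddBlockResult(BlockType.CONTIGUOUS, null));
    try {
      new ValidateAddBlockResult(BlockType.STRIPED, null);   // rejected
    } catch (IllegalArgumentException e) {
      System.out.println("rejected: " + e.getMessage());
    }
  }
}
```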

View File

@ -44,6 +44,7 @@
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
@ -97,6 +98,10 @@
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TruncateOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddErasureCodingPolicyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.EnableErasureCodingPolicyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisableErasureCodingPolicyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveErasureCodingPolicyOp;
import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
@ -1228,6 +1233,38 @@ void logRemoveXAttrs(String src, List<XAttr> xAttrs, boolean toLogRpcIds) {
logEdit(op);
}
void logAddErasureCodingPolicy(ErasureCodingPolicy ecPolicy,
boolean toLogRpcIds) {
AddErasureCodingPolicyOp op =
AddErasureCodingPolicyOp.getInstance(cache.get());
op.setErasureCodingPolicy(ecPolicy);
logRpcIds(op, toLogRpcIds);
logEdit(op);
}
void logEnableErasureCodingPolicy(String ecPolicyName, boolean toLogRpcIds) {
EnableErasureCodingPolicyOp op =
EnableErasureCodingPolicyOp.getInstance(cache.get());
op.setErasureCodingPolicy(ecPolicyName);
logRpcIds(op, toLogRpcIds);
logEdit(op);
}
void logDisableErasureCodingPolicy(String ecPolicyName, boolean toLogRpcIds) {
DisableErasureCodingPolicyOp op =
DisableErasureCodingPolicyOp.getInstance(cache.get());
op.setErasureCodingPolicy(ecPolicyName);
logRpcIds(op, toLogRpcIds);
logEdit(op);
}
void logRemoveErasureCodingPolicy(String ecPolicyName, boolean toLogRpcIds) {
RemoveErasureCodingPolicyOp op =
RemoveErasureCodingPolicyOp.getInstance(cache.get());
op.setErasureCodingPolicy(ecPolicyName);
logRpcIds(op, toLogRpcIds);
logEdit(op);
}
/**
* Get all the journals this edit log is currently operating on.
*/

View File

@ -96,6 +96,14 @@
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TruncateOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp
.AddErasureCodingPolicyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp
.RemoveErasureCodingPolicyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp
.EnableErasureCodingPolicyOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp
.DisableErasureCodingPolicyOp;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
@ -958,6 +966,41 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
setStoragePolicyOp.policyId);
break;
}
case OP_ADD_ERASURE_CODING_POLICY:
AddErasureCodingPolicyOp addOp = (AddErasureCodingPolicyOp) op;
fsNamesys.getErasureCodingPolicyManager().addPolicy(
addOp.getEcPolicy());
if (toAddRetryCache) {
fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId,
addOp.getEcPolicy());
}
break;
case OP_ENABLE_ERASURE_CODING_POLICY:
EnableErasureCodingPolicyOp enableOp = (EnableErasureCodingPolicyOp) op;
fsNamesys.getErasureCodingPolicyManager().enablePolicy(
enableOp.getEcPolicy());
if (toAddRetryCache) {
fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
}
break;
case OP_DISABLE_ERASURE_CODING_POLICY:
DisableErasureCodingPolicyOp disableOp =
(DisableErasureCodingPolicyOp) op;
fsNamesys.getErasureCodingPolicyManager().disablePolicy(
disableOp.getEcPolicy());
if (toAddRetryCache) {
fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
}
break;
case OP_REMOVE_ERASURE_CODING_POLICY:
RemoveErasureCodingPolicyOp removeOp = (RemoveErasureCodingPolicyOp) op;
fsNamesys.getErasureCodingPolicyManager().removePolicy(
removeOp.getEcPolicy());
if (toAddRetryCache) {
fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
}
break;
default:
throw new IOException("Invalid operation read " + op.opCode);
}
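
On replay, each of the new ops is applied to the policy manager and, when retry-cache rebuilding is enabled, recorded under its (rpcClientId, rpcCallId) pair so a client's re-sent RPC after a failover is answered from the cache rather than applied twice. A toy sketch of that replay and retry-cache interaction; the op kinds and the map-backed cache are stand-ins, not the FSEditLogLoader API.

```java
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class ReplaySketch {
  static class Op {
    final String kind, policyName, rpcClientId;
    final int rpcCallId;
    Op(String kind, String policyName, String rpcClientId, int rpcCallId) {
      this.kind = kind; this.policyName = policyName;
      this.rpcClientId = rpcClientId; this.rpcCallId = rpcCallId;
    }
  }

  final Set<String> enabledPolicies = new HashSet<>();
  final Map<String, String> retryCache = new HashMap<>();   // "clientId:callId" -> result

  void apply(Op op, boolean toAddRetryCache) {
    switch (op.kind) {
      case "ENABLE_EC_POLICY":
        enabledPolicies.add(op.policyName);
        break;
      case "DISABLE_EC_POLICY":
        enabledPolicies.remove(op.policyName);
        break;
      default:
        throw new IllegalArgumentException("Invalid operation read " + op.kind);
    }
    if (toAddRetryCache && op.rpcClientId != null) {
      retryCache.put(op.rpcClientId + ":" + op.rpcCallId, "ok");
    }
  }

  public static void main(String[] args) {
    ReplaySketch loader = new ReplaySketch();
    loader.apply(new Op("ENABLE_EC_POLICY", "RS-6-3-1024k", "client-42", 7), true);
    System.out.println(loader.enabledPolicies + " " + loader.retryCache.keySet());
  }
}
```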

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD_ERASURE_CODING_POLICY;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_APPEND;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD_BLOCK;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD_CACHE_DIRECTIVE;
@ -31,7 +32,9 @@
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_CREATE_SNAPSHOT;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_DELETE;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_DELETE_SNAPSHOT;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_DISABLE_ERASURE_CODING_POLICY;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_DISALLOW_SNAPSHOT;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ENABLE_ERASURE_CODING_POLICY;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_END_LOG_SEGMENT;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_GET_DELEGATION_TOKEN;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_INVALID;
@ -41,6 +44,7 @@
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REASSIGN_LEASE;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REMOVE_CACHE_DIRECTIVE;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REMOVE_CACHE_POOL;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REMOVE_ERASURE_CODING_POLICY;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REMOVE_XATTR;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_RENAME;
import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_RENAME_OLD;
@ -75,7 +79,9 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.CheckedInputStream;
import java.util.zip.Checksum;
@ -100,6 +106,7 @@
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
@ -119,6 +126,7 @@
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableFactories;
import org.apache.hadoop.io.WritableFactory;
import org.apache.hadoop.io.erasurecode.ECSchema;
import org.apache.hadoop.ipc.ClientId;
import org.apache.hadoop.ipc.RpcConstants;
import org.apache.hadoop.security.token.delegation.DelegationKey;
@ -4339,6 +4347,323 @@ public void readFields(DataInput in) throws IOException {
this.len = in.readLong();
}
}
/**
* Operation corresponding to adding an erasure coding policy.
*/
static class AddErasureCodingPolicyOp extends FSEditLogOp {
private ErasureCodingPolicy ecPolicy;
AddErasureCodingPolicyOp() {
super(OP_ADD_ERASURE_CODING_POLICY);
}
static AddErasureCodingPolicyOp getInstance(OpInstanceCache cache) {
return (AddErasureCodingPolicyOp) cache
.get(OP_ADD_ERASURE_CODING_POLICY);
}
@Override
void resetSubFields() {
this.ecPolicy = null;
}
public ErasureCodingPolicy getEcPolicy() {
return this.ecPolicy;
}
public AddErasureCodingPolicyOp setErasureCodingPolicy(
ErasureCodingPolicy policy) {
Preconditions.checkNotNull(policy.getName());
Preconditions.checkNotNull(policy.getSchema());
Preconditions.checkArgument(policy.getCellSize() > 0);
this.ecPolicy = policy;
return this;
}
@Override
void readFields(DataInputStream in, int logVersion) throws IOException {
this.ecPolicy = FSImageSerialization.readErasureCodingPolicy(in);
readRpcIds(in, logVersion);
}
@Override
public void writeFields(DataOutputStream out) throws IOException {
Preconditions.checkNotNull(ecPolicy);
FSImageSerialization.writeErasureCodingPolicy(out, ecPolicy);
writeRpcIds(rpcClientId, rpcCallId, out);
}
@Override
protected void toXml(ContentHandler contentHandler) throws SAXException {
Preconditions.checkNotNull(ecPolicy);
XMLUtils.addSaxString(contentHandler, "CODEC", ecPolicy.getCodecName());
XMLUtils.addSaxString(contentHandler, "DATAUNITS",
Integer.toString(ecPolicy.getNumDataUnits()));
XMLUtils.addSaxString(contentHandler, "PARITYUNITS",
Integer.toString(ecPolicy.getNumParityUnits()));
XMLUtils.addSaxString(contentHandler, "CELLSIZE",
Integer.toString(ecPolicy.getCellSize()));
Map<String, String> extraOptions = ecPolicy.getSchema().getExtraOptions();
if (extraOptions == null || extraOptions.isEmpty()) {
XMLUtils.addSaxString(contentHandler, "EXTRAOPTIONS",
Integer.toString(0));
appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId);
return;
}
XMLUtils.addSaxString(contentHandler, "EXTRAOPTIONS",
Integer.toString(extraOptions.size()));
for (Map.Entry<String, String> entry : extraOptions.entrySet()) {
contentHandler.startElement("", "", "EXTRAOPTION",
new AttributesImpl());
XMLUtils.addSaxString(contentHandler, "KEY", entry.getKey());
XMLUtils.addSaxString(contentHandler, "VALUE", entry.getValue());
contentHandler.endElement("", "", "EXTRAOPTION");
}
appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId);
}
@Override
void fromXml(Stanza st) throws InvalidXmlException {
final String codecName = st.getValue("CODEC");
final int dataUnits = Integer.parseInt(st.getValue("DATAUNITS"));
final int parityUnits = Integer.parseInt(st.getValue("PARITYUNITS"));
final int cellSize = Integer.parseInt(st.getValue("CELLSIZE"));
final int extraOptionNum = Integer.parseInt(st.getValue("EXTRAOPTIONS"));
ECSchema schema;
if (extraOptionNum == 0) {
schema = new ECSchema(codecName, dataUnits, parityUnits, null);
} else {
Map<String, String> extraOptions = new HashMap<String, String>();
List<Stanza> stanzas = st.getChildren("EXTRAOPTION");
for (Stanza a: stanzas) {
extraOptions.put(a.getValue("KEY"), a.getValue("VALUE"));
}
schema = new ECSchema(codecName, dataUnits, parityUnits, extraOptions);
}
this.ecPolicy = new ErasureCodingPolicy(schema, cellSize);
readRpcIdsFromXml(st);
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("AddErasureCodingPolicy [");
builder.append(ecPolicy.toString());
appendRpcIdsToString(builder, rpcClientId, rpcCallId);
builder.append("]");
return builder.toString();
}
}
/**
* Operation corresponding to enabling an erasure coding policy.
*/
static class EnableErasureCodingPolicyOp extends FSEditLogOp {
private String ecPolicyName;
EnableErasureCodingPolicyOp() {
super(OP_ENABLE_ERASURE_CODING_POLICY);
}
static EnableErasureCodingPolicyOp getInstance(OpInstanceCache cache) {
return (EnableErasureCodingPolicyOp) cache
.get(OP_ENABLE_ERASURE_CODING_POLICY);
}
@Override
void resetSubFields() {
this.ecPolicyName = null;
}
public String getEcPolicy() {
return this.ecPolicyName;
}
public EnableErasureCodingPolicyOp setErasureCodingPolicy(
String policyName) {
Preconditions.checkNotNull(policyName);
this.ecPolicyName = policyName;
return this;
}
@Override
void readFields(DataInputStream in, int logVersion) throws IOException {
this.ecPolicyName = FSImageSerialization.readString(in);
readRpcIds(in, logVersion);
}
@Override
public void writeFields(DataOutputStream out) throws IOException {
Preconditions.checkNotNull(ecPolicyName);
FSImageSerialization.writeString(ecPolicyName, out);
writeRpcIds(rpcClientId, rpcCallId, out);
}
@Override
protected void toXml(ContentHandler contentHandler) throws SAXException {
Preconditions.checkNotNull(ecPolicyName);
XMLUtils.addSaxString(contentHandler, "POLICYNAME", this.ecPolicyName);
appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId);
}
@Override
void fromXml(Stanza st) throws InvalidXmlException {
this.ecPolicyName = st.getValue("POLICYNAME");
readRpcIdsFromXml(st);
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("EnableErasureCodingPolicy [");
builder.append(ecPolicyName);
appendRpcIdsToString(builder, rpcClientId, rpcCallId);
builder.append("]");
return builder.toString();
}
}
/**
* Operation corresponding to disabling an erasure coding policy.
*/
static class DisableErasureCodingPolicyOp extends FSEditLogOp {
private String ecPolicyName;
DisableErasureCodingPolicyOp() {
super(OP_DISABLE_ERASURE_CODING_POLICY);
}
static DisableErasureCodingPolicyOp getInstance(OpInstanceCache cache) {
return (DisableErasureCodingPolicyOp) cache
.get(OP_DISABLE_ERASURE_CODING_POLICY);
}
@Override
void resetSubFields() {
this.ecPolicyName = null;
}
public String getEcPolicy() {
return this.ecPolicyName;
}
public DisableErasureCodingPolicyOp setErasureCodingPolicy(
String policyName) {
Preconditions.checkNotNull(policyName);
this.ecPolicyName = policyName;
return this;
}
@Override
void readFields(DataInputStream in, int logVersion) throws IOException {
this.ecPolicyName = FSImageSerialization.readString(in);
readRpcIds(in, logVersion);
}
@Override
public void writeFields(DataOutputStream out) throws IOException {
FSImageSerialization.writeString(ecPolicyName, out);
writeRpcIds(rpcClientId, rpcCallId, out);
}
@Override
protected void toXml(ContentHandler contentHandler) throws SAXException {
XMLUtils.addSaxString(contentHandler, "POLICYNAME", this.ecPolicyName);
appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId);
}
@Override
void fromXml(Stanza st) throws InvalidXmlException {
this.ecPolicyName = st.getValue("POLICYNAME");
readRpcIdsFromXml(st);
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("DisableErasureCodingPolicy [");
builder.append(ecPolicyName);
appendRpcIdsToString(builder, rpcClientId, rpcCallId);
builder.append("]");
return builder.toString();
}
}
/**
* Operation corresponding to removing an erasure coding policy.
*/
static class RemoveErasureCodingPolicyOp extends FSEditLogOp {
private String ecPolicyName;
RemoveErasureCodingPolicyOp() {
super(OP_REMOVE_ERASURE_CODING_POLICY);
}
static RemoveErasureCodingPolicyOp getInstance(OpInstanceCache cache) {
return (RemoveErasureCodingPolicyOp) cache
.get(OP_REMOVE_ERASURE_CODING_POLICY);
}
@Override
void resetSubFields() {
this.ecPolicyName = null;
}
public String getEcPolicy() {
return this.ecPolicyName;
}
public RemoveErasureCodingPolicyOp setErasureCodingPolicy(
String policyName) {
Preconditions.checkNotNull(policyName);
this.ecPolicyName = policyName;
return this;
}
@Override
void readFields(DataInputStream in, int logVersion) throws IOException {
this.ecPolicyName = FSImageSerialization.readString(in);
readRpcIds(in, logVersion);
}
@Override
public void writeFields(DataOutputStream out) throws IOException {
FSImageSerialization.writeString(ecPolicyName, out);
writeRpcIds(rpcClientId, rpcCallId, out);
}
@Override
protected void toXml(ContentHandler contentHandler) throws SAXException {
XMLUtils.addSaxString(contentHandler, "POLICYNAME", this.ecPolicyName);
appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId);
}
@Override
void fromXml(Stanza st) throws InvalidXmlException {
this.ecPolicyName = st.getValue("POLICYNAME");
readRpcIdsFromXml(st);
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("RemoveErasureCodingPolicy [");
builder.append(ecPolicyName);
appendRpcIdsToString(builder, rpcClientId, rpcCallId);
builder.append("]");
return builder.toString();
}
}
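
Each new op class follows the same skeleton: a getInstance(cache) factory that hands back a pooled instance per opcode, and a resetSubFields() that clears the payload between uses. The sketch below shows one plausible shape of such a cache; when the real OpInstanceCache calls resetSubFields() is not visible in this diff, so treat the reset-on-get policy here as an illustrative choice, not the actual behavior.

```java
import java.util.EnumMap;

public class OpCacheSketch {
  enum OpCode { ADD_EC_POLICY, ENABLE_EC_POLICY }

  static class Op {
    final OpCode code;
    String payload;            // sub-field that must be reset between uses
    Op(OpCode code) { this.code = code; }
    void resetSubFields() { payload = null; }
  }

  /** One reusable instance per opcode. */
  static class OpInstanceCache {
    private final EnumMap<OpCode, Op> ops = new EnumMap<>(OpCode.class);
    Op get(OpCode code) {
      Op op = ops.computeIfAbsent(code, Op::new);
      op.resetSubFields();     // never leak the previous edit's payload
      return op;
    }
  }

  public static void main(String[] args) {
    OpInstanceCache cache = new OpInstanceCache();
    Op first = cache.get(OpCode.ENABLE_EC_POLICY);
    first.payload = "RS-6-3-1024k";
    Op second = cache.get(OpCode.ENABLE_EC_POLICY);
    System.out.println(second == first);    // true: same pooled instance
    System.out.println(second.payload);     // null: sub-fields were reset
  }
}
```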
/**
* Operation corresponding to upgrade
*/

View File

@ -80,6 +80,11 @@ public enum FSEditLogOpCodes {
OP_TRUNCATE ((byte) 46, TruncateOp.class),
OP_APPEND ((byte) 47, AppendOp.class),
OP_SET_QUOTA_BY_STORAGETYPE ((byte) 48, SetQuotaByStorageTypeOp.class),
OP_ADD_ERASURE_CODING_POLICY ((byte) 49, AddErasureCodingPolicyOp.class),
OP_ENABLE_ERASURE_CODING_POLICY((byte) 50, EnableErasureCodingPolicyOp.class),
OP_DISABLE_ERASURE_CODING_POLICY((byte) 51,
DisableErasureCodingPolicyOp.class),
OP_REMOVE_ERASURE_CODING_POLICY((byte) 52, RemoveErasureCodingPolicyOp.class),
// Note that the current range of the valid OP code is 0~127
OP_INVALID ((byte) -1);
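
The new opcodes extend the byte-coded enum, and the trailing comment is the constraint that matters: the on-disk format stores the opcode in one signed byte, so valid codes must stay in 0..127. A minimal sketch of such an enum with a reverse lookup that maps unknown or negative bytes to OP_INVALID:

```java
public class OpCodeSketch {
  enum OpCode {
    OP_ADD((byte) 0),
    OP_ADD_ERASURE_CODING_POLICY((byte) 49),
    OP_ENABLE_ERASURE_CODING_POLICY((byte) 50),
    OP_DISABLE_ERASURE_CODING_POLICY((byte) 51),
    OP_REMOVE_ERASURE_CODING_POLICY((byte) 52),
    OP_INVALID((byte) -1);

    private static final OpCode[] BY_CODE = new OpCode[128];
    static {
      for (OpCode op : values()) {
        if (op.code >= 0) {            // codes must stay within 0..127
          BY_CODE[op.code] = op;
        }
      }
    }

    final byte code;
    OpCode(byte code) { this.code = code; }

    static OpCode fromByte(byte b) {
      OpCode op = (b >= 0) ? BY_CODE[b] : null;
      return op == null ? OP_INVALID : op;
    }
  }

  public static void main(String[] args) {
    System.out.println(OpCode.fromByte((byte) 50));   // OP_ENABLE_ERASURE_CODING_POLICY
    System.out.println(OpCode.fromByte((byte) 99));   // OP_INVALID
  }
}
```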

View File

@ -334,10 +334,10 @@ private INodeFile loadINodeFile(INodeSection.INode n) {
boolean isStriped = f.hasErasureCodingPolicyID();
assert ((!isStriped) || (isStriped && !f.hasReplication()));
Short replication = (!isStriped ? (short) f.getReplication() : null);
Byte ecPolicyID = (isStriped ?
(byte) f.getErasureCodingPolicyID() : null);
ErasureCodingPolicy ecPolicy = isStriped ?
fsn.getErasureCodingPolicyManager().getByID(
(byte) f.getErasureCodingPolicyID()) : null;
Byte ecPolicyID = (isStriped ? ecPolicy.getId() : null);
fsn.getErasureCodingPolicyManager().getByID(ecPolicyID) : null;
BlockInfo[] blocks = new BlockInfo[bp.size()];
for (int i = 0; i < bp.size(); ++i) {

View File

@ -36,10 +36,13 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -47,6 +50,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ErasureCodingPolicyProto;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockIdManager;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
@ -55,6 +59,7 @@
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.ErasureCodingSection;
import org.apache.hadoop.hdfs.server.namenode.snapshot.FSImageFormatPBSnapshot;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
@ -287,6 +292,12 @@ public int compare(FileSummary.Section s1, FileSummary.Section s2) {
prog.endStep(Phase.LOADING_FSIMAGE, step);
}
break;
case ERASURE_CODING:
Step step = new Step(StepType.ERASURE_CODING_POLICIES);
prog.beginStep(Phase.LOADING_FSIMAGE, step);
loadErasureCodingSection(in);
prog.endStep(Phase.LOADING_FSIMAGE, step);
break;
default:
LOG.warn("Unrecognized section {}", n);
break;
@ -366,6 +377,17 @@ private void loadCacheManagerSection(InputStream in, StartupProgress prog,
new CacheManager.PersistState(s, pools, directives));
}
private void loadErasureCodingSection(InputStream in)
throws IOException {
ErasureCodingSection s = ErasureCodingSection.parseDelimitedFrom(in);
List<ErasureCodingPolicy> ecPolicies = Lists
.newArrayListWithCapacity(s.getPoliciesCount());
for (int i = 0; i < s.getPoliciesCount(); ++i) {
ecPolicies.add(PBHelperClient.convertErasureCodingPolicy(
s.getPolicies(i)));
}
fsn.getErasureCodingPolicyManager().loadPolicies(ecPolicies);
}
}
public static final class Saver {
@ -497,7 +519,13 @@ private void saveInternal(FileOutputStream fout,
// depends on this behavior.
context.checkCancelled();
Step step = new Step(StepType.INODES, filePath);
// Erasure coding policies should be saved before inodes
Step step = new Step(StepType.ERASURE_CODING_POLICIES, filePath);
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
saveErasureCodingSection(b);
prog.endStep(Phase.SAVING_CHECKPOINT, step);
step = new Step(StepType.INODES, filePath);
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
saveInodes(b);
saveSnapshots(b);
@ -555,6 +583,23 @@ private void saveCacheManagerSection(FileSummary.Builder summary)
commitSection(summary, SectionName.CACHE_MANAGER);
}
private void saveErasureCodingSection(
FileSummary.Builder summary) throws IOException {
final FSNamesystem fsn = context.getSourceNamesystem();
ErasureCodingPolicy[] ecPolicies =
fsn.getErasureCodingPolicyManager().getPolicies();
ArrayList<ErasureCodingPolicyProto> ecPolicyProtoes =
new ArrayList<ErasureCodingPolicyProto>();
for (ErasureCodingPolicy p : ecPolicies) {
ecPolicyProtoes.add(PBHelperClient.convertErasureCodingPolicy(p));
}
ErasureCodingSection section = ErasureCodingSection.newBuilder().
addAllPolicies(ecPolicyProtoes).build();
section.writeDelimitedTo(sectionOutputStream);
commitSection(summary, SectionName.ERASURE_CODING);
}
private void saveNameSystemSection(FileSummary.Builder summary)
throws IOException {
final FSNamesystem fsn = context.getSourceNamesystem();
@ -606,6 +651,7 @@ public enum SectionName {
NS_INFO("NS_INFO"),
STRING_TABLE("STRING_TABLE"),
EXTENDED_ACL("EXTENDED_ACL"),
ERASURE_CODING("ERASURE_CODING"),
INODE("INODE"),
INODE_REFERENCE("INODE_REFERENCE"),
SNAPSHOT("SNAPSHOT"),

View File

@ -21,6 +21,8 @@
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -33,6 +35,7 @@
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat;
@ -46,6 +49,7 @@
import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.erasurecode.ECSchema;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@ -750,4 +754,45 @@ public static CachePoolInfo readCachePoolInfo(Stanza st)
return info;
}
public static void writeErasureCodingPolicy(DataOutputStream out,
ErasureCodingPolicy ecPolicy) throws IOException {
writeString(ecPolicy.getSchema().getCodecName(), out);
writeInt(ecPolicy.getNumDataUnits(), out);
writeInt(ecPolicy.getNumParityUnits(), out);
writeInt(ecPolicy.getCellSize(), out);
Map<String, String> extraOptions = ecPolicy.getSchema().getExtraOptions();
if (extraOptions == null || extraOptions.isEmpty()) {
writeInt(0, out);
return;
}
writeInt(extraOptions.size(), out);
for (Map.Entry<String, String> entry : extraOptions.entrySet()) {
writeString(entry.getKey(), out);
writeString(entry.getValue(), out);
}
}
public static ErasureCodingPolicy readErasureCodingPolicy(DataInput in)
throws IOException {
String codecName = readString(in);
int numDataUnits = readInt(in);
int numParityUnits = readInt(in);
int cellSize = readInt(in);
int size = readInt(in);
Map<String, String> extraOptions = new HashMap<>(size);
if (size != 0) {
for (int i = 0; i < size; i++) {
String key = readString(in);
String value = readString(in);
extraOptions.put(key, value);
}
}
ECSchema ecSchema = new ECSchema(codecName, numDataUnits,
numParityUnits, extraOptions);
return new ErasureCodingPolicy(ecSchema, cellSize);
}
}
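
writeErasureCodingPolicy and readErasureCodingPolicy use a simple layout: codec name, data units, parity units, cell size, then a count of extra key/value options followed by the pairs. The round trip below reproduces that layout with plain DataOutputStream/DataInputStream, using writeUTF in place of Hadoop's writeString helper.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;

public class PolicySerializationSketch {

  static byte[] write(String codec, int dataUnits, int parityUnits, int cellSize,
      Map<String, String> extraOptions) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (DataOutputStream out = new DataOutputStream(bytes)) {
      out.writeUTF(codec);
      out.writeInt(dataUnits);
      out.writeInt(parityUnits);
      out.writeInt(cellSize);
      out.writeInt(extraOptions.size());    // 0 means "no extra options" on read
      for (Map.Entry<String, String> e : extraOptions.entrySet()) {
        out.writeUTF(e.getKey());
        out.writeUTF(e.getValue());
      }
    }
    return bytes.toByteArray();
  }

  static String read(byte[] data) throws IOException {
    try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(data))) {
      String codec = in.readUTF();
      int dataUnits = in.readInt();
      int parityUnits = in.readInt();
      int cellSize = in.readInt();
      Map<String, String> extra = new LinkedHashMap<>();
      int n = in.readInt();
      for (int i = 0; i < n; i++) {
        extra.put(in.readUTF(), in.readUTF());
      }
      return codec + "-" + dataUnits + "-" + parityUnits + "-" + cellSize + " " + extra;
    }
  }

  public static void main(String[] args) throws IOException {
    Map<String, String> extra = new LinkedHashMap<>();
    extra.put("rs-legacy", "false");
    System.out.println(read(write("rs", 6, 3, 1024 * 1024, extra)));
  }
}
```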

View File

@ -89,8 +89,8 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
import org.apache.hadoop.hdfs.protocol.BlocksStats;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats;
import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats;
import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
import org.apache.hadoop.hdfs.server.namenode.metrics.ReplicatedBlocksMBean;
@ -4080,10 +4080,10 @@ long[] getStats() {
* Get statistics pertaining to blocks of type {@link BlockType#CONTIGUOUS}
* in the filesystem.
* <p>
* @see ClientProtocol#getBlocksStats()
* @see ClientProtocol#getReplicatedBlockStats()
*/
BlocksStats getBlocksStats() {
return new BlocksStats(getLowRedundancyReplicatedBlocks(),
ReplicatedBlockStats getReplicatedBlockStats() {
return new ReplicatedBlockStats(getLowRedundancyReplicatedBlocks(),
getCorruptReplicatedBlocks(), getMissingReplicatedBlocks(),
getMissingReplicationOneBlocks(), getBytesInFutureReplicatedBlocks(),
getPendingDeletionReplicatedBlocks());
@ -4093,12 +4093,12 @@ BlocksStats getBlocksStats() {
* Get statistics pertaining to blocks of type {@link BlockType#STRIPED}
* in the filesystem.
* <p>
* @see ClientProtocol#getECBlockGroupsStats()
* @see ClientProtocol#getECBlockGroupStats()
*/
ECBlockGroupsStats getECBlockGroupsStats() {
return new ECBlockGroupsStats(getLowRedundancyECBlockGroups(),
ECBlockGroupStats getECBlockGroupStats() {
return new ECBlockGroupStats(getLowRedundancyECBlockGroups(),
getCorruptECBlockGroups(), getMissingECBlockGroups(),
getBytesInFutureECBlockGroups(), getPendingDeletionECBlockGroups());
getBytesInFutureECBlockGroups(), getPendingDeletionECBlocks());
}
@Override // FSNamesystemMBean
@ -4711,10 +4711,10 @@ public long getBytesInFutureECBlockGroups() {
}
@Override // ECBlockGroupsMBean
@Metric({"PendingDeletionECBlockGroups", "Number of erasure coded block " +
"groups that are pending deletion"})
public long getPendingDeletionECBlockGroups() {
return blockManager.getPendingDeletionECBlockGroups();
@Metric({"PendingDeletionECBlocks", "Number of erasure coded blocks " +
"that are pending deletion"})
public long getPendingDeletionECBlocks() {
return blockManager.getPendingDeletionECBlocks();
}
@Override
@ -4993,6 +4993,7 @@ private long nextBlockId(BlockType blockType) throws IOException {
}
boolean isFileDeleted(INodeFile file) {
assert hasReadLock();
// Not in the inodeMap or in the snapshot but marked deleted.
if (dir.getInode(file.getId()) == null) {
return true;
@ -7110,6 +7111,8 @@ private void reencryptEncryptionZoneInt(final String zone,
if (keyVersionName == null) {
throw new IOException("Failed to get key version name for " + zone);
}
LOG.info("Re-encryption using key version " + keyVersionName
+ " for zone " + zone);
}
writeLock();
try {
@ -7186,10 +7189,12 @@ void setErasureCodingPolicy(final String srcArg, final String ecPolicyName,
/**
* Add multiple erasure coding policies to the ErasureCodingPolicyManager.
* @param policies The policies to add.
* @param logRetryCache whether to record RPC ids in editlog for retry cache
* rebuilding
* @return The according result of add operation.
*/
AddECPolicyResponse[] addErasureCodingPolicies(ErasureCodingPolicy[] policies)
throws IOException {
AddECPolicyResponse[] addErasureCodingPolicies(ErasureCodingPolicy[] policies,
final boolean logRetryCache) throws IOException {
final String operationName = "addErasureCodingPolicies";
String addECPolicyName = "";
checkOperation(OperationCategory.WRITE);
@ -7198,12 +7203,12 @@ AddECPolicyResponse[] addErasureCodingPolicies(ErasureCodingPolicy[] policies)
writeLock();
try {
checkOperation(OperationCategory.WRITE);
checkNameNodeSafeMode("Cannot add erasure coding policy");
for (ErasureCodingPolicy policy : policies) {
try {
checkOperation(OperationCategory.WRITE);
checkNameNodeSafeMode("Cannot add erasure coding policy");
ErasureCodingPolicy newPolicy =
FSDirErasureCodingOp.addErasureCodePolicy(this, policy);
FSDirErasureCodingOp.addErasureCodingPolicy(this, policy,
logRetryCache);
addECPolicyName = newPolicy.getName();
responses.add(new AddECPolicyResponse(newPolicy));
} catch (HadoopIllegalArgumentException e) {
@ -7224,9 +7229,12 @@ AddECPolicyResponse[] addErasureCodingPolicies(ErasureCodingPolicy[] policies)
/**
* Remove an erasure coding policy.
* @param ecPolicyName the name of the policy to be removed
* @param logRetryCache whether to record RPC ids in editlog for retry cache
* rebuilding
* @throws IOException
*/
void removeErasureCodingPolicy(String ecPolicyName) throws IOException {
void removeErasureCodingPolicy(String ecPolicyName,
final boolean logRetryCache) throws IOException {
final String operationName = "removeErasureCodingPolicy";
checkOperation(OperationCategory.WRITE);
boolean success = false;
@ -7235,23 +7243,27 @@ void removeErasureCodingPolicy(String ecPolicyName) throws IOException {
checkOperation(OperationCategory.WRITE);
checkNameNodeSafeMode("Cannot remove erasure coding policy "
+ ecPolicyName);
FSDirErasureCodingOp.removeErasureCodePolicy(this, ecPolicyName);
FSDirErasureCodingOp.removeErasureCodingPolicy(this, ecPolicyName,
logRetryCache);
success = true;
} finally {
writeUnlock(operationName);
if (success) {
getEditLog().logSync();
}
logAuditEvent(success, operationName, null, null, null);
logAuditEvent(success, operationName, ecPolicyName, null, null);
}
}
/**
* Enable an erasure coding policy.
* @param ecPolicyName the name of the policy to be enabled
* @param logRetryCache whether to record RPC ids in editlog for retry cache
* rebuilding
* @throws IOException
*/
void enableErasureCodingPolicy(String ecPolicyName) throws IOException {
void enableErasureCodingPolicy(String ecPolicyName,
final boolean logRetryCache) throws IOException {
final String operationName = "enableErasureCodingPolicy";
checkOperation(OperationCategory.WRITE);
boolean success = false;
@ -7261,7 +7273,8 @@ void enableErasureCodingPolicy(String ecPolicyName) throws IOException {
checkOperation(OperationCategory.WRITE);
checkNameNodeSafeMode("Cannot enable erasure coding policy "
+ ecPolicyName);
FSDirErasureCodingOp.enableErasureCodePolicy(this, ecPolicyName);
FSDirErasureCodingOp.enableErasureCodingPolicy(this, ecPolicyName,
logRetryCache);
success = true;
} finally {
writeUnlock(operationName);
@ -7275,9 +7288,12 @@ void enableErasureCodingPolicy(String ecPolicyName) throws IOException {
/**
* Disable an erasure coding policy.
* @param ecPolicyName the name of the policy to be disabled
* @param logRetryCache whether to record RPC ids in editlog for retry cache
* rebuilding
* @throws IOException
*/
void disableErasureCodingPolicy(String ecPolicyName) throws IOException {
void disableErasureCodingPolicy(String ecPolicyName,
final boolean logRetryCache) throws IOException {
final String operationName = "disableErasureCodingPolicy";
checkOperation(OperationCategory.WRITE);
boolean success = false;
@ -7287,7 +7303,8 @@ void disableErasureCodingPolicy(String ecPolicyName) throws IOException {
checkOperation(OperationCategory.WRITE);
checkNameNodeSafeMode("Cannot disable erasure coding policy "
+ ecPolicyName);
FSDirErasureCodingOp.disableErasureCodePolicy(this, ecPolicyName);
FSDirErasureCodingOp.disableErasureCodingPolicy(this, ecPolicyName,
logRetryCache);
success = true;
} finally {
writeUnlock(operationName);

View File

@ -557,6 +557,12 @@ synchronized boolean checkLeases() {
if (!p.startsWith("/")) {
throw new IOException("Invalid path in the lease " + p);
}
final INodeFile lastINode = iip.getLastINode().asFile();
if (fsnamesystem.isFileDeleted(lastINode)) {
// INode referred by the lease could have been deleted.
removeLease(lastINode.getId());
continue;
}
boolean completed = false;
try {
completed = fsnamesystem.internalReleaseLease(

View File

@ -98,7 +98,7 @@
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.EncryptionZone;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@ -116,7 +116,7 @@
import org.apache.hadoop.hdfs.protocol.QuotaByStorageTypeExceededException;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
import org.apache.hadoop.hdfs.protocol.BlocksStats;
import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
@ -517,10 +517,10 @@ public NameNodeRpcServer(Configuration conf, NameNode nn)
QuotaByStorageTypeExceededException.class,
AclException.class,
FSLimitException.PathComponentTooLongException.class,
FSLimitException.MaxDirectoryItemsExceededException.class,
UnresolvedPathException.class);
FSLimitException.MaxDirectoryItemsExceededException.class);
clientRpcServer.addSuppressedLoggingExceptions(StandbyException.class);
clientRpcServer.addSuppressedLoggingExceptions(StandbyException.class,
UnresolvedPathException.class);
clientRpcServer.setTracer(nn.tracer);
if (serviceRpcServer != null) {
@ -1163,17 +1163,17 @@ public long[] getStats() throws IOException {
}
@Override // ClientProtocol
public BlocksStats getBlocksStats() throws IOException {
public ReplicatedBlockStats getReplicatedBlockStats() throws IOException {
checkNNStartup();
namesystem.checkOperation(OperationCategory.READ);
return namesystem.getBlocksStats();
return namesystem.getReplicatedBlockStats();
}
@Override // ClientProtocol
public ECBlockGroupsStats getECBlockGroupsStats() throws IOException {
public ECBlockGroupStats getECBlockGroupStats() throws IOException {
checkNNStartup();
namesystem.checkOperation(OperationCategory.READ);
return namesystem.getECBlockGroupsStats();
return namesystem.getECBlockGroupStats();
}
@Override // ClientProtocol
@ -2337,7 +2337,21 @@ public AddECPolicyResponse[] addErasureCodingPolicies(
ErasureCodingPolicy[] policies) throws IOException {
checkNNStartup();
namesystem.checkSuperuserPrivilege();
return namesystem.addErasureCodingPolicies(policies);
final CacheEntryWithPayload cacheEntry =
RetryCache.waitForCompletion(retryCache, null);
if (cacheEntry != null && cacheEntry.isSuccess()) {
return (AddECPolicyResponse[]) cacheEntry.getPayload();
}
boolean success = false;
AddECPolicyResponse[] responses = new AddECPolicyResponse[0];
try {
responses =
namesystem.addErasureCodingPolicies(policies, cacheEntry != null);
success = true;
} finally {
RetryCache.setState(cacheEntry, success, responses);
}
return responses;
}
@Override
@ -2345,7 +2359,17 @@ public void removeErasureCodingPolicy(String ecPolicyName)
throws IOException {
checkNNStartup();
namesystem.checkSuperuserPrivilege();
namesystem.removeErasureCodingPolicy(ecPolicyName);
final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
if (cacheEntry != null && cacheEntry.isSuccess()) {
return;
}
boolean success = false;
try {
namesystem.removeErasureCodingPolicy(ecPolicyName, cacheEntry != null);
success = true;
} finally {
RetryCache.setState(cacheEntry, success);
}
}
@Override // ClientProtocol
@ -2353,7 +2377,17 @@ public void enableErasureCodingPolicy(String ecPolicyName)
throws IOException {
checkNNStartup();
namesystem.checkSuperuserPrivilege();
namesystem.enableErasureCodingPolicy(ecPolicyName);
final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
if (cacheEntry != null && cacheEntry.isSuccess()) {
return;
}
boolean success = false;
try {
namesystem.enableErasureCodingPolicy(ecPolicyName, cacheEntry != null);
success = true;
} finally {
RetryCache.setState(cacheEntry, success);
}
}
@Override // ClientProtocol
@ -2361,7 +2395,17 @@ public void disableErasureCodingPolicy(String ecPolicyName)
throws IOException {
checkNNStartup();
namesystem.checkSuperuserPrivilege();
namesystem.disableErasureCodingPolicy(ecPolicyName);
final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
if (cacheEntry != null && cacheEntry.isSuccess()) {
return;
}
boolean success = false;
try {
namesystem.disableErasureCodingPolicy(ecPolicyName, cacheEntry != null);
success = true;
} finally {
RetryCache.setState(cacheEntry, success);
}
}
@Override // ReconfigurationProtocol
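The three void erasure-coding RPCs above, and the payload-returning addErasureCodingPolicies before them, all add the same idempotency guard: consult the retry cache before executing and record the outcome afterwards, so a retried RPC with the same call id returns the cached result instead of re-applying the operation. A condensed sketch of that shared pattern, using only the RetryCache calls visible in these hunks; the helper method and its name are illustrative, not part of this change:

  // Assumes: import org.apache.hadoop.ipc.RetryCache;
  //          import org.apache.hadoop.ipc.RetryCache.CacheEntry;
  private void runWithRetryCache(IdempotentOp op) throws IOException {
    final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // a previous attempt of this retried RPC already succeeded
    }
    boolean success = false;
    try {
      // e.g. namesystem.enableErasureCodingPolicy(name, cacheEntry != null)
      op.run(cacheEntry != null);
      success = true;
    } finally {
      RetryCache.setState(cacheEntry, success);
    }
  }

  @FunctionalInterface
  private interface IdempotentOp {
    void run(boolean logRetryCache) throws IOException;
  }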

View File

@ -53,7 +53,7 @@ public interface ECBlockGroupsMBean {
long getBytesInFutureECBlockGroups();
/**
* Return count of erasure coded block groups that are pending deletion.
* Return count of erasure coded blocks that are pending deletion.
*/
long getPendingDeletionECBlockGroups();
long getPendingDeletionECBlocks();
}

View File

@ -52,7 +52,12 @@ public enum StepType {
/**
* The namenode is performing an operation related to cache entries.
*/
CACHE_ENTRIES("CacheEntries", "cache entries");
CACHE_ENTRIES("CacheEntries", "cache entries"),
/**
* The namenode is performing an operation related to erasure coding policies.
*/
ERASURE_CODING_POLICIES("ErasureCodingPolicies", "erasure coding policies");
private final String name, description;
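The new step type gives NameNode startup-progress reporting a bucket for the erasure-coding-policy work now persisted in the fsimage; the loader code itself is not part of the hunks shown here. A hedged sketch of how such a step is typically reported; the phase choice and surrounding calls are assumptions:

  // Assumes: import org.apache.hadoop.hdfs.server.namenode.NameNode;
  //          import org.apache.hadoop.hdfs.server.namenode.startupprogress.*;
  StartupProgress prog = NameNode.getStartupProgress();
  Step step = new Step(StepType.ERASURE_CODING_POLICIES);
  prog.beginStep(Phase.LOADING_FSIMAGE, step);  // assumption: fsimage loading
  try {
    // ... process the erasure coding policy section here ...
  } finally {
    prog.endStep(Phase.LOADING_FSIMAGE, step);
  }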

View File

@ -54,7 +54,6 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@ -993,21 +992,6 @@ private Response get(
return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
}
}
case GETFILEBLOCKLOCATIONS:
{
final long offsetValue = offset.getValue();
final Long lengthValue = length.getValue();
FileSystem fs = FileSystem.get(conf != null ?
conf : new Configuration());
BlockLocation[] locations = fs.getFileBlockLocations(
new org.apache.hadoop.fs.Path(fullpath),
offsetValue,
lengthValue != null? lengthValue: Long.MAX_VALUE);
final String js = JsonUtil.toJsonString("BlockLocations",
JsonUtil.toJsonMap(locations));
return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
}
case GET_BLOCK_LOCATIONS:
{
final long offsetValue = offset.getValue();

View File

@ -66,13 +66,13 @@
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.NameNodeProxies;
import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
import org.apache.hadoop.hdfs.protocol.BlocksStats;
import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeLocalInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeVolumeInfo;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
@ -534,30 +534,31 @@ public void report(String[] argv, int i) throws IOException {
* minutes. Use "-metaSave" to list all such blocks and accurate
* counts.
*/
BlocksStats blocksStats = dfs.getClient().getNamenode().getBlocksStats();
ReplicatedBlockStats replicatedBlockStats =
dfs.getClient().getNamenode().getReplicatedBlockStats();
System.out.println("Replicated Blocks:");
System.out.println("\tUnder replicated blocks: " +
blocksStats.getLowRedundancyBlocksStat());
replicatedBlockStats.getLowRedundancyBlocks());
System.out.println("\tBlocks with corrupt replicas: " +
blocksStats.getCorruptBlocksStat());
replicatedBlockStats.getCorruptBlocks());
System.out.println("\tMissing blocks: " +
blocksStats.getMissingReplicaBlocksStat());
replicatedBlockStats.getMissingReplicaBlocks());
System.out.println("\tMissing blocks (with replication factor 1): " +
blocksStats.getMissingReplicationOneBlocksStat());
replicatedBlockStats.getMissingReplicationOneBlocks());
System.out.println("\tPending deletion blocks: " +
blocksStats.getPendingDeletionBlocksStat());
replicatedBlockStats.getPendingDeletionBlocks());
ECBlockGroupsStats ecBlockGroupsStats =
dfs.getClient().getNamenode().getECBlockGroupsStats();
ECBlockGroupStats ecBlockGroupStats =
dfs.getClient().getNamenode().getECBlockGroupStats();
System.out.println("Erasure Coded Block Groups: ");
System.out.println("\tLow redundancy block groups: " +
ecBlockGroupsStats.getLowRedundancyBlockGroupsStat());
ecBlockGroupStats.getLowRedundancyBlockGroups());
System.out.println("\tBlock groups with corrupt internal blocks: " +
ecBlockGroupsStats.getCorruptBlockGroupsStat());
ecBlockGroupStats.getCorruptBlockGroups());
System.out.println("\tMissing block groups: " +
ecBlockGroupsStats.getMissingBlockGroupsStat());
System.out.println("\tPending deletion block groups: " +
ecBlockGroupsStats.getPendingDeletionBlockGroupsStat());
ecBlockGroupStats.getMissingBlockGroups());
System.out.println("\tPending deletion blocks: " +
ecBlockGroupStats.getPendingDeletionBlocks());
System.out.println();
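The same renames seen from the client API: ReplicatedBlockStats and ECBlockGroupStats replace BlocksStats and ECBlockGroupsStats, and their getters drop the trailing "Stat". A minimal sketch using only the calls exercised above; obtaining the DistributedFileSystem instance dfs is assumed:

  // Assumes dfs is an initialized org.apache.hadoop.hdfs.DistributedFileSystem.
  ClientProtocol nn = dfs.getClient().getNamenode();
  ReplicatedBlockStats replicated = nn.getReplicatedBlockStats();
  ECBlockGroupStats ecGroups = nn.getECBlockGroupStats();
  long underReplicated = replicated.getLowRedundancyBlocks();
  long missingEcGroups = ecGroups.getMissingBlockGroups();
  // Both stats objects now report pending deletions in units of blocks.
  long pendingDeletionBlocks = replicated.getPendingDeletionBlocks();
  long pendingDeletionEcBlocks = ecGroups.getPendingDeletionBlocks();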
@ -2243,7 +2244,7 @@ public int run(String[] argv) throws Exception {
System.err.println(cmd.substring(1) + ": "
+ e.getLocalizedMessage());
}
if (LOG.isDebugEnabled()) {
if (LOG.isDebugEnabled() && debugException != null) {
LOG.debug("Exception encountered:", debugException);
}
return exitCode;

Some files were not shown because too many files have changed in this diff.