diff --git a/LICENSE-binary b/LICENSE-binary
index f1c512f83cf..b71ea104c5b 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -240,7 +240,7 @@ com.google.guava:guava:20.0
com.google.guava:guava:27.0-jre
com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava
com.microsoft.azure:azure-storage:7.0.0
-com.nimbusds:nimbus-jose-jwt:9.8.1
+com.nimbusds:nimbus-jose-jwt:9.31
com.squareup.okhttp3:okhttp:4.10.0
com.squareup.okio:okio:3.2.0
com.zaxxer:HikariCP:4.0.3
@@ -289,12 +289,8 @@ io.netty:netty-resolver-dns-classes-macos:4.1.77.Final
io.netty:netty-transport-native-epoll:4.1.77.Final
io.netty:netty-transport-native-kqueue:4.1.77.Final
io.netty:netty-resolver-dns-native-macos:4.1.77.Final
-io.opencensus:opencensus-api:0.24.0
-io.opencensus:opencensus-contrib-grpc-metrics:0.24.0
-io.opentracing:opentracing-api:0.33.0
-io.opentracing:opentracing-noop:0.33.0
-io.opentracing:opentracing-util:0.33.0
-io.perfmark:perfmark-api:0.19.0
+io.opencensus:opencensus-api:0.12.3
+io.opencensus:opencensus-contrib-grpc-metrics:0.12.3
io.reactivex:rxjava:1.3.8
io.reactivex:rxjava-string:1.1.1
io.reactivex:rxnetty:0.4.20
@@ -303,7 +299,6 @@ javax.inject:javax.inject:1
log4j:log4j:1.2.17
net.java.dev.jna:jna:5.2.0
net.minidev:accessors-smart:1.2
-net.minidev:json-smart:2.4.7
org.apache.avro:avro:1.9.2
org.apache.commons:commons-collections4:4.2
org.apache.commons:commons-compress:1.21
@@ -327,39 +322,39 @@ org.apache.htrace:htrace-core4:4.1.0-incubating
org.apache.httpcomponents:httpclient:4.5.6
org.apache.httpcomponents:httpcore:4.4.10
org.apache.kafka:kafka-clients:2.8.2
-org.apache.kerby:kerb-admin:2.0.2
-org.apache.kerby:kerb-client:2.0.2
-org.apache.kerby:kerb-common:2.0.2
-org.apache.kerby:kerb-core:2.0.2
-org.apache.kerby:kerb-crypto:2.0.2
-org.apache.kerby:kerb-identity:2.0.2
-org.apache.kerby:kerb-server:2.0.2
-org.apache.kerby:kerb-simplekdc:2.0.2
-org.apache.kerby:kerb-util:2.0.2
-org.apache.kerby:kerby-asn1:2.0.2
-org.apache.kerby:kerby-config:2.0.2
-org.apache.kerby:kerby-pkix:2.0.2
-org.apache.kerby:kerby-util:2.0.2
-org.apache.kerby:kerby-xdr:2.0.2
-org.apache.kerby:token-provider:2.0.2
+org.apache.kerby:kerb-admin:2.0.3
+org.apache.kerby:kerb-client:2.0.3
+org.apache.kerby:kerb-common:2.0.3
+org.apache.kerby:kerb-core:2.0.3
+org.apache.kerby:kerb-crypto:2.0.3
+org.apache.kerby:kerb-identity:2.0.3
+org.apache.kerby:kerb-server:2.0.3
+org.apache.kerby:kerb-simplekdc:2.0.3
+org.apache.kerby:kerb-util:2.0.3
+org.apache.kerby:kerby-asn1:2.0.3
+org.apache.kerby:kerby-config:2.0.3
+org.apache.kerby:kerby-pkix:2.0.3
+org.apache.kerby:kerby-util:2.0.3
+org.apache.kerby:kerby-xdr:2.0.3
+org.apache.kerby:token-provider:2.0.3
org.apache.solr:solr-solrj:8.11.2
org.apache.yetus:audience-annotations:0.5.0
org.apache.zookeeper:zookeeper:3.6.3
-org.codehaus.jettison:jettison:1.5.3
-org.eclipse.jetty:jetty-annotations:9.4.48.v20220622
-org.eclipse.jetty:jetty-http:9.4.48.v20220622
-org.eclipse.jetty:jetty-io:9.4.48.v20220622
-org.eclipse.jetty:jetty-jndi:9.4.48.v20220622
-org.eclipse.jetty:jetty-plus:9.4.48.v20220622
-org.eclipse.jetty:jetty-security:9.4.48.v20220622
-org.eclipse.jetty:jetty-server:9.4.48.v20220622
-org.eclipse.jetty:jetty-servlet:9.4.48.v20220622
-org.eclipse.jetty:jetty-util:9.4.48.v20220622
-org.eclipse.jetty:jetty-util-ajax:9.4.48.v20220622
-org.eclipse.jetty:jetty-webapp:9.4.48.v20220622
-org.eclipse.jetty:jetty-xml:9.4.48.v20220622
-org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.48.v20220622
-org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.48.v20220622
+org.codehaus.jettison:jettison:1.5.4
+org.eclipse.jetty:jetty-annotations:9.4.51.v20230217
+org.eclipse.jetty:jetty-http:9.4.51.v20230217
+org.eclipse.jetty:jetty-io:9.4.51.v20230217
+org.eclipse.jetty:jetty-jndi:9.4.51.v20230217
+org.eclipse.jetty:jetty-plus:9.4.51.v20230217
+org.eclipse.jetty:jetty-security:9.4.51.v20230217
+org.eclipse.jetty:jetty-server:9.4.51.v20230217
+org.eclipse.jetty:jetty-servlet:9.4.51.v20230217
+org.eclipse.jetty:jetty-util:9.4.51.v20230217
+org.eclipse.jetty:jetty-util-ajax:9.4.51.v20230217
+org.eclipse.jetty:jetty-webapp:9.4.51.v20230217
+org.eclipse.jetty:jetty-xml:9.4.51.v20230217
+org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.51.v20230217
+org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.51.v20230217
org.ehcache:ehcache:3.3.1
org.ini4j:ini4j:0.5.4
org.jetbrains.kotlin:kotlin-stdlib:1.4.10
@@ -367,7 +362,7 @@ org.jetbrains.kotlin:kotlin-stdlib-common:1.4.10
org.lz4:lz4-java:1.7.1
org.objenesis:objenesis:2.6
org.xerial.snappy:snappy-java:1.0.5
-org.yaml:snakeyaml:1.33
+org.yaml:snakeyaml:2.0
org.wildfly.openssl:wildfly-openssl:1.1.3.Final
diff --git a/dev-support/docker/Dockerfile_aarch64 b/dev-support/docker/Dockerfile_aarch64
index dd0348961f4..14a53780127 100644
--- a/dev-support/docker/Dockerfile_aarch64
+++ b/dev-support/docker/Dockerfile_aarch64
@@ -74,7 +74,7 @@ ENV PATH "${PATH}:/opt/protobuf/bin"
###
# Avoid out of memory errors in builds
###
-ENV MAVEN_OPTS -Xms256m -Xmx1536m
+ENV MAVEN_OPTS -Xms256m -Xmx3072m
# Skip gpg verification when downloading Yetus via yetus-wrapper
ENV HADOOP_SKIP_YETUS_VERIFICATION true
diff --git a/hadoop-client-modules/hadoop-client/pom.xml b/hadoop-client-modules/hadoop-client/pom.xml
index 5299c9e8713..9170bf4b549 100644
--- a/hadoop-client-modules/hadoop-client/pom.xml
+++ b/hadoop-client-modules/hadoop-client/pom.xml
@@ -69,6 +69,10 @@
           <groupId>com.github.pjfanning</groupId>
           <artifactId>jersey-json</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jettison</groupId>
+          <artifactId>jettison</artifactId>
+        </exclusion>
         <exclusion>
           <groupId>com.sun.jersey</groupId>
           <artifactId>jersey-server</artifactId>
@@ -182,6 +186,10 @@
           <groupId>com.github.pjfanning</groupId>
           <artifactId>jersey-json</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jettison</groupId>
+          <artifactId>jettison</artifactId>
+        </exclusion>
         <exclusion>
           <groupId>io.netty</groupId>
           <artifactId>netty</artifactId>
@@ -233,6 +241,10 @@
           <groupId>com.github.pjfanning</groupId>
           <artifactId>jersey-json</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jettison</groupId>
+          <artifactId>jettison</artifactId>
+        </exclusion>
         <exclusion>
           <groupId>com.sun.jersey</groupId>
           <artifactId>jersey-servlet</artifactId>
@@ -290,6 +302,10 @@
           <groupId>com.github.pjfanning</groupId>
           <artifactId>jersey-json</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jettison</groupId>
+          <artifactId>jettison</artifactId>
+        </exclusion>
         <exclusion>
           <groupId>io.netty</groupId>
           <artifactId>netty</artifactId>
diff --git a/hadoop-common-project/hadoop-auth/pom.xml b/hadoop-common-project/hadoop-auth/pom.xml
index 6eaa4fdfce5..433a615c606 100644
--- a/hadoop-common-project/hadoop-auth/pom.xml
+++ b/hadoop-common-project/hadoop-auth/pom.xml
@@ -110,20 +110,8 @@
org.bouncycastlebcprov-jdk15on
-
-
- net.minidev
- json-smart
-
-
- net.minidev
- json-smart
- org.apache.zookeeperzookeeper
diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml
new file mode 100644
index 00000000000..b788b4497fe
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml
@@ -0,0 +1,40640 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @param customMessage deprecation message
+ @deprecated use {@link #addDeprecation(String key, String newKey,
+ String customMessage)} instead]]>
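The batch-registration path recommended above can be exercised as follows; this is a minimal sketch, and the key names old.temp.dir / old.buffer.size are purely illustrative:

    import org.apache.hadoop.conf.Configuration;

    public class DeprecationExample {
      public static void main(String[] args) {
        // Register both renames in one call instead of repeated addDeprecation calls.
        Configuration.addDeprecations(new Configuration.DeprecationDelta[] {
            new Configuration.DeprecationDelta("old.temp.dir", "new.temp.dir"),
            new Configuration.DeprecationDelta("old.buffer.size", "new.buffer.size")
        });

        Configuration conf = new Configuration(false);
        conf.set("old.temp.dir", "/tmp/work");         // set via the deprecated key...
        System.out.println(conf.get("new.temp.dir"));  // ...visible under the new key
      }
    }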
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKey key that takes up the value of the deprecated key
+ @param customMessage deprecation message]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKey key that takes up the value of deprecated key]]>
+
+
+
+
+
+ key is deprecated.
+
+ @param key the parameter which is to be checked for deprecation
+ @return true if the key is deprecated and
+ false otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param name resource to be added, the classpath is examined for a file
+ with that name.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param url url of the resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param file file-path of resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ WARNING: The contents of the InputStream will be cached, by this method.
+ So use this sparingly because it does increase the memory consumption.
+
+ @param in InputStream to deserialize the object from. In will be read from
+ when a get or set is called next. After it is read the stream will be
+ closed.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param in InputStream to deserialize the object from.
+ @param name the name of the resource because InputStream.toString is not
+ very descriptive some times.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param conf Configuration object from which to load properties]]>
+
+
+
+
+
+
+
+
+
+
+ name property, null if
+ no such property exists. If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null.
+
+ Values are processed for variable expansion
+ before being returned.
+
+ As a side effect get loads the properties from the sources if called for
+ the first time as a lazy init.
+
+ @param name the property name, will be trimmed before get value.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property, but only for
+ names which have no valid value, usually non-existent or commented
+ out in XML.
+
+ @param name the property name
+ @return true if the property name exists without value]]>
+
+
+
+
+
+ name property as a trimmed String,
+ null if no such property exists.
+ If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+ name property as a trimmed String,
+ defaultValue if no such property exists.
+ See @{Configuration#getTrimmed} for more details.
+
+ @param name the property name.
+ @param defaultValue the property default value.
+ @return the value of the name or defaultValue
+ if it is not set.]]>
+
+
+
+
+
+ name property, without doing
+ variable expansion. If the key is
+ deprecated, it returns the value of the first key which replaces
+ the deprecated key and is not null.
+
+ @param name the property name.
+ @return the value of the name property or
+ its replacing property and null if no such property exists.]]>
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated or there is a deprecated name associated to it,
+ it sets the value to both names. Name will be trimmed before put into
+ configuration.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated, it also sets the value to
+ the keys that replace the deprecated key. Name will be trimmed before put
+ into configuration.
+
+ @param name property name.
+ @param value property value.
+ @param source the place that this configuration value came from
+ (For debugging).
+ @throws IllegalArgumentException when the value or name is null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name. If the key is deprecated,
+ it returns the value of the first key which replaces the deprecated key
+ and is not null.
+ If no such property exists,
+ then defaultValue is returned.
+
+ @param name property name, will be trimmed before get value.
+ @param defaultValue default value.
+ @return property value, or defaultValue if the property
+ doesn't exist.]]>
+
+
+
+
+
+
+ name property as an int.
+
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid int,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as an int,
+ or defaultValue.]]>
+
+
+
+
+
+ name property as a set of comma-delimited
+ int values.
+
+ If no such property exists, an empty array is returned.
+
+ @param name property name
+ @return property value interpreted as an array of comma-delimited
+ int values]]>
+
+
+
+
+
+
+ name property to an int.
+
+ @param name property name.
+ @param value int value of the property.]]>
+
+
+
+
+
+
+ name property as a long.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid long,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a long or
+ human readable format. If no such property exists, the provided default
+ value is returned, or if the specified value is not a valid
+ long or human readable format, then an error is thrown. You
+ can use the following suffix (case insensitive): k(kilo), m(mega), g(giga),
+ t(tera), p(peta), e(exa)
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a long.
+
+ @param name property name.
+ @param value long value of the property.]]>
+
+
+
+
+
+
+ name property as a float.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid float,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a float,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a float.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a double.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid double,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a double,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a double.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a boolean.
+ If no such property is specified, or if the specified value is not a valid
+ boolean, then defaultValue is returned.
+
+ @param name property name.
+ @param defaultValue default value.
+ @return property value as a boolean,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a boolean.
+
+ @param name property name.
+ @param value boolean value of the property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property to the given type. This
+ is equivalent to set(<name>, value.toString()).
+ @param name property name
+ @param value new value
+ @param enumeration type]]>
+
+
+
+
+
+
+ enumeration type
+ @throws IllegalArgumentException If mapping is illegal for the type
+ provided
+ @return enumeration type]]>
+
+
+
+
+
+
+
+ name to the given time duration. This
+ is equivalent to set(<name>, value + <time suffix>).
+ @param name Property name
+ @param value Time duration
+ @param unit Unit of time]]>
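A quick sketch of the time-duration setter/getter pair described here; the property name and values are invented for illustration:

    import java.util.concurrent.TimeUnit;
    import org.apache.hadoop.conf.Configuration;

    public class TimeDurationExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        // Stored as "30s"; can be read back in any unit.
        conf.setTimeDuration("demo.heartbeat.interval", 30, TimeUnit.SECONDS);
        long millis =
            conf.getTimeDuration("demo.heartbeat.interval", 10000, TimeUnit.MILLISECONDS);
        System.out.println(millis);   // prints 30000
      }
    }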
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as a Pattern.
+ If no such property is specified, or if the specified value is not a valid
+ Pattern, then DefaultValue is returned.
+ Note that the returned value is NOT trimmed by this method.
+
+ @param name property name
+ @param defaultValue default value
+ @return property value as a compiled Pattern, or defaultValue]]>
+
+
+
+
+
+
+ Pattern.
+ If the pattern is passed as null, sets the empty pattern which results in
+ further calls to getPattern(...) returning the default value.
+
+ @param name property name
+ @param pattern new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as
+ a collection of Strings.
+ If no such property is specified then empty collection is returned.
+
+ This is an optimized version of {@link #getStrings(String)}
+
+ @param name property name.
+ @return property value as a collection of Strings.]]>
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then null is returned.
+
+ @param name property name.
+ @return property value as an array of Strings,
+ or null.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of Strings,
+ or default value.]]>
+
+
+
+
+
+ name property as
+ a collection of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then empty Collection is returned.
+
+ @param name property name.
+ @return property value as a collection of Strings, or empty Collection]]>
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then an empty array is returned.
+
+ @param name property name.
+ @return property value as an array of trimmed Strings,
+ or empty array.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of trimmed Strings,
+ or default value.]]>
+
+
+
+
+
+
+ name property as comma delimited values.
+
+ @param name property name.
+ @param values The values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostProperty as a
+ InetSocketAddress. If hostProperty is
+ null, addressProperty will be used. This
+ is useful for cases where we want to differentiate between host
+ bind address and address clients should use to establish connection.
+
+ @param hostProperty bind host property name.
+ @param addressProperty address property name.
+ @param defaultAddressValue the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+
+ name property as a
+ InetSocketAddress.
+ @param name property name.
+ @param defaultAddress the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+ name property as
+ a host:port.
+ @param name property name.
+ @param addr inetSocketAddress addr.]]>
+
+
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address. If the host and address
+ properties are configured the host component of the address will be combined
+ with the port component of the addr to generate the address. This is to allow
+ optional control over which host name is used in multi-home bind-host
+ cases where a host can have multiple names
+ @param hostProperty the bind-host configuration name
+ @param addressProperty the service address configuration name
+ @param defaultAddressValue the service default address configuration value
+ @param addr InetSocketAddress of the service listener
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address.
+ @param name property name.
+ @param addr InetSocketAddress of a listener to store in the given property
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property
+ as an array of Class.
+ The value of the property specifies a list of comma separated class names.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the property name.
+ @param defaultValue default value.
+ @return property value as a Class[],
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a Class.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+
+ name property as a Class
+ implementing the interface specified by xface.
+
+ If no such property is specified, then defaultValue is
+ returned.
+
+ An exception is thrown if the returned class does not implement the named
+ interface.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @param xface the interface implemented by the named class.
+ @param Interface class type.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a List
+ of objects implementing the interface specified by xface.
+
+ An exception is thrown if any of the classes does not exist, or if it does
+ not implement the named interface.
+
+ @param name the property name.
+ @param xface the interface implemented by the classes named by
+ name.
+ @param Interface class type.
+ @return a List of objects implementing xface.]]>
+
+
+
+
+
+
+
+ name property to the name of a
+ theClass implementing the given interface xface.
+
+ An exception is thrown if theClass does not implement the
+ interface xface.
+
+ @param name property name.
+ @param theClass property value.
+ @param xface the interface implemented by the named class.]]>
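The class-valued accessors documented above can be combined with ReflectionUtils to instantiate the configured implementation; Codec and FastCodec below are hypothetical placeholder types:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.util.ReflectionUtils;

    public class ClassConfExample {
      public interface Codec { }                            // hypothetical interface
      public static class FastCodec implements Codec { }    // hypothetical implementation

      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        // setClass checks that FastCodec implements Codec before storing its name.
        conf.setClass("demo.codec.impl", FastCodec.class, Codec.class);
        Class<? extends Codec> cls =
            conf.getClass("demo.codec.impl", FastCodec.class, Codec.class);
        Codec codec = ReflectionUtils.newInstance(cls, conf);
        System.out.println(codec.getClass().getName());
      }
    }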
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return an input stream attached to the resource.]]>
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return a reader attached to the resource.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ String
+ key-value pairs in the configuration.
+
+ @return an iterator over the entries.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When property name is not empty and the property exists in the
+ configuration, this method writes the property and its attributes
+ to the {@link Writer}.
+
+
+
+ When property name is null or empty, this method writes all the
+ configuration properties and their attributes to the {@link Writer}.
+
+
+
+ When property name is not empty but the property doesn't exist in
+ the configuration, this method throws an {@link IllegalArgumentException}.
+
+
+ @param propertyName xml property name.
+ @param out the writer to write to.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+ When propertyName is not empty, and the property exists
+ in the configuration, the format of the output would be,
+
+ When propertyName is not empty, and the property is not
+ found in the configuration, this method will throw an
+ {@link IllegalArgumentException}.
+
+
+
+ @param config the configuration
+ @param propertyName property name
+ @param out the Writer to write to
+ @throws IOException raised on errors performing I/O.
+ @throws IllegalArgumentException when property name is not
+ empty and the property is not found in configuration]]>
+
+
+
+
+
+
+
+
+ { "properties" :
+ [ { key : "key1",
+ value : "value1",
+ isFinal : "key1.isFinal",
+ resource : "key1.resource" },
+ { key : "key2",
+ value : "value2",
+ isFinal : "ke2.isFinal",
+ resource : "key2.resource" }
+ ]
+ }
+
+
+ It does not output the properties of the configuration object which
+ is loaded from an input stream.
+
+
+ @param config the configuration
+ @param out the Writer to write to
+ @throws IOException raised on errors performing I/O.]]>
+
Configurations are specified by resources. A resource contains a set of
+ name/value pairs as XML data. Each resource is named by either a
+ String or by a {@link Path}. If named by a String,
+ then the classpath is examined for a file with that name. If named by a
+ Path, then the local filesystem is examined directly, without
+ referring to the classpath.
+
+
Unless explicitly turned off, Hadoop by default specifies two
+ resources, loaded in-order from the classpath:
core-default.xml: Read-only defaults for hadoop.
core-site.xml: Site-specific configuration for a given hadoop
+ installation.
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.
+
+
Final Parameters
+
+
Configuration parameters may be declared final.
+ Once a resource declares a value final, no subsequently-loaded
+ resource can alter that value.
+ For example, one might define a final parameter with:
+
When conf.get("tempdir") is called, then ${basedir}
+ will be resolved to another property in this Configuration, while
+ ${user.name} would then ordinarily be resolved to the value
+ of the System property with that name.
+
When conf.get("otherdir") is called, then ${env.BASE_DIR}
+ will be resolved to the value of the ${BASE_DIR} environment variable.
+ It supports ${env.NAME:-default} and ${env.NAME-default} notations.
+ The former is resolved to "default" if ${NAME} environment variable is undefined
+ or its value is empty.
+ The latter behaves the same way only if ${NAME} is undefined.
+
By default, warnings will be given to any deprecated configuration
+ parameters and these are suppressible by configuring
+ log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in
+ log4j.properties file.
+
+
Tags
+
+
Optionally we can tag related properties together by using tag
+ attributes. System tags are defined by hadoop.tags.system property. Users
+ can define their own custom tags in hadoop.tags.custom property.
+
+
Properties marked with tags can be retrieved with conf
+ .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags
+ (Arrays.asList("YARN","SECURITY")).
]]>
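To make the resource-loading, final-parameter and variable-expansion rules above concrete, here is a small hedged sketch; the resource name and property keys are illustrative, not real Hadoop defaults:

    import org.apache.hadoop.conf.Configuration;

    public class ConfigurationOverviewExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();   // loads core-default.xml and core-site.xml
        conf.addResource("demo-site.xml");          // additional resource found on the classpath

        // Variable expansion: ${basedir} resolves against other properties,
        // ${user.name} falls back to the Java system property of that name.
        conf.set("basedir", "/data/app");
        conf.set("tempdir", "${basedir}/tmp/${user.name}");
        System.out.println(conf.get("tempdir"));    // e.g. /data/app/tmp/alice

        // A property marked <final>true</final> in an earlier resource keeps its
        // value even if a later resource tries to override it.
      }
    }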
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #createKey(String, byte[], Options)} method.
+
+ @param name the base name of the key
+ @param options the options for the new key.
+ @return the version name of the first version of the key.
+ @throws IOException raised on errors performing I/O.
+ @throws NoSuchAlgorithmException no such algorithm exception.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #rollNewVersion(String, byte[])} method.
+
+ @param name the basename of the key
+ @return the name of the new version of the key
+ @throws IOException raised on errors performing I/O.
+ @throws NoSuchAlgorithmException This exception is thrown when a particular
+ cryptographic algorithm is requested
+ but is not available in the environment.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProvider implementations must be thread safe.]]>
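A hedged sketch of driving a KeyProvider through KeyProviderFactory, per the lifecycle described above; the keystore URI and key name are placeholders:

    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.crypto.key.KeyProvider;
    import org.apache.hadoop.crypto.key.KeyProviderFactory;

    public class KeyProviderExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // A file-backed JCEKS provider; the path is purely illustrative.
        conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, "jceks://file/tmp/demo.jceks");

        List<KeyProvider> providers = KeyProviderFactory.getProviders(conf);
        KeyProvider kp = providers.get(0);

        // Generate key material and register the first version of "demo-key".
        KeyProvider.KeyVersion v0 = kp.createKey("demo-key", new KeyProvider.Options(conf));
        // Later, roll to a new version with fresh material.
        KeyProvider.KeyVersion v1 = kp.rollNewVersion("demo-key");
        kp.flush();
        System.out.println(v0.getVersionName() + " -> " + v1.getVersionName());
      }
    }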
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NULL if
+ a provider for the specified URI scheme could not be found.
+ @throws IOException thrown if the provider failed to initialize.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri has syntax error]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri is
+ not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri
+ determines a configuration property name,
+ fs.AbstractFileSystem.scheme.impl whose value names the
+ AbstractFileSystem class.
+
+ The entire URI and conf is passed to the AbstractFileSystem factory method.
+
+ @param uri for the file system to be created.
+ @param conf which is passed to the file system impl.
+
+ @return file system for the given URI.
+
+ @throws UnsupportedFileSystemException if the file system for
+ uri is not supported.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations, such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests,
+ particularly in an HA setting.
+ @throws IOException raised on errors performing I/O.
+ @throws UnsupportedOperationException Unsupported Operation Exception.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications, must
+ include entries for user, group, and others for compatibility with
+ permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
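The ACL operations summarised in this block are typically driven through AclEntry.Builder; a brief sketch, with the user name and path invented:

    import java.util.Collections;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.AclEntry;
    import org.apache.hadoop.fs.permission.AclEntryScope;
    import org.apache.hadoop.fs.permission.AclEntryType;
    import org.apache.hadoop.fs.permission.AclStatus;
    import org.apache.hadoop.fs.permission.FsAction;

    public class AclExample {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path("/demo/data");

        AclEntry grantAlice = new AclEntry.Builder()
            .setScope(AclEntryScope.ACCESS)
            .setType(AclEntryType.USER)
            .setName("alice")
            .setPermission(FsAction.READ_WRITE)
            .build();

        // Merge the new entry into the existing ACL rather than replacing it.
        fs.modifyAclEntries(p, Collections.singletonList(grantAlice));
        AclStatus status = fs.getAclStatus(p);
        System.out.println(status.getEntries());
      }
    }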
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException raised on errors performing I/O.]]>
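A compact sketch of the extended-attribute calls documented above; note that names need a namespace prefix such as user., and the path and value here are invented:

    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class XAttrExample {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path("/demo/report.csv");

        fs.setXAttr(p, "user.origin", "etl-job-42".getBytes(StandardCharsets.UTF_8));
        byte[] value = fs.getXAttr(p, "user.origin");
        System.out.println(new String(value, StandardCharsets.UTF_8));

        System.out.println(fs.listXAttrs(p));   // attribute names only
        fs.removeXAttr(p, "user.origin");
      }
    }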
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ BlockLocation(offset: 0, length: BLOCK_SIZE,
+ hosts: {"host1:9866", "host2:9866, host3:9866"})
+
+
+ And if the file is erasure-coded, each BlockLocation represents a logical
+ block group. Value offset is the offset of a block group in the file and
+ value length is the total length of a block group. Hosts of a BlockLocation
+ are the datanodes that hold all the data blocks and parity blocks of a
+ block group.
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ A BlockLocation example will be like:
+
+
+ Please refer to
+ {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or
+ {@link FileContext#getFileBlockLocations(Path, long, long)}
+ for more examples.]]>
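Retrieving the BlockLocations this passage describes is a two-step call; a hedged example with an invented file path:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class BlockLocationExample {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FileStatus st = fs.getFileStatus(new Path("/demo/big.dat"));

        // One entry per block, or per block group for erasure-coded files.
        BlockLocation[] locations = fs.getFileBlockLocations(st, 0, st.getLen());
        for (BlockLocation loc : locations) {
          System.out.printf("offset=%d length=%d hosts=%s%n",
              loc.getOffset(), loc.getLength(), String.join(",", loc.getHosts()));
        }
      }
    }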
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ This does not change the current offset of a file, and is thread-safe.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if reached
+ end-of-stream
+ @throws IOException if there is some error performing the read]]>
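A sketch of the capability probe plus positioned ByteBuffer read described above; the path is illustrative, and only streams that advertise PREADBYTEBUFFER accept this call:

    import java.nio.ByteBuffer;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.StreamCapabilities;

    public class PreadByteBufferExample {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        try (FSDataInputStream in = fs.open(new Path("/demo/big.dat"))) {
          if (in.hasCapability(StreamCapabilities.PREADBYTEBUFFER)) {
            ByteBuffer buf = ByteBuffer.allocate(8192);
            // Positioned read: does not move the stream's current offset.
            int n = in.read(4096L, buf);
            System.out.println("read " + n + " bytes at offset 4096");
          } else {
            System.out.println("stream does not support ByteBuffer pread");
          }
        }
      }
    }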
+
+
+
+
+
+
+
+
+ This operation provides similar semantics to
+ {@link #read(long, ByteBuffer)}, the difference is that this method is
+ guaranteed to read data until the {@link ByteBuffer} is full, or until
+ the end of the data stream is reached.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @throws IOException if there is some error performing the read
+ @throws EOFException the end of the data was reached before
+ the read operation completed
+ @see #read(long, ByteBuffer)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ @param buf
+ the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if
+ reach end-of-stream
+ @throws IOException
+ if there is some error performing the read]]>
+
CREATE - to create a file if it does not exist,
+ else throw FileAlreadyExists.
+
APPEND - to append to a file if it exists,
+ else throw FileNotFoundException.
+
OVERWRITE - to truncate a file if it exists,
+ else throw FileNotFoundException.
+
CREATE|APPEND - to create a file if it does not exist,
+ else append to an existing file.
+
CREATE|OVERWRITE - to create a file if it does not exist,
+ else overwrite an existing file.
+
SYNC_BLOCK - to force closed blocks to the disk device.
+ In addition {@link Syncable#hsync()} should be called after each write,
+ if true synchronous behavior is required.
+
LAZY_PERSIST - Create the block on transient storage (RAM) if
+ available.
+
APPEND_NEWBLOCK - Append data to a new block instead of end of the last
+ partial block.
+
+
+ Following combinations are not valid and will result in
+ {@link HadoopIllegalArgumentException}:
+
+
]]>
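The flag combinations listed above are passed to FileContext.create as an EnumSet; a small sketch with a made-up path:

    import java.util.EnumSet;
    import org.apache.hadoop.fs.CreateFlag;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileContext;
    import org.apache.hadoop.fs.Options;
    import org.apache.hadoop.fs.Path;

    public class CreateFlagExample {
      public static void main(String[] args) throws Exception {
        FileContext fc = FileContext.getFileContext();
        // CREATE|APPEND: create the log if missing, otherwise append to it.
        try (FSDataOutputStream out = fc.create(
            new Path("/demo/app.log"),
            EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND),
            Options.CreateOpts.createParent())) {
          out.writeBytes("one line of output\n");
        }
      }
    }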
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws AccessControlException if access denied
+ @throws IOException If an IO Error occurred
+ @throws UnresolvedLinkException If unresolved link occurred.
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Progress - to report progress on the operation - default null
+
Permission - umask is applied against permission: default is
+ FsPermissions:getDefault()
+
+
CreateParent - create missing parent path; default is to not
+ to create parents
+
The defaults for the following are SS defaults of the file
+ server implementing the target path. Not all parameters make sense
+ for all kinds of file system - eg. localFS ignores Blocksize,
+ replication, checksum
+
+
BufferSize - buffersize used in FSDataOutputStream
+
Blocksize - block size for file blocks
+
ReplicationFactor - replication for blocks
+
ChecksumParam - Checksum parameters. server default is used
+ if not specified.
+
+
+
+ @return {@link FSDataOutputStream} for created file
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file f already exists
+ @throws FileNotFoundException If parent of f does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of f is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dir already
+ exists
+ @throws FileNotFoundException If parent of dir does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of dir is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for dir
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dir is not valid]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is invalid
+
+ @return true if the delete succeeded, false otherwise.]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+ @return input stream.]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+ @return output stream.]]>
+
+
+
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+
+ @throws AccessControlException If access is denied
+ @throws FileNotFoundException If file f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Fails if src is a file and dst is a directory.
+
Fails if src is a directory and dst is a file.
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails if the dst
+ already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites the dst if
+ it is a file or an empty directory. Rename fails if dst is a non-empty
+ directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for details
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @param options rename options.
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If dst already exists and
+ options has {@link Options.Rename#OVERWRITE}
+ option false.
+ @throws FileNotFoundException If src does not exist
+ @throws ParentNotDirectoryException If parent of dst is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for src
+ and dst is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
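A sketch of rename with the OVERWRITE option whose semantics are spelled out above; both paths are placeholders:

    import org.apache.hadoop.fs.FileContext;
    import org.apache.hadoop.fs.Options;
    import org.apache.hadoop.fs.Path;

    public class RenameExample {
      public static void main(String[] args) throws Exception {
        FileContext fc = FileContext.getFileContext();
        Path src = new Path("/demo/staging/part-0000");
        Path dst = new Path("/demo/final/part-0000");
        // Without OVERWRITE this fails with FileAlreadyExistsException when dst
        // exists; with it, an existing file (or empty directory) at dst is replaced.
        fc.rename(src, dst, Options.Rename.OVERWRITE);
      }
    }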
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws HadoopIllegalArgumentException If username or
+ groupname is invalid.]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+ f is
+ not supported.]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If the given path does not refer to a symlink
+ or an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Given a path referring to a symlink of form:
+
+ {@literal <---}X{@literal --->}
+ fs://host/A/B/link
+ {@literal <-----}Y{@literal ----->}
+
+ In this path X is the scheme and authority that identify the file system,
+ and Y is the path leading up to the final path component "link". If Y is
+ a symlink itself then let Y' be the target of Y and X' be the scheme and
+ authority of Y'. Symlink targets may:
+
+ 1. Fully qualified URIs
+
+ fs://hostX/A/B/file Resolved according to the target file system.
+
+ 2. Partially qualified URIs (eg scheme but no host)
+
+ fs:///A/B/file Resolved according to the target file system. Eg resolving
+ a symlink to hdfs:///A results in an exception because
+ HDFS URIs must be fully qualified, while a symlink to
+ file:///A will not since Hadoop's local file systems
+ require partially qualified URIs.
+
+ 3. Relative paths
+
+ path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
+ is "../B/file" then [Y'][path] is hdfs://host/B/file
+
+ 4. Absolute paths
+
+ path Resolves to [X'][path]. Eg if Y resolves to hdfs://host/A/B and path
+ is "/file" then [X'][path] is hdfs://host/file
+
+
+ @param target the target of the symbolic link
+ @param link the path to be created that points to target
+ @param createParent if true then missing parent dirs are created if
+ false then parent must exist
+
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file link already exists
+ @throws FileNotFoundException If target does not exist
+ @throws ParentNotDirectoryException If parent of link is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for
+ target or link is not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist.
+ @throws UnresolvedLinkException If unresolved link occurred.
+ @throws AccessControlException If access is denied.
+ @throws IOException If an I/O error occurred.
+ @return resolve path.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries
+ to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications, must include entries for user, group, and others for
+ compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns
+ each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal <}String{@literal >} of the XAttr names of the
+ file or directory
+ @throws IOException If an I/O error occurred.]]>
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Path Names
+
+ The Hadoop file system supports a URI namespace and URI names. This enables
+ multiple types of file systems to be referenced using fully-qualified URIs.
+ Two common Hadoop file system implementations are
+
+
the local file system: file:///path
+
the HDFS file system: hdfs://nnAddress:nnPort/path
+
+
+ The Hadoop file system also supports additional naming schemes besides URIs.
+ Hadoop has the concept of a default file system, which implies a
+ default URI scheme and authority. This enables slash-relative names
+ relative to the default FS, which are more convenient for users and
+ application writers. The default FS is typically set by the user's
+ environment, though it can also be manually specified.
+
+
+ Hadoop also supports working-directory-relative names, which are paths
+ relative to the current working directory (similar to Unix). The working
+ directory can be in a different file system than the default FS.
+
+ Thus, Hadoop path names can be specified as one of the following:
+
+
a fully-qualified URI: scheme://authority/path (e.g.
+ hdfs://nnAddress:nnPort/foo/bar)
+
a slash-relative name: path relative to the default file system (e.g.
+ /foo/bar)
+
a working-directory-relative name: path relative to the working dir (e.g.
+ foo/bar)
+
+ Relative paths with scheme (scheme:foo/bar) are illegal.
+
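+ As a purely illustrative sketch (the namenode address and paths below are
+ placeholders, not defaults), the three forms can be constructed as:
+
+      Path fullyQualified = new Path("hdfs://namenode:8020/foo/bar"); // scheme://authority/path
+      Path slashRelative  = new Path("/foo/bar");  // resolved against the default file system
+      Path wdRelative     = new Path("foo/bar");   // resolved against the working directory
+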
+
Role of FileContext and Configuration Defaults
+
+ The FileContext is the analogue of per-process file-related state in Unix. It
+ contains two properties:
+
+
+
the default file system (for resolving slash-relative names)
+
the umask (for file permissions)
+
+ In general, these properties are obtained from the default configuration file
+ in the user's environment (see {@link Configuration}).
+
+ Further file system properties are specified on the server-side. File system
+ operations default to using these server-side defaults unless otherwise
+ specified.
+
+ The file system related server-side defaults are:
+
+
the home directory (default is "/user/userName")
+
the initial wd (only for local fs)
+
replication factor
+
block size
+
buffer size
+
encryptDataTransfer
+
checksum option. (checksumType and bytesPerChecksum)
+
+
+
Example Usage
+
+ Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
+ Unspecified values come from core-defaults.xml in the release jar.
+
+
myFContext = FileContext.getFileContext(); // uses the default config
+ // which has your default FS
+
myFContext.create(path, ...);
+
myFContext.setWorkingDir(path);
+
myFContext.open (path, ...);
+
...
+
+ Example 2: Get a FileContext with a specific URI as the default FS
+
+
myFContext = FileContext.getFileContext(URI);
+
myFContext.create(path, ...);
+
...
+
+ Example 3: FileContext with local file system as the default
+
+ If the configuration has the property
+ {@code "fs.$SCHEME.impl.disable.cache"} set to true,
+ a new instance will be created, initialized with the supplied URI and
+ configuration, then returned without being cached.
+
+
+ If there is a cached FS instance matching the same URI, it will
+ be returned.
+
+
+ Otherwise: a new FS instance will be created, initialized with the
+ configuration and URI, cached and returned to the caller.
+
+
+ @param uri uri of the filesystem.
+ @param conf configuration.
+ @return filesystem instance.
+ @throws IOException if the FileSystem cannot be instantiated.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ if f == null :
+ result = null
+ elif f.getLen() {@literal <=} start:
+ result = []
+ else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)]
+
+ This call is most helpful with a distributed filesystem
+ where the hostnames of machines that contain blocks of the given file
+ can be determined.
+
+ The default implementation returns an array containing one element:
+
+
+ And if a file is erasure-coded, the returned BlockLocations are logical
+ block groups.
+
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ there will be one BlockLocation returned, with 0 offset, actual file size
+ and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+ 2. If the file size is less than one group size but greater than one
+ stripe size, then there will be one BlockLocation returned, with 0 offset,
+ actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting
+ the actual blocks.
+ 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ for example, then the result will be like:
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails
+ if the dst already exists.
+
+
+ If OVERWRITE option is passed as an argument, rename overwrites
+ the dst if it is a file or an empty directory. Rename fails if dst is
+ a non-empty directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for
+ details. This default implementation is non atomic.
+
+ This method is deprecated since it is a temporary method added to
+ support the transition from FileSystem to FileContext for user
+ applications.
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @param options rename options.
+ @throws FileNotFoundException src path does not exist, or the parent
+ path of dst does not exist.
+ @throws FileAlreadyExistsException dest path exists and is a file
+ @throws ParentNotDirectoryException if the parent path of dest is not
+ a directory
+ @throws IOException on failure]]>
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default).]]>
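+
+ A minimal caller-side sketch, assuming an open FileSystem instance named
+ fs and an illustrative path and length:
+
+      boolean done = fs.truncate(new Path("/tmp/data.log"), 1024L);
+      if (!done) {
+        // the last block is still being adjusted in the background;
+        // wait for completion before appending or otherwise updating the file
+      }
+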
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Clean shutdown of the JVM cannot be guaranteed.
+
The time to shut down a FileSystem depends on the number of
+ files to delete. For filesystems where the cost of checking
+ for the existence of a file/directory and the actual delete operation
+ (for example: object stores) is high, the time to shutdown the JVM can be
+ significantly extended by over-use of this feature.
+
Connectivity problems with a remote filesystem may delay shutdown
+ further, and may cause the files to not be deleted.
+
+ @param f the path to delete.
+ @return true if deleteOnExit is successful, otherwise false.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ Will not return null. Expect IOException upon access error.
+ @param f given path
+ @return the statuses of the files/directories in the given path
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param f
+ a path name
+ @param filter
+ the user-supplied path filter
+ @return an array of FileStatus objects for the files under the given path
+ after applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @return a list of statuses for the files under the given paths after
+ applying the default Path filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @param filter
+ the user-supplied path filter
+ @return a list of statuses for the files under the given paths after
+ applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+ Return all the files that match filePattern and are not checksum
+ files. Results are sorted by their names.
+
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+
+
+
?
+
Matches any single character.
+
+
*
+
Matches zero or more characters.
+
+
[abc]
+
Matches a single character from character set
+ {a,b,c}.
+
+
[a-b]
+
Matches a single character from the character range
+ {a...b}. Note that character a must be
+ lexicographically less than or equal to character b.
+
+
[^a]
+
Matches a single character that is not from character set or range
+ {a}. Note that the ^ character must occur
+ immediately to the right of the opening bracket.
+
+
\c
+
Removes (escapes) any special meaning of character c.
+
+
{ab,cd}
+
Matches a string from the string set {ab, cd}
+
+
{ab,c{de,fh}}
+
Matches a string from the string set {ab, cde, cfh}
+
+
+
+
+
+ @param pathPattern a glob specifying a path pattern
+
+ @return an array of paths that match the path pattern
+ @throws IOException IO failure]]>
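+
+ A hedged usage sketch (the pattern and directory layout are illustrative,
+ and fs is an already-initialised FileSystem):
+
+      FileStatus[] matches = fs.globStatus(new Path("/logs/2023-*/{app,sys}*.log"));
+      if (matches != null) {            // null when the pattern has no glob characters and the path is absent
+        for (FileStatus st : matches) {
+          System.out.println(st.getPath());
+        }
+      }
+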
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+ p does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ If the path is a directory,
+ if recursive is false, returns files in the directory;
+ if recursive is true, return files in the subtree rooted at the path.
+ If the path is a file, return the file's status and block locations.
+
+ @param f is the path
+ @param recursive if the subdirectories need to be traversed recursively
+
+ @return an iterator that traverses statuses of the files
+
+ @throws FileNotFoundException when the path does not exist;
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ undefined.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests,
+ particularly in an HA setting.
+ @throws IOException If an I/O error occurred.
+ @throws UnsupportedOperationException if the operation is unsupported.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal <}String{@literal >} of the XAttr names of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
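+
+ A minimal sketch tying these xattr operations together (the path, attribute
+ name and value are illustrative; imports of org.apache.hadoop.fs.* and
+ java.nio.charset.StandardCharsets are assumed):
+
+      Path p = new Path("/user/alice/report.csv");
+      fs.setXAttr(p, "user.origin", "ingest-job".getBytes(StandardCharsets.UTF_8));
+      byte[] origin = fs.getXAttr(p, "user.origin");
+      List<String> names = fs.listXAttrs(p);
+      fs.removeXAttr(p, "user.origin");
+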
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is a default method which is intended to be overridden by
+ subclasses. The default implementation returns an empty storage statistics
+ object.
+
+ @return The StorageStatistics for this FileSystem instance.
+ Will never be null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ All user code that may potentially use the Hadoop Distributed
+ File System should be written to use a FileSystem object or its
+ successor, {@link FileContext}.
+
+
+ The local implementation is {@link LocalFileSystem} and distributed
+ implementation is DistributedFileSystem. There are other implementations
+ for object stores and (outside the Apache Hadoop codebase),
+ third party filesystems.
+
+ Notes
+
+
The behaviour of the filesystem is
+
+ specified in the Hadoop documentation.
+ However, the normative specification of the behavior of this class is
+ actually HDFS: if HDFS does not behave the way these Javadocs or
+ the specification in the Hadoop documentation defines, assume that
+ the documentation is incorrect.
+
+
The term {@code FileSystem} refers to an instance of this class.
+
The acronym "FS" is used as an abbreviation of FileSystem.
+
The term {@code filesystem} refers to the distributed/local filesystem
+ itself, rather than the class used to interact with it.
+
The term "file" refers to a file in the remote filesystem,
+ rather than instances of {@code java.io.File}.
+
+
+ This is a carefully evolving class.
+ New methods may be marked as Unstable or Evolving for their initial release,
+ as a warning that they are new and may change based on the
+ experience of use in applications.
+
+ Important note for developers
+
+ If you are making changes here to the public API or protected methods,
+ you must review the following subclasses and make sure that
+ they are filtering/passing through new methods as appropriate.
+
+ {@link FilterFileSystem}: methods are passed through. If not,
+ then {@code TestFilterFileSystem.MustNotImplement} must be
+ updated with the unsupported interface.
+ Furthermore, if the new API's support is probed for via
+ {@link #hasPathCapability(Path, String)} then
+ {@link FilterFileSystem#hasPathCapability(Path, String)}
+ must return false, always.
+
+ {@link ChecksumFileSystem}: checksums are created and
+ verified.
+
+ {@code TestHarFileSystem} will need its {@code MustNotImplement}
+ interface updated.
+
+
+ There are some external places your changes will break things.
+ Do co-ordinate changes here.
+
+
+ HBase: HBoss
+
+ Hive: HiveShim23
+
+ {@code shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ returns true if the operation succeeded. When deleteSource is true,
+ this means "after the copy, delete(source) returned true".
+ If the destination is a directory, and mkdirs (dest) fails,
+ the operation will return false rather than raise any exception.
+
+ The overwrite flag is about overwriting files; it has no effect on
+ handling an attempt to copy a file atop a directory (expect an IOException),
+ or a directory over a path which contains a file (mkdir will fail, so
+ "false").
+
+ The operation is recursive, and the deleteSource operation takes place
+ as each subdirectory is copied. Therefore, if an operation fails partway
+ through, the source tree may be partially deleted.
+
+ If a file is changed while the readVectored() operation is in progress, the output is
+ undefined. Some ranges may have old data, some may have new and some may have both.
+
+
+ While a readVectored() operation is in progress, normal read api calls may block.
+
+ Consult the filesystem specification document for the requirements
+ of an implementation of this interface.]]>
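+
+ A hedged sketch of a vectored read, assuming in is an open
+ FSDataInputStream and that the FileRange factory described in the
+ filesystem specification is available (offsets and lengths are illustrative):
+
+      List<FileRange> ranges = new ArrayList<>();
+      ranges.add(FileRange.createFileRange(0, 4096));
+      ranges.add(FileRange.createFileRange(1_048_576, 8192));
+      in.readVectored(ranges, ByteBuffer::allocate);
+      ByteBuffer first = ranges.get(0).getData().join(); // each range completes asynchronously
+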
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Exceptions are caught and downgraded to debug logging.
+ @param source source of statistics.
+ @return a string for logging.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is designed to be affordable to use in log statements.
+ @param source source of statistics -may be null.
+ @return an object whose toString() operation returns the current values.]]>
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is for use in log statements where the cost of creating
+ this entry is low; it is affordable to use in log statements.
+ @param statistics statistics to stringify -may be null.
+ @return an object whose toString() operation returns the current values.]]>
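+
+ A minimal logging sketch, assuming an SLF4J logger named LOG and a stream
+ that implements IOStatisticsSource:
+
+      LOG.debug("IOStatistics: {}",
+          IOStatisticsLogging.demandStringifyIOStatisticsSource(in));
+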
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It is serializable so that frameworks which can use java serialization
+ to propagate data (Spark, Flink...) can send the statistics
+ back. For this reason, TreeMaps are explicitly used as field types,
+ even though IDEs can recommend use of Map instead.
+ For security reasons, untrusted java object streams should never be
+ deserialized. If for some reason this is required, use
+ {@link #requiredSerializationClasses()} to get the list of classes
+ used when deserializing instances of this object.
+
+
+ It is annotated for correct serializations with jackson2.
+
]]>
+
+
+
+
+
+
+
+
+
+ This is not an atomic operation.
+
+ The instance can be serialized, and its
+ {@code toString()} method lists all the values.
+ @param statistics statistics
+ @return a snapshot of the current values.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It can be used to accrue values so as to dynamically update
+ the mean. If so, know that there is no synchronization
+ on the methods.
+
+
+ If a statistic has 0 samples then it is considered to be empty.
+
+
+ All 'empty' statistics are equivalent, independent of the sum value.
+
+
+ For non-empty statistics, sum and sample values must match
+ for equality.
+
+
+ It is serializable and annotated for correct serializations with jackson2.
+
+
+ Thread safety. The operations to add/copy sample data are thread safe.
+
+
+
{@link #add(MeanStatistic)}
+
{@link #addSample(long)}
+
{@link #clear()}
+
{@link #setSamplesAndSum(long, long)}
+
{@link #set(MeanStatistic)}
+
{@link #setSamples(long)} and {@link #setSum(long)}
+
+
+ So is the {@link #mean()} method. This ensures that when
+ used to aggregated statistics, the aggregate value and sample
+ count are set and evaluated consistently.
+
+
+ Other methods are marked as synchronized because Findbugs overreacts
+ to the fact that some operations which update sum and sample count
+ are synchronized while things like equals are not.
+
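+ A minimal usage sketch of this statistic (sample values are illustrative):
+
+      MeanStatistic readLatency = new MeanStatistic();
+      readLatency.addSample(12);
+      readLatency.addSample(20);
+      double mean = readLatency.mean();   // 16.0 for these two samples
+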
+ The names of the constants are uppercase, with words separated by
+ underscores.
+
+
+ The values of the constants are the lowercase forms of the constant names.
+
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Since these methods are often vendor- or device-specific, operators
+ may implement this interface in order to achieve fencing.
+
+ Fencing is configured by the operator as an ordered list of methods to
+ attempt. Each method will be tried in turn, and the next in the list
+ will only be attempted if the previous one fails. See {@link NodeFencer}
+ for more information.
+
+ If an implementation also implements {@link Configurable} then its
+ setConf method will be called upon instantiation.]]>
+
StaticUserWebFilter - An authorization plugin that makes all
+users a statically configured user.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ public class IntArrayWritable extends ArrayWritable {
+ public IntArrayWritable() {
+ super(IntWritable.class);
+ }
+ }
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ByteWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to store
+ @param item the object to be stored
+ @param keyName the name of the key to use
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param items the objects to be stored
+ @param keyName the name of the key to use
+ @throws IndexOutOfBoundsException if the items array is empty
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+ DefaultStringifier offers convenience methods to store/load objects to/from
+ the configuration.
+
+ @param the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a DoubleWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value argument is null or
+ its size is zero, the elementType argument must not be null. If
+ the argument value's size is bigger than zero, the argument
+ elementType is not used.
+
+ @param value enumSet value.
+ @param elementType elementType.]]>
+
+
+
+
+ value should not be null
+ or empty.
+
+ @param value enumSet value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value and elementType. If the value argument
+ is null or its size is zero, the elementType argument must not be
+ null. If the argument value's size is bigger than zero, the
+ argument elementType is not used.
+
+ @param value enumSet Value.
+ @param elementType elementType.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an EnumSetWritable with the same value,
+ or both are null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a FloatWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
When two sequence files, which have the same Key type but different Value
+ types, are mapped out to reduce, multiple Value types are not allowed.
+ In this case, this class can help you wrap instances with different types.
+
+
+
+ Compared with ObjectWritable, this class is much more efficient,
+ because ObjectWritable will append the class declaration as a String
+ into the output file in every Key-Value pair.
+
+
+
+ Generic Writable implements {@link Configurable} interface, so that it will be
+ configured by the framework. The configuration is passed to the wrapped objects
+ implementing {@link Configurable} interface before deserialization.
+
+
+ how to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines
+ the classes that will be wrapped in GenericObject in the application.
+ Attention: the classes defined in the getTypes() method must
+ implement the Writable interface (see the sketch below).
+
+
+ @since Nov 8, 2006]]>
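+
+ A minimal sketch of such a subclass (the wrapped types are illustrative):
+
+      public class GenericObject extends GenericWritable {
+        private static final Class<? extends Writable>[] TYPES =
+            new Class[] { IntWritable.class, Text.class };
+
+        protected Class<? extends Writable>[] getTypes() {
+          return TYPES;
+        }
+      }
+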
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a IntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ closes the input and output streams
+ at the end.
+
+ @param in InputStrem to read from
+ @param out OutputStream to write to
+ @param conf the Configuration object.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param log the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close
+ @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)}
+ instead]]>
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param logger the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is better than File#listDir because it does not ignore IOExceptions.
+
+ @param dir The directory to list.
+ @param filter If non-null, the filter to use when listing
+ this directory.
+ @return The list of files in the directory.
+
+ @throws IOException On I/O error]]>
+
+
+
+
+
+
+
+ Borrowed from Uwe Schindler in LUCENE-5588
+ @param fileToSync the file to fsync
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a LongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A map is a directory containing two files, the data file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+
The index file is read entirely into memory. Thus key implementations
+ should try to keep themselves small.
+
+
Map files are created by adding entries in-order. To maintain a large
+ database, perform updates by copying the previous version of a database and
+ merging in a sorted change list, to create a new version of the database in
+ a new file. Sorting large change lists can be done with {@link
+ SequenceFile.Sorter}.]]>
+
SequenceFile provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
+
+ There are three SequenceFileWriters based on the
+ {@link CompressionType} used to compress key/value pairs:
+
+
+ Writer : Uncompressed records.
+
+
+ RecordCompressWriter : Record-compressed files, only compress
+ values.
+
+
+ BlockCompressWriter : Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+
+
+
The actual compression algorithm used to compress key and/or values can be
+ specified by using the appropriate {@link CompressionCodec}.
+
+
The recommended way is to use the static createWriter methods
+ provided by the SequenceFile to choose the preferred format.
+
+
The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile formats.
+
+
SequenceFile Formats
+
+
Essentially there are 3 different formats for SequenceFiles
+ depending on the CompressionType specified. All of them share a
+ common header described below.
+
+
SequenceFile Header
+
+
+ version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ version number (e.g. SEQ4 or SEQ6)
+
+
+ keyClassName -key class
+
+
+ valueClassName - value class
+
+
+ compression - A boolean which specifies if compression is turned on for
+ keys/values in this file.
+
+
+ blockCompression - A boolean which specifies if block-compression is
+ turned on for keys/values in this file.
+
+
+ compression codec - CompressionCodec class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+
+
+ metadata - {@link Metadata} for this file.
+
+
+ sync - A sync marker to denote end of the header.
+
The compressed blocks of key lengths and value lengths consist of the
+ actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ format.
+
+ @see CompressionCodec]]>
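+
+ A hedged writer sketch using the option-based createWriter overload
+ (the path, key/value types and compression choice are illustrative):
+
+      SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+          SequenceFile.Writer.file(new Path("/tmp/data.seq")),
+          SequenceFile.Writer.keyClass(Text.class),
+          SequenceFile.Writer.valueClass(IntWritable.class),
+          SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK));
+      writer.append(new Text("key-1"), new IntWritable(1));
+      writer.close();
+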
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ShortWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ position. Note that this
+ method avoids using the converter or doing String instantiation.
+
+ @param position input position.
+ @return the Unicode scalar value at position or -1
+ if the position is invalid or points to a
+ trailing byte.]]>
+
+
+
+
+
+
+
+
+
+ what in the backing
+ buffer, starting at position start. The starting
+ position is measured in bytes and the return value is in
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+
+ @param what input what.
+ @param start input start.
+ @return byte position of the first occurrence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
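+
+ A small sketch of both byte-level lookups (the sample string is ASCII, so
+ byte offsets match character offsets here):
+
+      Text t = new Text("hadoop");
+      int pos = t.find("doo", 0);   // byte offset 2 in the UTF-8 buffer
+      int cp  = t.charAt(0);        // Unicode scalar value of 'h'
+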
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: For performance reasons, this call does not clear the
+ underlying byte array that is retrievable via {@link #getBytes()}.
+ In order to free the byte-array memory, call {@link #set(byte[])}
+ with an empty byte array (For example, new byte[0]).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a Text with the same contents.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+
+ @param utf8 input utf8.
+ @param start input start.
+ @param length input length.
+ @param replace input replace.
+ @throws CharacterCodingException a character encoding or
+ decoding error occurs.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+
+ @param string input string.
+ @param replace input replace.
+ @return ByteBuffer: bytes stored at ByteBuffer.array()
+ and length is ByteBuffer.limit()
+ @throws CharacterCodingException a character encoding or decoding error occurs.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In
+ addition, it provides methods for string traversal without converting the
+ byte array to a string.
Also includes utilities for
+ serializing/deserializing a string, coding/decoding a string, checking if a
+ byte array contains valid UTF8 code, calculating the length of an encoded
+ string.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is useful when a class may evolve, so that instances written by the
+ old version of the class may still be processed by the new version. To
+ handle this situation, {@link #readFields(DataInput)}
+ implementations should catch {@link VersionMismatchException}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VIntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VLongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ out.
+
+ @param out DataOuput to serialize this object into.
+ @throws IOException any other problem for write.]]>
+
+
+
+
+
+
+ in.
+
+
For efficiency, implementations should attempt to re-use storage in the
+ existing object where possible.
+
+ @param in DataInput to deserialize this object from.
+ @throws IOException any other problem for readFields.]]>
+
+
+
+ Any key or value type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+
Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
+
+
Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ // Default constructor to allow (de)serialization
+ MyWritable() { }
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+
]]>
+
+
+
+
+
+
+
+
+ WritableComparables can be compared to each other, typically
+ via Comparators. Any type which is to be used as a
+ key in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+
Note that hashCode() is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode() implementation in Object does not
+ satisfy this property.
+
+
Example:
+
+ public class MyWritableComparable implements
+ WritableComparable{@literal <MyWritableComparable>} {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public int compareTo(MyWritableComparable o) {
+ int thisValue = this.value;
+ int thatValue = o.value;
+ return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ }
+
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + counter;
+ result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+ return result;
+ }
+ }
+
One may optimize compare-intensive operations by overriding
+ {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are
+ provided to assist in optimized implementations of this method.]]>
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec object]]>
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec class]]>
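+
+ A hedged lookup sketch using CompressionCodecFactory (the configuration and
+ alias are illustrative):
+
+      CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
+      CompressionCodec codec = factory.getCodecByName("gzip");   // alias lookup, case-insensitive
+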
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations are assumed to be buffered. This permits clients to
+ reposition the underlying input stream then call {@link #resetState()},
+ without having to also synchronize client buffers.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ #setInput() should be called in order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the end of the compressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+ (Both native and non-native versions of various Decompressors require
+ that the data passed in via b[] remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called to
+ provide more input.
+
+ @return true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if a preset dictionary is needed for decompression.
+ @return true if a preset dictionary is needed for decompression]]>
+
+
+
+
+ true if the end of the decompressed
+ data output stream has been reached. Indicates a concatenated data stream
+ when finished() returns true and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true if the end of the decompressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true and getRemaining() returns a positive value. If
+ {@link #finished()} returns true and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+
+
+
+
+ true and {@link #getRemaining()} returns a positive value,
+ reset() is called before processing of the next data stream in the
+ concatenated data stream. {@link #finished()} will be reset and will
+ return false when reset() is called.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec
+ io.compress.passthrough.extension = .gz
+
+
+ Note: this is not a Splittable codec: it doesn't know the
+ capabilities of the passed in stream. It should be possible to
+ extend this in a subclass: the inner classes are marked as protected
+ to enable this. Do not retrofit splitting to this class.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Seek by key or by file offset.
+
+ The memory footprint of a TFile includes the following:
+
+
Some constant overhead of reading or writing a compressed block.
+
+
Each compressed block requires one compression/decompression codec for
+ I/O.
+
Temporary space to buffer the key.
+
Temporary space to buffer the value (for TFile.Writer only). Values are
+ chunk encoded, so that we buffer at most one chunk of user data. By default,
+ the chunk buffer is 1MB. Reading chunked value does not require additional
+ memory.
+
+
TFile index, which is proportional to the total number of Data Blocks.
+ The total amount of memory needed to hold the index can be estimated as
+ (56+AvgKeySize)*NumBlocks.
+
MetaBlock index, which is proportional to the total number of Meta
+ Blocks.The total amount of memory needed to hold the index for Meta Blocks
+ can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
+
+
+ The behavior of TFile can be customized by the following variables through
+ Configuration:
+
+
tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default
+ to 1MB. Values with a length less than the chunk size are guaranteed to have
+ a known value length at read time (see
+ {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
+
tfile.fs.output.buffer.size: Buffer size used for
+ FSDataOutputStream. Integer (in bytes). Default to 256KB.
+
tfile.fs.input.buffer.size: Buffer size used for
+ FSDataInputStream. Integer (in bytes). Default to 256KB.
+
+
+ Suggestions on performance optimization.
+
+
Minimum block size. We recommend a setting of minimum block size between
+ 256KB to 1MB for general usage. Larger block size is preferred if files are
+ primarily for sequential access. However, it would lead to inefficient random
+ access (because there are more data to decompress). Smaller blocks are good
+ for random access, but require more memory to hold the block index, and may
+ be slower to create (because we must flush the compressor stream at the
+ conclusion of each data block, which leads to an FS I/O flush). Further, due
+ to the internal caching in Compression codec, the smallest possible block
+ size would be around 20KB-30KB.
+
The current implementation does not offer true multi-threading for
+ reading. The implementation uses FSDataInputStream seek()+read(), which is
+ shown to be much faster than positioned-read call in single thread mode.
+ However, it also means that if multiple threads attempt to access the same
+ TFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ sequentially even if they access different DFS blocks.
+
Compression codec. Use "none" if the data is not very compressible (by
+ compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo"
+ as the starting point for experimenting. "gz" offers a slightly better
+ compression ratio than "lzo" but requires 4x CPU to compress and 2x CPU to
+ decompress, compared to "lzo".
+
File system buffering, if the underlying FSDataInputStream and
+ FSDataOutputStream are already adequately buffered; or if applications
+ read/write keys and values in large buffers, we can reduce the sizes of
+ input/output buffering in TFile layer by setting the configuration parameters
+ "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
+
+
+ Some design rationale behind TFile can be found at Hadoop-3315.]]>
+
+
+
+
+
+
+
+
+
+
+ Utils#writeVLong(out, n).
+
+ @param out
+ output stream
+ @param n
+ The integer to be encoded
+ @throws IOException raised on errors performing I/O.
+ @see Utils#writeVLong(DataOutput, long)]]>
+
+
+
+
+
+
+
+
+
if n in [-32, 127): encode in one byte with the actual value.
+ Otherwise,
+
if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ byte[1]=n&0xff. Otherwise,
+
if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+
if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff;
+ byte[3]=n&0xff.
+ Otherwise:
+
if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ (n>>24)&0xff; byte[2]=(n>>16)&0xff;
+ byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+
if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ (n>>32)&0xff; byte[2]=(n>>24)&0xff;
+ byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff;
+ byte[5]=n&0xff
+
if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ (n>>40)&0xff; byte[2]=(n>>32)&0xff;
+ byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff;
+ byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+
if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ (n>>48)&0xff; byte[2] = (n>>40)&0xff;
+ byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]=
+ (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff;
+
if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ (n>>54)&0xff; byte[2] = (n>>48)&0xff;
+ byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff;
+ byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]=
+ (n>>8)&0xff; byte[8]=n&0xff;
+
+
+ @param out
+ output stream
+ @param n
+ the integer number
+ @throws IOException raised on errors performing I/O.]]>
+
if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+
if (FB in [-104, -73]), return (FB+88)<<16 +
+ (NB[0]&0xff)<<8 + NB[1]&0xff;
+
if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)
+ <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff;
+
if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
+ big-endian integer.
+
+ @param in
+ input stream
+ @return the decoded long integer.
+ @throws IOException raised on errors performing I/O.]]>
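+
+ A small round-trip sketch, assuming this is the Utils class in the tfile
+ package and using the in-memory buffers from org.apache.hadoop.io:
+
+      DataOutputBuffer out = new DataOutputBuffer();
+      Utils.writeVLong(out, 300L);              // 300 falls in [-20*2^8, 20*2^8): two bytes
+      DataInputBuffer in = new DataInputBuffer();
+      in.reset(out.getData(), out.getLength());
+      long decoded = Utils.readVLong(in);       // 300
+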
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An experimental {@link Serialization} for Java {@link Serializable} classes.
+
+ @see JavaSerializationComparator]]>
+
+
+
+
+
+
+
+
+
+
+ A {@link RawComparator} that uses a {@link JavaSerialization}
+ {@link Deserializer} to deserialize objects that are then compared via
+ their {@link Comparable} interfaces.
+
+ @param generic type.
+ @see JavaSerialization]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides a mechanism for using different serialization frameworks
+in Hadoop. The property "io.serializations" defines a list of
+{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create
+{@link org.apache.hadoop.io.serializer.Serializer}s and
+{@link org.apache.hadoop.io.serializer.Deserializer}s.
+
+
+
+To add a new serialization framework write an implementation of
+{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the
+"io.serializations" property.
+
]]>
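+
+ A hedged configuration sketch; the framework list is illustrative and would
+ normally be set in core-site.xml rather than in code:
+
+      Configuration conf = new Configuration();
+      conf.set("io.serializations",
+          "org.apache.hadoop.io.serializer.WritableSerialization,"
+          + "org.apache.hadoop.io.serializer.avro.AvroReflectSerialization");
+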
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ avro.reflect.pkgs or implement
+ {@link AvroReflectSerializable} interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides Avro serialization in Hadoop. This can be used to
+serialize/deserialize Avro types in Hadoop.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for
+serialization of classes generated by Avro's 'specific' compiler.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for
+other classes.
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for
+any class which is either in the package list configured via
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES}
+or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable}
+interface.
+
{@link MetricsSource} generate and update metrics information.
+
{@link MetricsSink} consume the metrics information
+
+
+ {@link MetricsSource} and {@link MetricsSink} register with the metrics
+ system. Implementations of {@link MetricsSystem} polls the
+ {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to
+ {@link MetricsSink}.]]>
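+
+ A minimal source sketch, assuming the metrics2 library classes
+ MetricsCollector and Interns (the record and gauge names are illustrative):
+
+      public class MyMetricsSource implements MetricsSource {
+        public void getMetrics(MetricsCollector collector, boolean all) {
+          collector.addRecord("MySource")
+                   .addGauge(Interns.info("QueueLength", "Current queue length"), 42);
+        }
+      }
+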
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } (aggregate).
+ Filter out entries that don't have at least minSamples.
+
+ @param minSamples input minSamples.
+ @return a map of peer DataNode Id to the average latency to that
+ node seen over the measurement period.]]>
+
+
+
+
+
+
+
+
+
+
+ This class maintains a group of rolling average metrics. It implements the
+ algorithm of rolling average, i.e. a number of sliding windows are kept to
+ roll over and evict old subsets of samples. Each window has a subset of
+ samples in a stream, where sub-sum and sub-total are collected. All sub-sums
+ and sub-totals in all windows will be aggregated to final-sum and final-total
+ used to compute final average, which is called rolling average.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class is a metrics sink that uses
+ {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every
+ roll interval a new directory will be created under the path specified by the
+ basepath property. All metrics will be logged to a file in the
+ current interval's directory in a file named <hostname>.log, where
+ <hostname> is the name of the host on which the metrics logging
+ process is running. The base path is set by the
+ <prefix>.sink.<instance>.basepath property. The
+ time zone used to create the current interval's directory name is GMT. If
+ the basepath property isn't specified, it will default to
+ "/tmp", which is the temp directory on whatever default file
+ system is configured for the cluster.
+
+
The <prefix>.sink.<instance>.ignore-error
+ property controls whether an exception is thrown when an error is encountered
+ writing a log file. The default value is true. When set to
+ false, file errors are quietly swallowed.
+
+
The roll-interval property sets the amount of time before
+ rolling the directory. The default value is 1 hour. The roll interval may
+ not be less than 1 minute. The property's value should be given as
+ number unit, where number is an integer value, and
+ unit is a valid unit. Valid units are minute, hour,
+ and day. The units are case insensitive and may be abbreviated or
+ plural. If no units are specified, hours are assumed. For example,
+ "2", "2h", "2 hour", and
+ "2 hours" are all valid ways to specify two hours.
+
+
The roll-offset-interval-millis property sets the upper
+ bound on a random time interval (in milliseconds) that is used to delay
+ before the initial roll. All subsequent rolls will happen an integer
+ number of roll intervals after the initial roll, hence retaining the original
+ offset. The purpose of this property is to insert some variance in the roll
+ times so that large clusters using this sink on every node don't cause a
+ performance impact on HDFS by rolling simultaneously. The default value is
+ 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in
+ millis should be no less than the number of sink instances times 5.
+
+
The primary use of this class is for logging to HDFS. As it uses
+ {@link org.apache.hadoop.fs.FileSystem} to access the target file system,
+ however, it can be used to write to the local file system, Amazon S3, or any
+ other supported file system. The base path for the sink will determine the
+ file system used. An unqualified path will write to the default file system
+ set by the configuration.
+
+
Not all file systems support the ability to append to files. In file
+ systems without the ability to append to files, only one writer can write to
+ a file at a time. To allow for concurrent writes from multiple daemons on a
+ single host, the source property is used to set unique headers
+ for the log files. The property should be set to the name of
+ the source daemon, e.g. namenode. The value of the
+ source property should typically be the same as the property's
+ prefix. If this property is not set, the source is taken to be
+ unknown.
+
+
Instead of appending to an existing file, by default the sink
+ will create a new file with a suffix of ".<n>", where
+ n is the next lowest integer that isn't already used in a file name,
+ similar to the Hadoop daemon logs. NOTE: the file with the highest
+ sequence number is the newest file, unlike the Hadoop daemon logs.
+
+
For file systems that allow append, the sink supports appending to the
+ existing file instead. If the allow-append property is set to
+ true, the sink will instead append to the existing file on file systems that
+ support appends. By default, the allow-append property is
+ false.
+
+
Note that when writing to HDFS with allow-append set to true,
+ there is a minimum acceptable number of data nodes. If the number of data
+ nodes drops below that minimum, the append will succeed, but reading the
+ data will fail with an IOException in the DataStreamer class. The minimum
+ number of data nodes required for a successful append is generally 2 or
+ 3.
+
+
Note also that when writing to HDFS, the file size information is not
+ updated until the file is closed (at the end of the interval) even though
+ the data is being written successfully. This is a known HDFS limitation that
+ exists because of the performance cost of updating the metadata. See
+ HDFS-5478.
+
+
When using this sink in a secure (Kerberos) environment, two additional
+ properties must be set: keytab-key and
+ principal-key. keytab-key should contain the key by
+ which the keytab file can be found in the configuration, for example,
+ yarn.nodemanager.keytab. principal-key should
+ contain the key by which the principal can be found in the configuration,
+ for example, yarn.nodemanager.principal.]]>
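+
+ As an illustration, a hadoop-metrics2.properties configuration for this sink
+ could look like the following (the instance name, base path and roll settings
+ are made up; the keytab and principal keys are the example keys given above):
+
+   *.sink.rolling.class=org.apache.hadoop.metrics2.sink.RollingFileSystemSink
+   nodemanager.sink.rolling.basepath=hdfs:///metrics
+   nodemanager.sink.rolling.roll-interval=1h
+   nodemanager.sink.rolling.roll-offset-interval-millis=30000
+   nodemanager.sink.rolling.source=nodemanager
+   nodemanager.sink.rolling.allow-append=true
+   nodemanager.sink.rolling.keytab-key=yarn.nodemanager.keytab
+   nodemanager.sink.rolling.principal-key=yarn.nodemanager.principal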
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CollectD StatsD plugin).
+
+ To configure this plugin, you will need to add the following
+ entries to your hadoop-metrics2.properties file:
+
+
+ *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
+ [prefix].sink.statsd.server.host=
+ [prefix].sink.statsd.server.port=
+ [prefix].sink.statsd.skip.hostname=true|false (optional)
+ [prefix].sink.statsd.service.name=NameNode (name you want for service)
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Register the MBean using our standard MBeanName format
+ "hadoop:service=<serviceName>,name=<nameName>"
+ Where the {@literal <serviceName>} and {@literal <nameName>} are the supplied
+ parameters.
+
+ @param serviceName serviceName.
+ @param nameName nameName.
+ @param theMbean - the MBean to register
+ @return the named used to register the MBean]]>
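+
+ A usage sketch (the service and bean names are illustrative, and
+ myInfoMBean stands for an already-constructed MBean instance):
+
+   import javax.management.ObjectName;
+   import org.apache.hadoop.metrics2.util.MBeans;
+
+   // Registers the bean under "Hadoop:service=MyService,name=MyInfo".
+   ObjectName beanName = MBeans.register("MyService", "MyInfo", myInfoMBean);
+   // ... later, during shutdown:
+   MBeans.unregister(beanName);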
+
+
+
+
+
+
+
+
+ Register the MBean using our standard MBeanName format
+ "hadoop:service=<serviceName>,name=<nameName>"
+ Where the {@literal <serviceName>} and {@literal <nameName>} are the supplied
+ parameters.
+
+ @param serviceName serviceName.
+ @param nameName nameName.
+ @param properties - Key value pairs to define additional JMX ObjectName
+ properties.
+ @param theMbean - the MBean to register
+ @return the named used to register the MBean]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @param specs server specs (see description)
+ @param defaultPort the default port if not specified
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is used when parts of Hadoop need to know whether to apply
+ single rack vs multi-rack policies, such as during block placement.
+ Such algorithms behave differently if they are on multi-switch systems.
+
+
+ @return true if the mapping thinks that it is on a single switch]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This predicate simply assumes that all mappings not derived from
+ this class are multi-switch.
+ @param mapping the mapping to query
+ @return true if the base class says it is single switch, or the mapping
+ is not derived from this class.]]>
+
+
+
+ It is not mandatory to
+ derive {@link DNSToSwitchMapping} implementations from it, but it is strongly
+ recommended, as it makes it easy for the Hadoop developers to add new methods
+ to this base class that are automatically picked up by all implementations.
+
+
+ This class does not extend the Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
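+
+ A minimal sketch of a custom mapping built on this base class (the host and
+ rack names are illustrative; the bundled script or table based mappings
+ should normally be preferred):
+
+   import java.util.ArrayList;
+   import java.util.Collections;
+   import java.util.List;
+   import java.util.Map;
+   import org.apache.hadoop.net.AbstractDNSToSwitchMapping;
+   import org.apache.hadoop.net.NetworkTopology;
+
+   public class StaticRackMapping extends AbstractDNSToSwitchMapping {
+
+     // Hypothetical static host-to-rack table.
+     private static final Map<String, String> RACKS =
+         Collections.singletonMap("host1.example.com", "/rack1");
+
+     @Override
+     public List<String> resolve(List<String> names) {
+       List<String> racks = new ArrayList<>(names.size());
+       for (String name : names) {
+         // Unknown hosts fall back to the default rack, as the bundled mappings do.
+         racks.add(RACKS.getOrDefault(name, NetworkTopology.DEFAULT_RACK));
+       }
+       return racks;
+     }
+
+     @Override
+     public void reloadCachedMappings() {
+       // Nothing is cached in this sketch.
+     }
+
+     @Override
+     public void reloadCachedMappings(List<String> names) {
+       // Nothing is cached in this sketch.
+     }
+   }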
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If a name cannot be resolved to a rack, the implementation
+ should return {@link NetworkTopology#DEFAULT_RACK}. This
+ is what the bundled implementations do, though it is not a formal requirement
+
+ @param names the list of hosts to resolve (can be empty)
+ @return list of resolved network paths.
+ If names is empty, the returned list is also empty]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Calling {@link #setConf(Configuration)} will trigger a
+ re-evaluation of the configuration settings and so be used to
+ set up the mapping script.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will get called in the superclass constructor, so a check is needed
+ to ensure that the raw mapping is defined before trying to relay a null
+ configuration.
+
+ @param conf input Configuration.]]>
+
+
+
+
+
+
+
+
+
+ It contains a static class RawScriptBasedMapping that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text
+ file. The columns are separated by whitespace. The first column is a DNS or
+ IP address and the second column specifies the rack where the address maps.
+
+
+ This class uses the configuration parameter {@code
+ net.topology.table.file.name} to locate the mapping file.
+
+
+ Calls to {@link #resolve(List)} will look up the address as defined in the
+ mapping file. If no entry corresponding to the address is found, the value
+ {@code /default-rack} is returned.
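+
+ For illustration, such a topology table file (host names and racks are made
+ up) could contain:
+
+   host1.example.com    /rack1
+   host2.example.com    /rack1
+   192.168.10.34        /rack2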
+
+ An instance of the default {@link DelegationTokenAuthenticator} will be
+ used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL using the default
+ {@link DelegationTokenAuthenticator} class.
+
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+
+
+
+
+
+
+
+ The default class is {@link KerberosDelegationTokenAuthenticator}
+
+ @return the delegation token authenticator class to use as default.]]>
+
+
+
+
+
+
+ This method is provided to enable WebHDFS backwards compatibility.
+
+ @param useQueryString TRUE if the token is transmitted in the
+ URL query string, FALSE if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+
+
+
+
+ TRUE if the token is transmitted in the URL query
+ string, FALSE if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator. If the doAs parameter is not NULL,
+ the request will be done on behalf of the specified doAs user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return delegation token long value.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return delegation token long value.]]>
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+ DelegationTokenAuthenticatedURL is a
+ {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token
+ functionality.
+
+ The authentication mechanisms supported by default are Hadoop Simple
+ authentication (also known as pseudo authentication) and Kerberos SPNEGO
+ authentication.
+
+ Additional authentication mechanisms can be supported via {@link
+ DelegationTokenAuthenticator} implementations.
+
+ The default {@link DelegationTokenAuthenticator} is the {@link
+ KerberosDelegationTokenAuthenticator} class which supports
+ automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via
+ the {@link PseudoDelegationTokenAuthenticator} class.
+
+ AuthenticatedURL instances are not thread-safe.]]>
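+
+ A usage sketch (the URL and renewer are illustrative; exception handling is
+ omitted):
+
+   import java.net.HttpURLConnection;
+   import java.net.URL;
+   import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL;
+
+   URL url = new URL("http://rm.example.com:8088/ws/v1/cluster/info");
+   DelegationTokenAuthenticatedURL.Token token = new DelegationTokenAuthenticatedURL.Token();
+   DelegationTokenAuthenticatedURL authUrl = new DelegationTokenAuthenticatedURL();
+   HttpURLConnection conn = authUrl.openConnection(url, token);
+   // ... consume the response; a delegation token for later use can be obtained with
+   // authUrl.getDelegationToken(url, token, "renewer");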
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return abstract delegation token identifier.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return abstract delegation token identifier.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param dToken abstract delegation token identifier.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return delegation token long value.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @param dToken abstract delegation token identifier.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.
+ @return delegation token long value.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param dToken abstract delegation token identifier.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param dToken abstract delegation token identifier.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KerberosDelegationTokenAuthenticator provides support for
+ Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation
+ Token operations.
+
+ It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP
+ endpoint does not trigger a SPNEGO authentication]]>
+
+
+
+
+
+
+
+
+ PseudoDelegationTokenAuthenticator provides support for
+ Hadoop's pseudo authentication mechanism that accepts
+ the user name specified as a query string parameter and support for Hadoop
+ Delegation Token operations.
+
+ This mimics the model of Hadoop Simple authentication trusting the
+ {@link UserGroupInformation#getCurrentUser()} value.]]>
+
Any long-lived operation here will prevent the service state
+ change from completing in a timely manner.
+
If another thread is somehow invoked from the listener, and
+ that thread invokes the methods of the service (including
+ subclass-specific methods), there is a risk of a deadlock.
+
+
+
+ @param service the service that has changed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The base implementation logs all arguments at the debug level,
+ then returns the passed in config unchanged.]]>
+
+
+
+
+
+
+ The action is to signal success by returning the exit code 0.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is called before {@link #init(Configuration)};
+ Any non-null configuration that is returned from this operation
+ becomes the one that is passed on to that {@link #init(Configuration)}
+ operation.
+
+ This permits implementations to change the configuration before
+ the init operation. As the ServiceLauncher only creates
+ an instance of the base {@link Configuration} class, it is
+ recommended to instantiate any subclass (such as YarnConfiguration)
+ that injects new resources.
+
+ @param config the initial configuration build up by the
+ service launcher.
+ @param args list of arguments passed to the command line
+ after any launcher-specific commands have been stripped.
+ @return the configuration to init the service with.
+ Recommended: pass down the config parameter with any changes
+ @throws Exception any problem]]>
+
+
+
+
+
+
+ The return value becomes the exit code of the launched process.
+
+ If an exception is raised, the policy is:
+
+
Any subclass of {@link org.apache.hadoop.util.ExitUtil.ExitException}:
+ the exception is passed up unmodified.
+
+
Any exception which implements
+ {@link org.apache.hadoop.util.ExitCodeProvider}:
+ A new {@link ServiceLaunchException} is created with the exit code
+ and message of the thrown exception; the thrown exception becomes the
+ cause.
+
Any other exception: a new {@link ServiceLaunchException} is created
+ with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and
+ the message of the original exception (which becomes the cause).
+
+ @return the exit code
+ @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed
+ up as the exit code and error text.
+ @throws Exception any exception to report. If it provides an exit code
+ this is used in a wrapping exception.]]>
+
+
+
+
+ The command line options will be passed down before the
+ {@link Service#init(Configuration)} operation is invoked via an
+ invocation of {@link LaunchableService#bindArgs(Configuration, List)}
+ After the service has been successfully started via {@link Service#start()}
+ the {@link LaunchableService#execute()} method is called to execute the
+ service. When this method returns, the service launcher will exit, using
+ the return code from the method as its exit option.]]>
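+
+ A minimal sketch of such a service (names are illustrative):
+
+   import java.util.List;
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.service.AbstractService;
+   import org.apache.hadoop.service.launcher.LaunchableService;
+   import org.apache.hadoop.service.launcher.LauncherExitCodes;
+
+   public class MyLaunchableService extends AbstractService
+       implements LaunchableService {
+
+     private List<String> arguments;
+
+     public MyLaunchableService() {
+       super("MyLaunchableService");
+     }
+
+     @Override
+     public Configuration bindArgs(Configuration config, List<String> args)
+         throws Exception {
+       // Keep the stripped command line; return the configuration that will
+       // be passed on to serviceInit().
+       this.arguments = args;
+       return config;
+     }
+
+     @Override
+     public int execute() throws Exception {
+       // Do the actual work; the return value becomes the process exit code.
+       return LauncherExitCodes.EXIT_SUCCESS;
+     }
+   }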
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 400 Bad Request}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 401 Unauthorized}]]>
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 403: Forbidden}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 404: Not Found}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 405: Not allowed}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 406: Not Acceptable}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 408: Request Timeout}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 409: Conflict}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 500 Internal Server Error}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 501: Not Implemented}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 503 Service Unavailable}]]>
+
+
+
+
+
+ If raised, this is expected to be raised server-side and likely due
+ to client/server version incompatibilities.
+
+ Approximate HTTP equivalent: {@code 505: Version Not Supported}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codes with a YARN prefix are YARN-related.
+
+ Many of the exit codes are designed to resemble HTTP error codes,
+ squashed into a single byte, e.g. 44, "not found", is the equivalent
+ of 404. The various 2XX HTTP error codes aren't followed;
+ the Unix standard of "0" for success is used.
+
+ 0-10: general command issues
+ 30-39: equivalent to the 3XX responses, where those responses are
+ considered errors by the application.
+ 40-49: client-side/CLI/config problems
+ 50-59: service-side problems.
+ 60+ : application specific error codes
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+
+ If the last argument is a throwable, it becomes the cause of the exception.
+ It will also be used as a parameter for the format.
+ @param exitCode exit code
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+ @param exitCode exit code
+ @param cause inner cause
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+ When caught by the ServiceLauncher, it will convert that
+ into a process exit code.
+
+ The {@link #ServiceLaunchException(int, String, Object...)} constructor
+ generates formatted exceptions.]]>
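+
+ For example (the exit code constant and the path variable are illustrative):
+
+   throw new ServiceLaunchException(LauncherExitCodes.EXIT_NOT_FOUND,
+       "Required path %s does not exist", configuredPath);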
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will be 0 until a call
+ to {@link #finished()} has been made.
+
+ @return the currently recorded duration.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take a significant amount of time, since,
+ in the absence of reported progress, the framework has to assume that an error
+ has occurred and time out the operation.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Generics Type.
+ @param theClass class of which an object is created
+ @param conf Configuration
+ @return a new object]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Generics Type T
+ @param o object whose correctly-typed Class is to be obtained
+ @return the correctly typed Class of the given object.]]>
+
+
+
+
+
+
+
+
+ Generics Type.
+ @param conf input Configuration.
+ @param src the object to copy from
+ @param dst the object to copy into, which is destroyed
+ @return dst param (the copy)
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ kill -0 command or equivalent]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param parent File parent directory
+ @param basename String script file basename
+ @return File referencing the script in the directory]]>
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param basename String script file basename
+ @return String script file name]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ IOException.
+ @return the path to {@link #WINUTILS_EXE}
+ @throws RuntimeException if the path is not resolvable]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell.
+ @return the thread that ran runCommand() that spawned this shell
+ or null if no thread is waiting for this shell to complete]]>
+
+
+
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException raised on errors performing I/O.]]>
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @param timeout time in milliseconds after which script should be marked timeout
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+ Shell processes.
+ Iterates through a map of all currently running Shell
+ processes and destroys them one by one. This method is thread safe]]>
+
+
+
+
+ Shell objects.
+
+ @return all shells set.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CreateProcess synchronization object.]]>
+
+
+
+
+ os.name property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Important: caller must check for this value being null.
+ The lack of such checks has led to many support issues being raised.
+
+ @deprecated use one of the exception-raising getter methods,
+ specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell can be used to run shell commands like du or
+ df. It also offers facilities to gate commands by
+ time-intervals.]]>
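+
+ A usage sketch (the command is illustrative; it assumes a Unix-like host):
+
+   import java.io.IOException;
+   import org.apache.hadoop.util.Shell;
+
+   public class ShellExample {
+     public static void main(String[] args) throws IOException {
+       // Run "df -h" and capture its output as a single string.
+       String output = Shell.execCommand("df", "-h");
+       System.out.println(output);
+     }
+   }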
+
+
+
+
+
+
+
+ ShutdownHookManager singleton.
+
+ @return ShutdownHookManager singleton.]]>
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook.]]>
+
+
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook
+ @param timeout timeout of the shutdownHook
+ @param unit unit of the timeout TimeUnit]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShutdownHookManager enables running shutdownHook
+ in a deterministic order, higher priority first.
+
+ The JVM runs ShutdownHooks in a non-deterministic order or in parallel.
+ This class registers a single JVM shutdownHook and run all the
+ shutdownHooks registered to it (to this class) in order based on their
+ priority.
+
+ Unless a hook was registered with a shutdown explicitly set through
+ {@link #addShutdownHook(Runnable, int, long, TimeUnit)},
+ the shutdown time allocated to it is set by the configuration option
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in
+ {@code core-site.xml}, with a default value of
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT}
+ seconds.]]>
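+
+ A usage sketch (the priority, timeout and cleanup routine are illustrative):
+
+   import java.util.concurrent.TimeUnit;
+   import org.apache.hadoop.util.ShutdownHookManager;
+
+   ShutdownHookManager.get().addShutdownHook(
+       () -> cleanUpResources(),   // hypothetical cleanup routine
+       10,                         // priority: higher runs earlier
+       30, TimeUnit.SECONDS);      // per-hook shutdown budget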
+
+ public class MyApp extends Configured implements Tool {
+
+ public int run(String[] args) throws Exception {
+ // Configuration processed by ToolRunner
+ Configuration conf = getConf();
+
+ // Create a JobConf using the processed conf
+ JobConf job = new JobConf(conf, MyApp.class);
+
+ // Process custom command-line options
+ Path in = new Path(args[1]);
+ Path out = new Path(args[2]);
+
+ // Specify various job-specific parameters
+ job.setJobName("my-app");
+ job.setInputPath(in);
+ job.setOutputPath(out);
+ job.setMapperClass(MyMapper.class);
+ job.setReducerClass(MyReducer.class);
+
+ // Submit the job, then poll for progress until the job is complete
+ RunningJob runningJob = JobClient.runJob(job);
+ if (runningJob.isSuccessful()) {
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ // Let ToolRunner handle generic command-line options
+ int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+
+ System.exit(res);
+ }
+ }
+
+
+ @see GenericOptionsParser
+ @see ToolRunner]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool by {@link Tool#run(String[])}, after
+ parsing with the given generic arguments. Uses the given
+ Configuration, or builds one if null.
+
+ Sets the Tool's configuration with the possibly modified
+ version of the conf.
+
+ @param conf Configuration for the Tool.
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.
+ @throws Exception Exception.]]>
+
+
+
+
+
+
+
+ Tool with its Configuration.
+
+ Equivalent to run(tool.getConf(), tool, args).
+
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.
+ @throws Exception exception.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ToolRunner can be used to run classes implementing
+ Tool interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+
+ generic hadoop command line arguments and modifies the
+ Configuration of the Tool. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Bloom filter, as defined by Bloom in 1970.
+
+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
+ the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ offers for the transmission of set membership information between networked hosts. A sender encodes
+ the information into a bit vector, the Bloom filter, that is more compact than a conventional
+ representation. Computation and space costs for construction are linear in the number of elements.
+ The receiver uses the filter to test whether various elements are members of the set. Though the
+ filter will occasionally return a false positive, it will never return a false negative. When creating
+ the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
+
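+ A usage sketch (the sizing parameters are illustrative):
+
+   import java.nio.charset.StandardCharsets;
+   import org.apache.hadoop.util.bloom.BloomFilter;
+   import org.apache.hadoop.util.bloom.Key;
+   import org.apache.hadoop.util.hash.Hash;
+
+   // A 2^20 bit vector with 5 hash functions.
+   BloomFilter filter = new BloomFilter(1 << 20, 5, Hash.MURMUR_HASH);
+   filter.add(new Key("alpha".getBytes(StandardCharsets.UTF_8)));
+   // Never a false negative:
+   boolean present = filter.membershipTest(new Key("alpha".getBytes(StandardCharsets.UTF_8)));
+   // Occasionally a false positive:
+   boolean maybe = filter.membershipTest(new Key("beta".getBytes(StandardCharsets.UTF_8)));
+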
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this counting Bloom filter.
+
+ Invariant: nothing happens if the specified key does not belong to this counter Bloom filter.
+ @param key The key to remove.]]>
+
+
+
+
+
+
+
+
+
+
+
+ key -> count map.
+
NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count with probability equal to the
+ error rate of this filter, and v > count otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count with the probability of the false negative rate of such
+ filter.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counting Bloom filter, as defined by Fan et al. in a ToN
+ 2000 paper.
+
+ A counting Bloom filter is an improvement to a standard Bloom filter as it
+ allows dynamic additions and deletions of set membership information. This
+ is achieved through the use of a counting vector instead of a bit vector.
+
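+ A usage sketch (the sizing and key are illustrative):
+
+   import java.nio.charset.StandardCharsets;
+   import org.apache.hadoop.util.bloom.CountingBloomFilter;
+   import org.apache.hadoop.util.bloom.Key;
+   import org.apache.hadoop.util.hash.Hash;
+
+   CountingBloomFilter cbf = new CountingBloomFilter(1 << 20, 5, Hash.MURMUR_HASH);
+   Key key = new Key("session-42".getBytes(StandardCharsets.UTF_8));
+   cbf.add(key);
+   int count = cbf.approximateCount(key);  // approximately 1
+   cbf.delete(key);                        // possible because counters, not bits, are kept
+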
+
+
+
+
+
+
+
+
+
+
+
+
+ Builds an empty Dynamic Bloom filter.
+ @param vectorSize The number of bits in the vector.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).
+ @param nr The threshold for the maximum number of keys to record in a
+ dynamic Bloom filter row.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
+
+ A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but
+ each of the s rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr elements are recorded in the
+ initial bit vector, where nr {@literal <=} n
+ (n is the cardinality of the set A to record in
+ the filter).
+
+ As the size of A grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr, is
+ strictly less than the current cardinality of A, n.
+ If an active Bloom filter is found, the key is inserted and
+ nr is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A and the element
+ is added in this new Bloom filter and the nr value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k positions are set to one in one of the matrix rows.
+
+
+
+
+
+
+
+
+ Builds a hash function that must obey a given maximum number of returned values and a highest value.
+ @param maxValue The maximum highest returned value.
+ @param nbHash The number of resulting hashed values.
+ @param hashType type of the hashing function (see {@link Hash}).]]>
+
+
+
+
+ this hash function. A NOOP]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The idea is to randomly select a bit to reset.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will generate the minimum
+ number of false negatives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will remove the maximum number
+ of false positives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will, at the same time, remove
+ the maximum number of false positives while minimizing the number of false
+ negatives generated.]]>
+
+
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this retouched Bloom filter.
+
+ Invariant: if the false positive is null, nothing happens.
+ @param key The false positive key to add.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param coll The collection of false positive.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The list of false positive.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The array of false positive.]]>
+
+
+
+
+
+
+ this retouched Bloom filter.
+ @param scheme The selective clearing scheme to apply.]]>
+
+
+
+
+
+
+
+
+
+
+
+ retouched Bloom filter, as defined in the CoNEXT 2006 paper.
+
+ It allows the removal of selected false positives at the cost of introducing
+ random false negatives, and with the benefit of eliminating some random false
+ positives at the same time.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param timeout timeout to wait
+ @param unit time unit.
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown
+ @throws TimeoutException the future timed out.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+
If it is an IOE: Return.
+
If it is a {@link UncheckedIOException}: return the cause
+
Completion/Execution Exceptions: extract and repeat
+
If it is an RTE or Error: throw.
+
Any other type: wrap in an IOE
+
+
+ Recursively handles wrapped Execution and Completion Exceptions in
+ case something very complicated has happened.
+ @param e exception.
+ @return an IOException extracted or built from the cause.
+ @throws RuntimeException if that is the inner cause.
+ @throws Error if that is the inner cause.]]>
+
+
+
+
+
+
+
+
+ type of result
+ @param type of builder
+ @return the builder passed in.]]>
+
+
+
+
+
+
+
+
+
+ fs.example.s3a.option becomes "s3a.option"
+ fs.example.fs.io.policy becomes "fs.io.policy"
+ fs.example.something becomes "something"
+
+ @param builder builder to modify
+ @param conf configuration to read
+ @param prefix prefix to scan/strip
+ @param mandatory are the options to be mandatory or optional?]]>
+
+
+
+
+
+ Return type.
+ @return the evaluated result.
+ @throws UnsupportedOperationException fail fast if unsupported
+ @throws IllegalArgumentException invalid argument]]>
+
+
+
+
+ Contains methods promoted from
+ {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they
+ are a key part of integrating async IO in application code.
+
+
+ One key feature is that the {@link #awaitFuture(Future)} and
+ {@link #awaitFuture(Future, long, TimeUnit)} calls will
+ extract and rethrow exceptions raised in the future's execution,
+ including extracting the inner IOException of any
+ {@code UncheckedIOException} raised in the future.
+ This makes it somewhat easier to execute IOException-raising
+ code inside futures.
+
]]>
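+
+ A usage sketch (fs and path are assumed to be an existing FileSystem and Path
+ in scope; exception handling is omitted):
+
+   import org.apache.hadoop.fs.FSDataInputStream;
+   import org.apache.hadoop.util.functional.FutureIO;
+
+   // Block on the asynchronous openFile() builder and rethrow any IOException directly.
+   FSDataInputStream in = FutureIO.awaitFuture(fs.openFile(path).build());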
+
+
+
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @param iterator iterator.
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @param iterable iterable.
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @param array array.
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @param mapper transformation
+ @return a remote iterator]]>
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @return a remote iterator]]>
+
+
+
+
+
+
+
+ Elements are filtered in the hasNext() method; if not used
+ the filtering will be done on demand in the {@code next()}
+ call.
+
+ @param type
+ @param iterator source
+ @param filter filter
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type.
+ @return a new iterator]]>
+
+
+
+
+
+
+ type
+ @return a list of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+ type
+ @return an array of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+ and this class's log is set to DEBUG,
+ then the statistics of the operation are evaluated and logged at
+ debug.
+
+ The number of entries processed is returned, as it is useful to
+ know this, especially during tests or when reporting values
+ to users.
+
+ This does not close the iterator afterwards.
+ @param source iterator source
+ @param consumer consumer of the values.
+ @return the number of elements processed
+ @param type of source
+ @throws IOException if the source RemoteIterator or the consumer raise one.]]>
+
+
+
+
+
+ type of source]]>
+
+
+
+
+ This aims to make it straightforward to use lambda-expressions to
+ transform the results of an iterator, without losing the statistics
+ in the process, and to chain the operations together.
+
+ The closeable operation will be passed through RemoteIterators which
+ wrap other RemoteIterators. This is to support any iterator which
+ can be closed to release held connections, file handles etc.
+ Unless client code is written to assume that RemoteIterator instances
+ may be closed, this is not likely to be broadly used. It is added
+ to make it possible to adopt this feature in a managed way.
+
+ One notable feature is that the
+ {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will
+ LOG at debug any IOStatistics provided by the iterator, if such
+ statistics are provided. There's no attempt at retrieval and logging
+ if the LOG is not set to debug, so it is a zero cost feature unless
+ the logger {@code org.apache.hadoop.fs.functional.RemoteIterators}
+ is at DEBUG.
+
+ Based on the S3A Listing code, and some work on moving other code
+ to using iterative listings so as to pick up the statistics.]]>
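+
+ A usage sketch (the data is illustrative; exception handling is omitted):
+
+   import java.util.Arrays;
+   import org.apache.hadoop.fs.RemoteIterator;
+   import org.apache.hadoop.util.functional.RemoteIterators;
+
+   RemoteIterator<String> names =
+       RemoteIterators.remoteIteratorFromIterable(Arrays.asList("a", "b", "c"));
+   RemoteIterator<String> upper =
+       RemoteIterators.mappingRemoteIterator(names, s -> s.toUpperCase());
+   long processed = RemoteIterators.foreach(upper, s -> System.out.println(s));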
+
+
+
+
+
+
+
+
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml
index 41efc183c3e..5fb267cfd0d 100644
--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@@ -175,6 +175,14 @@
+
+
+ org.codehaus.jettison
+ jettison
+ com.sun.jersey jersey-server
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
index abf3573986a..1218d22ecf5 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
@@ -26,9 +26,9 @@ MYNAME="${BASH_SOURCE-$0}"
function hadoop_usage
{
hadoop_add_option "buildpaths" "attempt to add class files from build tree"
- hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in slave mode"
+ hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in worker mode"
hadoop_add_option "loglevel level" "set the log4j level for this command"
- hadoop_add_option "hosts filename" "list of hosts to use in slave mode"
+ hadoop_add_option "hosts filename" "list of hosts to use in worker mode"
hadoop_add_option "workers" "turn on worker mode"
hadoop_add_subcommand "checknative" client "check native Hadoop and compression libraries availability"
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
index 55304916ad1..1d8096b4baa 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
@@ -16,7 +16,7 @@
# limitations under the License.
-# Run a Hadoop command on all slave hosts.
+# Run a Hadoop command on all worker hosts.
function hadoop_usage
{
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties
index 54d5c729848..b4eec1fe2cc 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties
+++ b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties
@@ -75,14 +75,6 @@ log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
-#
-# TaskLog Appender
-#
-log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
-
-log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
-log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
-
#
# HDFS block state change log from block manager
#
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
index df853078461..5d8f0e575f2 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
@@ -2413,8 +2413,14 @@ public abstract class FileSystem extends Configured
if (stat.isFile()) { // file
curFile = stat;
} else if (recursive) { // directory
- itors.push(curItor);
- curItor = listLocatedStatus(stat.getPath());
+ try {
+ RemoteIterator<LocatedFileStatus> newDirItor = listLocatedStatus(stat.getPath());
+ itors.push(curItor);
+ curItor = newDirItor;
+ } catch (FileNotFoundException ignored) {
+ LOGGER.debug("Directory {} deleted while attempting for recursive listing",
+ stat.getPath());
+ }
}
}
@@ -3936,6 +3942,7 @@ public abstract class FileSystem extends Configured
private volatile long bytesReadDistanceOfThreeOrFour;
private volatile long bytesReadDistanceOfFiveOrLarger;
private volatile long bytesReadErasureCoded;
+ private volatile long remoteReadTimeMS;
/**
* Add another StatisticsData object to this one.
@@ -3953,6 +3960,7 @@ public abstract class FileSystem extends Configured
this.bytesReadDistanceOfFiveOrLarger +=
other.bytesReadDistanceOfFiveOrLarger;
this.bytesReadErasureCoded += other.bytesReadErasureCoded;
+ this.remoteReadTimeMS += other.remoteReadTimeMS;
}
/**
@@ -3971,6 +3979,7 @@ public abstract class FileSystem extends Configured
this.bytesReadDistanceOfFiveOrLarger =
-this.bytesReadDistanceOfFiveOrLarger;
this.bytesReadErasureCoded = -this.bytesReadErasureCoded;
+ this.remoteReadTimeMS = -this.remoteReadTimeMS;
}
@Override
@@ -4019,6 +4028,10 @@ public abstract class FileSystem extends Configured
public long getBytesReadErasureCoded() {
return bytesReadErasureCoded;
}
+
+ public long getRemoteReadTimeMS() {
+ return remoteReadTimeMS;
+ }
}
private interface StatisticsAggregator {
@@ -4246,6 +4259,14 @@ public abstract class FileSystem extends Configured
}
}
+ /**
+ * Increment the time taken to read bytes from remote in the statistics.
+ * @param durationMS time taken in ms to read bytes from remote
+ */
+ public void increaseRemoteReadTime(final long durationMS) {
+ getThreadStatistics().remoteReadTimeMS += durationMS;
+ }
+
/**
* Apply the given aggregator to all StatisticsData objects associated with
* this Statistics object.
@@ -4393,6 +4414,25 @@ public abstract class FileSystem extends Configured
return bytesRead;
}
+ /**
+ * Get total time taken in ms for bytes read from remote.
+ * @return time taken in ms for remote bytes read.
+ */
+ public long getRemoteReadTime() {
+ return visitAll(new StatisticsAggregator() {
+ private long remoteReadTimeMS = 0;
+
+ @Override
+ public void accept(StatisticsData data) {
+ remoteReadTimeMS += data.remoteReadTimeMS;
+ }
+
+ public Long aggregate() {
+ return remoteReadTimeMS;
+ }
+ });
+ }
+
/**
* Get all statistics data.
* MR or other frameworks can use the method to get all statistics at once.
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java
index 62806d61b54..9e62e63775a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java
@@ -47,7 +47,8 @@ public class FileSystemStorageStatistics extends StorageStatistics {
"bytesReadDistanceOfOneOrTwo",
"bytesReadDistanceOfThreeOrFour",
"bytesReadDistanceOfFiveOrLarger",
- "bytesReadErasureCoded"
+ "bytesReadErasureCoded",
+ "remoteReadTimeMS"
};
private static class LongStatisticIterator
@@ -107,6 +108,8 @@ public class FileSystemStorageStatistics extends StorageStatistics {
return data.getBytesReadDistanceOfFiveOrLarger();
case "bytesReadErasureCoded":
return data.getBytesReadErasureCoded();
+ case "remoteReadTimeMS":
+ return data.getRemoteReadTimeMS();
default:
return null;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java
index c18dc519188..2990696ee1b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java
@@ -23,6 +23,9 @@ import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.LocalDirAllocator;
+
/**
* Provides functionality necessary for caching blocks of data read from FileSystem.
*/
@@ -64,7 +67,10 @@ public interface BlockCache extends Closeable {
*
* @param blockNumber the id of the given block.
* @param buffer contents of the given block to be added to this cache.
+ * @param conf the configuration.
+ * @param localDirAllocator the local dir allocator instance.
* @throws IOException if there is an error writing the given block.
*/
- void put(int blockNumber, ByteBuffer buffer) throws IOException;
+ void put(int blockNumber, ByteBuffer buffer, Configuration conf,
+ LocalDirAllocator localDirAllocator) throws IOException;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java
index a0db4b308b6..e43b176d0bf 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java
@@ -33,6 +33,8 @@ import java.util.function.Supplier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.statistics.DurationTracker;
import static java.util.Objects.requireNonNull;
@@ -95,6 +97,10 @@ public abstract class CachingBlockManager extends BlockManager {
private final PrefetchingStatistics prefetchingStatistics;
+ private final Configuration conf;
+
+ private final LocalDirAllocator localDirAllocator;
+
/**
* Constructs an instance of a {@code CachingBlockManager}.
*
@@ -102,14 +108,17 @@ public abstract class CachingBlockManager extends BlockManager {
* @param blockData information about each block of the underlying file.
* @param bufferPoolSize size of the in-memory cache in terms of number of blocks.
* @param prefetchingStatistics statistics for this stream.
- *
+ * @param conf the configuration.
+ * @param localDirAllocator the local dir allocator instance.
* @throws IllegalArgumentException if bufferPoolSize is zero or negative.
*/
public CachingBlockManager(
ExecutorServiceFuturePool futurePool,
BlockData blockData,
int bufferPoolSize,
- PrefetchingStatistics prefetchingStatistics) {
+ PrefetchingStatistics prefetchingStatistics,
+ Configuration conf,
+ LocalDirAllocator localDirAllocator) {
super(blockData);
Validate.checkPositiveInteger(bufferPoolSize, "bufferPoolSize");
@@ -129,6 +138,8 @@ public abstract class CachingBlockManager extends BlockManager {
this.ops = new BlockOperations();
this.ops.setDebug(false);
+ this.conf = requireNonNull(conf);
+ this.localDirAllocator = localDirAllocator;
}
/**
@@ -468,7 +479,8 @@ public abstract class CachingBlockManager extends BlockManager {
blockFuture = cf;
}
- CachePutTask task = new CachePutTask(data, blockFuture, this, Instant.now());
+ CachePutTask task =
+ new CachePutTask(data, blockFuture, this, Instant.now());
+ Future<Void> actionFuture = futurePool.executeFunction(task);
data.setCaching(actionFuture);
ops.end(op);
@@ -554,7 +566,7 @@ public abstract class CachingBlockManager extends BlockManager {
return;
}
- cache.put(blockNumber, buffer);
+ cache.put(blockNumber, buffer, conf, localDirAllocator);
}
private static class CachePutTask implements Supplier<Void> {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java
index c84335a763e..11416032651 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java
@@ -27,10 +27,9 @@ import java.nio.channels.WritableByteChannel;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
+import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
-import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.PosixFilePermission;
-import java.nio.file.attribute.PosixFilePermissions;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
@@ -39,9 +38,13 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.LocalDirAllocator;
+
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull;
@@ -67,6 +70,12 @@ public class SingleFilePerBlockCache implements BlockCache {
private final PrefetchingStatistics prefetchingStatistics;
+ /**
+ * File attributes attached to any intermediate temporary file created during index creation.
+ */
+ private static final Set<PosixFilePermission> TEMP_FILE_ATTRS =
+ ImmutableSet.of(PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE);
+
/**
* Cache entry.
* Each block is stored as a separate file.
@@ -172,11 +181,17 @@ public class SingleFilePerBlockCache implements BlockCache {
/**
* Puts the given block in this cache.
*
- * @throws IllegalArgumentException if buffer is null.
- * @throws IllegalArgumentException if buffer.limit() is zero or negative.
+ * @param blockNumber the block number, used as a key for blocks map.
+ * @param buffer buffer contents of the given block to be added to this cache.
+ * @param conf the configuration.
+ * @param localDirAllocator the local dir allocator instance.
+ * @throws IOException if either local dir allocator fails to allocate file or if IO error
+ * occurs while writing the buffer content to the file.
+ * @throws IllegalArgumentException if buffer is null, or if buffer.limit() is zero or negative.
*/
@Override
- public void put(int blockNumber, ByteBuffer buffer) throws IOException {
+ public void put(int blockNumber, ByteBuffer buffer, Configuration conf,
+ LocalDirAllocator localDirAllocator) throws IOException {
if (closed) {
return;
}
@@ -191,7 +206,7 @@ public class SingleFilePerBlockCache implements BlockCache {
Validate.checkPositiveInteger(buffer.limit(), "buffer.limit()");
- Path blockFilePath = getCacheFilePath();
+ Path blockFilePath = getCacheFilePath(conf, localDirAllocator);
long size = Files.size(blockFilePath);
if (size != 0) {
String message =
@@ -221,8 +236,19 @@ public class SingleFilePerBlockCache implements BlockCache {
writeChannel.close();
}
- protected Path getCacheFilePath() throws IOException {
- return getTempFilePath();
+ /**
+ * Return temporary file created based on the file path retrieved from local dir allocator.
+ *
+ * @param conf The configuration object.
+ * @param localDirAllocator Local dir allocator instance.
+ * @return Path of the temporary file created.
+ * @throws IOException if IO error occurs while local dir allocator tries to retrieve path
+ * from local FS or file creation fails or permission set fails.
+ */
+ protected Path getCacheFilePath(final Configuration conf,
+ final LocalDirAllocator localDirAllocator)
+ throws IOException {
+ return getTempFilePath(conf, localDirAllocator);
}
@Override
@@ -323,9 +349,19 @@ public class SingleFilePerBlockCache implements BlockCache {
private static final String CACHE_FILE_PREFIX = "fs-cache-";
- public static boolean isCacheSpaceAvailable(long fileSize) {
+ /**
+ * Determine if the cache space is available on the local FS.
+ *
+ * @param fileSize The size of the file.
+ * @param conf The configuration.
+ * @param localDirAllocator Local dir allocator instance.
+ * @return True if the given file size is less than the available free space on local FS,
+ * False otherwise.
+ */
+ public static boolean isCacheSpaceAvailable(long fileSize, Configuration conf,
+ LocalDirAllocator localDirAllocator) {
try {
- Path cacheFilePath = getTempFilePath();
+ Path cacheFilePath = getTempFilePath(conf, localDirAllocator);
long freeSpace = new File(cacheFilePath.toString()).getUsableSpace();
LOG.info("fileSize = {}, freeSpace = {}", fileSize, freeSpace);
Files.deleteIfExists(cacheFilePath);
@@ -339,16 +375,25 @@ public class SingleFilePerBlockCache implements BlockCache {
// The suffix (file extension) of each serialized index file.
private static final String BINARY_FILE_SUFFIX = ".bin";
- // File attributes attached to any intermediate temporary file created during index creation.
- private static final FileAttribute<Set<PosixFilePermission>> TEMP_FILE_ATTRS =
- PosixFilePermissions.asFileAttribute(EnumSet.of(PosixFilePermission.OWNER_READ,
- PosixFilePermission.OWNER_WRITE));
-
- private static Path getTempFilePath() throws IOException {
- return Files.createTempFile(
- CACHE_FILE_PREFIX,
- BINARY_FILE_SUFFIX,
- TEMP_FILE_ATTRS
- );
+ /**
+ * Create temporary file based on the file path retrieved from local dir allocator
+ * instance. The file is created with .bin suffix. The created file has been granted
+ * posix file permissions available in TEMP_FILE_ATTRS.
+ *
+ * @param conf the configuration.
+ * @param localDirAllocator the local dir allocator instance.
+ * @return path of the file created.
+ * @throws IOException if IO error occurs while local dir allocator tries to retrieve path
+ * from local FS or file creation fails or permission set fails.
+ */
+ private static Path getTempFilePath(final Configuration conf,
+ final LocalDirAllocator localDirAllocator) throws IOException {
+ org.apache.hadoop.fs.Path path =
+ localDirAllocator.getLocalPathForWrite(CACHE_FILE_PREFIX, conf);
+ File dir = new File(path.getParent().toUri().getPath());
+ String prefix = path.getName();
+ File tmpFile = File.createTempFile(prefix, BINARY_FILE_SUFFIX, dir);
+ Path tmpFilePath = Paths.get(tmpFile.toURI());
+ return Files.setPosixFilePermissions(tmpFilePath, TEMP_FILE_ATTRS);
}
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
index 178f761191b..515148e9298 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
@@ -497,7 +497,12 @@ public final class HttpServer2 implements FilterContainer {
prefix -> this.conf.get(prefix + "type")
.equals(PseudoAuthenticationHandler.TYPE))
) {
- server.initSpnego(conf, hostName, usernameConfKey, keytabConfKey);
+ server.initSpnego(
+ conf,
+ hostName,
+ getFilterProperties(conf, authFilterConfigurationPrefixes),
+ usernameConfKey,
+ keytabConfKey);
}
for (URI ep : endpoints) {
@@ -1340,8 +1345,12 @@ public final class HttpServer2 implements FilterContainer {
}
private void initSpnego(Configuration conf, String hostName,
- String usernameConfKey, String keytabConfKey) throws IOException {
+ Properties authFilterConfigurationPrefixes, String usernameConfKey, String keytabConfKey)
+ throws IOException {
Map<String, String> params = new HashMap<>();
+ for (Map.Entry