diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch b/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch
new file mode 100644
index 00000000000..8f87d4092bc
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch
@@ -0,0 +1,98 @@
+diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java
+index a277abd..ed7c709 100644
+--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java
++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java
+@@ -43,18 +43,6 @@
+ public abstract MetricsSystem init(String prefix);
+
+ /**
+- * Register a metrics source
+- * @param the actual type of the source object
+- * @param source object to register
+- * @param name of the source. Must be unique or null (then extracted from
+- * the annotations of the source object.)
+- * @param desc the description of the source (or null. See above.)
+- * @return the source object
+- * @exception MetricsException
+- */
+- public abstract T register(String name, String desc, T source);
+-
+- /**
+ * Unregister a metrics source
+ * @param name of the source. This is the name you use to call register()
+ */
+@@ -77,18 +65,19 @@
+ */
+ @InterfaceAudience.Private
+ public abstract MetricsSource getSource(String name);
++
+
+ /**
+- * Register a metrics sink
+- * @param the type of the sink
+- * @param sink to register
+- * @param name of the sink. Must be unique.
+- * @param desc the description of the sink
+- * @return the sink
++ * Register a metrics source
++ * @param the actual type of the source object
++ * @param source object to register
++ * @param name of the source. Must be unique or null (then extracted from
++ * the annotations of the source object.)
++ * @param desc the description of the source (or null. See above.)
++ * @return the source object
+ * @exception MetricsException
+ */
+- public abstract
+- T register(String name, String desc, T sink);
++ public abstract T register(String name, String desc, T source);
+
+ /**
+ * Register a callback interface for JMX events
+diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java
+index 6986edb..eeea81f 100644
+--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java
++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java
+@@ -270,27 +270,6 @@ void registerSource(String name, String desc, MetricsSource source) {
+ LOG.debug("Registered source "+ name);
+ }
+
+- @Override public synchronized
+- T register(final String name, final String description, final T sink) {
+- LOG.debug(name +", "+ description);
+- if (allSinks.containsKey(name)) {
+- LOG.warn("Sink "+ name +" already exists!");
+- return sink;
+- }
+- allSinks.put(name, sink);
+- if (config != null) {
+- registerSink(name, description, sink);
+- }
+- // We want to re-register the sink to pick up new config
+- // when the metrics system restarts.
+- register(name, new AbstractCallback() {
+- @Override public void postStart() {
+- register(name, description, sink);
+- }
+- });
+- return sink;
+- }
+-
+ synchronized void registerSink(String name, String desc, MetricsSink sink) {
+ checkNotNull(config, "config");
+ MetricsConfig conf = sinkConfigs.get(name);
+diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java
+index c19d238..f8412f1 100644
+--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java
++++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java
+@@ -130,8 +130,8 @@ public void testTagsForPrefix() throws Exception {
+ GangliaMetricsTestHelper.setDatagramSocket(gsink31, mockds31);
+
+ // register the sinks
+- ms.register("gsink30", "gsink30 desc", gsink30);
+- ms.register("gsink31", "gsink31 desc", gsink31);
++ //ms.register("gsink30", "gsink30 desc", gsink30);
++ //ms.register("gsink31", "gsink31 desc", gsink31);
+ ms.publishMetricsNow(); // publish the metrics
+
+ ms.stop();
diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_2.7.2.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_2.7.2.xml
new file mode 100644
index 00000000000..5ef99b22770
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_2.7.2.xml
@@ -0,0 +1,46648 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key
+ @param newKeys
+ @param customMessage
+ @deprecated use {@link #addDeprecation(String key, String newKey,
+ String customMessage)} instead]]>
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key
+ @param newKey
+ @param customMessage]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKey key that takes up the value of deprecated key]]>
+
+
+
+
+
+ key is deprecated.
+
+ @param key the parameter which is to be checked for deprecation
+ @return true
if the key is deprecated and
+ false
otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param name resource to be added, the classpath is examined for a file
+ with that name.]]>
+
+
+
+
+
+ final.
+
+ @param url url of the resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+ final.
+
+ @param file file-path of resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+ final.
+
+ WARNING: The contents of the InputStream will be cached, by this method.
+ So use this sparingly because it does increase the memory consumption.
+
+ @param in InputStream to deserialize the object from. In will be read from
+ when a get or set is called next. After it is read the stream will be
+ closed.]]>
+
+
+
+
+
+
+ final.
+
+ @param in InputStream to deserialize the object from.
+ @param name the name of the resource because InputStream.toString is not
+ very descriptive some times.]]>
+
+
+
+
+
+ final.
+
+ @param conf Configuration object from which to load properties]]>
+
+
+
+
+
+
+
+
+
+
+ name property, null
if
+ no such property exists. If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null.
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name, will be trimmed before get value.
+ @return the value of the name
or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+
+
+
+
+
+ name property, but only for
+ names which have no valid value, usually non-existent or commented
+ out in XML.
+
+ @param name the property name
+ @return true if the property name
exists without value]]>
+
+
+
+
+
+ name property as a trimmed String
,
+ null
if no such property exists.
+ If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name.
+ @return the value of the name
or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+ name property as a trimmed String
,
+ defaultValue
if no such property exists.
+ See @{Configuration#getTrimmed} for more details.
+
+ @param name the property name.
+ @param defaultValue the property default value.
+ @return the value of the name
or defaultValue
+ if it is not set.]]>
+
+
+
+
+
+ name property, without doing
+ variable expansion.If the key is
+ deprecated, it returns the value of the first key which replaces
+ the deprecated key and is not null.
+
+ @param name the property name.
+ @return the value of the name
property or
+ its replacing property and null if no such property exists.]]>
+
+
+
+
+
+
+ value of the name
property. If
+ name
is deprecated or there is a deprecated name associated to it,
+ it sets the value to both names. Name will be trimmed before put into
+ configuration.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+
+ value of the name
property. If
+ name
is deprecated, it also sets the value
to
+ the keys that replace the deprecated key. Name will be trimmed before put
+ into configuration.
+
+ @param name property name.
+ @param value property value.
+ @param source the place that this configuration value came from
+ (For debugging).
+ @throws IllegalArgumentException when the value or name is null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name. If the key is deprecated,
+ it returns the value of the first key which replaces the deprecated key
+ and is not null.
+ If no such property exists,
+ then defaultValue
is returned.
+
+ @param name property name, will be trimmed before get value.
+ @param defaultValue default value.
+ @return property value, or defaultValue
if the property
+ doesn't exist.]]>
+
+
+
+
+
+
+ name property as an int
.
+
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid int
,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as an int
,
+ or defaultValue
.]]>
+
+
+
+
+
+ name property as a set of comma-delimited
+ int
values.
+
+ If no such property exists, an empty array is returned.
+
+ @param name property name
+ @return property value interpreted as an array of comma-delimited
+ int
values]]>
+
+
+
+
+
+
+ name property to an int
.
+
+ @param name property name.
+ @param value int
value of the property.]]>
+
+
+
+
+
+
+ name property as a long
.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid long
,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long
,
+ or defaultValue
.]]>
+
+
+
+
+
+
+ name property as a long
or
+ human readable format. If no such property exists, the provided default
+ value is returned, or if the specified value is not a valid
+ long
or human readable format, then an error is thrown. You
+ can use the following suffix (case insensitive): k(kilo), m(mega), g(giga),
+ t(tera), p(peta), e(exa)
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long
,
+ or defaultValue
.]]>
+
+
+
+
+
+
+ name property to a long
.
+
+ @param name property name.
+ @param value long
value of the property.]]>
+
+
+
+
+
+
+ name property as a float
.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid float
,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a float
,
+ or defaultValue
.]]>
+
+
+
+
+
+
+ name property to a float
.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a double
.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid double
,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a double
,
+ or defaultValue
.]]>
+
+
+
+
+
+
+ name property to a double
.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a boolean
.
+ If no such property is specified, or if the specified value is not a valid
+ boolean
, then defaultValue
is returned.
+
+ @param name property name.
+ @param defaultValue default value.
+ @return property value as a boolean
,
+ or defaultValue
.]]>
+
+
+
+
+
+
+ name property to a boolean
.
+
+ @param name property name.
+ @param value boolean
value of the property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property to the given type. This
+ is equivalent to set(<name>, value.toString())
.
+ @param name property name
+ @param value new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name to the given time duration. This
+ is equivalent to set(<name>, value + <time suffix>)
.
+ @param name Property name
+ @param value Time duration
+ @param unit Unit of time]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as a Pattern
.
+ If no such property is specified, or if the specified value is not a valid
+ Pattern
, then DefaultValue
is returned.
+ Note that the returned value is NOT trimmed by this method.
+
+ @param name property name
+ @param defaultValue default value
+ @return property value as a compiled Pattern, or defaultValue]]>
+
+
+
+
+
+
+ Pattern.
+ If the pattern is passed as null, sets the empty pattern which results in
+ further calls to getPattern(...) returning the default value.
+
+ @param name property name
+ @param pattern new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as
+ a collection of String
s.
+ If no such property is specified then empty collection is returned.
+
+ This is an optimized version of {@link #getStrings(String)}
+
+ @param name property name.
+ @return property value as a collection of String
s.]]>
+
+
+
+
+
+ name property as
+ an array of String
s.
+ If no such property is specified then null
is returned.
+
+ @param name property name.
+ @return property value as an array of String
s,
+ or null
.]]>
+
+
+
+
+
+
+ name property as
+ an array of String
s.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of String
s,
+ or default value.]]>
+
+
+
+
+
+ name property as
+ a collection of String
s, trimmed of the leading and trailing whitespace.
+ If no such property is specified then empty Collection
is returned.
+
+ @param name property name.
+ @return property value as a collection of String
s, or empty Collection
]]>
+
+
+
+
+
+ name property as
+ an array of String
s, trimmed of the leading and trailing whitespace.
+ If no such property is specified then an empty array is returned.
+
+ @param name property name.
+ @return property value as an array of trimmed String
s,
+ or empty array.]]>
+
+
+
+
+
+
+ name property as
+ an array of String
s, trimmed of the leading and trailing whitespace.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of trimmed String
s,
+ or default value.]]>
+
+
+
+
+
+
+ name property as
+ as comma delimited values.
+
+ @param name property name.
+ @param values The values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostProperty as a
+ InetSocketAddress
. If hostProperty
is
+ null
, addressProperty
will be used. This
+ is useful for cases where we want to differentiate between host
+ bind address and address clients should use to establish connection.
+
+ @param hostProperty bind host property name.
+ @param addressProperty address property name.
+ @param defaultAddressValue the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+
+ name property as a
+ InetSocketAddress
.
+ @param name property name.
+ @param defaultAddress the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+ name property as
+ a host:port
.]]>
+
+
+
+
+
+
+
+
+ name property as a host:port
. The wildcard
+ address is replaced with the local host's address. If the host and address
+ properties are configured the host component of the address will be combined
+ with the port component of the addr to generate the address. This is to allow
+ optional control over which host name is used in multi-home bind-host
+ cases where a host can have multiple names
+ @param hostProperty the bind-host configuration name
+ @param addressProperty the service address configuration name
+ @param defaultAddressValue the service default address configuration value
+ @param addr InetSocketAddress of the service listener
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+ name property as a host:port
. The wildcard
+ address is replaced with the local host's address.
+ @param name property name.
+ @param addr InetSocketAddress of a listener to store in the given property
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property
+ as an array of Class
.
+ The value of the property specifies a list of comma separated class names.
+ If no such property is specified, then defaultValue
is
+ returned.
+
+ @param name the property name.
+ @param defaultValue default value.
+ @return property value as a Class[]
,
+ or defaultValue
.]]>
+
+
+
+
+
+
+ name property as a Class
.
+ If no such property is specified, then defaultValue
is
+ returned.
+
+ @param name the class name.
+ @param defaultValue default value.
+ @return property value as a Class
,
+ or defaultValue
.]]>
+
+
+
+
+
+
+
+ name property as a Class
+ implementing the interface specified by xface
.
+
+ If no such property is specified, then defaultValue
is
+ returned.
+
+ An exception is thrown if the returned class does not implement the named
+ interface.
+
+ @param name the class name.
+ @param defaultValue default value.
+ @param xface the interface implemented by the named class.
+ @return property value as a Class
,
+ or defaultValue
.]]>
+
+
+
+
+
+
+ name property as a List
+ of objects implementing the interface specified by xface
.
+
+ An exception is thrown if any of the classes does not exist, or if it does
+ not implement the named interface.
+
+ @param name the property name.
+ @param xface the interface implemented by the classes named by
+ name
.
+ @return a List
of objects implementing xface
.]]>
+
+
+
+
+
+
+
+ name property to the name of a
+ theClass
implementing the given interface xface
.
+
+ An exception is thrown if theClass
does not implement the
+ interface xface
.
+
+ @param name property name.
+ @param theClass property value.
+ @param xface the interface implemented by the named class.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return an input stream attached to the resource.]]>
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return a reader attached to the resource.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ String
+ key-value pairs in the configuration.
+
+ @return an iterator over the entries.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true to set quiet-mode on, false
+ to turn it off.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ with matching keys]]>
+
+
+
+
+
+
+
+
+
+
+
+ Resources
+
+ Configurations are specified by resources. A resource contains a set of
+ name/value pairs as XML data. Each resource is named by either a
+ String
or by a {@link Path}. If named by a String
,
+ then the classpath is examined for a file with that name. If named by a
+ Path
, then the local filesystem is examined directly, without
+ referring to the classpath.
+
+
Unless explicitly turned off, Hadoop by default specifies two
+ resources, loaded in-order from the classpath:
+ -
+
+ core-default.xml: Read-only defaults for hadoop.
+ - core-site.xml: Site-specific configuration for a given hadoop
+ installation.
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.
+
+ Final Parameters
+
+ Configuration parameters may be declared final.
+ Once a resource declares a value final, no subsequently-loaded
+ resource can alter that value.
+ For example, one might define a final parameter with:
+
+ <property>
+ <name>dfs.hosts.include</name>
+ <value>/etc/hadoop/conf/hosts.include</value>
+ <final>true</final>
+ </property>
+
+ Administrators typically define parameters as final in
+ core-site.xml for values that user applications may not alter.
+
+
Variable Expansion
+
+ Value strings are first processed for variable expansion. The
+ available properties are:
+ - Other properties defined in this Configuration; and, if a name is
+ undefined here,
+ - Properties in {@link System#getProperties()}.
+
+
+ For example, if a configuration resource contains the following property
+ definitions:
+
+ <property>
+ <name>basedir</name>
+ <value>/user/${user.name}</value>
+ </property>
+
+ <property>
+ <name>tempdir</name>
+ <value>${basedir}/tmp</value>
+ </property>
+
+ When conf.get("tempdir") is called, then ${basedir}
+ will be resolved to another property in this Configuration, while
+ ${user.name} would then ordinarily be resolved to the value
+ of the System property with that name.
+ By default, warnings will be given to any deprecated configuration
+ parameters and these are suppressible by configuring
+ log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in
+ log4j.properties file.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProviderExtension implementation providing a short lived
+ cache for KeyVersions
and Metadata
to avoid burst
+ of requests to hit the underlying KeyProvider
.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #createKey(String, byte[], Options)} method.
+
+ @param name the base name of the key
+ @param options the options for the new key.
+ @return the version name of the first version of the key.
+ @throws IOException
+ @throws NoSuchAlgorithmException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #rollNewVersion(String, byte[])} method.
+
+ @param name the basename of the key
+ @return the name of the new version of the key
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProvider
implementations must be thread safe.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyVersion material of the latest key version
+ of the key and is encrypted using the same cipher.
+
+ NOTE: The generated key is not stored by the KeyProvider
+
+ @param encryptionKeyName
+ The latest KeyVersion of this key's material will be encrypted.
+ @return EncryptedKeyVersion with the generated key material, the version
+ name is 'EEK' (for Encrypted Encryption Key)
+ @throws IOException
+ thrown if the key material could not be generated
+ @throws GeneralSecurityException
+ thrown if the key material could not be encrypted because of a
+ cryptographic issue.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The alternative to this is using the same IV for both the encryption key
+ and the encrypted key. Even a simple symmetric transformation like this
+ improves security by avoiding IV re-use. IVs will also be fairly unique
+ among different EEKs.
+
+ @param encryptedKeyIV of the encrypted key (i.e. {@link
+ #getEncryptedKeyIv()})
+ @return IV for the encryption key]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProviderDelegationTokenExtension using a given
+ {@link KeyProvider}.
+
+ If the given KeyProvider
implements the
+ {@link DelegationTokenExtension} interface the KeyProvider
+ itself will provide the extension functionality, otherwise a default
+ extension implementation will be used.
+
+ @param keyProvider KeyProvider
to use to create the
+ KeyProviderDelegationTokenExtension
extension.
+ @return a KeyProviderDelegationTokenExtension
instance
+ using the given KeyProvider
.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Credentials object if it is not already present,
+ @param renewer the user allowed to renew the delegation tokens
+ @param credentials cache in which to add new delegation tokens
+ @return list of new delegation tokens
+ @throws IOException thrown if IOException if an IO error occurs.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NULL if
+ a provider for the specified URI scheme could not be found.
+ @throws IOException thrown if the provider failed to initialize.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @/
+
+ where :
+ - PROTO = http or https
+ - AUTHORITY = [:]
+ - HOSTS = [;]
+ - HOSTNAME = string
+ - PORT = integer
+
+ If multiple hosts are provider, the Factory will create a
+ {@link LoadBalancingKMSClientProvider} that round-robins requests
+ across the provided list of hosts.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri has syntax error]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri is
+ not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri
+ determines a configuration property name,
+ fs.AbstractFileSystem.scheme.impl whose value names the
+ AbstractFileSystem class.
+
+ The entire URI and conf is passed to the AbstractFileSystem factory method.
+
+ @param uri for the file system to be created.
+ @param conf which is passed to the file system impl.
+
+ @return file system for the given URI.
+
+ @throws UnsupportedFileSystemException if the file system for
+ uri
is not supported.]]>
+
+
+
+
+
+
+
+
+
+
+
+ default port;]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications, must include entries
+ for user, group, and others for compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, buf.position() will be advanced by the number
+ of bytes read and buf.limit() should be unchanged.
+
+ In the case of an exception, the values of buf.position() and buf.limit()
+ are undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Many implementations will throw {@link UnsupportedOperationException}, so
+ callers that are not confident in support for this method from the
+ underlying filesystem should be prepared to handle that exception.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ @param buf
+ the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if
+ reach end-of-stream
+ @throws IOException
+ if there is some error performing the read]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ setReplication of FileSystem
+ @param src file name
+ @param replication new replication
+ @throws IOException
+ @return true if successful;
+ false if file does not exist or is a directory]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+ core-default.xml]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND)
+
+
+
+ Use the CreateFlag as follows:
+
+ - CREATE - to create a file if it does not exist,
+ else throw FileAlreadyExists.
+ - APPEND - to append to a file if it exists,
+ else throw FileNotFoundException.
+ - OVERWRITE - to truncate a file if it exists,
+ else throw FileNotFoundException.
+ - CREATE|APPEND - to create a file if it does not exist,
+ else append to an existing file.
+ - CREATE|OVERWRITE - to create a file if it does not exist,
+ else overwrite an existing file.
+ - SYNC_BLOCK - to force closed blocks to the disk device.
+ In addition {@link Syncable#hsync()} should be called after each write,
+ if true synchronous behavior is required.
+ - LAZY_PERSIST - Create the block on transient storage (RAM) if
+ available.
+ - APPEND_NEWBLOCK - Append data to a new block instead of end of the last
+ partial block.
+
+
+ Following combination is not valid and will result in
+ {@link HadoopIllegalArgumentException}:
+
+ - APPEND|OVERWRITE
+ - CREATE|APPEND|OVERWRITE
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ absOrFqPath is not supported.
+ @throws IOExcepton If the file system for absOrFqPath
could
+ not be instantiated.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ defaultFsUri is not supported]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NewWdir can be one of:
+
+ - relative path: "foo/bar";
+ - absolute without scheme: "/foo/bar"
+ - fully qualified with scheme: "xx://auth/foo/bar"
+
+
+ Illegal WDs:
+
+ - relative with scheme: "xx:foo/bar"
+ - non existent directory
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws AccessControlException if access denied
+ @throws IOException If an IO Error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f
is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Progress - to report progress on the operation - default null
+ Permission - umask is applied against permisssion: default is
+ FsPermissions:getDefault()
+
+ CreateParent - create missing parent path; default is to not
+ to create parents
+ The defaults for the following are SS defaults of the file
+ server implementing the target path. Not all parameters make sense
+ for all kinds of file system - eg. localFS ignores Blocksize,
+ replication, checksum
+
+ - BufferSize - buffersize used in FSDataOutputStream
+
- Blocksize - block size for file blocks
+
- ReplicationFactor - replication for blocks
+
- ChecksumParam - Checksum parameters. server default is used
+ if not specified.
+
+
+
+ @return {@link FSDataOutputStream} for created file
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file f
already exists
+ @throws FileNotFoundException If parent of f
does not exist
+ and createParent
is false
+ @throws ParentNotDirectoryException If parent of f
is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f
is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dir already
+ exists
+ @throws FileNotFoundException If parent of dir
does not exist
+ and createParent
is false
+ @throws ParentNotDirectoryException If parent of dir
is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for dir
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dir
is not valid]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f
is invalid]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ Fails if path is a directory.
+ Fails if path does not exist.
+ Fails if path is not closed.
+ Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true
if the file has been truncated to the desired
+ newLength
and is immediately available to be reused for
+ write operations such as append
, or
+ false
if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+
+ @throws AccessControlException If access is denied
+ @throws FileNotFoundException If file f
does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Fails if src is a file and dst is a directory.
+ Fails if src is a directory and dst is a file.
+ Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails if the dst
+ already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites the dst if
+ it is a file or an empty directory. Rename fails if dst is a non-empty
+ directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for details
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If dst
already exists and
+ options has {@link Options.Rename#OVERWRITE}
+ option false.
+ @throws FileNotFoundException If src
does not exist
+ @throws ParentNotDirectoryException If parent of dst
is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for src
+ and dst
is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws HadoopIllegalArgumentException If username
or
+ groupname
is invalid.]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If the given path does not refer to a symlink
+ or an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Given a path referring to a symlink of form:
+
+ <---X--->
+ fs://host/A/B/link
+ <-----Y----->
+
+ In this path X is the scheme and authority that identify the file system,
+ and Y is the path leading up to the final path component "link". If Y is
+ a symlink itself then let Y' be the target of Y and X' be the scheme and
+ authority of Y'. Symlink targets may:
+
+ 1. Fully qualified URIs
+
+ fs://hostX/A/B/file Resolved according to the target file system.
+
+ 2. Partially qualified URIs (eg scheme but no host)
+
+ fs:///A/B/file Resolved according to the target file system. Eg resolving
+ a symlink to hdfs:///A results in an exception because
+ HDFS URIs must be fully qualified, while a symlink to
+ file:///A will not since Hadoop's local file systems
+ require partially qualified URIs.
+
+ 3. Relative paths
+
+ path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
+ is "../B/file" then [Y'][path] is hdfs://host/B/file
+
+ 4. Absolute paths
+
+ path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path
+ is "/file" then [X][path] is hdfs://host/file
+
+
+ @param target the target of the symbolic link
+ @param link the path to be created that points to target
+ @param createParent if true then missing parent dirs are created if
+ false then parent must exist
+
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file linkcode> already exists
+ @throws FileNotFoundException If target
does not exist
+ @throws ParentNotDirectoryException If parent of link
is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for
+ target
or link
is not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications, must include entries
+ for user, group, and others for compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List of the XAttr names of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *** Path Names ***
+
+
+ The Hadoop file system supports a URI name space and URI names.
+ It offers a forest of file systems that can be referenced using fully
+ qualified URIs.
+ Two common Hadoop file systems implementations are
+
+ - the local file system: file:///path
+
- the hdfs file system hdfs://nnAddress:nnPort/path
+
+
+ While URI names are very flexible, it requires knowing the name or address
+ of the server. For convenience one often wants to access the default system
+ in one's environment without knowing its name/address. This has an
+ additional benefit that it allows one to change one's default fs
+ (e.g. admin moves application from cluster1 to cluster2).
+
+
+ To facilitate this, Hadoop supports a notion of a default file system.
+ The user can set his default file system, although this is
+ typically set up for you in your environment via your default config.
+ A default file system implies a default scheme and authority; slash-relative
+ names (such as /for/bar) are resolved relative to that default FS.
+ Similarly a user can also have working-directory-relative names (i.e. names
+ not starting with a slash). While the working directory is generally in the
+ same default FS, the wd can be in a different FS.
+
+ Hence Hadoop path names can be one of:
+
+ - fully qualified URI: scheme://authority/path
+
- slash relative names: /path relative to the default file system
+
- wd-relative names: path relative to the working dir
+
+ Relative paths with scheme (scheme:foo/bar) are illegal.
+
+
+ ****The Role of the FileContext and configuration defaults****
+
+ The FileContext provides file namespace context for resolving file names;
+ it also contains the umask for permissions, In that sense it is like the
+ per-process file-related state in Unix system.
+ These two properties
+
+ - default file system i.e your slash)
+
- umask
+
+ in general, are obtained from the default configuration file
+ in your environment, (@see {@link Configuration}).
+
+ No other configuration parameters are obtained from the default config as
+ far as the file context layer is concerned. All file system instances
+ (i.e. deployments of file systems) have default properties; we call these
+ server side (SS) defaults. Operation like create allow one to select many
+ properties: either pass them in as explicit parameters or use
+ the SS properties.
+
+ The file system related SS defaults are
+
+ - the home directory (default is "/user/userName")
+
- the initial wd (only for local fs)
+
- replication factor
+
- block size
+
- buffer size
+
- encryptDataTransfer
+
- checksum option. (checksumType and bytesPerChecksum)
+
+
+
+ *** Usage Model for the FileContext class ***
+
+ Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
+ Unspecified values come from core-defaults.xml in the release jar.
+
+ - myFContext = FileContext.getFileContext(); // uses the default config
+ // which has your default FS
+
- myFContext.create(path, ...);
+
- myFContext.setWorkingDir(path)
+
- myFContext.open (path, ...);
+
+ Example 2: Get a FileContext with a specific URI as the default FS
+
+ - myFContext = FileContext.getFileContext(URI)
+
- myFContext.create(path, ...);
+ ...
+
+ Example 3: FileContext with local file system as the default
+
+ - myFContext = FileContext.getLocalFSFileContext()
+
- myFContext.create(path, ...);
+
- ...
+
+ Example 4: Use a specific config, ignoring $HADOOP_CONFIG
+ Generally you should not need use a config unless you are doing
+
+ - configX = someConfigSomeOnePassedToYou.
+
- myFContext = getFileContext(configX); // configX is not changed,
+ // is passed down
+
- myFContext.create(path, ...);
+
- ...
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f is
+ not supported
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for
+ f
is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for
+ pathPattern
is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ files does not
+ exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ Return all the files that match filePattern and are not checksum
+ files. Results are sorted by their names.
+
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+ -
+
+
+
- ?
+
- Matches any single character.
+
+
+
- *
+
- Matches zero or more characters.
+
+
+
- [abc]
+
- Matches a single character from character set
+ {a,b,c}.
+
+
+
- [a-b]
+
- Matches a single character from the character range
+ {a...b}. Note: character a must be
+ lexicographically less than or equal to character b.
+
+
+
- [^a]
+
- Matches a single char that is not from character set or range
+ {a}. Note that the ^ character must occur
+ immediately to the right of the opening bracket.
+
+
+
- \c
+
- Removes (escapes) any special meaning of character c.
+
+
+
- {ab,cd}
+
- Matches a string from the string set {ab, cd}
+
+
+
- {ab,c{de,fh}}
+
- Matches a string from string set {ab, cde, cfh}
+
+
+
+
+
+ @param pathPattern a regular expression specifying a pth pattern
+
+ @return an array of paths that match the path pattern
+
+ @throws AccessControlException If access is denied
+ @throws UnsupportedFileSystemException If file system for
+ pathPattern
is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ pathPattern is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dst already exists
+ @throws FileNotFoundException If src
does not exist
+ @throws ParentNotDirectoryException If parent of dst
is not
+ a directory
+ @throws UnsupportedFileSystemException If file system for
+ src
or dst
is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dst
is invalid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation throws an UnsupportedOperationException
.
+
+ @return the protocol scheme for the FileSystem.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ fs.scheme.class whose value names the FileSystem class.
+ The entire URI is passed to the FileSystem instance's initialize method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ fs.scheme.class whose value names the FileSystem class.
+ The entire URI is passed to the FileSystem instance's initialize method.
+ This always returns a new FileSystem object.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Fails if src is a file and dst is a directory.
+ Fails if src is a directory and dst is a file.
+ Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails
+ if the dst already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites
+ the dst if it is a file or an empty directory. Rename fails if dst is
+ a non-empty directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for
+ details. This default implementation is non atomic.
+
+ This method is deprecated since it is a temporary method added to
+ support the transition from FileSystem to FileContext for user
+ applications.
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @throws IOException on failure]]>
+
+
+
+
+
+
+
+
+ Fails if path is a directory.
+ Fails if path does not exist.
+ Fails if path is not closed.
+ Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true
if the file has been truncated to the desired
+ newLength
and is immediately available to be reused for
+ write operations such as append
, or
+ false
if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Return all the files that match filePattern and are not checksum
+ files. Results are sorted by their names.
+
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+ -
+
+
+
- ?
+
- Matches any single character.
+
+
+
- *
+
- Matches zero or more characters.
+
+
+
- [abc]
+
- Matches a single character from character set
+ {a,b,c}.
+
+
+
- [a-b]
+
- Matches a single character from the character range
+ {a...b}. Note that character a must be
+ lexicographically less than or equal to character b.
+
+
+
- [^a]
+
- Matches a single character that is not from character set or range
+ {a}. Note that the ^ character must occur
+ immediately to the right of the opening bracket.
+
+
+
- \c
+
- Removes (escapes) any special meaning of character c.
+
+
+
- {ab,cd}
+
- Matches a string from the string set {ab, cd}
+
+
+
- {ab,c{de,fh}}
+
- Matches a string from the string set {ab, cde, cfh}
+
+
+
+
+
+ @param pathPattern a regular expression specifying a pth pattern
+
+ @return an array of paths that match the path pattern
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications, must include entries
+ for user, group, and others for compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List of the XAttr names of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ All user code that may potentially use the Hadoop Distributed
+ File System should be written to use a FileSystem object. The
+ Hadoop DFS is a multi-machine system that appears as a single
+ disk. It's useful because of its fault tolerance and potentially
+ very large capacity.
+
+
+ The local implementation is {@link LocalFileSystem} and distributed
+ implementation is DistributedFileSystem.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ caller's environment variables to use
+ for expansion
+ @return String[] with absolute path to new jar in position 0 and
+ unexpanded wild card entry path in position 1
+ @throws IOException if there is an I/O error while writing the jar file]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ FilterFileSystem contains
+ some other file system, which it uses as
+ its basic file system, possibly transforming
+ the data along the way or providing additional
+ functionality. The class FilterFileSystem
+ itself simply overrides all methods of
+ FileSystem
with versions that
+ pass all requests to the contained file
+ system. Subclasses of FilterFileSystem
+ may further override some of these methods
+ and may also provide additional methods
+ and fields.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -1
+ if there is no more data because the end of the stream has been
+ reached]]>
+
+
+
+
+
+
+
+
+
+ length bytes have been read.
+
+ @param position position in the input stream to seek
+ @param buffer buffer into which data is read
+ @param offset offset into the buffer in which data is written
+ @param length the number of bytes to read
+ @throws EOFException If the end of stream is reached while reading.
+ If an exception is thrown an undetermined number
+ of bytes in the buffer may have been written.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @return har
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ path is invalid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @return file
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if and only if pathname
+ should be included]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ only to the given
+ valid time.
+
+ @param mtime the modification time to set (only if greater than zero).
+ @param atime currently ignored.
+ @throws IOException if setting the last modified time fails.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the iteration has more elements.
+
+ @return true if the iterator has more elements.
+ @throws IOException if any IO error occurs]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ XAttr is byte[], this class is to
+ covert byte[] to some kind of string representation or convert back.
+ String representation is convenient for display and input. For example
+ display in screen as shell response and json response, input as http
+ or shell parameter.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @return ftp
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A {@link FileSystem} backed by an FTP client provided by Apache Commons Net.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (cause==null ? null : cause.toString()) (which
+ typically contains the class and detail message of cause).
+ @param cause the cause (which is saved for later retrieval by the
+ {@link #getCause()} method). (A null value is
+ permitted, and indicates that the cause is nonexistent or
+ unknown.)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ But for removeAcl operation it will be false. i.e. AclSpec should
+ not contain permissions.
+ Example: "user:foo,group:bar"
+ @return Returns list of {@link AclEntry} parsed]]>
+
+
+
+
+
+
+
+
+ @param aclStr
+ String representation of an ACL.
+ Example: "user:foo:rw-"
+ @param includePermission
+ for setAcl operations this will be true. i.e. Acl should include
+ permissions.
+ But for removeAcl operation it will be false. i.e. Acl should not
+ contain permissions.
+ Example: "user:foo,group:bar,mask::"
+ @return Returns an {@link AclEntry} object]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ unmodifiable ordered list of all ACL entries]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Recommended to use this API ONLY if client communicates with the old
+ NameNode, needs to pass the Permission for the path to get effective
+ permission, else use {@link AclStatus#getEffectivePermission(AclEntry)}.
+ @param entry AclEntry to get the effective action
+ @param permArg Permission for the path. However if the client is NOT
+ communicating with old namenode, then this argument will not have
+ any preference.
+ @return Returns the effective permission for the entry.
+ @throws IllegalArgumentException If the client communicating with old
+ namenode and permission is not passed as an argument.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mode is invalid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @return viewfs
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ /user -> hdfs://nnContainingUserDir/user
+ /project/foo -> hdfs://nnProject1/projects/foo
+ /project/bar -> hdfs://nnProject2/projects/bar
+ /tmp -> hdfs://nnTmp/privateTmpForUserXXX
+
+
+ ViewFs is specified with the following URI: viewfs:///
+
+ To use viewfs one would typically set the default file system in the
+ config (i.e. fs.default.name< = viewfs:///) along with the
+ mount table config variables as described below.
+
+
+ ** Config variables to specify the mount table entries **
+
+
+ The file system is initialized from the standard Hadoop config through
+ config variables.
+ See {@link FsConstants} for URI and Scheme constants;
+ See {@link Constants} for config var constants;
+ see {@link ConfigUtil} for convenient lib.
+
+
+ All the mount table config entries for view fs are prefixed by
+ fs.viewfs.mounttable.
+ For example the above example can be specified with the following
+ config variables:
+
+ - fs.viewfs.mounttable.default.link./user=
+ hdfs://nnContainingUserDir/user
+
- fs.viewfs.mounttable.default.link./project/foo=
+ hdfs://nnProject1/projects/foo
+
- fs.viewfs.mounttable.default.link./project/bar=
+ hdfs://nnProject2/projects/bar
+
- fs.viewfs.mounttable.default.link./tmp=
+ hdfs://nnTmp/privateTmpForUserXXX
+
+
+ The default mount table (when no authority is specified) is
+ from config variables prefixed by fs.viewFs.mounttable.default
+ The authority component of a URI can be used to specify a different mount
+ table. For example,
+
+ - viewfs://sanjayMountable/
+
+ is initialized from fs.viewFs.mounttable.sanjayMountable.* config variables.
+
+
+ **** Merge Mounts **** (NOTE: merge mounts are not implemented yet.)
+
+
+ One can also use "MergeMounts" to merge several directories (this is
+ sometimes called union-mounts or junction-mounts in the literature.
+ For example of the home directories are stored on say two file systems
+ (because they do not fit on one) then one could specify a mount
+ entry such as following merges two dirs:
+
+ - /user -> hdfs://nnUser1/user,hdfs://nnUser2/user
+
+ Such a mergeLink can be specified with the following config var where ","
+ is used as the separator for each of links to be merged:
+
+ - fs.viewfs.mounttable.default.linkMerge./user=
+ hdfs://nnUser1/user,hdfs://nnUser1/user
+
+ A special case of the merge mount is where mount table's root is merged
+ with the root (slash) of another file system:
+
+ - fs.viewfs.mounttable.default.linkMergeSlash=hdfs://nn99/
+
+ In this cases the root of the mount table is merged with the root of
+ hdfs://nn99/ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Zookeeper disconnects can happen due to network issues or loss of
+ Zookeeper quorum. Thus enterNeutralMode can be used to guard against
+ split-brain issues. In such situations it might be prudent to call
+ becomeStandby too. However, such state change operations might be
+ expensive and enterNeutralMode can help guard against doing that for
+ transient issues.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The application will be notified with a callback only on state changes
+ (i.e. there will never be successive calls to becomeActive without an
+ intermediate call to enterNeutralMode).
+ The callbacks will be running on Zookeeper client library threads. The
+ application should return from these callbacks quickly so as not to impede
+ Zookeeper client library performance and notifications. The app will
+ typically remember the state change and return from the callback. It will
+ then proceed with implementing actions around that state change. It is
+ possible to be called back again while these actions are in flight and the
+ app should handle this scenario.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Since these methods are often vendor- or device-specific, operators
+ may implement this interface in order to achieve fencing.
+
+ Fencing is configured by the operator as an ordered list of methods to
+ attempt. Each method will be tried in turn, and the next in the list
+ will only be attempted if the previous one fails. See {@link NodeFencer}
+ for more information.
+
+ If an implementation also implements {@link Configurable} then its
+ setConf
method will be called upon instantiation.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ state (e.g ACTIVE/STANDBY) as well as
+ some additional information. {@see HAServiceStatus}
+
+ @throws AccessControlException
+ if access is denied.
+ @throws IOException
+ if other errors happen]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ shell(/path/to/my/script.sh arg1 arg2 ...)
+
+ The string between '(' and ')' is passed directly to a bash shell
+ (cmd.exe on Windows) and may not include any closing parentheses.
+
+ The shell command will be run with an environment set up to contain
+ all of the current Hadoop configuration variables, with the '_' character
+ replacing any '.' characters in the configuration keys.
+
+ If the shell command returns an exit code of 0, the fencing is
+ determined to be successful. If it returns any other exit code, the
+ fencing was not successful and the next fencing method in the list
+ will be attempted.
+
+ Note: this fencing method does not implement any timeout.
+ If timeouts are necessary, they should be implemented in the shell
+ script itself (eg by forking a subshell to kill its parent in
+ some number of seconds).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ fuser to kill the process listening on the service's
+ TCP port. This is more accurate than using "jps" since it doesn't
+ require parsing, and will work even if there are multiple service
+ processes running on the same machine.
+ It returns a successful status code if:
+
+ fuser
indicates it successfully killed a process, or
+ nc -z
indicates that nothing is listening on the target port
+
+
+ This fencing mechanism is configured as following in the fencing method
+ list:
+ sshfence([[username][:ssh-port]])
+ where the optional argument specifies the username and port to use
+ with ssh.
+
+ In order to achieve passwordless SSH, the operator must also configure
+ dfs.ha.fencing.ssh.private-key-files to point to an
+ SSH key that has passphrase-less access to the given username and host.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hadoop.http.filter.initializers.
+
+
+- StaticUserWebFilter - An authorization plugin that makes all
+users a static configured user.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ nth value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ nth value in the file.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ public class IntArrayWritable extends ArrayWritable {
+ public IntArrayWritable() {
+ super(IntWritable.class);
+ }
+ }
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ByteWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to store
+ @param item the object to be stored
+ @param keyName the name of the key to use
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param items the objects to be stored
+ @param keyName the name of the key to use
+ @throws IndexOutOfBoundsException if the items array is empty
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+ DefaultStringifier offers convenience methods to store/load objects to/from
+ the configuration.
+
+ @param the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a DoubleWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value argument is null or
+ its size is zero, the elementType argument must not be null. If
+ the argument value's size is bigger than zero, the argument
+ elementType is not be used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+ value should not be null
+ or empty.
+
+ @param value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value and elementType. If the value argument
+ is null or its size is zero, the elementType argument must not be
+ null. If the argument value's size is bigger than zero, the
+ argument elementType is not be used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an EnumSetWritable with the same value,
+ or both are null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a FloatWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When two sequence files, which have same Key type but different Value
+ types, are mapped out to reduce, multiple Value types is not allowed.
+ In this case, this class can help you wrap instances with different types.
+
+
+
+ Compared with ObjectWritable
, this class is much more effective,
+ because ObjectWritable
will append the class declaration as a String
+ into the output file in every Key-Value pair.
+
+
+
+ Generic Writable implements {@link Configurable} interface, so that it will be
+ configured by the framework. The configuration is passed to the wrapped objects
+ implementing {@link Configurable} interface before deserialization.
+
+
+ how to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implements the abstract method getTypes()
, defines
+ the classes which will be wrapped in GenericObject in application.
+ Attention: this classes defined in getTypes()
method, must
+ implement Writable
interface.
+
+
+ The code looks like this:
+
+ public class GenericObject extends GenericWritable {
+
+ private static Class[] CLASSES = {
+ ClassType1.class,
+ ClassType2.class,
+ ClassType3.class,
+ };
+
+ protected Class[] getTypes() {
+ return CLASSES;
+ }
+
+ }
+
+
+ @since Nov 8, 2006]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a IntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ closes the input and output streams
+ at the end.
+
+ @param in InputStrem to read from
+ @param out OutputStream to write to
+ @param conf the Configuration object]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ignore any {@link IOException} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param log the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is better than File#listDir because it does not ignore IOExceptions.
+
+ @param dir The directory to list.
+ @param filter If non-null, the filter to use when listing
+ this directory.
+ @return The list of files in the directory.
+
+ @throws IOException On I/O error]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a LongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A map is a directory containing two files, the data
file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+ The index file is read entirely into memory. Thus key implementations
+ should try to keep themselves small.
+
+
Map files are created by adding entries in-order. To maintain a large
+ database, perform updates by copying the previous version of a database and
+ merging in a sorted change list, to create a new version of the database in
+ a new file. Sorting large change lists can be done with {@link
+ SequenceFile.Sorter}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ key and
+ val
. Returns true if such a pair exists and false when at
+ the end of the map]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ key or if it does not exist, at the first entry
+ after the named key.
+
+- * @param key - key that we're trying to find
+- * @param val - data value if key is found
+- * @return - the key that was the closest match or null if eof.]]>
+
+
+
+
+
+
+
+
+ key does not exist, return
+ the first entry that falls just before the key
. Otherwise,
+ return the record that sorts just after.
+ @return - the key that was the closest match or null if eof.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an MD5Hash whose digest contains the
+ same values.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ className by first finding
+ it in the specified conf. If the specified conf is null,
+ try load it directly.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A {@link Comparator} that operates directly on byte representations of
+ objects.
+
+ @param
+ @see DeserializerComparator]]>
+
+
+
+
+
+
+
+ before
+ closing the related file descriptor.
+
+ It is safe to use even if the readahead request has already
+ been fulfilled.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An example of such an attack is:
+
+ - Malicious user removes his task's syslog file, and puts a link to the
+ jobToken file of a target user.
+ - Malicious user tries to open the syslog file via the servlet on the
+ tasktracker.
+ - The tasktracker is unaware of the symlink, and simply streams the contents
+ of the jobToken file. The malicious user can now access potentially sensitive
+ map outputs, etc. of the target user's job.
+
+ A similar attack is possible involving task log truncation, but in that case
+ due to an insecure write to a file.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ SequenceFiles are flat files consisting of binary key/value
+ pairs.
+
+ SequenceFile
provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
+
+ There are three SequenceFile
Writer
s based on the
+ {@link CompressionType} used to compress key/value pairs:
+
+ -
+
Writer
: Uncompressed records.
+
+ -
+
RecordCompressWriter
: Record-compressed files, only compress
+ values.
+
+ -
+
BlockCompressWriter
: Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+
+
+ The actual compression algorithm used to compress key and/or values can be
+ specified by using the appropriate {@link CompressionCodec}.
+
+ The recommended way is to use the static createWriter methods
+ provided by the SequenceFile
to chose the preferred format.
+
+ The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile
formats.
+
+
+
+ Essentially there are 3 different formats for SequenceFile
s
+ depending on the CompressionType
specified. All of them share a
+ common header described below.
+
+
+
+ -
+ version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ version number (e.g. SEQ4 or SEQ6)
+
+ -
+ keyClassName -key class
+
+ -
+ valueClassName - value class
+
+ -
+ compression - A boolean which specifies if compression is turned on for
+ keys/values in this file.
+
+ -
+ blockCompression - A boolean which specifies if block-compression is
+ turned on for keys/values in this file.
+
+ -
+ compression codec -
CompressionCodec
class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+
+ -
+ metadata - {@link Metadata} for this file.
+
+ -
+ sync - A sync marker to denote end of the header.
+
+
+
+
+
+ -
+ Header
+
+ -
+ Record
+
+ - Record length
+ - Key length
+ - Key
+ - Value
+
+
+ -
+ A sync-marker every few
100
bytes or so.
+
+
+
+
+
+ -
+ Header
+
+ -
+ Record
+
+ - Record length
+ - Key length
+ - Key
+ - Compressed Value
+
+
+ -
+ A sync-marker every few
100
bytes or so.
+
+
+
+
+
+ -
+ Header
+
+ -
+ Record Block
+
+ - Uncompressed number of records in the block
+ - Compressed key-lengths block-size
+ - Compressed key-lengths block
+ - Compressed keys block-size
+ - Compressed keys block
+ - Compressed value-lengths block-size
+ - Compressed value-lengths block
+ - Compressed values block-size
+ - Compressed values block
+
+
+ -
+ A sync-marker every block.
+
+
+
+ The compressed blocks of key lengths and value lengths consist of the
+ actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ format.
+
+ @see CompressionCodec]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ = 0. Otherwise,
+ the length is not available.
+ @return The opened stream.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ key, skipping its
+ value. True if another entry exists, and false at end of file.]]>
+
+
+
+
+
+
+
+ key and
+ val
. Returns true if such a pair exists and false when at
+ end of file]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The position passed must be a position returned by {@link
+ SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary
+ position, use {@link SequenceFile.Reader#sync(long)}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ SegmentDescriptor
+ @param segments the list of SegmentDescriptors
+ @param tmpDir the directory to write temporary files into
+ @return RawKeyValueIterator
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ For best performance, applications should make sure that the {@link
+ Writable#readFields(DataInput)} implementation of their keys is
+ very efficient. In particular, it should avoid allocating memory.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This always returns a synchronized position. In other words,
+ immediately after calling {@link SequenceFile.Reader#seek(long)} with a position
+ returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However
+ the key may be earlier in the file than key last written when this
+ method was called (e.g., with block-compression, it may be the first key
+ in the block that was being written when this method was called).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ key. Returns
+ true if such a key exists and false when at the end of the set.]]>
+
+
+
+
+
+
+ key.
+ Returns key
, or null if no match exists.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ShortWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ position. Note that this
+ method avoids using the converter or doing String instantiation
+ @return the Unicode scalar value at position or -1
+ if the position is invalid or points to a
+ trailing byte]]>
+
+
+
+
+
+
+
+
+
+ what in the backing
+ buffer, starting as position start
. The starting
+ position is measured in bytes and the return value is in
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+ @return byte position of the first occurence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: For performance reasons, this call does not clear the
+ underlying byte array that is retrievable via {@link #getBytes()}.
+ In order to free the byte-array memory, call {@link #set(byte[])}
+ with an empty byte array (For example, new byte[0]
).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a Text with the same contents.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+ @return ByteBuffer: bytes stores at ByteBuffer.array()
+ and length is ByteBuffer.limit()]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In
+ addition, it provides methods for string traversal without converting the
+ byte array to a string. Also includes utilities for
+ serializing/deserialing a string, coding/decoding a string, checking if a
+ byte array contains valid UTF8 code, calculating the length of an encoded
+ string.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is useful when a class may evolve, so that instances written by the
+ old version of the class may still be processed by the new version. To
+ handle this situation, {@link #readFields(DataInput)}
+ implementations should catch {@link VersionMismatchException}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VIntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VLongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ out.
+
+ @param out DataOuput
to serialize this object into.
+ @throws IOException]]>
+
+
+
+
+
+
+ in.
+
+ For efficiency, implementations should attempt to re-use storage in the
+ existing object where possible.
+
+ @param in DataInput
to deseriablize this object from.
+ @throws IOException]]>
+
+
+
+ Any key
or value
type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+ Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
+
+ Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+
]]>
+
+
+
+
+
+
+
+
+ WritableComparable
s can be compared to each other, typically
+ via Comparator
s. Any type which is to be used as a
+ key
in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+ Note that hashCode()
is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode()
implementation in Object
does not
+ satisfy this property.
+
+ Example:
+
+ public class MyWritableComparable implements WritableComparable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public int compareTo(MyWritableComparable o) {
+ int thisValue = this.value;
+ int thatValue = o.value;
+ return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ }
+
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + counter;
+ result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+ return result
+ }
+ }
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The default implementation reads the data into two {@link
+ WritableComparable}s (using {@link
+ Writable#readFields(DataInput)}, then calls {@link
+ #compare(WritableComparable,WritableComparable)}.]]>
+
+
+
+
+
+
+ The default implementation uses the natural ordering, calling {@link
+ Comparable#compareTo(Object)}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This base implemenation uses the natural ordering. To define alternate
+ orderings, override {@link #compare(WritableComparable,WritableComparable)}.
+
+ One may optimize compare-intensive operations by overriding
+ {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are
+ provided to assist in optimized implementations of this method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Enum type
+ @param in DataInput to read from
+ @param enumType Class type of Enum
+ @return Enum represented by String read from DataInput
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ len number of bytes in input streamin
+ @param in input stream
+ @param len number of bytes to skip
+ @throws IOException when skipped less number of bytes]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CompressionCodec for which to get the
+ Compressor
+ @param conf the Configuration
object which contains confs for creating or reinit the compressor
+ @return Compressor
for the given
+ CompressionCodec
from the pool or a new one]]>
+
+
+
+
+
+
+
+
+ CompressionCodec for which to get the
+ Decompressor
+ @return Decompressor
for the given
+ CompressionCodec
the pool or a new one]]>
+
+
+
+
+
+ Compressor to be returned to the pool]]>
+
+
+
+
+
+ Decompressor to be returned to the
+ pool]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example for the 'GzipCodec' codec class name the
+ alias are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec object]]>
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example for the 'GzipCodec' codec class name the
+ alias are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec class]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations are assumed to be buffered. This permits clients to
+ reposition the underlying input stream then call {@link #resetState()},
+ without having to also synchronize client buffers.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ #setInput() should be called in order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the end of the compressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+ (Both native and non-native versions of various Decompressors require
+ that the data passed in via b[]
remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called to
+ provide more input.
+
+ @return true
if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if a preset dictionary is needed for decompression.
+ @return true
if a preset dictionary is needed for decompression]]>
+
+
+
+
+ true if the end of the decompressed
+ data output stream has been reached. Indicates a concatenated data stream
+ when finished() returns true
and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true
if the end of the decompressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true and getRemaining() returns a positive value. If
+ {@link #finished()} returns true
and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+
+
+
+
+ true and {@link #getRemaining()} returns a positive value,
+ reset() is called before processing of the next data stream in the
+ concatenated data stream. {@link #finished()} will be reset and will
+ return false
when reset() is called.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .lz4.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .snappy.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "none" - No compression.
+ "lzo" - LZO compression.
+ "gz" - GZIP compression.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Block Compression.
+ Named meta data blocks.
+ Sorted or unsorted keys.
+ Seek by key or by file offset.
+
+ The memory footprint of a TFile includes the following:
+
+ - Some constant overhead of reading or writing a compressed block.
+
+ - Each compressed block requires one compression/decompression codec for
+ I/O.
+
- Temporary space to buffer the key.
+
- Temporary space to buffer the value (for TFile.Writer only). Values are
+ chunk encoded, so that we buffer at most one chunk of user data. By default,
+ the chunk buffer is 1MB. Reading chunked value does not require additional
+ memory.
+
+ - TFile index, which is proportional to the total number of Data Blocks.
+ The total amount of memory needed to hold the index can be estimated as
+ (56+AvgKeySize)*NumBlocks.
+
- MetaBlock index, which is proportional to the total number of Meta
+ Blocks.The total amount of memory needed to hold the index for Meta Blocks
+ can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
+
+
+ The behavior of TFile can be customized by the following variables through
+ Configuration:
+
+ - tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default
+ to 1MB. Values of the length less than the chunk size is guaranteed to have
+ known value length in read time (See
+ {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
+
- tfile.fs.output.buffer.size: Buffer size used for
+ FSDataOutputStream. Integer (in bytes). Default to 256KB.
+
- tfile.fs.input.buffer.size: Buffer size used for
+ FSDataInputStream. Integer (in bytes). Default to 256KB.
+
+
+ Suggestions on performance optimization.
+
+ - Minimum block size. We recommend a setting of minimum block size between
+ 256KB to 1MB for general usage. Larger block size is preferred if files are
+ primarily for sequential access. However, it would lead to inefficient random
+ access (because there are more data to decompress). Smaller blocks are good
+ for random access, but require more memory to hold the block index, and may
+ be slower to create (because we must flush the compressor stream at the
+ conclusion of each data block, which leads to an FS I/O flush). Further, due
+ to the internal caching in Compression codec, the smallest possible block
+ size would be around 20KB-30KB.
+
- The current implementation does not offer true multi-threading for
+ reading. The implementation uses FSDataInputStream seek()+read(), which is
+ shown to be much faster than positioned-read call in single thread mode.
+ However, it also means that if multiple threads attempt to access the same
+ TFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ sequentially even if they access different DFS blocks.
+
- Compression codec. Use "none" if the data is not very compressable (by
+ compressable, I mean a compression ratio at least 2:1). Generally, use "lzo"
+ as the starting point for experimenting. "gz" overs slightly better
+ compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
+ decompress, comparing to "lzo".
+
- File system buffering, if the underlying FSDataInputStream and
+ FSDataOutputStream is already adequately buffered; or if applications
+ reads/writes keys and values in large buffers, we can reduce the sizes of
+ input/output buffering in TFile layer by setting the configuration parameters
+ "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
+
+
+ Some design rationale behind TFile can be found at Hadoop-3315.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ entry of the TFile.
+ @param endKey
+ End key of the scan. If null, scan up to the last entry
+ of the TFile.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Use {@link Scanner#atEnd()} to test whether the cursor is at the end
+ location of the scanner.
+
+ Use {@link Scanner#advance()} to move the cursor to the next key-value
+ pair (or end if none exists). Use seekTo methods (
+ {@link Scanner#seekTo(byte[])} or
+ {@link Scanner#seekTo(byte[], int, int)}) to seek to any arbitrary
+ location in the covered range (including backward seeking). Use
+ {@link Scanner#rewind()} to seek back to the beginning of the scanner.
+ Use {@link Scanner#seekToEnd()} to seek to the end of the scanner.
+
+ Actual keys and values may be obtained through {@link Scanner.Entry}
+ object, which is obtained through {@link Scanner#entry()}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Algorithmic comparator: binary comparators that is language
+ independent. Currently, only "memcmp" is supported.
+ Language-specific comparator: binary comparators that can
+ only be constructed in specific language. For Java, the syntax
+ is "jclass:", followed by the class name of the RawComparator.
+ Currently, we only support RawComparators that can be
+ constructed through the default constructor (with no
+ parameters). Parameterized RawComparators such as
+ {@link WritableComparator} or
+ {@link JavaSerializationComparator} may not be directly used.
+ One should write a wrapper class that inherits from such classes
+ and use its default constructor to perform proper
+ initialization.
+
+ @param conf
+ The configuration object.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If an exception is thrown, the TFile will be in an inconsistent
+ state. The only legitimate call after that would be close]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Utils#writeVLong(out, n).
+
+ @param out
+ output stream
+ @param n
+ The integer to be encoded
+ @throws IOException
+ @see Utils#writeVLong(DataOutput, long)]]>
+
+
+
+
+
+
+
+
+ if n in [-32, 127): encode in one byte with the actual value.
+ Otherwise,
+ if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ byte[1]=n&0xff. Otherwise,
+ if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+ if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise:
+ if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+ if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff;
+ byte[4]=(n>>8)&0xff; byte[5]=n&0xff
+ if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff;
+ byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+ if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff;
+ byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff;
+ byte[7]=n&0xff;
+ if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff;
+ byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff;
+ byte[7]=(n>>8)&0xff; byte[8]=n&0xff;
+
+
+ @param out
+ output stream
+ @param n
+ the integer number
+ @throws IOException]]>
+
+
+
+
+
+
+ (int)Utils#readVLong(in).
+
+ @param in
+ input stream
+ @return the decoded integer
+ @throws IOException
+
+ @see Utils#readVLong(DataInput)]]>
+
+
+
+
+
+
+
+ if (FB >= -32), return (long)FB;
+ if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+ if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 +
+ NB[1]&0xff;
+ if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 +
+ (NB[1]&0xff)<<8 + NB[2]&0xff;
+ if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
+ big-endian integer.
+
+ @param in
+ input stream
+ @return the decoded long integer.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An experimental {@link Serialization} for Java {@link Serializable} classes.
+
+ @see JavaSerializationComparator]]>
+
+
+
+
+
+
+
+
+
+
+ A {@link RawComparator} that uses a {@link JavaSerialization}
+ {@link Deserializer} to deserialize objects that are then compared via
+ their {@link Comparable} interfaces.
+
+ @param
+ @see JavaSerialization]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides a mechanism for using different serialization frameworks
+in Hadoop. The property "io.serializations" defines a list of
+{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create
+{@link org.apache.hadoop.io.serializer.Serializer}s and
+{@link org.apache.hadoop.io.serializer.Deserializer}s.
+
+
+
+To add a new serialization framework write an implementation of
+{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the
+"io.serializations" property.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ avro.reflect.pkgs or implement
+ {@link AvroReflectSerializable} interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides Avro serialization in Hadoop. This can be used to
+serialize/deserialize Avro types in Hadoop.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for
+serialization of classes generated by Avro's 'specific' compiler.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for
+other classes.
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for
+any class which is either in the package list configured via
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES}
+or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable}
+interface.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This servlet generally will be placed under the /jmx URL for each
+ HttpServer. It provides read only
+ access to JMX metrics. The optional qry
parameter
+ may be used to query only a subset of the JMX Beans. This query
+ functionality is provided through the
+ {@link MBeanServer#queryNames(ObjectName, javax.management.QueryExp)}
+ method.
+
+ For example http://.../jmx?qry=Hadoop:*
will return
+ all hadoop metrics exposed through JMX.
+
+ The optional get
parameter is used to query an specific
+ attribute of a JMX bean. The format of the URL is
+ http://.../jmx?get=MXBeanName::AttributeName
+
+ For example
+
+ http://../jmx?get=Hadoop:service=NameNode,name=NameNodeInfo::ClusterId
+
will return the cluster id of the namenode mxbean.
+
+ If the qry
or the get
parameter is not formatted
+ correctly then a 400 BAD REQUEST http response code will be returned.
+
+ If a resouce such as a mbean or attribute can not be found,
+ a 404 SC_NOT_FOUND http response code will be returned.
+
+ The return format is JSON and in the form
+
+
+ {
+ "beans" : [
+ {
+ "name":"bean-name"
+ ...
+ }
+ ]
+ }
+
+
+ The servlet attempts to convert the the JMXBeans into JSON. Each
+ bean's attributes will be converted to a JSON object member.
+
+ If the attribute is a boolean, a number, a string, or an array
+ it will be converted to the JSON equivalent.
+
+ If the value is a {@link CompositeData} then it will be converted
+ to a JSON object with the keys as the name of the JSON member and
+ the value is converted following these same rules.
+
+ If the value is a {@link TabularData} then it will be converted
+ to an array of the {@link CompositeData} elements that it contains.
+
+ All other objects will be converted to a string and output as such.
+
+ The bean's name and modelerType will be returned for all beans.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Every event is a standalone JSON clause
+ Time is published as a time_t event since 1/1/1970
+ -this is the fastest to generate.
+ An ISO date is generated, but this is cached and will only be accurate to within a second
+ the stack trace is included as an array
+
+
+ A simple log event will resemble the following
+
+ {"name":"test","time":1318429136789,"date":"2011-10-12 15:18:56,789","level":"INFO","thread":"main","message":"test message"}
+
+
+ An event with an error will contain data similar to that below (which has been reformatted to be multi-line).
+
+
+ {
+ "name":"testException",
+ "time":1318429136789,
+ "date":"2011-10-12 15:18:56,789",
+ "level":"INFO",
+ "thread":"quoted\"",
+ "message":"new line\n and {}",
+ "exceptionclass":"java.net.NoRouteToHostException",
+ "stack":[
+ "java.net.NoRouteToHostException: that box caught fire 3 years ago",
+ "\tat org.apache.hadoop.log.TestLog4Json.testException(TestLog4Json.java:49)",
+ "\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)",
+ "\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)",
+ "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)",
+ "\tat java.lang.reflect.Method.invoke(Method.java:597)",
+ "\tat junit.framework.TestCase.runTest(TestCase.java:168)",
+ "\tat junit.framework.TestCase.runBare(TestCase.java:134)",
+ "\tat junit.framework.TestResult$1.protect(TestResult.java:110)",
+ "\tat junit.framework.TestResult.runProtected(TestResult.java:128)",
+ "\tat junit.framework.TestResult.run(TestResult.java:113)",
+ "\tat junit.framework.TestCase.run(TestCase.java:124)",
+ "\tat junit.framework.TestSuite.runTest(TestSuite.java:232)",
+ "\tat junit.framework.TestSuite.run(TestSuite.java:227)",
+ "\tat org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:83)",
+ "\tat org.apache.maven.surefire.junit4.JUnit4TestSet.execute(JUnit4TestSet.java:59)",
+ "\tat org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.executeTestSet(AbstractDirectoryTestSuite.java:120)",
+ "\tat org.apache.maven.surefire.suite.AbstractDirectoryTestSuite.execute(AbstractDirectoryTestSuite.java:145)",
+ "\tat org.apache.maven.surefire.Surefire.run(Surefire.java:104)",
+ "\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)",
+ "\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)",
+ "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)",
+ "\tat java.lang.reflect.Method.invoke(Method.java:597)",
+ "\tat org.apache.maven.surefire.booter.SurefireBooter.runSuitesInProcess(SurefireBooter.java:290)",
+ "\tat org.apache.maven.surefire.booter.SurefireBooter.main(SurefireBooter.java:1017)"
+ ]
+ }
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+The API is abstract so that it can be implemented on top of
+a variety of metrics client libraries. The choice of
+client library is a configuration option, and different
+modules within the same application can use
+different metrics implementation libraries.
+
+Sub-packages:
+
+ org.apache.hadoop.metrics.spi
+ - The abstract Server Provider Interface package. Those wishing to
+ integrate the metrics API with a particular metrics client library should
+ extend this package.
+
+ org.apache.hadoop.metrics.file
+ - An implementation package which writes the metric data to
+ a file, or sends it to the standard output stream.
+
+ -
org.apache.hadoop.metrics.ganglia
+ - An implementation package which sends metric data to
+ Ganglia.
+
+
+Introduction to the Metrics API
+
+Here is a simple example of how to use this package to report a single
+metric value:
+
+ private ContextFactory contextFactory = ContextFactory.getFactory();
+
+ void reportMyMetric(float myMetric) {
+ MetricsContext myContext = contextFactory.getContext("myContext");
+ MetricsRecord myRecord = myContext.getRecord("myRecord");
+ myRecord.setMetric("myMetric", myMetric);
+ myRecord.update();
+ }
+
+
+In this example there are three names:
+
+ - myContext
+ - The context name will typically identify either the application, or else a
+ module within an application or library.
+
+ - myRecord
+ - The record name generally identifies some entity for which a set of
+ metrics are to be reported. For example, you could have a record named
+ "cacheStats" for reporting a number of statistics relating to the usage of
+ some cache in your application.
+
+ - myMetric
+ - This identifies a particular metric. For example, you might have metrics
+ named "cache_hits" and "cache_misses".
+
+
+
+Tags
+
+In some cases it is useful to have multiple records with the same name. For
+example, suppose that you want to report statistics about each disk on a computer.
+In this case, the record name would be something like "diskStats", but you also
+need to identify the disk which is done by adding a tag to the record.
+The code could look something like this:
+
+ private MetricsRecord diskStats =
+ contextFactory.getContext("myContext").getRecord("diskStats");
+
+ void reportDiskMetrics(String diskName, float diskBusy, float diskUsed) {
+ diskStats.setTag("diskName", diskName);
+ diskStats.setMetric("diskBusy", diskBusy);
+ diskStats.setMetric("diskUsed", diskUsed);
+ diskStats.update();
+ }
+
+
+Buffering and Callbacks
+
+Data is not sent immediately to the metrics system when
+MetricsRecord.update()
is called. Instead it is stored in an
+internal table, and the contents of the table are sent periodically.
+This can be important for two reasons:
+
+ - It means that a programmer is free to put calls to this API in an
+ inner loop, since updates can be very frequent without slowing down
+ the application significantly.
+ - Some implementations can gain efficiency by combining many metrics
+ into a single UDP message.
+
+
+The API provides a timer-based callback via the
+registerUpdater()
method. The benefit of this
+versus using java.util.Timer
is that the callbacks will be done
+immediately before sending the data, making the data as current as possible.
+
+Configuration
+
+It is possible to programmatically examine and modify configuration data
+before creating a context, like this:
+
+ ContextFactory factory = ContextFactory.getFactory();
+ ... examine and/or modify factory attributes ...
+ MetricsContext context = factory.getContext("myContext");
+
+The factory attributes can be examined and modified using the following
+ContextFactory
methods:
+
+ Object getAttribute(String attributeName)
+ String[] getAttributeNames()
+ void setAttribute(String name, Object value)
+ void removeAttribute(attributeName)
+
+
+
+ContextFactory.getFactory()
initializes the factory attributes by
+reading the properties file hadoop-metrics.properties
if it exists
+on the class path.
+
+
+A factory attribute named:
+
+contextName.class
+
+should have as its value the fully qualified name of the class to be
+instantiated by a call of the CodeFactory
method
+getContext(contextName)
. If this factory attribute is not
+specified, the default is to instantiate
+org.apache.hadoop.metrics.file.FileContext
.
+
+
+Other factory attributes are specific to a particular implementation of this
+API and are documented elsewhere. For example, configuration attributes for
+the file and Ganglia implementations can be found in the javadoc for
+their respective packages.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Implementation of the metrics package that sends metric data to
+Ganglia.
+Programmers should not normally need to use this package directly. Instead
+they should use org.hadoop.metrics.
+
+
+These are the implementation specific factory attributes
+(See ContextFactory.getFactory()):
+
+
+ - contextName.servers
+ - Space and/or comma separated sequence of servers to which UDP
+ messages should be sent.
+
+ - contextName.period
+ - The period in seconds on which the metric data is sent to the
+ server(s).
+
+ - contextName.multicast
+ - Enable multicast for Ganglia
+
+ - contextName.multicast.ttl
+ - TTL for multicast packets
+
+ - contextName.units.recordName.metricName
+ - The units for the specified metric in the specified record.
+
+ - contextName.slope.recordName.metricName
+ - The slope for the specified metric in the specified record.
+
+ - contextName.tmax.recordName.metricName
+ - The tmax for the specified metric in the specified record.
+
+ - contextName.dmax.recordName.metricName
+ - The dmax for the specified metric in the specified record.
+
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ contextName.tableName. The returned map consists of
+ those attributes with the contextName and tableName stripped off.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ recordName.
+ Throws an exception if the metrics implementation is configured with a fixed
+ set of record names and recordName
is not in that set.
+
+ @param recordName the name of the record
+ @throws MetricsException if recordName conflicts with configuration data]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class implements the internal table of metric data, and the timer
+ on which data is to be sent to the metrics system. Subclasses must
+ override the abstract emitRecord
method in order to transmit
+ the data. ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ update
+ and remove()
.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+ org.apache.hadoop.metrics.file and
+org.apache.hadoop.metrics.ganglia
.
+
+Plugging in an implementation involves writing a concrete subclass of
+AbstractMetricsContext
. The subclass should get its
+ configuration information using the getAttribute(attributeName)
+ method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations of this interface consume the {@link MetricsRecord} generated
+ from {@link MetricsSource}. It registers with {@link MetricsSystem} which
+ periodically pushes the {@link MetricsRecord} to the sink using
+ {@link #putMetrics(MetricsRecord)} method. If the implementing class also
+ implements {@link Closeable}, then the MetricsSystem will close the sink when
+ it is stopped.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the actual type of the source object
+ @param source object to register
+ @param name of the source. Must be unique or null (then extracted from
+ the annotations of the source object.)
+ @param desc the description of the source (or null. See above.)
+ @return the source object
+ @exception MetricsException]]>
+
+
+
+
+
+
+
+
+
+
+
+ the actual type of the source object
+ @param source object to register
+ @return the source object
+ @exception MetricsException]]>
+
+
+
+
+
+
+
+ the type of the sink
+ @param sink to register
+ @param name of the sink. Must be unique.
+ @param desc the description of the sink
+ @return the sink
+ @exception MetricsException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,name="
+ Where the and are the supplied parameters
+
+ @param serviceName
+ @param nameName
+ @param theMbean - the MBean to register
+ @return the named used to register the MBean]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @param specs server specs (see description)
+ @param defaultPort the default port if not specified
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is used when parts of Hadoop need know whether to apply
+ single rack vs multi-rack policies, such as during block placement.
+ Such algorithms behave differently if they are on multi-switch systems.
+
+
+ @return true if the mapping thinks that it is on a single switch]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This predicate simply assumes that all mappings not derived from
+ this class are multi-switch.
+ @param mapping the mapping to query
+ @return true if the base class says it is single switch, or the mapping
+ is not derived from this class.]]>
+
+
+
+ It is not mandatory to
+ derive {@link DNSToSwitchMapping} implementations from it, but it is strongly
+ recommended, as it makes it easy for the Hadoop developers to add new methods
+ to this base class that are automatically picked up by all implementations.
+
+
+ This class does not extend the Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured
calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If a name cannot be resolved to a rack, the implementation
+ should return {@link NetworkTopology#DEFAULT_RACK}. This
+ is what the bundled implementations do, though it is not a formal requirement
+
+ @param names the list of hosts to resolve (can be empty)
+ @return list of resolved network paths.
+ If names is empty, the returned list is also empty]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Calling {@link #setConf(Configuration)} will trigger a
+ re-evaluation of the configuration settings and so be used to
+ set up the mapping script.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will get called in the superclass constructor, so a check is needed
+ to ensure that the raw mapping is defined before trying to relaying a null
+ configuration.
+ @param conf]]>
+
+
+
+
+
+
+
+
+
+ It contains a static class RawScriptBasedMapping
that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text
+ file. The columns are separated by whitespace. The first column is a DNS or
+ IP address and the second column specifies the rack where the address maps.
+
+
+ This class uses the configuration parameter {@code
+ net.topology.table.file.name} to locate the mapping file.
+
+
+ Calls to {@link #resolve(List)} will look up the address as defined in the
+ mapping file. If no entry corresponding to the address is found, the value
+ {@code /default-rack} is returned.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =} getCount().
+ @param newCapacity The new capacity in bytes.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Index idx = startVector(...);
+ while (!idx.done()) {
+ .... // read element of a vector
+ idx.incr();
+ }
+
+
+ @deprecated Replaced by Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+ (DEPRECATED) Hadoop record I/O contains classes and a record description language
+ translator for simplifying serialization and deserialization of records in a
+ language-neutral manner.
+
+
+
+ DEPRECATED: Replaced by Avro.
+
+
+ Introduction
+
+ Software systems of any significant complexity require mechanisms for data
+interchange with the outside world. These interchanges typically involve the
+marshaling and unmarshaling of logical units of data to and from data streams
+(files, network connections, memory buffers etc.). Applications usually have
+some code for serializing and deserializing the data types that they manipulate
+embedded in them. The work of serialization has several features that make
+automatic code generation for it worthwhile. Given a particular output encoding
+(binary, XML, etc.), serialization of primitive types and simple compositions
+of primitives (structs, vectors etc.) is a very mechanical task. Manually
+written serialization code can be susceptible to bugs especially when records
+have a large number of fields or a record definition changes between software
+versions. Lastly, it can be very useful for applications written in different
+programming languages to be able to share and interchange data. This can be
+made a lot easier by describing the data records manipulated by these
+applications in a language agnostic manner and using the descriptions to derive
+implementations of serialization in multiple target languages.
+
+This document describes Hadoop Record I/O, a mechanism that is aimed
+at
+
+- enabling the specification of simple serializable data types (records)
+
- enabling the generation of code in multiple target languages for
+marshaling and unmarshaling such types
+
- providing target language specific support that will enable application
+programmers to incorporate generated code into their applications
+
+
+The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR,
+ASN.1, PADS and ICE. While these systems all include a DDL that enables
+the specification of most record types, they differ widely in what else they
+focus on. The focus in Hadoop Record I/O is on data marshaling and
+multi-lingual support. We take a translator-based approach to serialization.
+Hadoop users have to describe their data in a simple data description
+language. The Hadoop DDL translator rcc generates code that users
+can invoke in order to read/write their data from/to simple stream
+abstractions. Next we list explicitly some of the goals and non-goals of
+Hadoop Record I/O.
+
+
+Goals
+
+
+- Support for commonly used primitive types. Hadoop should include as
+primitives commonly used builtin types from programming languages we intend to
+support.
+
+
- Support for common data compositions (including recursive compositions).
+Hadoop should support widely used composite types such as structs and
+vectors.
+
+
- Code generation in multiple target languages. Hadoop should be capable of
+generating serialization code in multiple target languages and should be
+easily extensible to new target languages. The initial target languages are
+C++ and Java.
+
+
- Support for generated target languages. Hadooop should include support
+in the form of headers, libraries, packages for supported target languages
+that enable easy inclusion and use of generated code in applications.
+
+
- Support for multiple output encodings. Candidates include
+packed binary, comma-separated text, XML etc.
+
+
- Support for specifying record types in a backwards/forwards compatible
+manner. This will probably be in the form of support for optional fields in
+records. This version of the document does not include a description of the
+planned mechanism, we intend to include it in the next iteration.
+
+
+
+Non-Goals
+
+
+ - Serializing existing arbitrary C++ classes.
+
- Serializing complex data structures such as trees, linked lists etc.
+
- Built-in indexing schemes, compression, or check-sums.
+
- Dynamic construction of objects from an XML schema.
+
+
+The remainder of this document describes the features of Hadoop record I/O
+in more detail. Section 2 describes the data types supported by the system.
+Section 3 lays out the DDL syntax with some examples of simple records.
+Section 4 describes the process of code generation with rcc. Section 5
+describes target language mappings and support for Hadoop types. We include a
+fairly complete description of C++ mappings with intent to include Java and
+others in upcoming iterations of this document. The last section talks about
+supported output encodings.
+
+
+Data Types and Streams
+
+This section describes the primitive and composite types supported by Hadoop.
+We aim to support a set of types that can be used to simply and efficiently
+express a wide range of record types in different programming languages.
+
+Primitive Types
+
+For the most part, the primitive types of Hadoop map directly to primitive
+types in high level programming languages. Special cases are the
+ustring (a Unicode string) and buffer types, which we believe
+find wide use and which are usually implemented in library code and not
+available as language built-ins. Hadoop also supplies these via library code
+when a target language built-in is not present and there is no widely
+adopted "standard" implementation. The complete list of primitive types is:
+
+
+ - byte: An 8-bit unsigned integer.
+
- boolean: A boolean value.
+
- int: A 32-bit signed integer.
+
- long: A 64-bit signed integer.
+
- float: A single precision floating point number as described by
+ IEEE-754.
+
- double: A double precision floating point number as described by
+ IEEE-754.
+
- ustring: A string consisting of Unicode characters.
+
- buffer: An arbitrary sequence of bytes.
+
+
+
+Composite Types
+Hadoop supports a small set of composite types that enable the description
+of simple aggregate types and containers. A composite type is serialized
+by sequentially serializing it constituent elements. The supported
+composite types are:
+
+
+
+ - record: An aggregate type like a C-struct. This is a list of
+typed fields that are together considered a single unit of data. A record
+is serialized by sequentially serializing its constituent fields. In addition
+to serialization a record has comparison operations (equality and less-than)
+implemented for it, these are defined as memberwise comparisons.
+
+
- vector: A sequence of entries of the same data type, primitive
+or composite.
+
+
- map: An associative container mapping instances of a key type to
+instances of a value type. The key and value types may themselves be primitive
+or composite types.
+
+
+
+Streams
+
+Hadoop generates code for serializing and deserializing record types to
+abstract streams. For each target language Hadoop defines very simple input
+and output stream interfaces. Application writers can usually develop
+concrete implementations of these by putting a one method wrapper around
+an existing stream implementation.
+
+
+DDL Syntax and Examples
+
+We now describe the syntax of the Hadoop data description language. This is
+followed by a few examples of DDL usage.
+
+Hadoop DDL Syntax
+
+
+recfile = *include module *record
+include = "include" path
+path = (relative-path / absolute-path)
+module = "module" module-name
+module-name = name *("." name)
+record := "class" name "{" 1*(field) "}"
+field := type name ";"
+name := ALPHA (ALPHA / DIGIT / "_" )*
+type := (ptype / ctype)
+ptype := ("byte" / "boolean" / "int" |
+ "long" / "float" / "double"
+ "ustring" / "buffer")
+ctype := (("vector" "<" type ">") /
+ ("map" "<" type "," type ">" ) ) / name)
+
+
+A DDL file describes one or more record types. It begins with zero or
+more include declarations, a single mandatory module declaration
+followed by zero or more class declarations. The semantics of each of
+these declarations are described below:
+
+
+
+- include: An include declaration specifies a DDL file to be
+referenced when generating code for types in the current DDL file. Record types
+in the current compilation unit may refer to types in all included files.
+File inclusion is recursive. An include does not trigger code
+generation for the referenced file.
+
+
- module: Every Hadoop DDL file must have a single module
+declaration that follows the list of includes and precedes all record
+declarations. A module declaration identifies a scope within which
+the names of all types in the current file are visible. Module names are
+mapped to C++ namespaces, Java packages etc. in generated code.
+
+
- class: Records types are specified through class
+declarations. A class declaration is like a Java class declaration.
+It specifies a named record type and a list of fields that constitute records
+of the type. Usage is illustrated in the following examples.
+
+
+
+Examples
+
+
+- A simple DDL file links.jr with just one record declaration.
+
+module links {
+ class Link {
+ ustring URL;
+ boolean isRelative;
+ ustring anchorText;
+ };
+}
+
+
+ - A DDL file outlinks.jr which includes another
+
+include "links.jr"
+
+module outlinks {
+ class OutLinks {
+ ustring baseURL;
+ vector outLinks;
+ };
+}
+
+
+
+Code Generation
+
+The Hadoop translator is written in Java. Invocation is done by executing a
+wrapper shell script named named rcc. It takes a list of
+record description files as a mandatory argument and an
+optional language argument (the default is Java) --language or
+-l. Thus a typical invocation would look like:
+
+$ rcc -l C++ ...
+
+
+
+Target Language Mappings and Support
+
+For all target languages, the unit of code generation is a record type.
+For each record type, Hadoop generates code for serialization and
+deserialization, record comparison and access to record members.
+
+C++
+
+Support for including Hadoop generated C++ code in applications comes in the
+form of a header file recordio.hh which needs to be included in source
+that uses Hadoop types and a library librecordio.a which applications need
+to be linked with. The header declares the Hadoop C++ namespace which defines
+appropriate types for the various primitives, the basic interfaces for
+records and streams and enumerates the supported serialization encodings.
+Declarations of these interfaces and a description of their semantics follow:
+
+
+namespace hadoop {
+
+ enum RecFormat { kBinary, kXML, kCSV };
+
+ class InStream {
+ public:
+ virtual ssize_t read(void *buf, size_t n) = 0;
+ };
+
+ class OutStream {
+ public:
+ virtual ssize_t write(const void *buf, size_t n) = 0;
+ };
+
+ class IOError : public runtime_error {
+ public:
+ explicit IOError(const std::string& msg);
+ };
+
+ class IArchive;
+ class OArchive;
+
+ class RecordReader {
+ public:
+ RecordReader(InStream& in, RecFormat fmt);
+ virtual ~RecordReader(void);
+
+ virtual void read(Record& rec);
+ };
+
+ class RecordWriter {
+ public:
+ RecordWriter(OutStream& out, RecFormat fmt);
+ virtual ~RecordWriter(void);
+
+ virtual void write(Record& rec);
+ };
+
+
+ class Record {
+ public:
+ virtual std::string type(void) const = 0;
+ virtual std::string signature(void) const = 0;
+ protected:
+ virtual bool validate(void) const = 0;
+
+ virtual void
+ serialize(OArchive& oa, const std::string& tag) const = 0;
+
+ virtual void
+ deserialize(IArchive& ia, const std::string& tag) = 0;
+ };
+}
+
+
+
+
+- RecFormat: An enumeration of the serialization encodings supported
+by this implementation of Hadoop.
+
+
- InStream: A simple abstraction for an input stream. This has a
+single public read method that reads n bytes from the stream into
+the buffer buf. Has the same semantics as a blocking read system
+call. Returns the number of bytes read or -1 if an error occurs.
+
+
- OutStream: A simple abstraction for an output stream. This has a
+single write method that writes n bytes to the stream from the
+buffer buf. Has the same semantics as a blocking write system
+call. Returns the number of bytes written or -1 if an error occurs.
+
+
- RecordReader: A RecordReader reads records one at a time from
+an underlying stream in a specified record format. The reader is instantiated
+with a stream and a serialization format. It has a read method that
+takes an instance of a record and deserializes the record from the stream.
+
+
- RecordWriter: A RecordWriter writes records one at a
+time to an underlying stream in a specified record format. The writer is
+instantiated with a stream and a serialization format. It has a
+write method that takes an instance of a record and serializes the
+record to the stream.
+
+
- Record: The base class for all generated record types. This has two
+public methods type and signature that return the typename and the
+type signature of the record.
+
+
+
+Two files are generated for each record file (note: not for each record). If a
+record file is named "name.jr", the generated files are
+"name.jr.cc" and "name.jr.hh" containing serialization
+implementations and record type declarations respectively.
+
+For each record in the DDL file, the generated header file will contain a
+class definition corresponding to the record type, method definitions for the
+generated type will be present in the '.cc' file. The generated class will
+inherit from the abstract class hadoop::Record. The DDL files
+module declaration determines the namespace the record belongs to.
+Each '.' delimited token in the module declaration results in the
+creation of a namespace. For instance, the declaration module docs.links
+results in the creation of a docs namespace and a nested
+docs::links namespace. In the preceding examples, the Link class
+is placed in the links namespace. The header file corresponding to
+the links.jr file will contain:
+
+
+namespace links {
+ class Link : public hadoop::Record {
+ // ....
+ };
+};
+
+
+Each field within the record will cause the generation of a private member
+declaration of the appropriate type in the class declaration, and one or more
+acccessor methods. The generated class will implement the serialize and
+deserialize methods defined in hadoop::Record+. It will also
+implement the inspection methods type and signature from
+hadoop::Record. A default constructor and virtual destructor will also
+be generated. Serialization code will read/write records into streams that
+implement the hadoop::InStream and the hadoop::OutStream interfaces.
+
+For each member of a record an accessor method is generated that returns
+either the member or a reference to the member. For members that are returned
+by value, a setter method is also generated. This is true for primitive
+data members of the types byte, int, long, boolean, float and
+double. For example, for a int field called MyField the folowing
+code is generated.
+
+
+...
+private:
+ int32_t mMyField;
+ ...
+public:
+ int32_t getMyField(void) const {
+ return mMyField;
+ };
+
+ void setMyField(int32_t m) {
+ mMyField = m;
+ };
+ ...
+
+
+For a ustring or buffer or composite field. The generated code
+only contains accessors that return a reference to the field. A const
+and a non-const accessor are generated. For example:
+
+
+...
+private:
+ std::string mMyBuf;
+ ...
+public:
+
+ std::string& getMyBuf() {
+ return mMyBuf;
+ };
+
+ const std::string& getMyBuf() const {
+ return mMyBuf;
+ };
+ ...
+
+
+Examples
+
+Suppose the inclrec.jr file contains:
+
+module inclrec {
+ class RI {
+ int I32;
+ double D;
+ ustring S;
+ };
+}
+
+
+and the testrec.jr file contains:
+
+
+include "inclrec.jr"
+module testrec {
+ class R {
+ vector VF;
+ RI Rec;
+ buffer Buf;
+ };
+}
+
+
+Then the invocation of rcc such as:
+
+$ rcc -l c++ inclrec.jr testrec.jr
+
+will result in generation of four files:
+inclrec.jr.{cc,hh} and testrec.jr.{cc,hh}.
+
+The inclrec.jr.hh will contain:
+
+
+#ifndef _INCLREC_JR_HH_
+#define _INCLREC_JR_HH_
+
+#include "recordio.hh"
+
+namespace inclrec {
+
+ class RI : public hadoop::Record {
+
+ private:
+
+ int32_t I32;
+ double D;
+ std::string S;
+
+ public:
+
+ RI(void);
+ virtual ~RI(void);
+
+ virtual bool operator==(const RI& peer) const;
+ virtual bool operator<(const RI& peer) const;
+
+ virtual int32_t getI32(void) const { return I32; }
+ virtual void setI32(int32_t v) { I32 = v; }
+
+ virtual double getD(void) const { return D; }
+ virtual void setD(double v) { D = v; }
+
+ virtual std::string& getS(void) const { return S; }
+ virtual const std::string& getS(void) const { return S; }
+
+ virtual std::string type(void) const;
+ virtual std::string signature(void) const;
+
+ protected:
+
+ virtual void serialize(hadoop::OArchive& a) const;
+ virtual void deserialize(hadoop::IArchive& a);
+ };
+} // end namespace inclrec
+
+#endif /* _INCLREC_JR_HH_ */
+
+
+
+The testrec.jr.hh file will contain:
+
+
+
+
+#ifndef _TESTREC_JR_HH_
+#define _TESTREC_JR_HH_
+
+#include "inclrec.jr.hh"
+
+namespace testrec {
+ class R : public hadoop::Record {
+
+ private:
+
+ std::vector VF;
+ inclrec::RI Rec;
+ std::string Buf;
+
+ public:
+
+ R(void);
+ virtual ~R(void);
+
+ virtual bool operator==(const R& peer) const;
+ virtual bool operator<(const R& peer) const;
+
+ virtual std::vector& getVF(void) const;
+ virtual const std::vector& getVF(void) const;
+
+ virtual std::string& getBuf(void) const ;
+ virtual const std::string& getBuf(void) const;
+
+ virtual inclrec::RI& getRec(void) const;
+ virtual const inclrec::RI& getRec(void) const;
+
+ virtual bool serialize(hadoop::OutArchive& a) const;
+ virtual bool deserialize(hadoop::InArchive& a);
+
+ virtual std::string type(void) const;
+ virtual std::string signature(void) const;
+ };
+}; // end namespace testrec
+#endif /* _TESTREC_JR_HH_ */
+
+
+
+Java
+
+Code generation for Java is similar to that for C++. A Java class is generated
+for each record type with private members corresponding to the fields. Getters
+and setters for fields are also generated. Some differences arise in the
+way comparison is expressed and in the mapping of modules to packages and
+classes to files. For equality testing, an equals method is generated
+for each record type. As per Java requirements a hashCode method is also
+generated. For comparison a compareTo method is generated for each
+record type. This has the semantics as defined by the Java Comparable
+interface, that is, the method returns a negative integer, zero, or a positive
+integer as the invoked object is less than, equal to, or greater than the
+comparison parameter.
+
+A .java file is generated per record type as opposed to per DDL
+file as in C++. The module declaration translates to a Java
+package declaration. The module name maps to an identical Java package
+name. In addition to this mapping, the DDL compiler creates the appropriate
+directory hierarchy for the package and places the generated .java
+files in the correct directories.
+
+Mapping Summary
+
+
+DDL Type C++ Type Java Type
+
+boolean bool boolean
+byte int8_t byte
+int int32_t int
+long int64_t long
+float float float
+double double double
+ustring std::string java.lang.String
+buffer std::string org.apache.hadoop.record.Buffer
+class type class type class type
+vector std::vector java.util.ArrayList
+map std::map java.util.TreeMap
+
+
+Data encodings
+
+This section describes the format of the data encodings supported by Hadoop.
+Currently, three data encodings are supported, namely binary, CSV and XML.
+
+Binary Serialization Format
+
+The binary data encoding format is fairly dense. Serialization of composite
+types is simply defined as a concatenation of serializations of the constituent
+elements (lengths are included in vectors and maps).
+
+Composite types are serialized as follows:
+
+- class: Sequence of serialized members.
+
- vector: The number of elements serialized as an int. Followed by a
+sequence of serialized elements.
+
- map: The number of key value pairs serialized as an int. Followed
+by a sequence of serialized (key,value) pairs.
+
+
+Serialization of primitives is more interesting, with a zero compression
+optimization for integral types and normalization to UTF-8 for strings.
+Primitive types are serialized as follows:
+
+
+- byte: Represented by 1 byte, as is.
+
- boolean: Represented by 1-byte (0 or 1)
+
- int/long: Integers and longs are serialized zero compressed.
+Represented as 1-byte if -120 <= value < 128. Otherwise, serialized as a
+sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte represents
+the number of trailing bytes, N, as the negative number (-120-N). For example,
+the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'.
+This doesn't help much for 4-byte integers but does a reasonably good job with
+longs without bit twiddling.
+
- float/double: Serialized in IEEE 754 single and double precision
+format in network byte order. This is the format used by Java.
+
- ustring: Serialized as 4-byte zero compressed length followed by
+data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native
+language representation.
+
- buffer: Serialized as a 4-byte zero compressed length followed by the
+raw bytes in the buffer.
+
+
+
+CSV Serialization Format
+
+The CSV serialization format has a lot more structure than the "standard"
+Excel CSV format, but we believe the additional structure is useful because
+
+
+- it makes parsing a lot easier without detracting too much from legibility
+
- the delimiters around composites make it obvious when one is reading a
+sequence of Hadoop records
+
+
+Serialization formats for the various types are detailed in the grammar that
+follows. The notable feature of the formats is the use of delimiters for
+indicating the certain field types.
+
+
+- A string field begins with a single quote (').
+
- A buffer field begins with a sharp (#).
+
- A class, vector or map begins with 's{', 'v{' or 'm{' respectively and
+ends with '}'.
+
+
+The CSV format can be described by the following grammar:
+
+
+record = primitive / struct / vector / map
+primitive = boolean / int / long / float / double / ustring / buffer
+
+boolean = "T" / "F"
+int = ["-"] 1*DIGIT
+long = ";" ["-"] 1*DIGIT
+float = ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+double = ";" ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+
+ustring = "'" *(UTF8 char except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+buffer = "#" *(BYTE except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+struct = "s{" record *("," record) "}"
+vector = "v{" [record *("," record)] "}"
+map = "m{" [*(record "," record)] "}"
+
+
+XML Serialization Format
+
+The XML serialization format is the same used by Apache XML-RPC
+(http://ws.apache.org/xmlrpc/types.html). This is an extension of the original
+XML-RPC format and adds some additional data types. All record I/O types are
+not directly expressible in this format, and access to a DDL is required in
+order to convert these to valid types. All types primitive or composite are
+represented by <value> elements. The particular XML-RPC type is
+indicated by a nested element in the <value> element. The encoding for
+records is always UTF-8. Primitive types are serialized as follows:
+
+
+- byte: XML tag <ex:i1>. Values: 1-byte unsigned
+integers represented in US-ASCII
+
- boolean: XML tag <boolean>. Values: "0" or "1"
+
- int: XML tags <i4> or <int>. Values: 4-byte
+signed integers represented in US-ASCII.
+
- long: XML tag <ex:i8>. Values: 8-byte signed integers
+represented in US-ASCII.
+
- float: XML tag <ex:float>. Values: Single precision
+floating point numbers represented in US-ASCII.
+
- double: XML tag <double>. Values: Double precision
+floating point numbers represented in US-ASCII.
+
- ustring: XML tag <;string>. Values: String values
+represented as UTF-8. XML does not permit all Unicode characters in literal
+data. In particular, NULLs and control chars are not allowed. Additionally,
+XML processors are required to replace carriage returns with line feeds and to
+replace CRLF sequences with line feeds. Programming languages that we work
+with do not impose these restrictions on string types. To work around these
+restrictions, disallowed characters and CRs are percent escaped in strings.
+The '%' character is also percent escaped.
+
- buffer: XML tag <string&>. Values: Arbitrary binary
+data. Represented as hexBinary, each byte is replaced by its 2-byte
+hexadecimal representation.
+
+
+Composite types are serialized as follows:
+
+
+- class: XML tag <struct>. A struct is a sequence of
+<member> elements. Each <member> element has a <name>
+element and a <value> element. The <name> is a string that must
+match /[a-zA-Z][a-zA-Z0-9_]*/. The value of the member is represented
+by a <value> element.
+
+
- vector: XML tag <array<. An <array> contains a
+single <data> element. The <data> element is a sequence of
+<value> elements each of which represents an element of the vector.
+
+
- map: XML tag <array>. Same as vector.
+
+
+
+For example:
+
+
+class {
+ int MY_INT; // value 5
+ vector MY_VEC; // values 0.1, -0.89, 2.45e4
+ buffer MY_BUF; // value '\00\n\tabc%'
+}
+
+
+is serialized as
+
+
+<value>
+ <struct>
+ <member>
+ <name>MY_INT</name>
+ <value><i4>5</i4></value>
+ </member>
+ <member>
+ <name>MY_VEC</name>
+ <value>
+ <array>
+ <data>
+ <value><ex:float>0.1</ex:float></value>
+ <value><ex:float>-0.89</ex:float></value>
+ <value><ex:float>2.45e4</ex:float></value>
+ </data>
+ </array>
+ </value>
+ </member>
+ <member>
+ <name>MY_BUF</name>
+ <value><string>%00\n\tabc%25</string></value>
+ </member>
+ </struct>
+</value>
+
]]>
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+ (DEPRECATED) This package contains classes needed for code generation
+ from the hadoop record compiler. CppGenerator and JavaGenerator
+ are the main entry points from the parser. There are classes
+ corrsponding to every primitive type and compound type
+ included in Hadoop record I/O syntax.
+
+
+
+ DEPRECATED: Replaced by Avro.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This task takes the given record definition files and compiles them into
+ java or c++
+ files. It is then up to the user to compile the generated files.
+
+ The task requires the file
or the nested fileset element to be
+ specified. Optional attributes are language
(set the output
+ language, default is "java"),
+ destdir
(name of the destination directory for generated java/c++
+ code, default is ".") and failonerror
(specifies error handling
+ behavior. default is true).
+
Usage
+
+ <recordcc
+ destdir="${basedir}/gensrc"
+ language="java">
+ <fileset include="**\/*.jr" />
+ </recordcc>
+
+
+ @deprecated Replaced by Avro.]]>
+
+
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+ (DEPRECATED) This package contains code generated by JavaCC from the
+ Hadoop record syntax file rcc.jj. For details about the
+ record file syntax please @see org.apache.hadoop.record.
+
+
+
+ DEPRECATED: Replaced by Avro.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Propagates to hadoop-auth AuthenticationFilter configuration all Hadoop
+ configuration properties prefixed with "hadoop.http.authentication."
+
+ @param container The filter container
+ @param conf Configuration for run-time parameters]]>
+
+
+
+
+
+
+
+
+ It enables anonymous access, simple/speudo and Kerberos HTTP SPNEGO
+ authentication for Hadoop JobTracker, NameNode, DataNodes and
+ TaskTrackers.
+
+ Refer to the core-default.xml
file, after the comment
+ 'HTTP Authentication' for details on the configuration options.
+ All related configuration properties have 'hadoop.http.authentication.'
+ as prefix.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mapping
+ and mapping]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ % hadoop credential create alias [-provider providerPath]
+ % hadoop credential list [-provider providerPath]
+ % hadoop credential delete alias [-provider providerPath] [-f]
+
+ @param args
+ @return 0 if the argument(s) were recognized, 1 otherwise
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: Java doesn't appear able to extract international characters
+ from the SubjectAlts. It can only extract international characters
+ from the CN field.
+
+ (Or maybe the version of OpenSSL I'm using to test isn't storing the
+ international characters correctly in the SubjectAlts?).
+
+ @param cert X509Certificate
+ @return Array of SubjectALT DNS names stored in the certificate.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ An instance of the default {@link DelegationTokenAuthenticator} will be
+ used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null
the default one will be used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL using the default
+ {@link DelegationTokenAuthenticator} class.
+
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null
the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+
+
+
+
+
+
+
+ The default class is {@link KerberosDelegationTokenAuthenticator}
+
+ @return the delegation token authenticator class to use as default.]]>
+
+
+
+
+
+
+ This method is provided to enable WebHDFS backwards compatibility.
+
+ @param useQueryString TRUE
if the token is transmitted in the
+ URL query string, FALSE
if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+
+
+
+
+ TRUE if the token is transmitted in the URL query
+ string, FALSE
if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator. If the doAs
parameter is not NULL,
+ the request will be done on behalf of the specified doAs
user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+ DelegationTokenAuthenticatedURL is a
+ {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token
+ functionality.
+
+ The authentication mechanisms supported by default are Hadoop Simple
+ authentication (also known as pseudo authentication) and Kerberos SPNEGO
+ authentication.
+
+ Additional authentication mechanisms can be supported via {@link
+ DelegationTokenAuthenticator} implementations.
+
+ The default {@link DelegationTokenAuthenticator} is the {@link
+ KerberosDelegationTokenAuthenticator} class which supports
+ automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via
+ the {@link PseudoDelegationTokenAuthenticator} class.
+
+ AuthenticatedURL
instances are not thread-safe.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KerberosDelegationTokenAuthenticator provides support for
+ Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation
+ Token operations.
+
+ It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP
+ endpoint does not trigger a SPNEGO authentication]]>
+
+
+
+
+
+
+
+
+ PseudoDelegationTokenAuthenticator provides support for
+ Hadoop's pseudo authentication mechanism that accepts
+ the user name specified as a query string parameter and support for Hadoop
+ Delegation Token operations.
+
+ This mimics the model of Hadoop Simple authentication trusting the
+ {@link UserGroupInformation#getCurrentUser()} value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ live.
+ @return a (snapshotted) map of blocker name->description values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Do nothing if the service is null or not
+ in a state in which it can be/needs to be stopped.
+
+ The service state is checked before the operation begins.
+ This process is not thread safe.
+ @param service a service or null]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Any long-lived operation here will prevent the service state
+ change from completing in a timely manner.
+ If another thread is somehow invoked from the listener, and
+ that thread invokes the methods of the service (including
+ subclass-specific methods), there is a risk of a deadlock.
+
+
+
+ @param service the service that has changed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Features:
+
+ - mixing of '
-
' (dash) and '.
' (dot) separators,
+ - transition between characters and digits also constitutes a separator:
+
1.0alpha1 => [1, 0, alpha, 1]
+ - unlimited number of version components,
+ - version components in the text can be digits or strings,
+ - strings are checked for well-known qualifiers and the qualifier ordering is used for version ordering.
+ Well-known qualifiers (case insensitive) are:
+ alpha
or a
+ beta
or b
+ milestone
or m
+ rc
or cr
+ snapshot
+ (the empty string)
or ga
or final
+ sp
+
+ Unknown qualifiers are considered after known qualifiers, with lexical order (always case insensitive),
+
+ - a dash usually precedes a qualifier, and is always less important than something preceded with a dot.
+
+
+ @see "Versioning" on Maven Wiki]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This can be used for testing
+ @return contents of the MachineList]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the static class to find
+ @param the parent class of the array
+ @param cls the dynamic class to find
+ @param opts the list of options to look through
+ @return the first option that matches
+ @throws IOException]]>
+
+
+
+
+
+
+ the type of options
+ @param oldOpts the old options
+ @param newOpts the new options
+ @return a new array of options]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take significant amount of time since,
+ in-lieu of the reported progress, the framework has to assume that an error
+ has occured and time-out the operation.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Class is to be obtained
+ @return the correctly typed Class
of the given object.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShellCommandExecutorshould be used in cases where the output
+ of the command needs no explicit parsing and where the command, working
+ directory and the environment remains unchanged. The output of the command
+ is stored as-is and is expected to be small.]]>
+
+
+
+
+
+
+
+ ShutdownHookManager singleton.
+
+ @return ShutdownHookManager
singleton.]]>
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShutdownHookManager enables running shutdownHook
+ in a deterministic order, higher priority first.
+
+ The JVM runs ShutdownHooks in a non-deterministic order or in parallel.
+ This class registers a single JVM shutdownHook and run all the
+ shutdownHooks registered to it (to this class) in order based on their
+ priority.]]>
+
+
+
+
+
+
+
+
+
+
+ true if the thread is successfully interrupted,
+ false otherwise
+ @throws InterruptedException]]>
+
+
+
+
+
+
+ true if the thread is successfully interrupted,
+ false otherwise
+ @throws InterruptedException]]>
+
+
+
+
+
+
+ true if the service is terminated,
+ false otherwise
+ @throws InterruptedException]]>
+
+
+
+
+
+
+
+ true if the service is terminated,
+ false otherwise
+ @throws InterruptedException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool
, is the standard for any Map-Reduce tool/application.
+ The tool/application should delegate the handling of
+
+ standard command-line options to {@link ToolRunner#run(Tool, String[])}
+ and only handle its custom arguments.
+
+ Here is how a typical Tool
is implemented:
+
+ public class MyApp extends Configured implements Tool {
+
+ public int run(String[] args) throws Exception {
+ // Configuration
processed by ToolRunner
+ Configuration conf = getConf();
+
+ // Create a JobConf using the processed conf
+ JobConf job = new JobConf(conf, MyApp.class);
+
+ // Process custom command-line options
+ Path in = new Path(args[1]);
+ Path out = new Path(args[2]);
+
+ // Specify various job-specific parameters
+ job.setJobName("my-app");
+ job.setInputPath(in);
+ job.setOutputPath(out);
+ job.setMapperClass(MyMapper.class);
+ job.setReducerClass(MyReducer.class);
+
+ // Submit the job, then poll for progress until the job is complete
+ JobClient.runJob(job);
+ return 0;
+ }
+
+ public static void main(String[] args) throws Exception {
+ // Let ToolRunner
handle generic command-line options
+ int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+
+ System.exit(res);
+ }
+ }
+
+
+ @see GenericOptionsParser
+ @see ToolRunner]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool by {@link Tool#run(String[])}, after
+ parsing with the given generic arguments. Uses the given
+ Configuration
, or builds one if null.
+
+ Sets the Tool
's configuration with the possibly modified
+ version of the conf
.
+
+ @param conf Configuration
for the Tool
.
+ @param tool Tool
to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+ Tool with its Configuration
.
+
+ Equivalent to run(tool.getConf(), tool, args)
.
+
+ @param tool Tool
to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ToolRunner
can be used to run classes implementing
+ Tool
interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+
+ generic hadoop command line arguments and modifies the
+ Configuration
of the Tool
. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Bloom filter, as defined by Bloom in 1970.
+
+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
+ the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ offers for the transmission of set membership information between networked hosts. A sender encodes
+ the information into a bit vector, the Bloom filter, that is more compact than a conventional
+ representation. Computation and space costs for construction are linear in the number of elements.
+ The receiver uses the filter to test whether various elements are members of the set. Though the
+ filter will occasionally return a false positive, it will never return a false negative. When creating
+ the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.
+
+ @see Filter The general behavior of a filter
+
+ @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this counting Bloom filter.
+
+ Invariant: nothing happens if the specified key does not belong to this counter Bloom filter.
+ @param key The key to remove.]]>
+
+
+
+
+
+
+
+
+
+
+
+ key -> count map.
+ NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15
.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count
with probability equal to the
+ error rate of this filter, and v > count
otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count
with the probability of the false negative rate of such
+ filter.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counting Bloom filter, as defined by Fan et al. in a ToN
+ 2000 paper.
+
+ A counting Bloom filter is an improvement to standard a Bloom filter as it
+ allows dynamic additions and deletions of set membership information. This
+ is achieved through the use of a counting vector instead of a bit vector.
+
+ Originally created by
+ European Commission One-Lab Project 034819.
+
+ @see Filter The general behavior of a filter
+
+ @see Summary cache: a scalable wide-area web cache sharing protocol]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Builds an empty Dynamic Bloom filter.
+ @param vectorSize The number of bits in the vector.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).
+ @param nr The threshold for the maximum number of keys to record in a
+ dynamic Bloom filter row.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
+
+ A dynamic Bloom filter (DBF) makes use of a s * m
bit matrix but
+ each of the s
rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr
elements are recorded in the
+ initial bit vector, where nr <= n
(n
is
+ the cardinality of the set A
to record in the filter).
+
+ As the size of A
grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr
, is
+ strictly less than the current cardinality of A
, n
.
+ If an active Bloom filter is found, the key is inserted and
+ nr
is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A
and the element
+ is added in this new Bloom filter and the nr
value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k
positions are set to one in one of the matrix rows.
+
+ Originally created by
+ European Commission One-Lab Project 034819.
+
+ @see Filter The general behavior of a filter
+ @see BloomFilter A Bloom filter
+
+ @see Theory and Network Applications of Dynamic Bloom Filters]]>
+
+
+
+
+
+
+
+
+ Builds a hash function that must obey to a given maximum number of returned values and a highest value.
+ @param maxValue The maximum highest returned value.
+ @param nbHash The number of resulting hashed values.
+ @param hashType type of the hashing function (see {@link Hash}).]]>
+
+
+
+
+ this hash function. A NOOP]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The idea is to randomly select a bit to reset.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will generate the minimum
+ number of false negative.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will remove the maximum number
+ of false positive.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will, at the same time, remove
+ the maximum number of false positve while minimizing the amount of false
+ negative generated.]]>
+
+
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this retouched Bloom filter.
+
+ Invariant: if the false positive is null
, nothing happens.
+ @param key The false positive key to add.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param coll The collection of false positive.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The list of false positive.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The array of false positive.]]>
+
+
+
+
+
+
+ this retouched Bloom filter.
+ @param scheme The selective clearing scheme to apply.]]>
+
+
+
+
+
+
+
+
+
+
+
+ retouched Bloom filter, as defined in the CoNEXT 2006 paper.
+
+ It allows the removal of selected false positives at the cost of introducing
+ random false negatives, and with the benefit of eliminating some random false
+ positives at the same time.
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.
+
+ @see Filter The general behavior of a filter
+ @see BloomFilter A Bloom filter
+ @see RemoveScheme The different selective clearing algorithms
+
+ @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]>
+
+
+
+
+
+
+
+
+
+
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml
index c804fcaaffc..09f630cf9da 100644
--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@@ -33,6 +33,7 @@
common
true
+ true
../etc/hadoop
wsce-site.xml
@@ -493,6 +494,7 @@
src/test/resources/test.har/_masterindex
src/test/resources/test.har/part-0
src/test/resources/javakeystoreprovider.password
+ dev-support/jdiff-workaround.patch
diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml
index b2e2c4f9139..c6cff924d3d 100644
--- a/hadoop-project-dist/pom.xml
+++ b/hadoop-project-dist/pom.xml
@@ -158,7 +158,7 @@
false
- 2.6.0
+ 2.7.2
-unstable
@@ -171,7 +171,7 @@
org.apache.maven.plugins
maven-javadoc-plugin
-
+
javadoc
@@ -239,6 +239,26 @@
org.apache.maven.plugins
maven-antrun-plugin
+
+
+
+ pre-site
+ prepare-package
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+
site
prepare-package
@@ -247,7 +267,6 @@
-
@@ -300,6 +319,25 @@
+
+
+
+ post-site
+ prepare-package
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+