From 405ed1dde6bcccca1e07e45a356a89c1b583e236 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 18 Apr 2023 10:12:07 +0100 Subject: [PATCH] HADOOP-18470. Hadoop 3.3.5 release wrap-up (#5558) Post-release updates of the branches * Add jdiff xml files from 3.3.5 release. * Declare 3.3.5 as the latest stable release. * Copy release notes. --- .../jdiff/Apache_Hadoop_Common_3.3.5.xml | 40640 ++++++++++++++++ .../markdown/release/3.3.5/CHANGELOG.3.3.5.md | 359 + .../release/3.3.5/RELEASENOTES.3.3.5.md | 89 + .../jdiff/Apache_Hadoop_HDFS_3.3.5.xml | 835 + .../Apache_Hadoop_MapReduce_Common_3.3.5.xml | 113 + .../Apache_Hadoop_MapReduce_Core_3.3.5.xml | 28963 +++++++++++ ...pache_Hadoop_MapReduce_JobClient_3.3.5.xml | 16 + hadoop-project-dist/pom.xml | 2 +- .../jdiff/Apache_Hadoop_YARN_API_3.3.5.xml | 26420 ++++++++++ .../jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml | 3067 ++ .../jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml | 3982 ++ ...Apache_Hadoop_YARN_Server_Common_3.3.5.xml | 1456 + 12 files changed, 105941 insertions(+), 1 deletion(-) create mode 100644 hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md create mode 100644 hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml create mode 100644 hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.3.5.xml create mode 100644 hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.3.5.xml create mode 100644 hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.3.5.xml create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_API_3.3.5.xml create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml create mode 100644 hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.3.5.xml diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml new file mode 100644 index 00000000000..b788b4497fe --- /dev/null +++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.5.xml @@ -0,0 +1,40640 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @param customMessage depcrication message + @deprecated use {@link #addDeprecation(String key, String newKey, + String customMessage)} instead]]> + + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key to be deprecated + @param newKey key that take up the values of deprecated key + @param customMessage deprecation message]]> + + + + + + + UnsupportedOperationException + + If a key is deprecated in favor of multiple keys, they are all treated as + aliases of each other, and setting any one of them resets all the others + to the new value. + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKeys list of keys that take up the values of deprecated key + @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]> + + + + + + + UnsupportedOperationException + + If you have multiple deprecation entries to add, it is more efficient to + use #addDeprecations(DeprecationDelta[] deltas) instead. + + @param key Key that is to be deprecated + @param newKey key that takes up the value of deprecated key]]> + + + + + + key is deprecated. + + @param key the parameter which is to be checked for deprecation + @return true if the key is deprecated and + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + final. + + @param name resource to be added, the classpath is examined for a file + with that name.]]> + + + + + + + + + + final. + + @param url url of the resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + @param file file-path of resource to be added, the local filesystem is + examined directly to find the resource, without referring to + the classpath.]]> + + + + + + + + + + final. + + WARNING: The contents of the InputStream will be cached, by this method. + So use this sparingly because it does increase the memory consumption. + + @param in InputStream to deserialize the object from. In will be read from + when a get or set is called next. After it is read the stream will be + closed.]]> + + + + + + + + + + + final. + + @param in InputStream to deserialize the object from. + @param name the name of the resource because InputStream.toString is not + very descriptive some times.]]> + + + + + + + + + + + final. + + @param conf Configuration object from which to load properties]]> + + + + + + + + + + + name property, null if + no such property exists. If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null. + + Values are processed for variable expansion + before being returned. + + As a side effect get loads the properties from the sources if called for + the first time as a lazy init. + + @param name the property name, will be trimmed before get value. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + + + + + + + + + name property, but only for + names which have no valid value, usually non-existent or commented + out in XML. + + @param name the property name + @return true if the property name exists without value]]> + + + + + + name property as a trimmed String, + null if no such property exists. + If the key is deprecated, it returns the value of + the first key which replaces the deprecated key and is not null + + Values are processed for variable expansion + before being returned. + + @param name the property name. + @return the value of the name or its replacing property, + or null if no such property exists.]]> + + + + + + + name property as a trimmed String, + defaultValue if no such property exists. + See @{Configuration#getTrimmed} for more details. + + @param name the property name. + @param defaultValue the property default value. + @return the value of the name or defaultValue + if it is not set.]]> + + + + + + name property, without doing + variable expansion.If the key is + deprecated, it returns the value of the first key which replaces + the deprecated key and is not null. + + @param name the property name. + @return the value of the name property or + its replacing property and null if no such property exists.]]> + + + + + + + value of the name property. If + name is deprecated or there is a deprecated name associated to it, + it sets the value to both names. Name will be trimmed before put into + configuration. + + @param name property name. + @param value property value.]]> + + + + + + + + value of the name property. If + name is deprecated, it also sets the value to + the keys that replace the deprecated key. Name will be trimmed before put + into configuration. + + @param name property name. + @param value property value. + @param source the place that this configuration value came from + (For debugging). + @throws IllegalArgumentException when the value or name is null.]]> + + + + + + + + + + + + + + + + + + + + name. If the key is deprecated, + it returns the value of the first key which replaces the deprecated key + and is not null. + If no such property exists, + then defaultValue is returned. + + @param name property name, will be trimmed before get value. + @param defaultValue default value. + @return property value, or defaultValue if the property + doesn't exist.]]> + + + + + + + name property as an int. + + If no such property exists, the provided default value is returned, + or if the specified value is not a valid int, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as an int, + or defaultValue.]]> + + + + + + name property as a set of comma-delimited + int values. + + If no such property exists, an empty array is returned. + + @param name property name + @return property value interpreted as an array of comma-delimited + int values]]> + + + + + + + name property to an int. + + @param name property name. + @param value int value of the property.]]> + + + + + + + name property as a long. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid long, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property as a long or + human readable format. If no such property exists, the provided default + value is returned, or if the specified value is not a valid + long or human readable format, then an error is thrown. You + can use the following suffix (case insensitive): k(kilo), m(mega), g(giga), + t(tera), p(peta), e(exa) + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a long, + or defaultValue.]]> + + + + + + + name property to a long. + + @param name property name. + @param value long value of the property.]]> + + + + + + + name property as a float. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid float, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a float, + or defaultValue.]]> + + + + + + + name property to a float. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a double. + If no such property exists, the provided default value is returned, + or if the specified value is not a valid double, + then an error is thrown. + + @param name property name. + @param defaultValue default value. + @throws NumberFormatException when the value is invalid + @return property value as a double, + or defaultValue.]]> + + + + + + + name property to a double. + + @param name property name. + @param value property value.]]> + + + + + + + name property as a boolean. + If no such property is specified, or if the specified value is not a valid + boolean, then defaultValue is returned. + + @param name property name. + @param defaultValue default value. + @return property value as a boolean, + or defaultValue.]]> + + + + + + + name property to a boolean. + + @param name property name. + @param value boolean value of the property.]]> + + + + + + + + + + + + + + name property to the given type. This + is equivalent to set(<name>, value.toString()). + @param name property name + @param value new value + @param enumeration type]]> + + + + + + + enumeration type + @throws IllegalArgumentException If mapping is illegal for the type + provided + @return enumeration type]]> + + + + + + + + name to the given time duration. This + is equivalent to set(<name>, value + <time suffix>). + @param name Property name + @param value Time duration + @param unit Unit of time]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name property as a Pattern. + If no such property is specified, or if the specified value is not a valid + Pattern, then DefaultValue is returned. + Note that the returned value is NOT trimmed by this method. + + @param name property name + @param defaultValue default value + @return property value as a compiled Pattern, or defaultValue]]> + + + + + + + Pattern. + If the pattern is passed as null, sets the empty pattern which results in + further calls to getPattern(...) returning the default value. + + @param name property name + @param pattern new value]]> + + + + + + + + + + + + + + + + + + + name property as + a collection of Strings. + If no such property is specified then empty collection is returned. +

+ This is an optimized version of {@link #getStrings(String)} + + @param name property name. + @return property value as a collection of Strings.]]> + + + + + + name property as + an array of Strings. + If no such property is specified then null is returned. + + @param name property name. + @return property value as an array of Strings, + or null.]]> + + + + + + + name property as + an array of Strings. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of Strings, + or default value.]]> + + + + + + name property as + a collection of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then empty Collection is returned. + + @param name property name. + @return property value as a collection of Strings, or empty Collection]]> + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then an empty array is returned. + + @param name property name. + @return property value as an array of trimmed Strings, + or empty array.]]> + + + + + + + name property as + an array of Strings, trimmed of the leading and trailing whitespace. + If no such property is specified then default value is returned. + + @param name property name. + @param defaultValue The default value + @return property value as an array of trimmed Strings, + or default value.]]> + + + + + + + name property as + as comma delimited values. + + @param name property name. + @param values The values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostProperty as a + InetSocketAddress. If hostProperty is + null, addressProperty will be used. This + is useful for cases where we want to differentiate between host + bind address and address clients should use to establish connection. + + @param hostProperty bind host property name. + @param addressProperty address property name. + @param defaultAddressValue the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + + name property as a + InetSocketAddress. + @param name property name. + @param defaultAddress the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + name property as + a host:port. + @param name property name. + @param addr inetSocketAddress addr.]]> + + + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. If the host and address + properties are configured the host component of the address will be combined + with the port component of the addr to generate the address. This is to allow + optional control over which host name is used in multi-home bind-host + cases where a host can have multiple names + @param hostProperty the bind-host configuration name + @param addressProperty the service address configuration name + @param defaultAddressValue the service default address configuration value + @param addr InetSocketAddress of the service listener + @return InetSocketAddress for clients to connect]]> + + + + + + + name property as a host:port. The wildcard + address is replaced with the local host's address. + @param name property name. + @param addr InetSocketAddress of a listener to store in the given property + @return InetSocketAddress for clients to connect]]> + + + + + + + + + + + + + + + + + + + + name property + as an array of Class. + The value of the property specifies a list of comma separated class names. + If no such property is specified, then defaultValue is + returned. + + @param name the property name. + @param defaultValue default value. + @return property value as a Class[], + or defaultValue.]]> + + + + + + + name property as a Class. + If no such property is specified, then defaultValue is + returned. + + @param name the conf key name. + @param defaultValue default value. + @return property value as a Class, + or defaultValue.]]> + + + + + + + + name property as a Class + implementing the interface specified by xface. + + If no such property is specified, then defaultValue is + returned. + + An exception is thrown if the returned class does not implement the named + interface. + + @param name the conf key name. + @param defaultValue default value. + @param xface the interface implemented by the named class. + @param Interface class type. + @return property value as a Class, + or defaultValue.]]> + + + + + + + name property as a List + of objects implementing the interface specified by xface. + + An exception is thrown if any of the classes does not exist, or if it does + not implement the named interface. + + @param name the property name. + @param xface the interface implemented by the classes named by + name. + @param Interface class type. + @return a List of objects implementing xface.]]> + + + + + + + + name property to the name of a + theClass implementing the given interface xface. + + An exception is thrown if theClass does not implement the + interface xface. + + @param name property name. + @param theClass property value. + @param xface the interface implemented by the named class.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + dirsProp with + the given path. If dirsProp contains multiple directories, + then one is chosen based on path's hash code. If the selected + directory does not exist, an attempt is made to create it. + + @param dirsProp directory in which to locate the file. + @param path file-path. + @return local file under the directory with the given path. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + name. + + @param name configuration resource name. + @return an input stream attached to the resource.]]> + + + + + + name. + + @param name configuration resource name. + @return a reader attached to the resource.]]> + + + + + + + + + + + + + + + + + + + + + + String + key-value pairs in the configuration. + + @return an iterator over the entries.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • + When property name is not empty and the property exists in the + configuration, this method writes the property and its attributes + to the {@link Writer}. +
  • + +
  • + When property name is null or empty, this method writes all the + configuration properties and their attributes to the {@link Writer}. +
  • + +
  • + When property name is not empty but the property doesn't exist in + the configuration, this method throws an {@link IllegalArgumentException}. +
  • + + @param propertyName xml property name. + @param out the writer to write to. + @throws IOException raised on errors performing I/O.]]> +
    +
    + + + + + + + +
  • + When propertyName is not empty, and the property exists + in the configuration, the format of the output would be, +
    +  {
    +    "property": {
    +      "key" : "key1",
    +      "value" : "value1",
    +      "isFinal" : "key1.isFinal",
    +      "resource" : "key1.resource"
    +    }
    +  }
    +  
    +
  • + +
  • + When propertyName is null or empty, it behaves same as + {@link #dumpConfiguration(Configuration, Writer)}, the + output would be, +
    +  { "properties" :
    +      [ { key : "key1",
    +          value : "value1",
    +          isFinal : "key1.isFinal",
    +          resource : "key1.resource" },
    +        { key : "key2",
    +          value : "value2",
    +          isFinal : "ke2.isFinal",
    +          resource : "key2.resource" }
    +       ]
    +   }
    +  
    +
  • + +
  • + When propertyName is not empty, and the property is not + found in the configuration, this method will throw an + {@link IllegalArgumentException}. +
  • + +

    + @param config the configuration + @param propertyName property name + @param out the Writer to write to + @throws IOException raised on errors performing I/O. + @throws IllegalArgumentException when property name is not + empty and the property is not found in configuration]]> + + + + + + + + + { "properties" : + [ { key : "key1", + value : "value1", + isFinal : "key1.isFinal", + resource : "key1.resource" }, + { key : "key2", + value : "value2", + isFinal : "ke2.isFinal", + resource : "key2.resource" } + ] + } + + + It does not output the properties of the configuration object which + is loaded from an input stream. +

    + + @param config the configuration + @param out the Writer to write to + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + true to set quiet-mode on, false + to turn it off.]]> + + + + + + + + + + + + + + + + + + + + + } with matching keys]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resources + +

    Configurations are specified by resources. A resource contains a set of + name/value pairs as XML data. Each resource is named by either a + String or by a {@link Path}. If named by a String, + then the classpath is examined for a file with that name. If named by a + Path, then the local filesystem is examined directly, without + referring to the classpath. + +

    Unless explicitly turned off, Hadoop by default specifies two + resources, loaded in-order from the classpath:

      +
    1. + + core-default.xml: Read-only defaults for hadoop.
    2. +
    3. core-site.xml: Site-specific configuration for a given hadoop + installation.
    4. +
    + Applications may add additional resources, which are loaded + subsequent to these resources in the order they are added. + +

    Final Parameters

    + +

    Configuration parameters may be declared final. + Once a resource declares a value final, no subsequently-loaded + resource can alter that value. + For example, one might define a final parameter with: +

    
    +  <property>
    +    <name>dfs.hosts.include</name>
    +    <value>/etc/hadoop/conf/hosts.include</value>
    +    <final>true</final>
    +  </property>
    + + Administrators typically define parameters as final in + core-site.xml for values that user applications may not alter. + +

    Variable Expansion

    + +

    Value strings are first processed for variable expansion. The + available properties are:

      +
    1. Other properties defined in this Configuration; and, if a name is + undefined here,
    2. +
    3. Environment variables in {@link System#getenv()} if a name starts with + "env.", or
    4. +
    5. Properties in {@link System#getProperties()}.
    6. +
    + +

    For example, if a configuration resource contains the following property + definitions: +

    
    +  <property>
    +    <name>basedir</name>
    +    <value>/user/${user.name}</value>
    +  </property>
    +  
    +  <property>
    +    <name>tempdir</name>
    +    <value>${basedir}/tmp</value>
    +  </property>
    +
    +  <property>
    +    <name>otherdir</name>
    +    <value>${env.BASE_DIR}/other</value>
    +  </property>
    +  
    + +

    When conf.get("tempdir") is called, then ${basedir} + will be resolved to another property in this Configuration, while + ${user.name} would then ordinarily be resolved to the value + of the System property with that name. +

    When conf.get("otherdir") is called, then ${env.BASE_DIR} + will be resolved to the value of the ${BASE_DIR} environment variable. + It supports ${env.NAME:-default} and ${env.NAME-default} notations. + The former is resolved to "default" if ${NAME} environment variable is undefined + or its value is empty. + The latter behaves the same way only if ${NAME} is undefined. +

    By default, warnings will be given to any deprecated configuration + parameters and these are suppressible by configuring + log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in + log4j.properties file. + +

    Tags

    + +

    Optionally we can tag related properties together by using tag + attributes. System tags are defined by hadoop.tags.system property. Users + can define there own custom tags in hadoop.tags.custom property. + +

    For example, we can tag existing property as: +

    
    +  <property>
    +    <name>dfs.replication</name>
    +    <value>3</value>
    +    <tag>HDFS,REQUIRED</tag>
    +  </property>
    +
    +  <property>
    +    <name>dfs.data.transfer.protection</name>
    +    <value>3</value>
    +    <tag>HDFS,SECURITY</tag>
    +  </property>
    + 
    +

    Properties marked with tags can be retrieved with conf + .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags + (Arrays.asList("YARN","SECURITY")).

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #createKey(String, byte[], Options)} method. + + @param name the base name of the key + @param options the options for the new key. + @return the version name of the first version of the key. + @throws IOException raised on errors performing I/O. + @throws NoSuchAlgorithmException no such algorithm exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation generates the key material and calls the + {@link #rollNewVersion(String, byte[])} method. + + @param name the basename of the key + @return the name of the new version of the key + @throws IOException raised on errors performing I/O. + @throws NoSuchAlgorithmException This exception is thrown when a particular + cryptographic algorithm is requested + but is not available in the environment.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KeyProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + NULL if + a provider for the specified URI scheme could not be found. + @throws IOException thrown if the provider failed to initialize.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + uri has syntax error]]> + + + + + + + + + + + + + + + + uri is + not found]]> + + + + + + + + + + + + + + + + + + + + + + + uri + determines a configuration property name, + fs.AbstractFileSystem.scheme.impl whose value names the + AbstractFileSystem class. + + The entire URI and conf is passed to the AbstractFileSystem factory method. + + @param uri for the file system to be created. + @param conf which is passed to the file system impl. + + @return file system for the given URI. + + @throws UnsupportedFileSystemException if the file system for + uri is not supported.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. + @throws IOException raised on errors performing I/O. + @throws UnsupportedOperationException Unsupported Operation Exception.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } describing modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + } describing entries to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + } describing modifications, must + include entries for user, group, and others for compatibility with + permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + } which returns each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException raised on errors performing I/O.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return {@literal Map} describing the XAttrs of the file + or directory + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BlockLocation(offset: 0, length: BLOCK_SIZE, + hosts: {"host1:9866", "host2:9866, host3:9866"}) + + + And if the file is erasure-coded, each BlockLocation represents a logical + block groups. Value offset is the offset of a block group in the file and + value length is the total length of a block group. Hosts of a BlockLocation + are the datanodes that holding all the data blocks and parity blocks of a + block group. + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + A BlockLocation example will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + 
    + + Please refer to + {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or + {@link FileContext#getFileBlockLocations(Path, long, long)} + for more examples.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. +

    + This does not change the current offset of a file, and is thread-safe. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if reached + end-of-stream + @throws IOException if there is some error performing the read]]> + + + + + + + + + This operation provides similar semantics to + {@link #read(long, ByteBuffer)}, the difference is that this method is + guaranteed to read data until the {@link ByteBuffer} is full, or until + the end of the data stream is reached. + + @param position position within file + @param buf the ByteBuffer to receive the results of the read operation. + @throws IOException if there is some error performing the read + @throws EOFException the end of the data was reached before + the read operation completed + @see #read(long, ByteBuffer)]]> + + + + + + + + + + + + + + + After a successful call, {@code buf.position()} will be advanced by the + number of bytes read and {@code buf.limit()} will be unchanged. +

    + In the case of an exception, the state of the buffer (the contents of the + buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is + undefined, and callers should be prepared to recover from this + eventuality. +

    + Callers should use {@link StreamCapabilities#hasCapability(String)} with + {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying + stream supports this interface, otherwise they might get a + {@link UnsupportedOperationException}. +

    + Implementations should treat 0-length requests as legitimate, and must not + signal an error upon their receipt. + + @param buf + the ByteBuffer to receive the results of the read operation. + @return the number of bytes read, possibly zero, or -1 if + reach end-of-stream + @throws IOException + if there is some error performing the read]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setReplication of FileSystem + @param src file name + @param replication new replication + @throws IOException if an I/O error occurs. + @return true if successful; + false if file does not exist or is a directory]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + A higher number here does not necessarily improve performance, especially + for object stores, where multiple threads may be attempting to create an FS + instance for the same URI. +

    + Default value: {@value}.]]> +
    + + + + + Default value: {@value}. +

    ]]> +
    +
    + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + core-default.xml]]> + + + + + + + + core-default.xml]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND) + +

    + + Use the CreateFlag as follows: +

      +
    1. CREATE - to create a file if it does not exist, + else throw FileAlreadyExists.
    2. +
    3. APPEND - to append to a file if it exists, + else throw FileNotFoundException.
    4. +
    5. OVERWRITE - to truncate a file if it exists, + else throw FileNotFoundException.
    6. +
    7. CREATE|APPEND - to create a file if it does not exist, + else append to an existing file.
    8. +
    9. CREATE|OVERWRITE - to create a file if it does not exist, + else overwrite an existing file.
    10. +
    11. SYNC_BLOCK - to force closed blocks to the disk device. + In addition {@link Syncable#hsync()} should be called after each write, + if true synchronous behavior is required.
    12. +
    13. LAZY_PERSIST - Create the block on transient storage (RAM) if + available.
    14. +
    15. APPEND_NEWBLOCK - Append data to a new block instead of end of the last + partial block.
    16. +
    + + Following combinations are not valid and will result in + {@link HadoopIllegalArgumentException}: +
      +
    1. APPEND|OVERWRITE
    2. +
    3. CREATE|APPEND|OVERWRITE
    4. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + absOrFqPath is not supported. + @throws IOException If the file system for absOrFqPath could + not be instantiated.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + defaultFsUri is not supported]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NewWdir can be one of: +
      +
    • relative path: "foo/bar";
    • +
    • absolute without scheme: "/foo/bar"
    • +
    • fully qualified with scheme: "xx://auth/foo/bar"
    • +
    +
    + Illegal WDs: +
      +
    • relative with scheme: "xx:foo/bar"
    • +
    • non existent directory
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + f does not exist + @throws AccessControlException if access denied + @throws IOException If an IO Error occurred + @throws UnresolvedLinkException If unresolved link occurred. + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + +
  • Progress - to report progress on the operation - default null +
  • Permission - umask is applied against permission: default is + FsPermissions:getDefault() + +
  • CreateParent - create missing parent path; default is to not + to create parents +
  • The defaults for the following are SS defaults of the file + server implementing the target path. Not all parameters make sense + for all kinds of file system - eg. localFS ignores Blocksize, + replication, checksum +
      +
    • BufferSize - buffersize used in FSDataOutputStream +
    • Blocksize - block size for file blocks +
    • ReplicationFactor - replication for blocks +
    • ChecksumParam - Checksum parameters. server default is used + if not specified. +
    + + + @return {@link FSDataOutputStream} for created file + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file f already exists + @throws FileNotFoundException If parent of f does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of f is not a + directory. + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is not valid]]> + + + + + + + + + + + + + + + + + + + + + dir already + exists + @throws FileNotFoundException If parent of dir does not exist + and createParent is false + @throws ParentNotDirectoryException If parent of dir is not a + directory + @throws UnsupportedFileSystemException If file system for dir + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path dir is not valid]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws InvalidPathException If path f is invalid + + @return if delete success true, not false.]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + @return input stream.]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + @return output stream.]]> + + + + + + + + + + + +
  • Fails if path is a directory. +
  • Fails if path does not exist. +
  • Fails if path is not closed. +
  • Fails if new size is greater than current size. + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + + @throws AccessControlException If access is denied + @throws FileNotFoundException If file f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory. +
  • Fails if src is a directory and dst is a file. +
  • Fails if the parent of dst does not exist or is a file. + +

    + If OVERWRITE option is not passed as an argument, rename fails if the dst + already exists. +

    + If OVERWRITE option is passed as an argument, rename overwrites the dst if + it is a file or an empty directory. Rename fails if dst is a non-empty + directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for details +

    + + @param src path to be renamed + @param dst new path after rename + @param options rename options. + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If dst already exists and + options has {@link Options.Rename#OVERWRITE} + option false. + @throws FileNotFoundException If src does not exist + @throws ParentNotDirectoryException If parent of dst is not a + directory + @throws UnsupportedFileSystemException If file system for src + and dst is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f + is not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server + + RuntimeExceptions: + @throws HadoopIllegalArgumentException If username or + groupname is invalid.]]> + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + f is + not supported.]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If the given path does not refer to a symlink + or an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + Given a path referring to a symlink of form: + + {@literal <---}X{@literal --->} + fs://host/A/B/link + {@literal <-----}Y{@literal ----->} + + In this path X is the scheme and authority that identify the file system, + and Y is the path leading up to the final path component "link". If Y is + a symlink itself then let Y' be the target of Y and X' be the scheme and + authority of Y'. Symlink targets may: + + 1. Fully qualified URIs + + fs://hostX/A/B/file Resolved according to the target file system. + + 2. Partially qualified URIs (eg scheme but no host) + + fs:///A/B/file Resolved according to the target file system. Eg resolving + a symlink to hdfs:///A results in an exception because + HDFS URIs must be fully qualified, while a symlink to + file:///A will not since Hadoop's local file systems + require partially qualified URIs. + + 3. Relative paths + + path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path + is "../B/file" then [Y'][path] is hdfs://host/B/file + + 4. Absolute paths + + path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path + is "/file" then [X][path] is hdfs://host/file + + + @param target the target of the symbolic link + @param link the path to be created that points to target + @param createParent if true then missing parent dirs are created if + false then parent must exist + + + @throws AccessControlException If access is denied + @throws FileAlreadyExistsException If file link already exists + @throws FileNotFoundException If target does not exist + @throws ParentNotDirectoryException If parent of link is not a + directory. + @throws UnsupportedFileSystemException If file system for + target or link is not supported + @throws IOException If an I/O error occurred]]> + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + f does not exist + @throws UnsupportedFileSystemException If file system for f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + f is + not supported + @throws IOException If an I/O error occurred + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + f does not exist. + @throws UnresolvedLinkException If unresolved link occurred. + @throws AccessControlException If access is denied. + @throws IOException If an I/O error occurred. + @return resolve path.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } describing + modifications + @throws IOException if an ACL could not be modified]]> + + + + + + + + } describing entries + to remove + @throws IOException if an ACL could not be modified]]> + + + + + + + + + + + + + + + + + + + + + + } describing + modifications, must include entries for user, group, and others for + compatibility with permission bits. + @throws IOException if an ACL could not be modified]]> + + + + + + + } which returns + each AclStatus + @throws IOException if an ACL could not be read]]> + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException If an I/O error occurred.]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException If an I/O error occurred.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException If an I/O error occurred.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs + of the file or directory + @throws IOException If an I/O error occurred.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs + of the file or directory + @throws IOException If an I/O error occurred.]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException If an I/O error occurred.]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List{@literal <}String{@literal >} of the XAttr names of the + file or directory + @throws IOException If an I/O error occurred.]]> + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + Exceptions applicable to file systems accessed over RPC: + @throws RpcClientException If an exception occurred in the RPC client + @throws RpcServerException If an exception occurred in the RPC server + @throws UnexpectedServerException If server implementation throws + undeclared exception to RPC server]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Path Names + + The Hadoop file system supports a URI namespace and URI names. This enables + multiple types of file systems to be referenced using fully-qualified URIs. + Two common Hadoop file system implementations are +

      +
    • the local file system: file:///path +
    • the HDFS file system: hdfs://nnAddress:nnPort/path +
    + + The Hadoop file system also supports additional naming schemes besides URIs. + Hadoop has the concept of a default file system, which implies a + default URI scheme and authority. This enables slash-relative names + relative to the default FS, which are more convenient for users and + application writers. The default FS is typically set by the user's + environment, though it can also be manually specified. +

    + + Hadoop also supports working-directory-relative names, which are paths + relative to the current working directory (similar to Unix). The working + directory can be in a different file system than the default FS. +

    + Thus, Hadoop path names can be specified as one of the following: +

      +
    • a fully-qualified URI: scheme://authority/path (e.g. + hdfs://nnAddress:nnPort/foo/bar) +
    • a slash-relative name: path relative to the default file system (e.g. + /foo/bar) +
    • a working-directory-relative name: path relative to the working dir (e.g. + foo/bar) +
    + Relative paths with scheme (scheme:foo/bar) are illegal. + +

    Role of FileContext and Configuration Defaults

    + + The FileContext is the analogue of per-process file-related state in Unix. It + contains two properties: + +
      +
    • the default file system (for resolving slash-relative names) +
    • the umask (for file permissions) +
    + In general, these properties are obtained from the default configuration file + in the user's environment (see {@link Configuration}). + + Further file system properties are specified on the server-side. File system + operations default to using these server-side defaults unless otherwise + specified. +

    + The file system related server-side defaults are: +

      +
    • the home directory (default is "/user/userName") +
    • the initial wd (only for local fs) +
    • replication factor +
    • block size +
    • buffer size +
    • encryptDataTransfer +
    • checksum option. (checksumType and bytesPerChecksum) +
    + +

    Example Usage

    + + Example 1: use the default config read from the $HADOOP_CONFIG/core.xml. + Unspecified values come from core-defaults.xml in the release jar. +
      +
    • myFContext = FileContext.getFileContext(); // uses the default config + // which has your default FS +
    • myFContext.create(path, ...); +
    • myFContext.setWorkingDir(path); +
    • myFContext.open (path, ...); +
    • ... +
    + Example 2: Get a FileContext with a specific URI as the default FS +
      +
    • myFContext = FileContext.getFileContext(URI); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 3: FileContext with local file system as the default +
      +
    • myFContext = FileContext.getLocalFSFileContext(); +
    • myFContext.create(path, ...); +
    • ... +
    + Example 4: Use a specific config, ignoring $HADOOP_CONFIG + Generally you should not need use a config unless you are doing +
      +
    • configX = someConfigSomeOnePassedToYou; +
    • myFContext = getFileContext(configX); // configX is not changed, + // is passed down +
    • myFContext.create(path, ...); +
    • ... +
    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This implementation throws an UnsupportedOperationException. + + @return the protocol scheme for this FileSystem. + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • + If the configuration has the property + {@code "fs.$SCHEME.impl.disable.cache"} set to true, + a new instance will be created, initialized with the supplied URI and + configuration, then returned without being cached. +
  • +
  • + If the there is a cached FS instance matching the same URI, it will + be returned. +
  • +
  • + Otherwise: a new FS instance will be created, initialized with the + configuration and URI, cached and returned to the caller. +
  • + + @param uri uri of the filesystem. + @param conf configrution. + @return filesystem instance. + @throws IOException if the FileSystem cannot be instantiated.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + if f == null : + result = null + elif f.getLen() {@literal <=} start: + result = [] + else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)] + + This call is most helpful with and distributed filesystem + where the hostnames of machines that contain blocks of the given file + can be determined. + + The default implementation returns an array containing one element: +
    + BlockLocation( { "localhost:9866" },  { "localhost" }, 0, file.getLen())
    + 
    + + In HDFS, if file is three-replicated, the returned array contains + elements like: +
    + BlockLocation(offset: 0, length: BLOCK_SIZE,
    +   hosts: {"host1:9866", "host2:9866, host3:9866"})
    + BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
    +   hosts: {"host2:9866", "host3:9866, host4:9866"})
    + 
    + + And if a file is erasure-coded, the returned BlockLocation are logical + block groups. + + Suppose we have a RS_3_2 coded file (3 data units and 2 parity units). + 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then + there will be one BlockLocation returned, with 0 offset, actual file size + and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks. + 3. If the file size is less than one group size but greater than one + stripe size, then there will be one BlockLocation returned, with 0 offset, + actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting + the actual blocks. + 4. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123 + for example, then the result will be like: +
    + BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
    +   "host2:9866","host3:9866","host4:9866","host5:9866"})
    + BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
    +   "host4:9866", "host5:9866"})
    + 
    + + @param file FilesStatus to get data from + @param start offset into the given file + @param len length for which to get locations for + @throws IOException IO failure + @return block location array.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Important: the default implementation is not atomic + @param f path to use for create + @throws IOException IO failure + @return if create new file success true,not false.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Fails if src is a file and dst is a directory.
  • +
  • Fails if src is a directory and dst is a file.
  • +
  • Fails if the parent of dst does not exist or is a file.
  • + +

    + If OVERWRITE option is not passed as an argument, rename fails + if the dst already exists. +

    +

    + If OVERWRITE option is passed as an argument, rename overwrites + the dst if it is a file or an empty directory. Rename fails if dst is + a non-empty directory. +

    + Note that atomicity of rename is dependent on the file system + implementation. Please refer to the file system documentation for + details. This default implementation is non atomic. +

    + This method is deprecated since it is a temporary method added to + support the transition from FileSystem to FileContext for user + applications. +

    + + @param src path to be renamed + @param dst new path after rename + @param options rename options. + @throws FileNotFoundException src path does not exist, or the parent + path of dst does not exist. + @throws FileAlreadyExistsException dest path exists and is a file + @throws ParentNotDirectoryException if the parent path of dest is not + a directory + @throws IOException on failure]]> +
    +
    + + + + + + +
  • Fails if path is a directory.
  • +
  • Fails if path does not exist.
  • +
  • Fails if path is not closed.
  • +
  • Fails if new size is greater than current size.
  • + + @param f The path to the file to be truncated + @param newLength The size the file is to be truncated to + + @return true if the file has been truncated to the desired + newLength and is immediately available to be reused for + write operations such as append, or + false if a background process of adjusting the length of + the last block has been started, and clients should wait for it to + complete before proceeding with further file updates. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default).]]> +
    +
    + + + + + + + + + + + + + + + + + + + + +
  • Clean shutdown of the JVM cannot be guaranteed.
  • +
  • The time to shut down a FileSystem will depends on the number of + files to delete. For filesystems where the cost of checking + for the existence of a file/directory and the actual delete operation + (for example: object stores) is high, the time to shutdown the JVM can be + significantly extended by over-use of this feature.
  • +
  • Connectivity problems with a remote filesystem may delay shutdown + further, and may cause the files to not be deleted.
  • + + @param f the path to delete. + @return true if deleteOnExit is successful, otherwise false. + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. +

    + Will not return null. Expect IOException upon access error. + @param f given path + @return the statuses of the files/directories in the given patch + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param f + a path name + @param filter + the user-supplied path filter + @return an array of FileStatus objects for the files under the given path + after applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @return a list of statuses for the files under the given paths after + applying the filter default Path filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + + + + Does not guarantee to return the List of files/directories status in a + sorted order. + + @param files + a list of paths + @param filter + the user-supplied path filter + @return a list of statuses for the files under the given paths after + applying the filter + @throws FileNotFoundException when the path does not exist + @throws IOException see specific implementation]]> + + + + + + + Return all the files that match filePattern and are not checksum + files. Results are sorted by their names. + +

    + A filename pattern is composed of regular characters and + special pattern matching characters, which are: + +

    +
    +
    +
    ? +
    Matches any single character. + +
    * +
    Matches zero or more characters. + +
    [abc] +
    Matches a single character from character set + {a,b,c}. + +
    [a-b] +
    Matches a single character from the character range + {a...b}. Note that character a must be + lexicographically less than or equal to character b. + +
    [^a] +
    Matches a single character that is not from character set or range + {a}. Note that the ^ character must occur + immediately to the right of the opening bracket. + +
    \c +
    Removes (escapes) any special meaning of character c. + +
    {ab,cd} +
    Matches a string from the string set {ab, cd} + +
    {ab,c{de,fh}} +
    Matches a string from the string set {ab, cde, cfh} + +
    +
    +
    + + @param pathPattern a glob specifying a path pattern + + @return an array of paths that match the path pattern + @throws IOException IO failure]]> +
    +
    + + + + + + + + + + + + + + f does not exist + @throws IOException If an I/O error occurred]]> + + + + + + + + + f does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + p does not exist + @throws IOException if any I/O error occurred]]> + + + + + + + + + + If the path is a directory, + if recursive is false, returns files in the directory; + if recursive is true, return files in the subtree rooted at the path. + If the path is a file, return the file's status and block locations. + + @param f is the path + @param recursive if the subdirectories need to be traversed recursively + + @return an iterator that traverses statuses of the files + + @throws FileNotFoundException when the path does not exist; + @throws IOException see specific implementation]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + undefined. + @throws IOException IO failure]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In some FileSystem implementations such as HDFS metadata + synchronization is essential to guarantee consistency of read requests + particularly in HA setting. + @throws IOException If an I/O error occurred. + @throws UnsupportedOperationException if the operation is unsupported.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to modify + @param name xattr name. + @param value xattr value. + @param flag xattr set flag + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attribute + @param name xattr name. + @return byte[] xattr value. + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @param names XAttr names. + @return Map describing the XAttrs of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to get extended attributes + @return List{@literal } of the XAttr names of the file or directory + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + Refer to the HDFS extended attributes user documentation for details. + + @param path Path to remove extended attribute + @param name xattr name + @throws IOException IO failure + @throws UnsupportedOperationException if the operation is unsupported + (default outcome).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is a default method which is intended to be overridden by + subclasses. The default implementation returns an empty storage statistics + object.

    + + @return The StorageStatistics for this FileSystem instance. + Will never be null.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + All user code that may potentially use the Hadoop Distributed + File System should be written to use a FileSystem object or its + successor, {@link FileContext}. +

    +

    + The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem. There are other implementations + for object stores and (outside the Apache Hadoop codebase), + third party filesystems. +

    + Notes +
      +
    1. The behaviour of the filesystem is + + specified in the Hadoop documentation. + However, the normative specification of the behavior of this class is + actually HDFS: if HDFS does not behave the way these Javadocs or + the specification in the Hadoop documentations define, assume that + the documentation is incorrect. +
    2. +
    3. The term {@code FileSystem} refers to an instance of this class.
    4. +
    5. The acronym "FS" is used as an abbreviation of FileSystem.
    6. +
    7. The term {@code filesystem} refers to the distributed/local filesystem + itself, rather than the class used to interact with it.
    8. +
    9. The term "file" refers to a file in the remote filesystem, + rather than instances of {@code java.io.File}.
    10. +
    + + This is a carefully evolving class. + New methods may be marked as Unstable or Evolving for their initial release, + as a warning that they are new and may change based on the + experience of use in applications. +

    + Important note for developers +

    + If you are making changes here to the public API or protected methods, + you must review the following subclasses and make sure that + they are filtering/passing through new methods as appropriate. + + {@link FilterFileSystem}: methods are passed through. If not, + then {@code TestFilterFileSystem.MustNotImplement} must be + updated with the unsupported interface. + Furthermore, if the new API's support is probed for via + {@link #hasPathCapability(Path, String)} then + {@link FilterFileSystem#hasPathCapability(Path, String)} + must return false, always. +

    + {@link ChecksumFileSystem}: checksums are created and + verified. +

    + {@code TestHarFileSystem} will need its {@code MustNotImplement} + interface updated. + +

    + There are some external places your changes will break things. + Do co-ordinate changes here. +

    + + HBase: HBoss +

    + Hive: HiveShim23 +

    + {@code shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java}]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + returns true if the operation succeeded. When deleteSource is true, + this means "after the copy, delete(source) returned true" + If the destination is a directory, and mkdirs (dest) fails, + the operation will return false rather than raise any exception. +

    + The overwrite flag is about overwriting files; it has no effect about + handing an attempt to copy a file atop a directory (expect an IOException), + or a directory over a path which contains a file (mkdir will fail, so + "false"). +

    + The operation is recursive, and the deleteSource operation takes place + as each subdirectory is copied. Therefore, if an operation fails partway + through, the source tree may be partially deleted. +

    + @param srcFS source filesystem + @param srcStatus status of source + @param dstFS destination filesystem + @param dst path of source + @param deleteSource delete the source? + @param overwrite overwrite files at destination? + @param conf configuration to use when opening files + @return true if the operation succeeded. + @throws IOException failure]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } caller's + environment variables to use for expansion + @return String[] with absolute path to new jar in position 0 and + unexpanded wild card entry path in position 1 + @throws IOException if there is an I/O error while writing the jar file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FilterFileSystem contains + some other file system, which it uses as + its basic file system, possibly transforming + the data along the way or providing additional + functionality. The class FilterFileSystem + itself simply overrides all methods of + FileSystem with versions that + pass all requests to the contained file + system. Subclasses of FilterFileSystem + may further override some of these methods + and may also provide additional methods + and fields.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Return type on the {@link #build()} call. + @param type of builder itself.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -1 + if there is no more data because the end of the stream has been + reached]]> + + + + + + + + + + length bytes have been read. + + @param position position in the input stream to seek + @param buffer buffer into which data is read + @param offset offset into the buffer in which data is written + @param length the number of bytes to read + @throws IOException IO problems + @throws EOFException If the end of stream is reached while reading. + If an exception is thrown an undetermined number + of bytes in the buffer may have been written.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + // Don't + if (fs instanceof FooFileSystem) { + FooFileSystem fs = (FooFileSystem) fs; + OutputStream out = dfs.createFile(path) + .optionA() + .optionB("value") + .cache() + .build() + } else if (fs instanceof BarFileSystem) { + ... + } + + // Do + OutputStream out = fs.createFile(path) + .permission(perm) + .bufferSize(bufSize) + .opt("foofs:option.a", true) + .opt("foofs:option.b", "value") + .opt("barfs:cache", true) + .must("foofs:cache", true) + .must("barfs:cache-size", 256 * 1024 * 1024) + .build(); + + + If the option is not related to the file system, the option will be ignored. + If the option is must, but not supported by the file system, a + {@link IllegalArgumentException} will be thrown.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + path is invalid]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return file]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The interface extends {@link IOStatisticsSource} so that there is no + need to cast an instance to see if is a source of statistics. + However, implementations MAY return null for their actual statistics. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A partial listing of the children of a parent directory. Since it is a + partial listing, multiple PartialListing may need to be combined to obtain + the full listing of a parent directory. +

    + ListingBatch behaves similar to a Future, in that getting the result via + {@link #get()} will throw an Exception if there was a failure.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + and the scheme is null, and the authority + is null. + + @return whether the path is absolute and the URI has no scheme nor + authority parts]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if and only if pathname + should be included]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @return actual number of bytes read; -1 means "none" + @throws IOException IO problems.]]> + + + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @param offset offset in the buffer + @param length number of bytes to read + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + Warning: Not all filesystems satisfy the thread-safety requirement. + @param position position within file + @param buffer destination buffer + @throws IOException IO problems. + @throws EOFException the end of the data was reached before + the read operation completed]]> + + + + + + + + + + + + + + + + + + + The position returned by getPos() after readVectored() is undefined. +

    +

    + If a file is changed while the readVectored() operation is in progress, the output is + undefined. Some ranges may have old data, some may have new and some may have both. +

    +

    + While a readVectored() operation is in progress, normal read api calls may block. +

    + @param ranges the byte ranges to read + @param allocate the function to allocate ByteBuffer + @throws IOException any IOE.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note
    : Returned list is not sorted in any given order, + due to reliance on Java's {@link File#list()} API.)]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + XAttr is byte[], this class is to + covert byte[] to some kind of string representation or convert back. + String representation is convenient for display and input. For example + display in screen as shell response and json response, input as http + or shell parameter.]]> + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + MUST NOT be part of/refer to + any object instance of significant memory size. + Applications SHOULD remove references when they are + no longer needed. + When logged at TRACE, prints the key and stack trace of the caller, + to allow for debugging of any problems. + @param key key + @param value new value + @return old value or null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @return ftp]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link FileSystem} backed by an FTP client provided by Apache Commons Net. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is for reporting and testing.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + These are low-cost per-instance statistics provided by any Hadoop + I/O class instance. +

    + Consult the filesystem specification document for the requirements + of an implementation of this interface.]]> + + + + + + + + + + + + + + + + + + + + + + + Exceptions are caught and downgraded to debug logging. + @param source source of statistics. + @return a string for logging.]]> + + + + + + + + + + + + + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is designed to affordable to use in log statements. + @param source source of statistics -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + Whenever this object's toString() method is called, it evaluates the + statistics. +

    + This is for use in log statements where for the cost of creation + of this entry is low; it is affordable to use in log statements. + @param statistics statistics to stringify -may be null. + @return an object whose toString() operation returns the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It is serializable so that frameworks which can use java serialization + to propagate data (Spark, Flink...) can send the statistics + back. For this reason, TreeMaps are explicitly used as field types, + even though IDEs can recommend use of Map instead. + For security reasons, untrusted java object streams should never be + deserialized. If for some reason this is required, use + {@link #requiredSerializationClasses()} to get the list of classes + used when deserializing instances of this object. +

    +

    + It is annotated for correct serializations with jackson2. +

    ]]> +
    + + + + + + + + + This is not an atomic option. +

    + The instance can be serialized, and its + {@code toString()} method lists all the values. + @param statistics statistics + @return a snapshot of the current values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used to accrue values so as to dynamically update + the mean. If so, know that there is no synchronization + on the methods. +

    +

    + If a statistic has 0 samples then it is considered to be empty. +

    +

    + All 'empty' statistics are equivalent, independent of the sum value. +

    +

    + For non-empty statistics, sum and sample values must match + for equality. +

    +

    + It is serializable and annotated for correct serializations with jackson2. +

    +

    + Thread safety. The operations to add/copy sample data, are thread safe. +

    +
      +
    1. {@link #add(MeanStatistic)}
    2. +
    3. {@link #addSample(long)}
    4. +
    5. {@link #clear()}
    6. +
    7. {@link #setSamplesAndSum(long, long)}
    8. +
    9. {@link #set(MeanStatistic)}
    10. +
    11. {@link #setSamples(long)} and {@link #setSum(long)}
    12. +
    +

    + So is the {@link #mean()} method. This ensures that when + used to aggregated statistics, the aggregate value and sample + count are set and evaluated consistently. +

    +

    + Other methods marked as synchronized because Findbugs overreacts + to the idea that some operations to update sum and sample count + are synchronized, but that things like equals are not. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + names)}: {@value}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When adding new common statistic name constants, please make them unique. + By convention: +

    +
      +
    • the name of the constants are uppercase, words separated by + underscores.
    • +
    • the value of the constants are lowercase of the constant names.
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When adding new common statistic name constants, please make them unique. + By convention, they are implicitly unique: +
      +
    • + The name of the constants are uppercase, words separated by + underscores. +
    • +
    • + The value of the constants are lowercase of the constant names. +
    • +
    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Since these methods are often vendor- or device-specific, operators + may implement this interface in order to achieve fencing. +

    + Fencing is configured by the operator as an ordered list of methods to + attempt. Each method will be tried in turn, and the next in the list + will only be attempted if the previous one fails. See {@link NodeFencer} + for more information. +

    + If an implementation also implements {@link Configurable} then its + setConf method will be called upon instantiation.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + state (e.g ACTIVE/STANDBY) as well as + some additional information. + + @throws AccessControlException + if access is denied. + @throws IOException + if other errors happen + @see HAServiceStatus + @return HAServiceStatus.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hadoop.http.filter.initializers. + +

      +
    • StaticUserWebFilter - An authorization plugin that makes all +users a static configured user. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + public class IntArrayWritable extends ArrayWritable { + public IntArrayWritable() { + super(IntWritable.class); + } + } + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ByteWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the item + @param conf the configuration to store + @param item the object to be stored + @param keyName the name of the key to use + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param items the objects to be stored + @param keyName the name of the key to use + @throws IndexOutOfBoundsException if the items array is empty + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + + + + + the class of the item + @param conf the configuration to use + @param keyName the name of the key to use + @param itemClass the class of the item + @return restored object + @throws IOException : forwards Exceptions from the underlying + {@link Serialization} classes.]]> + + + + + DefaultStringifier offers convenience methods to store/load objects to/from + the configuration. + + @param the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a DoubleWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value argument is null or + its size is zero, the elementType argument must not be null. If + the argument value's size is bigger than zero, the argument + elementType is not be used. + + @param value enumSet value. + @param elementType elementType.]]> + + + + + value should not be null + or empty. + + @param value enumSet value.]]> + + + + + + + + + + + + + + value and elementType. If the value argument + is null or its size is zero, the elementType argument must not be + null. If the argument value's size is bigger than zero, the + argument elementType is not be used. + + @param value enumSet Value. + @param elementType elementType.]]> + + + + + + + + + + + + + + + + + + + o is an EnumSetWritable with the same value, + or both are null.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a FloatWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When two sequence files, which have same Key type but different Value + types, are mapped out to reduce, multiple Value types is not allowed. + In this case, this class can help you wrap instances with different types. +

    + +

    + Compared with ObjectWritable, this class is much more effective, + because ObjectWritable will append the class declaration as a String + into the output file in every Key-Value pair. +

    + +

    + Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +

    + + how to use it:
    + 1. Write your own class, such as GenericObject, which extends GenericWritable.
    + 2. Implements the abstract method getTypes(), defines + the classes which will be wrapped in GenericObject in application. + Attention: this classes defined in getTypes() method, must + implement Writable interface. +

    + + The code looks like this: +
    + public class GenericObject extends GenericWritable {
    + 
    +   private static Class[] CLASSES = {
    +               ClassType1.class, 
    +               ClassType2.class,
    +               ClassType3.class,
    +               };
    +
    +   protected Class[] getTypes() {
    +       return CLASSES;
    +   }
    +
    + }
    + 
    + + @since Nov 8, 2006]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a IntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + closes the input and output streams + at the end. + + @param in InputStrem to read from + @param out OutputStream to write to + @param conf the Configuration object. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param log the log to record problems to at debug level. Can be null. + @param closeables the objects to close + @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)} + instead]]> + + + + + + + ignore any {@link Throwable} or + null pointers. Must only be used for cleanup in exception handlers. + + @param logger the log to record problems to at debug level. Can be null. + @param closeables the objects to close]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is better than File#listDir because it does not ignore IOExceptions. + + @param dir The directory to list. + @param filter If non-null, the filter to use when listing + this directory. + @return The list of files in the directory. + + @throws IOException On I/O error]]> + + + + + + + + Borrowed from Uwe Schindler in LUCENE-5588 + @param fileToSync the file to fsync + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a LongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A map is a directory containing two files, the data file, + containing all keys and values in the map, and a smaller index + file, containing a fraction of the keys. The fraction is determined by + {@link Writer#getIndexInterval()}. + +

    The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +

    Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is an MD5Hash whose digest contains the + same values.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + className by first finding + it in the specified conf. If the specified conf is null, + try load it directly. + + @param conf configuration. + @param className classname. + @return Class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A {@link Comparator} that operates directly on byte representations of + objects. +

    + @param generic type. + @see DeserializerComparator]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SequenceFiles are flat files consisting of binary key/value + pairs. + +

    SequenceFile provides {@link SequenceFile.Writer}, + {@link SequenceFile.Reader} and {@link Sorter} classes for writing, + reading and sorting respectively.

    + + There are three SequenceFile Writers based on the + {@link CompressionType} used to compress key/value pairs: +
      +
    1. + Writer : Uncompressed records. +
    2. +
    3. + RecordCompressWriter : Record-compressed files, only compress + values. +
    4. +
    5. + BlockCompressWriter : Block-compressed files, both keys & + values are collected in 'blocks' + separately and compressed. The size of + the 'block' is configurable. +
    + +

    The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.

    + +

    The recommended way is to use the static createWriter methods + provided by the SequenceFile to chose the preferred format.

    + +

    The {@link SequenceFile.Reader} acts as the bridge and can read any of the + above SequenceFile formats.

    + +

    SequenceFile Formats

    + +

    Essentially there are 3 different formats for SequenceFiles + depending on the CompressionType specified. All of them share a + common header described below. + +

    +
      +
    • + version - 3 bytes of magic header SEQ, followed by 1 byte of actual + version number (e.g. SEQ4 or SEQ6) +
    • +
    • + keyClassName -key class +
    • +
    • + valueClassName - value class +
    • +
    • + compression - A boolean which specifies if compression is turned on for + keys/values in this file. +
    • +
    • + blockCompression - A boolean which specifies if block-compression is + turned on for keys/values in this file. +
    • +
    • + compression codec - CompressionCodec class which is used for + compression of keys and/or values (if compression is + enabled). +
    • +
    • + metadata - {@link Metadata} for this file. +
    • +
    • + sync - A sync marker to denote end of the header. +
    • +
    + +
    Uncompressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Record-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record +
        +
      • Record length
      • +
      • Key length
      • +
      • Key
      • +
      • Compressed Value
      • +
      +
    • +
    • + A sync-marker every few 100 kilobytes or so. +
    • +
    + +
    Block-Compressed SequenceFile Format
    +
      +
    • + Header +
    • +
    • + Record Block +
        +
      • Uncompressed number of records in the block
      • +
      • Compressed key-lengths block-size
      • +
      • Compressed key-lengths block
      • +
      • Compressed keys block-size
      • +
      • Compressed keys block
      • +
      • Compressed value-lengths block-size
      • +
      • Compressed value-lengths block
      • +
      • Compressed values block-size
      • +
      • Compressed values block
      • +
      +
    • +
    • + A sync-marker every block. +
    • +
    + +

    The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.

    + + @see CompressionCodec]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a ShortWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the class of the objects to stringify]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + position. Note that this + method avoids using the converter or doing String instantiation. + + @param position input position. + @return the Unicode scalar value at position or -1 + if the position is invalid or points to a + trailing byte.]]> + + + + + + + + + + what in the backing + buffer, starting as position start. The starting + position is measured in bytes and the return value is in + terms of byte position in the buffer. The backing buffer is + not converted to a string for this operation. + + @param what input what. + @param start input start. + @return byte position of the first occurrence of the search + string in the UTF-8 buffer or -1 if not found]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: For performance reasons, this call does not clear the + underlying byte array that is retrievable via {@link #getBytes()}. + In order to free the byte-array memory, call {@link #set(byte[])} + with an empty byte array (For example, new byte[0]).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a Text with the same contents.]]> + + + + + + + + + + + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + + @param utf8 input utf8. + @param start input start. + @param length input length. + @param replace input replace. + @throws CharacterCodingException a character encoding or + decoding error occurs.]]> + + + + + + + + + + + + + + + replace is true, then + malformed input is replaced with the + substitution character, which is U+FFFD. Otherwise the + method throws a MalformedInputException. + + @param string input string. + @param replace input replace. + @return ByteBuffer: bytes stores at ByteBuffer.array() + and length is ByteBuffer.limit() + @throws CharacterCodingException a character encoding or decoding error occurs.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In + addition, it provides methods for string traversal without converting the + byte array to a string.

    Also includes utilities for + serializing/deserialing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is useful when a class may evolve, so that instances written by the + old version of the class may still be processed by the new version. To + handle this situation, {@link #readFields(DataInput)} + implementations should catch {@link VersionMismatchException}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VIntWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + o is a VLongWritable with the same value.]]> + + + + + + + + + + + + + + + + + + + + + + + + out. + + @param out DataOuput to serialize this object into. + @throws IOException any other problem for write.]]> + + + + + + + in. + +

    For efficiency, implementations should attempt to re-use storage in the + existing object where possible.

    + + @param in DataInput to deseriablize this object from. + @throws IOException any other problem for readFields.]]> +
    + + + Any key or value type in the Hadoop Map-Reduce + framework implements this interface.

    + +

    Implementations typically implement a static read(DataInput) + method which constructs a new instance, calls {@link #readFields(DataInput)} + and returns the instance.

    + +

    Example:

    +
    +     public class MyWritable implements Writable {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +
    +       // Default constructor to allow (de)serialization
    +       MyWritable() { }
    +
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +
    +       public static MyWritable read(DataInput in) throws IOException {
    +         MyWritable w = new MyWritable();
    +         w.readFields(in);
    +         return w;
    +       }
    +     }
    + 
    ]]> +
    + + + + + + + + WritableComparables can be compared to each other, typically + via Comparators. Any type which is to be used as a + key in the Hadoop Map-Reduce framework should implement this + interface.

    + +

    Note that hashCode() is frequently used in Hadoop to partition + keys. It's important that your implementation of hashCode() returns the same + result across different instances of the JVM. Note also that the default + hashCode() implementation in Object does not + satisfy this property.

    + +

    Example:

    +
    +     public class MyWritableComparable implements
    +      WritableComparable{@literal } {
    +       // Some data
    +       private int counter;
    +       private long timestamp;
    +       
    +       public void write(DataOutput out) throws IOException {
    +         out.writeInt(counter);
    +         out.writeLong(timestamp);
    +       }
    +       
    +       public void readFields(DataInput in) throws IOException {
    +         counter = in.readInt();
    +         timestamp = in.readLong();
    +       }
    +       
    +       public int compareTo(MyWritableComparable o) {
    +         int thisValue = this.value;
    +         int thatValue = o.value;
    +         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
    +       }
    +
    +       public int hashCode() {
    +         final int prime = 31;
    +         int result = 1;
    +         result = prime * result + counter;
    +         result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
    +         return result
    +       }
    +     }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The default implementation reads the data into two {@link + WritableComparable}s (using {@link + Writable#readFields(DataInput)}, then calls {@link + #compare(WritableComparable,WritableComparable)}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This base implementation uses the natural ordering. To define alternate + orderings, override {@link #compare(WritableComparable,WritableComparable)}. + +

    One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Generics Type T. + @param orig The object to copy + @param conf input Configuration. + @return The copied object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Enum type + @param in DataInput to read from + @param enumType Class type of Enum + @return Enum represented by String read from DataInput + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + len number of bytes in input streamin + @param in input stream + @param len number of bytes to skip + @throws IOException when skipped less number of bytes]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CompressionCodec for which to get the + Compressor + @param conf the Configuration object which contains confs for creating or reinit the compressor + @return Compressor for the given + CompressionCodec from the pool or a new one]]> + + + + + + + + + CompressionCodec for which to get the + Decompressor + @return Decompressor for the given + CompressionCodec the pool or a new one]]> + + + + + + Compressor to be returned to the pool]]> + + + + + + Decompressor to be returned to the + pool]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec object]]> + + + + + + + Codec aliases are case insensitive. +

    + The code alias is the short class name (without the package name). + If the short class name ends with 'Codec', then there are two aliases for + the codec, the complete short class name and the short class name without + the 'Codec' ending. For example for the 'GzipCodec' codec class name the + alias are 'gzip' and 'gzipcodec'. + + @param codecName the canonical class name of the codec + @return the codec class]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations are assumed to be buffered. This permits clients to + reposition the underlying input stream then call {@link #resetState()}, + without having to also synchronize client buffers.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + #setInput() should be called in order to provide more input.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the end of the compressed + data output stream has been reached.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true indicating that more input data is required. + (Both native and non-native versions of various Decompressors require + that the data passed in via b[] remain unmodified until + the caller is explicitly notified--via {@link #needsInput()}--that the + buffer may be safely modified. With this requirement, an extra + buffer-copy can be avoided.) + + @param b Input data + @param off Start offset + @param len Length]]> + + + + + true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called to + provide more input. + + @return true if the input data buffer is empty and + {@link #setInput(byte[], int, int)} should be called in + order to provide more input.]]> + + + + + + + + + + + + + true if a preset dictionary is needed for decompression. + @return true if a preset dictionary is needed for decompression]]> + + + + + true if the end of the decompressed + data output stream has been reached. Indicates a concatenated data stream + when finished() returns true and {@link #getRemaining()} + returns a positive value. finished() will be reset with the + {@link #reset()} method. + @return true if the end of the decompressed + data output stream has been reached.]]> + + + + + + + + + + + + + + true and getRemaining() returns a positive value. If + {@link #finished()} returns true and getRemaining() returns + a zero value, indicates that the end of data stream has been reached and + is not a concatenated data stream. + @return The number of bytes remaining in the compressed data buffer.]]> + + + + + true and {@link #getRemaining()} returns a positive value, + reset() is called before processing of the next data stream in the + concatenated data stream. {@link #finished()} will be reset and will + return false when reset() is called.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec + io.compress.passthrough.extension = .gz + + + Note: this is not a Splittable codec: it doesn't know the + capabilities of the passed in stream. It should be possible to + extend this in a subclass: the inner classes are marked as protected + to enable this. Do not retrofit splitting to this class..]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • "none" - No compression. +
  • "lzo" - LZO compression. +
  • "gz" - GZIP compression. + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • Block Compression. +
  • Named meta data blocks. +
  • Sorted or unsorted keys. +
  • Seek by key or by file offset. + + The memory footprint of a TFile includes the following: +
      +
    • Some constant overhead of reading or writing a compressed block. +
        +
      • Each compressed block requires one compression/decompression codec for + I/O. +
      • Temporary space to buffer the key. +
      • Temporary space to buffer the value (for TFile.Writer only). Values are + chunk encoded, so that we buffer at most one chunk of user data. By default, + the chunk buffer is 1MB. Reading chunked value does not require additional + memory. +
      +
    • TFile index, which is proportional to the total number of Data Blocks. + The total amount of memory needed to hold the index can be estimated as + (56+AvgKeySize)*NumBlocks. +
    • MetaBlock index, which is proportional to the total number of Meta + Blocks.The total amount of memory needed to hold the index for Meta Blocks + can be estimated as (40+AvgMetaBlockName)*NumMetaBlock. +
    +

    + The behavior of TFile can be customized by the following variables through + Configuration: +

      +
    • tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default + to 1MB. Values of the length less than the chunk size is guaranteed to have + known value length in read time (See + {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}). +
    • tfile.fs.output.buffer.size: Buffer size used for + FSDataOutputStream. Integer (in bytes). Default to 256KB. +
    • tfile.fs.input.buffer.size: Buffer size used for + FSDataInputStream. Integer (in bytes). Default to 256KB. +
    +

    + Suggestions on performance optimization. +

      +
    • Minimum block size. We recommend a setting of minimum block size between + 256KB to 1MB for general usage. Larger block size is preferred if files are + primarily for sequential access. However, it would lead to inefficient random + access (because there are more data to decompress). Smaller blocks are good + for random access, but require more memory to hold the block index, and may + be slower to create (because we must flush the compressor stream at the + conclusion of each data block, which leads to an FS I/O flush). Further, due + to the internal caching in Compression codec, the smallest possible block + size would be around 20KB-30KB. +
    • The current implementation does not offer true multi-threading for + reading. The implementation uses FSDataInputStream seek()+read(), which is + shown to be much faster than positioned-read call in single thread mode. + However, it also means that if multiple threads attempt to access the same + TFile (using multiple scanners) simultaneously, the actual I/O is carried out + sequentially even if they access different DFS blocks. +
    • Compression codec. Use "none" if the data is not very compressable (by + compressable, I mean a compression ratio at least 2:1). Generally, use "lzo" + as the starting point for experimenting. "gz" overs slightly better + compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to + decompress, comparing to "lzo". +
    • File system buffering, if the underlying FSDataInputStream and + FSDataOutputStream is already adequately buffered; or if applications + reads/writes keys and values in large buffers, we can reduce the sizes of + input/output buffering in TFile layer by setting the configuration parameters + "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size". +
    + + Some design rationale behind TFile can be found at Hadoop-3315.]]> + + + + + + + + + + + Utils#writeVLong(out, n). + + @param out + output stream + @param n + The integer to be encoded + @throws IOException raised on errors performing I/O. + @see Utils#writeVLong(DataOutput, long)]]> + + + + + + + + +
  • if n in [-32, 127): encode in one byte with the actual value. + Otherwise, +
  • if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; + byte[1]=n&0xff. Otherwise, +
  • if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - + 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, +
  • if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; + byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; + byte[3]=n&0xff. + Otherwise: +
  • if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = + (n>>24)&0xff; byte[2]=(n>>16)&0xff; + byte[3]=(n>>8)&0xff; byte[4]=n&0xff; +
  • if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = + (n>>32)&0xff; byte[2]=(n>>24)&0xff; + byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff; + byte[5]=n&0xff +
  • if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = + (n>>40)&0xff; byte[2]=(n>>32)&0xff; + byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff; + byte[5]=(n>>8)&0xff; byte[6]=n&0xff; +
  • if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = + (n>>48)&0xff; byte[2] = (n>>40)&0xff; + byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]= + (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff; +
  • if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = + (n>>54)&0xff; byte[2] = (n>>48)&0xff; + byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff; + byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]= + (n>>8)&0xff; byte[8]=n&0xff; + + + @param out + output stream + @param n + the integer number + @throws IOException raised on errors performing I/O.]]> + + + + + + + (int)Utils#readVLong(in). + + @param in + input stream + @return the decoded integer + @throws IOException raised on errors performing I/O. + + @see Utils#readVLong(DataInput)]]> + + + + + + + +
  • if (FB >= -32), return (long)FB; +
  • if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; +
  • if (FB in [-104, -73]), return (FB+88)<<16 + + (NB[0]&0xff)<<8 + NB[1]&0xff; +
  • if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff) + <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff; +
  • if (FB in [-128, -121]), return interpret NB[FB+129] as a signed + big-endian integer. + + @param in + input stream + @return the decoded long integer. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @param cmp + Comparator for the key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + Type of the input key. + @param list + The list + @param key + The input key. + @return The index to the desired element if it exists; or list.size() + otherwise.]]> + + + + + + + + + + + + + + + + + An experimental {@link Serialization} for Java {@link Serializable} classes. +

    + @see JavaSerializationComparator]]> +
    +
    + + + + + + + + + A {@link RawComparator} that uses a {@link JavaSerialization} + {@link Deserializer} to deserialize objects that are then compared via + their {@link Comparable} interfaces. +

    + @param generic type. + @see JavaSerialization]]> +
    +
    + + + + + + + + + + + + + +This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

    + +

    +To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + avro.reflect.pkgs or implement + {@link AvroReflectSerializable} interface.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + +This package provides Avro serialization in Hadoop. This can be used to +serialize/deserialize Avro types in Hadoop. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +

    + +

    +Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Implementations of this interface consume the {@link MetricsRecord} generated + from {@link MetricsSource}. It registers with {@link MetricsSystem} which + periodically pushes the {@link MetricsRecord} to the sink using + {@link #putMetrics(MetricsRecord)} method. If the implementing class also + implements {@link Closeable}, then the MetricsSystem will close the sink when + it is stopped.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the actual type of the source object + @param source object to register + @return the source object + @exception MetricsException Metrics Exception.]]> + + + + + + + + the actual type of the source object + @param source object to register + @param name of the source. Must be unique or null (then extracted from + the annotations of the source object.) + @param desc the description of the source (or null. See above.) + @return the source object + @exception MetricsException Metrics Exception.]]> + + + + + + + + + + + + + + + + + + + + +
  • {@link MetricsSource} generate and update metrics information.
  • +
  • {@link MetricsSink} consume the metrics information
  • + + + {@link MetricsSource} and {@link MetricsSink} register with the metrics + system. Implementations of {@link MetricsSystem} polls the + {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to + {@link MetricsSink}.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } (aggregate). + Filter out entries that don't have at least minSamples. + + @param minSamples input minSamples. + @return a map of peer DataNode Id to the average latency to that + node seen over the measurement period.]]> + + + + + + + + + + + This class maintains a group of rolling average metrics. It implements the + algorithm of rolling average, i.e. a number of sliding windows are kept to + roll over and evict old subsets of samples. Each window has a subset of + samples in a stream, where sub-sum and sub-total are collected. All sub-sums + and sub-totals in all windows will be aggregated to final-sum and final-total + used to compute final average, which is called rolling average. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class is a metrics sink that uses + {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every + roll interval a new directory will be created under the path specified by the + basepath property. All metrics will be logged to a file in the + current interval's directory in a file named <hostname>.log, where + <hostname> is the name of the host on which the metrics logging + process is running. The base path is set by the + <prefix>.sink.<instance>.basepath property. The + time zone used to create the current interval's directory name is GMT. If + the basepath property isn't specified, it will default to + "/tmp", which is the temp directory on whatever default file + system is configured for the cluster.

    + +

    The <prefix>.sink.<instance>.ignore-error + property controls whether an exception is thrown when an error is encountered + writing a log file. The default value is true. When set to + false, file errors are quietly swallowed.

    + +

    The roll-interval property sets the amount of time before + rolling the directory. The default value is 1 hour. The roll interval may + not be less than 1 minute. The property's value should be given as + number unit, where number is an integer value, and + unit is a valid unit. Valid units are minute, hour, + and day. The units are case insensitive and may be abbreviated or + plural. If no units are specified, hours are assumed. For example, + "2", "2h", "2 hour", and + "2 hours" are all valid ways to specify two hours.

    + +

    The roll-offset-interval-millis property sets the upper + bound on a random time interval (in milliseconds) that is used to delay + before the initial roll. All subsequent rolls will happen an integer + number of roll intervals after the initial roll, hence retaining the original + offset. The purpose of this property is to insert some variance in the roll + times so that large clusters using this sink on every node don't cause a + performance impact on HDFS by rolling simultaneously. The default value is + 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in + millis should be no less than the number of sink instances times 5. + +

    The primary use of this class is for logging to HDFS. As it uses + {@link org.apache.hadoop.fs.FileSystem} to access the target file system, + however, it can be used to write to the local file system, Amazon S3, or any + other supported file system. The base path for the sink will determine the + file system used. An unqualified path will write to the default file system + set by the configuration.

    + +

    Not all file systems support the ability to append to files. In file + systems without the ability to append to files, only one writer can write to + a file at a time. To allow for concurrent writes from multiple daemons on a + single host, the source property is used to set unique headers + for the log files. The property should be set to the name of + the source daemon, e.g. namenode. The value of the + source property should typically be the same as the property's + prefix. If this property is not set, the source is taken to be + unknown.

    + +

    Instead of appending to an existing file, by default the sink + will create a new file with a suffix of ".<n>", where + n is the next lowest integer that isn't already used in a file name, + similar to the Hadoop daemon logs. NOTE: the file with the highest + sequence number is the newest file, unlike the Hadoop daemon logs.

    + +

    For file systems that allow append, the sink supports appending to the + existing file instead. If the allow-append property is set to + true, the sink will instead append to the existing file on file systems that + support appends. By default, the allow-append property is + false.

    + +

    Note that when writing to HDFS with allow-append set to true, + there is a minimum acceptable number of data nodes. If the number of data + nodes drops below that minimum, the append will succeed, but reading the + data will fail with an IOException in the DataStreamer class. The minimum + number of data nodes required for a successful append is generally 2 or + 3.

    + +

    Note also that when writing to HDFS, the file size information is not + updated until the file is closed (at the end of the interval) even though + the data is being written successfully. This is a known HDFS limitation that + exists because of the performance cost of updating the metadata. See + HDFS-5478.

    + +

    When using this sink in a secure (Kerberos) environment, two additional + properties must be set: keytab-key and + principal-key. keytab-key should contain the key by + which the keytab file can be found in the configuration, for example, + yarn.nodemanager.keytab. principal-key should + contain the key by which the principal can be found in the configuration, + for example, yarn.nodemanager.principal.]]> + + + + + + + + + + + + + + + + + + + + + + + + + CollectD StatsD plugin). +
    + To configure this plugin, you will need to add the following + entries to your hadoop-metrics2.properties file: +
    +

    + *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
    + [prefix].sink.statsd.server.host=
    + [prefix].sink.statsd.server.port=
    + [prefix].sink.statsd.skip.hostname=true|false (optional)
    + [prefix].sink.statsd.service.name=NameNode (name you want for service)
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName serviceName. + @param nameName nameName. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + ,name=}" + Where the {@literal and } are the supplied + parameters. + + @param serviceName serviceName. + @param nameName nameName. + @param properties - Key value pairs to define additional JMX ObjectName + properties. + @param theMbean - the MBean to register + @return the named used to register the MBean]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hostname or hostname:port. If + the specs string is null, defaults to localhost:defaultPort. + + @param specs server specs (see description) + @param defaultPort the default port if not specified + @return a list of InetSocketAddress objects.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is used when parts of Hadoop need know whether to apply + single rack vs multi-rack policies, such as during block placement. + Such algorithms behave differently if they are on multi-switch systems. +

    + + @return true if the mapping thinks that it is on a single switch]]> +
    +
    + + + + + + + + + + + + + + + + + This predicate simply assumes that all mappings not derived from + this class are multi-switch. + @param mapping the mapping to query + @return true if the base class says it is single switch, or the mapping + is not derived from this class.]]> + + + + It is not mandatory to + derive {@link DNSToSwitchMapping} implementations from it, but it is strongly + recommended, as it makes it easy for the Hadoop developers to add new methods + to this base class that are automatically picked up by all implementations. +

    + + This class does not extend the Configured + base class, and should not be changed to do so, as it causes problems + for subclasses. The constructor of the Configured calls + the {@link #setConf(Configuration)} method, which will call into the + subclasses before they have been fully constructed.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If a name cannot be resolved to a rack, the implementation + should return {@link NetworkTopology#DEFAULT_RACK}. This + is what the bundled implementations do, though it is not a formal requirement + + @param names the list of hosts to resolve (can be empty) + @return list of resolved network paths. + If names is empty, the returned list is also empty]]> + + + + + + + + + + + + + + + + + + + + + + + + Calling {@link #setConf(Configuration)} will trigger a + re-evaluation of the configuration settings and so be used to + set up the mapping script.]]> + + + + + + + + + + + + + + + + + + + + + This will get called in the superclass constructor, so a check is needed + to ensure that the raw mapping is defined before trying to relaying a null + configuration. +

    + @param conf input Configuration.]]> +
    + + + + + + + + + It contains a static class RawScriptBasedMapping that performs + the work: reading the configuration parameters, executing any defined + script, handling errors and such like. The outer + class extends {@link CachedDNSToSwitchMapping} to cache the delegated + queries. +

    + This DNS mapper's {@link #isSingleSwitch()} predicate returns + true if and only if a script is defined.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text + file. The columns are separated by whitespace. The first column is a DNS or + IP address and the second column specifies the rack where the address maps. +

    +

    + This class uses the configuration parameter {@code + net.topology.table.file.name} to locate the mapping file. +

    +

    + Calls to {@link #resolve(List)} will look up the address as defined in the + mapping file. If no entry corresponding to the address is found, the value + {@code /default-rack} is returned. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } mapping and {@literal <}groupId, groupName{@literal >} + mapping.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + }/host@realm. + @param principalName principal name of format as described above + @return host name if the the string conforms to the above format, else null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + generic type T. + @return generic type T.]]> + + + + + + + Generics Type T. + @return the result of the action + @throws IOException in the event of error]]> + + + + + + + generic type T. + @return the result of the action + @throws IOException in the event of error]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + } "jack" + + @param userName input userName. + @return userName without login method]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method]]> + + + + + + + + the return type of the run method + @param action the method to execute + @return the value from the run method + @throws IOException if the action throws an IOException + @throws Error if the action throws an Error + @throws RuntimeException if the action throws a RuntimeException + @throws InterruptedException if the action throws an InterruptedException + @throws UndeclaredThrowableException if the action throws something else]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CredentialProvider implementations must be thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (cause==null ? null : cause.toString()) (which + typically contains the class and detail message of cause). + @param cause the cause (which is saved for later retrieval by the + {@link #getCause()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.)]]> + + + + + + + + + + + + + + does not provide the stack trace for security purposes.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A User-Agent String is considered to be a browser if it matches + any of the regex patterns from browser-useragent-regex; the default + behavior is to consider everything a browser that matches the following: + "^Mozilla.*,^Opera.*". Subclasses can optionally override + this method to use different behavior. + + @param userAgent The User-Agent String, or null if there isn't one + @return true if the User-Agent String refers to a browser, false if not]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The type of the token identifier]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + T extends TokenIdentifier]]> + + + + + + + + + + DelegationTokenAuthenticatedURL. +

    + An instance of the default {@link DelegationTokenAuthenticator} will be + used.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used.]]> + + + + + DelegationTokenAuthenticatedURL using the default + {@link DelegationTokenAuthenticator} class. + + @param connConfigurator a connection configurator.]]> + + + + + DelegationTokenAuthenticatedURL. + + @param authenticator the {@link DelegationTokenAuthenticator} instance to + use, if null the default one will be used. + @param connConfigurator a connection configurator.]]> + + + + + + + + + + + + The default class is {@link KerberosDelegationTokenAuthenticator} + + @return the delegation token authenticator class to use as default.]]> + + + + + + + This method is provided to enable WebHDFS backwards compatibility. + + @param useQueryString TRUE if the token is transmitted in the + URL query string, FALSE if the delegation token is transmitted + using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP + header.]]> + + + + + TRUE if the token is transmitted in the URL query + string, FALSE if the delegation token is transmitted using the + {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]> + + + + + + + + + + + + + + + + + + Authenticator. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator. If the doAs parameter is not NULL, + the request will be done on behalf of the specified doAs user. + + @param url the URL to connect to. Only HTTP/S URLs are supported. + @param token the authentication token being used for the user. + @param doAs user to do the the request on behalf of, if NULL the request is + as self. + @return an authenticated {@link HttpURLConnection}. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @return a delegation token. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred.]]> + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @throws IOException if an IO error occurred.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + DelegationTokenAuthenticatedURL is a + {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token + functionality. +

    + The authentication mechanisms supported by default are Hadoop Simple + authentication (also known as pseudo authentication) and Kerberos SPNEGO + authentication. +

    + Additional authentication mechanisms can be supported via {@link + DelegationTokenAuthenticator} implementations. +

    + The default {@link DelegationTokenAuthenticator} is the {@link + KerberosDelegationTokenAuthenticator} class which supports + automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via + the {@link PseudoDelegationTokenAuthenticator} class. +

    + AuthenticatedURL instances are not thread-safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return abstract delegation token identifier.]]> + + + + + + + + + + + Authenticator + for authentication. + + @param url the URL to get the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token being used for the user where the + Delegation token will be stored. + @param renewer the renewer user. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return abstract delegation token identifier.]]> + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param dToken abstract delegation token identifier. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + + + + Authenticator for authentication. + + @param url the URL to renew the delegation token from. Only HTTP/S URLs are + supported. + @param token the authentication token with the Delegation Token to renew. + @param doAsUser the user to do as, which will be the token owner. + @param dToken abstract delegation token identifier. + @throws IOException if an IO error occurred. + @throws AuthenticationException if an authentication exception occurred. + @return delegation token long value.]]> + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param dToken abstract delegation token identifier. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + Authenticator. + + @param url the URL to cancel the delegation token from. Only HTTP/S URLs + are supported. + @param token the authentication token with the Delegation Token to cancel. + @param dToken abstract delegation token identifier. + @param doAsUser the user to do as, which will be the token owner. + @throws IOException if an IO error occurred.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KerberosDelegationTokenAuthenticator provides support for + Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation + Token operations. +

    + It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP + endpoint does not trigger a SPNEGO authentication]]> + + + + + + + + + PseudoDelegationTokenAuthenticator provides support for + Hadoop's pseudo authentication mechanism that accepts + the user name specified as a query string parameter and support for Hadoop + Delegation Token operations. +

    + This mimics the model of Hadoop Simple authentication trusting the + {@link UserGroupInformation#getCurrentUser()} value.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + live. + @return a (snapshotted) map of blocker name->description values]]> + + + + + + + + + + + + + Do nothing if the service is null or not + in a state in which it can be/needs to be stopped. +

    + The service state is checked before the operation begins. + This process is not thread safe. + @param service a service or null]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • Any long-lived operation here will prevent the service state + change from completing in a timely manner.
  • +
  • If another thread is somehow invoked from the listener, and + that thread invokes the methods of the service (including + subclass-specific methods), there is a risk of a deadlock.
  • + + + + @param service the service that has changed.]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + The base implementation logs all arguments at the debug level, + then returns the passed in config unchanged.]]> + + + + + + + The action is to signal success by returning the exit code 0.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is called before {@link #init(Configuration)}; + Any non-null configuration that is returned from this operation + becomes the one that is passed on to that {@link #init(Configuration)} + operation. +

    + This permits implementations to change the configuration before + the init operation. As the ServiceLauncher only creates + an instance of the base {@link Configuration} class, it is + recommended to instantiate any subclass (such as YarnConfiguration) + that injects new resources. +

    + @param config the initial configuration build up by the + service launcher. + @param args list of arguments passed to the command line + after any launcher-specific commands have been stripped. + @return the configuration to init the service with. + Recommended: pass down the config parameter with any changes + @throws Exception any problem]]> + + + + + + + The return value becomes the exit code of the launched process. +

    + If an exception is raised, the policy is: +

      +
    1. Any subset of {@link org.apache.hadoop.util.ExitUtil.ExitException}: + the exception is passed up unmodified. +
    2. +
    3. Any exception which implements + {@link org.apache.hadoop.util.ExitCodeProvider}: + A new {@link ServiceLaunchException} is created with the exit code + and message of the thrown exception; the thrown exception becomes the + cause.
    4. +
    5. Any other exception: a new {@link ServiceLaunchException} is created + with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and + the message of the original exception (which becomes the cause).
    6. +
    + @return the exit code + @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed + up as the exit code and error text. + @throws Exception any exception to report. If it provides an exit code + this is used in a wrapping exception.]]> +
    +
    + + + The command line options will be passed down before the + {@link Service#init(Configuration)} operation is invoked via an + invocation of {@link LaunchableService#bindArgs(Configuration, List)} + After the service has been successfully started via {@link Service#start()} + the {@link LaunchableService#execute()} method is called to execute the + service. When this method returns, the service launcher will exit, using + the return code from the method as its exit option.]]> + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Approximate HTTP equivalent: {@code 400 Bad Request}]]> + + + + + + approximate HTTP equivalent: Approximate HTTP equivalent: {@code 401 Unauthorized}]]> + + + + + + + + + + + Approximate HTTP equivalent: Approximate HTTP equivalent: {@code 403: Forbidden}]]> + + + + + + Approximate HTTP equivalent: {@code 404: Not Found}]]> + + + + + + Approximate HTTP equivalent: {@code 405: Not allowed}]]> + + + + + + Approximate HTTP equivalent: {@code 406: Not Acceptable}]]> + + + + + + Approximate HTTP equivalent: {@code 408: Request Timeout}]]> + + + + + + Approximate HTTP equivalent: {@code 409: Conflict}]]> + + + + + + Approximate HTTP equivalent: {@code 500 Internal Server Error}]]> + + + + + + Approximate HTTP equivalent: {@code 501: Not Implemented}]]> + + + + + + Approximate HTTP equivalent: {@code 503 Service Unavailable}]]> + + + + + + If raised, this is expected to be raised server-side and likely due + to client/server version incompatibilities. +

    + Approximate HTTP equivalent: {@code 505: Version Not Supported}]]> + + + + + + + + + + + + + + + Codes with a YARN prefix are YARN-related. +

    + Many of the exit codes are designed to resemble HTTP error codes, + squashed into a single byte. e.g 44 , "not found" is the equivalent + of 404. The various 2XX HTTP error codes aren't followed; + the Unix standard of "0" for success is used. +

    +    0-10: general command issues
    +   30-39: equivalent to the 3XX responses, where those responses are
    +          considered errors by the application.
    +   40-49: client-side/CLI/config problems
    +   50-59: service-side problems.
    +   60+  : application specific error codes
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. +

    + If the last argument is a throwable, it becomes the cause of the exception. + It will also be used as a parameter for the format. + @param exitCode exit code + @param format format for message to use in exception + @param args list of arguments]]> + + + + + + This uses {@link String#format(String, Object...)} + to build the formatted exception in the ENGLISH locale. + @param exitCode exit code + @param cause inner cause + @param format format for message to use in exception + @param args list of arguments]]> + + + + + When caught by the ServiceLauncher, it will convert that + into a process exit code. + + The {@link #ServiceLaunchException(int, String, Object...)} constructor + generates formatted exceptions.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This will be 0 until a call + to {@link #finished()} has been made. +

    + @return the currently recorded duration.]]> +
    + + + + + + + + + +
    + + + + + + + + + + Clients and/or applications can use the provided Progressable + to explicitly report progress to the Hadoop framework. This is especially + important for operations which take significant amount of time since, + in-lieu of the reported progress, the framework has to assume that an error + has occurred and time-out the operation.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Generics Type. + @param theClass class of which an object is created + @param conf Configuration + @return a new object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Generics Type T + @param o object whose correctly-typed Class is to be obtained + @return the correctly typed Class of the given object.]]> + + + + + + + + + Generics Type. + @param conf input Configuration. + @param src the object to copy from + @param dst the object to copy into, which is destroyed + @return dst param (the copy) + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + kill -0 command or equivalent]]> + + + + + + + + + + + + + + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param parent File parent directory + @param basename String script file basename + @return File referencing the script in the directory]]> + + + + + + ".cmd" on Windows, or ".sh" otherwise. + + @param basename String script file basename + @return String script file name]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IOException. + @return the path to {@link #WINUTILS_EXE} + @throws RuntimeException if the path is not resolvable]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Shell. + @return the thread that ran runCommand() that spawned this shell + or null if no thread is waiting for this shell to complete]]> + + + + + + + + + + + + Shell interface. + @param cmd shell command to execute. + @return the output of the executed command. + @throws IOException raised on errors performing I/O.]]> + + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @param timeout time in milliseconds after which script should be marked timeout + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + + + + Shell interface. + @param env the map of environment key=value + @param cmd shell command to execute. + @return the output of the executed command. + @throws IOException on any problem.]]> + + + + + Shell processes. + Iterates through a map of all currently running Shell + processes and destroys them one by one. This method is thread safe]]> + + + + + Shell objects. + + @return all shells set.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CreateProcess synchronization object.]]> + + + + + os.name property.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Important: caller must check for this value being null. + The lack of such checks has led to many support issues being raised. +

    + @deprecated use one of the exception-raising getter methods, + specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]> + + + + + + + + + + + + + + Shell can be used to run shell commands like du or + df. It also offers facilities to gate commands by + time-intervals.]]> + + + + + + + + ShutdownHookManager singleton. + + @return ShutdownHookManager singleton.]]> + + + + + + + Runnable + @param priority priority of the shutdownHook.]]> + + + + + + + + + Runnable + @param priority priority of the shutdownHook + @param timeout timeout of the shutdownHook + @param unit unit of the timeout TimeUnit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ShutdownHookManager enables running shutdownHook + in a deterministic order, higher priority first. +

    + The JVM runs ShutdownHooks in a non-deterministic order or in parallel. + This class registers a single JVM shutdownHook and run all the + shutdownHooks registered to it (to this class) in order based on their + priority. + + Unless a hook was registered with a shutdown explicitly set through + {@link #addShutdownHook(Runnable, int, long, TimeUnit)}, + the shutdown time allocated to it is set by the configuration option + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in + {@code core-site.xml}, with a default value of + {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT} + seconds.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool, is the standard for any Map-Reduce tool/application. + The tool/application should delegate the handling of + + standard command-line options to {@link ToolRunner#run(Tool, String[])} + and only handle its custom arguments.

    + +

    Here is how a typical Tool is implemented:

    +
    +     public class MyApp extends Configured implements Tool {
    +     
    +       public int run(String[] args) throws Exception {
    +         // Configuration processed by ToolRunner
    +         Configuration conf = getConf();
    +         
    +         // Create a JobConf using the processed conf
    +         JobConf job = new JobConf(conf, MyApp.class);
    +         
    +         // Process custom command-line options
    +         Path in = new Path(args[1]);
    +         Path out = new Path(args[2]);
    +         
    +         // Specify various job-specific parameters     
    +         job.setJobName("my-app");
    +         job.setInputPath(in);
    +         job.setOutputPath(out);
    +         job.setMapperClass(MyMapper.class);
    +         job.setReducerClass(MyReducer.class);
    +
    +         // Submit the job, then poll for progress until the job is complete
    +         RunningJob runningJob = JobClient.runJob(job);
    +         if (runningJob.isSuccessful()) {
    +           return 0;
    +         } else {
    +           return 1;
    +         }
    +       }
    +       
    +       public static void main(String[] args) throws Exception {
    +         // Let ToolRunner handle generic command-line options 
    +         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
    +         
    +         System.exit(res);
    +       }
    +     }
    + 
    + + @see GenericOptionsParser + @see ToolRunner]]> +
    + + + + + + + + + + + + + Tool by {@link Tool#run(String[])}, after + parsing with the given generic arguments. Uses the given + Configuration, or builds one if null. + + Sets the Tool's configuration with the possibly modified + version of the conf. + + @param conf Configuration for the Tool. + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method. + @throws Exception Exception.]]> + + + + + + + + Tool with its Configuration. + + Equivalent to run(tool.getConf(), tool, args). + + @param tool Tool to run. + @param args command-line arguments to the tool. + @return exit code of the {@link Tool#run(String[])} method. + @throws Exception exception.]]> + + + + + + + + + + + + + + + + + ToolRunner can be used to run classes implementing + Tool interface. It works in conjunction with + {@link GenericOptionsParser} to parse the + + generic hadoop command line arguments and modifies the + Configuration of the Tool. The + application-specific options are passed along without being modified. +

    + + @see Tool + @see GenericOptionsParser]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bloom filter, as defined by Bloom in 1970. +

    + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this counting Bloom filter. +

    + Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> + + + + + + + + + + + + key -> count map. +

    NOTE: due to the bucket size of this filter, inserting the same + key more than 15 times will cause an overflow at all filter positions + associated with this key, and it will significantly increase the error + rate for this and other keys. For this reason the filter can only be + used to store small count values 0 <= N << 15. + @param key key to be tested + @return 0 if the key is not present. Otherwise, a positive value v will + be returned such that v == count with probability equal to the + error rate of this filter, and v > count otherwise. + Additionally, if the filter experienced an underflow as a result of + {@link #delete(Key)} operation, the return value may be lower than the + count with the probability of the false negative rate of such + filter.]]> + + + + + + + + + + + + + + + + + + + + + + counting Bloom filter, as defined by Fan et al. in a ToN + 2000 paper. +

    + A counting Bloom filter is an improvement to standard a Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> + + + + + + + + + + + + + + Builds an empty Dynamic Bloom filter. + @param vectorSize The number of bits in the vector. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}). + @param nr The threshold for the maximum number of keys to record in a + dynamic Bloom filter row.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dynamic Bloom filter, as defined in the INFOCOM 2006 paper. +

    + A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + each of the s rows is a standard Bloom filter. The creation + process of a DBF is iterative. At the start, the DBF is a 1 * m + bit matrix, i.e., it is composed of a single standard Bloom filter. + It assumes that nr elements are recorded in the + initial bit vector, where nr {@literal <=} n + (n is the cardinality of the set A to record in + the filter). +

    + As the size of A grows during the execution of the application, + several keys must be inserted in the DBF. When inserting a key into the DBF, + one must first get an active Bloom filter in the matrix. A Bloom filter is + active when the number of recorded keys, nr, is + strictly less than the current cardinality of A, n. + If an active Bloom filter is found, the key is inserted and + nr is incremented by one. On the other hand, if there + is no active Bloom filter, a new one is created (i.e., a new row is added to + the matrix) according to the current size of A and the element + is added in this new Bloom filter and the nr value of + this new Bloom filter is set to one. A given key is said to belong to the + DBF if the k positions are set to one in one of the matrix rows. +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> + + + + + + + + + Builds a hash function that must obey to a given maximum number of returned values and a highest value. + @param maxValue The maximum highest returned value. + @param nbHash The number of resulting hashed values. + @param hashType type of the hashing function (see {@link Hash}).]]> + + + + + this hash function. A NOOP]]> + + + + + + + + + + + + + + + + + + + The idea is to randomly select a bit to reset.]]> + + + + + + The idea is to select the bit to reset that will generate the minimum + number of false negative.]]> + + + + + + The idea is to select the bit to reset that will remove the maximum number + of false positive.]]> + + + + + + The idea is to select the bit to reset that will, at the same time, remove + the maximum number of false positve while minimizing the amount of false + negative generated.]]> + + + + + Originally created by + European Commission One-Lab Project 034819.]]> + + + + + + + + + + + + + + this filter. + @param nbHash The number of hash function to consider. + @param hashType type of the hashing function (see + {@link org.apache.hadoop.util.hash.Hash}).]]> + + + + + + + + + this retouched Bloom filter. +

    + Invariant: if the false positive is null, nothing happens. + @param key The false positive key to add.]]> + + + + + + this retouched Bloom filter. + @param coll The collection of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The list of false positive.]]> + + + + + + this retouched Bloom filter. + @param keys The array of false positive.]]> + + + + + + + this retouched Bloom filter. + @param scheme The selective clearing scheme to apply.]]> + + + + + + + + + + + + retouched Bloom filter, as defined in the CoNEXT 2006 paper. +

    + It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +

    + Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown]]> +
    + + + + + + + + + + + + Any exception generated in the future is + extracted and rethrown. +

    + @param future future to evaluate + @param timeout timeout to wait + @param unit time unit. + @param type of the result. + @return the result, if all went well. + @throws InterruptedIOException future was interrupted + @throws IOException if something went wrong + @throws RuntimeException any nested RTE thrown + @throws TimeoutException the future timed out.]]> +
    +
    + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + + type of return value. + @return nothing, ever. + @throws IOException either the inner IOException, or a wrapper around + any non-Runtime-Exception + @throws RuntimeException if that is the inner cause.]]> + + + + + + +
  • If it is an IOE: Return.
  • +
  • If it is a {@link UncheckedIOException}: return the cause
  • +
  • Completion/Execution Exceptions: extract and repeat
  • +
  • If it is an RTE or Error: throw.
  • +
  • Any other type: wrap in an IOE
  • + + + Recursively handles wrapped Execution and Completion Exceptions in + case something very complicated has happened. + @param e exception. + @return an IOException extracted or built from the cause. + @throws RuntimeException if that is the inner cause. + @throws Error if that is the inner cause.]]> +
    +
    + + + + + + + type of result + @param type of builder + @return the builder passed in.]]> + + + + + + + + + + fs.example.s3a.option becomes "s3a.option" + fs.example.fs.io.policy becomes "fs.io.policy" + fs.example.something becomes "something" + + @param builder builder to modify + @param conf configuration to read + @param prefix prefix to scan/strip + @param mandatory are the options to be mandatory or optional?]]> + + + + + + Return type. + @return the evaluated result. + @throws UnsupportedOperationException fail fast if unsupported + @throws IllegalArgumentException invalid argument]]> + + + + + Contains methods promoted from + {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they + are a key part of integrating async IO in application code. +

    +

    + One key feature is that the {@link #awaitFuture(Future)} and + {@link #awaitFuture(Future, long, TimeUnit)} calls will + extract and rethrow exceptions raised in the future's execution, + including extracting the inner IOException of any + {@code UncheckedIOException} raised in the future. + This makes it somewhat easier to execute IOException-raising + code inside futures. +

    ]]> +
    +
    + + + + + + + type + @return a remote iterator]]> + + + + + + type + @param iterator iterator. + @return a remote iterator]]> + + + + + + type + @param iterable iterable. + @return a remote iterator]]> + + + + + + type + @param array array. + @return a remote iterator]]> + + + + + + + source type + @param result type + @param iterator source + @param mapper transformation + @return a remote iterator]]> + + + + + + source type + @param result type + @param iterator source + @return a remote iterator]]> + + + + + + + + Elements are filtered in the hasNext() method; if not used + the filtering will be done on demand in the {@code next()} + call. +

    + @param type + @param iterator source + @param filter filter + @return a remote iterator]]> +
    +
    + + + + + source type. + @return a new iterator]]> + + + + + + + type + @return a list of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + + type + @return an array of the values. + @throws IOException if the source RemoteIterator raises it.]]> + + + + + + + + and this classes log is set to DEBUG, + then the statistics of the operation are evaluated and logged at + debug. +

    + The number of entries processed is returned, as it is useful to + know this, especially during tests or when reporting values + to users. +

    + This does not close the iterator afterwards. + @param source iterator source + @param consumer consumer of the values. + @return the number of elements processed + @param type of source + @throws IOException if the source RemoteIterator or the consumer raise one.]]> +
    +
    + + + + type of source]]> + + + + + This aims to make it straightforward to use lambda-expressions to + transform the results of an iterator, without losing the statistics + in the process, and to chain the operations together. +

    + The closeable operation will be passed through RemoteIterators which + wrap other RemoteIterators. This is to support any iterator which + can be closed to release held connections, file handles etc. + Unless client code is written to assume that RemoteIterator instances + may be closed, this is not likely to be broadly used. It is added + to make it possible to adopt this feature in a managed way. +

    + One notable feature is that the + {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will + LOG at debug any IOStatistics provided by the iterator, if such + statistics are provided. There's no attempt at retrieval and logging + if the LOG is not set to debug, so it is a zero cost feature unless + the logger {@code org.apache.hadoop.fs.functional.RemoteIterators} + is at DEBUG. +

    + Based on the S3A Listing code, and some some work on moving other code + to using iterative listings so as to pick up the statistics.]]> +
    +
    + +
    + + + +
    diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md new file mode 100644 index 00000000000..0bdd1844b6e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/CHANGELOG.3.3.5.md @@ -0,0 +1,359 @@ + + +# Apache Hadoop Changelog + +## Release 3.3.5 - 2023-03-14 + + + +### IMPORTANT ISSUES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | Replace all default Charset usage with UTF-8 | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | CryptoOutputStream::close leak when encrypted zones + quota exceptions | Critical | fs | Colm Dougan | Colm Dougan | + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-18003](https://issues.apache.org/jira/browse/HADOOP-18003) | Add a method appendIfAbsent for CallerContext | Minor | . | Tao Li | Tao Li | +| [HDFS-16331](https://issues.apache.org/jira/browse/HDFS-16331) | Make dfs.blockreport.intervalMsec reconfigurable | Major | . | Tao Li | Tao Li | +| [HDFS-16371](https://issues.apache.org/jira/browse/HDFS-16371) | Exclude slow disks when choosing volume | Major | . | Tao Li | Tao Li | +| [HDFS-16400](https://issues.apache.org/jira/browse/HDFS-16400) | Reconfig DataXceiver parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16399](https://issues.apache.org/jira/browse/HDFS-16399) | Reconfig cache report parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16398](https://issues.apache.org/jira/browse/HDFS-16398) | Reconfig block report parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16396](https://issues.apache.org/jira/browse/HDFS-16396) | Reconfig slow peer parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16397](https://issues.apache.org/jira/browse/HDFS-16397) | Reconfig slow disk parameters for datanode | Major | . | Tao Li | Tao Li | +| [MAPREDUCE-7341](https://issues.apache.org/jira/browse/MAPREDUCE-7341) | Add a task-manifest output committer for Azure and GCS | Major | client | Steve Loughran | Steve Loughran | +| [HADOOP-18163](https://issues.apache.org/jira/browse/HADOOP-18163) | hadoop-azure support for the Manifest Committer of MAPREDUCE-7341 | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HDFS-16413](https://issues.apache.org/jira/browse/HDFS-16413) | Reconfig dfs usage parameters for datanode | Major | . | Tao Li | Tao Li | +| [HDFS-16521](https://issues.apache.org/jira/browse/HDFS-16521) | DFS API to retrieve slow datanodes | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16568](https://issues.apache.org/jira/browse/HDFS-16568) | dfsadmin -reconfig option to start/query reconfig on all live datanodes | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16582](https://issues.apache.org/jira/browse/HDFS-16582) | Expose aggregate latency of slow node as perceived by the reporting node | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | Slow peer metrics - add median, mad and upper latency limits | Major | . | Viraj Jasani | Viraj Jasani | +| [YARN-11241](https://issues.apache.org/jira/browse/YARN-11241) | Add uncleaning option for local app log file with log-aggregation enabled | Major | log-aggregation | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18103](https://issues.apache.org/jira/browse/HADOOP-18103) | High performance vectored read API in Hadoop | Major | common, fs, fs/adl, fs/s3 | Mukund Thakur | Mukund Thakur | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17276](https://issues.apache.org/jira/browse/HADOOP-17276) | Extend CallerContext to make it include many items | Major | . | Hui Fei | Hui Fei | +| [HDFS-15745](https://issues.apache.org/jira/browse/HDFS-15745) | Make DataNodePeerMetrics#LOW\_THRESHOLD\_MS and MIN\_OUTLIER\_DETECTION\_NODES configurable | Major | . | Haibin Huang | Haibin Huang | +| [HDFS-16266](https://issues.apache.org/jira/browse/HDFS-16266) | Add remote port information to HDFS audit log | Major | . | Tao Li | Tao Li | +| [YARN-10997](https://issues.apache.org/jira/browse/YARN-10997) | Revisit allocation and reservation logging | Major | . | Andras Gyori | Andras Gyori | +| [HDFS-16310](https://issues.apache.org/jira/browse/HDFS-16310) | RBF: Add client port to CallerContext for Router | Major | . | Tao Li | Tao Li | +| [HDFS-16352](https://issues.apache.org/jira/browse/HDFS-16352) | return the real datanode numBlocks in #getDatanodeStorageReport | Major | . | qinyuren | qinyuren | +| [HDFS-16426](https://issues.apache.org/jira/browse/HDFS-16426) | fix nextBlockReportTime when trigger full block report force | Major | . | qinyuren | qinyuren | +| [HDFS-16430](https://issues.apache.org/jira/browse/HDFS-16430) | Validate maximum blocks in EC group when adding an EC policy | Minor | ec, erasure-coding | daimin | daimin | +| [HDFS-16403](https://issues.apache.org/jira/browse/HDFS-16403) | Improve FUSE IO performance by supporting FUSE parameter max\_background | Minor | fuse-dfs | daimin | daimin | +| [HDFS-16262](https://issues.apache.org/jira/browse/HDFS-16262) | Async refresh of cached locations in DFSInputStream | Major | . | Bryan Beaudreault | Bryan Beaudreault | +| [HADOOP-18093](https://issues.apache.org/jira/browse/HADOOP-18093) | Better exception handling for testFileStatusOnMountLink() in ViewFsBaseTest.java | Trivial | . | Xing Lin | Xing Lin | +| [HDFS-16423](https://issues.apache.org/jira/browse/HDFS-16423) | balancer should not get blocks on stale storages | Major | balancer & mover | qinyuren | qinyuren | +| [HADOOP-18139](https://issues.apache.org/jira/browse/HADOOP-18139) | Allow configuration of zookeeper server principal | Major | auth | Owen O'Malley | Owen O'Malley | +| [YARN-11076](https://issues.apache.org/jira/browse/YARN-11076) | Upgrade jQuery version in Yarn UI2 | Major | yarn-ui-v2 | Tamas Domok | Tamas Domok | +| [HDFS-16495](https://issues.apache.org/jira/browse/HDFS-16495) | RBF should prepend the client ip rather than append it. | Major | . | Owen O'Malley | Owen O'Malley | +| [HADOOP-18144](https://issues.apache.org/jira/browse/HADOOP-18144) | getTrashRoot/s in ViewFileSystem should return viewFS path, not targetFS path | Major | common | Xing Lin | Xing Lin | +| [HADOOP-18162](https://issues.apache.org/jira/browse/HADOOP-18162) | hadoop-common enhancements for the Manifest Committer of MAPREDUCE-7341 | Major | fs | Steve Loughran | Steve Loughran | +| [HDFS-16529](https://issues.apache.org/jira/browse/HDFS-16529) | Remove unnecessary setObserverRead in TestConsistentReadsObserver | Trivial | test | Zhaohui Wang | Zhaohui Wang | +| [HDFS-16530](https://issues.apache.org/jira/browse/HDFS-16530) | setReplication debug log creates a new string even if debug is disabled | Major | namenode | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16457](https://issues.apache.org/jira/browse/HDFS-16457) | Make fs.getspaceused.classname reconfigurable | Major | namenode | yanbin.zhang | yanbin.zhang | +| [HDFS-16427](https://issues.apache.org/jira/browse/HDFS-16427) | Add debug log for BlockManager#chooseExcessRedundancyStriped | Minor | erasure-coding | Tao Li | Tao Li | +| [HDFS-16497](https://issues.apache.org/jira/browse/HDFS-16497) | EC: Add param comment for liveBusyBlockIndices with HDFS-14768 | Minor | erasure-coding, namanode | caozhiqiang | caozhiqiang | +| [HDFS-16389](https://issues.apache.org/jira/browse/HDFS-16389) | Improve NNThroughputBenchmark test mkdirs | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-17551](https://issues.apache.org/jira/browse/HADOOP-17551) | Upgrade maven-site-plugin to 3.11.0 | Major | . | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16519](https://issues.apache.org/jira/browse/HDFS-16519) | Add throttler to EC reconstruction | Minor | datanode, ec | daimin | daimin | +| [HDFS-14478](https://issues.apache.org/jira/browse/HDFS-14478) | Add libhdfs APIs for openFile | Major | hdfs-client, libhdfs, native | Sahil Takiar | Sahil Takiar | +| [HADOOP-16202](https://issues.apache.org/jira/browse/HADOOP-16202) | Enhance openFile() for better read performance against object stores | Major | fs, fs/s3, tools/distcp | Steve Loughran | Steve Loughran | +| [YARN-11116](https://issues.apache.org/jira/browse/YARN-11116) | Migrate Times util from SimpleDateFormat to thread-safe DateTimeFormatter class | Minor | . | Jonathan Turner Eagles | Jonathan Turner Eagles | +| [HDFS-16520](https://issues.apache.org/jira/browse/HDFS-16520) | Improve EC pread: avoid potential reading whole block | Major | dfsclient, ec, erasure-coding | daimin | daimin | +| [HADOOP-18167](https://issues.apache.org/jira/browse/HADOOP-18167) | Add metrics to track delegation token secret manager operations | Major | . | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [YARN-10080](https://issues.apache.org/jira/browse/YARN-10080) | Support show app id on localizer thread pool | Major | nodemanager | zhoukang | Ashutosh Gupta | +| [HADOOP-18172](https://issues.apache.org/jira/browse/HADOOP-18172) | Change scope of getRootFallbackLink for InodeTree to make them accessible from outside package | Minor | . | Xing Lin | Xing Lin | +| [HDFS-16588](https://issues.apache.org/jira/browse/HDFS-16588) | Backport HDFS-16584 to branch-3.3. | Major | balancer & mover, namenode | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-18240](https://issues.apache.org/jira/browse/HADOOP-18240) | Upgrade Yetus to 0.14.0 | Major | build | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16585](https://issues.apache.org/jira/browse/HDFS-16585) | Add @VisibleForTesting in Dispatcher.java after HDFS-16268 | Trivial | . | Wei-Chiu Chuang | Ashutosh Gupta | +| [HADOOP-18244](https://issues.apache.org/jira/browse/HADOOP-18244) | Fix Hadoop-Common JavaDoc Error on branch-3.3 | Major | common | Shilun Fan | Shilun Fan | +| [HADOOP-18269](https://issues.apache.org/jira/browse/HADOOP-18269) | Misleading method name in DistCpOptions | Minor | tools/distcp | guophilipse | guophilipse | +| [HADOOP-18275](https://issues.apache.org/jira/browse/HADOOP-18275) | update os-maven-plugin to 1.7.0 | Minor | build | Steve Loughran | Steve Loughran | +| [HDFS-16610](https://issues.apache.org/jira/browse/HDFS-16610) | Make fsck read timeout configurable | Major | hdfs-client | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16576](https://issues.apache.org/jira/browse/HDFS-16576) | Remove unused imports in HDFS project | Minor | . | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16629](https://issues.apache.org/jira/browse/HDFS-16629) | [JDK 11] Fix javadoc warnings in hadoop-hdfs module | Minor | hdfs | Shilun Fan | Shilun Fan | +| [YARN-11172](https://issues.apache.org/jira/browse/YARN-11172) | Fix testDelegationToken | Major | test | zhengchenyu | zhengchenyu | +| [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | Improve Magic Committer Performance | Minor | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18288](https://issues.apache.org/jira/browse/HADOOP-18288) | Total requests and total requests per sec served by RPC servers | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18336](https://issues.apache.org/jira/browse/HADOOP-18336) | tag FSDataInputStream.getWrappedStream() @Public/@Stable | Minor | fs | Steve Loughran | Ashutosh Gupta | +| [HADOOP-13144](https://issues.apache.org/jira/browse/HADOOP-13144) | Enhancing IPC client throughput via multiple connections per user | Minor | ipc | Jason Kace | Íñigo Goiri | +| [HDFS-16712](https://issues.apache.org/jira/browse/HDFS-16712) | Fix incorrect placeholder in DataNode.java | Major | . | ZanderXu | ZanderXu | +| [HDFS-16702](https://issues.apache.org/jira/browse/HDFS-16702) | MiniDFSCluster should report cause of exception in assertion error | Minor | hdfs | Steve Vaughan | Steve Vaughan | +| [HADOOP-18365](https://issues.apache.org/jira/browse/HADOOP-18365) | Updated addresses are still accessed using the old IP address | Major | common | Steve Vaughan | Steve Vaughan | +| [HDFS-16687](https://issues.apache.org/jira/browse/HDFS-16687) | RouterFsckServlet replicates code from DfsServlet base class | Major | federation | Steve Vaughan | Steve Vaughan | +| [HADOOP-18333](https://issues.apache.org/jira/browse/HADOOP-18333) | hadoop-client-runtime impact by CVE-2022-2047 CVE-2022-2048 due to shaded jetty | Major | build | phoebe chen | Ashutosh Gupta | +| [HADOOP-18406](https://issues.apache.org/jira/browse/HADOOP-18406) | Adds alignment context to call path for creating RPC proxy with multiple connections per user. | Major | ipc | Simbarashe Dzinamarira | Simbarashe Dzinamarira | +| [HDFS-16684](https://issues.apache.org/jira/browse/HDFS-16684) | Exclude self from JournalNodeSyncer when using a bind host | Major | journal-node | Steve Vaughan | Steve Vaughan | +| [HDFS-16686](https://issues.apache.org/jira/browse/HDFS-16686) | GetJournalEditServlet fails to authorize valid Kerberos request | Major | journal-node | Steve Vaughan | Steve Vaughan | +| [YARN-11303](https://issues.apache.org/jira/browse/YARN-11303) | Upgrade jquery ui to 1.13.2 | Major | security | D M Murali Krishna Reddy | Ashutosh Gupta | +| [HADOOP-16769](https://issues.apache.org/jira/browse/HADOOP-16769) | LocalDirAllocator to provide diagnostics when file creation fails | Minor | util | Ramesh Kumar Thangarajan | Ashutosh Gupta | +| [HADOOP-18341](https://issues.apache.org/jira/browse/HADOOP-18341) | upgrade commons-configuration2 to 2.8.0 and commons-text to 1.9 | Major | . | PJ Fanning | PJ Fanning | +| [HDFS-16776](https://issues.apache.org/jira/browse/HDFS-16776) | Erasure Coding: The length of targets should be checked when DN gets a reconstruction task | Major | . | Kidd5368 | Kidd5368 | +| [HADOOP-18469](https://issues.apache.org/jira/browse/HADOOP-18469) | Add XMLUtils methods to centralise code that creates secure XML parsers | Major | . | PJ Fanning | PJ Fanning | +| [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | Remove the hadoop-openstack module | Major | build, fs, fs/swift | Steve Loughran | Steve Loughran | +| [HADOOP-18468](https://issues.apache.org/jira/browse/HADOOP-18468) | upgrade jettison json jar due to fix CVE-2022-40149 | Major | build | PJ Fanning | PJ Fanning | +| [HADOOP-17779](https://issues.apache.org/jira/browse/HADOOP-17779) | Lock File System Creator Semaphore Uninterruptibly | Minor | fs | David Mollitor | David Mollitor | +| [HADOOP-18360](https://issues.apache.org/jira/browse/HADOOP-18360) | Update commons-csv from 1.0 to 1.9.0. | Minor | common | Shilun Fan | Shilun Fan | +| [HADOOP-18493](https://issues.apache.org/jira/browse/HADOOP-18493) | update jackson-databind 2.12.7.1 due to CVE fixes | Major | . | PJ Fanning | PJ Fanning | +| [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | Update Bouncy Castle to 1.68 or later | Major | build | Takanobu Asanuma | PJ Fanning | +| [HADOOP-18497](https://issues.apache.org/jira/browse/HADOOP-18497) | Upgrade commons-text version to fix CVE-2022-42889 | Major | build | Xiaoqiao He | PJ Fanning | +| [HDFS-16795](https://issues.apache.org/jira/browse/HDFS-16795) | Use secure XML parser utils in hdfs classes | Major | . | PJ Fanning | PJ Fanning | +| [YARN-11330](https://issues.apache.org/jira/browse/YARN-11330) | Use secure XML parser utils in YARN | Major | . | PJ Fanning | PJ Fanning | +| [MAPREDUCE-7411](https://issues.apache.org/jira/browse/MAPREDUCE-7411) | Use secure XML parser utils in MapReduce | Major | . | PJ Fanning | PJ Fanning | +| [HADOOP-18512](https://issues.apache.org/jira/browse/HADOOP-18512) | upgrade woodstox-core to 5.4.0 for security fix | Major | common | phoebe chen | PJ Fanning | +| [YARN-11363](https://issues.apache.org/jira/browse/YARN-11363) | Remove unused TimelineVersionWatcher and TimelineVersion from hadoop-yarn-server-tests | Major | test, yarn | Ashutosh Gupta | Ashutosh Gupta | +| [YARN-11364](https://issues.apache.org/jira/browse/YARN-11364) | Docker Container to accept docker Image name with sha256 digest | Major | yarn | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18517](https://issues.apache.org/jira/browse/HADOOP-18517) | ABFS: Add fs.azure.enable.readahead option to disable readahead | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-18484](https://issues.apache.org/jira/browse/HADOOP-18484) | upgrade hsqldb to v2.7.1 due to CVE | Major | . | PJ Fanning | Ashutosh Gupta | +| [HDFS-16844](https://issues.apache.org/jira/browse/HDFS-16844) | [RBF] The routers should be resiliant against exceptions from StateStore | Major | rbf | Owen O'Malley | Owen O'Malley | +| [HADOOP-18573](https://issues.apache.org/jira/browse/HADOOP-18573) | Improve error reporting on non-standard kerberos names | Blocker | security | Steve Loughran | Steve Loughran | +| [HADOOP-18561](https://issues.apache.org/jira/browse/HADOOP-18561) | CVE-2021-37533 on commons-net is included in hadoop common and hadoop-client-runtime | Blocker | build | phoebe chen | Steve Loughran | +| [HADOOP-18067](https://issues.apache.org/jira/browse/HADOOP-18067) | Über-jira: S3A Hadoop 3.3.5 features | Major | fs/s3 | Steve Loughran | Mukund Thakur | +| [YARN-10444](https://issues.apache.org/jira/browse/YARN-10444) | Node Manager to use openFile() with whole-file read policy for localizing files. | Minor | nodemanager | Steve Loughran | Steve Loughran | +| [HADOOP-18661](https://issues.apache.org/jira/browse/HADOOP-18661) | Fix bin/hadoop usage script terminology | Blocker | scripts | Steve Loughran | Steve Loughran | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-17116](https://issues.apache.org/jira/browse/HADOOP-17116) | Skip Retry INFO logging on first failover from a proxy | Major | ha | Hanisha Koneru | Hanisha Koneru | +| [YARN-10553](https://issues.apache.org/jira/browse/YARN-10553) | Refactor TestDistributedShell | Major | distributed-shell, test | Ahmed Hussein | Ahmed Hussein | +| [HDFS-15839](https://issues.apache.org/jira/browse/HDFS-15839) | RBF: Cannot get method setBalancerBandwidth on Router Client | Major | rbf | Yang Yun | Yang Yun | +| [HADOOP-17588](https://issues.apache.org/jira/browse/HADOOP-17588) | CryptoInputStream#close() should be synchronized | Major | . | Renukaprasad C | Renukaprasad C | +| [HADOOP-17836](https://issues.apache.org/jira/browse/HADOOP-17836) | Improve logging on ABFS error reporting | Minor | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-17989](https://issues.apache.org/jira/browse/HADOOP-17989) | ITestAzureBlobFileSystemDelete failing "Operations has null HTTP response" | Major | fs/azure, test | Steve Loughran | Steve Loughran | +| [YARN-11055](https://issues.apache.org/jira/browse/YARN-11055) | In cgroups-operations.c some fprintf format strings don't end with "\\n" | Minor | nodemanager | Gera Shegalov | Gera Shegalov | +| [YARN-11065](https://issues.apache.org/jira/browse/YARN-11065) | Bump follow-redirects from 1.13.3 to 1.14.7 in hadoop-yarn-ui | Major | yarn-ui-v2 | Akira Ajisaka | | +| [HDFS-16303](https://issues.apache.org/jira/browse/HDFS-16303) | Losing over 100 datanodes in state decommissioning results in full blockage of all datanode decommissioning | Major | . | Kevin Wikant | Kevin Wikant | +| [HDFS-16443](https://issues.apache.org/jira/browse/HDFS-16443) | Fix edge case where DatanodeAdminDefaultMonitor doubly enqueues a DatanodeDescriptor on exception | Major | hdfs | Kevin Wikant | Kevin Wikant | +| [HDFS-16449](https://issues.apache.org/jira/browse/HDFS-16449) | Fix hadoop web site release notes and changelog not available | Minor | documentation | guophilipse | guophilipse | +| [YARN-10788](https://issues.apache.org/jira/browse/YARN-10788) | TestCsiClient fails | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-18126](https://issues.apache.org/jira/browse/HADOOP-18126) | Update junit 5 version due to build issues | Major | bulid | PJ Fanning | PJ Fanning | +| [YARN-11033](https://issues.apache.org/jira/browse/YARN-11033) | isAbsoluteResource is not correct for dynamically created queues | Minor | yarn | Tamas Domok | Tamas Domok | +| [YARN-10894](https://issues.apache.org/jira/browse/YARN-10894) | Follow up YARN-10237: fix the new test case in TestRMWebServicesCapacitySched | Major | . | Tamas Domok | Tamas Domok | +| [YARN-11022](https://issues.apache.org/jira/browse/YARN-11022) | Fix the documentation for max-parallel-apps in CS | Major | capacity scheduler | Tamas Domok | Tamas Domok | +| [HADOOP-18150](https://issues.apache.org/jira/browse/HADOOP-18150) | Fix ITestAuditManagerDisabled after S3A audit logging was enabled in HADOOP-18091 | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-17976](https://issues.apache.org/jira/browse/HADOOP-17976) | abfs etag extraction inconsistent between LIST and HEAD calls | Minor | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-18129](https://issues.apache.org/jira/browse/HADOOP-18129) | Change URI[] in INodeLink to String[] to reduce memory footprint of ViewFileSystem | Major | . | Abhishek Das | Abhishek Das | +| [HADOOP-18145](https://issues.apache.org/jira/browse/HADOOP-18145) | Fileutil's unzip method causes unzipped files to lose their original permissions | Major | common | jingxiong zhong | jingxiong zhong | +| [HDFS-16518](https://issues.apache.org/jira/browse/HDFS-16518) | KeyProviderCache close cached KeyProvider with Hadoop ShutdownHookManager | Major | hdfs | Lei Yang | Lei Yang | +| [HADOOP-18169](https://issues.apache.org/jira/browse/HADOOP-18169) | getDelegationTokens in ViewFs should also fetch the token from the fallback FS | Major | . | Xing Lin | Xing Lin | +| [HDFS-16479](https://issues.apache.org/jira/browse/HDFS-16479) | EC: NameNode should not send a reconstruction work when the source datanodes are insufficient | Critical | ec, erasure-coding | Yuanbo Liu | Takanobu Asanuma | +| [HDFS-16509](https://issues.apache.org/jira/browse/HDFS-16509) | Fix decommission UnsupportedOperationException: Remove unsupported | Major | namenode | daimin | daimin | +| [HDFS-16456](https://issues.apache.org/jira/browse/HDFS-16456) | EC: Decommission a rack with only on dn will fail when the rack number is equal with replication | Critical | ec, namenode | caozhiqiang | caozhiqiang | +| [HADOOP-18201](https://issues.apache.org/jira/browse/HADOOP-18201) | Remove base and bucket overrides for endpoint in ITestS3ARequesterPays.java | Major | fs/s3 | Mehakmeet Singh | Daniel Carl Jones | +| [HDFS-16536](https://issues.apache.org/jira/browse/HDFS-16536) | TestOfflineImageViewer fails on branch-3.3 | Major | test | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16538](https://issues.apache.org/jira/browse/HDFS-16538) | EC decoding failed due to not enough valid inputs | Major | erasure-coding | qinyuren | qinyuren | +| [HDFS-16544](https://issues.apache.org/jira/browse/HDFS-16544) | EC decoding failed due to invalid buffer | Major | erasure-coding | qinyuren | qinyuren | +| [HADOOP-17564](https://issues.apache.org/jira/browse/HADOOP-17564) | Fix typo in UnixShellGuide.html | Trivial | . | Takanobu Asanuma | Ashutosh Gupta | +| [HDFS-16552](https://issues.apache.org/jira/browse/HDFS-16552) | Fix NPE for TestBlockManager | Major | . | Tao Li | Tao Li | +| [MAPREDUCE-7246](https://issues.apache.org/jira/browse/MAPREDUCE-7246) | In MapredAppMasterRest#Mapreduce\_Application\_Master\_Info\_API, the datatype of appId should be "string". | Major | documentation | jenny | Ashutosh Gupta | +| [YARN-10187](https://issues.apache.org/jira/browse/YARN-10187) | Removing hadoop-yarn-project/hadoop-yarn/README as it is no longer maintained. | Minor | documentation | N Sanketh Reddy | Ashutosh Gupta | +| [HADOOP-16515](https://issues.apache.org/jira/browse/HADOOP-16515) | Update the link to compatibility guide | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HDFS-16185](https://issues.apache.org/jira/browse/HDFS-16185) | Fix comment in LowRedundancyBlocks.java | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HADOOP-17479](https://issues.apache.org/jira/browse/HADOOP-17479) | Fix the examples of hadoop config prefix | Minor | documentation | Akira Ajisaka | Ashutosh Gupta | +| [HADOOP-18222](https://issues.apache.org/jira/browse/HADOOP-18222) | Prevent DelegationTokenSecretManagerMetrics from registering multiple times | Major | . | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-16540](https://issues.apache.org/jira/browse/HDFS-16540) | Data locality is lost when DataNode pod restarts in kubernetes | Major | namenode | Huaxiang Sun | Huaxiang Sun | +| [YARN-11133](https://issues.apache.org/jira/browse/YARN-11133) | YarnClient gets the wrong EffectiveMinCapacity value | Major | api | Zilong Zhu | Zilong Zhu | +| [YARN-10850](https://issues.apache.org/jira/browse/YARN-10850) | TimelineService v2 lists containers for all attempts when filtering for one | Major | timelinereader | Benjamin Teke | Benjamin Teke | +| [YARN-11141](https://issues.apache.org/jira/browse/YARN-11141) | Capacity Scheduler does not support ambiguous queue names when moving application across queues | Major | capacity scheduler | András Győri | András Győri | +| [HDFS-16586](https://issues.apache.org/jira/browse/HDFS-16586) | Purge FsDatasetAsyncDiskService threadgroup; it causes BPServiceActor$CommandProcessingThread IllegalThreadStateException 'fatal exception and exit' | Major | datanode | Michael Stack | Michael Stack | +| [HADOOP-18251](https://issues.apache.org/jira/browse/HADOOP-18251) | Fix failure of extracting JIRA id from commit message in git\_jira\_fix\_version\_check.py | Minor | build | Masatake Iwasaki | Masatake Iwasaki | +| [YARN-11128](https://issues.apache.org/jira/browse/YARN-11128) | Fix comments in TestProportionalCapacityPreemptionPolicy\* | Minor | capacityscheduler, documentation | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18234](https://issues.apache.org/jira/browse/HADOOP-18234) | s3a access point xml examples are wrong | Minor | documentation, fs/s3 | Steve Loughran | Ashutosh Gupta | +| [HADOOP-18238](https://issues.apache.org/jira/browse/HADOOP-18238) | Fix reentrancy check in SFTPFileSystem.close() | Major | common | yi liu | Ashutosh Gupta | +| [HDFS-16583](https://issues.apache.org/jira/browse/HDFS-16583) | DatanodeAdminDefaultMonitor can get stuck in an infinite loop | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-16608](https://issues.apache.org/jira/browse/HDFS-16608) | Fix the link in TestClientProtocolForPipelineRecovery | Minor | documentation | Samrat Deb | Samrat Deb | +| [HDFS-16563](https://issues.apache.org/jira/browse/HDFS-16563) | Namenode WebUI prints sensitive information on Token Expiry | Major | namanode, security, webhdfs | Renukaprasad C | Renukaprasad C | +| [HDFS-16623](https://issues.apache.org/jira/browse/HDFS-16623) | IllegalArgumentException in LifelineSender | Major | . | ZanderXu | ZanderXu | +| [HDFS-16064](https://issues.apache.org/jira/browse/HDFS-16064) | Determine when to invalidate corrupt replicas based on number of usable replicas | Major | datanode, namenode | Kevin Wikant | Kevin Wikant | +| [HADOOP-18255](https://issues.apache.org/jira/browse/HADOOP-18255) | fsdatainputstreambuilder.md refers to hadoop 3.3.3, when it shouldn't | Minor | documentation | Steve Loughran | Ashutosh Gupta | +| [MAPREDUCE-7387](https://issues.apache.org/jira/browse/MAPREDUCE-7387) | Fix TestJHSSecurity#testDelegationToken AssertionError due to HDFS-16563 | Major | . | Shilun Fan | Shilun Fan | +| [MAPREDUCE-7369](https://issues.apache.org/jira/browse/MAPREDUCE-7369) | MapReduce tasks timing out when spends more time on MultipleOutputs#close | Major | . | Prabhu Joseph | Ashutosh Gupta | +| [MAPREDUCE-7391](https://issues.apache.org/jira/browse/MAPREDUCE-7391) | TestLocalDistributedCacheManager failing after HADOOP-16202 | Major | test | Steve Loughran | Steve Loughran | +| [HDFS-16591](https://issues.apache.org/jira/browse/HDFS-16591) | StateStoreZooKeeper fails to initialize | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HADOOP-18321](https://issues.apache.org/jira/browse/HADOOP-18321) | Fix when to read an additional record from a BZip2 text file split | Critical | io | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18100](https://issues.apache.org/jira/browse/HADOOP-18100) | Change scope of inner classes in InodeTree to make them accessible outside package | Major | . | Abhishek Das | Abhishek Das | +| [HADOOP-18217](https://issues.apache.org/jira/browse/HADOOP-18217) | shutdownhookmanager should not be multithreaded (deadlock possible) | Minor | util | Catherinot Remi | | +| [MAPREDUCE-7372](https://issues.apache.org/jira/browse/MAPREDUCE-7372) | MapReduce set permission too late in copyJar method | Major | mrv2 | Zhang Dongsheng | | +| [HADOOP-18330](https://issues.apache.org/jira/browse/HADOOP-18330) | S3AFileSystem removes Path when calling createS3Client | Minor | fs/s3 | Ashutosh Pant | Ashutosh Pant | +| [HADOOP-18390](https://issues.apache.org/jira/browse/HADOOP-18390) | Fix out of sync import for HADOOP-18321 | Minor | . | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18340](https://issues.apache.org/jira/browse/HADOOP-18340) | deleteOnExit does not work with S3AFileSystem | Minor | fs/s3 | Huaxiang Sun | Huaxiang Sun | +| [HADOOP-18383](https://issues.apache.org/jira/browse/HADOOP-18383) | Codecs with @DoNotPool annotation are not closed causing memory leak | Major | common | Kevin Sewell | Kevin Sewell | +| [HDFS-16729](https://issues.apache.org/jira/browse/HDFS-16729) | RBF: fix some unreasonably annotated docs | Major | documentation, rbf | JiangHua Zhu | JiangHua Zhu | +| [HADOOP-18398](https://issues.apache.org/jira/browse/HADOOP-18398) | Prevent AvroRecord\*.class from being included non-test jar | Major | common | YUBI LEE | YUBI LEE | +| [HDFS-4043](https://issues.apache.org/jira/browse/HDFS-4043) | Namenode Kerberos Login does not use proper hostname for host qualified hdfs principal name. | Major | security | Ahad Rana | Steve Vaughan | +| [MAPREDUCE-7403](https://issues.apache.org/jira/browse/MAPREDUCE-7403) | Support spark dynamic partitioning in the Manifest Committer | Major | mrv2 | Steve Loughran | Steve Loughran | +| [HDFS-16732](https://issues.apache.org/jira/browse/HDFS-16732) | [SBN READ] Avoid get location from observer when the block report is delayed. | Critical | hdfs | zhengchenyu | zhengchenyu | +| [HADOOP-18375](https://issues.apache.org/jira/browse/HADOOP-18375) | Fix failure of shelltest for hadoop\_add\_ldlibpath | Minor | test | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-16755](https://issues.apache.org/jira/browse/HDFS-16755) | TestQJMWithFaults.testUnresolvableHostName() can fail due to unexpected host resolution | Minor | test | Steve Vaughan | Steve Vaughan | +| [HADOOP-18400](https://issues.apache.org/jira/browse/HADOOP-18400) | Fix file split duplicating records from a succeeding split when reading BZip2 text files | Critical | . | Ashutosh Gupta | Ashutosh Gupta | +| [HADOOP-18242](https://issues.apache.org/jira/browse/HADOOP-18242) | ABFS Rename Failure when tracking metadata is in incomplete state | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-18456](https://issues.apache.org/jira/browse/HADOOP-18456) | NullPointerException in ObjectListingIterator's constructor | Blocker | fs/s3 | Quanlong Huang | Steve Loughran | +| [HADOOP-18444](https://issues.apache.org/jira/browse/HADOOP-18444) | Add Support for localized trash for ViewFileSystem in Trash.moveToAppropriateTrash | Major | . | Xing Lin | Xing Lin | +| [HADOOP-18443](https://issues.apache.org/jira/browse/HADOOP-18443) | Upgrade snakeyaml to 1.32 | Major | security | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16766](https://issues.apache.org/jira/browse/HDFS-16766) | hdfs ec command loads (administrator provided) erasure code policy files without disabling xml entity expansion | Major | security | Jing | Ashutosh Gupta | +| [HDFS-13369](https://issues.apache.org/jira/browse/HDFS-13369) | FSCK Report broken with RequestHedgingProxyProvider | Major | hdfs | Harshakiran Reddy | Ranith Sardar | +| [YARN-11039](https://issues.apache.org/jira/browse/YARN-11039) | LogAggregationFileControllerFactory::getFileControllerForRead can leak threads | Blocker | log-aggregation | Rajesh Balamohan | Steve Loughran | +| [HADOOP-18499](https://issues.apache.org/jira/browse/HADOOP-18499) | S3A to support HTTPS web proxies | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-18233](https://issues.apache.org/jira/browse/HADOOP-18233) | Possible race condition with TemporaryAWSCredentialsProvider | Major | auth, fs/s3 | Jason Sleight | Jimmy Wong | +| [MAPREDUCE-7425](https://issues.apache.org/jira/browse/MAPREDUCE-7425) | Document Fix for yarn.app.mapreduce.client-am.ipc.max-retries | Major | yarn | teng wang | teng wang | +| [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | Disable abfs prefetching by default | Major | fs/azure | Mehakmeet Singh | Mehakmeet Singh | +| [HDFS-16836](https://issues.apache.org/jira/browse/HDFS-16836) | StandbyCheckpointer can still trigger rollback fs image after RU is finalized | Major | hdfs | Lei Yang | Lei Yang | +| [HADOOP-18324](https://issues.apache.org/jira/browse/HADOOP-18324) | Interrupting RPC Client calls can lead to thread exhaustion | Critical | ipc | Owen O'Malley | Owen O'Malley | +| [HDFS-16832](https://issues.apache.org/jira/browse/HDFS-16832) | [SBN READ] Fix NPE when check the block location of empty directory | Major | . | zhengchenyu | zhengchenyu | +| [HADOOP-18498](https://issues.apache.org/jira/browse/HADOOP-18498) | [ABFS]: Error introduced when SAS Token containing '?' prefix is passed | Minor | fs/azure | Sree Bhattacharyya | Sree Bhattacharyya | +| [HDFS-16847](https://issues.apache.org/jira/browse/HDFS-16847) | RBF: StateStore writer should not commit tmp fail if there was an error in writing the file. | Critical | hdfs, rbf | Simbarashe Dzinamarira | Simbarashe Dzinamarira | +| [HADOOP-18401](https://issues.apache.org/jira/browse/HADOOP-18401) | No ARM binaries in branch-3.3.x releases | Minor | build | Ling Xu | | +| [HADOOP-18408](https://issues.apache.org/jira/browse/HADOOP-18408) | [ABFS]: ITestAbfsManifestCommitProtocol fails on nonHNS configuration | Minor | fs/azure, test | Pranav Saxena | Sree Bhattacharyya | +| [HADOOP-18402](https://issues.apache.org/jira/browse/HADOOP-18402) | S3A committer NPE in spark job abort | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18569](https://issues.apache.org/jira/browse/HADOOP-18569) | NFS Gateway may release buffer too early | Blocker | nfs | Attila Doroszlai | Attila Doroszlai | +| [HADOOP-18574](https://issues.apache.org/jira/browse/HADOOP-18574) | Changing log level of IOStatistics increment to make the DEBUG logs less noisy | Major | fs/s3 | Mehakmeet Singh | Mehakmeet Singh | +| [HADOOP-18521](https://issues.apache.org/jira/browse/HADOOP-18521) | ABFS ReadBufferManager buffer sharing across concurrent HTTP requests | Critical | fs/azure | Steve Loughran | Steve Loughran | +| [MAPREDUCE-7375](https://issues.apache.org/jira/browse/MAPREDUCE-7375) | JobSubmissionFiles don't set right permission after mkdirs | Major | mrv2 | Zhang Dongsheng | | +| [HADOOP-17717](https://issues.apache.org/jira/browse/HADOOP-17717) | Update wildfly openssl to 1.1.3.Final | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-18598](https://issues.apache.org/jira/browse/HADOOP-18598) | maven site generation doesn't include javadocs | Blocker | site | Steve Loughran | Steve Loughran | +| [HDFS-16895](https://issues.apache.org/jira/browse/HDFS-16895) | NamenodeHeartbeatService should use credentials of logged in user | Major | rbf | Hector Sandoval Chaverri | Hector Sandoval Chaverri | +| [HDFS-16853](https://issues.apache.org/jira/browse/HDFS-16853) | The UT TestLeaseRecovery2#testHardLeaseRecoveryAfterNameNodeRestart failed because HADOOP-18324 | Blocker | . | ZanderXu | ZanderXu | +| [HADOOP-18641](https://issues.apache.org/jira/browse/HADOOP-18641) | cyclonedx maven plugin breaks builds on recent maven releases (3.9.0) | Major | build | Steve Loughran | Steve Loughran | +| [HDFS-16923](https://issues.apache.org/jira/browse/HDFS-16923) | The getListing RPC will throw NPE if the path does not exist | Critical | . | ZanderXu | ZanderXu | +| [HDFS-16896](https://issues.apache.org/jira/browse/HDFS-16896) | HDFS Client hedged read has increased failure rate than without hedged read | Major | hdfs-client | Tom McCormick | Tom McCormick | +| [YARN-11383](https://issues.apache.org/jira/browse/YARN-11383) | Workflow priority mappings is case sensitive | Major | yarn | Aparajita Choudhary | Aparajita Choudhary | +| [HDFS-16939](https://issues.apache.org/jira/browse/HDFS-16939) | Fix the thread safety bug in LowRedundancyBlocks | Major | namanode | Shuyan Zhang | Shuyan Zhang | +| [HDFS-16934](https://issues.apache.org/jira/browse/HDFS-16934) | org.apache.hadoop.hdfs.tools.TestDFSAdmin#testAllDatanodesReconfig regression | Minor | dfsadmin, test | Steve Loughran | Shilun Fan | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-16573](https://issues.apache.org/jira/browse/HDFS-16573) | Fix test TestDFSStripedInputStreamWithRandomECPolicy | Minor | test | daimin | daimin | +| [HDFS-16637](https://issues.apache.org/jira/browse/HDFS-16637) | TestHDFSCLI#testAll consistently failing | Major | . | Viraj Jasani | Viraj Jasani | +| [YARN-11248](https://issues.apache.org/jira/browse/YARN-11248) | Add unit test for FINISHED\_CONTAINERS\_PULLED\_BY\_AM event on DECOMMISSIONING | Major | test | Ashutosh Gupta | Ashutosh Gupta | +| [HDFS-16625](https://issues.apache.org/jira/browse/HDFS-16625) | Unit tests aren't checking for PMDK availability | Major | test | Steve Vaughan | Steve Vaughan | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13293](https://issues.apache.org/jira/browse/HDFS-13293) | RBF: The RouterRPCServer should transfer client IP via CallerContext to NamenodeRpcServer | Major | rbf | Baolong Mao | Hui Fei | +| [HDFS-15630](https://issues.apache.org/jira/browse/HDFS-15630) | RBF: Fix wrong client IP info in CallerContext when requests mount points with multi-destinations. | Major | rbf | Chengwei Wang | Chengwei Wang | +| [HADOOP-17152](https://issues.apache.org/jira/browse/HADOOP-17152) | Implement wrapper for guava newArrayList and newLinkedList | Major | common | Ahmed Hussein | Viraj Jasani | +| [HADOOP-17851](https://issues.apache.org/jira/browse/HADOOP-17851) | S3A to support user-specified content encoding | Minor | fs/s3 | Holden Karau | Holden Karau | +| [HADOOP-17492](https://issues.apache.org/jira/browse/HADOOP-17492) | abfs listLocatedStatus to support incremental/async page fetching | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-17409](https://issues.apache.org/jira/browse/HADOOP-17409) | Remove S3Guard - no longer needed | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18084](https://issues.apache.org/jira/browse/HADOOP-18084) | ABFS: Add testfilePath while verifying test contents are read correctly | Minor | fs/azure, test | Anmol Asrani | Anmol Asrani | +| [HDFS-16169](https://issues.apache.org/jira/browse/HDFS-16169) | Fix TestBlockTokenWithDFSStriped#testEnd2End failure | Major | test | Hui Fei | secfree | +| [HADOOP-18091](https://issues.apache.org/jira/browse/HADOOP-18091) | S3A auditing leaks memory through ThreadLocal references | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18071](https://issues.apache.org/jira/browse/HADOOP-18071) | ABFS: Set driver global timeout for ITestAzureBlobFileSystemBasics | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17765](https://issues.apache.org/jira/browse/HADOOP-17765) | ABFS: Use Unique File Paths in Tests | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-17862](https://issues.apache.org/jira/browse/HADOOP-17862) | ABFS: Fix unchecked cast compiler warning for AbfsListStatusRemoteIterator | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-18075](https://issues.apache.org/jira/browse/HADOOP-18075) | ABFS: Fix failure caused by listFiles() in ITestAbfsRestOperationException | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-18112](https://issues.apache.org/jira/browse/HADOOP-18112) | Implement paging during S3 multi object delete. | Critical | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-16204](https://issues.apache.org/jira/browse/HADOOP-16204) | ABFS tests to include terasort | Minor | fs/azure, test | Steve Loughran | Steve Loughran | +| [HDFS-13248](https://issues.apache.org/jira/browse/HDFS-13248) | RBF: Namenode need to choose block location for the client | Major | . | Wu Weiwei | Owen O'Malley | +| [HADOOP-13704](https://issues.apache.org/jira/browse/HADOOP-13704) | S3A getContentSummary() to move to listFiles(recursive) to count children; instrument use | Minor | fs/s3 | Steve Loughran | Ahmar Suhail | +| [HADOOP-14661](https://issues.apache.org/jira/browse/HADOOP-14661) | S3A to support Requester Pays Buckets | Minor | common, util | Mandus Momberg | Daniel Carl Jones | +| [HDFS-16484](https://issues.apache.org/jira/browse/HDFS-16484) | [SPS]: Fix an infinite loop bug in SPSPathIdProcessor thread | Major | . | qinyuren | qinyuren | +| [HADOOP-17682](https://issues.apache.org/jira/browse/HADOOP-17682) | ABFS: Support FileStatus input to OpenFileWithOptions() via OpenFileParameters | Major | fs/azure | Sumangala Patki | Sumangala Patki | +| [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | Use jersey-json that is built to use jackson2 | Major | build | Akira Ajisaka | PJ Fanning | +| [HADOOP-18104](https://issues.apache.org/jira/browse/HADOOP-18104) | Add configs to configure minSeekForVectorReads and maxReadSizeForVectorReads | Major | common, fs | Mukund Thakur | Mukund Thakur | +| [HADOOP-18168](https://issues.apache.org/jira/browse/HADOOP-18168) | ITestMarkerTool.testRunLimitedLandsatAudit failing due to most of bucket content purged | Minor | fs/s3, test | Steve Loughran | Daniel Carl Jones | +| [HADOOP-12020](https://issues.apache.org/jira/browse/HADOOP-12020) | Support configuration of different S3 storage classes | Major | fs/s3 | Yann Landrin-Schweitzer | Monthon Klongklaew | +| [HADOOP-18105](https://issues.apache.org/jira/browse/HADOOP-18105) | Implement a variant of ElasticByteBufferPool which uses weak references for garbage collection. | Major | common, fs | Mukund Thakur | Mukund Thakur | +| [HADOOP-18107](https://issues.apache.org/jira/browse/HADOOP-18107) | Vectored IO support for large S3 files. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18106](https://issues.apache.org/jira/browse/HADOOP-18106) | Handle memory fragmentation in S3 Vectored IO implementation. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-17461](https://issues.apache.org/jira/browse/HADOOP-17461) | Add thread-level IOStatistics Context | Major | fs, fs/azure, fs/s3 | Steve Loughran | Mehakmeet Singh | +| [HADOOP-18372](https://issues.apache.org/jira/browse/HADOOP-18372) | ILoadTestS3ABulkDeleteThrottling failing | Minor | fs/s3, test | Steve Loughran | Ahmar Suhail | +| [HADOOP-18368](https://issues.apache.org/jira/browse/HADOOP-18368) | ITestCustomSigner fails when access point name has '-' | Minor | . | Ahmar Suhail | Ahmar Suhail | +| [HADOOP-15964](https://issues.apache.org/jira/browse/HADOOP-15964) | Add S3A support for Async Scatter/Gather IO | Major | fs/s3 | Steve Loughran | Mukund Thakur | +| [HADOOP-18366](https://issues.apache.org/jira/browse/HADOOP-18366) | ITestS3Select.testSelectSeekFullLandsat is timing out | Minor | . | Ahmar Suhail | Ahmar Suhail | +| [HADOOP-18373](https://issues.apache.org/jira/browse/HADOOP-18373) | IOStatisticsContext tuning | Minor | fs/s3, test | Steve Loughran | Viraj Jasani | +| [HADOOP-18227](https://issues.apache.org/jira/browse/HADOOP-18227) | Add input stream IOstats for vectored IO api in S3A. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18392](https://issues.apache.org/jira/browse/HADOOP-18392) | Propagate vectored s3a input stream stats to file system stats. | Major | fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18355](https://issues.apache.org/jira/browse/HADOOP-18355) | Update previous index properly while validating overlapping ranges. | Major | common, fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18371](https://issues.apache.org/jira/browse/HADOOP-18371) | s3a FS init logs at warn if fs.s3a.create.storage.class is unset | Blocker | fs/s3 | Steve Loughran | Viraj Jasani | +| [HADOOP-18385](https://issues.apache.org/jira/browse/HADOOP-18385) | ITestS3ACannedACLs failure; not in a span | Major | fs/s3, test | Steve Loughran | Ashutosh Gupta | +| [HADOOP-18403](https://issues.apache.org/jira/browse/HADOOP-18403) | Fix FileSystem leak in ITestS3AAWSCredentialsProvider | Minor | fs/s3 | Viraj Jasani | Viraj Jasani | +| [HADOOP-17882](https://issues.apache.org/jira/browse/HADOOP-17882) | distcp to use openFile() with sequential IO; ranges of reads | Major | tools/distcp | Steve Loughran | Steve Loughran | +| [HADOOP-18391](https://issues.apache.org/jira/browse/HADOOP-18391) | Improve VectoredReadUtils#readVectored() for direct buffers | Major | fs | Steve Loughran | Mukund Thakur | +| [HADOOP-18407](https://issues.apache.org/jira/browse/HADOOP-18407) | Improve vectored IO api spec. | Minor | fs, fs/s3 | Mukund Thakur | Mukund Thakur | +| [HADOOP-18339](https://issues.apache.org/jira/browse/HADOOP-18339) | S3A storage class option only picked up when buffering writes to disk | Major | fs/s3 | Steve Loughran | Monthon Klongklaew | +| [HADOOP-18410](https://issues.apache.org/jira/browse/HADOOP-18410) | S3AInputStream.unbuffer() async drain not releasing http connections | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18439](https://issues.apache.org/jira/browse/HADOOP-18439) | Fix VectoredIO for LocalFileSystem when checksum is enabled. | Major | common | Mukund Thakur | Mukund Thakur | +| [HADOOP-18416](https://issues.apache.org/jira/browse/HADOOP-18416) | ITestS3AIOStatisticsContext failure | Major | fs/s3, test | Steve Loughran | Mehakmeet Singh | +| [HADOOP-18347](https://issues.apache.org/jira/browse/HADOOP-18347) | Restrict vectoredIO threadpool to reduce memory pressure | Major | common, fs, fs/adl, fs/s3 | Rajesh Balamohan | Mukund Thakur | +| [HADOOP-18463](https://issues.apache.org/jira/browse/HADOOP-18463) | Add an integration test to process data asynchronously during vectored read. | Major | . | Mukund Thakur | Mukund Thakur | +| [HADOOP-15460](https://issues.apache.org/jira/browse/HADOOP-15460) | S3A FS to add "fs.s3a.create.performance" to the builder file creation option set | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | Upgrade AWS SDK to V2 - Prerequisites | Minor | . | Ahmar Suhail | Ahmar Suhail | +| [HADOOP-18480](https://issues.apache.org/jira/browse/HADOOP-18480) | upgrade AWS SDK to 1.12.316 | Major | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18460](https://issues.apache.org/jira/browse/HADOOP-18460) | ITestS3AContractVectoredRead.testStopVectoredIoOperationsUnbuffer failing | Minor | fs/s3, test | Steve Loughran | Mukund Thakur | +| [HADOOP-18488](https://issues.apache.org/jira/browse/HADOOP-18488) | Cherrypick HADOOP-11245 to branch-3.3 | Major | . | Wei-Chiu Chuang | Ashutosh Gupta | +| [HADOOP-18481](https://issues.apache.org/jira/browse/HADOOP-18481) | AWS v2 SDK upgrade log to not warn of use standard AWS Credential Providers | Major | fs/s3 | Steve Loughran | Ahmar Suhail | +| [HADOOP-18476](https://issues.apache.org/jira/browse/HADOOP-18476) | Abfs and S3A FileContext bindings to close wrapped filesystems in finalizer | Blocker | fs/azure, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18304](https://issues.apache.org/jira/browse/HADOOP-18304) | Improve S3A committers documentation clarity | Trivial | documentation | Daniel Carl Jones | Daniel Carl Jones | +| [HADOOP-18465](https://issues.apache.org/jira/browse/HADOOP-18465) | S3A server-side encryption tests fail before checking encryption tests should skip | Minor | fs/s3, test | Daniel Carl Jones | Daniel Carl Jones | +| [HADOOP-18530](https://issues.apache.org/jira/browse/HADOOP-18530) | ChecksumFileSystem::readVectored might return byte buffers not positioned at 0 | Blocker | fs | Harshit Gupta | Harshit Gupta | +| [HADOOP-18457](https://issues.apache.org/jira/browse/HADOOP-18457) | ABFS: Support for account level throttling | Major | . | Anmol Asrani | Anmol Asrani | +| [HADOOP-18560](https://issues.apache.org/jira/browse/HADOOP-18560) | AvroFSInput opens a stream twice and discards the second one without closing | Blocker | fs | Steve Loughran | Steve Loughran | +| [HADOOP-18526](https://issues.apache.org/jira/browse/HADOOP-18526) | Leak of S3AInstrumentation instances via hadoop Metrics references | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-18546](https://issues.apache.org/jira/browse/HADOOP-18546) | disable purging list of in progress reads in abfs stream closed | Blocker | fs/azure | Steve Loughran | Pranav Saxena | +| [HADOOP-18577](https://issues.apache.org/jira/browse/HADOOP-18577) | ABFS: add probes of readahead fix | Major | fs/azure | Steve Loughran | Steve Loughran | +| [HADOOP-11867](https://issues.apache.org/jira/browse/HADOOP-11867) | Add a high-performance vectored read API. | Major | fs, fs/azure, fs/s3, hdfs-client | Gopal Vijayaraghavan | Mukund Thakur | +| [HADOOP-18507](https://issues.apache.org/jira/browse/HADOOP-18507) | VectorIO FileRange type to support a "reference" field | Major | fs | Steve Loughran | Steve Loughran | +| [HADOOP-18627](https://issues.apache.org/jira/browse/HADOOP-18627) | site intro docs to make clear Kerberos is mandatory for secure clusters | Major | site | Steve Loughran | Arnout Engelen | +| [HADOOP-17584](https://issues.apache.org/jira/browse/HADOOP-17584) | s3a magic committer may commit more data | Major | fs/s3 | yinan zhan | Steve Loughran | +| [HADOOP-18642](https://issues.apache.org/jira/browse/HADOOP-18642) | Cut excess dependencies from hadoop-azure, hadoop-aliyun transitive imports; fix LICENSE-binary | Blocker | build, fs/azure, fs/oss | Steve Loughran | Steve Loughran | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-15854](https://issues.apache.org/jira/browse/HDFS-15854) | Make some parameters configurable for SlowDiskTracker and SlowPeerTracker | Major | . | Tao Li | Tao Li | +| [YARN-10747](https://issues.apache.org/jira/browse/YARN-10747) | Bump YARN CSI protobuf version to 3.7.1 | Major | . | Siyao Meng | Siyao Meng | +| [HDFS-16139](https://issues.apache.org/jira/browse/HDFS-16139) | Update BPServiceActor Scheduler's nextBlockReportTime atomically | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18014](https://issues.apache.org/jira/browse/HADOOP-18014) | CallerContext should not include some characters | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [MAPREDUCE-7371](https://issues.apache.org/jira/browse/MAPREDUCE-7371) | DistributedCache alternative APIs should not use DistributedCache APIs internally | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18114](https://issues.apache.org/jira/browse/HADOOP-18114) | Documentation Syntax Error Fix \> AWS Assumed Roles | Trivial | documentation, fs/s3 | Joey Krabacher | Joey Krabacher | +| [HDFS-16481](https://issues.apache.org/jira/browse/HDFS-16481) | Provide support to set Http and Rpc ports in MiniJournalCluster | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16502](https://issues.apache.org/jira/browse/HDFS-16502) | Reconfigure Block Invalidate limit | Major | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16522](https://issues.apache.org/jira/browse/HDFS-16522) | Set Http and Ipc ports for Datanodes in MiniDFSCluster | Major | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18191](https://issues.apache.org/jira/browse/HADOOP-18191) | Log retry count while handling exceptions in RetryInvocationHandler | Minor | . | Viraj Jasani | Viraj Jasani | +| [HDFS-16551](https://issues.apache.org/jira/browse/HDFS-16551) | Backport HADOOP-17588 to 3.3 and other active old branches. | Major | . | Renukaprasad C | Renukaprasad C | +| [HDFS-16618](https://issues.apache.org/jira/browse/HDFS-16618) | sync\_file\_range error should include more volume and file info | Minor | . | Viraj Jasani | Viraj Jasani | +| [HADOOP-18300](https://issues.apache.org/jira/browse/HADOOP-18300) | Update google-gson to 2.9.0 | Minor | build | Igor Dvorzhak | Igor Dvorzhak | +| [HADOOP-18397](https://issues.apache.org/jira/browse/HADOOP-18397) | Shutdown AWSSecurityTokenService when its resources are no longer in use | Major | fs/s3 | Viraj Jasani | Viraj Jasani | +| [HADOOP-18575](https://issues.apache.org/jira/browse/HADOOP-18575) | Make XML transformer factory more lenient | Major | common | PJ Fanning | PJ Fanning | +| [HADOOP-18586](https://issues.apache.org/jira/browse/HADOOP-18586) | Update the year to 2023 | Major | . | Ayush Saxena | Ayush Saxena | +| [HADOOP-18587](https://issues.apache.org/jira/browse/HADOOP-18587) | upgrade to jettison 1.5.3 to fix CVE-2022-40150 | Major | common | PJ Fanning | PJ Fanning | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md new file mode 100644 index 00000000000..b2357e827d2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.5/RELEASENOTES.3.3.5.md @@ -0,0 +1,89 @@ + + +# Apache Hadoop 3.3.5 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HADOOP-17956](https://issues.apache.org/jira/browse/HADOOP-17956) | *Major* | **Replace all default Charset usage with UTF-8** + +All of the default charset usages have been replaced to UTF-8. If the default charset of your environment is not UTF-8, the behavior can be different. + + +--- + +* [HADOOP-15983](https://issues.apache.org/jira/browse/HADOOP-15983) | *Major* | **Use jersey-json that is built to use jackson2** + +Use modified jersey-json 1.20 in https://github.com/pjfanning/jersey-1.x/tree/v1.20 that uses Jackson 2.x. By this change, Jackson 1.x dependency has been removed from Hadoop. +downstream applications which explicitly exclude jersey from transitive dependencies must now exclude com.github.pjfanning:jersey-json + + +--- + +* [HDFS-16595](https://issues.apache.org/jira/browse/HDFS-16595) | *Major* | **Slow peer metrics - add median, mad and upper latency limits** + +Namenode metrics that represent Slownode Json now include three important factors (median, median absolute deviation, upper latency limit) that can help user determine how urgently a given slownode requires manual intervention. + + +--- + +* [HADOOP-17833](https://issues.apache.org/jira/browse/HADOOP-17833) | *Minor* | **Improve Magic Committer Performance** + +S3A filesytem's createFile() operation supports an option to disable all safety checks when creating a file. Consult the documentation and use with care + + +--- + +* [HADOOP-18382](https://issues.apache.org/jira/browse/HADOOP-18382) | *Minor* | **Upgrade AWS SDK to V2 - Prerequisites** + +In preparation for an (incompatible but necessary) move to the AWS SDK v2, some uses of internal/deprecated uses of AWS classes/interfaces are logged as warnings, though only once during the life of a JVM. Set the log "org.apache.hadoop.fs.s3a.SDKV2Upgrade" to only log at INFO to hide these. + + +--- + +* [HADOOP-18442](https://issues.apache.org/jira/browse/HADOOP-18442) | *Major* | **Remove the hadoop-openstack module** + +The swift:// connector for openstack support has been removed. It had fundamental problems (swift's handling of files \> 4GB). A subset of the S3 protocol is now exported by almost all object store services -please use that through the s3a connector instead. The hadoop-openstack jar remains, only now it is empty of code. This is to ensure that projects which declare the JAR a dependency will still have successful builds. + + +--- + +* [HADOOP-17563](https://issues.apache.org/jira/browse/HADOOP-17563) | *Major* | **Update Bouncy Castle to 1.68 or later** + +bouncy castle 1.68+ is a multirelease JAR containing java classes compiled for different target JREs. older versions of asm.jar and maven shade plugin may have problems with these. fix: upgrade the dependencies + + +--- + +* [HADOOP-18528](https://issues.apache.org/jira/browse/HADOOP-18528) | *Major* | **Disable abfs prefetching by default** + +ABFS block prefetching has been disabled to avoid HADOOP-18521 and buffer sharing on multithreaded processes (Hive, Spark etc). This will have little/no performance impact on queries against Parquet or ORC data, but can slow down sequential stream processing, including CSV files -however, the read data will be correct. +It may slow down distcp downloads, where the race condition does not arise. For maximum distcp performance re-enable the readahead by setting fs.abfs.enable.readahead to true. + + +--- + +* [HADOOP-18621](https://issues.apache.org/jira/browse/HADOOP-18621) | *Critical* | **CryptoOutputStream::close leak when encrypted zones + quota exceptions** + +**WARNING: No release note provided for this change.** + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml new file mode 100644 index 00000000000..399b62b3010 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.3.5.xml @@ -0,0 +1,835 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

    + +

    The most important difference is that unlike GFS, Hadoop DFS files +have strictly one writer at any one time. Bytes are always appended +to the end of the writer's stream. There is no notion of "record appends" +or "mutations" that are then checked or reordered. Writers simply emit +a byte stream. That byte stream is guaranteed to be stored in the +order written.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    diff --git a/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.3.5.xml b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.3.5.xml new file mode 100644 index 00000000000..2c67744dfbe --- /dev/null +++ b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.3.5.xml @@ -0,0 +1,113 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.3.5.xml b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.3.5.xml new file mode 100644 index 00000000000..9c9eeee9637 --- /dev/null +++ b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.3.5.xml @@ -0,0 +1,28963 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileStatus of a given cache file on hdfs + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DistributedCache is a facility provided by the Map-Reduce + framework to cache files (text, archives, jars etc.) needed by applications. +

    + +

    Applications specify the files, via urls (hdfs:// or http://) to be cached + via the {@link org.apache.hadoop.mapred.JobConf}. The + DistributedCache assumes that the files specified via urls are + already present on the {@link FileSystem} at the path specified by the url + and are accessible by every machine in the cluster.

    + +

    The framework will copy the necessary files on to the worker node before + any tasks for the job are executed on that node. Its efficiency stems from + the fact that the files are only copied once per job and the ability to + cache archives which are un-archived on the workers.

    + +

    DistributedCache can be used to distribute simple, read-only + data/text files and/or more complex types such as archives, jars etc. + Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes. + Jars may be optionally added to the classpath of the tasks, a rudimentary + software distribution mechanism. Files have execution permissions. + In older version of Hadoop Map/Reduce users could optionally ask for symlinks + to be created in the working directory of the child task. In the current + version symlinks are always created. If the URL does not have a fragment + the name of the file or directory will be used. If multiple files or + directories map to the same link name, the last one added, will be used. All + others will not even be downloaded.

    + +

    DistributedCache tracks modification timestamps of the cache + files. Clearly the cache files should not be modified by the application + or externally while the job is executing.

    + +

    Here is an illustrative example on how to use the + DistributedCache:

    +

    +     // Setting up the cache for the application
    +
    +     1. Copy the requisite files to the FileSystem:
    +
    +     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
    +     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
    +     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
    +     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
    +     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
    +     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    +
    +     2. Setup the application's JobConf:
    +
    +     JobConf job = new JobConf();
    +     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
    +                                   job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
    +     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
    +     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
    +
    +     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
    +     or {@link org.apache.hadoop.mapred.Reducer}:
    +
    +     public static class MapClass extends MapReduceBase
    +     implements Mapper<K, V, K, V> {
    +
    +       private Path[] localArchives;
    +       private Path[] localFiles;
    +
    +       public void configure(JobConf job) {
    +         // Get the cached archives/files
    +         File f = new File("./map.zip/some/file/in/zip.txt");
    +       }
    +
    +       public void map(K key, V value,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Use data from the cached archives/files here
    +         // ...
    +         // ...
    +         output.collect(k, v);
    +       }
    +     }
    +
    + 
    + + It is also very common to use the DistributedCache by using + {@link org.apache.hadoop.util.GenericOptionsParser}. + + This class includes methods that should be used by users + (specifically those mentioned in the example above, as well + as {@link DistributedCache#addArchiveToClassPath(Path, Configuration)}), + as well as methods intended for use by the MapReduce framework + (e.g., {@link org.apache.hadoop.mapred.JobClient}). + + @see org.apache.hadoop.mapred.JobConf + @see org.apache.hadoop.mapred.JobClient + @see org.apache.hadoop.mapreduce.Job]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker, + as {@link JobTracker.State} + + {@link JobTracker.State} should no longer be used on M/R 2.x. The function + is kept to be compatible with M/R 1.x applications. + + @return the invalid state of the JobTracker.]]> + + + + + + + + + + + + + + ClusterStatus provides clients with information such as: +
      +
    1. + Size of the cluster. +
    2. +
    3. + Name of the trackers. +
    4. +
    5. + Task capacity of the cluster. +
    6. +
    7. + The number of currently running map and reduce tasks. +
    8. +
    9. + State of the JobTracker. +
    10. +
    11. + Details regarding black listed trackers. +
    12. +
    + +

    Clients can query for the latest ClusterStatus, via + {@link JobClient#getClusterStatus()}.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link Group}s, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Group of counters, comprising of counters from a particular + counter {@link Enum} class. + +

    Grouphandles localization of the class name and the + counter names.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat always returns + true. Implementations that may deal with non-splittable files must + override this method. + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param fs the file system that the file is on + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobConf, int)}. + + Implementations of FileInputFormat can also override the + {@link #isSplitable(FileSystem, Path)} method to prevent input files + from being split-up in certain situations. Implementations that may + deal with non-splittable files must override this method, since + the default implementation assumes splitting is always possible.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Note: The following is valid only if the {@link OutputCommitter} + is {@link FileOutputCommitter}. If OutputCommitter is not + a FileOutputCommitter, the task's temporary output + directory is same as {@link #getOutputPath(JobConf)} i.e. + ${mapreduce.output.fileoutputformat.outputdir}$

    + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only) + are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in ${mapreduce.task.output.dir} during execution + of his reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the + framework will move them out similarly - thus she doesn't have to pick + unique paths per task-attempt.

    + +

    Note: the value of ${mapreduce.task.output.dir} during + execution of a particular task-attempt is actually + ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_{$taskid}, and this value is + set by the map-reduce framework. So, just create any side-files in the + path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce + task to take advantage of this feature.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    +
    + + + + + + + + + + + + + The generated name can be used to create custom files from within the + different tasks for the job, the names for different tasks will not collide + with each other.

    + +

    The given name is postfixed with the task type, 'm' for maps, 'r' for + reduces and the task partition number. For example, give a name 'test' + running on the first map o the job the generated name will be + 'test-m-00000'.

    + + @param conf the configuration for the job. + @param name the name to make unique. + @return a unique name accross all tasks of the job.]]> +
    +
    + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

    ls + +

    This method uses the {@link #getUniqueName} method to make the file name + unique for the task.

    + + @param conf the configuration for the job. + @param name the name for the file. + @return a unique path accross all tasks of the job.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    or + conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, recordLength); +

    + @see FixedLengthRecordReader]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. + + @param job job configuration. + @param numSplits the desired number of splits, a hint. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + It is the responsibility of the RecordReader to respect + record boundaries while processing the logical split to present a + record-oriented view to the individual task.

    + + @param split the {@link InputSplit} + @param job the job that this split belongs to + @return a {@link RecordReader}]]> +
    +
    + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapreduce.input.fileinputformat.split.minsize.

    + +

    Clearly, logical splits based on input-size is insufficient for many + applications since record boundaries are to be respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibilty to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see JobClient + @see FileInputFormat]]> + + + + + + + + + + InputSplit. + + @return the number of bytes in the input split. + @throws IOException]]> + + + + + + InputSplit is + located as an array of Strings. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + SplitLocationInfos describing how the split + data is stored at each location. A null value indicates that all the + locations have the data stored on disk. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + jobid doesn't correspond to any known job. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobClient is the primary interface for the user-job to interact + with the cluster. + + JobClient provides facilities to submit jobs, track their + progress, access component-tasks' reports/logs, get the Map-Reduce cluster + status information etc. + +

    The job submission process involves: +

      +
    1. + Checking the input and output specifications of the job. +
    2. +
    3. + Computing the {@link InputSplit}s for the job. +
    4. +
    5. + Setup the requisite accounting information for the {@link DistributedCache} + of the job, if necessary. +
    6. +
    7. + Copying the job's jar and configuration to the map-reduce system directory + on the distributed file-system. +
    8. +
    9. + Submitting the job to the cluster and optionally monitoring + it's status. +
    10. +
    + + Normally the user creates the application, describes various facets of the + job via {@link JobConf} and then uses the JobClient to submit + the job and monitor its progress. + +

    Here is an example on how to use JobClient:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     JobClient.runJob(job);
    + 
    + + Job Control + +

    At times clients would chain map-reduce jobs to accomplish complex tasks + which cannot be done via a single map-reduce job. This is fairly easy since + the output of the job, typically, goes to distributed file-system and that + can be used as the input for the next job.

    + +

    However, this also means that the onus on ensuring jobs are complete + (success/failure) lies squarely on the clients. In such situations the + various job-control options are: +

      +
    1. + {@link #runJob(JobConf)} : submits the job and returns only after + the job has completed. +
    2. +
    3. + {@link #submitJob(JobConf)} : only submits the job, then poll the + returned handle to the {@link RunningJob} to query status and make + scheduling decisions. +
    4. +
    5. + {@link JobConf#setJobEndNotificationURI(String)} : setup a notification + on job-completion, thus avoiding polling. +
    6. +
    + + @see JobConf + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + If the parameter {@code loadDefaults} is false, the new instance + will not load resources from the default files. + + @param loadDefaults specifies whether to load from the default files]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if framework should keep the intermediate files + for failed tasks, false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the outputs of the maps are to be compressed, + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the combiner sort being + stable in any sense. (In any case, with the order of available + map-outputs to the combiner being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys for the + combiner. It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class)]]> +
    +
    + + + + This comparator should be provided if the equivalence rules for keys + for sorting the intermediates are different from those for grouping keys + before each call to + {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.

    + +

    For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed + in a single call to the reduce function if K1 and K2 compare as equal.

    + +

    Since {@link #setOutputKeyComparatorClass(Class)} can be used to control + how keys are sorted, this can be used in conjunction to simulate + secondary sort on values.

    + +

    Note: This is not a guarantee of the reduce sort being + stable in any sense. (In any case, with the order of available + map-outputs to the reduce being non-deterministic, it wouldn't make + that much sense.)

    + + @param theClass the comparator class to be used for grouping keys. + It should implement RawComparator. + @see #setOutputKeyComparatorClass(Class) + @see #setCombinerKeyGroupingComparator(Class)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. Typically the combiner is same as the + the {@link Reducer} for the job i.e. {@link #getReducerClass()}. + + @return the user-defined combiner class used to combine map-outputs.]]> + + + + + + combiner class used to combine map-outputs + before being sent to the reducers. + +

    The combiner is an application-specified aggregation operation, which + can help cut down the amount of data transferred between the + {@link Mapper} and the {@link Reducer}, leading to better performance.

    + +

    The framework may invoke the combiner 0, 1, or multiple times, in both + the mapper and reducer tasks. In general, the combiner is called as the + sort/merge result is written to disk. The combiner must: +

      +
    • be side-effect free
    • +
    • have the same input and output key types and the same input and + output value types
    • +
    + +

    Typically the combiner is same as the Reducer for the + job i.e. {@link #setReducerClass(Class)}.

    + + @param theClass the user-defined combiner class used to combine + map-outputs.]]> +
    +
    + + + true. + + @return true if speculative execution be used for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + true. + + @return true if speculative execution be + used for this job for map tasks, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + true. + + @return true if speculative execution be used + for reduce tasks for this job, + false otherwise.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + 1. + + @return the number of map tasks for this job.]]> + + + + + + Note: This is only a hint to the framework. The actual + number of spawned map tasks depends on the number of {@link InputSplit}s + generated by the job's {@link InputFormat#getSplits(JobConf, int)}. + + A custom {@link InputFormat} is typically used to accurately control + the number of map tasks for the job.

    + + How many maps? + +

    The number of maps is usually driven by the total size of the inputs + i.e. total number of blocks of the input files.

    + +

    The right level of parallelism for maps seems to be around 10-100 maps + per-node, although it has been set up to 300 or so for very cpu-light map + tasks. Task setup takes awhile, so it is best if the maps take at least a + minute to execute.

    + +

    The default behavior of file-based {@link InputFormat}s is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of input files. However, the {@link FileSystem} blocksize of the + input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapreduce.input.fileinputformat.split.minsize.

    + +

    Thus, if you expect 10TB of input data and have a blocksize of 128MB, + you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is + used to set it even higher.

    + + @param n the number of map tasks for this job. + @see InputFormat#getSplits(JobConf, int) + @see FileInputFormat + @see FileSystem#getDefaultBlockSize() + @see FileStatus#getBlockSize()]]> +
    +
    + + + 1. + + @return the number of reduce tasks for this job.]]> + + + + + + How many reduces? + +

    The right number of reduces seems to be 0.95 or + 1.75 multiplied by ( + available memory for reduce tasks + (The value of this should be smaller than + numNodes * yarn.nodemanager.resource.memory-mb + since the resource of memory is shared by map tasks and other + applications) / + + mapreduce.reduce.memory.mb). +

    + +

    With 0.95 all of the reduces can launch immediately and + start transfering map outputs as the maps finish. With 1.75 + the faster nodes will finish their first round of reduces and launch a + second wave of reduces doing a much better job of load balancing.

    + +

    Increasing the number of reduces increases the framework overhead, but + increases load balancing and lowers the cost of failures.

    + +

    The scaling factors above are slightly less than whole numbers to + reserve a few reduce slots in the framework for speculative-tasks, failures + etc.

    + + Reducer NONE + +

    It is legal to set the number of reduce-tasks to zero.

    + +

    In this case the output of the map-tasks directly go to distributed + file-system, to the path set by + {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the + framework doesn't sort the map-outputs before writing it out to HDFS.

    + + @param n the number of reduce tasks for this job.]]> +
    +
    + + + mapreduce.map.maxattempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + + + + + + + mapreduce.reduce.maxattempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noFailures, the + tasktracker is blacklisted for this job. + + @param noFailures maximum no. of failures of a given job per tasktracker.]]> + + + + + blacklisted for this job. + + @return the maximum no. of failures of a given job per tasktracker.]]> + + + + + failed. + + Defaults to zero, i.e. any failed map-task results in + the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of map tasks that can fail without + the job being aborted.]]> + + + + + failed. + + Defaults to zero, i.e. any failed reduce-task results + in the job being declared as {@link JobStatus#FAILED}. + + @return the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + failed. + + @param percent the maximum percentage of reduce tasks that can fail without + the job being aborted.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The debug script can aid debugging of failed map tasks. The script is + given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf.
    + 
    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script needs to be symlinked.

    + +

    Here is an example on how to submit a script +

    + job.setMapDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 
    + + @param mDbgScript the script name]]> +
    +
    + + + + + + + + + The debug script can aid debugging of failed reduce tasks. The script + is given task's stdout, stderr, syslog, jobconf files as arguments.

    + +

    The debug command, run on the node where the map failed, is:

    +

    + $script $stdout $stderr $syslog $jobconf.
    + 
    + +

    The script file is distributed through {@link DistributedCache} + APIs. The script file needs to be symlinked

    + +

    Here is an example on how to submit a script +

    + job.setReduceDebugScript("./myscript");
    + DistributedCache.createSymlink(job);
    + DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
    + 
    + + @param rDbgScript the script name]]> +
    +
    + + + + + + + + null if it hasn't + been set. + @see #setJobEndNotificationURI(String)]]> + + + + + + The uri can contain 2 special parameters: $jobId and + $jobStatus. Those, if present, are replaced by the job's + identifier and completion-status respectively.

    + +

    This is typically used by application-writers to implement chaining of + Map-Reduce jobs in an asynchronous manner.

    + + @param uri the job end notification uri + @see JobStatus]]> +
    +
    + + + + + + + + + + + + + + + When a job starts, a shared directory is created at location + + ${mapreduce.cluster.local.dir}/taskTracker/$user/jobcache/$jobid/work/ . + This directory is exposed to the users through + mapreduce.job.local.dir . + So, the tasks can use this space + as scratch space and share files among them.

    + This value is available as System property also. + + @return The localized job specific shared directory]]> +
    +
    + + + + For backward compatibility, if the job configuration sets the + key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different + from {@link #DISABLED_MEMORY_LIMIT}, that value will be used + after converting it from bytes to MB. + @return memory required to run a map task of the job, in MB,]]> + + + + + + + + + For backward compatibility, if the job configuration sets the + key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different + from {@link #DISABLED_MEMORY_LIMIT}, that value will be used + after converting it from bytes to MB. + @return memory required to run a reduce task of the job, in MB.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method is deprecated. Now, different memory limits can be + set for map and reduce tasks of a job, in MB. +

    + For backward compatibility, if the job configuration sets the + key {@link #MAPRED_TASK_MAXVMEM_PROPERTY}, that value is returned. + Otherwise, this method will return the larger of the values returned by + {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()} + after converting them into bytes. + + @return Memory required to run a task of this job, in bytes. + @see #setMaxVirtualMemoryForTask(long) + @deprecated Use {@link #getMemoryForMapTask()} and + {@link #getMemoryForReduceTask()}]]> + + + + + + + mapred.task.maxvmem is split into + mapreduce.map.memory.mb + and mapreduce.map.memory.mb,mapred + each of the new key are set + as mapred.task.maxvmem / 1024 + as new values are in MB + + @param vmem Maximum amount of virtual memory in bytes any task of this job + can use. + @see #getMaxVirtualMemoryForTask() + @deprecated + Use {@link #setMemoryForMapTask(long mem)} and + Use {@link #setMemoryForReduceTask(long mem)}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + k1=v1,k2=v2. Further it can + reference existing environment variables via $key on + Linux or %key% on Windows. + + Example: +

      +
    • A=foo - This will set the env variable A to foo.
    • +
    + + @deprecated Use {@link #MAPRED_MAP_TASK_ENV} or + {@link #MAPRED_REDUCE_TASK_ENV}]]> +
    + + + + k1=v1,k2=v2. Further it can + reference existing environment variables via $key on + Linux or %key% on Windows. + + Example: +
      +
    • A=foo - This will set the env variable A to foo.
    • +
    + + You can also add environment variables individually by appending + .VARNAME to this configuration key, where VARNAME is + the name of the environment variable. + + Example: +
      +
    • mapreduce.map.env.VARNAME=value
    • +
    ]]> +
    +
    + + + k1=v1,k2=v2. Further it can + reference existing environment variables via $key on + Linux or %key% on Windows. + + Example: +
      +
    • A=foo - This will set the env variable A to foo.
    • +
    + + You can also add environment variables individually by appending + .VARNAME to this configuration key, where VARNAME is + the name of the environment variable. + + Example: +
      +
    • mapreduce.reduce.env.VARNAME=value
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobConf is the primary interface for a user to describe a + map-reduce job to the Hadoop framework for execution. The framework tries to + faithfully execute the job as-is described by JobConf, however: +
      +
    1. + Some configuration parameters might have been marked as + + final by administrators and hence cannot be altered. +
    2. +
    3. + While some job parameters are straight-forward to set + (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly + with the rest of the framework and/or job-configuration and is relatively + more complex for the user to control finely + (e.g. {@link #setNumMapTasks(int)}). +
    4. +
    + +

    JobConf typically specifies the {@link Mapper}, combiner + (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and + {@link OutputFormat} implementations to be used etc. + +

    Optionally JobConf is used to specify other advanced facets + of the job such as Comparators to be used, files to be put in + the {@link DistributedCache}, whether or not intermediate and/or job outputs + are to be compressed (and how), debugability via user-provided scripts + ( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}), + for doing post-processing on task logs, task's stdout, stderr, syslog. + and etc.

    + +

    Here is an example on how to configure a job via JobConf:

    +

    +     // Create a new JobConf
    +     JobConf job = new JobConf(new Configuration(), MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     FileInputFormat.setInputPaths(job, new Path("in"));
    +     FileOutputFormat.setOutputPath(job, new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setCombinerClass(MyJob.MyReducer.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +     
    +     job.setInputFormat(SequenceFileInputFormat.class);
    +     job.setOutputFormat(SequenceFileOutputFormat.class);
    + 
    + + @see JobClient + @see ClusterStatus + @see Tool + @see DistributedCache]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + any job + run on the jobtracker started at 200707121733, we would use : +
     
    + JobID.getTaskIDsPattern("200707121733", null);
    + 
    + which will return : +
     "job_200707121733_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @return a regex pattern matching JobIDs]]> +
    +
    + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Output pairs need not be of the same types as input pairs. A given + input pair may map to zero or many output pairs. Output pairs are + collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes significant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapreduce.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the input key. + @param value the input value. + @param output collects mapped keys and values. + @param reporter facility to report progress.]]> +
    + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link JobConf} for the + job via the {@link JobConfigurable#configure(JobConf)} and initialize + themselves. Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    The framework then calls + {@link #map(Object, Object, OutputCollector, Reporter)} + for each key/value pair in the InputSplit for that task.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the grouping by specifying + a Comparator via + {@link JobConf#setOutputKeyComparatorClass(Class)}.

    + +

    The grouped Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    The intermediate, grouped outputs are always stored in + {@link SequenceFile}s. Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the JobConf.

    + +

    If the job has + zero + reduces then the output of the Mapper is directly written + to the {@link FileSystem} without grouping by keys.

    + +

    Example:

    +

    +     public class MyMapper<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Mapper<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +       
    +       private String mapTaskId;
    +       private String inputFile;
    +       private int noRecords = 0;
    +       
    +       public void configure(JobConf job) {
    +         mapTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
    +         inputFile = job.get(JobContext.MAP_INPUT_FILE);
    +       }
    +       
    +       public void map(K key, V val,
    +                       OutputCollector<K, V> output, Reporter reporter)
    +       throws IOException {
    +         // Process the <key, value> pair (assume this takes a while)
    +         // ...
    +         // ...
    +         
    +         // Let the framework know that we are alive, and kicking!
    +         // reporter.progress();
    +         
    +         // Process some more
    +         // ...
    +         // ...
    +         
    +         // Increment the no. of <key, value> pairs processed
    +         ++noRecords;
    +
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +        
    +         // Every 100 records update application-level status
    +         if ((noRecords%100) == 0) {
    +           reporter.setStatus(mapTaskId + " processed " + noRecords + 
    +                              " from input-file: " + inputFile); 
    +         }
    +         
    +         // Output the result
    +         output.collect(key, val);
    +       }
    +     }
    + 
    + +

    Applications may write a custom {@link MapRunnable} to exert greater + control on map processing e.g. multi-threaded Mappers etc.

    + + @see JobConf + @see InputFormat + @see Partitioner + @see Reducer + @see MapReduceBase + @see MapRunnable + @see SequenceFile]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + Provides default no-op implementations for a few methods, most non-trivial + applications need to override some of them.

    ]]> +
    +
    + + + + + + + + + + + <key, value> pairs. + +

    Mapping of input records to output records is complete when this method + returns.

    + + @param input the {@link RecordReader} to read the input records. + @param output the {@link OutputCollector} to collect the outputrecords. + @param reporter {@link Reporter} to report progress, status-updates etc. + @throws IOException]]> +
    +
    + + Custom implementations of MapRunnable can exert greater + control on map processing e.g. multi-threaded, asynchronous mappers etc.

    + + @see Mapper]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nearly + equal content length.
    + Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} + to construct RecordReader's for MultiFileSplit's. + @see MultiFileSplit]]> +
    +
    + + + + + + + + + + + + + MultiFileSplit can be used to implement {@link RecordReader}'s, with + reading one record per file. + @see FileSplit + @see MultiFileInputFormat]]> + + + + + + + + + + + + + + + <key, value> pairs output by {@link Mapper}s + and {@link Reducer}s. + +

    OutputCollector is the generalization of the facility + provided by the Map-Reduce framework to collect data output by either the + Mapper or the Reducer i.e. intermediate outputs + or the output of the job.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if task output recovery is supported, + false otherwise + @throws IOException + @see #recoverTask(TaskAttemptContext)]]> + + + + + + + true repeatable job commit is supported, + false otherwise + @throws IOException]]> + + + + + + + + + + + OutputCommitter. This is called from the application master + process, but it is called individually for each task. + + If an exception is thrown the task will be attempted again. + + @param taskContext Context of the task whose output is being recovered + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    1. + Setup the job during initialization. For example, create the temporary + output directory for the job during the initialization of the job. +
    2. +
    3. + Cleanup the job after the job completion. For example, remove the + temporary output directory after the job completion. +
    4. +
    5. + Setup the task temporary output. +
    6. +
    7. + Check whether a task needs a commit. This is to avoid the commit + procedure if a task does not need commit. +
    8. +
    9. + Commit of the task output. +
    10. +
    11. + Discard the task commit. +
    12. +
    + The methods in this class can be called from several different processes and + from several different contexts. It is important to know which process and + which context each is called from. Each method should be marked accordingly + in its documentation. It is also important to note that not all methods are + guaranteed to be called once and only once. If a method is not guaranteed to + have this property the output committer needs to handle this appropriately. + Also note it will only be in rare situations where they may be called + multiple times for the same task. + + @see FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + Implementations which write to filesystems which support delegation + tokens usually collect the tokens for the destination path(s) + and attach them to the job configuration. + @param ignored + @param job job configuration. + @throws IOException when output should not be attempted]]> +
    +
    + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. + Validate the output-specification of the job. For e.g. check that the + output directory doesn't already exist. +
    2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    3. +
    + + @see RecordWriter + @see JobConf]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

    + + @param key the key to be paritioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + +

    Note: A Partitioner is created only when there are multiple + reducers.

    + + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.0 to 1.0. + @throws IOException]]> + + + + RecordReader reads <key, value> pairs from an + {@link InputSplit}. + +

    RecordReader, typically, converts the byte-oriented view of + the input, provided by the InputSplit, and presents a + record-oriented view for the {@link Mapper} and {@link Reducer} tasks for + processing. It thus assumes the responsibility of processing record + boundaries and presenting the tasks with keys and values.

    + + @see InputSplit + @see InputFormat]]> +
    +
    + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param reporter facility to report progress. + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + Reduces values for a given key. + +

    The framework calls this method for each + <key, (list of values)> pair in the grouped inputs. + Output values must be of the same type as input values. Input keys must + not be altered. The framework will reuse the key and value objects + that are passed into the reduce, therefore the application should clone + the objects they want to keep a copy of. In many cases, all values are + combined into zero or one value. +

    + +

    Output pairs are collected with calls to + {@link OutputCollector#collect(Object,Object)}.

    + +

    Applications can use the {@link Reporter} provided to report progress + or just indicate that they are alive. In scenarios where the application + takes a significant amount of time to process individual key/value + pairs, this is crucial since the framework might assume that the task has + timed-out and kill that task. The other way of avoiding this is to set + + mapreduce.task.timeout to a high-enough value (or even zero for no + time-outs).

    + + @param key the key. + @param values the list of values to reduce. + @param output to collect keys and combined values. + @param reporter facility to report progress.]]> +
    + + + The number of Reducers for the job is set by the user via + {@link JobConf#setNumReduceTasks(int)}. Reducer implementations + can access the {@link JobConf} for the job via the + {@link JobConfigurable#configure(JobConf)} method and initialize themselves. + Similarly they can use the {@link Closeable#close()} method for + de-initialization.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + + Shuffle + +

      Reducer is input the grouped output of a {@link Mapper}. + In the phase the framework, for each Reducer, fetches the + relevant partition of the output of all the Mappers, via HTTP. +

      +
    2. + +
    3. + Sort + +

      The framework groups Reducer inputs by keys + (since different Mappers may have output the same key) in this + stage.

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + + SecondarySort + +

      If equivalence rules for keys while grouping the intermediates are + different from those for grouping keys before reduction, then one may + specify a Comparator via + {@link JobConf#setOutputValueGroupingComparator(Class)}.Since + {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to + control how intermediate keys are grouped, these can be used in conjunction + to simulate secondary sort on values.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • +
      • Map Input Value: document
      • +
      • Map Output Key: document checksum, url pagerank
      • +
      • Map Output Value: url
      • +
      • Partitioner: by checksum
      • +
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • +
      • OutputValueGroupingComparator: by checksum
      • +
      +
    4. + +
    5. + Reduce + +

      In this phase the + {@link #reduce(Object, Iterator, OutputCollector, Reporter)} + method is called for each <key, (list of values)> pair in + the grouped inputs.

      +

      The output of the reduce task is typically written to the + {@link FileSystem} via + {@link OutputCollector#collect(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    +     public class MyReducer<K extends WritableComparable, V extends Writable> 
    +     extends MapReduceBase implements Reducer<K, V, K, V> {
    +     
    +       static enum MyCounters { NUM_RECORDS }
    +        
    +       private String reduceTaskId;
    +       private int noKeys = 0;
    +       
    +       public void configure(JobConf job) {
    +         reduceTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
    +       }
    +       
    +       public void reduce(K key, Iterator<V> values,
    +                          OutputCollector<K, V> output, 
    +                          Reporter reporter)
    +       throws IOException {
    +       
    +         // Process
    +         int noValues = 0;
    +         while (values.hasNext()) {
    +           V value = values.next();
    +           
    +           // Increment the no. of values for this key
    +           ++noValues;
    +           
    +           // Process the <key, value> pair (assume this takes a while)
    +           // ...
    +           // ...
    +           
    +           // Let the framework know that we are alive, and kicking!
    +           if ((noValues%10) == 0) {
    +             reporter.progress();
    +           }
    +         
    +           // Process some more
    +           // ...
    +           // ...
    +           
    +           // Output the <key, value> 
    +           output.collect(key, value);
    +         }
    +         
    +         // Increment the no. of <key, list of values> pairs processed
    +         ++noKeys;
    +         
    +         // Increment counters
    +         reporter.incrCounter(NUM_RECORDS, 1);
    +         
    +         // Every 100 keys update application-level status
    +         if ((noKeys%100) == 0) {
    +           reporter.setStatus(reduceTaskId + " processed " + noKeys);
    +         }
    +       }
    +     }
    + 
    + + @see Mapper + @see Partitioner + @see Reporter + @see MapReduceBase]]> +
    +
    + + + + + + + + + + + + + + Counter of the given group/name.]]> + + + + + + + Counter of the given group/name.]]> + + + + + + + Enum. + @param amount A non-negative amount by which the counter is to + be incremented.]]> + + + + + + + + + + + + + + InputSplit that the map is reading from. + @throws UnsupportedOperationException if called outside a mapper]]> + + + + + + + + + + + + + + {@link Mapper} and {@link Reducer} can use the Reporter + provided to report progress or just indicate that they are alive. In + scenarios where the application takes significant amount of time to + process individual key/value pairs, this is crucial since the framework + might assume that the task has timed-out and kill that task. + +

    Applications can also update {@link Counters} via the provided + Reporter .

    + + @see Progressable + @see Counters]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job retired, else false. + @throws IOException]]> + + + + + + + + + + RunningJob is the user-interface to query for details on a + running Map-Reduce job. + +

    Clients can get hold of RunningJob via the {@link JobClient} + and then query the running-job for details such as name, configuration, + progress etc.

    + + @see JobClient]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param conf the {@link JobConf} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. + false otherwise.]]> + + + + + + + + + + + + + true if auto increment + {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. + false otherwise.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hadoop provides an optional mode of execution in which the bad records + are detected and skipped in further attempts. + +

    This feature can be used when map/reduce tasks crashes deterministically on + certain input. This happens due to bugs in the map/reduce function. The usual + course would be to fix these bugs. But sometimes this is not possible; + perhaps the bug is in third party libraries for which the source code is + not available. Due to this, the task never reaches to completion even with + multiple attempts and complete data for that task is lost.

    + +

    With this feature, only a small portion of data is lost surrounding + the bad record, which may be acceptable for some user applications. + see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}

    + +

    The skipping mode gets kicked off after certain no of failures + see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}

    + +

    In the skipping mode, the map/reduce task maintains the record range which + is getting processed at all times. Before giving the input to the + map/reduce function, it sends this record range to the Task tracker. + If task crashes, the Task tracker knows which one was the last reported + range. On further attempts that range get skipped.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + + + + + + all task attempt IDs + of any jobtracker, in any job, of the first + map task, we would use : +
     
    + TaskAttemptID.getTaskAttemptIDsPattern(null, null, TaskType.MAP, 1, null);
    + 
    + which will return : +
     "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param type the {@link TaskType} + @param taskId taskId number, or null + @param attemptId the task attempt number, or null + @return a regex pattern matching TaskAttemptIDs]]> +
    +
    + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +

     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param isMap whether the tip is a map, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs + @deprecated Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType, + Integer)}]]> +
    + + + + + + + + the first map task + of any jobtracker, of any job, we would use : +
     
    + TaskID.getTaskIDsPattern(null, null, true, 1);
    + 
    + which will return : +
     "task_[^_]*_[0-9]*_m_000001*" 
    + @param jtIdentifier jobTracker identifier, or null + @param jobId job number, or null + @param type the {@link TaskType}, or null + @param taskId taskId number, or null + @return a regex pattern matching TaskIDs]]> +
    +
    + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapred.join.expr property and + user-supplied join types from mapred.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + mapred.join.define.<ident> to a classname. In the expression + mapred.join.expr, the identifier will be assumed to be a + ComposableRecordReader. + mapred.join.keycomparator can be a classname used to compare keys + in the join. + @see #setFormat + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    + For the added Mapper the configuration given for it, + mapperConf, have precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain +

    + + @param job job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + The Mapper classes are invoked in a chained (or piped) fashion, the output of + the first becomes the input of the second, and so on until the last Mapper, + the output of the last Mapper will be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

    + Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reduce in the chain use maching output and input key and + value classes as no conversion is done by the chaining code. +

    + Using the ChainMapper and the ChainReducer classes is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. And + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    +
    + JobConf mapAConf = new JobConf(false);
    + ...
    + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, mapAConf);
    +
    + JobConf mapBConf = new JobConf(false);
    + ...
    + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, mapBConf);
    +
    + JobConf reduceConf = new JobConf(false);
    + ...
    + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, reduceConf);
    +
    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, null);
    +
    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
    +   LongWritable.class, LongWritable.class, true, null);
    +
    + FileInputFormat.setInputPaths(conf, inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    + ...
    +
    + JobClient jc = new JobClient(conf);
    + RunningJob job = jc.submitJob(conf);
    + ...
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Reducer leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Reducer does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    + For the added Reducer the configuration given for it, + reducerConf, have precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. + + @param job job's JobConf to add the Reducer class. + @param klass the Reducer class to add. + @param inputKeyClass reducer input key class. + @param inputValueClass reducer input value class. + @param outputKeyClass reducer output key class. + @param outputValueClass reducer output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param reducerConf a JobConf with the configuration for the Reducer + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + + + + + + + + It has to be specified how key and values are passed from one element of + the chain to the next, by value or by reference. If a Mapper leverages the + assumed semantics that the key and values are not modified by the collector + 'by value' must be used. If the Mapper does not expect this semantics, as + an optimization to avoid serialization and deserialization 'by reference' + can be used. +

    + For the added Mapper the configuration given for it, + mapperConf, have precedence over the job's JobConf. This + precedence is in effect when the task is running. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain + . + + @param job chain job's JobConf to add the Mapper class. + @param klass the Mapper class to add. + @param inputKeyClass mapper input key class. + @param inputValueClass mapper input value class. + @param outputKeyClass mapper output key class. + @param outputValueClass mapper output value class. + @param byValue indicates if key/values should be passed by value + to the next Mapper in the chain, if any. + @param mapperConf a JobConf with the configuration for the Mapper + class. It is recommended to use a JobConf without default values using the + JobConf(boolean loadDefaults) constructor with FALSE.]]> + + + + + + + If this method is overriden super.configure(...) should be + invoked at the beginning of the overwriter method.]]> + + + + + + + + + + reduce(...) method of the Reducer with the + map(...) methods of the Mappers in the chain.]]> + + + + + + + If this method is overriden super.close() should be + invoked at the end of the overwriter method.]]> + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion, the output of the first becomes the input of the + second, and so on until the last Mapper, the output of the last Mapper will + be written to the task's output. +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. + This enables having reusable specialized Mappers that can be combined to + perform composite operations within a single task. +

    + Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reduce in the chain use maching output and input key and + value classes as no conversion is done by the chaining code. +

    + Using the ChainMapper and the ChainReducer classes is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. And + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. +

    + ChainReducer usage pattern: +

    +

    + ...
    + conf.setJobName("chain");
    + conf.setInputFormat(TextInputFormat.class);
    + conf.setOutputFormat(TextOutputFormat.class);
    +
    + JobConf mapAConf = new JobConf(false);
    + ...
    + ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, mapAConf);
    +
    + JobConf mapBConf = new JobConf(false);
    + ...
    + ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, mapBConf);
    +
    + JobConf reduceConf = new JobConf(false);
    + ...
    + ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, reduceConf);
    +
    + ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, null);
    +
    + ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
    +   LongWritable.class, LongWritable.class, true, null);
    +
    + FileInputFormat.setInputPaths(conf, inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    + ...
    +
    + JobClient jc = new JobClient(conf);
    + RunningJob job = jc.submitJob(conf);
    + ...
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RecordReader's for CombineFileSplit's. + @see CombineFileSplit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileRecordReader. + + Subclassing is needed to get a concrete record reader wrapper because of the + constructor requirement. + + @see CombineFileRecordReader + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileInputFormat-equivalent for + SequenceFileInputFormat. + + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + CombineFileInputFormat-equivalent for + TextInputFormat. + + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the name output is multi, false + if it is single. If the name output is not defined it returns + false]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + By default these counters are disabled. +

    + MultipleOutputs supports counters, by default the are disabled. + The counters group is the {@link MultipleOutputs} class name. +

    + The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, and underscore '_' and the multiname. + + @param conf job conf to enableadd the named output. + @param enabled indicates if the counters will be enabled or not.]]> +
    +
    + + + + + By default these counters are disabled. +

    + MultipleOutputs supports counters, by default the are disabled. + The counters group is the {@link MultipleOutputs} class name. +

    + The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, and underscore '_' and the multiname. + + + @param conf job conf to enableadd the named output. + @return TRUE if the counters are enabled, FALSE if they are disabled.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + If overriden subclasses must invoke super.close() at the + end of their close() + + @throws java.io.IOException thrown if any of the MultipleOutput files + could not be closed properly.]]> + + + + OutputCollector passed to + the map() and reduce() methods of the + Mapper and Reducer implementations. +

    + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

    + A named output can be a single file or a multi file. The later is referred as + a multi named output. +

    + A multi named output is an unbound set of files all sharing the same + OutputFormat, key class and value class configuration. +

    + When named outputs are used within a Mapper implementation, + key/values written to a name output are not part of the reduce phase, only + key/values written to the job OutputCollector are part of the + reduce phase. +

    + MultipleOutputs supports counters, by default the are disabled. The counters + group is the {@link MultipleOutputs} class name. +

    + The names of the counters are the same as the named outputs. For multi + named outputs the name of the counter is the concatenation of the named + output, and underscore '_' and the multiname. +

    + Job configuration usage pattern is: +

    +
    + JobConf conf = new JobConf();
    +
    + conf.setInputPath(inDir);
    + FileOutputFormat.setOutputPath(conf, outDir);
    +
    + conf.setMapperClass(MOMap.class);
    + conf.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional multi sequencefile based output 'sequence' for the
    + // job
    + MultipleOutputs.addMultiNamedOutput(conf, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + JobClient jc = new JobClient();
    + RunningJob job = jc.submitJob(conf);
    +
    + ...
    + 
    +

    + Job configuration usage pattern is: +

    +
    + public class MOReduce implements
    +   Reducer<WritableComparable, Writable> {
    + private MultipleOutputs mos;
    +
    + public void configure(JobConf conf) {
    + ...
    + mos = new MultipleOutputs(conf);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + OutputCollector output, Reporter reporter)
    + throws IOException {
    + ...
    + mos.getCollector("text", reporter).collect(key, new Text("Hello"));
    + mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
    + mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
    + ...
    + }
    +
    + public void close() throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + of {@link org.apache.hadoop.mapred.MapRunner}, when the Map + operation is not CPU bound in order to improve throughput. +

    + Map implementations using this MapRunnable must be thread-safe. +

    + The Map-Reduce job has to be configured to use this MapRunnable class (using + the JobConf.setMapRunnerClass method) and + the number of threads the thread-pool can use with the + mapred.map.multithreadedrunner.threads property, its default + value is 10 threads. +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile. + @deprecated Use + {@link #setPartitionFile(Configuration, Path)} + instead]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Cluster. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ClusterMetrics provides clients with information such as: +

      +
    1. + Size of the cluster. +
    2. +
    3. + Number of blacklisted and decommissioned trackers. +
    4. +
    5. + Slot capacity of the cluster. +
    6. +
    7. + The number of currently occupied/reserved map and reduce slots. +
    8. +
    9. + The number of currently running map and reduce tasks. +
    10. +
    11. + The number of job submissions. +
    12. +
    + +

    Clients can query for the latest ClusterMetrics, via + {@link Cluster#getClusterStatus()}.

    + + @see Cluster]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Counters represent global counters, defined either by the + Map-Reduce framework or applications. Each Counter is named by + an {@link Enum} and has a long for the value.

    + +

    Counters are bunched into Groups, each comprising of + counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + the type of counter + @param the type of counter group + @param counters the old counters object]]> + + + + Counters holds per job/task counters, defined either by the + Map-Reduce framework or applications. Each Counter can be of + any {@link Enum} type.

    + +

    Counters are bunched into {@link CounterGroup}s, each + comprising of counters from a particular Enum class.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each {@link InputSplit} is then assigned to an individual {@link Mapper} + for processing.

    + +

    Note: The split is a logical split of the inputs and the + input files are not physically split into chunks. For e.g. a split could + be <input-file-path, start, offset> tuple. The InputFormat + also creates the {@link RecordReader} to read the {@link InputSplit}. + + @param context job configuration. + @return an array of {@link InputSplit}s for the job.]]> + + + + + + + + + + + + + InputFormat describes the input-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the InputFormat of the + job to:

    +

      +
    1. + Validate the input-specification of the job. +
    2. + Split-up the input file(s) into logical {@link InputSplit}s, each of + which is then assigned to an individual {@link Mapper}. +
    3. +
    4. + Provide the {@link RecordReader} implementation to be used to glean + input records from the logical InputSplit for processing by + the {@link Mapper}. +
    5. +
    + +

    The default behavior of file-based {@link InputFormat}s, typically + sub-classes of {@link FileInputFormat}, is to split the + input into logical {@link InputSplit}s based on the total size, in + bytes, of the input files. However, the {@link FileSystem} blocksize of + the input files is treated as an upper bound for input splits. A lower bound + on the split size can be set via + + mapreduce.input.fileinputformat.split.minsize.

    + +

    Clearly, logical splits based on input-size is insufficient for many + applications since record boundaries are to respected. In such cases, the + application has to also implement a {@link RecordReader} on whom lies the + responsibility to respect record-boundaries and present a record-oriented + view of the logical InputSplit to the individual task. + + @see InputSplit + @see RecordReader + @see FileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + SplitLocationInfos describing how the split + data is stored at each location. A null value indicates that all the + locations have the data stored on disk. + @throws IOException]]> + + + + InputSplit represents the data to be processed by an + individual {@link Mapper}. + +

    Typically, it presents a byte-oriented view on the input and is the + responsibility of {@link RecordReader} of the job to process this and present + a record-oriented view. + + @see InputFormat + @see RecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + A Cluster will be created from the conf parameter only when it's needed. + + @param conf the configuration + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException]]> + + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + @param conf the configuration + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException]]> + + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + @param status job status + @param conf job configuration + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException]]> + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + @param ignored + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException + @deprecated Use {@link #getInstance()}]]> + + + + + + + + Job makes a copy of the Configuration so + that any necessary internal modifications do not reflect on the incoming + parameter. + + @param ignored + @param conf job configuration + @return the {@link Job} , with no connection to a cluster yet. + @throws IOException + @deprecated Use {@link #getInstance(Configuration)}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + progress of the job's map-tasks, as a float between 0.0 + and 1.0. When all map tasks have completed, the function returns 1.0. + + @return the progress of the job's map-tasks. + @throws IOException]]> + + + + + + progress of the job's reduce-tasks, as a float between 0.0 + and 1.0. When all reduce tasks have completed, the function returns 1.0. + + @return the progress of the job's reduce-tasks. + @throws IOException]]> + + + + + + + progress of the job's cleanup-tasks, as a float between 0.0 + and 1.0. When all cleanup tasks have completed, the function returns 1.0. + + @return the progress of the job's cleanup-tasks. + @throws IOException]]> + + + + + + progress of the job's setup-tasks, as a float between 0.0 + and 1.0. When all setup tasks have completed, the function returns 1.0. + + @return the progress of the job's setup-tasks. + @throws IOException]]> + + + + + + true if the job is complete, else false. + @throws IOException]]> + + + + + + true if the job succeeded, else false. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + InputFormat to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + OutputFormat to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + Mapper to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Reducer to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + Partitioner to use + @throws IllegalStateException if the job is submitted]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if speculative execution + should be turned on, else false.]]> + + + + + + true if speculative execution + should be turned on for map tasks, + else false.]]> + + + + + + true if speculative execution + should be turned on for reduce tasks, + else false.]]> + + + + + + true, job-setup and job-cleanup will be + considered from {@link OutputCommitter} + else ignored.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The access permissions of the file will determine whether the localized + file will be shared across jobs. If the file is not readable by other or + if any of its parent directories is not executable by other, then the + file will not be shared. In the case of a path that ends in "/*", + sharing of the localized files will be determined solely from the + access permissions of the parent directories. The access permissions of + the individual files will be ignored. + + @param uri The uri of the cache to be localized. + @param conf Configuration to add the cache to.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + JobTracker is lost]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Job. + @throws IOException if fail to close.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + It allows the user to configure the + job, submit it, control its execution, and query the state. The set methods + only work until the job is submitted, afterwards they will throw an + IllegalStateException.

    + +

    + Normally the user creates the application, describes various facets of the + job via {@link Job} and then submits the job and monitor its progress.

    + +

    Here is an example on how to submit a job:

    +

    +     // Create a new Job
    +     Job job = Job.getInstance();
    +     job.setJarByClass(MyJob.class);
    +     
    +     // Specify various job-specific parameters     
    +     job.setJobName("myjob");
    +     
    +     job.setInputPath(new Path("in"));
    +     job.setOutputPath(new Path("out"));
    +     
    +     job.setMapperClass(MyJob.MyMapper.class);
    +     job.setReducerClass(MyJob.MyReducer.class);
    +
    +     // Submit the job, then poll for progress until the job is complete
    +     job.waitForCompletion(true);
    + 
    ]]> +
    + + + + + + + + + + + + + + + + + + + + + + + 1. + @return the number of reduce tasks for this job.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mapred.map.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per map task.]]> + + + + + mapred.reduce.max.attempts + property. If this property is not already set, the default is 4 attempts. + + @return the max number of attempts per reduce task.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example JobID is : + job_200707121733_0003 , which represents the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse JobID strings, but rather + use appropriate constructors or {@link #forName(String)} method. + + @see TaskID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the key input type to the Mapper + @param the value input type to the Mapper + @param the key output type from the Mapper + @param the value output type from the Mapper]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Maps are the individual tasks which transform input records into a + intermediate records. The transformed intermediate records need not be of + the same type as the input records. A given input pair may map to zero or + many output pairs.

    + +

    The Hadoop Map-Reduce framework spawns one map task for each + {@link InputSplit} generated by the {@link InputFormat} for the job. + Mapper implementations can access the {@link Configuration} for + the job via the {@link JobContext#getConfiguration()}. + +

    The framework first calls + {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by + {@link #map(Object, Object, org.apache.hadoop.mapreduce.Mapper.Context)} + for each key/value pair in the InputSplit. Finally + {@link #cleanup(org.apache.hadoop.mapreduce.Mapper.Context)} is called.

    + +

    All intermediate values associated with a given output key are + subsequently grouped by the framework, and passed to a {@link Reducer} to + determine the final output. Users can control the sorting and grouping by + specifying two key {@link RawComparator} classes.

    + +

    The Mapper outputs are partitioned per + Reducer. Users can control which keys (and hence records) go to + which Reducer by implementing a custom {@link Partitioner}. + +

    Users can optionally specify a combiner, via + {@link Job#setCombinerClass(Class)}, to perform local aggregation of the + intermediate outputs, which helps to cut down the amount of data transferred + from the Mapper to the Reducer. + +

    Applications can specify if and how the intermediate + outputs are to be compressed and which {@link CompressionCodec}s are to be + used via the Configuration.

    + +

    If the job has zero + reduces then the output of the Mapper is directly written + to the {@link OutputFormat} without sorting by keys.

    + +

    Example:

    +

    + public class TokenCounterMapper 
    +     extends Mapper<Object, Text, Text, IntWritable>{
    +    
    +   private final static IntWritable one = new IntWritable(1);
    +   private Text word = new Text();
    +   
    +   public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    +     StringTokenizer itr = new StringTokenizer(value.toString());
    +     while (itr.hasMoreTokens()) {
    +       word.set(itr.nextToken());
    +       context.write(word, one);
    +     }
    +   }
    + }
    + 
    + +

    Applications may override the + {@link #run(org.apache.hadoop.mapreduce.Mapper.Context)} method to exert + greater control on map processing e.g. multi-threaded Mappers + etc.

    + + @see InputFormat + @see JobContext + @see Partitioner + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + MarkableIterator is a wrapper iterator class that + implements the {@link MarkableIteratorInterface}.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if task output recovery is supported, + false otherwise + @see #recoverTask(TaskAttemptContext) + @deprecated Use {@link #isRecoverySupported(JobContext)} instead.]]> + + + + + + + true repeatable job commit is supported, + false otherwise + @throws IOException]]> + + + + + + + true if task output recovery is supported, + false otherwise + @throws IOException + @see #recoverTask(TaskAttemptContext)]]> + + + + + + + OutputCommitter. This is called from the application master + process, but it is called individually for each task. + + If an exception is thrown the task will be attempted again. + + This may be called multiple times for the same task. But from different + application attempts. + + @param taskContext Context of the task whose output is being recovered + @throws IOException]]> + + + + OutputCommitter describes the commit of task output for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputCommitter of + the job to:

    +

      +
    1. + Setup the job during initialization. For example, create the temporary + output directory for the job during the initialization of the job. +
    2. +
    3. + Cleanup the job after the job completion. For example, remove the + temporary output directory after the job completion. +
    4. +
    5. + Setup the task temporary output. +
    6. +
    7. + Check whether a task needs a commit. This is to avoid the commit + procedure if a task does not need commit. +
    8. +
    9. + Commit of the task output. +
    10. +
    11. + Discard the task commit. +
    12. +
    + The methods in this class can be called from several different processes and + from several different contexts. It is important to know which process and + which context each is called from. Each method should be marked accordingly + in its documentation. It is also important to note that not all methods are + guaranteed to be called once and only once. If a method is not guaranteed to + have this property the output committer needs to handle this appropriately. + Also note it will only be in rare situations where they may be called + multiple times for the same task. + + @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter + @see JobContext + @see TaskAttemptContext]]> +
    +
    + + + + + + + + + + + + + + + + + + + This is to validate the output specification for the job when it is + a job is submitted. Typically checks that it does not already exist, + throwing an exception when it already exists, so that output is not + overwritten.

    + + Implementations which write to filesystems which support delegation + tokens usually collect the tokens for the destination path(s) + and attach them to the job context's JobConf. + @param context information about the job + @throws IOException when output should not be attempted]]> +
    +
    + + + + + + + + + + OutputFormat describes the output-specification for a + Map-Reduce job. + +

    The Map-Reduce framework relies on the OutputFormat of the + job to:

    +

      +
    1. + Validate the output-specification of the job. For e.g. check that the + output directory doesn't already exist. +
    2. + Provide the {@link RecordWriter} implementation to be used to write out + the output files of the job. Output files are stored in a + {@link FileSystem}. +
    3. +
    + + @see RecordWriter]]> +
    +
    + + + + + + + + + + + Typically a hash function on a all or a subset of the key.

    + + @param key the key to be partioned. + @param value the entry value. + @param numPartitions the total number of partitions. + @return the partition number for the key.]]> +
    +
    + + Partitioner controls the partitioning of the keys of the + intermediate map-outputs. The key (or a subset of the key) is used to derive + the partition, typically by a hash function. The total number of partitions + is the same as the number of reduce tasks for the job. Hence this controls + which of the m reduce tasks the intermediate key (and hence the + record) is sent for reduction.

    + +

    Note: A Partitioner is created only when there are multiple + reducers.

    + +

    Note: If you require your Partitioner class to obtain the Job's + configuration object, implement the {@link Configurable} interface.

    + + @see Reducer]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "N/A" + + @return Scheduling information associated to particular Job Queue]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @param ]]> + + + + + + + + + + + + + + + + + + + + + + RecordWriter to future operations. + + @param context the context of the task + @throws IOException]]> + + + + RecordWriter writes the output <key, value> pairs + to an output file. + +

    RecordWriter implementations write the job outputs to the + {@link FileSystem}. + + @see OutputFormat]]> + + + + + + + + + + + + + + + + + + + + + + the class of the input keys + @param the class of the input values + @param the class of the output keys + @param the class of the output values]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Reducer implementations + can access the {@link Configuration} for the job via the + {@link JobContext#getConfiguration()} method.

    + +

    Reducer has 3 primary phases:

    +
      +
    1. + + Shuffle + +

      The Reducer copies the sorted output from each + {@link Mapper} using HTTP across the network.

      +
    2. + +
    3. + Sort + +

      The framework merge sorts Reducer inputs by + keys + (since different Mappers may have output the same key).

      + +

      The shuffle and sort phases occur simultaneously i.e. while outputs are + being fetched they are merged.

      + + SecondarySort + +

      To achieve a secondary sort on the values returned by the value + iterator, the application should extend the key with the secondary + key and define a grouping comparator. The keys will be sorted using the + entire key, but will be grouped using the grouping comparator to decide + which keys and values are sent in the same call to reduce.The grouping + comparator is specified via + {@link Job#setGroupingComparatorClass(Class)}. The sort order is + controlled by + {@link Job#setSortComparatorClass(Class)}.

      + + + For example, say that you want to find duplicate web pages and tag them + all with the url of the "best" known example. You would set up the job + like: +
        +
      • Map Input Key: url
      • +
      • Map Input Value: document
      • +
      • Map Output Key: document checksum, url pagerank
      • +
      • Map Output Value: url
      • +
      • Partitioner: by checksum
      • +
      • OutputKeyComparator: by checksum and then decreasing pagerank
      • +
      • OutputValueGroupingComparator: by checksum
      • +
      +
    4. + +
    5. + Reduce + +

      In this phase the + {@link #reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)} + method is called for each <key, (collection of values)> in + the sorted inputs.

      +

      The output of the reduce task is typically written to a + {@link RecordWriter} via + {@link Context#write(Object, Object)}.

      +
    6. +
    + +

    The output of the Reducer is not re-sorted.

    + +

    Example:

    +

    + public class IntSumReducer<Key> extends Reducer<Key,IntWritable,
    +                                                 Key,IntWritable> {
    +   private IntWritable result = new IntWritable();
    + 
    +   public void reduce(Key key, Iterable<IntWritable> values,
    +                      Context context) throws IOException, InterruptedException {
    +     int sum = 0;
    +     for (IntWritable val : values) {
    +       sum += val.get();
    +     }
    +     result.set(sum);
    +     context.write(key, result);
    +   }
    + }
    + 
    + + @see Mapper + @see Partitioner]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + counterName. + @param counterName counter name + @return the Counter for the given counterName]]> + + + + + + + groupName and + counterName. + @param counterName counter name + @return the Counter for the given groupName and + counterName]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskAttemptID is : + attempt_200707121733_0003_m_000005_0 , which represents the + zeroth task attempt for the fifth map task in the third job + running at the jobtracker started at 200707121733. +

    + Applications should never construct or parse TaskAttemptID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + An example TaskID is : + task_200707121733_0003_m_000005 , which represents the + fifth map task in the third job running at the jobtracker + started at 200707121733. +

    + Applications should never construct or parse TaskID strings + , but rather use appropriate constructors or {@link #forName(String)} + method. + + @see JobID + @see TaskAttemptID]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OutputCommitter for the task-attempt]]> + + + + the input key type for the task + @param the input value type for the task + @param the output key type for the task + @param the output value type for the task]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + type of the other counter + @param type of the other counter group + @param counters the counters object to copy + @param groupFactory the factory for new groups]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + type of counter inside the counters + @param type of group inside the counters]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + type of the counter for the group]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The key and values are passed from one element of the chain to the next, by + value. For the added Mapper the configuration given for it, + mapperConf, have precedence over the job's Configuration. This + precedence is in effect when the task is running. +

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain +

    + + @param job + The job. + @param klass + the Mapper class to add. + @param inputKeyClass + mapper input key class. + @param inputValueClass + mapper input value class. + @param outputKeyClass + mapper output key class. + @param outputValueClass + mapper output value class. + @param mapperConf + a configuration for the Mapper class. It is recommended to use a + Configuration without default values using the + Configuration(boolean loadDefaults) constructor with + FALSE.]]> +
    + + + + + + + + + + + + The Mapper classes are invoked in a chained (or piped) fashion, the output of + the first becomes the input of the second, and so on until the last Mapper, + the output of the last Mapper will be written to the task's output. +

    +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed in a chain. This enables having + reusable specialized Mappers that can be combined to perform composite + operations within a single task. +

    +

    + Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reduce in the chain use matching output and input key and + value classes as no conversion is done by the chaining code. +

    +

    + Using the ChainMapper and the ChainReducer classes is possible to compose + Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. And + immediate benefit of this pattern is a dramatic reduction in disk IO. +

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the chain. +

    + ChainMapper usage pattern: +

    + +

    + ...
    + Job = new Job(conf);
    +
    + Configuration mapAConf = new Configuration(false);
    + ...
    + ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, mapAConf);
    +
    + Configuration mapBConf = new Configuration(false);
    + ...
    + ChainMapper.addMapper(job, BMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, mapBConf);
    +
    + ...
    +
    + job.waitForComplettion(true);
    + ...
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + The key and values are passed from one element of the chain to the next, by + value. For the added Reducer the configuration given for it, + reducerConf, have precedence over the job's Configuration. + This precedence is in effect when the task is running. +

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. +

    + + @param job + the job + @param klass + the Reducer class to add. + @param inputKeyClass + reducer input key class. + @param inputValueClass + reducer input value class. + @param outputKeyClass + reducer output key class. + @param outputValueClass + reducer output value class. + @param reducerConf + a configuration for the Reducer class. It is recommended to use a + Configuration without default values using the + Configuration(boolean loadDefaults) constructor with + FALSE.]]> +
    +
    + + + + + + + + + + + + The key and values are passed from one element of the chain to the next, by + value For the added Mapper the configuration given for it, + mapperConf, have precedence over the job's Configuration. This + precedence is in effect when the task is running. +

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainMapper, this is done by the addMapper for the last mapper in the + chain. +

    + + @param job + The job. + @param klass + the Mapper class to add. + @param inputKeyClass + mapper input key class. + @param inputValueClass + mapper input value class. + @param outputKeyClass + mapper output key class. + @param outputValueClass + mapper output value class. + @param mapperConf + a configuration for the Mapper class. It is recommended to use a + Configuration without default values using the + Configuration(boolean loadDefaults) constructor with + FALSE.]]> +
    +
    + + + + + + + + + + + For each record output by the Reducer, the Mapper classes are invoked in a + chained (or piped) fashion. The output of the reducer becomes the input of + the first mapper and output of first becomes the input of the second, and so + on until the last Mapper, the output of the last Mapper will be written to + the task's output. +

    +

    + The key functionality of this feature is that the Mappers in the chain do not + need to be aware that they are executed after the Reducer or in a chain. This + enables having reusable specialized Mappers that can be combined to perform + composite operations within a single task. +

    +

    + Special care has to be taken when creating chains that the key/values output + by a Mapper are valid for the following Mapper in the chain. It is assumed + all Mappers and the Reduce in the chain use matching output and input key and + value classes as no conversion is done by the chaining code. +

    +

    Using the ChainMapper and the ChainReducer classes is possible to + compose Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]. And + immediate benefit of this pattern is a dramatic reduction in disk IO.

    +

    + IMPORTANT: There is no need to specify the output key/value classes for the + ChainReducer, this is done by the setReducer or the addMapper for the last + element in the chain. +

    + ChainReducer usage pattern: +

    + +

    + ...
    + Job = new Job(conf);
    + ....
    +
    + Configuration reduceConf = new Configuration(false);
    + ...
    + ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class,
    +   Text.class, Text.class, true, reduceConf);
    +
    + ChainReducer.addMapper(job, CMap.class, Text.class, Text.class,
    +   LongWritable.class, Text.class, false, null);
    +
    + ChainReducer.addMapper(job, DMap.class, LongWritable.class, Text.class,
    +   LongWritable.class, LongWritable.class, true, null);
    +
    + ...
    +
    + job.waitForCompletion(true);
    + ...
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBInputFormat emits LongWritables containing the record number as + key and DBWritables as value. + + The SQL query, and input class can be using one of the two + setInput methods.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link DBOutputFormat} accepts <key,value> pairs, where + key has a type extending DBWritable. Returned {@link RecordWriter} + writes only the key to the database with a batch SQL query.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBWritable. DBWritable, is similar to {@link Writable} + except that the {@link #write(PreparedStatement)} method takes a + {@link PreparedStatement}, and {@link #readFields(ResultSet)} + takes a {@link ResultSet}. +

    + Implementations are responsible for writing the fields of the object + to PreparedStatement, and reading the fields of the object from the + ResultSet. + +

    Example:

    + If we have the following table in the database : +
    + CREATE TABLE MyTable (
    +   counter        INTEGER NOT NULL,
    +   timestamp      BIGINT  NOT NULL,
    + );
    + 
    + then we can read/write the tuples from/to the table with : +

    + public class MyWritable implements Writable, DBWritable {
    +   // Some data     
    +   private int counter;
    +   private long timestamp;
    +       
    +   //Writable#write() implementation
    +   public void write(DataOutput out) throws IOException {
    +     out.writeInt(counter);
    +     out.writeLong(timestamp);
    +   }
    +       
    +   //Writable#readFields() implementation
    +   public void readFields(DataInput in) throws IOException {
    +     counter = in.readInt();
    +     timestamp = in.readLong();
    +   }
    +       
    +   public void write(PreparedStatement statement) throws SQLException {
    +     statement.setInt(1, counter);
    +     statement.setLong(2, timestamp);
    +   }
    +       
    +   public void readFields(ResultSet resultSet) throws SQLException {
    +     counter = resultSet.getInt(1);
    +     timestamp = resultSet.getLong(2);
    +   } 
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RecordReader's for + CombineFileSplit's. + + @see CombineFileSplit]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileRecordReader. + + Subclassing is needed to get a concrete record reader wrapper because of the + constructor requirement. + + @see CombineFileRecordReader + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + th Path]]> + + + + + + th Path]]> + + + + + + + + + + + th Path]]> + + + + + + + + + + + + + + + + + + + + + + + + + + CombineFileSplit can be used to implement {@link RecordReader}'s, + with reading one record per file. + + @see FileSplit + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + CombineFileInputFormat-equivalent for + SequenceFileInputFormat. + + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + CombineFileInputFormat-equivalent for + TextInputFormat. + + @see CombineFileInputFormat]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat always returns + true. Implementations that may deal with non-splittable files must + override this method. + + FileInputFormat implementations can override this and return + false to ensure that individual input files are never split-up + so that {@link Mapper}s process entire files. + + @param context the job context + @param filename the file name to check + @return is this file splitable?]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FileInputFormat is the base class for all file-based + InputFormats. This provides a generic implementation of + {@link #getSplits(JobContext)}. + + Implementations of FileInputFormat can also override the + {@link #isSplitable(JobContext, Path)} method to prevent input files + from being split-up in certain situations. Implementations that may + deal with non-splittable files must override this method, since + the default implementation assumes splitting is always possible.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    or + conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, recordLength); +

    + @see FixedLengthRecordReader]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the Job was added.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([,]*) + func ::= tbl(,"") + class ::= @see java.lang.Class#forName(java.lang.String) + path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) + } + Reads expression from the mapreduce.join.expr property and + user-supplied join types from mapreduce.join.define.<ident> + types. Paths supplied to tbl are given as input paths to the + InputFormat class listed. + @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ,

    ) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + (tbl(,),tbl(,),...,tbl(,)) }]]> + + + + + + + + mapreduce.join.define.<ident> to a classname. + In the expression mapreduce.join.expr, the identifier will be + assumed to be a ComposableRecordReader. + mapreduce.join.keycomparator can be a classname used to compare + keys in the join. + @see #setFormat + @see JoinRecordReader + @see MultiFilterRecordReader]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + capacity children to position + id in the parent reader. + The id of a root CompositeRecordReader is -1 by convention, but relying + on this is not recommended.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + override(S1,S2,S3) will prefer values + from S3 over S2, and values from S2 over S1 for all keys + emitted from all sources.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [<child1>,<child2>,...,<childn>]]]> + + + + + + + out. + TupleWritable format: + {@code + ...... + }]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + the map's input key type + @param the map's input value type + @param the map's output key type + @param the map's output value type + @param job the job + @return the mapper class to run]]> + + + + + + + the map input key type + @param the map input value type + @param the map output key type + @param the map output value type + @param job the job to modify + @param cls the class to use as the mapper]]> + + + + + + + + + + + + + + + + + It can be used instead of the default implementation, + {@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU + bound in order to improve throughput. +

    + Mapper implementations using this MapRunnable must be thread-safe. +

    + The Map-Reduce job has to be configured with the mapper to use via + {@link #setMapperClass(Job, Class)} and + the number of thread the thread-pool can use with the + {@link #getNumberOfThreads(JobContext)} method. The default + value is 10 threads. +

    ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MapContext to be wrapped + @return a wrapped Mapper.Context for custom implementations]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

  • + In applications which take a classname of committer in + a configuration option, set it to the canonical name of this class + (see {@link #NAME}). When this class is instantiated, it will + use the factory mechanism to locate the configured committer for the + destination. +
  • +
  • + In code, explicitly create an instance of this committer through + its constructor, then invoke commit lifecycle operations on it. + The dynamically configured committer will be created in the constructor + and have the lifecycle operations relayed to it. +
  • + ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the job output should be compressed, + false otherwise]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tasks' Side-Effect Files + +

    Some applications need to create/write-to side-files, which differ from + the actual job-outputs. + +

    In such cases there could be issues with 2 instances of the same TIP + (running simultaneously e.g. speculative tasks) trying to open/write-to the + same file (path) on HDFS. Hence the application-writer will have to pick + unique names per task-attempt (e.g. using the attemptid, say + attempt_200709221812_0001_m_000000_0), not just per TIP.

    + +

    To get around this the Map-Reduce framework helps the application-writer + out by maintaining a special + ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} + sub-directory for each task-attempt on HDFS where the output of the + task-attempt goes. On successful completion of the task-attempt the files + in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only) + are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the + framework discards the sub-directory of unsuccessful task-attempts. This + is completely transparent to the application.

    + +

    The application-writer can take advantage of this by creating any + side-files required in a work directory during execution + of his task i.e. via + {@link #getWorkOutputPath(TaskInputOutputContext)}, and + the framework will move them out similarly - thus she doesn't have to pick + unique paths per task-attempt.

    + +

    The entire discussion holds true for maps of jobs with + reducer=NONE (i.e. 0 reduces) since output of the map, in that case, + goes directly to HDFS.

    + + @return the {@link Path} to the task's temporary output directory + for the map-reduce job.]]> +
    +
    + + + + + + + + The path can be used to create custom files from within the map and + reduce tasks. The path name will be unique for each task. The path parent + will be the job output directory.

    ls + +

    This method uses the {@link #getUniqueFile} method to make the file name + unique for the task.

    + + @param context the context for the task. + @param name the name for the file. + @param extension the extension for the file + @return a unique path accross all tasks of the job.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Warning: when the baseOutputPath is a path that resolves + outside of the final job output directory, the directory is created + immediately and then persists through subsequent task retries, breaking + the concept of output committing.]]> + + + + + + + + + + Warning: when the baseOutputPath is a path that resolves + outside of the final job output directory, the directory is created + immediately and then persists through subsequent task retries, breaking + the concept of output committing.]]> + + + + + + + super.close() at the + end of their close()]]> + + + + + Case one: writing to additional outputs other than the job default output. + + Each additional output, or named output, may be configured with its own + OutputFormat, with its own key class and with its own value + class. +

    + +

    + Case two: to write data to different files provided by user +

    + +

    + MultipleOutputs supports counters, by default they are disabled. The + counters group is the {@link MultipleOutputs} class name. The names of the + counters are the same as the output name. These count the number records + written to each output name. +

    + + Usage pattern for job submission: +
    +
    + Job job = new Job();
    +
    + FileInputFormat.setInputPath(job, inDir);
    + FileOutputFormat.setOutputPath(job, outDir);
    +
    + job.setMapperClass(MOMap.class);
    + job.setReducerClass(MOReduce.class);
    + ...
    +
    + // Defines additional single text based output 'text' for the job
    + MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class,
    + LongWritable.class, Text.class);
    +
    + // Defines additional sequence-file based output 'sequence' for the job
    + MultipleOutputs.addNamedOutput(job, "seq",
    +   SequenceFileOutputFormat.class,
    +   LongWritable.class, Text.class);
    + ...
    +
    + job.waitForCompletion(true);
    + ...
    + 
    +

    + Usage in Reducer: +

    + <K, V> String generateFileName(K k, V v) {
    +   return k.toString() + "_" + v.toString();
    + }
    + 
    + public class MOReduce extends
    +   Reducer<WritableComparable, Writable,WritableComparable, Writable> {
    + private MultipleOutputs mos;
    + public void setup(Context context) {
    + ...
    + mos = new MultipleOutputs(context);
    + }
    +
    + public void reduce(WritableComparable key, Iterator<Writable> values,
    + Context context)
    + throws IOException {
    + ...
    + mos.write("text", , key, new Text("Hello"));
    + mos.write("seq", LongWritable(1), new Text("Bye"), "seq_a");
    + mos.write("seq", LongWritable(2), key, new Text("Chau"), "seq_b");
    + mos.write(key, new Text("value"), generateFileName(key, new Text("value")));
    + ...
    + }
    +
    + public void cleanup(Context) throws IOException {
    + mos.close();
    + ...
    + }
    +
    + }
    + 
    + +

    + When used in conjuction with org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat, + MultipleOutputs can mimic the behaviour of MultipleTextOutputFormat and MultipleSequenceFileOutputFormat + from the old Hadoop API - ie, output can be written from the Reducer to more than one location. +

    + +

    + Use MultipleOutputs.write(KEYOUT key, VALUEOUT value, String baseOutputPath) to write key and + value to a path specified by baseOutputPath, with no need to specify a named output. + Warning: when the baseOutputPath passed to MultipleOutputs.write + is a path that resolves outside of the final job output directory, the + directory is created immediately and then persists through subsequent + task retries, breaking the concept of output committing: +

    + +
    + private MultipleOutputs<Text, Text> out;
    + 
    + public void setup(Context context) {
    +   out = new MultipleOutputs<Text, Text>(context);
    +   ...
    + }
    + 
    + public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    + for (Text t : values) {
    +   out.write(key, t, generateFileName(<parameter list...>));
    +   }
    + }
    + 
    + protected void cleanup(Context context) throws IOException, InterruptedException {
    +   out.close();
    + }
    + 
    + +

    + Use your own code in generateFileName() to create a custom path to your results. + '/' characters in baseOutputPath will be translated into directory levels in your file system. + Also, append your custom-generated path with "part" or similar, otherwise your output will be -00000, -00001 etc. + No call to context.write() is necessary. See example generateFileName() code below. +

    + +
    + private String generateFileName(Text k) {
    +   // expect Text k in format "Surname|Forename"
    +   String[] kStr = k.toString().split("\\|");
    +   
    +   String sName = kStr[0];
    +   String fName = kStr[1];
    +
    +   // example for k = Smith|John
    +   // output written to /user/hadoop/path/to/output/Smith/John-r-00000 (etc)
    +   return sName + "/" + fName;
    + }
    + 
    + +

    + Using MultipleOutputs in this way will still create zero-sized default output, eg part-00000. + To prevent this use LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); + instead of job.setOutputFormatClass(TextOutputFormat.class); in your Hadoop job configuration. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • If an explicit committer factory is named, it is used.
  • +
  • The output path is examined. + If is non null and there is an explicit schema for that filesystem, + its factory is instantiated.
  • +
  • Otherwise, an instance of {@link FileOutputCommitter} is + created.
  • + + + In {@link FileOutputFormat}, the created factory has its method + {@link #createOutputCommitter(Path, TaskAttemptContext)} with a task + attempt context and a possibly null path.]]> +
    +
    + + + + + + + + + + This allows the user to specify the key class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param job the {@link Job} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + This allows the user to specify the value class to be different + from the actual class ({@link BytesWritable}) used for writing

    + + @param job the {@link Job} to modify + @param theClass the SequenceFile output key class.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + bytes[left:(right+1)] in Python syntax. + + @param conf configuration object + @param left left Python-style offset + @param right right Python-style offset]]> + + + + + + + bytes[offset:] in Python syntax. + + @param conf configuration object + @param offset left Python-style offset]]> + + + + + + + bytes[:(offset+1)] in Python syntax. + + @param conf configuration object + @param offset right Python-style offset]]> + + + + + + + + + + + + + + + + + + + + + Partition {@link BinaryComparable} keys using a configurable part of + the bytes array returned by {@link BinaryComparable#getBytes()}.

    + +

    The subarray to be used for the partitioning can be defined by means + of the following properties: +

      +
    • + mapreduce.partition.binarypartitioner.left.offset: + left offset in array (0 by default) +
    • +
    • + mapreduce.partition.binarypartitioner.right.offset: + right offset in array (-1 by default) +
    • +
    + Like in Python, both negative and positive offsets are allowed, but + the meaning is slightly different. In case of an array of length 5, + for instance, the possible offsets are: +
    
    +  +---+---+---+---+---+
    +  | B | B | B | B | B |
    +  +---+---+---+---+---+
    +    0   1   2   3   4
    +   -5  -4  -3  -2  -1
    + 
    + The first row of numbers gives the position of the offsets 0...5 in + the array; the second row gives the corresponding negative offsets. + Contrary to Python, the specified subarray has byte i + and j as first and last element, repectively, when + i and j are the left and right offset. + +

    For Hadoop programs written in Java, it is advisable to use one of + the following static convenience methods for setting the offsets: +

      +
    • {@link #setOffsets}
    • +
    • {@link #setLeftOffset}
    • +
    • {@link #setRightOffset}
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + total.order.partitioner.natural.order is not false, a trie + of the first total.order.partitioner.max.trie.depth(2) + 1 bytes + will be built. Otherwise, keys will be located using a binary search of + the partition keyset using the {@link org.apache.hadoop.io.RawComparator} + defined for this job. The input file must be sorted with the same + comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.]]> + + + + + + + + + + + + + + R reduces, there are R-1 + keys in the SequenceFile.]]> + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ReduceContext to be wrapped + @return a wrapped Reducer.Context for custom implementations]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    diff --git a/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.3.5.xml b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.3.5.xml new file mode 100644 index 00000000000..f2650d508e8 --- /dev/null +++ b/hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.3.5.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index 53e2de63c4a..5cec569dfa8 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -134,7 +134,7 @@ false - 3.3.4 + 3.3.5 -unstable diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_API_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_API_3.3.5.xml new file mode 100644 index 00000000000..9e31a72ac07 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_API_3.3.5.xml @@ -0,0 +1,26420 @@ + + + + + + + + + + + + + + + + + + + + The interface used by clients to obtain a new {@link ApplicationId} for + submitting new applications.

    + +

    The ResourceManager responds with a new, monotonically + increasing, {@link ApplicationId} which is used by the client to submit + a new application.

    + +

    The ResourceManager also responds with details such + as maximum resource capabilities in the cluster as specified in + {@link GetNewApplicationResponse}.

    + + @param request request to get a new ApplicationId + @return response containing the new ApplicationId to be used + to submit an application + @throws YarnException + @throws IOException + @see #submitApplication(SubmitApplicationRequest)]]> +
    +
    + + + + + + The interface used by clients to submit a new application to the + ResourceManager.

    + +

    The client is required to provide details such as queue, + {@link Resource} required to run the ApplicationMaster, + the equivalent of {@link ContainerLaunchContext} for launching + the ApplicationMaster etc. via the + {@link SubmitApplicationRequest}.

    + +

    Currently the ResourceManager sends an immediate (empty) + {@link SubmitApplicationResponse} on accepting the submission and throws + an exception if it rejects the submission. However, this call needs to be + followed by {@link #getApplicationReport(GetApplicationReportRequest)} + to make sure that the application gets properly submitted - obtaining a + {@link SubmitApplicationResponse} from ResourceManager doesn't guarantee + that RM 'remembers' this application beyond failover or restart. If RM + failover or RM restart happens before ResourceManager saves the + application's state successfully, the subsequent + {@link #getApplicationReport(GetApplicationReportRequest)} will throw + a {@link ApplicationNotFoundException}. The Clients need to re-submit + the application with the same {@link ApplicationSubmissionContext} when + it encounters the {@link ApplicationNotFoundException} on the + {@link #getApplicationReport(GetApplicationReportRequest)} call.

    + +

    During the submission process, it checks whether the application + already exists. If the application exists, it will simply return + SubmitApplicationResponse

    + +

    In secure mode,the ResourceManager verifies access to + queues etc. before accepting the application submission.

    + + @param request request to submit a new application + @return (empty) response on accepting the submission + @throws YarnException + @throws IOException + @see #getNewApplication(GetNewApplicationRequest)]]> +
    +
    + + + + + + The interface used by clients to request the + ResourceManager to fail an application attempt.

    + +

    The client, via {@link FailApplicationAttemptRequest} provides the + {@link ApplicationAttemptId} of the attempt to be failed.

    + +

    In secure mode,the ResourceManager verifies access to the + application, queue etc. before failing the attempt.

    + +

    Currently, the ResourceManager returns an empty response + on success and throws an exception on rejecting the request.

    + + @param request request to fail an attempt + @return ResourceManager returns an empty response + on success and throws an exception on rejecting the request + @throws YarnException + @throws IOException + @see #getQueueUserAcls(GetQueueUserAclsInfoRequest)]]> +
    +
    + + + + + + The interface used by clients to request the + ResourceManager to abort submitted application.

    + +

    The client, via {@link KillApplicationRequest} provides the + {@link ApplicationId} of the application to be aborted.

    + +

    In secure mode,the ResourceManager verifies access to the + application, queue etc. before terminating the application.

    + +

    Currently, the ResourceManager returns an empty response + on success and throws an exception on rejecting the request.

    + + @param request request to abort a submitted application + @return ResourceManager returns an empty response + on success and throws an exception on rejecting the request + @throws YarnException + @throws IOException + @see #getQueueUserAcls(GetQueueUserAclsInfoRequest)]]> +
    +
    + + + + + + The interface used by clients to get metrics about the cluster from + the ResourceManager.

    + +

    The ResourceManager responds with a + {@link GetClusterMetricsResponse} which includes the + {@link YarnClusterMetrics} with details such as number of current + nodes in the cluster.

    + + @param request request for cluster metrics + @return cluster metrics + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by clients to get a report of all nodes + in the cluster from the ResourceManager.

    + +

    The ResourceManager responds with a + {@link GetClusterNodesResponse} which includes the + {@link NodeReport} for all the nodes in the cluster.

    + + @param request request for report on all nodes + @return report on all nodes + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by clients to get information about queues + from the ResourceManager.

    + +

    The client, via {@link GetQueueInfoRequest}, can ask for details such + as used/total resources, child queues, running applications etc.

    + +

    In secure mode,the ResourceManager verifies access before + providing the information.

    + + @param request request to get queue information + @return queue information + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by clients to get information about queue + acls for current user from the ResourceManager. +

    + +

    The ResourceManager responds with queue acls for all + existing queues.

    + + @param request request to get queue acls for current user + @return queue acls for current user + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + The interface used by clients to obtain a new {@link ReservationId} for + submitting new reservations.

    + +

    The ResourceManager responds with a new, unique, + {@link ReservationId} which is used by the client to submit + a new reservation.

    + + @param request to get a new ReservationId + @return response containing the new ReservationId to be used + to submit a new reservation + @throws YarnException if the reservation system is not enabled. + @throws IOException on IO failures. + @see #submitReservation(ReservationSubmissionRequest)]]> +
    +
    + + + + + + + The interface used by clients to submit a new reservation to the + {@code ResourceManager}. +

    + +

    + The client packages all details of its request in a + {@link ReservationSubmissionRequest} object. This contains information + about the amount of capacity, temporal constraints, and concurrency needs. + Furthermore, the reservation might be composed of multiple stages, with + ordering dependencies among them. +

    + +

    + In order to respond, a new admission control component in the + {@code ResourceManager} performs an analysis of the resources that have + been committed over the period of time the user is requesting, verify that + the user requests can be fulfilled, and that it respect a sharing policy + (e.g., {@code CapacityOverTimePolicy}). Once it has positively determined + that the ReservationSubmissionRequest is satisfiable the + {@code ResourceManager} answers with a + {@link ReservationSubmissionResponse} that include a non-null + {@link ReservationId}. Upon failure to find a valid allocation the response + is an exception with the reason. + + On application submission the client can use this {@link ReservationId} to + obtain access to the reserved resources. +

    + +

    + The system guarantees that during the time-range specified by the user, the + reservationID will be corresponding to a valid reservation. The amount of + capacity dedicated to such queue can vary overtime, depending of the + allocation that has been determined. But it is guaranteed to satisfy all + the constraint expressed by the user in the + {@link ReservationSubmissionRequest}. +

    + + @param request the request to submit a new Reservation + @return response the {@link ReservationId} on accepting the submission + @throws YarnException if the request is invalid or reservation cannot be + created successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to update an existing Reservation. This is + referred to as a re-negotiation process, in which a user that has + previously submitted a Reservation. +

    + +

    + The allocation is attempted by virtually substituting all previous + allocations related to this Reservation with new ones, that satisfy the new + {@link ReservationUpdateRequest}. Upon success the previous allocation is + substituted by the new one, and on failure (i.e., if the system cannot find + a valid allocation for the updated request), the previous allocation + remains valid. + + The {@link ReservationId} is not changed, and applications currently + running within this reservation will automatically receive the resources + based on the new allocation. +

    + + @param request to update an existing Reservation (the ReservationRequest + should refer to an existing valid {@link ReservationId}) + @return response empty on successfully updating the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + updated successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to remove an existing Reservation. + + Upon deletion of a reservation applications running with this reservation, + are automatically downgraded to normal jobs running without any dedicated + reservation. +

    + + @param request to remove an existing Reservation (the ReservationRequest + should refer to an existing valid {@link ReservationId}) + @return response empty on successfully deleting the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + deleted successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to get the list of reservations in a plan. + The reservationId will be used to search for reservations to list if it is + provided. Otherwise, it will select active reservations within the + startTime and endTime (inclusive). +

    + + @param request to list reservations in a plan. Contains fields to select + String queue, ReservationId reservationId, long startTime, + long endTime, and a bool includeReservationAllocations. + + queue: Required. Cannot be null or empty. Refers to the + reservable queue in the scheduler that was selected when + creating a reservation submission + {@link ReservationSubmissionRequest}. + + reservationId: Optional. If provided, other fields will + be ignored. + + startTime: Optional. If provided, only reservations that + end after the startTime will be selected. This defaults + to 0 if an invalid number is used. + + endTime: Optional. If provided, only reservations that + start on or before endTime will be selected. This defaults + to Long.MAX_VALUE if an invalid number is used. + + includeReservationAllocations: Optional. Flag that + determines whether the entire reservation allocations are + to be returned. Reservation allocations are subject to + change in the event of re-planning as described by + {@code ReservationDefinition}. + + @return response that contains information about reservations that are + being searched for. + @throws YarnException if the request is invalid + @throws IOException on IO failures]]> +
    +
    + + + + + + + The interface used by client to get node to labels mappings in existing cluster +

    + + @param request + @return node to labels mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get labels to nodes mappings + in existing cluster +

    + + @param request + @return labels to nodes mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get node labels in the cluster +

    + + @param request to get node labels collection of this cluster + @return node labels collection of this cluster + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to set priority of an application. +

    + @param request to set priority of an application + @return an empty response + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by clients to request the + ResourceManager to signal a container. For example, + the client can send command OUTPUT_THREAD_DUMP to dump threads of the + container.

    + +

    The client, via {@link SignalContainerRequest} provides the + id of the container and the signal command.

    + +

    In secure mode,the ResourceManager verifies access to the + application before signaling the container. + The user needs to have MODIFY_APP permission.

    + +

    Currently, the ResourceManager returns an empty response + on success and throws an exception on rejecting the request.

    + + @param request request to signal a container + @return ResourceManager returns an empty response + on success and throws an exception on rejecting the request + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to set ApplicationTimeouts of an application. + The UpdateApplicationTimeoutsRequest should have timeout value with + absolute time with ISO8601 format yyyy-MM-dd'T'HH:mm:ss.SSSZ. +

    + Note: If application timeout value is less than or equal to current + time then update application throws YarnException. + @param request to set ApplicationTimeouts of an application + @return a response with updated timeouts. + @throws YarnException if update request has empty values or application is + in completing states. + @throws IOException on IO failures]]> +
    +
    + + + + + + + The interface used by clients to get all the resource profiles that are + available on the ResourceManager. +

    + @param request request to get all the resource profiles + @return Response containing a map of the profile name to Resource + capabilities + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + + The interface to get the details for a specific resource profile. +

    + @param request request to get the details of a resource profile + @return Response containing the details for a particular resource profile + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + + The interface to get the details for a specific resource profile. +

    + @param request request to get the details of a resource profile + @return Response containing the details for a particular resource profile + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + + The interface used by client to get attributes to nodes mappings + available in ResourceManager. +

    + + @param request request to get details of attributes to nodes mapping. + @return Response containing the details of attributes to nodes mappings. + @throws YarnException if any error happens inside YARN + @throws IOException incase of other errors]]> +
    +
    + + + + + + + The interface used by client to get node attributes available in + ResourceManager. +

    + + @param request request to get node attributes collection of this cluster. + @return Response containing node attributes collection. + @throws YarnException if any error happens inside YARN. + @throws IOException incase of other errors.]]> +
    +
    + + + + + + + The interface used by client to get node to attributes mappings. + in existing cluster. +

    + + @param request request to get nodes to attributes mapping. + @return nodes to attributes mappings. + @throws YarnException if any error happens inside YARN. + @throws IOException]]> +
    +
    + + The protocol between clients and the ResourceManager + to submit/abort jobs and to get information on applications, cluster metrics, + nodes, queues and ACLs.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The protocol between clients and the ApplicationHistoryServer to + get the information of completed applications etc. +

    ]]> +
    +
    + + + + + + + + + + The interface used by a new ApplicationMaster to register with + the ResourceManager. +

    + +

    + The ApplicationMaster needs to provide details such as RPC + Port, HTTP tracking url etc. as specified in + {@link RegisterApplicationMasterRequest}. +

    + +

    + The ResourceManager responds with critical details such as + maximum resource capabilities in the cluster as specified in + {@link RegisterApplicationMasterResponse}. +

    + +

    + Re-register is only allowed for Unmanaged Application Master + (UAM) HA, with + {@link org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext#getKeepContainersAcrossApplicationAttempts()} + set to true. +

    + + @param request registration request + @return registration respose + @throws YarnException + @throws IOException + @throws InvalidApplicationMasterRequestException The exception is thrown + when an ApplicationMaster tries to register more then once. + @see RegisterApplicationMasterRequest + @see RegisterApplicationMasterResponse]]> +
    +
    + + + + + + The interface used by an ApplicationMaster to notify the + ResourceManager about its completion (success or failed).

    + +

    The ApplicationMaster has to provide details such as + final state, diagnostics (in case of failures) etc. as specified in + {@link FinishApplicationMasterRequest}.

    + +

    The ResourceManager responds with + {@link FinishApplicationMasterResponse}.

    + + @param request completion request + @return completion response + @throws YarnException + @throws IOException + @see FinishApplicationMasterRequest + @see FinishApplicationMasterResponse]]> +
    +
    + + + + + + + The main interface between an ApplicationMaster and the + ResourceManager. +

    + +

    + The ApplicationMaster uses this interface to provide a list of + {@link ResourceRequest} and returns unused {@link Container} allocated to + it via {@link AllocateRequest}. Optionally, the + ApplicationMaster can also blacklist resources which + it doesn't want to use. +

    + +

    + This also doubles up as a heartbeat to let the + ResourceManager know that the ApplicationMaster + is alive. Thus, applications should periodically make this call to be kept + alive. The frequency depends on + {@link YarnConfiguration#RM_AM_EXPIRY_INTERVAL_MS} which defaults to + {@link YarnConfiguration#DEFAULT_RM_AM_EXPIRY_INTERVAL_MS}. +

    + +

    + The ResourceManager responds with list of allocated + {@link Container}, status of completed containers and headroom information + for the application. +

    + +

    + The ApplicationMaster can use the available headroom + (resources) to decide how to utilized allocated resources and make informed + decisions about future resource requests. +

    + + @param request + allocation request + @return allocation response + @throws YarnException + @throws IOException + @throws InvalidApplicationMasterRequestException + This exception is thrown when an ApplicationMaster calls allocate + without registering first. + @throws InvalidResourceBlacklistRequestException + This exception is thrown when an application provides an invalid + specification for blacklist of resources. + @throws InvalidResourceRequestException + This exception is thrown when a {@link ResourceRequest} is out of + the range of the configured lower and upper limits on the + resources. + @see AllocateRequest + @see AllocateResponse]]> +
    +
    + + The protocol between a live instance of ApplicationMaster + and the ResourceManager.

    + +

    This is used by the ApplicationMaster to register/unregister + and to request and obtain resources in the cluster from the + ResourceManager.

    ]]> +
    +
    + + + + + + + + + + The interface used by clients to claim a resource with the + SharedCacheManager. The client uses a checksum to identify the + resource and an {@link ApplicationId} to identify which application will be + using the resource. +

    + +

    + The SharedCacheManager responds with whether or not the + resource exists in the cache. If the resource exists, a Path + to the resource in the shared cache is returned. If the resource does not + exist, the response is empty. +

    + + @param request request to claim a resource in the shared cache + @return response indicating if the resource is already in the cache + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to release a resource with the + SharedCacheManager. This method is called once an application + is no longer using a claimed resource in the shared cache. The client uses + a checksum to identify the resource and an {@link ApplicationId} to + identify which application is releasing the resource. +

    + +

    + Note: This method is an optimization and the client is not required to call + it for correctness. +

    + +

    + Currently the SharedCacheManager sends an empty response. +

    + + @param request request to release a resource in the shared cache + @return (empty) response on releasing the resource + @throws YarnException + @throws IOException]]> +
    +
    + + + The protocol between clients and the SharedCacheManager to claim + and release resources in the shared cache. +

    ]]> +
    +
    + + + + + + + + + + The ApplicationMaster provides a list of + {@link StartContainerRequest}s to a NodeManager to + start {@link Container}s allocated to it using this interface. +

    + +

    + The ApplicationMaster has to provide details such as allocated + resource capability, security tokens (if enabled), command to be executed + to start the container, environment for the process, necessary + binaries/jar/shared-objects etc. via the {@link ContainerLaunchContext} in + the {@link StartContainerRequest}. +

    + +

    + The NodeManager sends a response via + {@link StartContainersResponse} which includes a list of + {@link Container}s of successfully launched {@link Container}s, a + containerId-to-exception map for each failed {@link StartContainerRequest} in + which the exception indicates errors from per container and a + allServicesMetaData map between the names of auxiliary services and their + corresponding meta-data. Note: None-container-specific exceptions will + still be thrown by the API method itself. +

    +

    + The ApplicationMaster can use + {@link #getContainerStatuses(GetContainerStatusesRequest)} to get updated + statuses of the to-be-launched or launched containers. +

    + + @param request + request to start a list of containers + @return response including conatinerIds of all successfully launched + containers, a containerId-to-exception map for failed requests and + a allServicesMetaData map. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The ApplicationMaster requests a NodeManager to + stop a list of {@link Container}s allocated to it using this + interface. +

    + +

    + The ApplicationMaster sends a {@link StopContainersRequest} + which includes the {@link ContainerId}s of the containers to be stopped. +

    + +

    + The NodeManager sends a response via + {@link StopContainersResponse} which includes a list of {@link ContainerId} + s of successfully stopped containers, a containerId-to-exception map for + each failed request in which the exception indicates errors from per + container. Note: None-container-specific exceptions will still be thrown by + the API method itself. ApplicationMaster can use + {@link #getContainerStatuses(GetContainerStatusesRequest)} to get updated + statuses of the containers. +

    + + @param request + request to stop a list of containers + @return response which includes a list of containerIds of successfully + stopped containers, a containerId-to-exception map for failed + requests. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The API used by the ApplicationMaster to request for current + statuses of Containers from the NodeManager. +

    + +

    + The ApplicationMaster sends a + {@link GetContainerStatusesRequest} which includes the {@link ContainerId}s + of all containers whose statuses are needed. +

    + +

    + The NodeManager responds with + {@link GetContainerStatusesResponse} which includes a list of + {@link ContainerStatus} of the successfully queried containers and a + containerId-to-exception map for each failed request in which the exception + indicates errors from per container. Note: None-container-specific + exceptions will still be thrown by the API method itself. +

    + + @param request + request to get ContainerStatuses of containers with + the specified ContainerIds + @return response containing the list of ContainerStatus of the + successfully queried containers and a containerId-to-exception map + for failed requests. + + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The API used by the ApplicationMaster to request for + resource increase of running containers on the NodeManager. +

    + + @param request + request to increase resource of a list of containers + @return response which includes a list of containerIds of containers + whose resource has been successfully increased and a + containerId-to-exception map for failed requests. + + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The API used by the ApplicationMaster to request for + resource update of running containers on the NodeManager. +

    + + @param request + request to update resource of a list of containers + @return response which includes a list of containerIds of containers + whose resource has been successfully updated and a + containerId-to-exception map for failed requests. + + @throws YarnException Exception specific to YARN + @throws IOException IOException thrown from NodeManager]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The protocol between an ApplicationMaster and a + NodeManager to start/stop and increase resource of containers + and to get status of running containers.

    + +

    If security is enabled the NodeManager verifies that the + ApplicationMaster has truly been allocated the container + by the ResourceManager and also verifies all interactions such + as stopping the container or obtaining status information for the container. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + response id used to track duplicate responses. + @return response id]]> + + + + + + response id used to track duplicate responses. + @param id response id]]> + + + + + current progress of application. + @return current progress of application]]> + + + + + + current progress of application + @param progress current progress of application]]> + + + + + ResourceRequest to update the + ResourceManager about the application's resource requirements. + @return the list of ResourceRequest + @see ResourceRequest]]> + + + + + + ResourceRequest to update the + ResourceManager about the application's resource requirements. + @param resourceRequests list of ResourceRequest to update the + ResourceManager about the application's + resource requirements + @see ResourceRequest]]> + + + + + ContainerId of containers being + released by the ApplicationMaster. + @return list of ContainerId of containers being + released by the ApplicationMaster]]> + + + + + + ContainerId of containers being + released by the ApplicationMaster + @param releaseContainers list of ContainerId of + containers being released by the + ApplicationMaster]]> + + + + + ResourceBlacklistRequest being sent by the + ApplicationMaster. + @return the ResourceBlacklistRequest being sent by the + ApplicationMaster + @see ResourceBlacklistRequest]]> + + + + + + ResourceBlacklistRequest to inform the + ResourceManager about the blacklist additions and removals + per the ApplicationMaster. + + @param resourceBlacklistRequest the ResourceBlacklistRequest + to inform the ResourceManager about + the blacklist additions and removals + per the ApplicationMaster + @see ResourceBlacklistRequest]]> + + + + + ApplicationMaster. + @return list of {@link UpdateContainerRequest} + being sent by the + ApplicationMaster.]]> + + + + + + ResourceManager about the containers that need to be + updated. + @param updateRequests list of UpdateContainerRequest for + containers to be updated]]> + + + + + ApplicationMaster. + @return list of {@link SchedulingRequest} being sent by the + ApplicationMaster.]]> + + + + + + ResourceManager about the application's resource requirements + (potentially including allocation tags and placement constraints). + @param schedulingRequests list of {@link SchedulingRequest} to update + the ResourceManager about the application's resource + requirements.]]> + + + + + + + + + + + + + + + + + The core request sent by the ApplicationMaster to the + ResourceManager to obtain resources in the cluster.

    + +

    The request includes: +

      +
    • A response id to track duplicate responses.
    • +
    • Progress information.
    • +
    • + A list of {@link ResourceRequest} to inform the + ResourceManager about the application's + resource requirements. +
    • +
    • + A list of unused {@link Container} which are being returned. +
    • +
    • + A list of {@link UpdateContainerRequest} to inform + the ResourceManager about the change in + requirements of running containers. +
    • +
    + + @see ApplicationMasterProtocol#allocate(AllocateRequest)]]> +
    +
    + + + + + + + responseId of the request. + @see AllocateRequest#setResponseId(int) + @param responseId responseId of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + progress of the request. + @see AllocateRequest#setProgress(float) + @param progress progress of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + askList of the request. + @see AllocateRequest#setAskList(List) + @param askList askList of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + releaseList of the request. + @see AllocateRequest#setReleaseList(List) + @param releaseList releaseList of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + resourceBlacklistRequest of the request. + @see AllocateRequest#setResourceBlacklistRequest( + ResourceBlacklistRequest) + @param resourceBlacklistRequest + resourceBlacklistRequest of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + updateRequests of the request. + @see AllocateRequest#setUpdateRequests(List) + @param updateRequests updateRequests of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + schedulingRequests of the request. + @see AllocateRequest#setSchedulingRequests(List) + @param schedulingRequests SchedulingRequest of the request + @return {@link AllocateRequestBuilder}]]> + + + + + + trackingUrl of the request. + @see AllocateRequest#setTrackingUrl(String) + @param trackingUrl new tracking url + @return {@link AllocateRequestBuilder}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ResourceManager needs the + ApplicationMaster to take some action then it will send an + AMCommand to the ApplicationMaster. See AMCommand + for details on commands and actions for them. + @return AMCommand if the ApplicationMaster should + take action, null otherwise + @see AMCommand]]> + + + + + last response id. + @return last response id]]> + + + + + newly allocated Container by the + ResourceManager. + @return list of newly allocated Container]]> + + + + + available headroom for resources in the cluster for the + application. + @return limit of available headroom for resources in the cluster for the + application]]> + + + + + completed containers' statuses. + @return the list of completed containers' statuses]]> + + + + + updated NodeReports. Updates could + be changes in health, availability etc of the nodes. + @return The delta of updated nodes since the last response]]> + + + + + + + + + + + The message is a snapshot of the resources the RM wants back from the AM. + While demand persists, the RM will repeat its request; applications should + not interpret each message as a request for additional + resources on top of previous messages. Resources requested consistently + over some duration may be forcibly killed by the RM. + + @return A specification of the resources to reclaim from this AM.]]> + + + + + + 1) AM is receiving first container on underlying NodeManager.
    + OR
    + 2) NMToken master key rolled over in ResourceManager and AM is getting new + container on the same underlying NodeManager. +

    + AM will receive one NMToken per NM irrespective of the number of containers + issued on same NM. AM is expected to store these tokens until issued a + new token for the same NM. + @return list of NMTokens required for communicating with NM]]> + + + + + ResourceManager. + @return list of newly increased containers]]> + + + + + + + + + + + + + + + + + + + + + + + + + + UpdateContainerError for + containers updates requests that were in error]]> + + + + + ResourceManager from previous application attempts which + have not been reported to the Application Master yet. +
    + These containers were recovered by the RM after the application master + had already registered. This may happen after RM restart when some NMs get + delayed in connecting to the RM and reporting the active containers. + Since they were not reported in the registration + response, they are reported in the response to the AM heartbeat. + + @return the list of running containers as viewed by + ResourceManager from previous application attempts.]]> +
    +
    + + + + + + + ResourceManager the + ApplicationMaster during resource negotiation. +

    + The response, includes: +

      +
    • Response ID to track duplicate responses.
    • +
    • + An AMCommand sent by ResourceManager to let the + {@code ApplicationMaster} take some actions (resync, shutdown etc.). +
    • +
    • A list of newly allocated {@link Container}.
    • +
    • A list of completed {@link Container}s' statuses.
    • +
    • + The available headroom for resources in the cluster for the + application. +
    • +
    • A list of nodes whose status has been updated.
    • +
    • The number of available nodes in a cluster.
    • +
    • A description of resources requested back by the cluster
    • +
    • AMRMToken, if AMRMToken has been rolled over
    • +
    • + A list of {@link Container} representing the containers + whose resource has been increased. +
    • +
    • + A list of {@link Container} representing the containers + whose resource has been decreased. +
    • +
    + + @see ApplicationMasterProtocol#allocate(AllocateRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: {@link NMToken} will be used for authenticating communication with + {@code NodeManager}. + @return the list of container tokens to be used for authorization during + container resource update. + @see NMToken]]> + + + + + + AllocateResponse.getUpdatedContainers. + The token contains the container id and resource capability required for + container resource update. + @param containersToUpdate the list of container tokens to be used + for container resource increase.]]> + + + + The request sent by Application Master to the + Node Manager to change the resource quota of a container.

    + + @see ContainerManagementProtocol#updateContainer(ContainerUpdateRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + The response sent by the NodeManager to the + ApplicationMaster when asked to update container resource. +

    + + @see ContainerManagementProtocol#updateContainer(ContainerUpdateRequest)]]> +
    +
    + + + + + + + + + + + ApplicationAttemptId of the attempt to be failed. + @return ApplicationAttemptId of the attempt.]]> + + + + + + + The request sent by the client to the ResourceManager + to fail an application attempt.

    + +

    The request includes the {@link ApplicationAttemptId} of the attempt to + be failed.

    + + @see ApplicationClientProtocol#failApplicationAttempt(FailApplicationAttemptRequest)]]> +
    +
    + + + + + + + The response sent by the ResourceManager to the client + failing an application attempt.

    + +

    Currently it's empty.

    + + @see ApplicationClientProtocol#failApplicationAttempt(FailApplicationAttemptRequest)]]> +
    +
    + + + + + + + + + + + + + final state of the ApplicationMaster. + @return final state of the ApplicationMaster]]> + + + + + + final state of the ApplicationMaster + @param finalState final state of the ApplicationMaster]]> + + + + + diagnostic information on application failure. + @return diagnostic information on application failure]]> + + + + + + diagnostic information on application failure. + @param diagnostics diagnostic information on application failure]]> + + + + + tracking URL for the ApplicationMaster. + This url if contains scheme then that will be used by resource manager + web application proxy otherwise it will default to http. + @return tracking URLfor the ApplicationMaster]]> + + + + + + final tracking URLfor the ApplicationMaster. + This is the web-URL to which ResourceManager or web-application proxy will + redirect client/users once the application is finished and the + ApplicationMaster is gone. +

    + If the passed url has a scheme then that will be used by the + ResourceManager and web-application proxy, otherwise the scheme will + default to http. +

    +

    + Empty, null, "N/A" strings are all valid besides a real URL. In case an url + isn't explicitly passed, it defaults to "N/A" on the ResourceManager. +

    + + @param url + tracking URLfor the ApplicationMaster]]> + + + + + The final request includes details such: +

      +
    • Final state of the {@code ApplicationMaster}
    • +
    • + Diagnostic information in case of failure of the + {@code ApplicationMaster} +
    • +
    • Tracking URL
    • +
    + + @see ApplicationMasterProtocol#finishApplicationMaster(FinishApplicationMasterRequest)]]> +
    +
    + + + + + + + + + + + + ResourceManager to a + ApplicationMaster on it's completion. +

    + The response, includes: +

      +
    • A flag which indicates that the application has successfully unregistered + with the RM and the application can safely stop.
    • +
    +

    + Note: The flag indicates whether the application has successfully + unregistered and is safe to stop. The application may stop after the flag is + true. If the application stops before the flag is true then the RM may retry + the application. + + @see ApplicationMasterProtocol#finishApplicationMaster(FinishApplicationMasterRequest)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ApplicationAttemptId of an application attempt. + + @return ApplicationAttemptId of an application attempt]]> + + + + + + ApplicationAttemptId of an application attempt + + @param applicationAttemptId + ApplicationAttemptId of an application attempt]]> + + + + + The request sent by a client to the ResourceManager to get an + {@link ApplicationAttemptReport} for an application attempt. +

    + +

    + The request should include the {@link ApplicationAttemptId} of the + application attempt. +

    + + @see ApplicationAttemptReport + @see ApplicationHistoryProtocol#getApplicationAttemptReport(GetApplicationAttemptReportRequest)]]> +
    +
    + + + + + + + + + + + ApplicationAttemptReport for the application attempt. + + @return ApplicationAttemptReport for the application attempt]]> + + + + + + ApplicationAttemptReport for the application attempt. + + @param applicationAttemptReport + ApplicationAttemptReport for the application attempt]]> + + + + + The response sent by the ResourceManager to a client requesting + an application attempt report. +

    + +

    + The response includes an {@link ApplicationAttemptReport} which has the + details about the particular application attempt +

    + + @see ApplicationAttemptReport + @see ApplicationHistoryProtocol#getApplicationAttemptReport(GetApplicationAttemptReportRequest)]]> +
    +
    + + + + + + + + + + + ApplicationId of an application + + @return ApplicationId of an application]]> + + + + + + ApplicationId of an application + + @param applicationId + ApplicationId of an application]]> + + + + + The request from clients to get a list of application attempt reports of an + application from the ResourceManager. +

    + + @see ApplicationHistoryProtocol#getApplicationAttempts(GetApplicationAttemptsRequest)]]> +
    +
    + + + + + + + + + + + ApplicationReport of an application. + + @return a list of ApplicationReport of an application]]> + + + + + + ApplicationReport of an application. + + @param applicationAttempts + a list of ApplicationReport of an application]]> + + + + + The response sent by the ResourceManager to a client requesting + a list of {@link ApplicationAttemptReport} for application attempts. +

    + +

    + The ApplicationAttemptReport for each application includes the + details of an application attempt. +

    + + @see ApplicationAttemptReport + @see ApplicationHistoryProtocol#getApplicationAttempts(GetApplicationAttemptsRequest)]]> +
    +
    + + + + + + + + + + + ApplicationId of the application. + @return ApplicationId of the application]]> + + + + + + ApplicationId of the application + @param applicationId ApplicationId of the application]]> + + + + The request sent by a client to the ResourceManager to + get an {@link ApplicationReport} for an application.

    + +

    The request should include the {@link ApplicationId} of the + application.

    + + @see ApplicationClientProtocol#getApplicationReport(GetApplicationReportRequest) + @see ApplicationReport]]> +
    +
    + + + + + + + + ApplicationReport for the application. + @return ApplicationReport for the application]]> + + + + The response sent by the ResourceManager to a client + requesting an application report.

    + +

    The response includes an {@link ApplicationReport} which has details such + as user, queue, name, host on which the ApplicationMaster is + running, RPC port, tracking URL, diagnostics, start time etc.

    + + @see ApplicationClientProtocol#getApplicationReport(GetApplicationReportRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + The request from clients to get a report of Applications matching the + giving application types in the cluster from the + ResourceManager. +

    + + @see ApplicationClientProtocol#getApplications(GetApplicationsRequest) + +

    Setting any of the parameters to null, would just disable that + filter

    + + @param scope {@link ApplicationsRequestScope} to filter by + @param users list of users to filter by + @param queues list of scheduler queues to filter by + @param applicationTypes types of applications + @param applicationTags application tags to filter by + @param applicationStates application states to filter by + @param startRange range of application start times to filter by + @param finishRange range of application finish times to filter by + @param limit number of applications to limit to + @return {@link GetApplicationsRequest} to be used with + {@link ApplicationClientProtocol#getApplications(GetApplicationsRequest)}]]> +
    +
    + + + + + The request from clients to get a report of Applications matching the + giving application types in the cluster from the + ResourceManager. +

    + + @param scope {@link ApplicationsRequestScope} to filter by + @see ApplicationClientProtocol#getApplications(GetApplicationsRequest) + @return a report of Applications in {@link GetApplicationsRequest}]]> +
    +
    + + + + + The request from clients to get a report of Applications matching the + giving application types in the cluster from the + ResourceManager. +

    + + + @see ApplicationClientProtocol#getApplications(GetApplicationsRequest) + @return a report of Applications in {@link GetApplicationsRequest}]]> +
    +
    + + + + + The request from clients to get a report of Applications matching the + giving application states in the cluster from the + ResourceManager. +

    + + + @see ApplicationClientProtocol#getApplications(GetApplicationsRequest) + @return a report of Applications in {@link GetApplicationsRequest}]]> +
    +
    + + + + + + The request from clients to get a report of Applications matching the + giving and application types and application types in the cluster from the + ResourceManager. +

    + + + @see ApplicationClientProtocol#getApplications(GetApplicationsRequest) + @return a report of Applications in GetApplicationsRequest]]> +
    +
    + + + + + + + + + + + + The request from clients to get a report of Applications + in the cluster from the ResourceManager.

    + + @see ApplicationClientProtocol#getApplications(GetApplicationsRequest)]]> +
    +
    + + + + + + + + ApplicationReport for applications. + @return ApplicationReport for applications]]> + + + + The response sent by the ResourceManager to a client + requesting an {@link ApplicationReport} for applications.

    + +

    The ApplicationReport for each application includes details + such as user, queue, name, host on which the ApplicationMaster + is running, RPC port, tracking URL, diagnostics, start time etc.

    + + @see ApplicationReport + @see ApplicationClientProtocol#getApplications(GetApplicationsRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + The request from clients to get node to attribute value mapping for all or + given set of Node AttributeKey's in the cluster from the + ResourceManager. +

    + + @see ApplicationClientProtocol#getAttributesToNodes + (GetAttributesToNodesRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + The response sent by the ResourceManager to a client requesting + node to attribute value mapping for all or given set of Node AttributeKey's. +

    + + @see ApplicationClientProtocol#getAttributesToNodes + (GetAttributesToNodesRequest)]]> +
    +
    + + + + + + + + + The request sent by clients to get cluster metrics from the + ResourceManager.

    + +

    Currently, this is empty.

    + + @see ApplicationClientProtocol#getClusterMetrics(GetClusterMetricsRequest)]]> +
    +
    + + + + + + + + YarnClusterMetrics for the cluster. + @return YarnClusterMetrics for the cluster]]> + + + + ResourceManager to a client + requesting cluster metrics. + + @see YarnClusterMetrics + @see ApplicationClientProtocol#getClusterMetrics(GetClusterMetricsRequest)]]> + + + + + + + + + + + + + + + The request from clients to get node attributes in the cluster from the + ResourceManager. +

    + + @see ApplicationClientProtocol#getClusterNodeAttributes + (GetClusterNodeAttributesRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + The response sent by the ResourceManager to a client requesting + a node attributes in cluster. +

    + + @see ApplicationClientProtocol#getClusterNodeAttributes + (GetClusterNodeAttributesRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The request from clients to get a report of all nodes + in the cluster from the ResourceManager.

    + + The request will ask for all nodes in the given {@link NodeState}s. + + @see ApplicationClientProtocol#getClusterNodes(GetClusterNodesRequest)]]> +
    +
    + + + + + + + + NodeReport for all nodes in the cluster. + @return NodeReport for all nodes in the cluster]]> + + + + The response sent by the ResourceManager to a client + requesting a {@link NodeReport} for all nodes.

    + +

    The NodeReport contains per-node information such as + available resources, number of containers, tracking url, rack name, health + status etc. + + @see NodeReport + @see ApplicationClientProtocol#getClusterNodes(GetClusterNodesRequest)]]> + + + + + + + + + + + + + ContainerId of the Container. + + @return ContainerId of the Container]]> + + + + + + ContainerId of the container + + @param containerId + ContainerId of the container]]> + + + + + The request sent by a client to the ResourceManager to get an + {@link ContainerReport} for a container. +

    ]]> +
    +
    + + + + + + + + + + + ContainerReport for the container. + + @return ContainerReport for the container]]> + + + + + + + + The response sent by the ResourceManager to a client requesting + a container report. +

    + +

    + The response includes a {@link ContainerReport} which has details of a + container. +

    ]]> +
    +
    + + + + + + + + + + + ApplicationAttemptId of an application attempt. + + @return ApplicationAttemptId of an application attempt]]> + + + + + + ApplicationAttemptId of an application attempt + + @param applicationAttemptId + ApplicationAttemptId of an application attempt]]> + + + + + The request from clients to get a list of container reports, which belong to + an application attempt from the ResourceManager. +

    + + @see ApplicationHistoryProtocol#getContainers(GetContainersRequest)]]> +
    +
    + + + + + + + + + + + ContainerReport for all the containers of an + application attempt. + + @return a list of ContainerReport for all the containers of an + application attempt]]> + + + + + + ContainerReport for all the containers of an + application attempt. + + @param containers + a list of ContainerReport for all the containers of + an application attempt]]> + + + + + The response sent by the ResourceManager to a client requesting + a list of {@link ContainerReport} for containers. +

    + +

    + The ContainerReport for each container includes the container + details. +

    + + @see ContainerReport + @see ApplicationHistoryProtocol#getContainers(GetContainersRequest)]]> +
    +
    + + + + + + + + + + + ContainerIds of containers for which to obtain + the ContainerStatus. + + @return the list of ContainerIds of containers for which to + obtain the ContainerStatus.]]> + + + + + + ContainerIds of containers for which to obtain + the ContainerStatus + + @param containerIds + a list of ContainerIds of containers for which to + obtain the ContainerStatus]]> + + + + ApplicationMaster to the + NodeManager to get {@link ContainerStatus} of requested + containers. + + @see ContainerManagementProtocol#getContainerStatuses(GetContainerStatusesRequest)]]> + + + + + + + + + + ContainerStatuses of the requested containers. + + @return ContainerStatuses of the requested containers.]]> + + + + + + + + + NodeManager to the + ApplicationMaster when asked to obtain the + ContainerStatus of requested containers. + + @see ContainerManagementProtocol#getContainerStatuses(GetContainerStatusesRequest)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The request sent by clients to get a new {@link ApplicationId} for + submitting an application.

    + +

    Currently, this is empty.

    + + @see ApplicationClientProtocol#getNewApplication(GetNewApplicationRequest)]]> +
    +
    + + + + + + + + new ApplicationId allocated by the + ResourceManager. + @return new ApplicationId allocated by the + ResourceManager]]> + + + + + ResourceManager in the cluster. + @return maximum capability of allocated resources in the cluster]]> + + + + The response sent by the ResourceManager to the client for + a request to get a new {@link ApplicationId} for submitting applications.

    + +

    Clients can submit an application with the returned + {@link ApplicationId}.

    + + @see ApplicationClientProtocol#getNewApplication(GetNewApplicationRequest)]]> +
    +
    + + + + + + + + + The request sent by clients to get a new {@code ReservationId} for + submitting an reservation.

    + + {@code ApplicationClientProtocol#getNewReservation(GetNewReservationRequest)}]]> +
    +
    + + + + + + + + + + + + The response sent by the ResourceManager to the client for + a request to get a new {@link ReservationId} for submitting reservations.

    + +

    Clients can submit an reservation with the returned + {@link ReservationId}.

    + + {@code ApplicationClientProtocol#getNewReservation(GetNewReservationRequest)}]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + The request from clients to get nodes to attributes mapping + in the cluster from the ResourceManager. +

    + + @see ApplicationClientProtocol#getNodesToAttributes + (GetNodesToAttributesRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + The response sent by the ResourceManager to a client requesting + nodes to attributes mapping. +

    + + @see ApplicationClientProtocol#getNodesToAttributes + (GetNodesToAttributesRequest)]]> +
    +
    + + + + + + + + + + + + + + queue name for which to get queue information. + @return queue name for which to get queue information]]> + + + + + + queue name for which to get queue information + @param queueName queue name for which to get queue information]]> + + + + + active applications required? + @return true if applications' information is to be included, + else false]]> + + + + + + active applications? + @param includeApplications fetch information about active + applications?]]> + + + + + child queues required? + @return true if information about child queues is required, + else false]]> + + + + + + child queues? + @param includeChildQueues fetch information about child queues?]]> + + + + + child queue hierarchy required? + @return true if information about entire hierarchy is + required, false otherwise]]> + + + + + + child queue hierarchy? + @param recursive fetch information on the entire child queue + hierarchy?]]> + + + + The request sent by clients to get queue information + from the ResourceManager.

    + + @see ApplicationClientProtocol#getQueueInfo(GetQueueInfoRequest)]]> +
    +
    + + + + + + + + QueueInfo for the specified queue. + @return QueueInfo for the specified queue]]> + + + + + The response includes a {@link QueueInfo} which has details such as + queue name, used/total capacities, running applications, child queues etc. + + @see QueueInfo + @see ApplicationClientProtocol#getQueueInfo(GetQueueInfoRequest)]]> + + + + + + + + + + + The request sent by clients to the ResourceManager to + get queue acls for the current user.

    + +

    Currently, this is empty.

    + + @see ApplicationClientProtocol#getQueueUserAcls(GetQueueUserAclsInfoRequest)]]> +
    +
    + + + + + + + + QueueUserACLInfo per queue for the user. + @return QueueUserACLInfo per queue for the user]]> + + + + The response sent by the ResourceManager to clients + seeking queue acls for the user.

    + +

    The response contains a list of {@link QueueUserACLInfo} which + provides information about {@link QueueACL} per queue.

    + + @see QueueACL + @see QueueUserACLInfo + @see ApplicationClientProtocol#getQueueUserAcls(GetQueueUserAclsInfoRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: {@link NMToken} will be used for authenticating communication with + {@code NodeManager}. + @return the list of container tokens to be used for authorization during + container resource increase. + @see NMToken]]> + + + + + + AllocateResponse.getIncreasedContainers. + The token contains the container id and resource capability required for + container resource increase. + @param containersToIncrease the list of container tokens to be used + for container resource increase.]]> + + + + The request sent by Application Master to the + Node Manager to change the resource quota of a container.

    + + @see ContainerManagementProtocol#increaseContainersResource(IncreaseContainersResourceRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + The response sent by the NodeManager to the + ApplicationMaster when asked to increase container resource. +

    + + @see ContainerManagementProtocol#increaseContainersResource(IncreaseContainersResourceRequest)]]> +
    +
    + + + + + + + + + + + ApplicationId of the application to be aborted. + @return ApplicationId of the application to be aborted]]> + + + + + + + + diagnostics to which the application is being killed. + @return diagnostics to which the application is being killed]]> + + + + + + diagnostics to which the application is being killed. + @param diagnostics diagnostics to which the application is being + killed]]> + + + + The request sent by the client to the ResourceManager + to abort a submitted application.

    + +

    The request includes the {@link ApplicationId} of the application to be + aborted.

    + + @see ApplicationClientProtocol#forceKillApplication(KillApplicationRequest)]]> +
    +
    + + + + + + + + + + + + ResourceManager to the client aborting + a submitted application. +

    + The response, includes: +

      +
    • + A flag which indicates that the process of killing the application is + completed or not. +
    • +
    + Note: user is recommended to wait until this flag becomes true, otherwise if + the ResourceManager crashes before the process of killing the + application is completed, the ResourceManager may retry this + application on recovery. + + @see ApplicationClientProtocol#forceKillApplication(KillApplicationRequest)]]> +
    +
    + + + + + + + + + + + + ApplicationId of the application to be moved. + @return ApplicationId of the application to be moved]]> + + + + + + ApplicationId of the application to be moved. + @param appId ApplicationId of the application to be moved]]> + + + + + + + + + + + + + + + The request sent by the client to the ResourceManager + to move a submitted application to a different queue.

    + +

    The request includes the {@link ApplicationId} of the application to be + moved and the queue to place it in.

    + + @see ApplicationClientProtocol#moveApplicationAcrossQueues(MoveApplicationAcrossQueuesRequest)]]> +
    +
    + + + + + + + + The response sent by the ResourceManager to the client moving + a submitted application to a different queue. +

    +

    + A response without exception means that the move has completed successfully. +

    + + @see ApplicationClientProtocol#moveApplicationAcrossQueues(MoveApplicationAcrossQueuesRequest)]]> +
    +
    + + + + + + + + + + + RegisterApplicationMasterRequest. + If port, trackingUrl is not used, use the following default value: +
      +
    • port: -1
    • +
    • trackingUrl: null
    • +
    + The port is allowed to be any integer larger than or equal to -1. + @return the new instance of RegisterApplicationMasterRequest]]> +
    +
    + + + host on which the ApplicationMaster is + running. + @return host on which the ApplicationMaster is running]]> + + + + + + host on which the ApplicationMaster is + running. + @param host host on which the ApplicationMaster + is running]]> + + + + + RPC port on which the {@code ApplicationMaster} is + responding. + @return the RPC port on which the {@code ApplicationMaster} + is responding]]> + + + + + + RPC port on which the {@code ApplicationMaster} is + responding. + @param port RPC port on which the {@code ApplicationMaster} + is responding]]> + + + + + tracking URL for the ApplicationMaster. + This url if contains scheme then that will be used by resource manager + web application proxy otherwise it will default to http. + @return tracking URL for the ApplicationMaster]]> + + + + + + tracking URLfor the ApplicationMaster while + it is running. This is the web-URL to which ResourceManager or + web-application proxy will redirect client/users while the application and + the ApplicationMaster are still running. +

    + If the passed url has a scheme then that will be used by the + ResourceManager and web-application proxy, otherwise the scheme will + default to http. +

    +

    + Empty, null, "N/A" strings are all valid besides a real URL. In case an url + isn't explicitly passed, it defaults to "N/A" on the ResourceManager. +

    + + @param trackingUrl + tracking URLfor the ApplicationMaster]]> + + + + + PlacementConstraint associated with the tags, i.e., each + {@link org.apache.hadoop.yarn.api.records.SchedulingRequest} that has those + tags will be placed taking into account the corresponding constraint. + + @return A map of Placement Constraints.]]> + + + + + + PlacementConstraint associated with the tags. + For example: + Map < + <hb_regionserver> -> node_anti_affinity, + <hb_regionserver, hb_master> -> rack_affinity, + ... + > + @param placementConstraints Placement Constraint Mapping.]]> + + + + + The registration includes details such as: +

      +
    • Hostname on which the AM is running.
    • +
    • RPC Port
    • +
    • Tracking URL
    • +
    + + @see ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)]]> +
    +
    + + + + + + + + ResourceManager in the cluster. + @return maximum capability of allocated resources in the cluster]]> + + + + + ApplicationACLs for the application. + @return all the ApplicationACLs]]> + + + + + Get ClientToAMToken master key.

    +

    The ClientToAMToken master key is sent to ApplicationMaster + by ResourceManager via {@link RegisterApplicationMasterResponse} + , used to verify corresponding ClientToAMToken.

    + @return ClientToAMToken master key]]> +
    +
    + + + + + + + + + Get the queue that the application was placed in.

    + @return the queue that the application was placed in.]]> + + + + + + Set the queue that the application was placed in.

    ]]> + + + + + + Get the list of running containers as viewed by + ResourceManager from previous application attempts. +

    + + @return the list of running containers as viewed by + ResourceManager from previous application attempts + @see RegisterApplicationMasterResponse#getNMTokensFromPreviousAttempts()]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + The response contains critical details such as: +
      +
    • Maximum capability for allocated resources in the cluster.
    • +
    • {@code ApplicationACL}s for the application.
    • +
    • ClientToAMToken master key.
    • +
    + + @see ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)]]> +
    +
    + + + + + + + + + + + + + + + + ContainerId of the container to re-initialize. + + @return ContainerId of the container to re-initialize.]]> + + + + + ContainerLaunchContext to re-initialize the container + with. + + @return ContainerLaunchContext of to re-initialize the + container with.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + ApplicationId of the resource to be released. + + @return ApplicationId]]> + + + + + + ApplicationId of the resource to be released. + + @param id ApplicationId]]> + + + + + key of the resource to be released. + + @return key]]> + + + + + + key of the resource to be released. + + @param key unique identifier for the resource]]> + + + + The request from clients to release a resource in the shared cache.

    ]]> +
    +
    + + + + + + + + The response to clients from the SharedCacheManager when + releasing a resource in the shared cache. +

    + +

    + Currently, this is empty. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The response sent by the ResourceManager to a client on + reservation submission.

    + +

    Currently, this is empty.

    + + {@code ApplicationClientProtocol#submitReservation( + ReservationSubmissionRequest)}]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ContainerId of the container to localize resources. + + @return ContainerId of the container to localize resources.]]> + + + + + LocalResource required by the container. + + @return all LocalResource required by the container]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ContainerId of the container to signal. + @return ContainerId of the container to signal.]]> + + + + + + ContainerId of the container to signal.]]> + + + + + SignalContainerCommand of the signal request. + @return SignalContainerCommand of the signal request.]]> + + + + + + SignalContainerCommand of the signal request.]]> + + + + The request sent by the client to the ResourceManager + or by the ApplicationMaster to the NodeManager + to signal a container. + @see SignalContainerCommand

    ]]> +
    +
    + + + + + + + The response sent by the ResourceManager to the client + signalling a container.

    + +

    Currently it's empty.

    + + @see ApplicationClientProtocol#signalToContainer(SignalContainerRequest)]]> +
    +
    + + + + + + + + + + + + ContainerLaunchContext for the container to be started + by the NodeManager. + + @return ContainerLaunchContext for the container to be started + by the NodeManager]]> + + + + + + ContainerLaunchContext for the container to be started + by the NodeManager + @param context ContainerLaunchContext for the container to be + started by the NodeManager]]> + + + + + + Note: {@link NMToken} will be used for authenticating communication with + {@code NodeManager}. + @return the container token to be used for authorization during starting + container. + @see NMToken + @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]> + + + + + + + The request sent by the ApplicationMaster to the + NodeManager to start a container.

    + +

    The ApplicationMaster has to provide details such as + allocated resource capability, security tokens (if enabled), command + to be executed to start the container, environment for the process, + necessary binaries/jar/shared-objects etc. via the + {@link ContainerLaunchContext}.

    + + @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + The request which contains a list of {@link StartContainerRequest} sent by + the ApplicationMaster to the NodeManager to + start containers. +

    + +

    + In each {@link StartContainerRequest}, the ApplicationMaster has + to provide details such as allocated resource capability, security tokens (if + enabled), command to be executed to start the container, environment for the + process, necessary binaries/jar/shared-objects etc. via the + {@link ContainerLaunchContext}. +

    + + @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]> +
    +
    + + + + + + + + ContainerId s of the containers that are + started successfully. + + @return the list of ContainerId s of the containers that are + started successfully. + @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]> + + + + + + + + + + + Get the meta-data from all auxiliary services running on the + NodeManager. +

    +

    + The meta-data is returned as a Map between the auxiliary service names and + their corresponding per service meta-data as an opaque blob + ByteBuffer +

    + +

    + To be able to interpret the per-service meta-data, you should consult the + documentation for the Auxiliary-service configured on the NodeManager +

    + + @return a Map between the names of auxiliary services and their + corresponding meta-data]]> +
    +
    + + + The response sent by the NodeManager to the + ApplicationMaster when asked to start an allocated + container. +

    + + @see ContainerManagementProtocol#startContainers(StartContainersRequest)]]> +
    +
    + + + + + + + + + + + ContainerIds of the containers to be stopped. + @return ContainerIds of containers to be stopped]]> + + + + + + ContainerIds of the containers to be stopped. + @param containerIds ContainerIds of the containers to be stopped]]> + + + + The request sent by the ApplicationMaster to the + NodeManager to stop containers.

    + + @see ContainerManagementProtocol#stopContainers(StopContainersRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + The response sent by the NodeManager to the + ApplicationMaster when asked to stop allocated + containers. +

    + + @see ContainerManagementProtocol#stopContainers(StopContainersRequest)]]> +
    +
    + + + + + + + + + + + ApplicationSubmissionContext for the application. + @return ApplicationSubmissionContext for the application]]> + + + + + + ApplicationSubmissionContext for the application. + @param context ApplicationSubmissionContext for the + application]]> + + + + The request sent by a client to submit an application to the + ResourceManager.

    + +

    The request, via {@link ApplicationSubmissionContext}, contains + details such as queue, {@link Resource} required to run the + ApplicationMaster, the equivalent of + {@link ContainerLaunchContext} for launching the + ApplicationMaster etc. + + @see ApplicationClientProtocol#submitApplication(SubmitApplicationRequest)]]> + + + + + + + + + The response sent by the ResourceManager to a client on + application submission.

    + +

    Currently, this is empty.

    + + @see ApplicationClientProtocol#submitApplication(SubmitApplicationRequest)]]> +
    +
    + + + + + + + + + + + + ApplicationId of the application. + + @return ApplicationId of the application]]> + + + + + + ApplicationId of the application. + + @param applicationId ApplicationId of the application]]> + + + + + Priority of the application to be set. + + @return Priority of the application to be set.]]> + + + + + + Priority of the application. + + @param priority Priority of the application]]> + + + + + The request sent by the client to the ResourceManager to set or + update the application priority. +

    +

    + The request includes the {@link ApplicationId} of the application and + {@link Priority} to be set for an application +

    + + @see ApplicationClientProtocol#updateApplicationPriority(UpdateApplicationPriorityRequest)]]> +
    +
    + + + + + + + + + + + Priority of the application to be set. + @return Updated Priority of the application.]]> + + + + + + Priority of the application. + + @param priority Priority of the application]]> + + + + + The response sent by the ResourceManager to the client on update + the application priority. +

    +

    + A response without exception means that the move has completed successfully. +

    + + @see ApplicationClientProtocol#updateApplicationPriority(UpdateApplicationPriorityRequest)]]> +
    +
    + + + + + + + + + + + + ApplicationId of the application. + @return ApplicationId of the application]]> + + + + + + ApplicationId of the application. + @param applicationId ApplicationId of the application]]> + + + + + ApplicationTimeouts of the application. Timeout value is + in ISO8601 standard with format yyyy-MM-dd'T'HH:mm:ss.SSSZ. + @return all ApplicationTimeouts of the application.]]> + + + + + + ApplicationTimeouts for the application. Timeout value + is absolute. Timeout value should meet ISO8601 format. Support ISO8601 + format is yyyy-MM-dd'T'HH:mm:ss.SSSZ. All pre-existing Map entries + are cleared before adding the new Map. + @param applicationTimeouts ApplicationTimeoutss for the + application]]> + + + + + The request sent by the client to the ResourceManager to set or + update the application timeout. +

    +

    + The request includes the {@link ApplicationId} of the application and timeout + to be set for an application +

    ]]> +
    +
    + + + + + + + + + + ApplicationTimeouts of the application. Timeout value is + in ISO8601 standard with format yyyy-MM-dd'T'HH:mm:ss.SSSZ. + @return all ApplicationTimeouts of the application.]]> + + + + + + ApplicationTimeouts for the application. Timeout value + is absolute. Timeout value should meet ISO8601 format. Support ISO8601 + format is yyyy-MM-dd'T'HH:mm:ss.SSSZ. All pre-existing Map entries + are cleared before adding the new Map. + @param applicationTimeouts ApplicationTimeoutss for the + application]]> + + + + + The response sent by the ResourceManager to the client on update + application timeout. +

    +

    + A response without exception means that the update has completed + successfully. +

    ]]> +
    +
    + + + + + + + + ApplicationId of the resource to be used. + + @return ApplicationId]]> + + + + + + ApplicationId of the resource to be used. + + @param id ApplicationId]]> + + + + + key of the resource to be used. + + @return key]]> + + + + + + key of the resource to be used. + + @param key unique identifier for the resource]]> + + + + + The request from clients to the SharedCacheManager that claims a + resource in the shared cache. +

    ]]> +
    +
    + + + + + + + + Path corresponding to the requested resource in the + shared cache. + + @return String A Path if the resource exists in the shared + cache, null otherwise]]> + + + + + + Path corresponding to a resource in the shared cache. + + @param p A Path corresponding to a resource in the shared + cache]]> + + + + + The response from the SharedCacheManager to the client that indicates whether + a requested resource exists in the cache. +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ApplicationId of the ApplicationAttempId. + @return ApplicationId of the ApplicationAttempId]]> + + + + + attempt id of the Application. + @return attempt id of the Application]]> + + + + + + + + + + + + + + + + + + + + + ApplicationAttemptId denotes the particular attempt + of an ApplicationMaster for a given {@link ApplicationId}.

    + +

    Multiple attempts might be needed to run an application to completion due + to temporal failures of the ApplicationMaster such as hardware + failures, connectivity issues etc. on the node on which it was scheduled.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + YarnApplicationAttemptState of the application attempt. + + @return YarnApplicationAttemptState of the application attempt]]> + + + + + RPC port of this attempt ApplicationMaster. + + @return RPC port of this attempt ApplicationMaster]]> + + + + + host on which this attempt of + ApplicationMaster is running. + + @return host on which this attempt of + ApplicationMaster is running]]> + + + + + diagnositic information of the application attempt in case + of errors. + + @return diagnositic information of the application attempt in case + of errors]]> + + + + + tracking url for the application attempt. + + @return tracking url for the application attempt]]> + + + + + original tracking url for the application attempt. + + @return original tracking url for the application attempt]]> + + + + + ApplicationAttemptId of this attempt of the + application + + @return ApplicationAttemptId of the attempt]]> + + + + + ContainerId of AMContainer for this attempt + + @return ContainerId of the attempt]]> + + + + + + + finish time of the application. + + @return finish time of the application]]> + + + + + It includes details such as: +
      +
    • {@link ApplicationAttemptId} of the application.
    • +
    • Host on which the ApplicationMaster of this attempt is + running.
    • +
    • RPC port of the ApplicationMaster of this attempt.
    • +
    • Tracking URL.
    • +
    • Diagnostic information in case of errors.
    • +
    • {@link YarnApplicationAttemptState} of the application attempt.
    • +
    • {@link ContainerId} of the master Container.
    • +
    ]]> +
    +
    + + + + + + + + + + + + + ApplicationId + which is unique for all applications started by a particular instance + of the ResourceManager. + @return short integer identifier of the ApplicationId]]> + + + + + start time of the ResourceManager which is + used to generate globally unique ApplicationId. + @return start time of the ResourceManager]]> + + + + + + + + + + + + + + + + + + + + + ApplicationId represents the globally unique + identifier for an application.

    + +

    The globally unique nature of the identifier is achieved by using the + cluster timestamp i.e. start-time of the + ResourceManager along with a monotonically increasing counter + for the application.

    ]]> +
    +
    + + + + + + + + ApplicationId of the application. + @return ApplicationId of the application]]> + + + + + ApplicationAttemptId of the current + attempt of the application + @return ApplicationAttemptId of the attempt]]> + + + + + user who submitted the application. + @return user who submitted the application]]> + + + + + queue to which the application was submitted. + @return queue to which the application was submitted]]> + + + + + name of the application. + @return name of the application]]> + + + + + host on which the ApplicationMaster + is running. + @return host on which the ApplicationMaster + is running]]> + + + + + RPC port of the ApplicationMaster. + @return RPC port of the ApplicationMaster]]> + + + + + client token for communicating with the + ApplicationMaster. +

    + ClientToAMToken is the security token used by the AMs to verify + authenticity of any client. +

    + +

    + The ResourceManager, provides a secure token (via + {@link ApplicationReport#getClientToAMToken()}) which is verified by the + ApplicationMaster when the client directly talks to an AM. +

    + @return client token for communicating with the + ApplicationMaster]]> +
    +
    + + + YarnApplicationState of the application. + @return YarnApplicationState of the application]]> + + + + + diagnositic information of the application in case of + errors. + @return diagnositic information of the application in case + of errors]]> + + + + + tracking url for the application. + @return tracking url for the application]]> + + + + + start time of the application. + @return start time of the application]]> + + + + + + + + + finish time of the application. + @return finish time of the application]]> + + + + + final finish status of the application. + @return final finish status of the application]]> + + + + + + + + + + + + + + + + + + + + + + + + + + The AMRM token is required for AM to RM scheduling operations. For + managed Application Masters YARN takes care of injecting it. For unmanaged + Applications Masters, the token must be obtained via this method and set + in the {@link org.apache.hadoop.security.UserGroupInformation} of the + current user. +

    + The AMRM token will be returned only if all the following conditions are + met: +

      +
    • the requester is the owner of the ApplicationMaster
    • +
    • the application master is an unmanaged ApplicationMaster
    • +
    • the application master is in ACCEPTED state
    • +
    + Else this method returns NULL. + + @return the AM to RM token if available.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes details such as: +
      +
    • {@link ApplicationId} of the application.
    • +
    • Applications user.
    • +
    • Application queue.
    • +
    • Application name.
    • +
    • Host on which the ApplicationMaster is running.
    • +
    • RPC port of the ApplicationMaster.
    • +
    • Tracking URL.
    • +
    • {@link YarnApplicationState} of the application.
    • +
    • Diagnostic information in case of errors.
    • +
    • Start time of the application.
    • +
    • Client {@link Token} of the application (if security is enabled).
    • +
    + + @see ApplicationClientProtocol#getApplicationReport(org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest)]]> +
    +
    + + + + + + + + + + + + + Resource. -1 for invalid/inaccessible reports. + @return the used Resource]]> + + + + + Resource. -1 for invalid/inaccessible reports. + @return the reserved Resource]]> + + + + + Resource. -1 for invalid/inaccessible reports. + @return the needed Resource]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ApplicationId of the submitted application. + @return ApplicationId of the submitted application]]> + + + + + + ApplicationId of the submitted application. + @param applicationId ApplicationId of the submitted + application]]> + + + + + name. + @return application name]]> + + + + + + name. + @param applicationName application name]]> + + + + + queue to which the application is being submitted. + @return queue to which the application is being submitted]]> + + + + + + queue to which the application is being submitted + @param queue queue to which the application is being submitted]]> + + + + + Priority of the application. + @return Priority of the application]]> + + + + + ContainerLaunchContext to describe the + Container with which the ApplicationMaster is + launched. + @return ContainerLaunchContext for the + ApplicationMaster container]]> + + + + + + ContainerLaunchContext to describe the + Container with which the ApplicationMaster is + launched. + @param amContainer ContainerLaunchContext for the + ApplicationMaster container]]> + + + + + YarnApplicationState. + Such apps will not be retried by the RM on app attempt failure. + The default value is false. + @return true if the AM is not managed by the RM]]> + + + + + + + + + + + + + + + + + + + + + + ApplicationMaster for this + application. Please note this will be DEPRECATED, use getResource + in getAMContainerResourceRequest instead. + + @return the resource required by the ApplicationMaster for + this application.]]> + + + + + + ApplicationMaster for this + application. + + @param resource the resource required by the ApplicationMaster + for this application.]]> + + + + + + + + + + + + + + + + + + + + + + + For managed AM, if the flag is true, running containers will not be killed + when application attempt fails and these containers will be retrieved by + the new application attempt on registration via + {@link ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)}. +

    +

    + For unmanaged AM, if the flag is true, RM allows re-register and returns + the running containers in the same attempt back to the UAM for HA. +

    + + @param keepContainers the flag which indicates whether to keep containers + across application attempts.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + getResource and getPriority of + ApplicationSubmissionContext. + + Number of containers and Priority will be ignored. + + @return ResourceRequest of the AM container + @deprecated See {@link #getAMContainerResourceRequests()}]]> + + + + + + + + + + + getAMContainerResourceRequest and its behavior. + + Number of containers and Priority will be ignored. + + @return List of ResourceRequests of the AM container]]> + + + + + + + + + + + + + + + + + + + + + + LogAggregationContext of the application + + @return LogAggregationContext of the application]]> + + + + + + LogAggregationContext for the application + + @param logAggregationContext + for the application]]> + + + + + + + + + + + + + + + + ApplicationTimeouts of the application. Timeout value is + in seconds. + @return all ApplicationTimeouts of the application.]]> + + + + + + ApplicationTimeouts for the application in seconds. + All pre-existing Map entries are cleared before adding the new Map. +

    + Note: If application timeout value is less than or equal to zero + then application submission will throw an exception. +

    + @param applicationTimeouts ApplicationTimeoutss for the + application]]> +
    +
    + + + + + + + + + + + + + + It includes details such as: +
      +
    • {@link ApplicationId} of the application.
    • +
    • Application user.
    • +
    • Application name.
    • +
    • {@link Priority} of the application.
    • +
    • + {@link ContainerLaunchContext} of the container in which the + ApplicationMaster is executed. +
    • +
    • + maxAppAttempts. The maximum number of application attempts. + It should be no larger than the global number of max attempts in the + YARN configuration. +
    • +
    • + attemptFailuresValidityInterval. The default value is -1. + when attemptFailuresValidityInterval in milliseconds is set to + {@literal >} 0, the failure number will no take failures which happen + out of the validityInterval into failure count. If failure count + reaches to maxAppAttempts, the application will be failed. +
    • +
    • Optional, application-specific {@link LogAggregationContext}
    • +
    + + @see ContainerLaunchContext + @see ApplicationClientProtocol#submitApplication(org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + expiryTime for given timeout type. + @return expiryTime in ISO8601 standard with format + yyyy-MM-dd'T'HH:mm:ss.SSSZ.]]> + + + + + + expiryTime for given timeout type. + @param expiryTime in ISO8601 standard with format + yyyy-MM-dd'T'HH:mm:ss.SSSZ.]]> + + + + + Remaining Time of an application for given timeout type. + @return Remaining Time in seconds.]]> + + + + + + Remaining Time of an application for given timeout type. + @param remainingTime in seconds.]]> + + + + +
  • {@link ApplicationTimeoutType} of the timeout type.
  • +
  • Expiry time in ISO8601 standard with format + yyyy-MM-dd'T'HH:mm:ss.SSSZ or "UNLIMITED".
  • +
  • Remaining time in seconds.
  • + + The possible values for {ExpiryTime, RemainingTimeInSeconds} are +
      +
    • {UNLIMITED,-1} : Timeout is not configured for given timeout type + (LIFETIME).
    • +
    • {ISO8601 date string, 0} : Timeout is configured and application has + completed.
    • +
    • {ISO8601 date string, greater than zero} : Timeout is configured and + application is RUNNING. Application will be timed out after configured + value.
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resource allocated to the container. + @return Resource allocated to the container]]> + + + + + Priority at which the Container was + allocated. + @return Priority at which the Container was + allocated]]> + + + + + ContainerToken for the container. +

    ContainerToken is the security token used by the framework + to verify authenticity of any Container.

    + +

    The ResourceManager, on container allocation provides a + secure token which is verified by the NodeManager on + container launch.

    + +

    Applications do not need to care about ContainerToken, they + are transparently handled by the framework - the allocated + Container includes the ContainerToken.

    + + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) + @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) + + @return ContainerToken for the container]]> +
    +
    + + + ID corresponding to the original {@code + ResourceRequest{@link #getAllocationRequestId()}}s which is satisfied by + this allocated {@code Container}. +

    + The scheduler may return multiple {@code AllocateResponse}s corresponding + to the same ID as and when scheduler allocates {@code Container}s. + Applications can continue to completely ignore the returned ID in + the response and use the allocation for any of their outstanding requests. +

    + + @return the ID corresponding to the original allocation request + which is satisfied by this allocation.]]> + + + + + The {@code ResourceManager} is the sole authority to allocate any + {@code Container} to applications. The allocated {@code Container} + is always on a single node and has a unique {@link ContainerId}. It has + a specific amount of {@link Resource} allocated. +

    + It includes details such as: +

      +
    • {@link ContainerId} for the container, which is globally unique.
    • +
    • + {@link NodeId} of the node on which it is allocated. +
    • +
    • HTTP uri of the node.
    • +
    • {@link Resource} allocated to the container.
    • +
    • {@link Priority} at which the container was allocated.
    • +
    • + Container {@link Token} of the container, used to securely verify + authenticity of the allocation. +
    • +
    + + Typically, an {@code ApplicationMaster} receives the {@code Container} + from the {@code ResourceManager} during resource-negotiation and then + talks to the {@code NodeManager} to start/stop containers. + + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) + @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) + @see ContainerManagementProtocol#stopContainers(org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ApplicationAttemptId of the application to which the + Container was assigned. +

    + Note: If containers are kept alive across application attempts via + {@link ApplicationSubmissionContext#setKeepContainersAcrossApplicationAttempts(boolean)} + the ContainerId does not necessarily contain the current + running application attempt's ApplicationAttemptId This + container can be allocated by previously exited application attempt and + managed by the current running attempt thus have the previous application + attempt's ApplicationAttemptId. +

    + + @return ApplicationAttemptId of the application to which the + Container was assigned]]> +
    +
    + + + ContainerId, + which doesn't include epoch. Note that this method will be marked as + deprecated, so please use getContainerId instead. + @return lower 32 bits of identifier of the ContainerId]]> + + + + + ContainerId. Upper 24 bits are + reserved as epoch of cluster, and lower 40 bits are reserved as + sequential number of containers. + @return identifier of the ContainerId]]> + + + + + + + + + + + + + + + + + + + + + + + + ContainerId represents a globally unique identifier + for a {@link Container} in the cluster.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + LocalResource required by the container. + @return all LocalResource required by the container]]> + + + + + + LocalResource required by the container. All pre-existing + Map entries are cleared before adding the new Map + @param localResources LocalResource required by the container]]> + + + + + + Get application-specific binary service data. This is a map keyed + by the name of each {@link AuxiliaryService} that is configured on a + NodeManager and value correspond to the application specific data targeted + for the keyed {@link AuxiliaryService}. +

    + +

    + This will be used to initialize this application on the specific + {@link AuxiliaryService} running on the NodeManager by calling + {@link AuxiliaryService#initializeApplication(ApplicationInitializationContext)} +

    + + @return application-specific binary service data]]> +
    +
    + + + + + Set application-specific binary service data. This is a map keyed + by the name of each {@link AuxiliaryService} that is configured on a + NodeManager and value correspond to the application specific data targeted + for the keyed {@link AuxiliaryService}. All pre-existing Map entries are + preserved. +

    + + @param serviceData + application-specific binary service data]]> +
    +
    + + + environment variables for the container. + @return environment variables for the container]]> + + + + + + environment variables for the container. All pre-existing Map + entries are cleared before adding the new Map + @param environment environment variables for the container]]> + + + + + commands for launching the container. + @return the list of commands for launching the container]]> + + + + + + commands for launching the container. All + pre-existing List entries are cleared before adding the new List + @param commands the list of commands for launching the container]]> + + + + + ApplicationACLs for the application. + @return all the ApplicationACLs]]> + + + + + + ApplicationACLs for the application. All pre-existing + Map entries are cleared before adding the new Map + @param acls ApplicationACLs for the application]]> + + + + + ContainerRetryContext to relaunch container. + @return ContainerRetryContext to relaunch container.]]> + + + + + + ContainerRetryContext to relaunch container. + @param containerRetryContext ContainerRetryContext to + relaunch container.]]> + + + + + It includes details such as: +
      +
    • {@link ContainerId} of the container.
    • +
    • {@link Resource} allocated to the container.
    • +
    • User to whom the container is allocated.
    • +
    • Security tokens (if security is enabled).
    • +
    • + {@link LocalResource} necessary for running the container such + as binaries, jar, shared-objects, side-files etc. +
    • +
    • Optional, application-specific binary service data.
    • +
    • Environment variables for the launched process.
    • +
    • Command to launch the container.
    • +
    • Retry strategy when container exits with failure.
    • +
    + + @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]> +
    +
    + + + + + + + + ContainerId of the container. + + @return ContainerId of the container.]]> + + + + + + + + Resource of the container. + + @return allocated Resource of the container.]]> + + + + + + + + NodeId where container is running. + + @return allocated NodeId where container is running.]]> + + + + + + + + Priority of the container. + + @return allocated Priority of the container.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ContainerState of the container. + + @return final ContainerState of the container.]]> + + + + + + + + exit status of the container. + + @return final exit status of the container.]]> + + + + + + + + + + + + + + + + + + + + + + + It includes details such as: +
      +
    • {@link ContainerId} of the container.
    • +
    • Allocated Resources to the container.
    • +
    • Assigned Node id.
    • +
    • Assigned Priority.
    • +
    • Creation Time.
    • +
    • Finish Time.
    • +
    • Container Exit Status.
    • +
    • {@link ContainerState} of the container.
    • +
    • Diagnostic information in case of errors.
    • +
    • Log URL.
    • +
    • nodeHttpAddress
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It provides details such as: +
      +
    • + {@link ContainerRetryPolicy} : + - NEVER_RETRY(DEFAULT value): no matter what error code is when container + fails to run, just do not retry. + - RETRY_ON_ALL_ERRORS: no matter what error code is, when container fails + to run, just retry. + - RETRY_ON_SPECIFIC_ERROR_CODES: when container fails to run, do retry if + the error code is one of errorCodes, otherwise do not retry. + + Note: if error code is 137(SIGKILL) or 143(SIGTERM), it will not retry + because it is usually killed on purpose. +
    • +
    • + maxRetries specifies how many times to retry if need to retry. + If the value is -1, it means retry forever. +
    • +
    • retryInterval specifies delaying some time before relaunch + container, the unit is millisecond.
    • +
    • + failuresValidityInterval: default value is -1. + When failuresValidityInterval in milliseconds is set to {@literal >} 0, + the failure number will not take failures which happen out of the + failuresValidityInterval into failure count. If failure count + reaches to maxRetries, the container will be failed. +
    • +
    ]]> +
    +
    + + + + + + + + + + Retry policy for relaunching a Container.

    ]]> +
    +
    + + + + + + + + + + State of a Container.

    ]]> +
    +
    + + + + + + + + ContainerId of the container. + @return ContainerId of the container]]> + + + + + ExecutionType of the container. + @return ExecutionType of the container]]> + + + + + ContainerState of the container. + @return ContainerState of the container]]> + + + + + Get the exit status for the container.

    + +

    Note: This is valid only for completed containers i.e. containers + with state {@link ContainerState#COMPLETE}. + Otherwise, it returns an ContainerExitStatus.INVALID. +

    + +

    Containers killed by the framework, either due to being released by + the application or being 'lost' due to node failures etc. have a special + exit code of ContainerExitStatus.ABORTED.

    + +

    When threshold number of the nodemanager-local-directories or + threshold number of the nodemanager-log-directories become bad, then + container is not launched and is exited with ContainersExitStatus.DISKS_FAILED. +

    + + @return exit status for the container]]> +
    +
    + + + diagnostic messages for failed containers. + @return diagnostic messages for failed containers]]> + + + + + Resource allocated to the container. + @return Resource allocated to the container]]> + + + + + + + + + + + + + + + + + + + + It provides details such as: +
      +
    • {@code ContainerId} of the container.
    • +
    • {@code ExecutionType} of the container.
    • +
    • {@code ContainerState} of the container.
    • +
    • Exit status of a completed container.
    • +
    • Diagnostic message for a failed container.
    • +
    • {@link Resource} allocated to the container.
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The execution types are the following: +
      +
    • {@link #GUARANTEED} - this container is guaranteed to start its + execution, once the corresponding start container request is received by + an NM. +
    • {@link #OPPORTUNISTIC} - the execution of this container may not start + immediately at the NM that receives the corresponding start container + request (depending on the NM's available resources). Moreover, it may be + preempted if it blocks a GUARANTEED container from being executed. +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + ExecutionType of the requested container. + + @param execType + ExecutionType of the requested container]]> + + + + + ExecutionType. + + @return ExecutionType.]]> + + + + + + + + + + + ResourceRequest. + Defaults to false. + @return whether ExecutionType request should be strictly honored]]> + + + + + + + + + ExecutionType as well as flag that explicitly asks the + configuredScheduler to return Containers of exactly the Execution Type + requested.]]> + + + + + + + + + + + + Application.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • resource key
  • +
  • {@link LocalizationState} of the resource
  • + ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + location of the resource to be localized. + @return location of the resource to be localized]]> + + + + + + location of the resource to be localized. + @param resource location of the resource to be localized]]> + + + + + size of the resource to be localized. + @return size of the resource to be localized]]> + + + + + + size of the resource to be localized. + @param size size of the resource to be localized]]> + + + + + timestamp of the resource to be localized, used + for verification. + @return timestamp of the resource to be localized]]> + + + + + + timestamp of the resource to be localized, used + for verification. + @param timestamp timestamp of the resource to be localized]]> + + + + + LocalResourceType of the resource to be localized. + @return LocalResourceType of the resource to be localized]]> + + + + + + LocalResourceType of the resource to be localized. + @param type LocalResourceType of the resource to be localized]]> + + + + + LocalResourceVisibility of the resource to be + localized. + @return LocalResourceVisibility of the resource to be + localized]]> + + + + + + LocalResourceVisibility of the resource to be + localized. + @param visibility LocalResourceVisibility of the resource to be + localized]]> + + + + + pattern that should be used to extract entries from the + archive (only used when type is PATTERN). + @return pattern that should be used to extract entries from the + archive.]]> + + + + + + pattern that should be used to extract entries from the + archive (only used when type is PATTERN). + @param pattern pattern that should be used to extract entries + from the archive.]]> + + + + + + + + + + + shouldBeUploadedToSharedCache + of this request]]> + + + + LocalResource represents a local resource required to + run a container.

    + +

    The NodeManager is responsible for localizing the resource + prior to launching the container.

    + +

    Applications can specify {@link LocalResourceType} and + {@link LocalResourceVisibility}.

    + + @see LocalResourceType + @see LocalResourceVisibility + @see ContainerLaunchContext + @see ApplicationSubmissionContext + @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]> +
    +
    + + + + + + + + + + type + of a resource localized by the {@code NodeManager}. +

    + The type can be one of: +

      +
    • + {@link #FILE} - Regular file i.e. uninterpreted bytes. +
    • +
    • + {@link #ARCHIVE} - Archive, which is automatically unarchived by the + NodeManager. +
    • +
    • + {@link #PATTERN} - A hybrid between {@link #ARCHIVE} and {@link #FILE}. +
    • +
    + + @see LocalResource + @see ContainerLaunchContext + @see ApplicationSubmissionContext + @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]> +
    +
    + + + + + + + + + + visibility + of a resource localized by the {@code NodeManager}. +

    + The visibility can be one of: +

      +
    • {@link #PUBLIC} - Shared by all users on the node.
    • +
    • + {@link #PRIVATE} - Shared among all applications of the + same user on the node. +
    • +
    • + {@link #APPLICATION} - Shared only among containers of the + same application on the node. +
    • +
    + + @see LocalResource + @see ContainerLaunchContext + @see ApplicationSubmissionContext + @see ContainerManagementProtocol#startContainers(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes details such as: +
      +
    • + includePattern. It uses Java Regex to filter the log files + which match the defined include pattern and those log files + will be uploaded when the application finishes. +
    • +
    • + excludePattern. It uses Java Regex to filter the log files + which match the defined exclude pattern and those log files + will not be uploaded when application finishes. If the log file + name matches both the include and the exclude pattern, this file + will be excluded eventually. +
    • +
    • + rolledLogsIncludePattern. It uses Java Regex to filter the log files + which match the defined include pattern and those log files + will be aggregated in a rolling fashion. +
    • +
    • + rolledLogsExcludePattern. It uses Java Regex to filter the log files + which match the defined exclude pattern and those log files + will not be aggregated in a rolling fashion. If the log file + name matches both the include and the exclude pattern, this file + will be excluded eventually. +
    • +
    • + policyClassName. The policy class name that implements + ContainerLogAggregationPolicy. At runtime, nodemanager will the policy + if a given container's log should be aggregated based on the + ContainerType and other runtime state such as exit code by calling + ContainerLogAggregationPolicy#shouldDoLogAggregation. + This is useful when the app only wants to aggregate logs of a subset of + containers. Here are the available policies. Please make sure to specify + the canonical name by prefixing org.apache.hadoop.yarn.server. + nodemanager.containermanager.logaggregation. + to the class simple name below. + NoneContainerLogAggregationPolicy: skip aggregation for all containers. + AllContainerLogAggregationPolicy: aggregate all containers. + AMOrFailedContainerLogAggregationPolicy: aggregate application master + or failed containers. + FailedOrKilledContainerLogAggregationPolicy: aggregate failed or killed + containers + FailedContainerLogAggregationPolicy: aggregate failed containers + AMOnlyLogAggregationPolicy: aggregate application master containers + SampleContainerLogAggregationPolicy: sample logs of successful worker + containers, in addition to application master and failed/killed + containers. + LimitSizeContainerLogAggregationPolicy: skip aggregation for killed + containers whose log size exceeds the limit of container log size. + If it isn't specified, it will use the cluster-wide default policy + defined by configuration yarn.nodemanager.log-aggregation.policy.class. + The default value of yarn.nodemanager.log-aggregation.policy.class is + AllContainerLogAggregationPolicy. +
    • +
    • + policyParameters. The parameters passed to the policy class via + ContainerLogAggregationPolicy#parseParameters during the policy object + initialization. This is optional. Some policy class might use parameters + to adjust its settings. It is up to policy class to define the scheme of + parameters. + For example, SampleContainerLogAggregationPolicy supports the format of + "SR:0.5,MIN:50", which means sample rate of 50% beyond the first 50 + successful worker containers. +
    • +
    + + @see ApplicationSubmissionContext]]> +
    +
    + + + + + + + + NodeManager for which the NMToken + is used to authenticate. + @return the {@link NodeId} of the NodeManager for which the + NMToken is used to authenticate.]]> + + + + + + + + NodeManager + @return the {@link Token} used for authenticating with NodeManager]]> + + + + + + + + + + + + The NMToken is used for authenticating communication with + NodeManager

    +

    It is issued by ResourceMananger when ApplicationMaster + negotiates resource with ResourceManager and + validated on NodeManager side.

    + @see AllocateResponse#getNMTokens()]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Node Attribute is a kind of a label which represents one of the + attribute/feature of a Node. Its different from node partition label as + resource guarantees across the queues will not be maintained for these type + of labels. +

    +

    + A given Node can be mapped with any kind of attribute, few examples are + HAS_SSD=true, JAVA_VERSION=JDK1.8, OS_TYPE=WINDOWS. +

    +

    + Its not compulsory for all the attributes to have value, empty string is the + default value of the NodeAttributeType.STRING +

    +

    + Node Attribute Prefix is used as namespace to segregate the attributes. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + Node Attribute Info describes a NodeAttribute. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + Node AttributeKey uniquely identifies a given Node Attribute. Node Attribute + is identified based on attribute prefix and name. +

    +

    + Node Attribute Prefix is used as namespace to segregate the attributes. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + Type of a node Attribute. +

    + Based on this attribute expressions and values will be evaluated.]]> +
    +
    + + + + + + + + + + + + + hostname of the node. + @return hostname of the node]]> + + + + + port for communicating with the node. + @return port for communicating with the node]]> + + + + + + + + + + + + + + + + + + + NodeId is the unique identifier for a node.

    + +

    It includes the hostname and port to uniquely + identify the node. Thus, it is unique across restarts of any + NodeManager.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NodeId of the node. + @return NodeId of the node]]> + + + + + NodeState of the node. + @return NodeState of the node]]> + + + + + http address of the node. + @return http address of the node]]> + + + + + rack name for the node. + @return rack name for the node]]> + + + + + used Resource on the node. + @return used Resource on the node]]> + + + + + total Resource on the node. + @return total Resource on the node]]> + + + + + diagnostic health report of the node. + @return diagnostic health report of the node]]> + + + + + last timestamp at which the health report was received. + @return last timestamp at which the health report was received]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes details such as: +
      +
    • {@link NodeId} of the node.
    • +
    • HTTP Tracking URL of the node.
    • +
    • Rack name for the node.
    • +
    • Used {@link Resource} on the node.
    • +
    • Total available {@link Resource} of the node.
    • +
    • Number of running containers on the node.
    • +
    + + @see ApplicationClientProtocol#getClusterNodes(org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest)]]> +
    +
    + + + + + + + + + + + + + + + + State of a Node.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + Mapping of Attribute Value to a Node. +

    ]]> +
    +
    + + + + + + + + + + + + ResourceManager. + @see PreemptionContract + @see StrictPreemptionContract]]> + + + + + + + + + + ApplicationMaster about resources requested back by the + ResourceManager. + @see AllocateRequest#setAskList(List)]]> + + + + + ApplicationMaster that may be reclaimed by the + ResourceManager. If the AM prefers a different set of + containers, then it may checkpoint or kill containers matching the + description in {@link #getResourceRequest}. + @return Set of containers at risk if the contract is not met.]]> + + + + ResourceManager. + The ApplicationMaster (AM) can satisfy this request according + to its own priorities to prevent containers from being forcibly killed by + the platform. + @see PreemptionMessage]]> + + + + + + + + + + ResourceManager]]> + + + + + + + + + + The AM should decode both parts of the message. The {@link + StrictPreemptionContract} specifies particular allocations that the RM + requires back. The AM can checkpoint containers' state, adjust its execution + plan to move the computation, or take no action and hope that conditions that + caused the RM to ask for the container will change. +

    + In contrast, the {@link PreemptionContract} also includes a description of + resources with a set of containers. If the AM releases containers matching + that profile, then the containers enumerated in {@link + PreemptionContract#getContainers()} may not be killed. +

    + Each preemption message reflects the RM's current understanding of the + cluster state, so a request to return N containers may not + reflect containers the AM is releasing, recently exited containers the RM has + yet to learn about, or new containers allocated before the message was + generated. Conversely, an RM may request a different profile of containers in + subsequent requests. +

    + The policy enforced by the RM is part of the scheduler. Generally, only + containers that have been requested consistently should be killed, but the + details are not specified.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The ACL is one of: +

      +
    • + {@link #SUBMIT_APPLICATIONS} - ACL to submit applications to the queue. +
    • +
    • {@link #ADMINISTER_QUEUE} - ACL to administer the queue.
    • +
    + + @see QueueInfo + @see ApplicationClientProtocol#getQueueUserAcls(org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest)]]> +
    +
    + + + + + + + + name of the queue. + @return name of the queue]]> + + + + + configured capacity of the queue. + @return configured capacity of the queue]]> + + + + + maximum capacity of the queue. + @return maximum capacity of the queue]]> + + + + + current capacity of the queue. + @return current capacity of the queue]]> + + + + + child queues of the queue. + @return child queues of the queue]]> + + + + + running applications of the queue. + @return running applications of the queue]]> + + + + + QueueState of the queue. + @return QueueState of the queue]]> + + + + + accessible node labels of the queue. + @return accessible node labels of the queue]]> + + + + + default node label expression of the queue, this takes + affect only when the ApplicationSubmissionContext and + ResourceRequest don't specify their + NodeLabelExpression. + + @return default node label expression of the queue]]> + + + + + + + + queue stats for the queue + + @return queue stats of the queue]]> + + + + + + + + + + + preemption status of the queue. + @return if property is not in proto, return null; + otherwise, return preemption status of the queue]]> + + + + + + + + + + + + + + + It includes information such as: +
      +
    • Queue name.
    • +
    • Capacity of the queue.
    • +
    • Maximum capacity of the queue.
    • +
    • Current capacity of the queue.
    • +
    • Child queues.
    • +
    • Running applications.
    • +
    • {@link QueueState} of the queue.
    • +
    • {@link QueueConfigurations} of the queue.
    • +
    + + @see QueueState + @see QueueConfigurations + @see ApplicationClientProtocol#getQueueInfo(org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest)]]> +
    +
    + + + + + + + + + + + A queue is in one of: +
      +
    • {@link #RUNNING} - normal state.
    • +
    • {@link #STOPPED} - not accepting new application submissions.
    • +
    • + {@link #DRAINING} - not accepting new application submissions + and waiting for applications finish. +
    • +
    + + @see QueueInfo + @see ApplicationClientProtocol#getQueueInfo(org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + queue name of the queue. + @return queue name of the queue]]> + + + + + QueueACL for the given user. + @return list of QueueACL for the given user]]> + + + + QueueUserACLInfo provides information {@link QueueACL} for + the given user.

    + + @see QueueACL + @see ApplicationClientProtocol#getQueueUserAcls(org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The ACL is one of: +
      +
    • + {@link #ADMINISTER_RESERVATIONS} - ACL to create, list, update and + delete reservations. +
    • +
    • {@link #LIST_RESERVATIONS} - ACL to list reservations.
    • +
    • {@link #SUBMIT_RESERVATIONS} - ACL to create reservations.
    • +
    + Users can always list, update and delete their own reservations.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes: +
      +
    • Duration of the reservation.
    • +
    • Acceptance time of the duration.
    • +
    • + List of {@link ResourceAllocationRequest}, which includes the time + interval, and capability of the allocation. + {@code ResourceAllocationRequest} represents an allocation + made for a reservation for the current state of the queue. This can be + changed for reasons such as re-planning, but will always be subject to + the constraints of the user contract as described by + {@link ReservationDefinition} +
    • +
    • {@link ReservationId} of the reservation.
    • +
    • {@link ReservationDefinition} used to make the reservation.
    • +
    + + @see ResourceAllocationRequest + @see ReservationId + @see ReservationDefinition]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + start time of the {@code ResourceManager} which is used to + generate globally unique {@link ReservationId}. + + @return start time of the {@code ResourceManager}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {@link ReservationId} represents the globally unique identifier for + a reservation. +

    + +

    + The globally unique nature of the identifier is achieved by using the + cluster timestamp i.e. start-time of the {@code ResourceManager} + along with a monotonically increasing counter for the reservation. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes: +
      +
    • {@link Resource} required for each request.
    • +
    • + Number of containers, of above specifications, which are required by the + application. +
    • +
    • Concurrency that indicates the gang size of the request.
    • +
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + memory of the resource. Note - while memory has + never had a unit specified, all YARN configurations have specified memory + in MB. The assumption has been that the daemons and applications are always + using the same units. With the introduction of the ResourceInformation + class we have support for units - so this function will continue to return + memory but in the units of MB + + @return memory(in MB) of the resource]]> + + + + + memory of the resource. Note - while memory has + never had a unit specified, all YARN configurations have specified memory + in MB. The assumption has been that the daemons and applications are always + using the same units. With the introduction of the ResourceInformation + class we have support for units - so this function will continue to return + memory but in the units of MB + + @return memory of the resource]]> + + + + + + memory of the resource. Note - while memory has + never had a unit specified, all YARN configurations have specified memory + in MB. The assumption has been that the daemons and applications are always + using the same units. With the introduction of the ResourceInformation + class we have support for units - so this function will continue to set + memory but the assumption is that the value passed is in units of MB. + + @param memory memory(in MB) of the resource]]> + + + + + + memory of the resource. + @param memory memory of the resource]]> + + + + + number of virtual cpu cores of the resource. + + Virtual cores are a unit for expressing CPU parallelism. A node's capacity + should be configured with virtual cores equal to its number of physical + cores. A container should be requested with the number of cores it can + saturate, i.e. the average number of threads it expects to have runnable + at a time. + + @return num of virtual cpu cores of the resource]]> + + + + + + number of virtual cpu cores of the resource. + + Virtual cores are a unit for expressing CPU parallelism. A node's capacity + should be configured with virtual cores equal to its number of physical + cores. A container should be requested with the number of cores it can + saturate, i.e. the average number of threads it expects to have runnable + at a time. + + @param vCores number of virtual cpu cores of the resource]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Resource models a set of computer resources in the + cluster.

    + +

    Currently it models both memory and CPU.

    + +

    The unit for memory is megabytes. CPU is modeled with virtual cores + (vcores), a unit for expressing parallelism. A node's capacity should + be configured with virtual cores equal to its number of physical cores. A + container should be requested with the number of cores it can saturate, i.e. + the average number of threads it expects to have runnable at a time.

    + +

    Virtual cores take integer values and thus currently CPU-scheduling is + very coarse. A complementary axis for CPU requests that represents + processing power will likely be added in the future to enable finer-grained + resource configuration.

    + +

    Typically, applications request Resource of suitable + capability to run their component tasks.

    + + @see ResourceRequest + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + It includes: +
      +
    • StartTime of the allocation.
    • +
    • EndTime of the allocation.
    • +
    • {@link Resource} reserved for the allocation.
    • +
    + + @see Resource]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + blacklist of resources + for the application. + + @see ResourceRequest + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + host/rack string represents an arbitrary + host name. + + @param hostName host/rack on which the allocation is desired + @return whether the given host/rack string represents an arbitrary + host name]]> + + + + + Priority of the request. + @return Priority of the request]]> + + + + + + Priority of the request + @param priority Priority of the request]]> + + + + + host/rack) on which the allocation + is desired. + + A special value of * signifies that any resource + (host/rack) is acceptable. + + @return resource (e.g. host/rack) on which the allocation + is desired]]> + + + + + + host/rack) on which the allocation + is desired. + + A special value of * signifies that any resource name + (e.g. host/rack) is acceptable. + + @param resourceName (e.g. host/rack) on which the + allocation is desired]]> + + + + + + + + + + + + + + + + ResourceRequest. Defaults to true. + + @return whether locality relaxation is enabled with this + ResourceRequest.]]> + + + + + + ExecutionTypeRequest of the requested container. + + @param execSpec + ExecutionTypeRequest of the requested container]]> + + + + + ResourceRequest. Defaults to true. + + @return whether locality relaxation is enabled with this + ResourceRequest.]]> + + + + + + For a request at a network hierarchy level, set whether locality can be relaxed + to that level and beyond.

    + +

    If the flag is off on a rack-level ResourceRequest, + containers at that request's priority will not be assigned to nodes on that + request's rack unless requests specifically for those nodes have also been + submitted.

    + +

    If the flag is off on an {@link ResourceRequest#ANY}-level + ResourceRequest, containers at that request's priority will + only be assigned on racks for which specific requests have also been + submitted.

    + +

    For example, to request a container strictly on a specific node, the + corresponding rack-level and any-level requests should have locality + relaxation set to false. Similarly, to request a container strictly on a + specific rack, the corresponding any-level request should have locality + relaxation set to false.

    + + @param relaxLocality whether locality relaxation is enabled with this + ResourceRequest.]]> + + + + + + + + + + + + + + + + ID corresponding to this allocation request. This + ID is an identifier for different {@code ResourceRequest}s from the same + application. The allocated {@code Container}(s) received as part of the + {@code AllocateResponse} response will have the ID corresponding to the + original {@code ResourceRequest} for which the RM made the allocation. +

    + The scheduler may return multiple {@code AllocateResponse}s corresponding + to the same ID as and when scheduler allocates {@code Container}(s). + Applications can continue to completely ignore the returned ID in + the response and use the allocation for any of their outstanding requests. +

    + If one wishes to replace an entire {@code ResourceRequest} corresponding to + a specific ID, they can simply cancel the corresponding {@code + ResourceRequest} and submit a new one afresh. + + @return the ID corresponding to this allocation request.]]> + + + + + + ID corresponding to this allocation request. This + ID is an identifier for different {@code ResourceRequest}s from the same + application. The allocated {@code Container}(s) received as part of the + {@code AllocateResponse} response will have the ID corresponding to the + original {@code ResourceRequest} for which the RM made the allocation. +

    + The scheduler may return multiple {@code AllocateResponse}s corresponding + to the same ID as and when scheduler allocates {@code Container}(s). + Applications can continue to completely ignore the returned ID in + the response and use the allocation for any of their outstanding requests. +

    + If one wishes to replace an entire {@code ResourceRequest} corresponding to + a specific ID, they can simply cancel the corresponding {@code + ResourceRequest} and submit a new one afresh. +

    + If the ID is not set, scheduler will continue to work as previously and all + allocated {@code Container}(s) will have the default ID, -1. + + @param allocationRequestID the ID corresponding to this allocation + request.]]> + + + + + + Resource capability of the request. + @param capability Resource capability of the request]]> + + + + + Resource capability of the request. + @return Resource capability of the request]]> + + + + + + + + + + + + + + + + + + It includes: +

      +
    • {@link Priority} of the request.
    • +
    • + The name of the host or rack on which the allocation is + desired. A special value of * signifies that + any host/rack is acceptable to the application. +
    • +
    • {@link Resource} required for each request.
    • +
    • + Number of containers, of above specifications, which are required + by the application. +
    • +
    • + A boolean relaxLocality flag, defaulting to {@code true}, + which tells the {@code ResourceManager} if the application wants + locality to be loose (i.e. allows fall-through to rack or any) + or strict (i.e. specify hard constraint on resource allocation). +
    • +
    + + @see Resource + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]> +
    +
    + + + + + + + priority of the request. + @see ResourceRequest#setPriority(Priority) + @param priority priority of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + resourceName of the request. + @see ResourceRequest#setResourceName(String) + @param resourceName resourceName of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + capability of the request. + @see ResourceRequest#setCapability(Resource) + @param capability capability of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + numContainers of the request. + @see ResourceRequest#setNumContainers(int) + @param numContainers numContainers of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + relaxLocality of the request. + @see ResourceRequest#setRelaxLocality(boolean) + @param relaxLocality relaxLocality of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + nodeLabelExpression of the request. + @see ResourceRequest#setNodeLabelExpression(String) + @param nodeLabelExpression + nodeLabelExpression of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + executionTypeRequest of the request. + @see ResourceRequest#setExecutionTypeRequest( + ExecutionTypeRequest) + @param executionTypeRequest + executionTypeRequest of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + executionTypeRequest of the request with 'ensure + execution type' flag set to true. + @see ResourceRequest#setExecutionTypeRequest( + ExecutionTypeRequest) + @param executionType executionType of the request. + @return {@link ResourceRequestBuilder}]]> + + + + + + allocationRequestId of the request. + @see ResourceRequest#setAllocationRequestId(long) + @param allocationRequestId + allocationRequestId of the request + @return {@link ResourceRequestBuilder}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + virtual memory. + + @return virtual memory in MB]]> + + + + + + virtual memory. + + @param vmem virtual memory in MB]]> + + + + + physical memory. + + @return physical memory in MB]]> + + + + + + physical memory. + + @param pmem physical memory in MB]]> + + + + + CPU utilization (The amount of vcores used). + + @return CPU utilization]]> + + + + + + CPU utilization (The amount of vcores used). + + @param cpu CPU utilization]]> + + + + + + custom resource utilization + (The amount of custom resource used). + + @param resourceName resourceName of custom resource + @return resourceName utilization]]> + + + + + + + + + + + + custom resource utilization + (The amount of custom resource used). + @param resourceName resourceName + @param utilization utilization of custom resource]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ResourceUtilization models the utilization of a set of computer + resources in the cluster. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + allocationRequestId of the request. + + @see SchedulingRequest#setAllocationRequestId(long) + @param allocationRequestId allocationRequestId of the + request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + priority of the request. + + @param priority priority of the request + @return {@link SchedulingRequest.SchedulingRequestBuilder} + @see SchedulingRequest#setPriority(Priority)]]> + + + + + + executionType of the request. + + @see SchedulingRequest#setExecutionType(ExecutionTypeRequest) + @param executionType executionType of the request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + allocationTags of the request. + + @see SchedulingRequest#setAllocationTags(Set) + @param allocationTags allocationsTags of the request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + executionType of the request. + + @see SchedulingRequest#setResourceSizing(ResourceSizing) + @param resourceSizing resourceSizing of the request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + placementConstraintExpression of the request. + + @see SchedulingRequest#setPlacementConstraint( + PlacementConstraint) + @param placementConstraintExpression placementConstraints of + the request + @return {@link SchedulingRequest.SchedulingRequestBuilder}]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ApplicationMaster that may be reclaimed by the + ResourceManager. + @return the set of {@link ContainerId} to be preempted.]]> + + + + ApplicationMaster (AM) + may attempt to checkpoint work or adjust its execution plan to accommodate + it. In contrast to {@link PreemptionContract}, the AM has no flexibility in + selecting which resources to return to the cluster. + @see PreemptionMessage]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Token is the security entity used by the framework + to verify authenticity of any resource.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ContainerId of the container. + @return ContainerId of the container]]> + + + + + + + + + + + ContainerUpdateType of the container. + @return ContainerUpdateType of the container.]]> + + + + + + ContainerUpdateType of the container. + @param updateType of the Container]]> + + + + + ContainerId of the container. + @return ContainerId of the container]]> + + + + + + ContainerId of the container. + @param containerId ContainerId of the container]]> + + + + + ExecutionType of the container. + @return ExecutionType of the container]]> + + + + + + ExecutionType of the container. + @param executionType ExecutionType of the container]]> + + + + + + Resource capability of the request. + @param capability Resource capability of the request]]> + + + + + Resource capability of the request. + @return Resource capability of the request]]> + + + + + + + + + + + + It includes: +
      +
    • version for the container.
    • +
    • {@link ContainerId} for the container.
    • +
    • + {@link Resource} capability of the container after the update request + is completed. +
    • +
    • + {@link ExecutionType} of the container after the update request is + completed. +
    • +
    + + Update rules: +
      +
    • + Currently only ONE aspect of the container can be updated per request + (user can either update Capability OR ExecutionType in one request.. + not both). +
    • +
    • + There must be only 1 update request per container in an allocate call. +
    • +
    • + If a new update request is sent for a container (in a subsequent allocate + call) before the first one is satisfied by the Scheduler, it will + overwrite the previous request. +
    • +
    + @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)]]> +
    +
    + + + + + + + + + + + + + + + ContainerUpdateType. + @return ContainerUpdateType]]> + + + + + + ContainerUpdateType. + @param updateType ContainerUpdateType]]> + + + + + Container. + @return Container]]> + + + + + + Container. + @param container Container]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + URL represents a serializable {@link java.net.URL}.

    ]]> +
    +
    + + + + + + + + + + RMAppAttempt.]]> + + + + + + + + + + + + ApplicationMaster.]]> + + + + + + + + + + NodeManagers in the cluster. + @return number of NodeManagers in the cluster]]> + + + + + DecommissionedNodeManagers in the cluster. + + @return number of DecommissionedNodeManagers in the cluster]]> + + + + + ActiveNodeManagers in the cluster. + + @return number of ActiveNodeManagers in the cluster]]> + + + + + LostNodeManagers in the cluster. + + @return number of LostNodeManagers in the cluster]]> + + + + + UnhealthyNodeManagers in the cluster. + + @return number of UnhealthyNodeManagers in the cluster]]> + + + + + RebootedNodeManagers in the cluster. + + @return number of RebootedNodeManagers in the cluster]]> + + + + YarnClusterMetrics represents cluster metrics.

    + +

    Currently only number of NodeManagers is provided.

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class contains the information about a timeline domain, which is used + to a user to host a number of timeline entities, isolating them from others'. + The user can also define the reader and writer users/groups for the the + domain, which is used to control the access to its entities. +

    + +

    + The reader and writer users/groups pattern that the user can supply is the + same as what AccessControlList takes. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The class that contains the the meta information of some conceptual entity + and its related events. The entity can be an application, an application + attempt, a container or whatever the user-defined object. +

    + +

    + Primary filters will be used to index the entities in + TimelineStore, such that users should carefully choose the + information they want to store as the primary filters. The remaining can be + stored as other information. +

    ]]> +
    +
    + + + + + + + + + + + + + ApplicationId of the + TimelineEntityGroupId. + + @return ApplicationId of the + TimelineEntityGroupId]]> + + + + + + + + timelineEntityGroupId. + + @return timelineEntityGroupId]]> + + + + + + + + + + + + + + + + + + + TimelineEntityGroupId is an abstract way for + timeline service users to represent #a group of related timeline data. + For example, all entities that represents one data flow DAG execution + can be grouped into one timeline entity group.

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class contains the information about a timeline service domain, which is + used to a user to host a number of timeline entities, isolating them from + others'. The user can also define the reader and writer users/groups for + the domain, which is used to control the access to its entities. +

    +

    + The reader and writer users/groups pattern that the user can supply is the + same as what AccessControlList takes. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The constuctor is used to construct a proxy {@link TimelineEntity} or its + subclass object from the real entity object that carries information. +

    + +

    + It is usually used in the case where we want to recover class polymorphism + after deserializing the entity from its JSON form. +

    + @param entity the real entity that carries information]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Note: Entities will be stored in the order of idPrefix specified. + If users decide to set idPrefix for an entity, they MUST provide + the same prefix for every update of this entity. +

    + Example:
    + TimelineEntity entity = new TimelineEntity();
    + entity.setIdPrefix(value);
    + 
    + Users can use {@link TimelineServiceHelper#invertLong(long)} to invert + the prefix if necessary. + + @param entityIdPrefix prefix for an entity.]]> +
    +
    + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name property as a + InetSocketAddress. On an HA cluster, + this fetches the address corresponding to the RM identified by + {@link #RM_HA_ID}. + @param name property name. + @param defaultAddress the default value + @param defaultPort the default port + @return InetSocketAddress]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + yarn.resourcemanager.scheduler.class + cannot handle placement constraints, the corresponding SchedulingRequests + will be rejected. As of now, only the capacity scheduler supports + SchedulingRequests. In particular, it currently supports anti-affinity + constraints (no affinity or cardinality) and places one container at a + time. The advantage of this handler compared to the placement-processor is + that it follows the same ordering rules for queues (sorted by utilization, + priority) and apps (sorted by FIFO/fairness/priority) as the ones followed + by the main scheduler.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OPPORTUNISTIC containers on the NM.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • default
  • +
  • docker
  • +
  • javasandbox
  • +
  • runc
  • + ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  • NONE - the RM will do nothing special.
  • +
  • LENIENT - the RM will generate and provide a keystore and truststore + to the AM, which it is free to use for HTTPS in its tracking URL web + server. The RM proxy will still allow HTTP connections to AMs that opt + not to use HTTPS.
  • +
  • STRICT - this is the same as LENIENT, except that the RM proxy will + only allow HTTPS connections to AMs; HTTP connections will be blocked + and result in a warning page to the user.
  • + ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Default platform-specific CLASSPATH for YARN applications. A + comma-separated list of CLASSPATH entries constructed based on the client + OS environment expansion syntax. +

    +

    + Note: Use {@link #DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH} for + cross-platform practice i.e. submit an application from a Windows client to + a Linux/Unix server or vice versa. +

    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The information is passed along to applications via + {@link StartContainersResponse#getAllServicesMetaData()} that is returned by + {@link ContainerManagementProtocol#startContainers(StartContainersRequest)} +

    + + @return meta-data for this service that should be made available to + applications.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The method used by the NodeManager log aggregation service + to initial the policy object with parameters specified by the application + or the cluster-wide setting. +

    + + @param parameters parameters with scheme defined by the policy class.]]> +
    +
    + + + + + The method used by the NodeManager log aggregation service + to ask the policy object if a given container's logs should be aggregated. +

    + + @param logContext ContainerLogContext + @return Whether or not the container's logs should be aggregated.]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The method used by administrators to ask SCM to run cleaner task right away +

    + + @param request request SharedCacheManager to run a cleaner task + @return SharedCacheManager returns an empty response + on success and throws an exception on rejecting the request + @throws YarnException + @throws IOException]]> +
    +
    + + + The protocol between administrators and the SharedCacheManager +

    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + Tag1(N1),P1:Tag2(N2),P2:...:TagN(Nn),Pn

    + + where TagN(Nn) is a key value pair to determine the source + allocation tag and the number of allocations, such as: + +

    foo(3)

    + + Optional when using NodeAttribute Constraint. + + and where Pn can be any form of a valid constraint expression, + such as: + +
      +
    • in,node,foo,bar
    • +
    • notin,node,foo,bar,1,2
    • +
    • and(notin,node,foo:notin,node,bar)
    • +
    + + and NodeAttribute Constraint such as + +
      +
    • yarn.rm.io/foo=true
    • +
    • java=1.7,1.8
    • +
    + @param expression expression string. + @return a map of source tags to placement constraint mapping. + @throws PlacementConstraintParseException]]> +
    +
    + + + + + +
    + +
    + + + + + +
    diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml new file mode 100644 index 00000000000..a2b0cd041fc --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.3.5.xml @@ -0,0 +1,3067 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + If the user does not have VIEW_APP access then the following + fields in the report will be set to stubbed values: +

      +
    • host - set to "N/A"
    • +
    • RPC port - set to -1
    • +
    • client token - set to "N/A"
    • +
    • diagnostics - set to "N/A"
    • +
    • tracking URL - set to "N/A"
    • +
    • original tracking URL - set to "N/A"
    • +
    • resource usage report - all values are -1
    • +
    + + @param appId + {@link ApplicationId} of the application that needs a report + @return application report + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Get a report (ApplicationReport) of all Applications in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @return a list of reports for all applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report of the given ApplicationAttempt. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + + @param applicationAttemptId + {@link ApplicationAttemptId} of the application attempt that needs + a report + @return application attempt report + @throws YarnException + @throws ApplicationAttemptNotFoundException if application attempt + not found + @throws IOException]]> +
    +
    + + + + + + + Get a report of all (ApplicationAttempts) of Application in the cluster. +

    + + @param applicationId + @return a list of reports for all application attempts for specified + application + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report of the given Container. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + + @param containerId + {@link ContainerId} of the container that needs a report + @return container report + @throws YarnException + @throws ContainerNotFoundException if container not found + @throws IOException]]> +
    +
    + + + + + + + Get a report of all (Containers) of ApplicationAttempt in the cluster. +

    + + @param applicationAttemptId + @return a list of reports of all containers for specified application + attempt + @throws YarnException + @throws IOException]]> +
    +
    +
    + + + + + + + + + {@code + AMRMClient.createAMRMClientContainerRequest() + } + @return the newly create AMRMClient instance.]]> + + + + + + + + + + + + + + + + RegisterApplicationMasterResponse + @throws YarnException + @throws IOException]]> + + + + + + + + + + + RegisterApplicationMasterResponse + @throws YarnException + @throws IOException]]> + + + + + + + + addContainerRequest are sent to the + ResourceManager. New containers assigned to the master are + retrieved. Status of completed containers and node health updates are also + retrieved. This also doubles up as a heartbeat to the ResourceManager and + must be made periodically. The call may not always return any new + allocations of containers. App should not make concurrent allocate + requests. May cause request loss. + +

    + Note : If the user has not removed container requests that have already + been satisfied, then the re-register may end up sending the entire + container requests to the RM (including matched requests). Which would mean + the RM could end up giving it a lot of new allocated containers. +

    + + @param progressIndicator Indicates progress made by the master + @return the response of the allocate request + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + + + + + + + allocate + @param req Resource request]]> + + + + + + + + + + + + + allocate. + Any previous pending resource change request of the same container will be + removed. + + Application that calls this method is expected to maintain the + Containers that are returned from previous successful + allocations or resource changes. By passing in the existing container and a + target resource capability to this method, the application requests the + ResourceManager to change the existing resource allocation to the target + resource allocation. + + @deprecated use + {@link #requestContainerUpdate(Container, UpdateContainerRequest)} + + @param container The container returned from the last successful resource + allocation or resource change + @param capability The target resource capability of the container]]> + + + + + + + allocate. + Any previous pending update request of the same container will be + removed. + + @param container The container returned from the last successful resource + allocation or update + @param updateContainerRequest The UpdateContainerRequest.]]> + + + + + + + + + + + + + + + + + + + + + + + + ContainerRequests matching the given + parameters. These ContainerRequests should have been added via + addContainerRequest earlier in the lifecycle. For performance, + the AMRMClient may return its internal collection directly without creating + a copy. Users should not perform mutable operations on the return value. + Each collection in the list contains requests with identical + Resource size that fit in the given capability. In a + collection, requests will be returned in the same order as they were added. + + NOTE: This API only matches Container requests that were created by the + client WITHOUT the allocationRequestId being set. + + @return Collection of request matching the parameters]]> + + + + + + + + + ContainerRequests matching the given + parameters. These ContainerRequests should have been added via + addContainerRequest earlier in the lifecycle. For performance, + the AMRMClient may return its internal collection directly without creating + a copy. Users should not perform mutable operations on the return value. + Each collection in the list contains requests with identical + Resource size that fit in the given capability. In a + collection, requests will be returned in the same order as they were added. + specify an ExecutionType. + + NOTE: This API only matches Container requests that were created by the + client WITHOUT the allocationRequestId being set. + + @param priority Priority + @param resourceName Location + @param executionType ExecutionType + @param capability Capability + @return Collection of request matching the parameters]]> + + + + + + + + + + + + + ContainerRequests matching the given + allocationRequestId. These ContainerRequests should have been added via + addContainerRequest earlier in the lifecycle. For performance, + the AMRMClient may return its internal collection directly without creating + a copy. Users should not perform mutable operations on the return value. + + NOTE: This API only matches Container requests that were created by the + client WITH the allocationRequestId being set to a non-default value. + + @param allocationRequestId Allocation Request Id + @return Collection of request matching the parameters]]> + + + + + + + + + + + + + AMRMClient. This cache must + be shared with the {@link NMClient} used to manage containers for the + AMRMClient +

    + If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @param nmTokenCache the NM token cache to use.]]> + + + + + AMRMClient. This cache must be + shared with the {@link NMClient} used to manage containers for the + AMRMClient. +

    + If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @return the NM token cache.]]> + + + + + + + + + + + + + + + + + + + + + + + + check to return true for each 1000 ms. + See also {@link #waitFor(java.util.function.Supplier, int)} + and {@link #waitFor(java.util.function.Supplier, int, int)} + @param check the condition for which it should wait]]> + + + + + + + + check to return true for each + checkEveryMillis ms. + See also {@link #waitFor(java.util.function.Supplier, int, int)} + @param check user defined checker + @param checkEveryMillis interval to call check]]> + + + + + + + + + check to return true for each + checkEveryMillis ms. In the main loop, this method will log + the message "waiting in main loop" for each logInterval times + iteration to confirm the thread is alive. + @param check user defined checker + @param checkEveryMillis interval to call check + @param logInterval interval to log for each]]> + + + + + + + + + + + + + + + + + + + + + + + + + + Start an allocated container.

    + +

    The ApplicationMaster or other applications that use the + client must provide the details of the allocated container, including the + Id, the assigned node's Id and the token via {@link Container}. In + addition, the AM needs to provide the {@link ContainerLaunchContext} as + well.

    + + @param container the allocated container + @param containerLaunchContext the context information needed by the + NodeManager to launch the + container + @return a map between the auxiliary service names and their outputs + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Increase the resource of a container.

    + +

    The ApplicationMaster or other applications that use the + client must provide the details of the container, including the Id and + the target resource encapsulated in the updated container token via + {@link Container}. +

    + + @param container the container with updated token. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Update the resources of a container.

    + +

    The ApplicationMaster or other applications that use the + client must provide the details of the container, including the Id and + the target resource encapsulated in the updated container token via + {@link Container}. +

    + + @param container the container with updated token. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + + Stop an started container.

    + + @param containerId the Id of the started container + @param nodeId the Id of the NodeManager + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + + Query the status of a container.

    + + @param containerId the Id of the started container + @param nodeId the Id of the NodeManager + + @return the status of a container. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + + + Re-Initialize the Container.

    + + @param containerId the Id of the container to Re-Initialize. + @param containerLaunchContex the updated ContainerLaunchContext. + @param autoCommit commit re-initialization automatically ? + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Restart the specified container.

    + + @param containerId the Id of the container to restart. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Rollback last reInitialization of the specified container.

    + + @param containerId the Id of the container to restart. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + + + Commit last reInitialization of the specified container.

    + + @param containerId the Id of the container to commit reInitialize. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
    +
    + + + + Set whether the containers that are started by this client, and are + still running should be stopped when the client stops. By default, the + feature should be enabled.

    However, containers will be stopped only + when service is stopped. i.e. after {@link NMClient#stop()}. + + @param enabled whether the feature is enabled or not]]> +
    +
    + + + + NMClient. This cache must be + shared with the {@link AMRMClient} that requested the containers managed + by this NMClient +

    + If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @param nmTokenCache the NM token cache to use.]]> + + + + + NMClient. This cache must be + shared with the {@link AMRMClient} that requested the containers managed + by this NMClient +

    + If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @return the NM token cache]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + By default YARN client libraries {@link AMRMClient} and {@link NMClient} use + {@link #getSingleton()} instance of the cache. +

      +
    • + Using the singleton instance of the cache is appropriate when running a + single ApplicationMaster in the same JVM. +
    • +
    • + When using the singleton, users don't need to do anything special, + {@link AMRMClient} and {@link NMClient} are already set up to use the + default singleton {@link NMTokenCache} +
    • +
    + If running multiple Application Masters in the same JVM, a different cache + instance should be used for each Application Master. +
      +
    • + If using the {@link AMRMClient} and the {@link NMClient}, setting up + and using an instance cache is as follows: +
      +   NMTokenCache nmTokenCache = new NMTokenCache();
      +   AMRMClient rmClient = AMRMClient.createAMRMClient();
      +   NMClient nmClient = NMClient.createNMClient();
      +   nmClient.setNMTokenCache(nmTokenCache);
      +   ...
      + 
      +
    • +
    • + If using the {@link AMRMClientAsync} and the {@link NMClientAsync}, + setting up and using an instance cache is as follows: +
      +   NMTokenCache nmTokenCache = new NMTokenCache();
      +   AMRMClient rmClient = AMRMClient.createAMRMClient();
      +   NMClient nmClient = NMClient.createNMClient();
      +   nmClient.setNMTokenCache(nmTokenCache);
      +   AMRMClientAsync rmClientAsync = new AMRMClientAsync(rmClient, 1000, [AMRM_CALLBACK]);
      +   NMClientAsync nmClientAsync = new NMClientAsync("nmClient", nmClient, [NM_CALLBACK]);
      +   ...
      + 
      +
    • +
    • + If using {@link ApplicationMasterProtocol} and + {@link ContainerManagementProtocol} directly, setting up and using an + instance cache is as follows: +
      +   NMTokenCache nmTokenCache = new NMTokenCache();
      +   ...
      +   ApplicationMasterProtocol amPro = ClientRMProxy.createRMProxy(conf, ApplicationMasterProtocol.class);
      +   ...
      +   AllocateRequest allocateRequest = ...
      +   ...
      +   AllocateResponse allocateResponse = rmClient.allocate(allocateRequest);
      +   for (NMToken token : allocateResponse.getNMTokens()) {
      +     nmTokenCache.setToken(token.getNodeId().toString(), token.getToken());
      +   }
      +   ...
      +   ContainerManagementProtocolProxy nmPro = ContainerManagementProtocolProxy(conf, nmTokenCache);
      +   ...
      +   nmPro.startContainer(container, containerContext);
      +   ...
      + 
      +
    • +
    + It is also possible to mix the usage of a client ({@code AMRMClient} or + {@code NMClient}, or the async versions of them) with a protocol proxy + ({@code ContainerManagementProtocolProxy} or + {@code ApplicationMasterProtocol}).]]> +
    +
    + + + + + + + + + + + + + + The method to claim a resource with the SharedCacheManager. + The client uses a checksum to identify the resource and an + {@link ApplicationId} to identify which application will be using the + resource. +

    + +

    + The SharedCacheManager responds with whether or not the + resource exists in the cache. If the resource exists, a URL to + the resource in the shared cache is returned. If the resource does not + exist, null is returned instead. +

    + +

    + Once a URL has been returned for a resource, that URL is safe to use for + the lifetime of the application that corresponds to the provided + ApplicationId. +

    + + @param applicationId ApplicationId of the application using the resource + @param resourceKey the key (i.e. checksum) that identifies the resource + @return URL to the resource, or null if it does not exist]]> +
    +
    + + + + + + + The method to release a resource with the SharedCacheManager. + This method is called once an application is no longer using a claimed + resource in the shared cache. The client uses a checksum to identify the + resource and an {@link ApplicationId} to identify which application is + releasing the resource. +

    + +

    + Note: This method is an optimization and the client is not required to call + it for correctness. +

    + + @param applicationId ApplicationId of the application releasing the + resource + @param resourceKey the key (i.e. checksum) that identifies the resource]]> +
    +
    + + + + + + + + + + +
    + + + + + + + + + + + + + + + + Obtain a {@link YarnClientApplication} for a new application, + which in turn contains the {@link ApplicationSubmissionContext} and + {@link org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse} + objects. +

    + + @return {@link YarnClientApplication} built for a new application + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Submit a new application to YARN. It is a blocking call - it + will not return {@link ApplicationId} until the submitted application is + submitted successfully and accepted by the ResourceManager. +

    + +

    + Users should provide an {@link ApplicationId} as part of the parameter + {@link ApplicationSubmissionContext} when submitting a new application, + otherwise it will throw the {@link ApplicationIdNotProvidedException}. +

    + +

    This internally calls {@link ApplicationClientProtocol#submitApplication + (SubmitApplicationRequest)}, and after that, it internally invokes + {@link ApplicationClientProtocol#getApplicationReport + (GetApplicationReportRequest)} and waits till it can make sure that the + application gets properly submitted. If RM fails over or RM restart + happens before ResourceManager saves the application's state, + {@link ApplicationClientProtocol + #getApplicationReport(GetApplicationReportRequest)} will throw + the {@link ApplicationNotFoundException}. This API automatically resubmits + the application with the same {@link ApplicationSubmissionContext} when it + catches the {@link ApplicationNotFoundException}

    + + @param appContext + {@link ApplicationSubmissionContext} containing all the details + needed to submit a new application + @return {@link ApplicationId} of the accepted application + @throws YarnException + @throws IOException + @see #createApplication()]]> +
    +
    + + + + + + + Fail an application attempt identified by given ID. +

    + + @param applicationAttemptId + {@link ApplicationAttemptId} of the attempt to fail. + @throws YarnException + in case of errors or if YARN rejects the request due to + access-control restrictions. + @throws IOException + @see #getQueueAclsInfo()]]> +
    +
    + + + + + + + Kill an application identified by given ID. +

    + + @param applicationId + {@link ApplicationId} of the application that needs to be killed + @throws YarnException + in case of errors or if YARN rejects the request due to + access-control restrictions. + @throws IOException + @see #getQueueAclsInfo()]]> +
    +
    + + + + + + + + Kill an application identified by given ID. +

    + @param applicationId {@link ApplicationId} of the application that needs to + be killed + @param diagnostics for killing an application. + @throws YarnException in case of errors or if YARN rejects the request due + to access-control restrictions. + @throws IOException]]> +
    +
    + + + + + + + Get a report of the given Application. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + +

    + If the user does not have VIEW_APP access then the following + fields in the report will be set to stubbed values: +

      +
    • host - set to "N/A"
    • +
    • RPC port - set to -1
    • +
    • client token - set to "N/A"
    • +
    • diagnostics - set to "N/A"
    • +
    • tracking URL - set to "N/A"
    • +
    • original tracking URL - set to "N/A"
    • +
    • resource usage report - all values are -1
    • +
    + + @param appId + {@link ApplicationId} of the application that needs a report + @return application report + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The AMRM token is required for AM to RM scheduling operations. For + managed Application Masters YARN takes care of injecting it. For unmanaged + Applications Masters, the token must be obtained via this method and set + in the {@link org.apache.hadoop.security.UserGroupInformation} of the + current user. +

    + The AMRM token will be returned only if all the following conditions are + met: +

      +
    • the requester is the owner of the ApplicationMaster
    • +
    • the application master is an unmanaged ApplicationMaster
    • +
    • the application master is in ACCEPTED state
    • +
    + Else this method returns NULL. + + @param appId {@link ApplicationId} of the application to get the AMRM token + @return the AMRM token if available + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Get a report (ApplicationReport) of all Applications in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @return a list of reports of all running applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report (ApplicationReport) of Applications + matching the given application types in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param applicationTypes set of application types you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report (ApplicationReport) of Applications matching the given + application states in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param applicationStates set of application states you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + Get a report (ApplicationReport) of Applications matching the given + application types and application states in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param applicationTypes set of application types you are interested in + @param applicationStates set of application states you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + + Get a report (ApplicationReport) of Applications matching the given + application types, application states and application tags in the cluster. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param applicationTypes set of application types you are interested in + @param applicationStates set of application states you are interested in + @param applicationTags set of application tags you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + + + Get a report (ApplicationReport) of Applications matching the given users, + queues, application types and application states in the cluster. If any of + the params is set to null, it is not used when filtering. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param queues set of queues you are interested in + @param users set of users you are interested in + @param applicationTypes set of application types you are interested in + @param applicationStates set of application states you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a list of ApplicationReports that match the given + {@link GetApplicationsRequest}. +

    + +

    + If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

    + + @param request the request object to get the list of applications. + @return The list of ApplicationReports that match the request + @throws YarnException Exception specific to YARN. + @throws IOException Exception mostly related to connection errors.]]> +
    +
    + + + + + + Get metrics ({@link YarnClusterMetrics}) about the cluster. +

    + + @return cluster metrics + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report of nodes ({@link NodeReport}) in the cluster. +

    + + @param states The {@link NodeState}s to filter on. If no filter states are + given, nodes in all states will be returned. + @return A list of node reports + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a delegation token so as to be able to talk to YARN using those tokens. + + @param renewer + Address of the renewer who can renew these tokens when needed by + securely talking to YARN. + @return a delegation token ({@link Token}) that can be used to + talk to YARN + @throws YarnException + @throws IOException]]> + + + + + + + + + Get information ({@link QueueInfo}) about a given queue. +

    + + @param queueName + Name of the queue whose information is needed + @return queue information + @throws YarnException + in case of errors or if YARN rejects the request due to + access-control restrictions. + @throws IOException]]> +
    +
    + + + + + + Get information ({@link QueueInfo}) about all queues, recursively if there + is a hierarchy +

    + + @return a list of queue-information for all queues + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Get information ({@link QueueInfo}) about top level queues. +

    + + @return a list of queue-information for all the top-level queues + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get information ({@link QueueInfo}) about all the immediate children queues + of the given queue +

    + + @param parent + Name of the queue whose child-queues' information is needed + @return a list of queue-information for all queues who are direct children + of the given parent queue. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Get information about acls for current user on all the + existing queues. +

    + + @return a list of queue acls ({@link QueueUserACLInfo}) for + current user + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report of the given ApplicationAttempt. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + + @param applicationAttemptId + {@link ApplicationAttemptId} of the application attempt that needs + a report + @return application attempt report + @throws YarnException + @throws ApplicationAttemptNotFoundException if application attempt + not found + @throws IOException]]> +
    +
    + + + + + + + Get a report of all (ApplicationAttempts) of Application in the cluster. +

    + + @param applicationId application id of the app + @return a list of reports for all application attempts for specified + application. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + Get a report of the given Container. +

    + +

    + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

    + + @param containerId + {@link ContainerId} of the container that needs a report + @return container report + @throws YarnException + @throws ContainerNotFoundException if container not found. + @throws IOException]]> +
    +
    + + + + + + + Get a report of all (Containers) of ApplicationAttempt in the cluster. +

    + + @param applicationAttemptId application attempt id + @return a list of reports of all containers for specified application + attempts + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + Attempts to move the given application to the given queue. +

    + + @param appId + Application to move. + @param queue + Queue to place it in to. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + Obtain a {@link GetNewReservationResponse} for a new reservation, + which contains the {@link ReservationId} object. +

    + + @return The {@link GetNewReservationResponse} containing a new + {@link ReservationId} object. + @throws YarnException if reservation cannot be created. + @throws IOException if reservation cannot be created.]]> +
    +
    + + + + + + + The interface used by clients to submit a new reservation to the + {@code ResourceManager}. +

    + +

    + The client packages all details of its request in a + {@link ReservationSubmissionRequest} object. This contains information + about the amount of capacity, temporal constraints, and gang needs. + Furthermore, the reservation might be composed of multiple stages, with + ordering dependencies among them. +

    + +

    + In order to respond, a new admission control component in the + {@code ResourceManager} performs an analysis of the resources that have + been committed over the period of time the user is requesting, verify that + the user requests can be fulfilled, and that it respect a sharing policy + (e.g., {@code CapacityOverTimePolicy}). Once it has positively determined + that the ReservationRequest is satisfiable the {@code ResourceManager} + answers with a {@link ReservationSubmissionResponse} that includes a + {@link ReservationId}. Upon failure to find a valid allocation the response + is an exception with the message detailing the reason of failure. +

    + +

    + The semantics guarantees that the {@link ReservationId} returned, + corresponds to a valid reservation existing in the time-range request by + the user. The amount of capacity dedicated to such reservation can vary + overtime, depending of the allocation that has been determined. But it is + guaranteed to satisfy all the constraint expressed by the user in the + {@link ReservationDefinition} +

    + + @param request request to submit a new Reservation + @return response contains the {@link ReservationId} on accepting the + submission + @throws YarnException if the reservation cannot be created successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to update an existing Reservation. This is + referred to as a re-negotiation process, in which a user that has + previously submitted a Reservation. +

    + +

    + The allocation is attempted by virtually substituting all previous + allocations related to this Reservation with new ones, that satisfy the new + {@link ReservationDefinition}. Upon success the previous allocation is + atomically substituted by the new one, and on failure (i.e., if the system + cannot find a valid allocation for the updated request), the previous + allocation remains valid. +

    + + @param request to update an existing Reservation (the + {@link ReservationUpdateRequest} should refer to an existing valid + {@link ReservationId}) + @return response empty on successfully updating the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + updated successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to remove an existing Reservation. +

    + + @param request to remove an existing Reservation (the + {@link ReservationDeleteRequest} should refer to an existing valid + {@link ReservationId}) + @return response empty on successfully deleting the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + deleted successfully + @throws IOException]]> +
    +
    + + + + + + + The interface used by clients to get the list of reservations in a plan. + The reservationId will be used to search for reservations to list if it is + provided. Otherwise, it will select active reservations within the + startTime and endTime (inclusive). +

    + + @param request to list reservations in a plan. Contains fields to select + String queue, ReservationId reservationId, long startTime, + long endTime, and a bool includeReservationAllocations. + + queue: Required. Cannot be null or empty. Refers to the + reservable queue in the scheduler that was selected when + creating a reservation submission + {@link ReservationSubmissionRequest}. + + reservationId: Optional. If provided, other fields will + be ignored. + + startTime: Optional. If provided, only reservations that + end after the startTime will be selected. This defaults + to 0 if an invalid number is used. + + endTime: Optional. If provided, only reservations that + start on or before endTime will be selected. This defaults + to Long.MAX_VALUE if an invalid number is used. + + includeReservationAllocations: Optional. Flag that + determines whether the entire reservation allocations are + to be returned. Reservation allocations are subject to + change in the event of re-planning as described by + {@link ReservationDefinition}. + + @return response that contains information about reservations that are + being searched for. + @throws YarnException if the request is invalid + @throws IOException if the request failed otherwise]]> +
    +
    + + + + + + The interface used by client to get node to labels mappings in existing cluster +

    + + @return node to labels mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by client to get labels to nodes mapping + in existing cluster +

    + + @return node to labels mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get labels to nodes mapping + for specified labels in existing cluster +

    + + @param labels labels for which labels to nodes mapping has to be retrieved + @return labels to nodes mappings for specific labels + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + The interface used by client to get node labels in the cluster +

    + + @return cluster node labels collection + @throws YarnException when there is a failure in + {@link ApplicationClientProtocol} + @throws IOException when there is a failure in + {@link ApplicationClientProtocol}]]> +
    +
    + + + + + + + + The interface used by client to set priority of an application +

    + @param applicationId + @param priority + @return updated priority of an application. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + Signal a container identified by given ID. +

    + + @param containerId + {@link ContainerId} of the container that needs to be signaled + @param command the signal container command + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + + + + + Get the resource profiles available in the RM. +

    + @return a Map of the resource profile names to their capabilities + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
    +
    + + + + + + + Get the details of a specific resource profile from the RM. +

    + @param profile the profile name + @return resource profile name with its capabilities + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other others]]> +
    +
    + + + + + + Get available resource types supported by RM. +

    + @return list of supported resource types with detailed information + @throws YarnException if any issue happens inside YARN + @throws IOException in case of other others]]> +
    +
    + + + + + + The interface used by client to get node attributes in the cluster. +

    + + @return cluster node attributes collection + @throws YarnException when there is a failure in + {@link ApplicationClientProtocol} + @throws IOException when there is a failure in + {@link ApplicationClientProtocol}]]> +
    +
    + + + + + + + The interface used by client to get mapping of AttributeKey to associated + NodeToAttributeValue list for specified node attributeKeys in the cluster. +

    + + @param attributes AttributeKeys for which associated NodeToAttributeValue + mapping value has to be retrieved. If empty or null is set then + will return mapping for all attributeKeys in the cluster + @return mapping of AttributeKey to List of associated + NodeToAttributeValue's. + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get all node to attribute mapping in + existing cluster. +

    + + @param hostNames HostNames for which host to attributes mapping has to + be retrived.If empty or null is set then will return + all nodes to attributes mapping in cluster. + @return Node to attribute mappings + @throws YarnException + @throws IOException]]> +
    +
    + + + + + + + The interface used by client to get a shell to a container. +

    + + @param containerId Container ID + @param command Shell type + @throws IOException if connection fails.]]> +
    +
    +
    + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + Create a new instance of AMRMClientAsync.

    + + @param intervalMs heartbeat interval in milliseconds between AM and RM + @param callbackHandler callback handler that processes responses from + the ResourceManager]]> +
    +
    + + + + + + Create a new instance of AMRMClientAsync.

    + + @param client the AMRMClient instance + @param intervalMs heartbeat interval in milliseconds between AM and RM + @param callbackHandler callback handler that processes responses from + the ResourceManager]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RegisterApplicationMasterResponse + @throws YarnException + @throws IOException]]> + + + + + + + + + + + + + + + + allocate + @param req Resource request]]> + + + + + + + + + + + + + allocate. + Any previous pending resource change request of the same container will be + removed. + + Application that calls this method is expected to maintain the + Containers that are returned from previous successful + allocations or resource changes. By passing in the existing container and a + target resource capability to this method, the application requests the + ResourceManager to change the existing resource allocation to the target + resource allocation. + + @deprecated use + {@link #requestContainerUpdate(Container, UpdateContainerRequest)} + + @param container The container returned from the last successful resource + allocation or resource change + @param capability The target resource capability of the container]]> + + + + + + + allocate. + Any previous pending update request of the same container will be + removed. + + @param container The container returned from the last successful resource + allocation or update + @param updateContainerRequest The UpdateContainerRequest.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + check to return true for each 1000 ms. + See also {@link #waitFor(java.util.function.Supplier, int)} + and {@link #waitFor(java.util.function.Supplier, int, int)} + @param check the condition for which it should wait]]> + + + + + + + + check to return true for each + checkEveryMillis ms. + See also {@link #waitFor(java.util.function.Supplier, int, int)} + @param check user defined checker + @param checkEveryMillis interval to call check]]> + + + + + + + + + check to return true for each + checkEveryMillis ms. In the main loop, this method will log + the message "waiting in main loop" for each logInterval times + iteration to confirm the thread is alive. + @param check user defined checker + @param checkEveryMillis interval to call check + @param logInterval interval to log for each]]> + + + + + + + + + + AMRMClientAsync handles communication with the ResourceManager + and provides asynchronous updates on events such as container allocations and + completions. It contains a thread that sends periodic heartbeats to the + ResourceManager. + + It should be used by implementing a CallbackHandler: +
    + {@code
    + class MyCallbackHandler extends AMRMClientAsync.AbstractCallbackHandler {
    +   public void onContainersAllocated(List containers) {
    +     [run tasks on the containers]
    +   }
    +
    +   public void onContainersUpdated(List containers) {
    +     [determine if resource allocation of containers have been increased in
    +      the ResourceManager, and if so, inform the NodeManagers to increase the
    +      resource monitor/enforcement on the containers]
    +   }
    +
    +   public void onContainersCompleted(List statuses) {
    +     [update progress, check whether app is done]
    +   }
    +   
    +   public void onNodesUpdated(List updated) {}
    +   
    +   public void onReboot() {}
    + }
    + }
    + 
    + + The client's lifecycle should be managed similarly to the following: + +
    + {@code
    + AMRMClientAsync asyncClient = 
    +     createAMRMClientAsync(appAttId, 1000, new MyCallbackhandler());
    + asyncClient.init(conf);
    + asyncClient.start();
    + RegisterApplicationMasterResponse response = asyncClient
    +    .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
    +       appMasterTrackingUrl);
    + asyncClient.addContainerRequest(containerRequest);
    + [... wait for application to complete]
    + asyncClient.unregisterApplicationMaster(status, appMsg, trackingUrl);
    + asyncClient.stop();
    + }
    + 
    ]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Update the resources of a container.

    + +

    The ApplicationMaster or other applications that use the + client must provide the details of the container, including the Id and + the target resource encapsulated in the updated container token via + {@link Container}. +

    + + @param container the container with updated token.]]> +
    +
    + + + + + + Re-Initialize the Container.

    + + @param containerId the Id of the container to Re-Initialize. + @param containerLaunchContex the updated ContainerLaunchContext. + @param autoCommit commit re-initialization automatically ?]]> +
    +
    + + + + Restart the specified container.

    + + @param containerId the Id of the container to restart.]]> +
    +
    + + + + Rollback last reInitialization of the specified container.

    + + @param containerId the Id of the container to restart.]]> +
    +
    + + + + Commit last reInitialization of the specified container.

    + + @param containerId the Id of the container to commit reInitialize.]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + NMClientAsync handles communication with all the NodeManagers + and provides asynchronous updates on getting responses from them. It + maintains a thread pool to communicate with individual NMs where a number of + worker threads process requests to NMs by using {@link NMClientImpl}. The max + size of the thread pool is configurable through + {@link YarnConfiguration#NM_CLIENT_ASYNC_THREAD_POOL_MAX_SIZE}. + + It should be used in conjunction with a CallbackHandler. For example + +
    + {@code
    + class MyCallbackHandler extends NMClientAsync.AbstractCallbackHandler {
    +   public void onContainerStarted(ContainerId containerId,
    +       Map allServiceResponse) {
    +     [post process after the container is started, process the response]
    +   }
    +
    +   public void onContainerResourceIncreased(ContainerId containerId,
    +       Resource resource) {
    +     [post process after the container resource is increased]
    +   }
    +
    +   public void onContainerStatusReceived(ContainerId containerId,
    +       ContainerStatus containerStatus) {
    +     [make use of the status of the container]
    +   }
    +
    +   public void onContainerStopped(ContainerId containerId) {
    +     [post process after the container is stopped]
    +   }
    +
    +   public void onStartContainerError(
    +       ContainerId containerId, Throwable t) {
    +     [handle the raised exception]
    +   }
    +
    +   public void onGetContainerStatusError(
    +       ContainerId containerId, Throwable t) {
    +     [handle the raised exception]
    +   }
    +
    +   public void onStopContainerError(
    +       ContainerId containerId, Throwable t) {
    +     [handle the raised exception]
    +   }
    + }
    + }
    + 
    + + The client's life-cycle should be managed like the following: + +
    + {@code
    + NMClientAsync asyncClient = 
    +     NMClientAsync.createNMClientAsync(new MyCallbackhandler());
    + asyncClient.init(conf);
    + asyncClient.start();
    + asyncClient.startContainer(container, containerLaunchContext);
    + [... wait for container being started]
    + asyncClient.getContainerStatus(container.getId(), container.getNodeId(),
    +     container.getContainerToken());
    + [... handle the status in the callback instance]
    + asyncClient.stopContainer(container.getId(), container.getNodeId(),
    +     container.getContainerToken());
    + [... wait for container being stopped]
    + asyncClient.stop();
    + }
    + 
    ]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml new file mode 100644 index 00000000000..311a793df5a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.3.5.xml @@ -0,0 +1,3982 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Type of proxy. + @return Proxy to the ResourceManager for the specified client protocol. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create a new instance of AppAdminClient. +

    + + @param appType application type + @param conf configuration + @return app admin client]]> +
    +
    + + + + + + + + + + Launch a new YARN application. +

    + + @param fileName specification of application + @param appName name of the application + @param lifetime lifetime of the application + @param queue queue of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + Stop a YARN application (attempt to stop gracefully before killing the + application). In the case of a long-running service, the service may be + restarted later. +

    + + @param appName the name of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + Start a YARN application from a previously saved specification. In the + case of a long-running service, the service must have been previously + launched/started and then stopped, or previously saved but not started. +

    + + @param appName the name of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + + + + Save the specification for a YARN application / long-running service. + The application may be started later. +

    + + @param fileName specification of application to save + @param appName name of the application + @param lifetime lifetime of the application + @param queue queue of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + Remove the specification and all application data for a YARN application. + The application cannot be running. +

    + + @param appName the name of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + + Change the number of running containers for a component of a YARN + application / long-running service. +

    + + @param appName the name of the application + @param componentCounts map of component name to new component count or + amount to change existing component count (e.g. + 5, +5, -5) + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + Upload AM dependencies to HDFS. This makes future application launches + faster since the dependencies do not have to be uploaded on each launch. +

    + + @param destinationFolder + an optional HDFS folder where dependency tarball will be uploaded + @return exit code + @throws IOException + IOException + @throws YarnException + exception in client or server]]> +
    +
    + + + + + + + Get detailed app specific status string for a YARN application. +

    + + @param appIdOrName appId or appName + @return status string + @throws IOException IOException + @throws YarnException exception in client or server]]> +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + Send the information of a number of conceptual entities to the timeline + server. It is a blocking API. The method will not return until it gets the + response from the timeline server. +

    + + @param entities + the collection of {@link TimelineEntity} + @return the error information if the sent entities are not correctly stored + @throws IOException if there are I/O errors + @throws YarnException if entities are incomplete/invalid]]> +
    +
    + + + + + + + + + Send the information of a number of conceptual entities to the timeline + server. It is a blocking API. The method will not return until it gets the + response from the timeline server. + + This API is only for timeline service v1.5 +

    + + @param appAttemptId {@link ApplicationAttemptId} + @param groupId {@link TimelineEntityGroupId} + @param entities + the collection of {@link TimelineEntity} + @return the error information if the sent entities are not correctly stored + @throws IOException if there are I/O errors + @throws YarnException if entities are incomplete/invalid]]> +
    +
    + + + + + + + Send the information of a domain to the timeline server. It is a + blocking API. The method will not return until it gets the response from + the timeline server. +

    + + @param domain + an {@link TimelineDomain} object + @throws IOException + @throws YarnException]]> +
    +
    + + + + + + + + Send the information of a domain to the timeline server. It is a + blocking API. The method will not return until it gets the response from + the timeline server. + + This API is only for timeline service v1.5 +

    + + @param domain + an {@link TimelineDomain} object + @param appAttemptId {@link ApplicationAttemptId} + @throws IOException + @throws YarnException]]> +
    +
    + + + + + + + Get a delegation token so as to be able to talk to the timeline server in a + secure way. +

    + + @param renewer + Address of the renewer who can renew these tokens when needed by + securely talking to the timeline server + @return a delegation token ({@link Token}) that can be used to talk to the + timeline server + @throws IOException + @throws YarnException]]> +
    +
    + + + + + + + Renew a timeline delegation token. +

    + + @param timelineDT + the delegation token to renew + @return the new expiration time + @throws IOException + @throws YarnException]]> +
    +
    + + + + + + + Cancel a timeline delegation token. +

    + + @param timelineDT + the delegation token to cancel + @throws IOException + @throws YarnException]]> +
    +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + parameterized event of type T]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + InputStream to be checksumed + @return the message digest of the input stream + @throws IOException]]> + + + + + + + + + + + + SharedCacheChecksum object based on the configurable + algorithm implementation + (see yarn.sharedcache.checksum.algo.impl) + + @return SharedCacheChecksum object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The object type on which this state machine operates. + @param The state of the entity. + @param The external eventType to be handled. + @param The event object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When {@link #limit} would be reached on append, past messages will be + truncated from head, and a header telling the user about truncation will be + prepended, with ellipses in between header and messages. +

    + Note that header and ellipses are not counted against {@link #limit}. +

    + An example: + +

    + {@code
    +   // At the beginning it's an empty string
    +   final Appendable shortAppender = new BoundedAppender(80);
    +   // The whole message fits into limit
    +   shortAppender.append(
    +       "message1 this is a very long message but fitting into limit\n");
    +   // The first message is truncated, the second not
    +   shortAppender.append("message2 this is shorter than the previous one\n");
    +   // The first message is deleted, the second truncated, the third
    +   // preserved
    +   shortAppender.append("message3 this is even shorter message, maybe.\n");
    +   // The first two are deleted, the third one truncated, the last preserved
    +   shortAppender.append("message4 the shortest one, yet the greatest :)");
    +   // Current contents are like this:
    +   // Diagnostic messages truncated, showing last 80 chars out of 199:
    +   // ...s is even shorter message, maybe.
    +   // message4 the shortest one, yet the greatest :)
    + }
    + 
    +

    + Note that null values are {@link #append(CharSequence) append}ed + just like in {@link StringBuilder#append(CharSequence) original + implementation}. +

    + Note that this class is not thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.3.5.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.3.5.xml new file mode 100644 index 00000000000..123217545fe --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.3.5.xml @@ -0,0 +1,1456 @@ + + + + + + + + + + + + + + + + + + + + + + + + true if the node is healthy, else false]]> + + + + + diagnostic health report of the node. + @return diagnostic health report of the node]]> + + + + + last timestamp at which the health report was received. + @return last timestamp at which the health report was received]]> + + + + + It includes information such as: +

      +
    • + An indicator of whether the node is healthy, as determined by the + health-check script. +
    • +
    • The previous time at which the health status was reported.
    • +
    • A diagnostic report on the health status.
    • +
    + + @see NodeReport + @see ApplicationClientProtocol#getClusterNodes(org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest)]]> +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + type of the proxy + @return the proxy instance + @throws IOException if fails to create the proxy]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true if the iteration has more elements.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +