<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<!-- Do not modify this file directly.  Instead, copy entries that you -->
<!-- wish to modify from this file into core-site.xml and change them -->
<!-- there.  If core-site.xml does not already exist, create it.      -->
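
<!-- Illustrative example (not part of the defaults): to override a value,
     add a property element to core-site.xml. The property name below exists
     in this file; the value shown is only a placeholder you would choose.

     <configuration>
       <property>
         <name>hadoop.tmp.dir</name>
         <value>/var/lib/hadoop/tmp</value>
       </property>
     </configuration>
-->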

<configuration>

<!--- global properties -->

<property>
  <name>hadoop.common.configuration.version</name>
  <value>3.0.0</value>
  <description>version of this configuration file</description>
</property>

<property>
  <name>hadoop.tmp.dir</name>
  <value>/tmp/hadoop-${user.name}</value>
  <description>A base for other temporary directories.</description>
</property>

<property>
  <name>hadoop.http.filter.initializers</name>
  <value>org.apache.hadoop.http.lib.StaticUserWebFilter</value>
  <description>A comma separated list of class names. Each class in the list
    must extend org.apache.hadoop.http.FilterInitializer. The corresponding
    Filter will be initialized. Then, the Filter will be applied to all user
    facing jsp and servlet web pages. The ordering of the list defines the
    ordering of the filters.</description>
</property>

<property>
  <name>hadoop.http.idle_timeout.ms</name>
  <value>60000</value>
  <description>
    NN/JN/DN Server connection timeout in milliseconds.
  </description>
</property>

<property>
  <name>hadoop.http.metrics.enabled</name>
  <value>true</value>
  <description>
    If true, attach Jetty's StatisticsHandler to the HTTP server to collect
    HTTP-layer metrics and register them with the Hadoop metrics system.
  </description>
</property>

<!--- security properties -->

<property>
  <name>hadoop.security.authorization</name>
  <value>false</value>
  <description>Is service-level authorization enabled?</description>
</property>

<property>
  <name>hadoop.security.instrumentation.requires.admin</name>
  <value>false</value>
  <description>
    Indicates if administrator ACLs are required to access
    instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF).
  </description>
</property>

<property>
  <name>hadoop.security.authentication</name>
  <value>simple</value>
  <description>Possible values are simple (no authentication) and kerberos.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping</name>
  <value>org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback</value>
  <description>
    Class for user to group mapping (get groups for a given user) for ACL.
    The default implementation,
    org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback,
    will determine if the Java Native Interface (JNI) is available. If JNI is
    available the implementation will use the API within hadoop to resolve a
    list of groups for a user. If JNI is not available then the shell
    implementation, ShellBasedUnixGroupsMapping, is used. This implementation
    shells out to the Linux/Unix environment with the
    <code>bash -c groups</code> command to resolve a list of groups for a user.
  </description>
</property>
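
<!-- Illustrative example (not one of the defaults): to resolve groups
     directly against LDAP instead of the local Unix account database,
     core-site.xml can select the LDAP mapping implementation configured
     by the hadoop.security.group.mapping.ldap.* properties below.

     <property>
       <name>hadoop.security.group.mapping</name>
       <value>org.apache.hadoop.security.LdapGroupsMapping</value>
     </property>
-->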

<property>
  <name>hadoop.security.dns.interface</name>
  <description>
    The name of the Network Interface from which the service should determine
    its host name for Kerberos login, e.g. eth2. In a multi-homed environment,
    the setting can be used to affect the _HOST substitution in the service
    Kerberos principal. If this configuration value is not set, the service
    will use its default hostname as returned by
    InetAddress.getLocalHost().getCanonicalHostName().

    Most clusters will not require this setting.
  </description>
</property>

<property>
  <name>hadoop.security.dns.nameserver</name>
  <description>
    The host name or IP address of the name server (DNS) which a service Node
    should use to determine its own host name for Kerberos Login. Requires
    hadoop.security.dns.interface.

    Most clusters will not require this setting.
  </description>
</property>

<property>
  <name>hadoop.security.resolver.impl</name>
  <value>org.apache.hadoop.net.DNSDomainNameResolver</value>
  <description>
    The resolver implementation used to resolve FQDN for Kerberos
  </description>
</property>

<property>
  <name>hadoop.security.dns.log-slow-lookups.enabled</name>
  <value>false</value>
  <description>
    Time name lookups (via SecurityUtil) and log them if they exceed the
    configured threshold.
  </description>
</property>

<property>
  <name>hadoop.security.dns.log-slow-lookups.threshold.ms</name>
  <value>1000</value>
  <description>
    If slow lookup logging is enabled, this threshold is used to decide if a
    lookup is considered slow enough to be logged.
  </description>
</property>

<property>
  <name>hadoop.security.groups.cache.secs</name>
  <value>300</value>
  <description>
    This is the config controlling the validity of the entries in the cache
    containing the user->group mapping. When this duration has expired,
    then the implementation of the group mapping provider is invoked to get
    the groups of the user and then cached back.
  </description>
</property>

<property>
  <name>hadoop.security.groups.negative-cache.secs</name>
  <value>30</value>
  <description>
    Expiration time for entries in the negative user-to-group mapping
    cache, in seconds. This is useful when invalid users are retrying
    frequently. It is suggested to set a small value for this expiration, since
    a transient error in group lookup could temporarily lock out a legitimate
    user.

    Set this to zero or a negative value to disable negative user-to-group caching.
  </description>
</property>

<property>
  <name>hadoop.security.groups.cache.warn.after.ms</name>
  <value>5000</value>
  <description>
    If looking up the groups of a single user takes longer than this number of
    milliseconds, we will log a warning message.
  </description>
</property>

<property>
  <name>hadoop.security.groups.cache.background.reload</name>
  <value>false</value>
  <description>
    Whether to reload expired user->group mappings using a background thread
    pool. If set to true, a pool of
    hadoop.security.groups.cache.background.reload.threads is created to
    update the cache in the background.
  </description>
</property>

<property>
  <name>hadoop.security.groups.cache.background.reload.threads</name>
  <value>3</value>
  <description>
    Only relevant if hadoop.security.groups.cache.background.reload is true.
    Controls the number of concurrent background user->group cache entry
    refreshes. Pending refresh requests beyond this value are queued and
    processed when a thread is free.
  </description>
</property>

<property>
  <name>hadoop.security.groups.shell.command.timeout</name>
  <value>0s</value>
  <description>
    Used by the ShellBasedUnixGroupsMapping class, this property controls how
    long to wait for the underlying shell command that is run to fetch groups.
    Expressed in seconds (e.g. 10s, 1m, etc.), if the running command takes
    longer than the value configured, the command is aborted and the groups
    resolver would return a result of no groups found. A value of 0s (default)
    would mean an infinite wait (i.e. wait until the command exits on its own).
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.connection.timeout.ms</name>
  <value>60000</value>
  <description>
    This property is the connection timeout (in milliseconds) for LDAP
    operations. If the LDAP provider doesn't establish a connection within the
    specified period, it will abort the connect attempt. A non-positive value
    means no LDAP connection timeout is specified, in which case it waits for the
    connection to establish until the underlying network times out.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.read.timeout.ms</name>
  <value>60000</value>
  <description>
    This property is the read timeout (in milliseconds) for LDAP
    operations. If the LDAP provider doesn't get an LDAP response within the
    specified period, it will abort the read attempt. A non-positive value
    means no read timeout is specified, in which case it waits for the response
    infinitely.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.num.attempts</name>
  <value>3</value>
  <description>
    This property is the number of attempts to be made for LDAP operations.
    If this limit is exceeded, LdapGroupsMapping will return an empty
    group list.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.num.attempts.before.failover</name>
  <value>3</value>
  <description>
    This property is the number of attempts to be made for LDAP operations
    using a single LDAP instance. If multiple LDAP servers are configured
    and this number of failed operations is reached, we will switch to the
    next LDAP server. The configuration for the overall number of attempts
    will still be respected; failover will thus be performed only if this
    property is less than hadoop.security.group.mapping.ldap.num.attempts.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.url</name>
  <value></value>
  <description>
    The URL of the LDAP server(s) to use for resolving user groups when using
    the LdapGroupsMapping user to group mapping. Supports configuring multiple
    LDAP servers via a comma-separated list.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.ssl</name>
  <value>false</value>
  <description>
    Whether or not to use SSL when connecting to the LDAP server.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.ssl.keystore</name>
  <value></value>
  <description>
    File path to the SSL keystore that contains the SSL certificate required
    by the LDAP server.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.ssl.keystore.password.file</name>
  <value></value>
  <description>
    The path to a file containing the password of the LDAP SSL keystore. If
    the password is not configured in credential providers and the property
    hadoop.security.group.mapping.ldap.ssl.keystore.password is not set,
    LDAPGroupsMapping reads the password from the file.

    IMPORTANT: This file should be readable only by the Unix user running
    the daemons and should be a local file.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.ssl.keystore.password</name>
  <value></value>
  <description>
    The password of the LDAP SSL keystore. This property name is used as an
    alias to get the password from credential providers. If the password can
    not be found and hadoop.security.credential.clear-text-fallback is true,
    LDAPGroupsMapping uses the value of this property for the password.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.conversion.rule</name>
  <value>none</value>
  <description>
    The rule is applied on the group names received from LDAP when
    RuleBasedLdapGroupsMapping is configured.
    Supported rules are "to_upper", "to_lower" and "none".
    to_upper: This will convert all the group names to uppercase.
    to_lower: This will convert all the group names to lowercase.
    none: This will retain the source formatting; this is the default value.
  </description>
</property>

<property>
  <name>hadoop.security.credential.clear-text-fallback</name>
  <value>true</value>
  <description>
    true or false to indicate whether or not to fall back to storing the credential
    password as clear text. The default value is true. This property only applies
    when the password cannot be found from credential providers.
  </description>
</property>

<property>
  <name>hadoop.security.credential.provider.path</name>
  <value></value>
  <description>
    A comma-separated list of URLs that indicates the type and
    location of a list of providers that should be consulted.
  </description>
</property>
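
<!-- Illustrative example (the path is a placeholder, not a default): a Java
     keystore credential provider on the local filesystem is typically
     referenced with a jceks URL, e.g.

     <property>
       <name>hadoop.security.credential.provider.path</name>
       <value>jceks://file/etc/hadoop/conf/credentials.jceks</value>
     </property>
-->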

<property>
  <name>hadoop.security.credstore.java-keystore-provider.password-file</name>
  <value></value>
  <description>
    The path to a file containing the custom password for all keystores
    that may be configured in the provider path.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.ssl.truststore</name>
  <value></value>
  <description>
    File path to the SSL truststore that contains the root certificate used to
    sign the LDAP server's certificate. Specify this if the LDAP server's
    certificate is not signed by a well known certificate authority.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.ssl.truststore.password.file</name>
  <value></value>
  <description>
    The path to a file containing the password of the LDAP SSL truststore.

    IMPORTANT: This file should be readable only by the Unix user running
    the daemons.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.bind.users</name>
  <value></value>
  <description>
    Aliases of users to be used to bind as when connecting to the LDAP
    server(s). Each alias will have to have its distinguished name and
    password specified through:
    hadoop.security.group.mapping.ldap.bind.user
    and a password configuration such as:
    hadoop.security.group.mapping.ldap.bind.password.alias

    For example, if:
    hadoop.security.group.mapping.ldap.bind.users=alias1,alias2

    then the following configuration is valid:
    hadoop.security.group.mapping.ldap.bind.users.alias1.bind.user=bindUser1
    hadoop.security.group.mapping.ldap.bind.users.alias1.bind.password.alias=
    bindPasswordAlias1
    hadoop.security.group.mapping.ldap.bind.users.alias2.bind.user=bindUser2
    hadoop.security.group.mapping.ldap.bind.users.alias2.bind.password.alias=
    bindPasswordAlias2
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.bind.user</name>
  <value></value>
  <description>
    The distinguished name of the user to bind as when connecting to the LDAP
    server. This may be left blank if the LDAP server supports anonymous binds.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.bind.password.alias</name>
  <value></value>
  <description>
    The alias of the bind user to be used to get the password from credential
    providers. If the alias is empty, the property
    hadoop.security.group.mapping.ldap.bind.password is used instead.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.bind.password.file</name>
  <value></value>
  <description>
    The path to a file containing the password of the bind user. If
    the password is not configured in credential providers and the property
    hadoop.security.group.mapping.ldap.bind.password is not set,
    LDAPGroupsMapping reads the password from the file.

    IMPORTANT: This file should be readable only by the Unix user running
    the daemons and should be a local file.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.bind.password</name>
  <value></value>
  <description>
    The password of the bind user. This property name is used as an
    alias to get the password from credential providers. If the password can
    not be found and hadoop.security.credential.clear-text-fallback is true,
    LDAPGroupsMapping uses the value of this property for the password.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.base</name>
  <value></value>
  <description>
    The search base for the LDAP connection. This is a distinguished name,
    and will typically be the root of the LDAP directory.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.userbase</name>
  <value></value>
  <description>
    The search base for the LDAP connection for the user search query. This is a
    distinguished name, and is the root of the LDAP directory for users.
    If not set, hadoop.security.group.mapping.ldap.base is used.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.groupbase</name>
  <value></value>
  <description>
    The search base for the LDAP connection for the group search. This is a
    distinguished name, and is the root of the LDAP directory for groups.
    If not set, hadoop.security.group.mapping.ldap.base is used.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.search.filter.user</name>
  <value>(&amp;(objectClass=user)(sAMAccountName={0}))</value>
  <description>
    An additional filter to use when searching for LDAP users. The default will
    usually be appropriate for Active Directory installations. If connecting to
    an LDAP server with a non-AD schema, this should be replaced with
    (&amp;(objectClass=inetOrgPerson)(uid={0})). {0} is a special string used to
    denote where the username fits into the filter.

    If the LDAP server supports posixGroups, Hadoop can enable the feature by
    setting the value of this property to "posixAccount" and the value of
    the hadoop.security.group.mapping.ldap.search.filter.group property to
    "posixGroup".
  </description>
</property>
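
<!-- Illustrative example (mirrors the description above): resolving groups
     via posixGroups instead of the Active Directory defaults.

     <property>
       <name>hadoop.security.group.mapping.ldap.search.filter.user</name>
       <value>posixAccount</value>
     </property>
     <property>
       <name>hadoop.security.group.mapping.ldap.search.filter.group</name>
       <value>posixGroup</value>
     </property>
-->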

<property>
  <name>hadoop.security.group.mapping.ldap.search.filter.group</name>
  <value>(objectClass=group)</value>
  <description>
    An additional filter to use when searching for LDAP groups. This should be
    changed when resolving groups against a non-Active Directory installation.

    See the description of hadoop.security.group.mapping.ldap.search.filter.user
    to enable posixGroups support.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.search.attr.memberof</name>
  <value></value>
  <description>
    The attribute of the user object that identifies its group objects. By
    default, Hadoop makes two LDAP queries per user if this value is empty. If
    set, Hadoop will attempt to resolve group names from this attribute,
    instead of making the second LDAP query to get group objects. The value
    should be 'memberOf' for an MS AD installation.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.search.attr.member</name>
  <value>member</value>
  <description>
    The attribute of the group object that identifies the users that are
    members of the group. The default will usually be appropriate for
    any LDAP installation.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.search.attr.group.name</name>
  <value>cn</value>
  <description>
    The attribute of the group object that identifies the group name. The
    default will usually be appropriate for all LDAP systems.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.search.group.hierarchy.levels</name>
  <value>0</value>
  <description>
    The number of levels to go up the group hierarchy when determining
    which groups a user is part of. 0 represents checking just the
    group that the user belongs to. Each additional level will raise the
    time it takes to execute a query by at most
    hadoop.security.group.mapping.ldap.directory.search.timeout.
    The default will usually be appropriate for all LDAP systems.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.posix.attr.uid.name</name>
  <value>uidNumber</value>
  <description>
    The attribute of posixAccount to use when resolving group membership.
    Mostly useful for schemas wherein groups have memberUids that use an
    attribute other than uidNumber.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.posix.attr.gid.name</name>
  <value>gidNumber</value>
  <description>
    The attribute of posixAccount indicating the group id.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.directory.search.timeout</name>
  <value>10000</value>
  <description>
    The attribute applied to the LDAP SearchControl properties to set a
    maximum time limit when searching and awaiting a result.
    Set to 0 if an infinite wait period is desired.
    Default is 10 seconds. Units in milliseconds.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.ldap.group.search.filter.pattern</name>
  <value></value>
  <description>
    Comma separated values that need to be substituted in the group search
    filter during group lookup. The values are substituted in the order they
    appear in the list; the first value will replace {0}, the second {1}, and
    so on.
  </description>
</property>


<property>
  <name>hadoop.security.group.mapping.providers</name>
  <value></value>
  <description>
    Comma separated list of names of other providers to provide user to group
    mapping. Used by CompositeGroupsMapping.
  </description>
</property>

<property>
  <name>hadoop.security.group.mapping.providers.combined</name>
  <value>true</value>
  <description>
    true or false to indicate whether groups from the providers are combined or
    not. The default value is true. If true, then all the providers will be
    tried to get groups and all the groups are combined to return as the final
    results. Otherwise, providers are tried one by one in the configured list
    order, and if any groups are retrieved from any provider, then those groups
    will be returned without trying the remaining providers.
  </description>
</property>

<property>
  <name>hadoop.security.service.user.name.key</name>
  <value></value>
  <description>
    For those cases where the same RPC protocol is implemented by multiple
    servers, this configuration is required for specifying the principal
    name to use for the service when the client wishes to make an RPC call.
  </description>
</property>

<property>
  <name>fs.azure.user.agent.prefix</name>
  <value>unknown</value>
  <description>
    WASB passes a User-Agent header to the Azure back-end. The default value
    contains the WASB version, Java Runtime version, Azure Client library version,
    and the value of the configuration option fs.azure.user.agent.prefix.
  </description>
</property>

<property>
  <name>hadoop.security.uid.cache.secs</name>
  <value>14400</value>
  <description>
    This is the config controlling the validity of the entries in the cache
    containing the userId to userName and groupId to groupName mappings used by
    NativeIO getFstat().
  </description>
</property>

<property>
  <name>hadoop.service.shutdown.timeout</name>
  <value>30s</value>
  <description>
    Timeout to wait for each shutdown operation to complete.
    If a hook takes longer than this time to complete, it will be interrupted,
    so the service will shut down. This allows the service shutdown
    to recover from a blocked operation.
    Some shutdown hooks may need more time than this, for example when
    a large amount of data needs to be uploaded to an object store.
    In this situation: increase the timeout.

    The minimum duration of the timeout is 1 second, "1s".
  </description>
</property>

<property>
  <name>hadoop.rpc.protection</name>
  <value>authentication</value>
  <description>A comma-separated list of protection values for secured sasl
    connections. Possible values are authentication, integrity and privacy.
    authentication means authentication only and no integrity or privacy;
    integrity implies authentication and integrity are enabled; and privacy
    implies all of authentication, integrity and privacy are enabled.
    hadoop.security.saslproperties.resolver.class can be used to override
    the hadoop.rpc.protection for a connection at the server side.
  </description>
</property>

<property>
  <name>hadoop.security.saslproperties.resolver.class</name>
  <value></value>
  <description>SaslPropertiesResolver used to resolve the QOP used for a
    connection. If not specified, the full set of values specified in
    hadoop.rpc.protection is used while determining the QOP used for the
    connection. If a class is specified, then the QOP values returned by
    the class will be used while determining the QOP used for the connection.
  </description>
</property>

<property>
  <name>hadoop.security.sensitive-config-keys</name>
  <value>
      secret$
      password$
      ssl.keystore.pass$
      fs.s3a.server-side-encryption.key
      fs.s3a.*.server-side-encryption.key
      fs.s3a.encryption.algorithm
      fs.s3a.encryption.key
      fs.s3a.secret.key
      fs.s3a.*.secret.key
      fs.s3a.session.key
      fs.s3a.*.session.key
      fs.s3a.session.token
      fs.s3a.*.session.token
      fs.azure.account.key.*
      fs.azure.oauth2.*
      fs.adl.oauth2.*
      fs.gs.encryption.*
      fs.gs.proxy.*
      fs.gs.auth.*
      credential$
      oauth.*secret
      oauth.*password
      oauth.*token
      hadoop.security.sensitive-config-keys
  </value>
  <description>A comma-separated or multi-line list of regular expressions to
    match configuration keys that should be redacted where appropriate, for
    example, when logging modified properties during a reconfiguration,
    private credentials should not be logged.
  </description>
</property>

<property>
  <name>hadoop.security.token.service.use_ip</name>
  <value>true</value>
  <description>
    Controls whether tokens always use IP addresses.
    DNS changes will not be detected if this option is enabled.
    Existing client connections that break will always reconnect
    to the IP of the original host. New clients will connect
    to the host's new IP but fail to locate a token.
    Disabling this option will allow existing and new clients
    to detect an IP change and continue to locate the new host's token.

    In secure multi-homed environments, this parameter will need to
    be set to false on both cluster servers and clients (see HADOOP-7733).
    If it is not set correctly, the symptom will be inability to
    submit an application to YARN from an external client
    (with error "client host not a member of the Hadoop cluster"),
    or even from an in-cluster client if server failover occurs.
  </description>
</property>

<property>
  <name>hadoop.workaround.non.threadsafe.getpwuid</name>
  <value>true</value>
  <description>Some operating systems or authentication modules are known to
    have broken implementations of getpwuid_r and getpwgid_r, such that these
    calls are not thread-safe. Symptoms of this problem include JVM crashes
    with a stack trace inside these functions. If your system exhibits this
    issue, enable this configuration parameter to include a lock around the
    calls as a workaround.

    An incomplete list of some systems known to have this issue is available
    at http://wiki.apache.org/hadoop/KnownBrokenPwuidImplementations
  </description>
</property>

<property>
  <name>hadoop.kerberos.kinit.command</name>
  <value>kinit</value>
  <description>Used to periodically renew Kerberos credentials when provided
    to Hadoop. The default setting assumes that kinit is in the PATH of users
    running the Hadoop client. Change this to the absolute path to kinit if this
    is not the case.
  </description>
</property>

<property>
  <name>hadoop.kerberos.min.seconds.before.relogin</name>
  <value>60</value>
  <description>The minimum time between relogin attempts for Kerberos, in
    seconds.
  </description>
</property>

<property>
  <name>hadoop.kerberos.keytab.login.autorenewal.enabled</name>
  <value>false</value>
  <description>Used to enable automatic renewal of keytab based kerberos login.
    By default the automatic renewal is disabled for keytab based kerberos login.
  </description>
</property>

<property>
  <name>hadoop.security.auth_to_local</name>
  <value></value>
  <description>Maps kerberos principals to local user names</description>
</property>
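
<!-- Illustrative example (EXAMPLE.COM is a placeholder realm, not a default):
     auth_to_local rules map Kerberos principals to short local names; a
     typical override strips the realm and then falls back to the default rule.

     <property>
       <name>hadoop.security.auth_to_local</name>
       <value>
         RULE:[1:$1@$0](.*@EXAMPLE\.COM)s/@.*//
         RULE:[2:$1@$0](.*@EXAMPLE\.COM)s/@.*//
         DEFAULT
       </value>
     </property>
-->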

<property>
  <name>hadoop.security.auth_to_local.mechanism</name>
  <value>hadoop</value>
  <description>The mechanism by which auth_to_local rules are evaluated.
    If set to 'hadoop' it will not allow resulting local user names to have
    either '@' or '/'. If set to 'MIT' it will follow MIT evaluation rules
    and the restrictions of 'hadoop' do not apply.</description>
</property>

<property>
  <name>hadoop.token.files</name>
  <value></value>
  <description>List of token cache files that have delegation tokens for hadoop service</description>
</property>

<!-- i/o properties -->
<property>
  <name>io.file.buffer.size</name>
  <value>4096</value>
  <description>The size of buffer for use in sequence files.
    The size of this buffer should probably be a multiple of hardware
    page size (4096 on Intel x86), and it determines how much data is
    buffered during read and write operations. Must be greater than zero.
  </description>
</property>

<property>
  <name>io.bytes.per.checksum</name>
  <value>512</value>
  <description>The number of bytes per checksum. Must not be larger than
    io.file.buffer.size.</description>
</property>

<property>
  <name>io.skip.checksum.errors</name>
  <value>false</value>
  <description>If true, when a checksum error is encountered while
    reading a sequence file, entries are skipped, instead of throwing an
    exception.</description>
</property>

<property>
  <name>io.compression.codecs</name>
  <value></value>
  <description>A comma-separated list of the compression codec classes that can
    be used for compression/decompression. In addition to any classes specified
    with this property (which take precedence), codec classes on the classpath
    are discovered using a Java ServiceLoader.</description>
</property>
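
<!-- Illustrative example (not a default value): explicitly listing a few of
     the codec classes shipped with Hadoop.

     <property>
       <name>io.compression.codecs</name>
       <value>org.apache.hadoop.io.compress.DefaultCodec,
              org.apache.hadoop.io.compress.GzipCodec,
              org.apache.hadoop.io.compress.BZip2Codec</value>
     </property>
-->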

<property>
  <name>io.compression.codec.bzip2.library</name>
  <value>system-native</value>
  <description>The native-code library to be used for compression and
    decompression by the bzip2 codec. This library could be specified
    either by name or by the full pathname. In the former case, the
    library is located by the dynamic linker, usually searching the
    directories specified in the environment variable LD_LIBRARY_PATH.

    The value of "system-native" indicates that the default system
    library should be used. To indicate that the algorithm should
    operate entirely in Java, specify "java-builtin".</description>
</property>

<property>
  <name>io.serializations</name>
  <value>org.apache.hadoop.io.serializer.WritableSerialization, org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization, org.apache.hadoop.io.serializer.avro.AvroReflectSerialization</value>
  <description>A list of serialization classes that can be used for
    obtaining serializers and deserializers.</description>
</property>

<property>
  <name>io.seqfile.local.dir</name>
  <value>${hadoop.tmp.dir}/io/local</value>
  <description>The local directory where sequence file stores intermediate
    data files during merge. May be a comma-separated list of
    directories on different devices in order to spread disk i/o.
    Directories that do not exist are ignored.
  </description>
</property>

<property>
  <name>io.map.index.skip</name>
  <value>0</value>
  <description>Number of index entries to skip between each entry.
    Zero by default. Setting this to values larger than zero can
    facilitate opening large MapFiles using less memory.</description>
</property>

<property>
  <name>io.map.index.interval</name>
  <value>128</value>
  <description>
    A MapFile consists of two files - a data file (tuples) and an index file
    (keys). For every io.map.index.interval records written in the
    data file, an entry (record-key, data-file-position) is written
    in the index file. This is to allow for doing binary search later
    within the index file to look up records by their keys and get their
    closest positions in the data file.
  </description>
</property>

<property>
  <name>io.erasurecode.codec.rs.rawcoders</name>
  <value>rs_native,rs_java</value>
  <description>
    Comma separated raw coder implementations for the rs codec. Earlier
    entries in the list take precedence; later ones are used as fallbacks
    if creating a raw coder from an earlier factory fails.
  </description>
</property>

<property>
  <name>io.erasurecode.codec.rs-legacy.rawcoders</name>
  <value>rs-legacy_java</value>
  <description>
    Comma separated raw coder implementations for the rs-legacy codec. Earlier
    entries in the list take precedence; later ones are used as fallbacks
    if creating a raw coder from an earlier factory fails.
  </description>
</property>

<property>
  <name>io.erasurecode.codec.xor.rawcoders</name>
  <value>xor_native,xor_java</value>
  <description>
    Comma separated raw coder implementations for the xor codec. Earlier
    entries in the list take precedence; later ones are used as fallbacks
    if creating a raw coder from an earlier factory fails.
  </description>
</property>

<!-- file system properties -->

<property>
  <name>fs.defaultFS</name>
  <value>file:///</value>
  <description>The name of the default file system. A URI whose
    scheme and authority determine the FileSystem implementation. The
    uri's scheme determines the config property (fs.SCHEME.impl) naming
    the FileSystem implementation class. The uri's authority is used to
    determine the host, port, etc. for a filesystem.</description>
</property>
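
<!-- Illustrative example (the hostname is a placeholder): pointing the
     default filesystem at an HDFS NameNode in core-site.xml.

     <property>
       <name>fs.defaultFS</name>
       <value>hdfs://namenode.example.com:8020</value>
     </property>
-->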

<property>
  <name>fs.default.name</name>
  <value>file:///</value>
  <description>Deprecated. Use (fs.defaultFS) property
    instead</description>
</property>

<property>
  <name>fs.trash.interval</name>
  <value>0</value>
  <description>Number of minutes after which the checkpoint
    gets deleted. If zero, the trash feature is disabled.
    This option may be configured both on the server and the
    client. If trash is disabled server side then the client
    side configuration is checked. If trash is enabled on the
    server side then the value configured on the server is
    used and the client configuration value is ignored.
  </description>
</property>
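
<!-- Illustrative example: keep deleted files in trash for one day
     (1440 minutes) before checkpoints become eligible for deletion.

     <property>
       <name>fs.trash.interval</name>
       <value>1440</value>
     </property>
-->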

<property>
  <name>fs.trash.checkpoint.interval</name>
  <value>0</value>
  <description>Number of minutes between trash checkpoints.
    Should be smaller or equal to fs.trash.interval. If zero,
    the value is set to the value of fs.trash.interval.
    Every time the checkpointer runs it creates a new checkpoint
    out of current and removes checkpoints created more than
    fs.trash.interval minutes ago.
  </description>
</property>

<property>
  <name>fs.protected.directories</name>
  <value></value>
  <description>A comma-separated list of directories which cannot
    be deleted or renamed even by the superuser unless they are empty. This
    setting can be used to guard important system directories
    against accidental deletion due to administrator error.
  </description>
</property>

<property>
  <name>fs.AbstractFileSystem.file.impl</name>
  <value>org.apache.hadoop.fs.local.LocalFs</value>
  <description>The AbstractFileSystem for file: uris.</description>
</property>

<property>
  <name>fs.AbstractFileSystem.har.impl</name>
  <value>org.apache.hadoop.fs.HarFs</value>
  <description>The AbstractFileSystem for har: uris.</description>
</property>

<property>
  <name>fs.AbstractFileSystem.hdfs.impl</name>
  <value>org.apache.hadoop.fs.Hdfs</value>
  <description>The AbstractFileSystem for hdfs: uris.</description>
</property>

<property>
  <name>fs.AbstractFileSystem.viewfs.impl</name>
  <value>org.apache.hadoop.fs.viewfs.ViewFs</value>
  <description>The AbstractFileSystem for view file system for viewfs: uris
    (i.e. client side mount table).</description>
</property>

<property>
  <name>fs.viewfs.rename.strategy</name>
  <value>SAME_MOUNTPOINT</value>
  <description>Allowed rename strategy to rename between multiple mountpoints.
    Allowed values are SAME_MOUNTPOINT, SAME_TARGET_URI_ACROSS_MOUNTPOINT and
    SAME_FILESYSTEM_ACROSS_MOUNTPOINT.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.hdfs.impl</name>
  <value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
  <description>The DistributedFileSystem for view file system overload scheme
    when child file system and ViewFSOverloadScheme's schemes are hdfs.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.s3a.impl</name>
  <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
  <description>The S3AFileSystem for view file system overload scheme when
    child file system and ViewFSOverloadScheme's schemes are s3a.</description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.ofs.impl</name>
  <value>org.apache.hadoop.fs.ozone.RootedOzoneFileSystem</value>
  <description>The RootedOzoneFileSystem for view file system overload scheme
    when child file system and ViewFSOverloadScheme's schemes are ofs.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.o3fs.impl</name>
  <value>org.apache.hadoop.fs.ozone.OzoneFileSystem</value>
  <description>The OzoneFileSystem for view file system overload scheme when
    child file system and ViewFSOverloadScheme's schemes are o3fs.</description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.ftp.impl</name>
  <value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
  <description>The FTPFileSystem for view file system overload scheme when
    child file system and ViewFSOverloadScheme's schemes are ftp.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.webhdfs.impl</name>
  <value>org.apache.hadoop.hdfs.web.WebHdfsFileSystem</value>
  <description>The WebHdfsFileSystem for view file system overload scheme when
    child file system and ViewFSOverloadScheme's schemes are webhdfs.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.swebhdfs.impl</name>
  <value>org.apache.hadoop.hdfs.web.SWebHdfsFileSystem</value>
  <description>The SWebHdfsFileSystem for view file system overload scheme when
    child file system and ViewFSOverloadScheme's schemes are swebhdfs.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.file.impl</name>
  <value>org.apache.hadoop.fs.LocalFileSystem</value>
  <description>The LocalFileSystem for view file system overload scheme when
    child file system and ViewFSOverloadScheme's schemes are file.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.abfs.impl</name>
  <value>org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem</value>
  <description>The AzureBlobFileSystem for view file system overload scheme
    when child file system and ViewFSOverloadScheme's schemes are abfs.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.abfss.impl</name>
  <value>org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem</value>
  <description>The SecureAzureBlobFileSystem for view file system overload
    scheme when child file system and ViewFSOverloadScheme's schemes are abfss.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.wasb.impl</name>
  <value>org.apache.hadoop.fs.azure.NativeAzureFileSystem</value>
  <description>The NativeAzureFileSystem for view file system overload scheme
    when child file system and ViewFSOverloadScheme's schemes are wasb.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.oss.impl</name>
  <value>org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem</value>
  <description>The AliyunOSSFileSystem for view file system overload scheme
    when child file system and ViewFSOverloadScheme's schemes are oss.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.http.impl</name>
  <value>org.apache.hadoop.fs.http.HttpFileSystem</value>
  <description>The HttpFileSystem for view file system overload scheme
    when child file system and ViewFSOverloadScheme's schemes are http.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.gs.impl</name>
  <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
  <description>The GoogleHadoopFS/Google Cloud Storage file system for view
    file system overload scheme when child file system and ViewFSOverloadScheme's
    schemes are gs.
  </description>
</property>

<property>
  <name>fs.viewfs.overload.scheme.target.https.impl</name>
  <value>org.apache.hadoop.fs.http.HttpsFileSystem</value>
  <description>The HttpsFileSystem for view file system overload scheme
    when child file system and ViewFSOverloadScheme's schemes are https.
  </description>
</property>

<property>
  <name>fs.AbstractFileSystem.ftp.impl</name>
  <value>org.apache.hadoop.fs.ftp.FtpFs</value>
  <description>The AbstractFileSystem for ftp: uris.</description>
</property>

<property>
  <name>fs.ftp.impl</name>
  <value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
  <description>The implementation class of the FTP FileSystem</description>
</property>

<property>
  <name>fs.AbstractFileSystem.webhdfs.impl</name>
  <value>org.apache.hadoop.fs.WebHdfs</value>
  <description>The AbstractFileSystem for webhdfs: uris.</description>
</property>

<property>
  <name>fs.AbstractFileSystem.swebhdfs.impl</name>
  <value>org.apache.hadoop.fs.SWebHdfs</value>
  <description>The AbstractFileSystem for swebhdfs: uris.</description>
</property>

<property>
  <name>fs.ftp.host</name>
  <value>0.0.0.0</value>
  <description>FTP filesystem connects to this server</description>
</property>

<property>
  <name>fs.ftp.host.port</name>
  <value>21</value>
  <description>
    FTP filesystem connects to fs.ftp.host on this port
  </description>
</property>

<property>
  <name>fs.ftp.data.connection.mode</name>
  <value>ACTIVE_LOCAL_DATA_CONNECTION_MODE</value>
  <description>Set the FTPClient's data connection mode based on configuration.
    Valid values are ACTIVE_LOCAL_DATA_CONNECTION_MODE,
    PASSIVE_LOCAL_DATA_CONNECTION_MODE and PASSIVE_REMOTE_DATA_CONNECTION_MODE.
  </description>
</property>

<property>
  <name>fs.ftp.transfer.mode</name>
  <value>BLOCK_TRANSFER_MODE</value>
  <description>
    Set FTP's transfer mode based on configuration. Valid values are
    STREAM_TRANSFER_MODE, BLOCK_TRANSFER_MODE and COMPRESSED_TRANSFER_MODE.
  </description>
</property>

<property>
  <name>fs.ftp.timeout</name>
  <value>0</value>
  <description>
    FTP filesystem's timeout in seconds.
  </description>
</property>

<property>
  <name>fs.df.interval</name>
  <value>60000</value>
  <description>Disk usage statistics refresh interval in msec.</description>
</property>

<property>
  <name>fs.du.interval</name>
  <value>600000</value>
  <description>File space usage statistics refresh interval in msec.</description>
</property>

<property>
  <name>fs.automatic.close</name>
  <value>true</value>
  <description>By default, FileSystem instances are automatically closed at program
    exit using a JVM shutdown hook. Setting this property to false disables this
    behavior. This is an advanced option that should only be used by server applications
    requiring a more carefully orchestrated shutdown sequence.
  </description>
</property>

<property>
  <name>fs.s3a.access.key</name>
  <description>AWS access key ID used by S3A file system. Omit for IAM role-based or provider-based authentication.</description>
</property>

<property>
  <name>fs.s3a.secret.key</name>
  <description>AWS secret key used by S3A file system. Omit for IAM role-based or provider-based authentication.</description>
</property>

<property>
  <name>fs.s3a.aws.credentials.provider</name>
  <value>
    org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider,
    org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,
    com.amazonaws.auth.EnvironmentVariableCredentialsProvider,
    org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider
  </value>
  <description>
    Comma-separated class names of credential provider classes which implement
    com.amazonaws.auth.AWSCredentialsProvider.

    When S3A delegation tokens are not enabled, this list will be used
    to directly authenticate with S3 and other AWS services.
    When S3A delegation tokens are enabled, depending upon the delegation
    token binding it may be used
    to communicate with the STS endpoint to request session/role
    credentials.

    These are loaded and queried in sequence for a valid set of credentials.
    Each listed class must implement one of the following means of
    construction, which are attempted in order:
    * a public constructor accepting java.net.URI and
      org.apache.hadoop.conf.Configuration,
    * a public constructor accepting org.apache.hadoop.conf.Configuration,
    * a public static method named getInstance that accepts no
      arguments and returns an instance of
      com.amazonaws.auth.AWSCredentialsProvider, or
    * a public default constructor.

    Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows
    anonymous access to a publicly accessible S3 bucket without any credentials.
    Please note that allowing anonymous access to an S3 bucket compromises
    security and therefore is unsuitable for most use cases. It can be useful
    for accessing public data sets without requiring AWS credentials.

    If unspecified, then the default list of credential provider classes,
    queried in sequence, is:
    * org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider: looks
      for session login secrets in the Hadoop configuration.
    * org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider:
      Uses the values of fs.s3a.access.key and fs.s3a.secret.key.
    * com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports
      configuration of AWS access key ID and secret access key in
      environment variables named AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
      and AWS_SESSION_TOKEN as documented in the AWS SDK.
    * org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider: picks up
      IAM credentials of any EC2 VM or AWS container in which the process is running.
  </description>
</property>
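
<!-- Illustrative example (mirrors the description above): read-only access
     to a publicly accessible bucket without any credentials.

     <property>
       <name>fs.s3a.aws.credentials.provider</name>
       <value>org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider</value>
     </property>
-->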
|
|
|
|
<property>
|
|
<name>fs.s3a.session.token</name>
|
|
<description>Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider
|
|
as one of the providers.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.security.credential.provider.path</name>
|
|
<value />
|
|
<description>
|
|
Optional comma separated list of credential providers, a list
|
|
which is prepended to that set in hadoop.security.credential.provider.path
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.assumed.role.arn</name>
|
|
<value />
|
|
<description>
|
|
AWS ARN for the role to be assumed.
|
|
Required if the fs.s3a.aws.credentials.provider contains
|
|
org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.assumed.role.session.name</name>
|
|
<value />
|
|
<description>
|
|
Session name for the assumed role, must be valid characters according to
|
|
the AWS APIs.
|
|
Only used if AssumedRoleCredentialProvider is the AWS credential provider.
|
|
If not set, one is generated from the current Hadoop/Kerberos username.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.assumed.role.policy</name>
|
|
<value/>
|
|
<description>
|
|
JSON policy to apply to the role.
|
|
Only used if AssumedRoleCredentialProvider is the AWS credential provider.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.assumed.role.session.duration</name>
|
|
<value>30m</value>
|
|
<description>
|
|
Duration of assumed roles before a refresh is attempted.
|
|
Used when session tokens are requested.
|
|
Range: 15m to 1h
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.assumed.role.sts.endpoint</name>
|
|
<value/>
|
|
<description>
|
|
AWS Security Token Service Endpoint.
|
|
If unset, uses the default endpoint.
|
|
Only used if AssumedRoleCredentialProvider is the AWS credential provider.
|
|
Used by the AssumedRoleCredentialProvider and in Session and Role delegation
|
|
tokens.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.assumed.role.sts.endpoint.region</name>
|
|
<value></value>
|
|
<description>
|
|
AWS Security Token Service Endpoint's region;
|
|
Needed if fs.s3a.assumed.role.sts.endpoint points to an endpoint
|
|
other than the default one and the v4 signature is used.
|
|
Used by the AssumedRoleCredentialProvider and in Session and Role delegation
|
|
tokens.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.assumed.role.credentials.provider</name>
|
|
<value>org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider</value>
|
|
<description>
|
|
List of credential providers to authenticate with the STS endpoint and
|
|
retrieve short-lived role credentials.
|
|
Only used if AssumedRoleCredentialProvider is the AWS credential provider.
|
|
If unset, uses "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider".
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.delegation.token.binding</name>
|
|
<value></value>
|
|
<description>
|
|
The name of a class to provide delegation tokens support in S3A.
|
|
If unset: delegation token support is disabled.
|
|
|
|
Note: for job submission to actually collect these tokens,
|
|
Kerberos must be enabled.
|
|
|
|
Options are:
|
|
org.apache.hadoop.fs.s3a.auth.delegation.SessionTokenBinding
|
|
org.apache.hadoop.fs.s3a.auth.delegation.FullCredentialsTokenBinding
|
|
and org.apache.hadoop.fs.s3a.auth.delegation.RoleTokenBinding
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.connection.maximum</name>
|
|
<value>96</value>
|
|
<description>Controls the maximum number of simultaneous connections to S3.
|
|
This must be bigger than the value of fs.s3a.threads.max so as to stop
|
|
threads being blocked waiting for new HTTPS connections.
|
|
Why not equal? The AWS SDK transfer manager also uses these connections.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.connection.ssl.enabled</name>
|
|
<value>true</value>
|
|
<description>Enables or disables SSL connections to AWS services.
|
|
Also sets the default port to use for the s3a proxy settings,
|
|
when not explicitly set in fs.s3a.proxy.port.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.endpoint</name>
|
|
<description>AWS S3 endpoint to connect to. An up-to-date list is
|
|
provided in the AWS Documentation: regions and endpoints. Without this
|
|
property, the standard region (s3.amazonaws.com) is assumed.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.path.style.access</name>
|
|
<value>false</value>
|
|
<description>Enable S3 path style access ie disabling the default virtual hosting behaviour.
|
|
Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.proxy.host</name>
|
|
<description>Hostname of the (optional) proxy server for S3 connections.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.proxy.port</name>
|
|
<description>Proxy server port. If this property is not set
|
|
but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with
|
|
the value of fs.s3a.connection.ssl.enabled).</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.proxy.username</name>
|
|
<description>Username for authenticating with proxy server.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.proxy.password</name>
|
|
<description>Password for authenticating with proxy server.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.proxy.domain</name>
|
|
<description>Domain for authenticating with proxy server.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.proxy.workstation</name>
|
|
<description>Workstation for authenticating with proxy server.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.attempts.maximum</name>
|
|
<value>20</value>
|
|
<description>How many times we should retry commands on transient errors.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.connection.establish.timeout</name>
|
|
<value>5000</value>
|
|
<description>Socket connection setup timeout in milliseconds.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.connection.timeout</name>
|
|
<value>200000</value>
|
|
<description>Socket connection timeout in milliseconds.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.socket.send.buffer</name>
|
|
<value>8192</value>
|
|
<description>Socket send buffer hint to amazon connector. Represented in bytes.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.socket.recv.buffer</name>
|
|
<value>8192</value>
|
|
<description>Socket receive buffer hint to amazon connector. Represented in bytes.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.paging.maximum</name>
|
|
<value>5000</value>
|
|
<description>How many keys to request from S3 when doing
|
|
directory listings at a time.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.threads.max</name>
|
|
<value>64</value>
|
|
<description>The total number of threads available in the filesystem for data
|
|
uploads *or any other queued filesystem operation*.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.threads.keepalivetime</name>
|
|
<value>60</value>
|
|
<description>Number of seconds a thread can be idle before being
|
|
terminated.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.max.total.tasks</name>
|
|
<value>32</value>
|
|
<description>The number of operations which can be queued for execution.
|
|
This is in addition to the number of active threads in fs.s3a.threads.max.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.executor.capacity</name>
|
|
<value>16</value>
|
|
  <description>The maximum number of submitted tasks which a single
    operation (e.g. rename(), delete()) may submit simultaneously for
    execution, excluding the IO-heavy block uploads, whose capacity
    is set in "fs.s3a.fast.upload.active.blocks".

    All tasks are submitted to the shared thread pool whose size is
    set in "fs.s3a.threads.max"; the value of capacity should be less than that
    of the thread pool itself, as the goal is to stop a single operation
    from overloading that thread pool.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.multipart.size</name>
|
|
<value>64M</value>
|
|
<description>How big (in bytes) to split upload or copy operations up into.
|
|
A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.multipart.threshold</name>
|
|
<value>128M</value>
|
|
<description>How big (in bytes) to split upload or copy operations up into.
|
|
This also controls the partition size in renamed files, as rename() involves
|
|
copying the source file(s).
|
|
A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
|
|
</description>
|
|
</property>
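<!--
  A hedged example of overriding the two multipart sizes above in core-site.xml
  using the size suffixes described in their descriptions; the 128M/256M values
  are illustrative only, not tuning advice:

  <property>
    <name>fs.s3a.multipart.size</name>
    <value>128M</value>
  </property>
  <property>
    <name>fs.s3a.multipart.threshold</name>
    <value>256M</value>
  </property>
-->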
|
|
|
|
<property>
|
|
<name>fs.s3a.multiobjectdelete.enable</name>
|
|
<value>true</value>
|
|
<description>When enabled, multiple single-object delete requests are replaced by
|
|
a single 'delete multiple objects'-request, reducing the number of requests.
|
|
Beware: legacy S3-compatible object stores might not support this request.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.acl.default</name>
|
|
<description>Set a canned ACL for newly created and copied objects. Value may be Private,
|
|
PublicRead, PublicReadWrite, AuthenticatedRead, LogDeliveryWrite, BucketOwnerRead,
|
|
or BucketOwnerFullControl.
|
|
If set, caller IAM role must have "s3:PutObjectAcl" permission on the bucket.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.multipart.purge</name>
|
|
<value>false</value>
|
|
  <description>True if you want to purge existing multipart uploads that may not have been
    completed/aborted correctly. The corresponding purge age is defined in
    fs.s3a.multipart.purge.age.
    If set, when the filesystem is instantiated then all outstanding uploads
    older than the purge age will be terminated, across the entire bucket.
    This will impact multipart uploads by other applications and users, so it should
    be used sparingly, with an age value chosen to stop failed uploads, without
    breaking ongoing operations.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.multipart.purge.age</name>
|
|
<value>86400</value>
|
|
<description>Minimum age in seconds of multipart uploads to purge
|
|
on startup if "fs.s3a.multipart.purge" is true
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.encryption.algorithm</name>
|
|
<description>Specify a server-side encryption or client-side
|
|
encryption algorithm for s3a: file system. Unset by default. It supports the
|
|
following values: 'AES256' (for SSE-S3), 'SSE-KMS', 'SSE-C', and 'CSE-KMS'
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.encryption.key</name>
|
|
  <description>Specific encryption key to use if fs.s3a.encryption.algorithm
    has been set to 'SSE-KMS', 'SSE-C' or 'CSE-KMS'. In the case of SSE-C,
    the value of this property should be the Base64 encoded key. If you are
    using SSE-KMS and leave this property empty, the default S3 KMS key for
    your account will be used; otherwise set this property to the specific KMS key
    id. In the case of 'CSE-KMS' this value needs to be the AWS-KMS Key ID
    generated from the AWS console.
  </description>
|
|
</property>
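<!--
  A sketch of an SSE-KMS setup in core-site.xml combining the two properties
  above; the key ARN below is a placeholder, not a real key:

  <property>
    <name>fs.s3a.encryption.algorithm</name>
    <value>SSE-KMS</value>
  </property>
  <property>
    <name>fs.s3a.encryption.key</name>
    <value>arn:aws:kms:eu-west-1:111122223333:key/00000000-0000-0000-0000-000000000000</value>
  </property>
-->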
|
|
|
|
<property>
|
|
<name>fs.s3a.signing-algorithm</name>
|
|
<description>Override the default signing algorithm so legacy
|
|
implementations can still be used</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.accesspoint.required</name>
|
|
<value>false</value>
|
|
<description>Require that all S3 access is made through Access Points and not through
|
|
buckets directly. If enabled, use per-bucket overrides to allow bucket access to a specific set
|
|
of buckets.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.block.size</name>
|
|
<value>32M</value>
|
|
<description>Block size to use when reading files using s3a: file system.
|
|
A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.buffer.dir</name>
|
|
<value>${env.LOCAL_DIRS:-${hadoop.tmp.dir}}/s3a</value>
|
|
  <description>Comma separated list of directories that will be used to buffer file
    uploads to.
    On YARN applications the YARN container path is used as the default value;
    otherwise the value falls back to hadoop.tmp.dir.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.fast.upload.buffer</name>
|
|
<value>disk</value>
|
|
  <description>
    The buffering mechanism to use for data being written.
    Values: disk, array, bytebuffer.

    "disk" will use the directories listed in fs.s3a.buffer.dir as
    the location(s) to save data prior to being uploaded.

    "array" uses arrays in the JVM heap.

    "bytebuffer" uses off-heap memory within the JVM.

    Both "array" and "bytebuffer" will consume memory in a single stream up to the number
    of blocks set by:

    fs.s3a.multipart.size * fs.s3a.fast.upload.active.blocks.

    If using either of these mechanisms, keep this value low.

    The total number of threads performing work across all streams is set by
    fs.s3a.threads.max, with fs.s3a.max.total.tasks values setting the number of queued
    work items.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.fast.upload.active.blocks</name>
|
|
<value>4</value>
|
|
  <description>
    Maximum number of blocks a single output stream can have
    active (uploading, or queued to the central FileSystem
    instance's pool of queued operations).

    This stops a single stream overloading the shared thread pool.
  </description>
|
|
</property>
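<!--
  An illustrative (not prescriptive) core-site.xml override switching the
  upload buffer to on-heap arrays and reducing the active block count, per the
  guidance above to keep memory-backed buffering small:

  <property>
    <name>fs.s3a.fast.upload.buffer</name>
    <value>array</value>
  </property>
  <property>
    <name>fs.s3a.fast.upload.active.blocks</name>
    <value>2</value>
  </property>
-->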
|
|
|
|
<property>
|
|
<name>fs.s3a.readahead.range</name>
|
|
<value>64K</value>
|
|
<description>Bytes to read ahead during a seek() before closing and
|
|
re-opening the S3 HTTP connection. This option will be overridden if
|
|
any call to setReadahead() is made to an open stream.
|
|
A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.user.agent.prefix</name>
|
|
<value></value>
|
|
<description>
|
|
Sets a custom value that will be prepended to the User-Agent header sent in
|
|
HTTP requests to the S3 back-end by S3AFileSystem. The User-Agent header
|
|
always includes the Hadoop version number followed by a string generated by
|
|
the AWS SDK. An example is "User-Agent: Hadoop 2.8.0, aws-sdk-java/1.10.6".
|
|
If this optional property is set, then its value is prepended to create a
|
|
customized User-Agent. For example, if this configuration property was set
|
|
to "MyApp", then an example of the resulting User-Agent would be
|
|
"User-Agent: MyApp, Hadoop 2.8.0, aws-sdk-java/1.10.6".
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.impl</name>
|
|
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
|
|
<description>The implementation class of the S3A Filesystem</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.retry.limit</name>
|
|
<value>7</value>
|
|
<description>
|
|
Number of times to retry any repeatable S3 client request on failure,
|
|
excluding throttling requests.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.retry.interval</name>
|
|
<value>500ms</value>
|
|
<description>
|
|
Initial retry interval when retrying operations for any reason other
|
|
than S3 throttle errors.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.retry.throttle.limit</name>
|
|
<value>20</value>
|
|
<description>
|
|
Number of times to retry any throttled request.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.retry.throttle.interval</name>
|
|
<value>100ms</value>
|
|
  <description>
    Initial interval between retry attempts on throttled requests, +/- 50%, chosen at random.
    i.e. for an initial value of 3000ms, the initial delay would be in the range 1500ms to 4500ms.
    Backoffs are exponential; again, randomness is used to avoid the thundering herd problem.
    500ms is the default value used by the AWS S3 Retry policy.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.committer.name</name>
|
|
<value>file</value>
|
|
<description>
|
|
Committer to create for output to S3A, one of:
|
|
"file", "directory", "partitioned", "magic".
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.committer.magic.enabled</name>
|
|
<value>true</value>
|
|
<description>
|
|
Enable support in the S3A filesystem for the "Magic" committer.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.committer.threads</name>
|
|
<value>8</value>
|
|
<description>
|
|
Number of threads in committers for parallel operations on files
|
|
(upload, commit, abort, delete...)
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.committer.staging.tmp.path</name>
|
|
<value>tmp/staging</value>
|
|
<description>
|
|
Path in the cluster filesystem for temporary data.
|
|
This is for HDFS, not the local filesystem.
|
|
It is only for the summary data of each file, not the actual
|
|
data being committed.
|
|
Using an unqualified path guarantees that the full path will be
|
|
generated relative to the home directory of the user creating the job,
|
|
hence private (assuming home directory permissions are secure).
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.committer.staging.unique-filenames</name>
|
|
<value>true</value>
|
|
  <description>
    Option for final files to have a unique name through job attempt info,
    or the value of fs.s3a.committer.staging.uuid.
    When writing data with the "append" conflict option, this guarantees
    that new data will not overwrite any existing data.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.committer.staging.conflict-mode</name>
|
|
<value>append</value>
|
|
<description>
|
|
Staging committer conflict resolution policy.
|
|
Supported: "fail", "append", "replace".
|
|
</description>
|
|
</property>
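<!--
  A minimal sketch of selecting the directory staging committer with "replace"
  conflict resolution in core-site.xml; whether this suits a workload depends
  on how jobs share output directories:

  <property>
    <name>fs.s3a.committer.name</name>
    <value>directory</value>
  </property>
  <property>
    <name>fs.s3a.committer.staging.conflict-mode</name>
    <value>replace</value>
  </property>
-->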
|
|
|
|
<property>
|
|
<name>fs.s3a.committer.abort.pending.uploads</name>
|
|
<value>true</value>
|
|
<description>
|
|
Should the committers abort all pending uploads to the destination
|
|
directory?
|
|
|
|
Set to false if more than one job is writing to the same directory tree.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.enabled</name>
|
|
<value>true</value>
|
|
<description>Is S3 Select enabled?</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.input.csv.comment.marker</name>
|
|
<value>#</value>
|
|
<description>In S3 Select queries: the marker for comment lines in CSV files</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.input.csv.record.delimiter</name>
|
|
<value>\n</value>
|
|
<description>In S3 Select queries over CSV files: the record delimiter.
|
|
\t is remapped to the TAB character, \r to CR \n to newline. \\ to \
|
|
and \" to "
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.input.csv.field.delimiter</name>
|
|
<value>,</value>
|
|
<description>In S3 Select queries over CSV files: the field delimiter.
|
|
\t is remapped to the TAB character, \r to CR \n to newline. \\ to \
|
|
and \" to "
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.input.csv.quote.character</name>
|
|
<value>"</value>
|
|
<description>In S3 Select queries over CSV files: quote character.
|
|
\t is remapped to the TAB character, \r to CR \n to newline. \\ to \
|
|
and \" to "
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.input.csv.quote.escape.character</name>
|
|
<value>\\</value>
|
|
<description>In S3 Select queries over CSV files: quote escape character.
|
|
\t is remapped to the TAB character, \r to CR \n to newline. \\ to \
|
|
and \" to "
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.input.csv.header</name>
|
|
<value>none</value>
|
|
<description>In S3 Select queries over CSV files: what is the role of the header? One of "none", "ignore" and "use"</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.input.compression</name>
|
|
<value>none</value>
|
|
<description>In S3 Select queries, the source compression
|
|
algorithm. One of: "none" and "gzip"</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.output.csv.quote.fields</name>
|
|
<value>always</value>
|
|
<description>
|
|
In S3 Select queries: should fields in generated CSV Files be quoted?
|
|
One of: "always", "asneeded".
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.output.csv.quote.character</name>
|
|
<value>"</value>
|
|
<description>
|
|
In S3 Select queries: the quote character for generated CSV Files.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.output.csv.quote.escape.character</name>
|
|
<value>\\</value>
|
|
<description>
|
|
In S3 Select queries: the quote escape character for generated CSV Files.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.output.csv.record.delimiter</name>
|
|
<value>\n</value>
|
|
<description>
|
|
In S3 Select queries: the record delimiter for generated CSV Files.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.output.csv.field.delimiter</name>
|
|
<value>,</value>
|
|
<description>
|
|
In S3 Select queries: the field delimiter for generated CSV Files.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.select.errors.include.sql</name>
|
|
<value>false</value>
|
|
<description>
|
|
Include the SQL statement in errors: this is useful for development but
|
|
may leak security and Personally Identifying Information in production,
|
|
so must be disabled there.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.AbstractFileSystem.s3a.impl</name>
|
|
<value>org.apache.hadoop.fs.s3a.S3A</value>
|
|
<description>The implementation class of the S3A AbstractFileSystem.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.list.version</name>
|
|
<value>2</value>
|
|
  <description>
    Select which version of the S3 SDK's List Objects API to use. Currently
    supported values are 2 (default) and 1 (older API).
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.connection.request.timeout</name>
|
|
<value>0</value>
|
|
  <description>
    Timeout on HTTP requests to the AWS service; 0 means no timeout.
    Measured in seconds; the usual time suffixes are all supported.

    Important: this is the maximum duration of any AWS service call,
    including upload and copy operations. If non-zero, it must be larger
    than the time to upload multi-megabyte blocks to S3 from the client,
    and to rename many-GB files. Use with care.

    Values that are larger than Integer.MAX_VALUE milliseconds are
    converted to Integer.MAX_VALUE milliseconds.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.etag.checksum.enabled</name>
|
|
<value>false</value>
|
|
<description>
|
|
Should calls to getFileChecksum() return the etag value of the remote
|
|
object.
|
|
WARNING: if enabled, distcp operations between HDFS and S3 will fail unless
|
|
-skipcrccheck is set.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.change.detection.source</name>
|
|
<value>etag</value>
|
|
<description>
|
|
Select which S3 object attribute to use for change detection.
|
|
Currently support 'etag' for S3 object eTags and 'versionid' for
|
|
S3 object version IDs. Use of version IDs requires object versioning to be
|
|
enabled for each S3 bucket utilized. Object versioning is disabled on
|
|
buckets by default. When version ID is used, the buckets utilized should
|
|
have versioning enabled before any data is written.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.change.detection.mode</name>
|
|
<value>server</value>
|
|
<description>
|
|
Determines how change detection is applied to alert to inconsistent S3
|
|
objects read during or after an overwrite. Value 'server' indicates to apply
|
|
the attribute constraint directly on GetObject requests to S3. Value 'client'
|
|
means to do a client-side comparison of the attribute value returned in the
|
|
response. Value 'server' would not work with third-party S3 implementations
|
|
that do not support these constraints on GetObject. Values 'server' and
|
|
'client' generate RemoteObjectChangedException when a mismatch is detected.
|
|
Value 'warn' works like 'client' but generates only a warning. Value 'none'
|
|
will ignore change detection completely.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.change.detection.version.required</name>
|
|
<value>true</value>
|
|
<description>
|
|
Determines if S3 object version attribute defined by
|
|
fs.s3a.change.detection.source should be treated as required. If true and the
|
|
referred attribute is unavailable in an S3 GetObject response,
|
|
NoVersionAttributeException is thrown. Setting to 'true' is encouraged to
|
|
avoid potential for inconsistent reads with third-party S3 implementations or
|
|
against S3 buckets that have object versioning disabled.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.ssl.channel.mode</name>
|
|
<value>default_jsse</value>
|
|
<description>
|
|
If secure connections to S3 are enabled, configures the SSL
|
|
implementation used to encrypt connections to S3. Supported values are:
|
|
"default_jsse", "default_jsse_with_gcm", "default", and "openssl".
|
|
"default_jsse" uses the Java Secure Socket Extension package (JSSE).
|
|
However, when running on Java 8, the GCM cipher is removed from the list
|
|
of enabled ciphers. This is due to performance issues with GCM in Java 8.
|
|
"default_jsse_with_gcm" uses the JSSE with the default list of cipher
|
|
suites. "default_jsse_with_gcm" is equivalent to the behavior prior to
|
|
this feature being introduced. "default" attempts to use OpenSSL rather
|
|
than the JSSE for SSL encryption, if OpenSSL libraries cannot be loaded,
|
|
it falls back to the "default_jsse" behavior. "openssl" attempts to use
|
|
OpenSSL as well, but fails if OpenSSL libraries cannot be loaded.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.s3a.downgrade.syncable.exceptions</name>
|
|
<value>true</value>
|
|
<description>
|
|
Warn but continue when applications use Syncable.hsync when writing
|
|
to S3A.
|
|
</description>
|
|
</property>
|
|
|
|
<!--
|
|
The switch to turn S3A auditing on or off.
|
|
-->
|
|
<property>
|
|
<name>fs.s3a.audit.enabled</name>
|
|
<value>true</value>
|
|
<description>
|
|
Should auditing of S3A requests be enabled?
|
|
</description>
|
|
</property>
|
|
|
|
<!-- Azure file system properties -->
|
|
<property>
|
|
<name>fs.AbstractFileSystem.wasb.impl</name>
|
|
<value>org.apache.hadoop.fs.azure.Wasb</value>
|
|
<description>AbstractFileSystem implementation class of wasb://</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.AbstractFileSystem.wasbs.impl</name>
|
|
<value>org.apache.hadoop.fs.azure.Wasbs</value>
|
|
<description>AbstractFileSystem implementation class of wasbs://</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.wasb.impl</name>
|
|
<value>org.apache.hadoop.fs.azure.NativeAzureFileSystem</value>
|
|
<description>The implementation class of the Native Azure Filesystem</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.wasbs.impl</name>
|
|
<value>org.apache.hadoop.fs.azure.NativeAzureFileSystem$Secure</value>
|
|
<description>The implementation class of the Secure Native Azure Filesystem</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.azure.secure.mode</name>
|
|
<value>false</value>
|
|
  <description>
    Config flag to identify the mode under which fs.azure.NativeAzureFileSystem needs
    to run. Setting it to "true" makes fs.azure.NativeAzureFileSystem use
    SAS keys to communicate with Azure storage.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.abfs.impl</name>
|
|
<value>org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem</value>
|
|
<description>The implementation class of the Azure Blob Filesystem</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.abfss.impl</name>
|
|
<value>org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem</value>
|
|
<description>The implementation class of the Secure Azure Blob Filesystem</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.AbstractFileSystem.abfs.impl</name>
|
|
<value>org.apache.hadoop.fs.azurebfs.Abfs</value>
|
|
<description>AbstractFileSystem implementation class of abfs://</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.AbstractFileSystem.abfss.impl</name>
|
|
<value>org.apache.hadoop.fs.azurebfs.Abfss</value>
|
|
<description>AbstractFileSystem implementation class of abfss://</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.azure.local.sas.key.mode</name>
|
|
<value>false</value>
|
|
  <description>
    Works in conjunction with fs.azure.secure.mode. Setting this config to true
    results in fs.azure.NativeAzureFileSystem using the local SAS key generation,
    where the SAS keys are generated in the same process as fs.azure.NativeAzureFileSystem.
    If the fs.azure.secure.mode flag is set to false, this flag has no effect.
  </description>
|
|
</property>
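<!--
  An illustrative combination of the two flags above for WASB secure mode with
  local SAS key generation; whether this is appropriate depends on the
  deployment's key-management requirements:

  <property>
    <name>fs.azure.secure.mode</name>
    <value>true</value>
  </property>
  <property>
    <name>fs.azure.local.sas.key.mode</name>
    <value>true</value>
  </property>
-->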
|
|
<property>
|
|
<name>fs.azure.sas.expiry.period</name>
|
|
<value>90d</value>
|
|
  <description>
    The default expiration period for generated SAS keys.
    The following suffixes (case insensitive) can be used:
    ms(millis), s(sec), m(min), h(hour), d(day)
    to specify the time (such as 2s, 2m, 1h, etc.).
  </description>
|
|
</property>
|
|
<property>
|
|
<name>fs.azure.authorization</name>
|
|
<value>false</value>
|
|
  <description>
    Config flag to enable authorization support in WASB. Setting it to "true" enables
    authorization support in WASB. Currently WASB authorization requires a remote service
    to provide authorization, which needs to be specified via the
    fs.azure.authorization.remote.service.url configuration property.
  </description>
|
|
</property>
|
|
<property>
|
|
<name>fs.azure.authorization.caching.enable</name>
|
|
<value>true</value>
|
|
<description>
|
|
Config flag to enable caching of authorization results and saskeys in WASB.
|
|
This flag is relevant only when fs.azure.authorization is enabled.
|
|
</description>
|
|
</property>
|
|
<property>
|
|
<name>fs.azure.saskey.usecontainersaskeyforallaccess</name>
|
|
<value>true</value>
|
|
<description>
|
|
Use container saskey for access to all blobs within the container.
|
|
Blob-specific saskeys are not used when this setting is enabled.
|
|
This setting provides better performance compared to blob-specific saskeys.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.azure.buffer.dir</name>
|
|
<value>${hadoop.tmp.dir}/abfs</value>
|
|
<description>Directory path for buffer files needed to upload data blocks
|
|
in AbfsOutputStream.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.AbstractFileSystem.gs.impl</name>
|
|
<value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
|
|
<description>The AbstractFileSystem for gs: uris.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>fs.azure.enable.readahead</name>
|
|
<value>true</value>
|
|
<description>Enabled readahead/prefetching in AbfsInputStream.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>io.seqfile.compress.blocksize</name>
|
|
<value>1000000</value>
|
|
<description>The minimum block size for compression in block compressed
|
|
SequenceFiles.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>io.mapfile.bloom.size</name>
|
|
<value>1048576</value>
|
|
<description>The size of BloomFilter-s used in BloomMapFile. Each time this many
|
|
keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter).
|
|
Larger values minimize the number of filters, which slightly increases the performance,
|
|
but may waste too much space if the total number of keys is usually much smaller
|
|
than this number.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>io.mapfile.bloom.error.rate</name>
|
|
<value>0.005</value>
|
|
<description>The rate of false positives in BloomFilter-s used in BloomMapFile.
|
|
As this value decreases, the size of BloomFilter-s increases exponentially. This
|
|
value is the probability of encountering false positives (default is 0.5%).
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.util.hash.type</name>
|
|
<value>murmur</value>
|
|
<description>The default implementation of Hash. Currently this can take one of the
|
|
two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash.
|
|
</description>
|
|
</property>
|
|
|
|
|
|
<!-- ipc properties -->
|
|
|
|
<property>
|
|
<name>ipc.client.idlethreshold</name>
|
|
<value>4000</value>
|
|
<description>Defines the threshold number of connections after which
|
|
connections will be inspected for idleness.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.kill.max</name>
|
|
<value>10</value>
|
|
<description>Defines the maximum number of clients to disconnect in one go.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.connection.maxidletime</name>
|
|
<value>10000</value>
|
|
<description>The maximum time in msec after which a client will bring down the
|
|
connection to the server.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.connect.max.retries</name>
|
|
<value>10</value>
|
|
<description>Indicates the number of retries a client will make to establish
|
|
a server connection.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.connect.retry.interval</name>
|
|
<value>1000</value>
|
|
  <description>Indicates the number of milliseconds a client will wait
    before retrying to establish a server connection.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.connect.timeout</name>
|
|
<value>20000</value>
|
|
<description>Indicates the number of milliseconds a client will wait for the
|
|
socket to establish a server connection.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.connect.max.retries.on.timeouts</name>
|
|
<value>45</value>
|
|
<description>Indicates the number of retries a client will make on socket timeout
|
|
to establish a server connection.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.tcpnodelay</name>
|
|
<value>true</value>
|
|
<description>Use TCP_NODELAY flag to bypass Nagle's algorithm transmission delays.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.low-latency</name>
|
|
<value>false</value>
|
|
<description>Use low-latency QoS markers for IPC connections.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.ping</name>
|
|
<value>true</value>
|
|
  <description>If set to true, send a ping to the server when a timeout occurs
    while reading the response. If no failure is detected, the client retries until at least
    a byte is read or the time given by ipc.client.rpc-timeout.ms has passed.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.ping.interval</name>
|
|
<value>60000</value>
|
|
<description>Timeout on waiting response from server, in milliseconds.
|
|
The client will send ping when the interval is passed without receiving bytes,
|
|
if ipc.client.ping is set to true.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.client.rpc-timeout.ms</name>
|
|
<value>120000</value>
|
|
<description>Timeout on waiting response from server, in milliseconds.
|
|
If this rpc-timeout is 0, it means no timeout. If this rpc-timeout is greater
|
|
than 0, and ipc.client.ping is set to true, and this rpc-timeout is greater than
|
|
the value of ipc.ping.interval, the effective value of the rpc-timeout is
|
|
rounded up to multiple of ipc.ping.interval.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.server.listen.queue.size</name>
|
|
<value>256</value>
|
|
<description>Indicates the length of the listen queue for servers accepting
|
|
client connections.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.server.log.slow.rpc</name>
|
|
<value>false</value>
|
|
  <description>This setting is useful to troubleshoot performance issues for
    various services. If this value is set to true then we log requests that
    fall into the 99th percentile as well as increment the RpcSlowCalls counter.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.server.purge.interval</name>
|
|
<value>15</value>
|
|
<description>Define how often calls are cleaned up in the server.
|
|
The default is 15 minutes. The unit is minutes.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.maximum.data.length</name>
|
|
<value>134217728</value>
|
|
  <description>This indicates the maximum IPC message length (bytes) that can be
    accepted by the server. Messages larger than this value are rejected by the
    server immediately to avoid possible OOMs. This setting should rarely need to be
    changed.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.maximum.response.length</name>
|
|
<value>134217728</value>
|
|
<description>This indicates the maximum IPC message length (bytes) that can be
|
|
accepted by the client. Messages larger than this value are rejected
|
|
immediately to avoid possible OOMs. This setting should rarely need to be
|
|
changed. Set to 0 to disable.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.server.reuseaddr</name>
|
|
<value>true</value>
|
|
  <description>Enables the SO_REUSEADDR TCP option on the server.
    Useful if BindException often prevents a certain service from being restarted
    because the server side is stuck in the TIME_WAIT state.
  </description>
|
|
</property>
|
|
|
|
<!-- FairCallQueue properties -->
|
|
<!-- See FairCallQueue documentation for a table of all properties -->
|
|
|
|
<!-- [port_number] is the port used by the IPC server to be configured. -->
|
|
<!-- For example, ipc.8020.callqueue.impl will adjust the call queue -->
|
|
<!-- implementation for the IPC server running at port 8020. -->
|
|
|
|
<!-- Typically, [port_number] is configured to be the NameNode RPC port, -->
|
|
<!-- i.e. port number in dfs.namenode.rpc-address, or port number in -->
|
|
<!-- fs.defaultFS if dfs.namenode.rpc-address is not explicitly configured. -->
|
|
<property>
|
|
<name>ipc.[port_number].backoff.enable</name>
|
|
<value>false</value>
|
|
<description>Whether or not to enable client backoff when a queue is full.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.backoff.enable</name>
|
|
<value>false</value>
|
|
<description>
|
|
This property is used as fallback property in case
|
|
"ipc.[port_number].backoff.enable" is not defined.
|
|
It determines whether or not to enable client backoff when
|
|
a queue is full.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].callqueue.impl</name>
|
|
<value>java.util.concurrent.LinkedBlockingQueue</value>
|
|
<description>The fully qualified name of a class to use as the implementation
|
|
of a call queue. The default implementation is
|
|
java.util.concurrent.LinkedBlockingQueue (FIFO queue).
|
|
Use org.apache.hadoop.ipc.FairCallQueue for the Fair Call Queue.
|
|
</description>
|
|
</property>
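<!--
  A hypothetical per-port override, following the [port_number] substitution
  described in the comments above, that enables FairCallQueue on an RPC server
  listening on port 8020 (the port is an example only):

  <property>
    <name>ipc.8020.callqueue.impl</name>
    <value>org.apache.hadoop.ipc.FairCallQueue</value>
  </property>
  <property>
    <name>ipc.8020.scheduler.impl</name>
    <value>org.apache.hadoop.ipc.DecayRpcScheduler</value>
  </property>
-->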
|
|
|
|
<property>
|
|
<name>ipc.callqueue.impl</name>
|
|
<value>java.util.concurrent.LinkedBlockingQueue</value>
|
|
  <description>
    The fully qualified name of a class to use as the implementation
    of a call queue. The default implementation is
    java.util.concurrent.LinkedBlockingQueue (FIFO queue).
    Use org.apache.hadoop.ipc.FairCallQueue for the Fair Call Queue.
    This config is the fallback for ipc.[port_number].callqueue.impl:
    if no call queue is defined at the port level, this default is used.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].scheduler.impl</name>
|
|
<value>org.apache.hadoop.ipc.DefaultRpcScheduler</value>
|
|
<description>The fully qualified name of a class to use as the
|
|
implementation of the scheduler. The default implementation is
|
|
org.apache.hadoop.ipc.DefaultRpcScheduler (no-op scheduler) when not using
|
|
FairCallQueue. If using FairCallQueue, defaults to
|
|
org.apache.hadoop.ipc.DecayRpcScheduler. Use
|
|
org.apache.hadoop.ipc.DecayRpcScheduler in conjunction with the Fair Call
|
|
Queue.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.scheduler.impl</name>
|
|
<value>org.apache.hadoop.ipc.DefaultRpcScheduler</value>
|
|
  <description>
    The fully qualified name of a class to use as the
    implementation of the scheduler. The default implementation is
    org.apache.hadoop.ipc.DefaultRpcScheduler (no-op scheduler) when
    not using FairCallQueue. If using FairCallQueue, defaults to
    org.apache.hadoop.ipc.DecayRpcScheduler. Use
    org.apache.hadoop.ipc.DecayRpcScheduler in conjunction
    with the Fair Call Queue.
    This config is the fallback for ipc.[port_number].scheduler.impl:
    if no scheduler is defined at the port level, this default is used.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].scheduler.priority.levels</name>
|
|
<value>4</value>
|
|
<description>How many priority levels to use within the scheduler and call
|
|
queue. This property applies to RpcScheduler and CallQueue.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].faircallqueue.multiplexer.weights</name>
|
|
<value>8,4,2,1</value>
|
|
<description>How much weight to give to each priority queue. This should be
|
|
a comma-separated list of length equal to the number of priority levels.
|
|
Weights descend by a factor of 2 (e.g., for 4 levels: 8,4,2,1).
|
|
This property applies to WeightedRoundRobinMultiplexer.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].identity-provider.impl</name>
|
|
<value>org.apache.hadoop.ipc.UserIdentityProvider</value>
|
|
<description>The identity provider mapping user requests to their identity.
|
|
This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.identity-provider.impl</name>
|
|
<value>org.apache.hadoop.ipc.UserIdentityProvider</value>
|
|
<description>
|
|
This property is used as fallback property in case
|
|
"ipc.[port_number].identity-provider.impl" is not defined.
|
|
The identity provider mapping user requests to their identity.
|
|
This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].cost-provider.impl</name>
|
|
<value>org.apache.hadoop.ipc.DefaultCostProvider</value>
|
|
<description>The cost provider mapping user requests to their cost. To
|
|
enable determination of cost based on processing time, use
|
|
org.apache.hadoop.ipc.WeightedTimeCostProvider.
|
|
This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.cost-provider.impl</name>
|
|
<value>org.apache.hadoop.ipc.DefaultCostProvider</value>
|
|
<description>
|
|
This property is used as fallback property in case
|
|
"ipc.[port_number].cost-provider.impl" is not defined.
|
|
The cost provider mapping user requests to their cost. To
|
|
enable determination of cost based on processing time, use
|
|
org.apache.hadoop.ipc.WeightedTimeCostProvider.
|
|
This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].decay-scheduler.period-ms</name>
|
|
<value>5000</value>
|
|
<description>How frequently the decay factor should be applied to the
|
|
operation counts of users. Higher values have less overhead, but respond
|
|
less quickly to changes in client behavior.
|
|
This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].decay-scheduler.decay-factor</name>
|
|
<value>0.5</value>
|
|
<description>When decaying the operation counts of users, the multiplicative
|
|
decay factor to apply. Higher values will weight older operations more
|
|
strongly, essentially giving the scheduler a longer memory, and penalizing
|
|
heavy clients for a longer period of time.
|
|
This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].decay-scheduler.thresholds</name>
|
|
<value>13,25,50</value>
|
|
<description>The client load threshold, as an integer percentage, for each
|
|
priority queue. Clients producing less load, as a percent of total
|
|
operations, than specified at position i will be given priority i. This
|
|
should be a comma-separated list of length equal to the number of priority
|
|
levels minus 1 (the last is implicitly 100).
|
|
Thresholds ascend by a factor of 2 (e.g., for 4 levels: 13,25,50).
|
|
This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].decay-scheduler.backoff.responsetime.enable</name>
|
|
<value>false</value>
|
|
<description>Whether or not to enable the backoff by response time feature.
|
|
This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].decay-scheduler.backoff.responsetime.thresholds</name>
|
|
<value>10s,20s,30s,40s</value>
|
|
<description>The response time thresholds, as time durations, for each
|
|
priority queue. If the average response time for a queue is above this
|
|
threshold, backoff will occur in lower priority queues. This should be a
|
|
comma-separated list of length equal to the number of priority levels.
|
|
Threshold increases by 10s per level (e.g., for 4 levels: 10s,20s,30s,40s)
|
|
This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].decay-scheduler.metrics.top.user.count</name>
|
|
<value>10</value>
|
|
<description>The number of top (i.e., heaviest) users to emit metric
|
|
information about. This property applies to DecayRpcScheduler.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].decay-scheduler.service-users</name>
|
|
<value></value>
|
|
<description>Service users will always be scheduled into the highest-priority
|
|
queue and won't be included in the priority computation of normal user
|
|
calls. They are specified as a comma-separated list.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].weighted-cost.lockshared</name>
|
|
<value>10</value>
|
|
<description>The weight multiplier to apply to the time spent in the
|
|
processing phase which holds a shared (read) lock.
|
|
This property applies to WeightedTimeCostProvider.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].weighted-cost.lockexclusive</name>
|
|
<value>100</value>
|
|
<description>The weight multiplier to apply to the time spent in the
|
|
processing phase which holds an exclusive (write) lock.
|
|
This property applies to WeightedTimeCostProvider.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].weighted-cost.handler</name>
|
|
<value>1</value>
|
|
  <description>The weight multiplier to apply to the time spent in the
    HANDLER phase, which does not involve holding a lock.
    See org.apache.hadoop.ipc.ProcessingDetails.Timing for more details on
    this phase. This property applies to WeightedTimeCostProvider.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].weighted-cost.lockfree</name>
|
|
<value>1</value>
|
|
  <description>The weight multiplier to apply to the time spent in the
    LOCKFREE phase, which does not involve holding a lock.
    See org.apache.hadoop.ipc.ProcessingDetails.Timing for more details on
    this phase. This property applies to WeightedTimeCostProvider.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ipc.[port_number].weighted-cost.response</name>
|
|
<value>1</value>
|
|
  <description>The weight multiplier to apply to the time spent in the
    RESPONSE phase, which does not involve holding a lock.
    See org.apache.hadoop.ipc.ProcessingDetails.Timing for more details on
    this phase. This property applies to WeightedTimeCostProvider.
  </description>
|
|
</property>
|
|
|
|
<!-- Proxy Configuration -->
|
|
|
|
<property>
|
|
<name>hadoop.security.impersonation.provider.class</name>
|
|
<value></value>
|
|
<description>A class which implements ImpersonationProvider interface, used to
|
|
authorize whether one user can impersonate a specific user.
|
|
If not specified, the DefaultImpersonationProvider will be used.
|
|
If a class is specified, then that class will be used to determine
|
|
the impersonation capability.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.rpc.socket.factory.class.default</name>
|
|
<value>org.apache.hadoop.net.StandardSocketFactory</value>
|
|
<description> Default SocketFactory to use. This parameter is expected to be
|
|
formatted as "package.FactoryClassName".
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
|
|
<value></value>
|
|
  <description> SocketFactory to use to connect to a DFS. If null or empty, use
    hadoop.rpc.socket.factory.class.default. This socket factory is also used by
    DFSClient to create sockets to DataNodes.
  </description>
|
|
</property>
|
|
|
|
|
|
|
|
<property>
|
|
<name>hadoop.socks.server</name>
|
|
<value></value>
|
|
<description> Address (host:port) of the SOCKS server to be used by the
|
|
SocksSocketFactory.
|
|
</description>
|
|
</property>
|
|
|
|
<!-- Topology Configuration -->
|
|
<property>
|
|
<name>net.topology.node.switch.mapping.impl</name>
|
|
<value>org.apache.hadoop.net.ScriptBasedMapping</value>
|
|
<description> The default implementation of the DNSToSwitchMapping. It
|
|
invokes a script specified in net.topology.script.file.name to resolve
|
|
node names. If the value for net.topology.script.file.name is not set, the
|
|
default value of DEFAULT_RACK is returned for all node names.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>net.topology.impl</name>
|
|
<value>org.apache.hadoop.net.NetworkTopology</value>
|
|
  <description> The default implementation of NetworkTopology, which is the classic three-layer one.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>net.topology.script.file.name</name>
|
|
<value></value>
|
|
<description> The script name that should be invoked to resolve DNS names to
|
|
NetworkTopology names. Example: the script would take host.foo.bar as an
|
|
argument, and return /rack1 as the output.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>net.topology.script.number.args</name>
|
|
<value>100</value>
|
|
<description> The max number of args that the script configured with
|
|
net.topology.script.file.name should be run with. Each arg is an
|
|
IP address.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>net.topology.table.file.name</name>
|
|
<value></value>
|
|
<description> The file name for a topology file, which is used when the
|
|
net.topology.node.switch.mapping.impl property is set to
|
|
org.apache.hadoop.net.TableMapping. The file format is a two column text
|
|
file, with columns separated by whitespace. The first column is a DNS or
|
|
IP address and the second column specifies the rack where the address maps.
|
|
If no entry corresponding to a host in the cluster is found, then
|
|
/default-rack is assumed.
|
|
</description>
|
|
</property>
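<!--
  A sketch of the two-column table format described above, with made-up hosts
  and racks; the file referenced by net.topology.table.file.name would contain
  lines such as:

  host1.example.com /rack1
  host2.example.com /rack2
  192.168.1.30      /rack2

  together with net.topology.node.switch.mapping.impl set to
  org.apache.hadoop.net.TableMapping.
-->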
|
|
|
|
<!-- Local file system -->
|
|
<property>
|
|
<name>file.stream-buffer-size</name>
|
|
<value>4096</value>
|
|
<description>The size of buffer to stream files.
|
|
The size of this buffer should probably be a multiple of hardware
|
|
page size (4096 on Intel x86), and it determines how much data is
|
|
buffered during read and write operations.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>file.bytes-per-checksum</name>
|
|
<value>512</value>
|
|
<description>The number of bytes per checksum. Must not be larger than
|
|
file.stream-buffer-size</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>file.client-write-packet-size</name>
|
|
<value>65536</value>
|
|
<description>Packet size for clients to write</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>file.blocksize</name>
|
|
<value>67108864</value>
|
|
<description>Block size</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>file.replication</name>
|
|
<value>1</value>
|
|
<description>Replication factor</description>
|
|
</property>
|
|
|
|
<!-- FTP file system -->
|
|
<property>
|
|
<name>ftp.stream-buffer-size</name>
|
|
<value>4096</value>
|
|
<description>The size of buffer to stream files.
|
|
The size of this buffer should probably be a multiple of hardware
|
|
page size (4096 on Intel x86), and it determines how much data is
|
|
buffered during read and write operations.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ftp.bytes-per-checksum</name>
|
|
<value>512</value>
|
|
<description>The number of bytes per checksum. Must not be larger than
|
|
ftp.stream-buffer-size</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ftp.client-write-packet-size</name>
|
|
<value>65536</value>
|
|
<description>Packet size for clients to write</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ftp.blocksize</name>
|
|
<value>67108864</value>
|
|
<description>Block size</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ftp.replication</name>
|
|
<value>3</value>
|
|
<description>Replication factor</description>
|
|
</property>
|
|
|
|
<!-- Tfile -->
|
|
|
|
<property>
|
|
<name>tfile.io.chunk.size</name>
|
|
<value>1048576</value>
|
|
  <description>
    Value chunk size in bytes. Defaults to
    1MB. Values whose length is less than the chunk size are
    guaranteed to have a known value length at read time (see also
    TFile.Reader.Scanner.Entry.isValueLengthKnown()).
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>tfile.fs.output.buffer.size</name>
|
|
<value>262144</value>
|
|
<description>
|
|
Buffer size used for FSDataOutputStream in bytes.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>tfile.fs.input.buffer.size</name>
|
|
<value>262144</value>
|
|
<description>
|
|
Buffer size used for FSDataInputStream in bytes.
|
|
</description>
|
|
</property>
|
|
|
|
<!-- HTTP web-consoles Authentication -->
|
|
|
|
<property>
|
|
<name>hadoop.http.authentication.type</name>
|
|
<value>simple</value>
|
|
  <description>
    Defines the authentication used for the HTTP web-consoles.
    Supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME#
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.authentication.token.validity</name>
|
|
<value>36000</value>
|
|
<description>
|
|
Indicates how long (in seconds) an authentication token is valid before it has
|
|
to be renewed.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.authentication.signature.secret.file</name>
|
|
<value>${user.home}/hadoop-http-auth-signature-secret</value>
|
|
<description>
|
|
The signature secret for signing the authentication tokens.
|
|
A different secret should be used for each service.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.authentication.cookie.domain</name>
|
|
<value></value>
|
|
  <description>
    The domain to use for the HTTP cookie that stores the authentication token.
    For authentication to work correctly across the web-consoles of all Hadoop nodes,
    the domain must be correctly set.
    IMPORTANT: when using IP addresses, browsers ignore cookies with domain settings.
    For this setting to work properly all nodes in the cluster must be configured
    to generate URLs with hostname.domain names in them.
  </description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.authentication.simple.anonymous.allowed</name>
|
|
<value>true</value>
|
|
<description>
|
|
Indicates if anonymous requests are allowed when using 'simple' authentication.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.authentication.kerberos.principal</name>
|
|
<value>HTTP/_HOST@LOCALHOST</value>
|
|
<description>
|
|
Indicates the Kerberos principal to be used for HTTP endpoint.
|
|
The principal MUST start with 'HTTP/' as per Kerberos HTTP SPNEGO specification.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.authentication.kerberos.keytab</name>
|
|
<value>${user.home}/hadoop.keytab</value>
|
|
  <description>
    Location of the keytab file with the credentials for the HTTP Kerberos principal.
  </description>
|
|
</property>
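<!--
  A hedged sketch of enabling Kerberos (SPNEGO) for the web-consoles in
  core-site.xml using the properties above; the realm and keytab path are
  placeholders:

  <property>
    <name>hadoop.http.authentication.type</name>
    <value>kerberos</value>
  </property>
  <property>
    <name>hadoop.http.authentication.kerberos.principal</name>
    <value>HTTP/_HOST@EXAMPLE.COM</value>
  </property>
  <property>
    <name>hadoop.http.authentication.kerberos.keytab</name>
    <value>/etc/security/keytabs/spnego.service.keytab</value>
  </property>
-->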
|
|
|
|
<property>
|
|
<name>hadoop.http.authentication.kerberos.endpoint.whitelist</name>
|
|
<value></value>
|
|
  <description>
    The comma-separated list of the endpoints that skip Kerberos
    authentication. The endpoint must start with '/' and must not
    contain special characters afterwards. This parameter is for
    the monitoring tools that do not support Kerberos authentication.
    Administrators must configure this parameter very carefully
    because it allows unauthenticated access to the daemons.
  </description>
|
|
</property>
|
|
|
|
<!-- HTTP CORS support -->
|
|
<property>
|
|
<name>hadoop.http.cross-origin.enabled</name>
|
|
<value>false</value>
|
|
<description>Enable/disable the cross-origin (CORS) filter.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.cross-origin.allowed-origins</name>
|
|
<value>*</value>
|
|
  <description>Comma separated list of origins that are allowed for web services
    needing cross-origin (CORS) support. If a value in the list contains an
    asterisk (*), a regex pattern, escaping any dots ('.' -> '\.') and replacing
    the asterisk such that it captures any characters ('*' -> '.*'), is generated.
    Values prefixed with 'regex:' are interpreted directly as regular expressions,
    e.g. use the expression 'regex:https?:\/\/foo\.bar:([0-9]+)?' to allow any
    origin using the 'http' or 'https' protocol in the domain 'foo.bar' on any
    port. The use of simple wildcards ('*') is discouraged, and only available for
    backward compatibility.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.cross-origin.allowed-methods</name>
|
|
<value>GET,POST,HEAD</value>
|
|
<description>Comma separated list of methods that are allowed for web
|
|
services needing cross-origin (CORS) support.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.cross-origin.allowed-headers</name>
|
|
<value>X-Requested-With,Content-Type,Accept,Origin</value>
|
|
<description>Comma separated list of headers that are allowed for web
|
|
services needing cross-origin (CORS) support.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.http.cross-origin.max-age</name>
|
|
<value>1800</value>
|
|
<description>The number of seconds a pre-flighted request can be cached
|
|
for web services needing cross-origin (CORS) support.</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>dfs.ha.fencing.methods</name>
|
|
<value></value>
|
|
  <description>
    List of fencing methods to use for service fencing. May contain
    builtin methods (e.g. shell, sshfence and powershell) or a user-defined method.
  </description>
|
|
</property>
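<!--
  A sketch, with a placeholder script path and key file, of configuring two
  fencing methods (tried in order) together with the matching SSH key setting
  defined below:

  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>
      sshfence
      shell(/path/to/ensure_fenced.sh)
    </value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hdfs/.ssh/id_rsa</value>
  </property>
-->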
|
|
|
|
<property>
|
|
<name>dfs.ha.fencing.ssh.connect-timeout</name>
|
|
<value>30000</value>
|
|
<description>
|
|
SSH connection timeout, in milliseconds, to use with the builtin
|
|
sshfence fencer.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>dfs.ha.fencing.ssh.private-key-files</name>
|
|
<value></value>
|
|
<description>
|
|
The SSH private key files to use with the builtin sshfence fencer.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ha.zookeeper.quorum</name>
|
|
<description>
|
|
A list of ZooKeeper server addresses, separated by commas, that are
|
|
to be used by the ZKFailoverController in automatic failover.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ha.zookeeper.session-timeout.ms</name>
|
|
<value>10000</value>
|
|
<description>
|
|
The session timeout to use when the ZKFC connects to ZooKeeper.
|
|
Setting this value to a lower value implies that server crashes
|
|
will be detected more quickly, but risks triggering failover too
|
|
aggressively in the case of a transient error or network blip.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ha.zookeeper.parent-znode</name>
|
|
<value>/hadoop-ha</value>
|
|
<description>
|
|
The ZooKeeper znode under which the ZK failover controller stores
|
|
its information. Note that the nameservice ID is automatically
|
|
appended to this znode, so it is not normally necessary to
|
|
configure this, even in a federated environment.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ha.zookeeper.acl</name>
|
|
<value>world:anyone:rwcda</value>
|
|
<description>
|
|
A comma-separated list of ZooKeeper ACLs to apply to the znodes
|
|
used by automatic failover. These ACLs are specified in the same
|
|
format as used by the ZooKeeper CLI.
|
|
|
|
If the ACL itself contains secrets, you may instead specify a
|
|
path to a file, prefixed with the '@' symbol, and the value of
|
|
this configuration will be loaded from within.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>ha.zookeeper.auth</name>
|
|
<value></value>
|
|
<description>
|
|
A comma-separated list of ZooKeeper authentications to add when
|
|
connecting to ZooKeeper. These are specified in the same format
|
|
as used by the "addauth" command in the ZK CLI. It is
|
|
important that the authentications specified here are sufficient
|
|
to access znodes with the ACL specified in ha.zookeeper.acl.
|
|
|
|
If the auths contain secrets, you may instead specify a
|
|
path to a file, prefixed with the '@' symbol, and the value of
|
|
this configuration will be loaded from within.
|
|
</description>
|
|
</property>
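<!--
  An illustrative use of the '@' file indirection described above, keeping the
  ZooKeeper secrets out of the configuration file itself; the file paths shown
  are assumptions:

  <property>
    <name>ha.zookeeper.acl</name>
    <value>@/etc/hadoop/conf/zk-acl.txt</value>
  </property>
  <property>
    <name>ha.zookeeper.auth</name>
    <value>@/etc/hadoop/conf/zk-auth.txt</value>
  </property>
-->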
|
|
|
|
<!-- Static Web User Filter properties. -->
|
|
<property>
|
|
<name>hadoop.http.staticuser.user</name>
|
|
<value>dr.who</value>
|
|
<description>
|
|
The user name to filter as, on static web filters
|
|
while rendering content. An example use is the HDFS
|
|
web UI (user to be used for browsing files).
|
|
</description>
|
|
</property>
|
|
|
|
<!-- SSLFactory configuration -->
|
|
|
|
<property>
|
|
<name>hadoop.ssl.keystores.factory.class</name>
|
|
<value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
|
|
<description>
|
|
The keystores factory to use for retrieving certificates.
|
|
</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.ssl.require.client.cert</name>
|
|
<value>false</value>
|
|
<description>Whether client certificates are required</description>
|
|
</property>
|
|
|
|
<property>
|
|
<name>hadoop.ssl.hostname.verifier</name>
|
|
<value>DEFAULT</value>
|
|
<description>
|
|
The hostname verifier to provide for HttpsURLConnections.
|
|
Valid values are: DEFAULT, STRICT, STRICT_IE6, DEFAULT_AND_LOCALHOST and
|
|
ALLOW_ALL
|
|
</description>
|
|
</property>
|
|
|
|
<property>
  <name>hadoop.ssl.server.conf</name>
  <value>ssl-server.xml</value>
  <description>
    Resource file from which ssl server keystore information will be extracted.
    This file is looked up in the classpath; typically it should be in the
    Hadoop conf/ directory.
  </description>
</property>

<property>
  <name>hadoop.ssl.client.conf</name>
  <value>ssl-client.xml</value>
  <description>
    Resource file from which ssl client keystore information will be extracted.
    This file is looked up in the classpath; typically it should be in the
    Hadoop conf/ directory.
  </description>
</property>

<property>
  <name>hadoop.ssl.enabled.protocols</name>
  <value>TLSv1.2</value>
  <description>
    The supported SSL protocols. The parameter is only used by
    DatanodeHttpServer.
    Starting from Hadoop 3.3.0, TLSv1.3 is supported with Java 11 Runtime.
  </description>
</property>

<property>
  <name>hadoop.jetty.logs.serve.aliases</name>
  <value>true</value>
  <description>
    Enable/Disable aliases serving from jetty
  </description>
</property>

<property>
  <name>fs.permissions.umask-mode</name>
  <value>022</value>
  <description>
    The umask used when creating files and directories.
    Can be in octal or in symbolic. Examples are:
    "022" (octal for u=rwx,g=r-x,o=r-x in symbolic),
    or "u=rwx,g=rwx,o=" (symbolic for 007 in octal).
  </description>
</property>

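<!-- Worked example (illustrative only): with the default umask "022", a file
     requested with permissions 666 is created as 644 (the bits in 022 are cleared),
     and a directory requested with 777 is created as 755. The symbolic form
     "u=rwx,g=rwx,o=" corresponds to octal umask 007.
-->
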
<!-- ha properties -->

<property>
  <name>ha.health-monitor.connect-retry-interval.ms</name>
  <value>1000</value>
  <description>
    How often to retry connecting to the service.
  </description>
</property>

<property>
  <name>ha.health-monitor.check-interval.ms</name>
  <value>1000</value>
  <description>
    How often to check the service.
  </description>
</property>

<property>
  <name>ha.health-monitor.sleep-after-disconnect.ms</name>
  <value>1000</value>
  <description>
    How long to sleep after an unexpected RPC error.
  </description>
</property>

<property>
  <name>ha.health-monitor.rpc.connect.max.retries</name>
  <value>1</value>
  <description>
    The number of retries on connect error when establishing RPC proxy
    connection to NameNode, used for monitorHealth() calls.
  </description>
</property>

<property>
  <name>ha.health-monitor.rpc-timeout.ms</name>
  <value>45000</value>
  <description>
    Timeout for the actual monitorHealth() calls.
  </description>
</property>

<property>
  <name>ha.failover-controller.new-active.rpc-timeout.ms</name>
  <value>60000</value>
  <description>
    Timeout that the FC waits for the new active to become active
  </description>
</property>

<property>
  <name>ha.failover-controller.graceful-fence.rpc-timeout.ms</name>
  <value>5000</value>
  <description>
    Timeout that the FC waits for the old active to go to standby
  </description>
</property>

<property>
  <name>ha.failover-controller.graceful-fence.connection.retries</name>
  <value>1</value>
  <description>
    FC connection retries for graceful fencing
  </description>
</property>

<property>
  <name>ha.failover-controller.active-standby-elector.zk.op.retries</name>
  <value>3</value>
  <description>
    The number of times ZooKeeper operations are retried by the
    ActiveStandbyElector.
  </description>
</property>

<property>
  <name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
  <value>20000</value>
  <description>
    Timeout that the CLI (manual) FC waits for monitorHealth and
    getServiceState.
  </description>
</property>

<property>
  <name>ipc.client.fallback-to-simple-auth-allowed</name>
  <value>false</value>
  <description>
    When a client is configured to attempt a secure connection, but attempts to
    connect to an insecure server, that server may instruct the client to
    switch to SASL SIMPLE (unsecure) authentication. This setting controls
    whether or not the client will accept this instruction from the server.
    When false (the default), the client will not allow the fallback to SIMPLE
    authentication, and will abort the connection.
  </description>
</property>

<property>
  <name>fs.client.resolve.remote.symlinks</name>
  <value>true</value>
  <description>
    Whether to resolve symlinks when accessing a remote Hadoop filesystem.
    Setting this to false causes an exception to be thrown upon encountering
    a symlink. This setting does not apply to local filesystems, which
    automatically resolve local symlinks.
  </description>
</property>

<property>
  <name>nfs.exports.allowed.hosts</name>
  <value>* rw</value>
  <description>
    By default, the export can be mounted by any client. The value string
    contains machine name and access privilege, separated by whitespace
    characters. The machine name format can be a single host, a Java regular
    expression, or an IPv4 address. The access privilege uses rw or ro to
    specify read/write or read-only access of the machines to exports. If the
    access privilege is not provided, the default is read-only. Entries are separated by ";".
    For example: "192.168.0.0/22 rw ; host.*\.example\.com ; host1.test.org ro;".
    Only the NFS gateway needs to be restarted after this property is updated.
  </description>
</property>

<property>
  <name>hadoop.user.group.static.mapping.overrides</name>
  <value>dr.who=;</value>
  <description>
    Static mapping of user to groups. This will override the groups if
    available in the system for the specified user. In other words, groups
    look-up will not happen for these users; instead the groups mapped in this
    configuration will be used.
    Mappings should be in this format:
    user1=group1,group2;user2=;user3=group2;
    The default, "dr.who=;", treats "dr.who" as a user without groups.
  </description>
</property>

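<!-- Hedged example (illustrative only, the users and groups are placeholders): this
     mapping would give "alice" the groups "staff" and "hadoop", give "bob" no groups
     at all, and leave every other user to the normal group lookup.

<property>
  <name>hadoop.user.group.static.mapping.overrides</name>
  <value>alice=staff,hadoop;bob=;dr.who=;</value>
</property>
-->
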
<property>
  <name>rpc.metrics.quantile.enable</name>
  <value>false</value>
  <description>
    If this property is set to true and rpc.metrics.percentiles.intervals
    is set to a comma-separated list of granularities in seconds, then the
    50/75/90/95/99th percentile latencies for rpc queue/processing time, in
    milliseconds, are added to rpc metrics.
  </description>
</property>

<property>
  <name>rpc.metrics.timeunit</name>
  <value>MILLISECONDS</value>
  <description>
    This property is used to configure the time unit for various RPC metrics,
    e.g. rpcQueueTime, rpcLockWaitTime, rpcProcessingTime,
    deferredRpcProcessingTime. In the absence of this property, the
    default time unit used is milliseconds.
    The value of this property should match one of the values of the enum
    java.util.concurrent.TimeUnit.
    Some of the valid values: NANOSECONDS, MICROSECONDS, MILLISECONDS,
    SECONDS etc.
  </description>
</property>

<property>
  <name>rpc.metrics.percentiles.intervals</name>
  <value></value>
  <description>
    A comma-separated list of the granularity in seconds for the metrics which
    describe the 50/75/90/95/99th percentile latency for rpc queue/processing
    time. The metrics are output if rpc.metrics.quantile.enable is set to
    true.
  </description>
</property>

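<!-- Hedged example (illustrative only, the interval values are placeholders):
     enabling percentile latency metrics over rolling 60 second and 300 second windows.

<property>
  <name>rpc.metrics.quantile.enable</name>
  <value>true</value>
</property>
<property>
  <name>rpc.metrics.percentiles.intervals</name>
  <value>60,300</value>
</property>
-->
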
<property>
  <name>hadoop.security.crypto.codec.classes.EXAMPLECIPHERSUITE</name>
  <value></value>
  <description>
    The prefix for a given crypto codec contains a comma-separated
    list of implementation classes for that codec (e.g. EXAMPLECIPHERSUITE).
    The first implementation will be used if available, others are fallbacks.
  </description>
</property>

<property>
  <name>hadoop.security.crypto.codec.classes.aes.ctr.nopadding</name>
  <value>org.apache.hadoop.crypto.OpensslAesCtrCryptoCodec, org.apache.hadoop.crypto.JceAesCtrCryptoCodec</value>
  <description>
    Comma-separated list of crypto codec implementations for AES/CTR/NoPadding.
    The first implementation will be used if available, others are fallbacks.
  </description>
</property>

<property>
  <name>hadoop.security.crypto.codec.classes.sm4.ctr.nopadding</name>
  <value>org.apache.hadoop.crypto.OpensslSm4CtrCryptoCodec, org.apache.hadoop.crypto.JceSm4CtrCryptoCodec</value>
  <description>
    Comma-separated list of crypto codec implementations for SM4/CTR/NoPadding.
    The first implementation will be used if available, others are fallbacks.
  </description>
</property>

<property>
  <name>hadoop.security.openssl.engine.id</name>
  <value></value>
  <description>
    OpenSSL provides an engine mechanism that allows specifying a third-party
    software encryption library or a hardware encryption device for encryption.
    The engine ID can be vendor defined and will be passed to OpenSSL. For more
    info please see:
    https://github.com/openssl/openssl/blob/master/README.ENGINE
  </description>
</property>

<property>
  <name>hadoop.security.crypto.cipher.suite</name>
  <value>AES/CTR/NoPadding</value>
  <description>
    Cipher suite for crypto codec.
  </description>
</property>

<property>
  <name>hadoop.security.crypto.jce.provider</name>
  <value></value>
  <description>
    The JCE provider name used in CryptoCodec.
  </description>
</property>

<property>
  <name>hadoop.security.crypto.jceks.key.serialfilter</name>
  <description>
    Enhanced KeyStore Mechanisms in JDK 8u171 introduced jceks.key.serialFilter.
    If jceks.key.serialFilter is configured, the JCEKS KeyStore uses it during
    the deserialization of the encrypted Key object stored inside a
    SecretKeyEntry.
    If jceks.key.serialFilter is not configured, recovering a key from a
    keystore file in KeyProviderFactory will fail with an error on
    JDK 8u171 or newer. The filter pattern uses the same
    format as jdk.serialFilter.

    The value of this property is used as follows:
    1. The value of the jceks.key.serialFilter system property takes precedence
    over the value of this property.
    2. In the absence of the jceks.key.serialFilter system property, the value
    of this property will be set as the value of jceks.key.serialFilter.
    3. If neither this property nor the jceks.key.serialFilter system
    property has been set, org.apache.hadoop.crypto.key.KeyProvider
    sets a default value for jceks.key.serialFilter.
  </description>
</property>

<property>
  <name>hadoop.security.crypto.buffer.size</name>
  <value>8192</value>
  <description>
    The buffer size used by CryptoInputStream and CryptoOutputStream.
  </description>
</property>

<property>
  <name>hadoop.security.java.secure.random.algorithm</name>
  <value>SHA1PRNG</value>
  <description>
    The java secure random algorithm.
  </description>
</property>

<property>
  <name>hadoop.security.secure.random.impl</name>
  <value>org.apache.hadoop.crypto.random.OpensslSecureRandom</value>
  <description>
    Implementation of secure random.
  </description>
</property>

<property>
  <name>hadoop.security.random.device.file.path</name>
  <value>/dev/urandom</value>
  <description>
    OS security random device file path.
  </description>
</property>

<property>
  <name>hadoop.security.key.provider.path</name>
  <description>
    The KeyProvider to use when managing zone keys, and interacting with
    encryption keys when reading and writing to an encryption zone.
    For hdfs clients, the provider path will be the same as the namenode's
    provider path.
  </description>
</property>

<property>
  <name>hadoop.security.key.default.bitlength</name>
  <value>128</value>
  <description>
    The length (bits) of keys we want the KeyProvider to produce. Key length
    defines the upper bound on an algorithm's security; ideally, it would
    coincide with the lower bound on an algorithm's security.
  </description>
</property>

<property>
  <name>hadoop.security.key.default.cipher</name>
  <value>AES/CTR/NoPadding</value>
  <description>
    This indicates the algorithm to be used by the KeyProvider for generating
    keys, and will be converted to a CipherSuite when creating an encryption
    zone.
  </description>
</property>

<property>
  <name>fs.har.impl.disable.cache</name>
  <value>true</value>
  <description>Don't cache 'har' filesystem instances.</description>
</property>

<!--- KMSClientProvider configurations -->
<property>
  <name>hadoop.security.kms.client.authentication.retry-count</name>
  <value>1</value>
  <description>
    Number of times to retry connecting to KMS on authentication failure.
  </description>
</property>
<property>
  <name>hadoop.security.kms.client.encrypted.key.cache.size</name>
  <value>500</value>
  <description>
    Size of the EncryptedKeyVersion cache Queue for each key
  </description>
</property>
<property>
  <name>hadoop.security.kms.client.encrypted.key.cache.low-watermark</name>
  <value>0.3f</value>
  <description>
    If the size of the EncryptedKeyVersion cache Queue falls below the
    low watermark, this cache queue will be scheduled for a refill
  </description>
</property>
<property>
  <name>hadoop.security.kms.client.encrypted.key.cache.num.refill.threads</name>
  <value>2</value>
  <description>
    Number of threads to use for refilling depleted EncryptedKeyVersion
    cache Queues
  </description>
</property>
<property>
  <name>hadoop.security.kms.client.encrypted.key.cache.expiry</name>
  <value>43200000</value>
  <description>
    Cache expiry time for a Key, after which the cache Queue for this
    key will be dropped. Default = 12hrs
  </description>
</property>
<property>
  <name>hadoop.security.kms.client.timeout</name>
  <value>60</value>
  <description>
    Sets the value, in seconds, for the KMS client connection timeout and
    the read timeout to KMS servers.
  </description>
</property>

<property>
  <name>hadoop.security.kms.client.failover.sleep.base.millis</name>
  <value>100</value>
  <description>
    Expert only. The time to wait, in milliseconds, between failover
    attempts increases exponentially as a function of the number of
    attempts made so far, with a random factor of +/- 50%. This option
    specifies the base value used in the failover calculation. The
    first failover will retry immediately. The 2nd failover attempt
    will delay at least hadoop.security.client.failover.sleep.base.millis
    milliseconds. And so on.
  </description>
</property>

<property>
  <name>hadoop.security.kms.client.failover.sleep.max.millis</name>
  <value>2000</value>
  <description>
    Expert only. The time to wait, in milliseconds, between failover
    attempts increases exponentially as a function of the number of
    attempts made so far, with a random factor of +/- 50%. This option
    specifies the maximum value to wait between failovers.
    Specifically, the time between two failover attempts will not
    exceed +/- 50% of hadoop.security.client.failover.sleep.max.millis
    milliseconds.
  </description>
</property>

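<!-- Illustrative reading of the two settings above (a sketch, assuming the backoff
     roughly doubles per attempt): with base 100 and max 2000, the first failover
     retries immediately, the second waits on the order of 100 ms, the third on the
     order of 200 ms, and so on, each subject to the +/- 50% random factor and capped
     by the max value.
-->
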
<property>
  <name>ipc.server.max.connections</name>
  <value>0</value>
  <description>The maximum number of concurrent connections a server is allowed
    to accept. If this limit is exceeded, incoming connections will first fill
    the listen queue and then may go to an OS-specific listen overflow queue.
    The client may fail or timeout, but the server can avoid running out of file
    descriptors using this feature. 0 means no limit.
  </description>
</property>


<!-- YARN registry -->

<property>
  <name>hadoop.registry.zk.root</name>
  <value>/registry</value>
  <description>
    The root zookeeper node for the registry
  </description>
</property>

<property>
  <name>hadoop.registry.zk.session.timeout.ms</name>
  <value>60000</value>
  <description>
    Zookeeper session timeout in milliseconds
  </description>
</property>

<property>
  <name>hadoop.registry.zk.connection.timeout.ms</name>
  <value>15000</value>
  <description>
    Zookeeper connection timeout in milliseconds
  </description>
</property>

<property>
  <name>hadoop.registry.zk.retry.times</name>
  <value>5</value>
  <description>
    Zookeeper connection retry count before failing
  </description>
</property>

<property>
  <name>hadoop.registry.zk.retry.interval.ms</name>
  <value>1000</value>
  <description>
    Zookeeper connection retry interval in milliseconds.
  </description>
</property>

<property>
  <name>hadoop.registry.zk.retry.ceiling.ms</name>
  <value>60000</value>
  <description>
    Zookeeper retry limit in milliseconds, during
    exponential backoff.

    This places a limit even when the retry count and interval,
    combined with the backoff policy, would result in a long
    retry period.
  </description>
</property>

<property>
  <name>hadoop.registry.zk.quorum</name>
  <value>localhost:2181</value>
  <description>
    List of hostname:port pairs defining the
    zookeeper quorum binding for the registry
  </description>
</property>

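<!-- Hedged example (illustrative only, the hostnames are placeholders): a registry
     bound to a three node ZooKeeper ensemble instead of the local default.

<property>
  <name>hadoop.registry.zk.quorum</name>
  <value>zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181</value>
</property>
-->
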
<property>
  <name>hadoop.registry.secure</name>
  <value>false</value>
  <description>
    Key to set if the registry is secure. Turning it on
    changes the permissions policy from "open access"
    to kerberos-based restrictions, with the option of
    a user adding one or more auth key pairs down their
    own tree.
  </description>
</property>

<property>
  <name>hadoop.registry.system.acls</name>
  <value>sasl:yarn@, sasl:mapred@, sasl:hdfs@</value>
  <description>
    A comma separated list of Zookeeper ACL identifiers with
    system access to the registry in a secure cluster.

    These are given full access to all entries.

    If there is an "@" at the end of a SASL entry it
    instructs the registry client to append the default kerberos domain.
  </description>
</property>

<property>
  <name>hadoop.registry.kerberos.realm</name>
  <value></value>
  <description>
    The kerberos realm: used to set the realm of
    system principals which do not declare their realm,
    and any other accounts that need the value.

    If empty, the default realm of the running process
    is used.

    If neither is known and the realm is needed, then the registry
    service/client will fail.
  </description>
</property>

<property>
  <name>hadoop.registry.jaas.context</name>
  <value>Client</value>
  <description>
    Key to define the JAAS context. Used in secure
    mode.
  </description>
</property>

<property>
  <name>hadoop.shell.missing.defaultFs.warning</name>
  <value>false</value>
  <description>
    Enable hdfs shell commands to display warnings if the fs.defaultFS property
    is not set.
  </description>
</property>

<property>
  <name>hadoop.shell.safely.delete.limit.num.files</name>
  <value>100</value>
  <description>Used by the -safely option of the hadoop fs shell -rm command to avoid
    accidental deletion of large directories. When enabled, the -rm command
    requires confirmation if the number of files to be deleted is greater than
    this limit. The default limit is 100 files. The warning is disabled if
    the limit is 0 or -safely is not specified in the -rm command.
  </description>
</property>

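<!-- Hedged usage sketch (illustrative only, the path is a placeholder): with the
     default limit of 100, a command such as
       hadoop fs -rm -r -safely /user/alice/big-dir
     asks for confirmation before deleting a directory containing more than 100 files.
-->
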
<property>
  <name>fs.client.htrace.sampler.classes</name>
  <value></value>
  <description>The class names of the HTrace Samplers to use for Hadoop
    filesystem clients.
  </description>
</property>

<property>
  <name>hadoop.htrace.span.receiver.classes</name>
  <value></value>
  <description>The class names of the Span Receivers to use for Hadoop.
  </description>
</property>

<property>
  <name>hadoop.http.logs.enabled</name>
  <value>true</value>
  <description>
    Enable the "/logs" endpoint on all Hadoop daemons, which serves local
    logs, but may be considered a security risk because it lists the contents
    of a directory.
  </description>
</property>

<property>
  <name>fs.client.resolve.topology.enabled</name>
  <value>false</value>
  <description>Whether the client machine will use the class specified by
    property net.topology.node.switch.mapping.impl to compute the network
    distance between itself and remote machines of the FileSystem. Additional
    properties might need to be configured depending on the class specified
    in net.topology.node.switch.mapping.impl. For example, if
    org.apache.hadoop.net.ScriptBasedMapping is used, a valid script file
    needs to be specified in net.topology.script.file.name.
  </description>
</property>


<!-- Azure Data Lake File System Configurations -->

<property>
  <name>fs.adl.impl</name>
  <value>org.apache.hadoop.fs.adl.AdlFileSystem</value>
</property>

<property>
  <name>fs.AbstractFileSystem.adl.impl</name>
  <value>org.apache.hadoop.fs.adl.Adl</value>
</property>

<property>
  <name>adl.feature.ownerandgroup.enableupn</name>
  <value>false</value>
  <description>
    When true: User and Group in the FileStatus/AclStatus response are
    represented as user-friendly names as per the Azure AD profile.

    When false (default): User and Group in the FileStatus/AclStatus
    response are represented by the unique identifier from the Azure AD
    profile (Object ID as GUID).

    For optimal performance, false is recommended.
  </description>
</property>

<property>
  <name>fs.adl.oauth2.access.token.provider.type</name>
  <value>ClientCredential</value>
  <description>
    Defines the Azure Active Directory OAuth2 access token provider type.
    Supported types are ClientCredential, RefreshToken, MSI, DeviceCode,
    and Custom.
    The ClientCredential type requires the properties fs.adl.oauth2.client.id,
    fs.adl.oauth2.credential, and fs.adl.oauth2.refresh.url.
    The RefreshToken type requires the properties fs.adl.oauth2.client.id and
    fs.adl.oauth2.refresh.token.
    The MSI type reads the optional property fs.adl.oauth2.msi.port, if specified.
    The DeviceCode type requires the property
    fs.adl.oauth2.devicecode.clientapp.id.
    The Custom type requires the property fs.adl.oauth2.access.token.provider.
  </description>
</property>

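<!-- Hedged example (illustrative only): a ClientCredential setup supplies the three
     properties named above; the tenant id, client id and secret are placeholders.

<property>
  <name>fs.adl.oauth2.access.token.provider.type</name>
  <value>ClientCredential</value>
</property>
<property>
  <name>fs.adl.oauth2.refresh.url</name>
  <value>https://login.microsoftonline.com/TENANT_ID/oauth2/token</value>
</property>
<property>
  <name>fs.adl.oauth2.client.id</name>
  <value>CLIENT_ID</value>
</property>
<property>
  <name>fs.adl.oauth2.credential</name>
  <value>CLIENT_SECRET</value>
</property>
-->
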
<property>
  <name>fs.adl.oauth2.client.id</name>
  <value></value>
  <description>The OAuth2 client id.</description>
</property>

<property>
  <name>fs.adl.oauth2.credential</name>
  <value></value>
  <description>The OAuth2 access key.</description>
</property>

<property>
  <name>fs.adl.oauth2.refresh.url</name>
  <value></value>
  <description>The OAuth2 token endpoint.</description>
</property>

<property>
  <name>fs.adl.oauth2.refresh.token</name>
  <value></value>
  <description>The OAuth2 refresh token.</description>
</property>

<property>
  <name>fs.adl.oauth2.access.token.provider</name>
  <value></value>
  <description>
    The class name of the OAuth2 access token provider.
  </description>
</property>

<property>
  <name>fs.adl.oauth2.msi.port</name>
  <value></value>
  <description>
    The localhost port for the MSI token service. This is the port specified
    when creating the Azure VM. The default, if this setting is not specified,
    is 50342.
    Used by MSI token provider.
  </description>
</property>

<property>
  <name>fs.adl.oauth2.devicecode.clientapp.id</name>
  <value></value>
  <description>
    The app id of the AAD native app in whose context the auth request
    should be made.
    Used by DeviceCode token provider.
  </description>
</property>

<property>
  <name>adl.http.timeout</name>
  <value>-1</value>
  <description>
    Base timeout (in milliseconds) for HTTP requests from the ADL SDK. Values
    of zero or less cause the SDK default to be used instead.
  </description>
</property>

<property>
  <name>adl.ssl.channel.mode</name>
  <value></value>
  <description>
    Valid inputs are OpenSSL, Default_JSE and Default (case insensitive).
    If the config is missing or invalid, the SSL channel mode will be set to Default.

    When OpenSSL, SSL socket connections are created in OpenSSL mode.
    When Default_JSE, SSL socket connections are created in the default JSE mode.
    When Default, SSL socket connections are attempted with OpenSSL
    and will fall back to Default_JSE mode if OpenSSL is not available at runtime.
  </description>
</property>

<!-- Azure Data Lake File System Configurations Ends Here -->

<property>
  <name>hadoop.caller.context.enabled</name>
  <value>false</value>
  <description>When the feature is enabled, additional fields are written into
    name-node audit log records for auditing coarse granularity operations.
  </description>
</property>
<property>
  <name>hadoop.caller.context.max.size</name>
  <value>128</value>
  <description>The maximum bytes a caller context string can have. If the
    passed caller context is longer than this maximum, the client will
    truncate it before sending it to the server. Note that the server may have
    a different maximum size, and will truncate the caller context to the
    maximum size it allows.
  </description>
</property>
<property>
  <name>hadoop.caller.context.signature.max.size</name>
  <value>40</value>
  <description>
    The caller's signature (optional) is for offline validation. If the
    signature exceeds the maximum allowed bytes on the server, the caller
    context will be abandoned and will not be recorded in audit logs.
  </description>
</property>
<property>
  <name>hadoop.caller.context.separator</name>
  <value>,</value>
  <description>
    The separator is for a context which may contain many fields. For example,
    if the separator is ',' and there are two key/value fields in the context,
    the context string is "key1:value1,key2:value2". The separator should not
    contain '\t', '\n', '='.
  </description>
</property>

<!-- SequenceFile's Sorter properties -->
<property>
  <name>seq.io.sort.mb</name>
  <value>100</value>
  <description>
    The total amount of buffer memory to use while sorting files,
    while using SequenceFile.Sorter, in megabytes. By default,
    gives each merge stream 1MB, which should minimize seeks.
  </description>
</property>
<property>
  <name>seq.io.sort.factor</name>
  <value>100</value>
  <description>
    The number of streams to merge at once while sorting
    files using SequenceFile.Sorter.
    This determines the number of open file handles.
  </description>
</property>

<property>
  <name>hadoop.zk.address</name>
  <!--value>127.0.0.1:2181</value-->
  <description>Host:Port of the ZooKeeper server to be used.
  </description>
</property>

<property>
  <name>hadoop.zk.num-retries</name>
  <value>1000</value>
  <description>Number of tries to connect to ZooKeeper.</description>
</property>

<property>
  <name>hadoop.zk.retry-interval-ms</name>
  <value>1000</value>
  <description>Retry interval in milliseconds when connecting to ZooKeeper.
  </description>
</property>

<property>
  <name>hadoop.zk.timeout-ms</name>
  <value>10000</value>
  <description>ZooKeeper session timeout in milliseconds. Session expiration
    is managed by the ZooKeeper cluster itself, not by the client. This value is
    used by the cluster to determine when the client's session expires.
    Expiration happens when the cluster does not hear from the client within
    the specified session timeout period (i.e. no heartbeat).</description>
</property>

<property>
  <name>hadoop.zk.acl</name>
  <value>world:anyone:rwcda</value>
  <description>ACLs to be used for ZooKeeper znodes.</description>
</property>

<property>
  <name>hadoop.zk.auth</name>
  <description>
    Specify the auths to be used for the ACLs specified in hadoop.zk.acl.
    This takes a comma-separated list of authentication mechanisms, each of the
    form 'scheme:auth' (the same syntax used for the 'addAuth' command in
    the ZK CLI).
  </description>
</property>
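
<!-- Hedged example (illustrative only, the credentials are placeholders): a digest
     auth matching a digest ACL set in hadoop.zk.acl.

<property>
  <name>hadoop.zk.auth</name>
  <value>digest:zkuser:zkpassword</value>
</property>
-->
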
<property>
  <name>hadoop.system.tags</name>
  <value>YARN,HDFS,NAMENODE,DATANODE,REQUIRED,SECURITY,KERBEROS,PERFORMANCE,CLIENT
    ,SERVER,DEBUG,DEPRECATED,COMMON,OPTIONAL</value>
  <description>
    Deprecated. Please use hadoop.tags.system instead.
  </description>
</property>

<property>
  <name>hadoop.tags.system</name>
  <value>YARN,HDFS,NAMENODE,DATANODE,REQUIRED,SECURITY,KERBEROS,PERFORMANCE,CLIENT
    ,SERVER,DEBUG,DEPRECATED,COMMON,OPTIONAL</value>
  <description>
    System tags to group related properties together.
  </description>
</property>

<property>
  <name>ipc.client.bind.wildcard.addr</name>
  <value>false</value>
  <description>When set to true, clients will bind the socket to the
    wildcard address (i.e. 0.0.0.0).
  </description>
</property>

<property>
  <name>hadoop.domainname.resolver.impl</name>
  <value>org.apache.hadoop.net.DNSDomainNameResolver</value>
  <description>The implementation of DomainNameResolver used for service (NameNodes,
    RBF Routers etc) discovery. The default implementation
    org.apache.hadoop.net.DNSDomainNameResolver returns all IP addresses associated
    with the input domain name of the services by querying the underlying DNS.
  </description>
</property>

<property>
  <name>dfs.client.ignore.namenode.default.kms.uri</name>
  <value>false</value>
  <description>
    Ignore the KMS default URI returned from the NameNode.
    When set to true, the KMS URI is searched for in the following order:
    1. A mapping in the Credentials' secrets map for the namenode uri.
    2. Fallback to the local conf (i.e. hadoop.security.key.provider.path).
    If the client chooses to ignore the KMS URI provided by the NameNode, the
    client should set the KMS URI using 'hadoop.security.key.provider.path' to
    access the right KMS for encrypted files.
  </description>
</property>

<property>
  <name>hadoop.prometheus.endpoint.enabled</name>
  <value>false</value>
  <description>
    If set to true, a Prometheus-compatible metrics page is enabled on the
    HTTP servers via the '/prom' endpoint.
  </description>
</property>

<property>
  <name>fs.getspaceused.classname</name>
  <value></value>
  <description>
    The class used to estimate how much space is used in a directory.
    Four implementation classes are supported:
    org.apache.hadoop.fs.DU (default), org.apache.hadoop.fs.WindowsGetSpaceUsed,
    org.apache.hadoop.fs.DFCachingGetSpaceUsed and
    org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ReplicaCachingGetSpaceUsed.
    The ReplicaCachingGetSpaceUsed implementation is only used in the HDFS module.
  </description>
</property>

<property>
  <name>fs.getspaceused.jitterMillis</name>
  <value>60000</value>
  <description>
    fs space usage statistics refresh jitter in msec.
  </description>
</property>

<property>
  <name>hadoop.http.sni.host.check.enabled</name>
  <value>false</value>
  <description>
    Enable Server Name Indication (SNI) host check for HTTPS enabled server.
  </description>
</property>

<property>
  <name>hadoop.metrics.jvm.use-thread-mxbean</name>
  <value>false</value>
  <description>
    Whether or not ThreadMXBean is used for getting thread info in JvmMetrics.
    The ThreadGroup approach is preferred for better performance.
  </description>
</property>

<property>
  <name>fs.AbstractFileSystem.ofs.impl</name>
  <value>org.apache.hadoop.fs.ozone.RootedOzFs</value>
  <description>The AbstractFileSystem for Rooted Ozone
    FileSystem ofs uri</description>
</property>

<property>
  <name>fs.AbstractFileSystem.o3fs.impl</name>
  <value>org.apache.hadoop.fs.ozone.OzFs</value>
  <description>The AbstractFileSystem for Ozone FileSystem o3fs uri</description>
</property>

</configuration>