hadoop/hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml

898 lines
30 KiB
XML

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Do not modify this file directly. Instead, copy entries that you -->
<!-- wish to modify from this file into hdfs-rbf-site.xml and change them -->
<!-- there. If hdfs-rbf-site.xml does not already exist, create it. -->
<configuration>
<property>
<name>dfs.federation.router.default.nameserviceId</name>
<value></value>
<description>
Nameservice identifier of the default subcluster to monitor.
</description>
</property>
<property>
<name>dfs.federation.router.default.nameservice.enable</name>
<value>true</value>
<description>
The default subcluster is enabled to read and write files.
</description>
</property>
<property>
<name>dfs.federation.router.rpc.enable</name>
<value>true</value>
<description>
If true, the RPC service to handle client requests in the router is
enabled.
</description>
</property>
<property>
<name>dfs.federation.router.rpc-address</name>
<value>0.0.0.0:8888</value>
<description>
RPC address that handles all client requests.
The value of this property will take the form of router-host1:rpc-port.
</description>
</property>
<property>
<name>dfs.federation.router.rpc-bind-host</name>
<value></value>
<description>
The actual address the RPC server will bind to. If this optional address is
set, it overrides only the hostname portion of
dfs.federation.router.rpc-address. This is useful for making the Router
listen on all interfaces by setting it to 0.0.0.0.
</description>
</property>
<property>
<name>dfs.federation.router.handler.count</name>
<value>10</value>
<description>
The number of server threads for the router to handle RPC requests from
clients.
</description>
</property>
<property>
<name>dfs.federation.router.handler.queue.size</name>
<value>100</value>
<description>
The size of the queue for the number of handlers to handle RPC client requests.
</description>
</property>
<property>
<name>dfs.federation.router.reader.count</name>
<value>1</value>
<description>
The number of readers for the router to handle RPC client requests.
</description>
</property>
<property>
<name>dfs.federation.router.reader.queue.size</name>
<value>100</value>
<description>
The size of the queue for the number of readers for the router to handle RPC client requests.
</description>
</property>
<property>
<name>dfs.federation.router.connection.creator.queue-size</name>
<value>100</value>
<description>
Size of async connection creator queue.
</description>
</property>
<property>
<name>dfs.federation.router.connection.pool-size</name>
<value>1</value>
<description>
Size of the pool of connections from the router to namenodes.
</description>
</property>
<property>
<name>dfs.federation.router.connection.min-active-ratio</name>
<value>0.5f</value>
<description>
Minimum active ratio of connections from the router to namenodes.
</description>
</property>
<property>
<name>dfs.federation.router.connection.clean.ms</name>
<value>10000</value>
<description>
Time interval, in milliseconds, to check if the connection pool should
remove unused connections.
</description>
</property>
<property>
<name>dfs.federation.router.enable.multiple.socket</name>
<value>false</value>
<description>
Whether to enable multiple downstream sockets. If true, ConnectionPool
will use a new socket when creating a new connection for the same user,
and RouterRPCClient will get a better throughput. It is best used together
with dfs.federation.router.max.concurrency.per.connection to get
a better throughput with fewer sockets. For example, enable
dfs.federation.router.enable.multiple.socket and
set dfs.federation.router.max.concurrency.per.connection = 20.
</description>
</property>
<property>
<name>dfs.federation.router.max.concurrency.per.connection</name>
<value>1</value>
<description>
The maximum number of requests that a connection can handle concurrently.
When the number of requests being processed by a socket is less than this value,
new requests will be processed by this socket. When
dfs.federation.router.enable.multiple.socket is enabled, it is best
to set this value greater than 1, such as 20, to avoid frequent
creation and idle sockets in the case of a NS with jitter requests.
</description>
</property>
<property>
<name>dfs.federation.router.connection.pool.clean.ms</name>
<value>60000</value>
<description>
Time interval, in milliseconds, to check if the connection manager should
remove unused connection pools.
</description>
</property>
<property>
<name>dfs.federation.router.metrics.enable</name>
<value>true</value>
<description>
If the metrics in the router are enabled.
</description>
</property>
<property>
<name>dfs.federation.router.dn-report.time-out</name>
<value>1000</value>
<description>
Time out, in milliseconds, for getDatanodeReport.
</description>
</property>
<property>
<name>dfs.federation.router.dn-report.cache-expire</name>
<value>10s</value>
<description>
Expiration time in seconds for datanodereport.
</description>
</property>
<property>
<name>dfs.federation.router.enable.get.dn.usage</name>
<value>true</value>
<description>
If true, the getNodeUsage method in RBFMetrics will return an up-to-date
result collecting from downstream nameservices. But it will take a long
time and take up thread resources. If false, it will return a mock result with all 0.
</description>
</property>
<property>
<name>dfs.federation.router.metrics.class</name>
<value>org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCPerformanceMonitor</value>
<description>
Class to monitor the RPC system in the router. It must implement the
RouterRpcMonitor interface.
</description>
</property>
<property>
<name>dfs.federation.router.admin.enable</name>
<value>true</value>
<description>
If true, the RPC admin service to handle client requests in the router is
enabled.
</description>
</property>
<property>
<name>dfs.federation.router.admin-address</name>
<value>0.0.0.0:8111</value>
<description>
RPC address that handles the admin requests.
The value of this property will take the form of router-host1:rpc-port.
</description>
</property>
<property>
<name>dfs.federation.router.admin-bind-host</name>
<value></value>
<description>
The actual address the RPC admin server will bind to. If this optional
address is set, it overrides only the hostname portion of
dfs.federation.router.admin-address. This is useful for making the Router
listen on all interfaces by setting it to 0.0.0.0.
</description>
</property>
<property>
<name>dfs.federation.router.admin.handler.count</name>
<value>1</value>
<description>
The number of server threads for the router to handle RPC requests from
admin.
</description>
</property>
<property>
<name>dfs.federation.router.admin.mount.check.enable</name>
<value>false</value>
<description>
If true, add/update mount table will include a destination check to make
sure the file exists in downstream namenodes, and changes to mount table
will fail if the file doesn't exist in any of the destination namenode.
</description>
</property>
<property>
<name>dfs.federation.router.http-address</name>
<value>0.0.0.0:50071</value>
<description>
HTTP address that handles the web requests to the Router.
The value of this property will take the form of router-host1:http-port.
</description>
</property>
<property>
<name>dfs.federation.router.http-bind-host</name>
<value></value>
<description>
The actual address the HTTP server will bind to. If this optional
address is set, it overrides only the hostname portion of
dfs.federation.router.http-address. This is useful for making the Router
listen on all interfaces by setting it to 0.0.0.0.
</description>
</property>
<property>
<name>dfs.federation.router.https-address</name>
<value>0.0.0.0:50072</value>
<description>
HTTPS address that handles the web requests to the Router.
The value of this property will take the form of router-host1:https-port.
</description>
</property>
<property>
<name>dfs.federation.router.https-bind-host</name>
<value></value>
<description>
The actual address the HTTPS server will bind to. If this optional
address is set, it overrides only the hostname portion of
dfs.federation.router.https-address. This is useful for making the Router
listen on all interfaces by setting it to 0.0.0.0.
</description>
</property>
<property>
<name>dfs.federation.router.http.enable</name>
<value>true</value>
<description>
If the HTTP service to handle client requests in the router is enabled.
</description>
</property>
<property>
<name>dfs.federation.router.fs-limits.max-component-length</name>
<value>0</value>
<description>
Defines the maximum number of bytes in UTF-8 encoding in each
component of a path at Router side. A value of 0 will disable the check.
Supports multiple size unit suffixes (case insensitive). It acts like the
configuration dfs.namenode.fs-limits.max-component-length at the NameNode
side.
</description>
</property>
<property>
<name>dfs.federation.router.file.resolver.client.class</name>
<value>org.apache.hadoop.hdfs.server.federation.resolver.MountTableResolver</value>
<description>
Class to resolve files to subclusters. To enable multiple subclusters for a mount point,
set to org.apache.hadoop.hdfs.server.federation.resolver.MultipleDestinationMountTableResolver.
</description>
</property>
<property>
<name>dfs.federation.router.namenode.resolver.client.class</name>
<value>org.apache.hadoop.hdfs.server.federation.resolver.MembershipNamenodeResolver</value>
<description>
Class to resolve the namenode for a subcluster.
</description>
</property>
<property>
<name>dfs.federation.router.store.enable</name>
<value>true</value>
<description>
If true, the Router connects to the State Store.
</description>
</property>
<property>
<name>dfs.federation.router.store.serializer</name>
<value>org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreSerializerPBImpl</value>
<description>
Class to serialize State Store records.
</description>
</property>
<property>
<name>dfs.federation.router.store.driver.class</name>
<value>org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl</value>
<description>
Class to implement the State Store. There are three implementation classes currently
being supported:
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl,
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileSystemImpl and
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.
These implementation classes use the local file, filesystem and ZooKeeper as a backend respectively.
By default it uses the ZooKeeper as the default State Store.
</description>
</property>
<property>
<name>dfs.federation.router.store.connection.test</name>
<value>60000</value>
<description>
How often to check for the connection to the State Store in milliseconds.
</description>
</property>
<property>
<name>dfs.federation.router.store.driver.zk.parent-path</name>
<value>/hdfs-federation</value>
<description>
The parent path of zookeeper for StateStoreZooKeeperImpl.
</description>
</property>
<property>
<name>dfs.federation.router.store.driver.zk.async.max.threads</name>
<value>-1</value>
<description>
Max threads number of StateStoreZooKeeperImpl in async mode.
The only class currently being supported:
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.
Default value is -1, which means StateStoreZooKeeperImpl is working in sync mode.
Use positive integer value to enable async mode.
</description>
</property>
<property>
<name>dfs.federation.router.cache.ttl</name>
<value>1m</value>
<description>
How often to refresh the State Store caches in milliseconds. This setting
supports multiple time unit suffixes as described in
dfs.heartbeat.interval. If no suffix is specified then milliseconds is
assumed.
</description>
</property>
<property>
<name>dfs.federation.router.store.membership.expiration</name>
<value>300000</value>
<description>
Expiration time in milliseconds for a membership record.
</description>
</property>
<property>
<name>dfs.federation.router.store.membership.expiration.deletion</name>
<value>-1</value>
<description>
Deletion time in milliseconds for a membership record. If an expired
membership record exists beyond this time, it will be deleted. If this
value is negative, the deletion is disabled.
</description>
</property>
<property>
<name>dfs.federation.router.heartbeat.enable</name>
<value>true</value>
<description>
If true, the Router heartbeats into the State Store.
</description>
</property>
<property>
<name>dfs.federation.router.heartbeat.interval</name>
<value>5000</value>
<description>
How often the Router should heartbeat into the State Store in milliseconds.
</description>
</property>
<property>
<name>dfs.federation.router.health.monitor.timeout</name>
<value>30s</value>
<description>
Time out for Router to obtain HAServiceStatus from NameNode.
</description>
</property>
<property>
<name>dfs.federation.router.heartbeat-state.interval</name>
<value>5s</value>
<description>
How often the Router should heartbeat its state into the State Store in
milliseconds. This setting supports multiple time unit suffixes as
described in dfs.federation.router.quota-cache.update.interval.
</description>
</property>
<property>
<name>dfs.federation.router.namenode.heartbeat.enable</name>
<value></value>
<description>
If true, get namenode heartbeats and send into the State Store.
If not explicitly specified takes the same value as for
dfs.federation.router.heartbeat.enable.
</description>
</property>
<property>
<name>dfs.federation.router.store.router.expiration</name>
<value>5m</value>
<description>
Expiration time in milliseconds for a router state record. This setting
supports multiple time unit suffixes as described in
dfs.federation.router.quota-cache.update.interval.
</description>
</property>
<property>
<name>dfs.federation.router.store.router.expiration.deletion</name>
<value>-1</value>
<description>
Deletion time in milliseconds for a router state record. If an expired
router state record exists beyond this time, it will be deleted. If this
value is negative, the deletion is disabled.
</description>
</property>
<property>
<name>dfs.federation.router.safemode.enable</name>
<value>true</value>
<description>
If true, the safe mode of the Router is enabled.
</description>
</property>
<property>
<name>dfs.federation.router.safemode.extension</name>
<value>30s</value>
<description>
Time after startup that the Router is in safe mode. This setting
supports multiple time unit suffixes as described in
dfs.heartbeat.interval. If no suffix is specified then milliseconds is
assumed.
</description>
</property>
<property>
<name>dfs.federation.router.safemode.expiration</name>
<value>3m</value>
<description>
Time without being able to reach the State Store to enter safe mode. This
setting supports multiple time unit suffixes as described in
dfs.heartbeat.interval. If no suffix is specified then milliseconds is
assumed.
</description>
</property>
<property>
<name>dfs.federation.router.monitor.namenode</name>
<value></value>
<description>
The identifier of the namenodes to monitor and heartbeat.
</description>
</property>
<property>
<name>dfs.federation.router.monitor.namenode.nameservice.resolution-enabled</name>
<value>false</value>
<description>
Determines if the given monitored namenode address is a domain name which needs to
be resolved.
This is used by router to resolve namenodes.
</description>
</property>
<property>
<name>dfs.federation.router.monitor.namenode.nameservice.resolver.impl</name>
<value></value>
<description>
Nameservice resolver implementation used by router.
Effective with
dfs.federation.router.monitor.namenode.nameservice.resolution-enabled on.
</description>
</property>
<property>
<name>dfs.federation.router.monitor.localnamenode.enable</name>
<value>true</value>
<description>
If true, the Router should monitor the namenode in the local machine.
</description>
</property>
<property>
<name>dfs.federation.router.mount-table.max-cache-size</name>
<value>10000</value>
<description>
Maximum number of mount table cache entries to have.
By default, remove cache entries if we have more than 10k.
</description>
</property>
<property>
<name>dfs.federation.router.mount-table.cache.enable</name>
<value>true</value>
<description>
Set to true to enable mount table cache (Path to Remote Location cache).
Disabling the cache is recommended when a large amount of unique paths are queried.
</description>
</property>
<property>
<name>dfs.federation.router.quota.enable</name>
<value>false</value>
<description>
Set to true to enable quota system in Router. When it's enabled, setting
or clearing sub-cluster's quota directly is not recommended since Router
Admin server will override sub-cluster's quota with global quota.
</description>
</property>
<property>
<name>dfs.federation.router.quota-cache.update.interval</name>
<value>60s</value>
<description>
Interval time for updating quota usage cache in Router.
This property is used only if the value of
dfs.federation.router.quota.enable is true.
This setting supports multiple time unit suffixes as described
in dfs.heartbeat.interval. If no suffix is specified then milliseconds
is assumed.
</description>
</property>
<property>
<name>dfs.federation.router.client.thread-size</name>
<value>32</value>
<description>
Max threads size for the RouterClient to execute concurrent
requests.
</description>
</property>
<property>
<name>dfs.federation.router.client.retry.max.attempts</name>
<value>3</value>
<description>
Max retry attempts for the RouterClient talking to the Router.
</description>
</property>
<property>
<name>dfs.federation.router.client.reject.overload</name>
<value>false</value>
<description>
Set to true to reject client requests when we run out of RPC client
threads.
</description>
</property>
<property>
<name>dfs.federation.router.client.allow-partial-listing</name>
<value>true</value>
<description>
If the Router can return a partial list of files in a multi-destination mount point when one of the subclusters is unavailable.
True may return a partial list of files if a subcluster is down.
False will fail the request if one is unavailable.
</description>
</property>
<property>
<name>dfs.federation.router.client.mount-status.time-out</name>
<value>1s</value>
<description>
Set a timeout for the Router when listing folders containing mount
points. In this process, the Router checks the mount table and then it
checks permissions in the subcluster. After the time out, we return the
default values.
</description>
</property>
<property>
<name>dfs.federation.router.connect.max.retries.on.timeouts</name>
<value>0</value>
<description>
Maximum number of retries for the IPC Client when connecting to the
subclusters. By default, it doesn't let the IPC retry and the Router
handles it.
</description>
</property>
<property>
<name>dfs.federation.router.connect.timeout</name>
<value>2s</value>
<description>
Time out for the IPC client connecting to the subclusters. This should be
short as the Router has knowledge of the state of the Routers.
</description>
</property>
<property>
<name>dfs.federation.router.keytab.file</name>
<value></value>
<description>
The keytab file used by router to login as its
service principal. The principal name is configured with
dfs.federation.router.kerberos.principal.
</description>
</property>
<property>
<name>dfs.federation.router.kerberos.principal</name>
<value></value>
<description>
The Router service principal. This is typically set to
router/_HOST@REALM.TLD. Each Router will substitute _HOST with its
own fully qualified hostname at startup. The _HOST placeholder
allows using the same configuration setting on all Routers
in an HA setup.
</description>
</property>
<property>
<name>dfs.federation.router.kerberos.principal.hostname</name>
<value></value>
<description>
Optional. The hostname for the Router containing this
configuration file. Will be different for each machine.
Defaults to current hostname.
</description>
</property>
<property>
<name>dfs.federation.router.kerberos.internal.spnego.principal</name>
<value>${dfs.web.authentication.kerberos.principal}</value>
<description>
The server principal used by the Router for web UI SPNEGO
authentication when Kerberos security is enabled. This is
typically set to HTTP/_HOST@REALM.TLD. The SPNEGO server principal
begins with the prefix HTTP/ by convention.
If the value is '*', the web server will attempt to login with
every principal specified in the keytab file
dfs.web.authentication.kerberos.keytab.
</description>
</property>
<property>
<name>dfs.federation.router.mount-table.cache.update</name>
<value>false</value>
<description>Set true to enable MountTableRefreshService. This service
updates mount table cache immediately after adding, modifying or
deleting the mount table entries. If this service is not enabled,
the mount table cache is refreshed periodically by
StateStoreCacheUpdateService.
</description>
</property>
<property>
<name>dfs.federation.router.mount-table.cache.update.timeout</name>
<value>1m</value>
<description>This property defines how long to wait for all the
admin servers to finish their mount table cache update. This setting
supports multiple time unit suffixes as described in
dfs.federation.router.safemode.extension.
</description>
</property>
<property>
<name>dfs.federation.router.mount-table.cache.update.client.max.time</name>
<value>5m</value>
<description>Remote router mount table cache is updated through
RouterClient(RPC call). To improve performance, RouterClient
connections are cached but it should not be kept in cache forever.
This property defines the max time a connection can be cached. This
setting supports multiple time unit suffixes as described in
dfs.federation.router.safemode.extension.
</description>
</property>
<property>
<name>dfs.federation.router.secret.manager.class</name>
<value>org.apache.hadoop.hdfs.server.federation.router.security.token.ZKDelegationTokenSecretManagerImpl</value>
<description>
Class to implement state store to delegation tokens.
Default implementation uses zookeeper as the backend to store delegation tokens.
</description>
</property>
<property>
<name>dfs.federation.router.top.num.token.realowners</name>
<value>10</value>
<description>
The number of top real owners by tokens count to report in the JMX metrics.
Real owners are the effective users whose credentials are used to generate
the tokens.
</description>
</property>
<property>
<name>dfs.federation.router.fairness.policy.controller.class</name>
<value>org.apache.hadoop.hdfs.server.federation.fairness.NoRouterRpcFairnessPolicyController</value>
<description>
No fairness policy handler by default, for fairness
StaticFairnessPolicyController should be configured.
</description>
</property>
<property>
<name>dfs.federation.router.fairness.handler.count.EXAMPLENAMESERVICE</name>
<value></value>
<description>
Dedicated handler count for nameservice EXAMPLENAMESERVICE. The handler
resource (configured by dfs.federation.router.handler.count) is controlled
internally by Semaphore permits. Two requirements have to be satisfied.
1) All downstream nameservices need this config, otherwise no permit will
be given and thus no proxying will happen. 2) If a special *concurrent*
nameservice is specified, the sum of all configured values must be smaller
than or equal to the total number of router handlers; if the special
*concurrent* nameservice is not specified, the sum of all configured values
must be strictly smaller than the number of router handlers, so that the
remainder is allocated to the concurrent calls.
</description>
</property>
<property>
<name>dfs.federation.router.fairness.acquire.timeout</name>
<value>1s</value>
<description>
The maximum time to wait for a permit.
</description>
</property>
<property>
<name>dfs.federation.router.federation.rename.bandwidth</name>
<value>10</value>
<description>
Specify bandwidth per map in MB.
</description>
</property>
<property>
<name>dfs.federation.router.federation.rename.map</name>
<value>10</value>
<description>
Max number of concurrent maps to use for copy.
</description>
</property>
<property>
<name>dfs.federation.router.federation.rename.delay</name>
<value>1000</value>
<description>
Specify the delay duration (in milliseconds) when the job needs to retry.
</description>
</property>
<property>
<name>dfs.federation.router.federation.rename.diff</name>
<value>0</value>
<description>
Specify the threshold of the diff entries that is used in the incremental
copy stage.
</description>
</property>
<property>
<name>dfs.federation.router.federation.rename.option</name>
<value>NONE</value>
<description>
Specify the action when renaming across namespaces. The option can be NONE
or DISTCP.
</description>
</property>
<property>
<name>dfs.federation.router.federation.rename.force.close.open.file</name>
<value>true</value>
<description>
Force close all open files when there is no diff in the DIFF_DISTCP stage.
</description>
</property>
<property>
<name>dfs.federation.router.federation.rename.trash</name>
<value>trash</value>
<description>
This option has 3 values: trash (move the source path to trash), delete
(delete the source path directly) and skip (skip both trash and deletion).
</description>
</property>
<property>
<name>dfs.federation.router.observer.read.default</name>
<value>false</value>
<description>
Whether observer reads are enabled. This is a default for all nameservices.
The default can be inverted for individual namespace by adding them to
dfs.federation.router.observer.read.overrides.
</description>
</property>
<property>
<name>dfs.federation.router.observer.read.overrides</name>
<value/>
<description>
Comma-separated list of namespaces for which to invert the default configuration,
dfs.federation.router.observer.read.default, for whether to enable observer reads.
</description>
</property>
<property>
<name>dfs.federation.router.observer.federated.state.propagation.maxsize</name>
<value>5</value>
<description>
The maximum size of the federated state to send in the RPC header. Sending the federated
state removes the need to msync on every read call, but at the expense of having a larger
header. The cost tradeoff between the larger header and always msync'ing depends on the number
of namespaces in use and the latency of the msync requests.
</description>
</property>
<property>
<name>dfs.federation.router.observer.state.id.refresh.period</name>
<value>15s</value>
<description>
Period to refresh namespace stateID using active namenode. This ensures the
namespace stateID is refreshed even when an observer is trailing behind.
If this is below 0, the auto-refresh is disabled.
</description>
</property>
</configuration>