MAPREDUCE-6495. Docs for archive-logs tool (rkanter)
(cherry picked from commit 0c4af0f998
)
This commit is contained in:
parent
f2009dc89c
commit
492142097b
|
@ -331,6 +331,8 @@ Release 2.8.0 - UNRELEASED
|
||||||
MAPREDUCE-6302. Preempt reducers after a configurable timeout irrespective
|
MAPREDUCE-6302. Preempt reducers after a configurable timeout irrespective
|
||||||
of headroom. (kasha)
|
of headroom. (kasha)
|
||||||
|
|
||||||
|
MAPREDUCE-6495. Docs for archive-logs tool (rkanter)
|
||||||
|
|
||||||
Release 2.7.2 - UNRELEASED
|
Release 2.7.2 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -18,6 +18,7 @@ MapReduce Commands Guide
|
||||||
* [Overview](#Overview)
|
* [Overview](#Overview)
|
||||||
* [User Commands](#User_Commands)
|
* [User Commands](#User_Commands)
|
||||||
* [archive](#archive)
|
* [archive](#archive)
|
||||||
|
* [archive-logs](#archive-logs)
|
||||||
* [classpath](#classpath)
|
* [classpath](#classpath)
|
||||||
* [distcp](#distcp)
|
* [distcp](#distcp)
|
||||||
* [job](#job)
|
* [job](#job)
|
||||||
|
@ -53,6 +54,12 @@ Commands useful for users of a hadoop cluster.
|
||||||
Creates a hadoop archive. More information can be found at
|
Creates a hadoop archive. More information can be found at
|
||||||
[Hadoop Archives Guide](../../hadoop-archives/HadoopArchives.html).
|
[Hadoop Archives Guide](../../hadoop-archives/HadoopArchives.html).
|
||||||
|
|
||||||
|
### `archive-logs`
|
||||||
|
|
||||||
|
A tool to combine YARN aggregated logs into Hadoop archives to reduce the number
|
||||||
|
of files in HDFS. More information can be found at
|
||||||
|
[Hadoop Archive Logs Guide](../../hadoop-archive-logs/HadoopArchiveLogs.html).
|
||||||
|
|
||||||
### `classpath`
|
### `classpath`
|
||||||
|
|
||||||
Usage: `yarn classpath [--glob |--jar <path> |-h |--help]`
|
Usage: `yarn classpath [--glob |--jar <path> |-h |--help]`
|
||||||
|
|
|
@ -154,6 +154,7 @@
|
||||||
<menu name="Tools" inherit="top">
|
<menu name="Tools" inherit="top">
|
||||||
<item name="Hadoop Streaming" href="hadoop-streaming/HadoopStreaming.html"/>
|
<item name="Hadoop Streaming" href="hadoop-streaming/HadoopStreaming.html"/>
|
||||||
<item name="Hadoop Archives" href="hadoop-archives/HadoopArchives.html"/>
|
<item name="Hadoop Archives" href="hadoop-archives/HadoopArchives.html"/>
|
||||||
|
<item name="Hadoop Archive Logs" href="hadoop-archive-logs/HadoopArchiveLogs.html"/>
|
||||||
<item name="DistCp" href="hadoop-distcp/DistCp.html"/>
|
<item name="DistCp" href="hadoop-distcp/DistCp.html"/>
|
||||||
<item name="GridMix" href="hadoop-gridmix/GridMix.html"/>
|
<item name="GridMix" href="hadoop-gridmix/GridMix.html"/>
|
||||||
<item name="Rumen" href="hadoop-rumen/Rumen.html"/>
|
<item name="Rumen" href="hadoop-rumen/Rumen.html"/>
|
||||||
|
|
|
@ -221,7 +221,7 @@ public class HadoopArchiveLogs implements Tool {
|
||||||
CommandLine commandLine = parser.parse(opts, args);
|
CommandLine commandLine = parser.parse(opts, args);
|
||||||
if (commandLine.hasOption(HELP_OPTION)) {
|
if (commandLine.hasOption(HELP_OPTION)) {
|
||||||
HelpFormatter formatter = new HelpFormatter();
|
HelpFormatter formatter = new HelpFormatter();
|
||||||
formatter.printHelp("yarn archive-logs", opts);
|
formatter.printHelp("mapred archive-logs", opts);
|
||||||
System.exit(0);
|
System.exit(0);
|
||||||
}
|
}
|
||||||
if (commandLine.hasOption(MAX_ELIGIBLE_APPS_OPTION)) {
|
if (commandLine.hasOption(MAX_ELIGIBLE_APPS_OPTION)) {
|
||||||
|
@ -254,7 +254,7 @@ public class HadoopArchiveLogs implements Tool {
|
||||||
}
|
}
|
||||||
} catch (ParseException pe) {
|
} catch (ParseException pe) {
|
||||||
HelpFormatter formatter = new HelpFormatter();
|
HelpFormatter formatter = new HelpFormatter();
|
||||||
formatter.printHelp("yarn archive-logs", opts);
|
formatter.printHelp("mapred archive-logs", opts);
|
||||||
throw pe;
|
throw pe;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
<!---
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. See accompanying LICENSE file.
|
||||||
|
-->
|
||||||
|
|
||||||
|
Hadoop Archive Logs Guide
|
||||||
|
=========================
|
||||||
|
|
||||||
|
- [Overview](#Overview)
|
||||||
|
- [How to Archive Logs](#How_to_Archive_Logs)
|
||||||
|
|
||||||
|
Overview
|
||||||
|
--------
|
||||||
|
|
||||||
|
For clusters with a lot of YARN aggregated logs, it can be helpful to combine
|
||||||
|
them into hadoop archives in order to reduce the number of small files, and
|
||||||
|
hence the stress on the NameNode. This tool provides an easy way to do this.
|
||||||
|
Aggregated logs in hadoop archives can still be read by the Job History Server
|
||||||
|
and by the `yarn logs` command.
|
||||||
|
|
||||||
|
For more on hadoop archives, see
|
||||||
|
[Hadoop Archives Guide](../hadoop-archives/HadoopArchives.html).
|
||||||
|
|
||||||
|
How to Archive Logs
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
usage: mapred archive-logs
|
||||||
|
-force Force recreating the working directory if
|
||||||
|
an existing one is found. This should
|
||||||
|
only be used if you know that another
|
||||||
|
instance is not currently running
|
||||||
|
-help Prints this message
|
||||||
|
-maxEligibleApps <n> The maximum number of eligible apps to
|
||||||
|
process (default: -1 (all))
|
||||||
|
-maxTotalLogsSize <megabytes> The maximum total logs size (in
|
||||||
|
megabytes) required to be eligible
|
||||||
|
(default: 1024)
|
||||||
|
-memory <megabytes> The amount of memory (in megabytes) for
|
||||||
|
each container (default: 1024)
|
||||||
|
-minNumberLogFiles <n> The minimum number of log files required
|
||||||
|
to be eligible (default: 20)
|
||||||
|
-verbose Print more details.
|
||||||
|
|
||||||
|
The tool only supports running one instance on a cluster at a time in order
|
||||||
|
to prevent conflicts. It does this by checking for the existence of a
|
||||||
|
directory named ``archive-logs-work`` under
|
||||||
|
``yarn.nodemanager.remote-app-log-dir`` in HDFS
|
||||||
|
(default: ``/tmp/logs/archive-logs-work``). If for some reason that
|
||||||
|
directory was not cleaned up properly, and the tool refuses to run, you can
|
||||||
|
force it with the ``-force`` option.
|
||||||
|
|
||||||
|
The ``-help`` option prints out the usage information.
|
||||||
|
|
||||||
|
The tool works by performing the following procedure:
|
||||||
|
|
||||||
|
1. Determine the list of eligible applications, based on the following
|
||||||
|
criteria:
|
||||||
|
- is not already archived
|
||||||
|
- its aggregation status has successfully completed
|
||||||
|
- has at least ``-minNumberLogFiles`` log files
|
||||||
|
- the sum of its log files size is less than ``-maxTotalLogsSize`` megabytes
|
||||||
|
2. If there are more than ``-maxEligibleApps`` applications found, the
|
||||||
|
newest applications are dropped. They can be processed next time.
|
||||||
|
3. A shell script is generated based on the eligible applications.
|
||||||
|
4. The Distributed Shell program is run with the aforementioned script. It
|
||||||
|
will run with ``-maxEligibleApps`` containers, one to process each
|
||||||
|
application, and with ``-memory`` megabytes of memory. Each container runs
|
||||||
|
the ``hadoop archives`` command for a single application and replaces
|
||||||
|
its aggregated log files with the resulting archive.
|
||||||
|
|
||||||
|
The ``-verbose`` option makes the tool print more details about what it's
|
||||||
|
doing.
|
||||||
|
|
||||||
|
The end result of running the tool is that the original aggregated log files for
|
||||||
|
a processed application will be replaced by a hadoop archive containing all of
|
||||||
|
those logs.
|
|
@ -0,0 +1,30 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#banner {
|
||||||
|
height: 93px;
|
||||||
|
background: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
#bannerLeft img {
|
||||||
|
margin-left: 30px;
|
||||||
|
margin-top: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
#bannerRight img {
|
||||||
|
margin: 17px;
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue