From 30dcd044c37a2b249ef958ce427a44f534fb00df Mon Sep 17 00:00:00 2001 From: monthonk <47974768+monthonk@users.noreply.github.com> Date: Tue, 22 Feb 2022 04:50:27 +0000 Subject: [PATCH] HADOOP-17386. Change default fs.s3a.buffer.dir to be under Yarn container path on yarn applications (#3908) Co-authored-by: Monthon Klongklaew Signed-off-by: Akira Ajisaka --- .../src/main/resources/core-default.xml | 7 +++++-- .../site/markdown/tools/hadoop-aws/committers.md | 2 +- .../src/site/markdown/tools/hadoop-aws/index.md | 14 ++++++++++---- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 9f71f27c86a..b1a25ce1f00 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1602,9 +1602,12 @@ fs.s3a.buffer.dir - ${hadoop.tmp.dir}/s3a + ${env.LOCAL_DIRS:-${hadoop.tmp.dir}}/s3a Comma separated list of directories that will be used to buffer file - uploads to. + uploads to. + The YARN container path is used as the default value in YARN applications; + otherwise it falls back to hadoop.tmp.dir. + diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md index 9f3ea583553..38aea18cad1 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md @@ -545,7 +545,7 @@ The table below provides a summary of each option. | Option | Meaning | Default | |--------|---------|---------| | `mapreduce.fileoutputcommitter.marksuccessfuljobs` | Write a `_SUCCESS` file on the successful completion of the job. | `true` | -| `fs.s3a.buffer.dir` | Local filesystem directory for data being written and/or staged. 
| `${hadoop.tmp.dir}/s3a` | +| `fs.s3a.buffer.dir` | Local filesystem directory for data being written and/or staged. | `${env.LOCAL_DIRS:-${hadoop.tmp.dir}}/s3a` | | `fs.s3a.committer.magic.enabled` | Enable "magic committer" support in the filesystem. | `true` | | `fs.s3a.committer.abort.pending.uploads` | list and abort all pending uploads under the destination path when the job is committed or aborted. | `true` | | `fs.s3a.committer.threads` | Number of threads in committers for parallel operations on files.| -4 | diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 549e69115d0..2c6300f3742 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -972,9 +972,12 @@ options are covered in [Testing](./testing.md). fs.s3a.buffer.dir - ${hadoop.tmp.dir}/s3a + ${env.LOCAL_DIRS:-${hadoop.tmp.dir}}/s3a Comma separated list of directories that will be used to buffer file - uploads to. + uploads to. + The YARN container path is used as the default value in YARN applications; + otherwise it falls back to hadoop.tmp.dir. + @@ -1824,9 +1827,12 @@ consumed, and so eliminates heap size as the limiting factor in queued uploads fs.s3a.buffer.dir - ${hadoop.tmp.dir}/s3a + ${env.LOCAL_DIRS:-${hadoop.tmp.dir}}/s3a Comma separated list of directories that will be used to buffer file - uploads to. + uploads to. + The YARN container path is used as the default value in YARN applications; + otherwise it falls back to hadoop.tmp.dir. + ```