From 9be7b353dd12135772f5d311817306e9f3ce4e9a Mon Sep 17 00:00:00 2001
From: Brahma Reddy Battula
Date: Tue, 22 Nov 2016 19:45:45 +0530
Subject: [PATCH] HADOOP-13646. Remove outdated overview.html. Contributed By
Brahma Reddy Battula.
(cherry picked from commit afcf8d38e750f935c06629e641a1321b79c4cace)
---
.../hadoop-common/src/main/java/overview.html | 274 ------------------
.../hadoop-hdfs/src/main/java/overview.html | 274 ------------------
pom.xml | 25 --
3 files changed, 573 deletions(-)
delete mode 100644 hadoop-common-project/hadoop-common/src/main/java/overview.html
delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html
diff --git a/hadoop-common-project/hadoop-common/src/main/java/overview.html b/hadoop-common-project/hadoop-common/src/main/java/overview.html
deleted file mode 100644
index 5868617709b..00000000000
--- a/hadoop-common-project/hadoop-common/src/main/java/overview.html
+++ /dev/null
@@ -1,274 +0,0 @@
-
-
-
-
- Hadoop
-
-
-
-Hadoop is a distributed computing platform.
-
-Hadoop primarily consists of the Hadoop Distributed FileSystem
-(HDFS) and an
-implementation of the
-Map-Reduce programming paradigm.
-
-
-Hadoop is a software framework that lets one easily write and run applications
-that process vast amounts of data. Here's what makes Hadoop especially useful:
-
- -
- Scalable: Hadoop can reliably store and process petabytes.
-
- -
- Economical: It distributes the data and processing across clusters
- of commonly available computers. These clusters can number into the thousands
- of nodes.
-
- -
- Efficient: By distributing the data, Hadoop can process it in parallel
- on the nodes where the data is located. This makes it extremely rapid.
-
- -
- Reliable: Hadoop automatically maintains multiple copies of data and
- automatically redeploys computing tasks based on failures.
-
-
-
-Requirements
-
-Platforms
-
-
- -
- Hadoop has been demonstrated on GNU/Linux clusters with more than 4000 nodes.
-
- -
- Windows is also a supported platform.
-
-
-
-Requisite Software
-
-
- -
- Java 1.6.x, preferably from
- Sun.
- Set JAVA_HOME to the root of your Java installation.
-
- -
- ssh must be installed and sshd must be running to use Hadoop's
- scripts to manage remote Hadoop daemons.
-
- -
- rsync may be installed to use Hadoop's scripts to manage remote
- Hadoop installations.
-
-
-
-Installing Required Software
-
-If your platform does not have the required software listed above, you
-will have to install it.
-
-For example on Ubuntu Linux:
-
-$ sudo apt-get install ssh
-$ sudo apt-get install rsync
-
-
-Getting Started
-
-First, you need to get a copy of the Hadoop code.
-
-Edit the file conf/hadoop-env.sh to define at least
-JAVA_HOME.
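-
-As a sketch only (the JDK path below is illustrative, not prescribed by
-this guide), the edit might look like:
-
-# conf/hadoop-env.sh
-export JAVA_HOME=/usr/lib/jvm/java-6-sun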
-
-Try the following command:
-bin/hadoop
-This will display the documentation for the Hadoop command script.
-
-Standalone operation
-
-By default, Hadoop is configured to run things in a non-distributed
-mode, as a single Java process. This is useful for debugging, and can
-be demonstrated as follows:
-
-mkdir input
-cp conf/*.xml input
-bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
-cat output/*
-
-This will display counts for each match of the
-regular expression.
-
-Note that input is specified as a directory containing input
-files and that output is also specified as a directory where parts are
-written.
-
-Distributed operation
-
-To configure Hadoop for distributed operation you must specify the
-following:
-
-
-
-- The NameNode (Distributed Filesystem master) host. This is
-specified with the configuration property fs.default.name.
-
-
-- The org.apache.hadoop.mapred.JobTracker (MapReduce master)
-host and port. This is specified with the configuration property
-mapred.job.tracker.
-
-
-- A slaves file that lists the names of all the hosts in
-the cluster. The default slaves file is conf/slaves.
-
-
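-For illustration only (the host names below are placeholders), a
-conf/slaves file for a three-worker cluster might contain:
-
-slave1.example.com
-slave2.example.com
-slave3.example.com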
-
-Pseudo-distributed configuration
-
-You can in fact run everything on a single host. To run things this
-way, put the following in:
-
-
-conf/core-site.xml:
-
-<configuration>
-  <property>
-    <name>fs.default.name</name>
-    <value>hdfs://localhost/</value>
-  </property>
-</configuration>
-
-conf/hdfs-site.xml:
-
-<configuration>
-  <property>
-    <name>dfs.replication</name>
-    <value>1</value>
-  </property>
-</configuration>
-
-conf/mapred-site.xml:
-
-<configuration>
-  <property>
-    <name>mapred.job.tracker</name>
-    <value>localhost:9001</value>
-  </property>
-</configuration>
-
-(We also set the HDFS replication level to 1 in order to
-reduce warnings when running on a single node.)
-
-Now check that the command ssh localhost does not
-require a password. If it does, execute the following commands:
-
-ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
-cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
-
-
-Bootstrapping
-
-A new distributed filesystem must be formatted with the following
-command, run on the master node:
-
-bin/hadoop namenode -format
-
-The Hadoop daemons are started with the following command:
-
-bin/start-all.sh
-
-Daemon log output is written to the logs/ directory.
-
-Input files are copied into the distributed filesystem as follows:
-
-bin/hadoop fs -put input input
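-
-To confirm the copy (an optional check, not a step from the original
-walkthrough), the directory can be listed with:
-
-bin/hadoop fs -ls input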
-
-Distributed execution
-
-Things are run as before, but output must be copied locally to
-examine it:
-
-
-bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
-bin/hadoop fs -get output output
-cat output/*
-
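-Alternatively (a shortcut, assuming you only want a quick look), the output
-can be read directly from the distributed filesystem without copying it:
-
-bin/hadoop fs -cat output/*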
-
-When you're done, stop the daemons with:
-
-bin/stop-all.sh
-
-Fully-distributed operation
-
-Fully distributed operation is just like the pseudo-distributed operation
-described above, except that you must specify:
-
-
-
-- The hostname or IP address of your master server in the value
-for fs.default.name,
- as hdfs://master.example.com/ in conf/core-site.xml.
-
-- The host and port of your master server in the value
-of mapred.job.tracker
-as master.example.com:port in conf/mapred-site.xml.
-
-- Directories for dfs.name.dir and
-dfs.data.dir
-in conf/hdfs-site.xml.
-These are local directories used to hold distributed filesystem
-data on the master node and slave nodes respectively. Note
-that dfs.data.dir may contain a space- or comma-separated
-list of directory names, so that data may be stored on multiple local
-devices.
-
-- mapred.local.dir
- in conf/mapred-site.xml, the local directory where temporary
- MapReduce data is stored. It also may be a list of directories.
-
-- mapred.map.tasks
-and mapred.reduce.tasks
-in conf/mapred-site.xml.
-As a rule of thumb, use 10x the
-number of slave processors for mapred.map.tasks, and 2x the
-number of slave processors for mapred.reduce.tasks.
-
-
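-As a sketch only (the directory paths below are placeholders, not
-recommendations), a conf/hdfs-site.xml for such a cluster might set
-dfs.name.dir and dfs.data.dir like this:
-
-<configuration>
-  <property>
-    <name>dfs.name.dir</name>
-    <value>/var/hadoop/dfs/name</value>
-  </property>
-  <property>
-    <name>dfs.data.dir</name>
-    <value>/disk1/hadoop/data,/disk2/hadoop/data</value>
-  </property>
-</configuration>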
-
-Finally, list all slave hostnames or IP addresses in your
-conf/slaves file, one per line. Then format your filesystem
-and start your cluster on your master node, as above.
-
-
-
-
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html
deleted file mode 100644
index 759c093aa59..00000000000
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html
+++ /dev/null
@@ -1,274 +0,0 @@
-
-
-
-
- Hadoop
-
-
-
-Hadoop is a distributed computing platform.
-
-Hadoop primarily consists of the Hadoop Distributed FileSystem
-(HDFS) and an
-implementation of the
-Map-Reduce programming paradigm.
-
-
-Hadoop is a software framework that lets one easily write and run applications
-that process vast amounts of data. Here's what makes Hadoop especially useful:
-
- -
- Scalable: Hadoop can reliably store and process petabytes.
-
- -
- Economical: It distributes the data and processing across clusters
- of commonly available computers. These clusters can number into the thousands
- of nodes.
-
- -
- Efficient: By distributing the data, Hadoop can process it in parallel
- on the nodes where the data is located. This makes it extremely rapid.
-
- -
- Reliable: Hadoop automatically maintains multiple copies of data and
- automatically redeploys computing tasks based on failures.
-
-
-
-Requirements
-
-Platforms
-
-
- -
- Hadoop has been demonstrated on GNU/Linux clusters with 2000 nodes.
-
- -
- Windows is also a supported platform.
-
-
-
-Requisite Software
-
-
- -
- Java 1.6.x, preferably from
- Sun.
- Set JAVA_HOME to the root of your Java installation.
-
- -
- ssh must be installed and sshd must be running to use Hadoop's
- scripts to manage remote Hadoop daemons.
-
- -
- rsync may be installed to use Hadoop's scripts to manage remote
- Hadoop installations.
-
-
-
-Installing Required Software
-
-If your platform does not have the required software listed above, you
-will have to install it.
-
-For example on Ubuntu Linux:
-
-$ sudo apt-get install ssh
-$ sudo apt-get install rsync
-
-
-Getting Started
-
-First, you need to get a copy of the Hadoop code.
-
-Edit the file conf/hadoop-env.sh to define at least
-JAVA_HOME.
-
-Try the following command:
-bin/hadoop
-This will display the documentation for the Hadoop command script.
-
-Standalone operation
-
-By default, Hadoop is configured to run things in a non-distributed
-mode, as a single Java process. This is useful for debugging, and can
-be demonstrated as follows:
-
-mkdir input
-cp conf/*.xml input
-bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
-cat output/*
-
-This will display counts for each match of the
-regular expression.
-
-Note that input is specified as a directory containing input
-files and that output is also specified as a directory where parts are
-written.
-
-Distributed operation
-
-To configure Hadoop for distributed operation you must specify the
-following:
-
-
-
-- The NameNode (Distributed Filesystem master) host. This is
-specified with the configuration property fs.default.name.
-
-
-- The org.apache.hadoop.mapred.JobTracker (MapReduce master)
-host and port. This is specified with the configuration property
-mapred.job.tracker.
-
-
-- A slaves file that lists the names of all the hosts in
-the cluster. The default slaves file is conf/slaves.
-
-
-
-Pseudo-distributed configuration
-
-You can in fact run everything on a single host. To run things this
-way, put the following in:
-
-
-conf/core-site.xml:
-
-<configuration>
-  <property>
-    <name>fs.default.name</name>
-    <value>hdfs://localhost/</value>
-  </property>
-</configuration>
-
-conf/hdfs-site.xml:
-
-<configuration>
-  <property>
-    <name>dfs.replication</name>
-    <value>1</value>
-  </property>
-</configuration>
-
-conf/mapred-site.xml:
-
-<configuration>
-  <property>
-    <name>mapred.job.tracker</name>
-    <value>localhost:9001</value>
-  </property>
-</configuration>
-
-(We also set the HDFS replication level to 1 in order to
-reduce warnings when running on a single node.)
-
-Now check that the command ssh localhost does not
-require a password. If it does, execute the following commands:
-
-ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
-cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
-
-
-Bootstrapping
-
-A new distributed filesystem must be formatted with the following
-command, run on the master node:
-
-bin/hadoop namenode -format
-
-The Hadoop daemons are started with the following command:
-
-bin/start-all.sh
-
-Daemon log output is written to the logs/ directory.
-
-Input files are copied into the distributed filesystem as follows:
-
-bin/hadoop fs -put input input
-
-Distributed execution
-
-Things are run as before, but output must be copied locally to
-examine it:
-
-
-bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
-bin/hadoop fs -get output output
-cat output/*
-
-
-When you're done, stop the daemons with:
-
-bin/stop-all.sh
-
-Fully-distributed operation
-
-Fully distributed operation is just like the pseudo-distributed operation
-described above, except that you must specify:
-
-
-
-- The hostname or IP address of your master server in the value
-for fs.default.name,
- as hdfs://master.example.com/ in conf/core-site.xml.
-
-- The host and port of your master server in the value
-of mapred.job.tracker
-as master.example.com:port in conf/mapred-site.xml.
-
-- Directories for dfs.name.dir and
-dfs.data.dir
-in conf/hdfs-site.xml.
-These are local directories used to hold distributed filesystem
-data on the master node and slave nodes respectively. Note
-that dfs.data.dir may contain a space- or comma-separated
-list of directory names, so that data may be stored on multiple local
-devices.
-
-- mapred.local.dir
- in conf/mapred-site.xml, the local directory where temporary
- MapReduce data is stored. It also may be a list of directories.
-
-- mapred.map.tasks
-and mapred.reduce.tasks
-in conf/mapred-site.xml.
-As a rule of thumb, use 10x the
-number of slave processors for mapred.map.tasks, and 2x the
-number of slave processors for mapred.reduce.tasks.
-
-
-
-Finally, list all slave hostnames or IP addresses in your
-conf/slaves file, one per line. Then format your filesystem
-and start your cluster on your master node, as above.
-
-
-
-
diff --git a/pom.xml b/pom.xml
index 2977294f4b2..acf4c048f34 100644
--- a/pom.xml
+++ b/pom.xml
@@ -441,31 +441,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
-
- dist
-
-
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
- false
-
-
-
- default-cli
-
- aggregate
-
-
- hadoop-common-project/hadoop-common/src/main/java/overview.html
-
-
-
-
-
-
-
sign