From 11b908bf27b8a102f069b5d517fe5a34ebb3b917 Mon Sep 17 00:00:00 2001
From: Tsz-wo Sze
Date: Wed, 23 Nov 2011 22:09:34 +0000
Subject: [PATCH] svn merge -c 1205626 from trunk for HDFS-2587.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1205630 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt |    2 +
 .../src/site/apt/Federation.apt.vm          |   18 +-
 .../src/site/apt/WebHDFS.apt.vm             | 1626 +++++++++++++++++
 hadoop-project/src/site/site.xml            |    1 +
 4 files changed, 1638 insertions(+), 9 deletions(-)
 create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/WebHDFS.apt.vm

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index c13a8a473bb..eda6987868b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -34,6 +34,8 @@ Release 0.23.1 - UNRELEASED
 
     HDFS-2552. Add Forrest doc for WebHDFS REST API. (szetszwo)
 
+    HDFS-2587. Add apt doc for WebHDFS REST API. (szetszwo)
+
   OPTIMIZATIONS
 
     HDFS-2130. Switch default checksum to CRC32C. (todd)
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/Federation.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/Federation.apt.vm
index 7208fc7ed8a..086799cd93e 100644
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/Federation.apt.vm
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/Federation.apt.vm
@@ -12,7 +12,7 @@
 ~~ limitations under the License. See accompanying LICENSE file.
 
   ---
-  Hadoop Map Reduce Next Generation-${project.version} - Cluster Setup
+  Hadoop Distributed File System-${project.version} - Federation
   ---
   ---
   ${maven.build.timestamp}
@@ -57,12 +57,12 @@ HDFS Federation
 * Storage - is provided by datanodes by storing blocks on the local file
   system and allows read/write access.
 
-  The current HDFS architecture allows only a single namespace for the
+  The prior HDFS architecture allows only a single namespace for the
   entire cluster. A single Namenode manages this namespace. HDFS
-  Federation addresses limitation of current architecture by adding
+  Federation addresses this limitation of the prior architecture by adding
   support multiple Namenodes/namespaces to HDFS file system.
 
-* {HDFS Federation}
+* {Multiple Namenodes/Namespaces}
 
   In order to scale the name service horizontally, federation uses multiple
   independent Namenodes/namespaces. The Namenodes are federated, that is, the
@@ -103,9 +103,9 @@ HDFS Federation
     of small files benefit from scaling the namespace by adding more
     Namenodes to the cluster
 
-  * Performance - File system operation throughput is currently limited
-    by a single Namenode. Adding more Namenodes to the cluster scales the
-    file system read/write operations throughput.
+  * Performance - File system operation throughput is limited by a single
+    Namenode in the prior architecture. Adding more Namenodes to the cluster
+    scales the file system read/write throughput.
 
   * Isolation - A single Namenode offers no isolation in multi user
     environment. An experimental application can overload the Namenode
@@ -265,7 +265,7 @@ HDFS Federation
 > $HADOOP_PREFIX_HOME/bin/start-dfs.sh
 ----
 
-  To start the cluster run the following command:
+  To stop the cluster run the following command:
 
 ----
 > $HADOOP_PREFIX_HOME/bin/stop-dfs.sh
 ----
@@ -300,7 +300,7 @@ HDFS Federation
 ** Decommissioning
 
   Decommissioning is similar to prior releases. The nodes that need to be
-  decommissioning are added to the exclude file at all the Namenode. Each
+  decommissioned are added to the exclude file at all the Namenodes. Each
   Namenode decommissions its Block Pool. When all the Namenodes finish
   decommissioning a datanode, the datanode is considered to be decommissioned.
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/WebHDFS.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/WebHDFS.apt.vm
new file mode 100644
index 00000000000..f8cd404f623
--- /dev/null
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/WebHDFS.apt.vm
@@ -0,0 +1,1626 @@
+~~ Licensed under the Apache License, Version 2.0 (the "License");
+~~ you may not use this file except in compliance with the License.
+~~ You may obtain a copy of the License at
+~~
+~~   http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License. See accompanying LICENSE file.
+
+  ---
+  Hadoop Distributed File System-${project.version} - WebHDFS REST API
+  ---
+  ---
+  ${maven.build.timestamp}
+
+WebHDFS REST API
+
+  \[ {{{./index.html}Go Back}} \]
+
+%{toc|section=1|fromDepth=0}
+
+* {Document Conventions}
+
+*----------------------+-------------------------------------------------------------------------------+
+| <<<Monospaced>>>     | Used for commands, HTTP requests and responses, and code blocks. |
+*----------------------+-------------------------------------------------------------------------------+
+| <<<\<Monospaced\>>>> | User entered values. |
+*----------------------+-------------------------------------------------------------------------------+
+| <<<[Monospaced]>>>   | Optional values. When the value is not specified, the default value is used. |
+*----------------------+-------------------------------------------------------------------------------+
+| <Italics>            | Important phrases and words. |
+*----------------------+-------------------------------------------------------------------------------+
+
+
+* {Introduction}
+
+  The HTTP REST API supports the complete
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}} interface for HDFS.
+  The operations and the corresponding FileSystem methods are shown in the next section.
+  The Section {{HTTP Query Parameter Dictionary}} specifies the parameter details
+  such as the defaults and the valid values.
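+
+  Because WebHDFS implements the FileSystem interface, a Java client can also
+  use it through the ordinary FileSystem API. The following is an editorial
+  sketch, not part of the original specification; the host name
+  <<<namenode>>> and port <<<50070>>> are example values only.
+
++---------------------------------
+// A minimal sketch: list the root directory over WebHDFS via the
+// standard FileSystem API.  Host, port and path are assumptions.
+import java.net.URI;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class WebHdfsFsExample {
+  public static void main(String[] args) throws Exception {
+    FileSystem fs = FileSystem.get(
+        URI.create("webhdfs://namenode:50070/"), new Configuration());
+    for (FileStatus stat : fs.listStatus(new Path("/"))) {
+      System.out.println(stat.getPath() + "\t" + stat.getLen());
+    }
+  }
+}
++---------------------------------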
+
+** {Operations}
+
+  * HTTP GET
+
+    * {{{Open and Read a File}<<<OPEN>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.open)
+
+    * {{{Status of a File/Directory}<<<GETFILESTATUS>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getFileStatus)
+
+    * {{{List a Directory}<<<LISTSTATUS>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.listStatus)
+
+    * {{{Get Content Summary of a Directory}<<<GETCONTENTSUMMARY>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getContentSummary)
+
+    * {{{Get File Checksum}<<<GETFILECHECKSUM>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getFileChecksum)
+
+    * {{{Get Home Directory}<<<GETHOMEDIRECTORY>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getHomeDirectory)
+
+    * {{{Get Delegation Token}<<<GETDELEGATIONTOKEN>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getDelegationToken)
+
+  * HTTP PUT
+
+    * {{{Create and Write to a File}<<<CREATE>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.create)
+
+    * {{{Make a Directory}<<<MKDIRS>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.mkdirs)
+
+    * {{{Rename a File/Directory}<<<RENAME>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.rename)
+
+    * {{{Set Replication Factor}<<<SETREPLICATION>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.setReplication)
+
+    * {{{Set Owner}<<<SETOWNER>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.setOwner)
+
+    * {{{Set Permission}<<<SETPERMISSION>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.setPermission)
+
+    * {{{Set Access or Modification Time}<<<SETTIMES>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.setTimes)
+
+    * {{{Renew Delegation Token}<<<RENEWDELEGATIONTOKEN>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.renewDelegationToken)
+
+    * {{{Cancel Delegation Token}<<<CANCELDELEGATIONTOKEN>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.cancelDelegationToken)
+
+  * HTTP POST
+
+    * {{{Append to a File}<<<APPEND>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.append)
+
+  * HTTP DELETE
+
+    * {{{Delete a File/Directory}<<<DELETE>>>}}
+      (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.delete)
+
+** {FileSystem URIs vs HTTP URLs}
+
+  The FileSystem scheme of WebHDFS is "<<<webhdfs://>>>".
+  A WebHDFS FileSystem URI has the following format.
+
++---------------------------------
+  webhdfs://<HOST>:<HTTP_PORT>/<PATH>
++---------------------------------
+
+  The above WebHDFS URI corresponds to the below HDFS URI.
+
++---------------------------------
+  hdfs://<HOST>:<RPC_PORT>/<PATH>
++---------------------------------
+
+  In the REST API, the prefix "<<</webhdfs/v1>>>" is inserted in the path and a query is appended at the end.
+  Therefore, the corresponding HTTP URL has the following format.
+
++---------------------------------
+  http://<HOST>:<HTTP_PORT>/webhdfs/v1/<PATH>?op=...
++---------------------------------
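+
+  The mapping is mechanical, as the following editorial sketch shows;
+  the host, port and path are example values, not part of the specification.
+
++---------------------------------
+// Build the REST URL for a given WebHDFS FileSystem URI.
+import java.net.URI;
+
+public class WebHdfsUrlMapping {
+  public static void main(String[] args) {
+    URI fsUri = URI.create("webhdfs://namenode:50070/user/szetszwo");
+    // Insert the /webhdfs/v1 prefix and append the op query.
+    String httpUrl = "http://" + fsUri.getAuthority()
+        + "/webhdfs/v1" + fsUri.getPath() + "?op=GETFILESTATUS";
+    // Prints: http://namenode:50070/webhdfs/v1/user/szetszwo?op=GETFILESTATUS
+    System.out.println(httpUrl);
+  }
+}
++---------------------------------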
+
+* {Authentication}
+
+  When security is <off>, the authenticated user is the username specified in the <<<user.name>>> query parameter.
+  If the <<<user.name>>> parameter is not set,
+  the server may either set the authenticated user to a default web user, if there is any, or return an error response.
+
+  When security is <on>, authentication is performed by either Hadoop delegation token or Kerberos SPNEGO.
+  If a token is set in the <<<delegation>>> query parameter, the authenticated user is the user encoded in the token.
+  If the <<<delegation>>> parameter is not set, the user is authenticated by Kerberos SPNEGO.
+
+  Below are examples using the <<<curl>>> command-line tool.
+
+  [[1]] Authentication when security is off:
+
++---------------------------------
+curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?[user.name=<USER>&]op=..."
++---------------------------------
+
+  [[1]] Authentication using Kerberos SPNEGO when security is on:
+
++---------------------------------
+curl -i --negotiate -u : "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=..."
++---------------------------------
+
+  [[1]] Authentication using Hadoop delegation token when security is on:
+
++---------------------------------
+curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?delegation=<TOKEN>&op=..."
++---------------------------------
+
+* {Proxy Users}
+
+  When the proxy user feature is enabled, a proxy user <P> may submit a request on behalf of another user <U>.
+  The username of <U> must be specified in the <<<doas>>> query parameter unless a delegation token is presented in authentication.
+  In that case, the information of both users <P> and <U> must be encoded in the delegation token.
+
+  [[1]] A proxy request when security is off:
+
++---------------------------------
+curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?[user.name=<USER>&]doas=<USER>&op=..."
++---------------------------------
+
+  [[1]] A proxy request using Kerberos SPNEGO when security is on:
+
++---------------------------------
+curl -i --negotiate -u : "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?doas=<USER>&op=..."
++---------------------------------
+
+  [[1]] A proxy request using Hadoop delegation token when security is on:
+
++---------------------------------
+curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?delegation=<TOKEN>&op=..."
++---------------------------------
+
+
+* {File and Directory Operations}
+
+** {Create and Write to a File}
+
+  * Step 1: Submit an HTTP PUT request without automatically following redirects and without sending the file data.
+
++---------------------------------
+curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CREATE
+                    [&overwrite=<true|false>][&blocksize=<LONG>][&replication=<SHORT>]
+                    [&permission=<OCTAL>][&buffersize=<INT>]"
++---------------------------------
+
+  The request is redirected to a datanode where the file data is to be written:
+
++---------------------------------
+HTTP/1.1 307 TEMPORARY_REDIRECT
+Location: http://<DATANODE>:<PORT>/webhdfs/v1/<PATH>?op=CREATE...
+Content-Length: 0
++---------------------------------
+
+  * Step 2: Submit another HTTP PUT request using the URL in the <<<Location>>> header with the file data to be written.
+
++---------------------------------
+curl -i -X PUT -T <LOCAL_FILE> "http://<DATANODE>:<PORT>/webhdfs/v1/<PATH>?op=CREATE..."
++---------------------------------
+
+  The client receives a <<<201 Created>>> response with zero content length
+  and the WebHDFS URI of the file in the <<<Location>>> header:
+
++---------------------------------
+HTTP/1.1 201 Created
+Location: webhdfs://<HOST>:<PORT>/<PATH>
+Content-Length: 0
++---------------------------------
+
+  []
+
+  <<Note>> that the reason for the two-step create/append is
+  to prevent clients from sending out data before the redirect.
+  This issue is addressed by the "<<<Expect: 100-continue>>>" header in HTTP/1.1;
+  see {{{http://www.w3.org/Protocols/rfc2616/rfc2616-sec8.html#sec8.2.3}RFC 2616, Section 8.2.3}}.
+  Unfortunately, there are software library bugs (e.g. the Jetty 6 HTTP server and the Java 6 HTTP client)
+  which do not correctly implement "<<<Expect: 100-continue>>>".
+  The two-step create/append is a temporary workaround for these software library bugs.
+
+  See also:
+  {{{Overwrite}<<<overwrite>>>}},
+  {{{Block Size}<<<blocksize>>>}},
+  {{{Replication}<<<replication>>>}},
+  {{{Permission}<<<permission>>>}},
+  {{{Buffer Size}<<<buffersize>>>}},
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.create
+
+
+** {Append to a File}
+
+  * Step 1: Submit an HTTP POST request without automatically following redirects and without sending the file data.
+
++---------------------------------
+curl -i -X POST "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=APPEND[&buffersize=<INT>]"
++---------------------------------
+
+  The request is redirected to a datanode where the file data is to be appended:
+
++---------------------------------
+HTTP/1.1 307 TEMPORARY_REDIRECT
+Location: http://<DATANODE>:<PORT>/webhdfs/v1/<PATH>?op=APPEND...
+Content-Length: 0
++---------------------------------
+
+  * Step 2: Submit another HTTP POST request using the URL in the <<<Location>>> header with the file data to be appended.
+
++---------------------------------
+curl -i -X POST -T <LOCAL_FILE> "http://<DATANODE>:<PORT>/webhdfs/v1/<PATH>?op=APPEND..."
++---------------------------------
+
+  The client receives a response with zero content length:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Length: 0
++---------------------------------
+
+  []
+
+  See the note in the previous section for the description of why this operation requires two steps;
+  a programmatic sketch of the two-step exchange follows below.
+
+  See also:
+  {{{Buffer Size}<<<buffersize>>>}},
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.append
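+
+  The two-step exchange can also be scripted. The following editorial sketch
+  performs a two-step <<<CREATE>>> from Java without following redirects
+  automatically; the host, port, paths and user are example values only.
+
++---------------------------------
+// Step 1 obtains the datanode URL from the Location header;
+// step 2 sends the file data to that URL.
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+
+public class TwoStepCreate {
+  public static void main(String[] args) throws Exception {
+    URL nn = new URL(
+        "http://namenode:50070/webhdfs/v1/tmp/a.txt?op=CREATE&user.name=webuser");
+    HttpURLConnection c1 = (HttpURLConnection) nn.openConnection();
+    c1.setRequestMethod("PUT");
+    c1.setInstanceFollowRedirects(false);      // expect 307, do not follow
+    String datanodeUrl = c1.getHeaderField("Location");
+    c1.disconnect();
+
+    HttpURLConnection c2 =
+        (HttpURLConnection) new URL(datanodeUrl).openConnection();
+    c2.setRequestMethod("PUT");
+    c2.setDoOutput(true);                      // now send the file data
+    OutputStream out = c2.getOutputStream();
+    InputStream in = new FileInputStream("a.txt");
+    byte[] buf = new byte[4096];
+    for (int n; (n = in.read(buf)) != -1; ) {
+      out.write(buf, 0, n);
+    }
+    in.close();
+    out.close();
+    System.out.println(c2.getResponseCode());  // expect 201
+  }
+}
++---------------------------------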
+
+
+** {Open and Read a File}
+
+  * Submit an HTTP GET request, automatically following redirects.
+
++---------------------------------
+curl -i -L "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN
+                    [&offset=<LONG>][&length=<LONG>][&buffersize=<INT>]"
++---------------------------------
+
+  The request is redirected to a datanode where the file data can be read:
+
++---------------------------------
+HTTP/1.1 307 TEMPORARY_REDIRECT
+Location: http://<DATANODE>:<PORT>/webhdfs/v1/<PATH>?op=OPEN...
+Content-Length: 0
++---------------------------------
+
+  The client follows the redirect to the datanode and receives the file data:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/octet-stream
+Content-Length: 22
+
+Hello, webhdfs user!
++---------------------------------
+
+  []
+
+  See also:
+  {{{Offset}<<<offset>>>}},
+  {{{Length}<<<length>>>}},
+  {{{Buffer Size}<<<buffersize>>>}},
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.open
+
+
+** {Make a Directory}
+
+  * Submit an HTTP PUT request.
+
++---------------------------------
+curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=MKDIRS[&permission=<OCTAL>]"
++---------------------------------
+
+  The client receives a response with a {{{Boolean JSON Schema}<<<boolean>>> JSON object}}:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/json
+Transfer-Encoding: chunked
+
+{"boolean": true}
++---------------------------------
+
+  []
+
+  See also:
+  {{{Permission}<<<permission>>>}},
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.mkdirs
+
+
+** {Rename a File/Directory}
+
+  * Submit an HTTP PUT request.
+
++---------------------------------
+curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=RENAME&destination=<PATH>"
++---------------------------------
+
+  The client receives a response with a {{{Boolean JSON Schema}<<<boolean>>> JSON object}}:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/json
+Transfer-Encoding: chunked
+
+{"boolean": true}
++---------------------------------
+
+  []
+
+  See also:
+  {{{Destination}<<<destination>>>}},
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.rename
+
+
+** {Delete a File/Directory}
+
+  * Submit an HTTP DELETE request.
+
++---------------------------------
+curl -i -X DELETE "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=DELETE
+                              [&recursive=<true|false>]"
++---------------------------------
+
+  The client receives a response with a {{{Boolean JSON Schema}<<<boolean>>> JSON object}}:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/json
+Transfer-Encoding: chunked
+
+{"boolean": true}
++---------------------------------
+
+  []
+
+  See also:
+  {{{Recursive}<<<recursive>>>}},
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.delete
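+
+  Operations such as <<<MKDIRS>>>, <<<RENAME>>> and <<<DELETE>>> are
+  single-step: there is no redirect, and the result comes back directly as a
+  <<<boolean>>> JSON object. The following editorial sketch issues
+  <<<MKDIRS>>> from Java; the host, port, path and user are example values.
+
++---------------------------------
+// Create a directory and print the {"boolean": ...} response body.
+import java.io.InputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.Scanner;
+
+public class MkdirsExample {
+  public static void main(String[] args) throws Exception {
+    URL url = new URL(
+        "http://namenode:50070/webhdfs/v1/tmp/newdir?op=MKDIRS&user.name=webuser");
+    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+    conn.setRequestMethod("PUT");
+    InputStream in = conn.getInputStream();
+    Scanner s = new Scanner(in).useDelimiter("\\A");  // read whole body
+    System.out.println(s.next());                     // e.g. {"boolean": true}
+    s.close();
+  }
+}
++---------------------------------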
+
+
+** {Status of a File/Directory}
+
+  * Submit an HTTP GET request.
+
++---------------------------------
+curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILESTATUS"
++---------------------------------
+
+  The client receives a response with a {{{FileStatus JSON Schema}<<<FileStatus>>> JSON object}}:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/json
+Transfer-Encoding: chunked
+
+{
+  "FileStatus":
+  {
+    "accessTime"      : 0,
+    "blockSize"       : 0,
+    "group"           : "supergroup",
+    "length"          : 0,             //in bytes, zero for directories
+    "modificationTime": 1320173277227,
+    "owner"           : "webuser",
+    "pathSuffix"      : "",
+    "permission"      : "777",
+    "replication"     : 0,
+    "type"            : "DIRECTORY"    //enum {FILE, DIRECTORY, SYMLINK}
+  }
+}
++---------------------------------
+
+  []
+
+  See also:
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getFileStatus
+
+
+** {List a Directory}
+
+  * Submit an HTTP GET request.
+
++---------------------------------
+curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS"
++---------------------------------
+
+  The client receives a response with a {{{FileStatuses JSON Schema}<<<FileStatuses>>> JSON object}}:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/json
+Content-Length: 427
+
+{
+  "FileStatuses":
+  {
+    "FileStatus":
+    [
+      {
+        "accessTime"      : 1320171722771,
+        "blockSize"       : 33554432,
+        "group"           : "supergroup",
+        "length"          : 24930,
+        "modificationTime": 1320171722771,
+        "owner"           : "webuser",
+        "pathSuffix"      : "a.patch",
+        "permission"      : "644",
+        "replication"     : 1,
+        "type"            : "FILE"
+      },
+      {
+        "accessTime"      : 0,
+        "blockSize"       : 0,
+        "group"           : "supergroup",
+        "length"          : 0,
+        "modificationTime": 1320895981256,
+        "owner"           : "szetszwo",
+        "pathSuffix"      : "bar",
+        "permission"      : "711",
+        "replication"     : 0,
+        "type"            : "DIRECTORY"
+      },
+      ...
+    ]
+  }
+}
++---------------------------------
+
+  []
+
+  See also:
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.listStatus
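+
+  The JSON responses are straightforward to consume programmatically. The
+  following editorial sketch parses the <<<FileStatuses>>> object with
+  Jackson, the JSON library bundled with Hadoop (an assumption about the
+  classpath); the host, port and path are example values.
+
++---------------------------------
+// Fetch and parse a LISTSTATUS response.
+import java.net.URL;
+import org.codehaus.jackson.JsonNode;
+import org.codehaus.jackson.map.ObjectMapper;
+
+public class ListStatusExample {
+  public static void main(String[] args) throws Exception {
+    URL url = new URL("http://namenode:50070/webhdfs/v1/user?op=LISTSTATUS");
+    JsonNode root = new ObjectMapper().readTree(url.openStream());
+    for (JsonNode stat : root.get("FileStatuses").get("FileStatus")) {
+      System.out.println(stat.get("pathSuffix").getTextValue()
+          + " (" + stat.get("type").getTextValue() + ")");
+    }
+  }
+}
++---------------------------------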
+
+
+* {Other File System Operations}
+
+** {Get Content Summary of a Directory}
+
+  * Submit an HTTP GET request.
+
++---------------------------------
+curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETCONTENTSUMMARY"
++---------------------------------
+
+  The client receives a response with a {{{ContentSummary JSON Schema}<<<ContentSummary>>> JSON object}}:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/json
+Transfer-Encoding: chunked
+
+{
+  "ContentSummary":
+  {
+    "directoryCount": 2,
+    "fileCount"     : 1,
+    "length"        : 24930,
+    "quota"         : -1,
+    "spaceConsumed" : 24930,
+    "spaceQuota"    : -1
+  }
+}
++---------------------------------
+
+  []
+
+  See also:
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getContentSummary
+
+
+** {Get File Checksum}
+
+  * Submit an HTTP GET request.
+
++---------------------------------
+curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILECHECKSUM"
++---------------------------------
+
+  The request is redirected to a datanode:
+
++---------------------------------
+HTTP/1.1 307 TEMPORARY_REDIRECT
+Location: http://<DATANODE>:<PORT>/webhdfs/v1/<PATH>?op=GETFILECHECKSUM...
+Content-Length: 0
++---------------------------------
+
+  The client follows the redirect to the datanode and receives a {{{FileChecksum JSON Schema}<<<FileChecksum>>> JSON object}}:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/json
+Transfer-Encoding: chunked
+
+{
+  "FileChecksum":
+  {
+    "algorithm": "MD5-of-1MD5-of-512CRC32",
+    "bytes"    : "eadb10de24aa315748930df6e185c0d ...",
+    "length"   : 28
+  }
+}
++---------------------------------
+
+  []
+
+  See also:
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getFileChecksum
+
+
+** {Get Home Directory}
+
+  * Submit an HTTP GET request.
+
++---------------------------------
+curl -i "http://<HOST>:<PORT>/webhdfs/v1/?op=GETHOMEDIRECTORY"
++---------------------------------
+
+  The client receives a response with a {{{Path JSON Schema}<<<Path>>> JSON object}}:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/json
+Transfer-Encoding: chunked
+
+{"Path": "/user/szetszwo"}
++---------------------------------
+
+  []
+
+  See also:
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getHomeDirectory
+
+
+** {Set Permission}
+
+  * Submit an HTTP PUT request.
+
++---------------------------------
+curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETPERMISSION
+                              [&permission=<OCTAL>]"
++---------------------------------
+
+  The client receives a response with zero content length:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Length: 0
++---------------------------------
+
+  []
+
+  See also:
+  {{{Permission}<<<permission>>>}},
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.setPermission
+
+
+** {Set Owner}
+
+  * Submit an HTTP PUT request.
+
++---------------------------------
+curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETOWNER
+                              [&owner=<USER>][&group=<GROUP>]"
++---------------------------------
+
+  The client receives a response with zero content length:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Length: 0
++---------------------------------
+
+  []
+
+  See also:
+  {{{Owner}<<<owner>>>}},
+  {{{Group}<<<group>>>}},
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.setOwner
+
+
+** {Set Replication Factor}
+
+  * Submit an HTTP PUT request.
+
++---------------------------------
+curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETREPLICATION
+                              [&replication=<SHORT>]"
++---------------------------------
+
+  The client receives a response with a {{{Boolean JSON Schema}<<<boolean>>> JSON object}}:
+
++---------------------------------
+HTTP/1.1 200 OK
+Content-Type: application/json
+Transfer-Encoding: chunked
+
+{"boolean": true}
++---------------------------------
+
+  []
+
+  See also:
+  {{{Replication}<<<replication>>>}},
+  {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.setReplication
+
+
+** {Set Access or Modification Time}
+
+  * Submit an HTTP PUT request.
+
++---------------------------------
+curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETTIMES
+                              [&modificationtime=<TIME>][&accesstime=<TIME>]"
++---------------------------------