From 38e97c2061d79459dc28eb4be1c331820ba77058 Mon Sep 17 00:00:00 2001
From: Kihwal Lee
Date: Mon, 17 Feb 2020 15:49:48 -0600
Subject: [PATCH] HDFS-12459. Fix revert: Add new op GETFILEBLOCKLOCATIONS to
 WebHDFS REST API. Contributed by Weiwei Yang.

Signed-off-by: Wei-Chiu Chuang
(cherry picked from commit 3ead525c71cba068e7abf1c76ad629bfeec10852)

 Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
	hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
---
 .../hadoop/hdfs/web/resources/GetOpParam.java |  12 +-
 .../web/resources/NamenodeWebHdfsMethods.java |  13 ++
 .../org/apache/hadoop/hdfs/web/JsonUtil.java  |  51 ++++--
 .../hadoop-hdfs/src/site/markdown/WebHDFS.md  | 173 ++++++++++++++++++
 .../apache/hadoop/hdfs/web/TestWebHDFS.java   | 149 +++++++++++++++
 5 files changed, 378 insertions(+), 20 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java
index 1cb0a398ab3..85f7aa46b1e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java
@@ -34,8 +34,18 @@ public class GetOpParam extends HttpOpParam<GetOpParam.Op> {
     GETHOMEDIRECTORY(false, HttpURLConnection.HTTP_OK),
     GETDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK, true),
 
-    /** GET_BLOCK_LOCATIONS is a private unstable op. */
+    /**
+     * GET_BLOCK_LOCATIONS is a private/stable API op. It returns a
+     * {@link org.apache.hadoop.hdfs.protocol.LocatedBlocks}
+     * json object.
+     */
     GET_BLOCK_LOCATIONS(false, HttpURLConnection.HTTP_OK),
+    /**
+     * GETFILEBLOCKLOCATIONS is the public op that complies with the
+     * {@link org.apache.hadoop.fs.FileSystem#getFileBlockLocations}
+     * interface.
+     */
+    GETFILEBLOCKLOCATIONS(false, HttpURLConnection.HTTP_OK),
     GETACLSTATUS(false, HttpURLConnection.HTTP_OK),
     GETXATTRS(false, HttpURLConnection.HTTP_OK),
     GETTRASHROOT(false, HttpURLConnection.HTTP_OK),
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
index 4fef4a6aa3b..973fe71d56c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
@@ -58,6 +58,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.QuotaUsage;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileEncryptionInfo;
 import org.apache.hadoop.fs.FileStatus;
@@ -1013,6 +1014,18 @@ public class NamenodeWebHdfsMethods {
           .build();
       }
     }
+    case GETFILEBLOCKLOCATIONS:
+    {
+      final long offsetValue = offset.getValue();
+      final Long lengthValue = length.getValue();
+      LocatedBlocks locatedBlocks = getRpcClientProtocol()
+          .getBlockLocations(fullpath, offsetValue, lengthValue != null ?
+              lengthValue : Long.MAX_VALUE);
+      BlockLocation[] locations =
+          DFSUtilClient.locatedBlocks2Locations(locatedBlocks);
+      final String js = JsonUtil.toJsonString(locations);
+      return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
+    }
     case GET_BLOCK_LOCATIONS:
     {
       final long offsetValue = offset.getValue();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
index 585bf0f1d6a..b626c3dacb4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
@@ -509,25 +509,6 @@ public class JsonUtil {
     return m;
   }
 
-  public static Map<String, Object> toJsonMap(
-      final BlockLocation blockLocation) throws IOException {
-    if (blockLocation == null) {
-      return null;
-    }
-
-    final Map<String, Object> m = new TreeMap<>();
-    m.put("length", blockLocation.getLength());
-    m.put("offset", blockLocation.getOffset());
-    m.put("corrupt", blockLocation.isCorrupt());
-    m.put("storageTypes", toJsonArray(blockLocation.getStorageTypes()));
-    m.put("storageIds", blockLocation.getStorageIds());
-    m.put("cachedHosts", blockLocation.getCachedHosts());
-    m.put("hosts", blockLocation.getHosts());
-    m.put("names", blockLocation.getNames());
-    m.put("topologyPaths", blockLocation.getTopologyPaths());
-    return m;
-  }
-
   public static String toJsonString(FsServerDefaults serverDefaults) {
     return toJsonString(FsServerDefaults.class, toJsonMap(serverDefaults));
   }
@@ -546,4 +527,36 @@
     m.put("defaultStoragePolicyId", serverDefaults.getDefaultStoragePolicyId());
     return m;
   }
+
+  private static Map<String, Object> toJsonMap(
+      final BlockLocation blockLocation) throws IOException {
+    if (blockLocation == null) {
+      return null;
+    }
+
+    final Map<String, Object> m = new HashMap<>();
+    m.put("length", blockLocation.getLength());
+    m.put("offset", blockLocation.getOffset());
+    m.put("corrupt", blockLocation.isCorrupt());
+    m.put("storageTypes", toJsonArray(blockLocation.getStorageTypes()));
+    m.put("cachedHosts", blockLocation.getCachedHosts());
+    m.put("hosts", blockLocation.getHosts());
+    m.put("names", blockLocation.getNames());
+    m.put("topologyPaths", blockLocation.getTopologyPaths());
+    return m;
+  }
+
+  public static String toJsonString(BlockLocation[] locations)
+      throws IOException {
+    if (locations == null) {
+      return null;
+    }
+    final Map<String, Object> m = new HashMap<>();
+    Object[] blockLocations = new Object[locations.length];
+    for (int i = 0; i < locations.length; i++) {
+      blockLocations[i] = toJsonMap(locations[i]);
+    }
+    m.put(BlockLocation.class.getSimpleName(), blockLocations);
+    return toJsonString("BlockLocations", m);
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md
+### Get File Block Locations
+
+* Submit a HTTP GET request.
+
+        curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILEBLOCKLOCATIONS"
+
+    The client receives a response with a [`BlockLocations` JSON Object](#BlockLocations_JSON_Schema):
+
+        HTTP/1.1 200 OK
+        Content-Type: application/json
+        Transfer-Encoding: chunked
+
+        {
+          "BlockLocations" :
+          {
+            "BlockLocation":
+            [
+              {
+                "cachedHosts" : [],
+                "corrupt" : false,
+                "hosts" : ["host"],
+                "length" : 134217728,                             // length of this block
+                "names" : ["host:ip"],
+                "offset" : 0,                                     // offset of the block in the file
+                "storageTypes" : ["DISK"],                        // enum {RAM_DISK, SSD, DISK, ARCHIVE}
+                "topologyPaths" : ["/default-rack/hostname:ip"]
+              }, {
+                "cachedHosts" : [],
+                "corrupt" : false,
+                "hosts" : ["host"],
+                "length" : 62599364,
+                "names" : ["host:ip"],
+                "offset" : 134217728,
+                "storageTypes" : ["DISK"],
+                "topologyPaths" : ["/default-rack/hostname:ip"]
+              },
+              ...
+            ]
+          }
+        }
+
+See also: [`offset`](#Offset), [`length`](#Length), [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileBlockLocations
+
 Extended Attributes(XAttrs) Operations
 --------------------------------------
@@ -2173,6 +2217,135 @@ A `BlockStoragePolicies` JSON object represents an array of `BlockStoragePolicy`
 }
 ```
 
+### BlockLocations JSON Schema
+
+A `BlockLocations` JSON object represents an array of `BlockLocation` JSON objects.
+
+```json
+{
+  "name"      : "BlockLocations",
+  "properties":
+  {
+    "BlockLocations":
+    {
+      "type"      : "object",
+      "properties":
+      {
+        "BlockLocation":
+        {
+          "description": "An array of BlockLocation",
+          "type"       : "array",
+          "items"      : blockLocationProperties      //See BlockLocation Properties
+        }
+      }
+    }
+  }
+}
+```
+
+See also: [`BlockLocation` Properties](#BlockLocation_Properties), [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations), [BlockLocation](../../api/org/apache/hadoop/fs/BlockLocation.html)
+
+### BlockLocation JSON Schema
+
+```json
+{
+  "name"      : "BlockLocation",
+  "properties":
+  {
+    "BlockLocation": blockLocationProperties      //See BlockLocation Properties
+  }
+}
+```
+
+See also: [`BlockLocation` Properties](#BlockLocation_Properties), [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations), [BlockLocation](../../api/org/apache/hadoop/fs/BlockLocation.html)
+
+#### BlockLocation Properties
+
+JavaScript syntax is used to define `blockLocationProperties` so that it can be referred to in both the `BlockLocation` and `BlockLocations` JSON schemas.
+
+```javascript
+var blockLocationProperties =
+{
+  "type"      : "object",
+  "properties":
+  {
+    "cachedHosts":
+    {
+      "description": "Datanode hostnames with a cached replica",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "A datanode hostname",
+        "type"       : "string"
+      }
+    },
+    "corrupt":
+    {
+      "description": "True if the block is corrupted",
+      "type"       : "boolean",
+      "required"   : "true"
+    },
+    "hosts":
+    {
+      "description": "Datanode hostnames that store the block",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "A datanode hostname",
+        "type"       : "string"
+      }
+    },
+    "length":
+    {
+      "description": "Length of the block",
+      "type"       : "integer",
+      "required"   : "true"
+    },
+    "names":
+    {
+      "description": "Datanode IP:xferPort for accessing the block",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "DatanodeIP:xferPort",
+        "type"       : "string"
+      }
+    },
+    "offset":
+    {
+      "description": "Offset of the block in the file",
+      "type"       : "integer",
+      "required"   : "true"
+    },
+    "storageTypes":
+    {
+      "description": "Storage type of each replica",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "Storage type",
+        "enum"       : ["RAM_DISK", "SSD", "DISK", "ARCHIVE"]
+      }
+    },
+    "topologyPaths":
+    {
+      "description": "Datanode addresses in network topology",
+      "type"       : "array",
+      "required"   : "true",
+      "items"      :
+      {
+        "description": "/rack/host:ip",
+        "type"       : "string"
+      }
+    }
+  }
+};
+```
+
 HTTP Query Parameter Dictionary
 -------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
index 5c6a832d924..30ac622a462 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
@@ -44,8 +44,10 @@ import java.net.SocketTimeoutException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.nio.charset.StandardCharsets;
 import java.security.PrivilegedExceptionAction;
 import java.util.Arrays;
+import java.util.Map;
 import java.util.Random;
 
 import com.google.common.collect.ImmutableList;
@@ -111,6 +113,9 @@ import org.junit.Test;
 import org.mockito.Mockito;
 import org.mockito.internal.util.reflection.Whitebox;
 
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.type.MapType;
+
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyInt;
 import static org.mockito.Mockito.doReturn;
@@ -993,6 +998,150 @@ public class TestWebHDFS {
     }
   }
 
+  @Test
+  public void testWebHdfsGetBlockLocations() throws Exception {
+    MiniDFSCluster cluster = null;
+    final Configuration conf = WebHdfsTestUtil.createConf();
+    final int offset = 42;
+    final int length = 512;
+    final Path path = new Path("/foo");
+    byte[] contents = new byte[1024];
+    RANDOM.nextBytes(contents);
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      final WebHdfsFileSystem fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf,
+          WebHdfsConstants.WEBHDFS_SCHEME);
+      try (OutputStream os = fs.create(path)) {
+        os.write(contents);
+      }
+      BlockLocation[] locations = fs.getFileBlockLocations(path, offset,
+          length);
+
+      // Query the WebHDFS REST API to get block locations
+      InetSocketAddress addr = cluster.getNameNode().getHttpAddress();
+
+      // Case 1
+      // URL without length or offset parameters
+      URL url1 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS");
+
+      String response1 = getResponse(url1, "GET");
+      // Parse the BlockLocation array from the JSON output using ObjectMapper
+      BlockLocation[] locationArray1 = toBlockLocationArray(response1);
+
+      // Verify the result from the REST call matches the FileSystem API
+      verifyEquals(locations, locationArray1);
+
+      // Case 2
+      // URL contains both length and offset parameters
+      URL url2 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS"
+              + "&length=" + length + "&offset=" + offset);
+
+      String response2 = getResponse(url2, "GET");
+      BlockLocation[] locationArray2 = toBlockLocationArray(response2);
+
+      verifyEquals(locations, locationArray2);
+
+      // Case 3
+      // URL contains the length parameter but not the offset parameter
+      URL url3 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS"
+              + "&length=" + length);
+
+      String response3 = getResponse(url3, "GET");
+      BlockLocation[] locationArray3 = toBlockLocationArray(response3);
+
+      verifyEquals(locations, locationArray3);
+
+      // Case 4
+      // URL contains the offset parameter but not the length parameter
+      URL url4 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS"
+              + "&offset=" + offset);
+
+      String response4 = getResponse(url4, "GET");
+      BlockLocation[] locationArray4 = toBlockLocationArray(response4);
+
+      verifyEquals(locations, locationArray4);
+
+      // Case 5
+      // URL specifies an offset that exceeds the file length
+      URL url5 = new URL("http", addr.getHostString(), addr.getPort(),
+          WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS"
+              + "&offset=1200");
+
+      String response5 = getResponse(url5, "GET");
+      BlockLocation[] locationArray5 = toBlockLocationArray(response5);
+
+      // Expect an empty array of BlockLocation
+      verifyEquals(new BlockLocation[] {}, locationArray5);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  private BlockLocation[] toBlockLocationArray(String json)
+      throws IOException {
+    ObjectMapper mapper = new ObjectMapper();
+    MapType subType = mapper.getTypeFactory().constructMapType(
+        Map.class,
+        String.class,
+        BlockLocation[].class);
+    MapType rootType = mapper.getTypeFactory().constructMapType(
+        Map.class,
+        mapper.constructType(String.class),
+        mapper.constructType(subType));
+
+    Map<String, Map<String, BlockLocation[]>> jsonMap = mapper
+        .readValue(json, rootType);
+    Map<String, BlockLocation[]> locationMap = jsonMap
+        .get("BlockLocations");
+    BlockLocation[] locationArray = locationMap.get(
+        BlockLocation.class.getSimpleName());
+    return locationArray;
+  }
+
+  private void verifyEquals(BlockLocation[] locations1,
+      BlockLocation[] locations2) throws IOException {
+    Assert.assertEquals(locations1.length, locations2.length);
+    for (int i = 0; i < locations1.length; i++) {
+      Assert.assertEquals(locations1[i].getOffset(), locations2[i].getOffset());
+      Assert.assertEquals(locations1[i].getLength(), locations2[i].getLength());
+      Assert.assertArrayEquals(locations1[i].getHosts(),
+          locations2[i].getHosts());
+      Assert.assertArrayEquals(locations1[i].getNames(),
+          locations2[i].getNames());
+      Assert.assertArrayEquals(locations1[i].getTopologyPaths(),
+          locations2[i].getTopologyPaths());
+    }
+  }
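The patch text is cut off at this point, so the `getResponse(URL, String)` helper that the test calls never appears. A minimal sketch of what such a helper would need to do — send the HTTP request and return the response body as a string — assuming `java.net.HttpURLConnection` and Apache Commons IO's `IOUtils` are available on the test classpath (the `StandardCharsets` import added above points at a charset-aware read):

```java
// Sketch of the truncated test helper, not the original patch lines.
// Assumes: import java.io.IOException; import java.net.HttpURLConnection;
// import java.net.URL; import java.nio.charset.StandardCharsets;
// import org.apache.commons.io.IOUtils;
private static String getResponse(URL url, String httpRequestType)
    throws IOException {
  HttpURLConnection conn = null;
  try {
    // Open a plain HTTP connection to the WebHDFS REST endpoint.
    conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod(httpRequestType);
    conn.setInstanceFollowRedirects(false);
    // Read the JSON response body as UTF-8.
    return IOUtils.toString(conn.getInputStream(), StandardCharsets.UTF_8);
  } finally {
    if (conn != null) {
      conn.disconnect();
    }
  }
}
```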
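For readers trying the new op end to end: the REST response documented above carries the same information that `FileSystem#getFileBlockLocations` returns, so a `webhdfs://` client can consume it without touching the JSON directly. A small, hypothetical usage sketch — the NameNode host, HTTP port and `/foo` path are placeholders, not values from this patch:

```java
import java.net.URI;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WebHdfsBlockLocationsExample {
  public static void main(String[] args) throws Exception {
    // "namenode-host:50070" is a placeholder for the NameNode HTTP address.
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(
        URI.create("webhdfs://namenode-host:50070"), conf);

    // Block locations for the whole file; the start/len arguments narrow the
    // range, mirroring the offset and length query parameters of the REST op.
    BlockLocation[] locations =
        fs.getFileBlockLocations(new Path("/foo"), 0, Long.MAX_VALUE);
    for (BlockLocation location : locations) {
      System.out.println(location.getOffset() + "+" + location.getLength()
          + " on " + Arrays.toString(location.getHosts()));
    }
  }
}
```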