HDFS-10837. Standardize serialization of WebHDFS DirectoryListing.

This commit is contained in:
Andrew Wang 2016-09-13 11:02:36 -07:00
parent e3f7f58a5f
commit db6d243cf8
4 changed files with 182 additions and 96 deletions

View File

@ -143,23 +143,36 @@ class JsonUtilClient {
storagePolicy, null);
}
/**
 * Convert a Json map containing a "FileStatuses" object into an array of
 * HdfsFileStatus, one element per entry of the nested "FileStatus" list.
 *
 * @param json the Json map to decode; must not be null
 * @return the decoded statuses, in the order they appear in the list
 */
static HdfsFileStatus[] toHdfsFileStatusArray(final Map<?, ?> json) {
  Preconditions.checkNotNull(json);
  // The wrapper object is keyed by the plural form of the FileStatus class name.
  final Map<?, ?> wrapper =
      (Map<?, ?>) json.get(FileStatus.class.getSimpleName() + "es");
  final List<?> entries =
      JsonUtilClient.getList(wrapper, FileStatus.class.getSimpleName());
  Preconditions.checkNotNull(entries);
  final HdfsFileStatus[] result = new HdfsFileStatus[entries.size()];
  int next = 0;
  for (Object entry : entries) {
    result[next] = JsonUtilClient.toFileStatus((Map<?, ?>) entry, false);
    next++;
  }
  return result;
}
/**
 * Convert a Json map to a DirectoryListing object.
 * Returns null when the given map is null.
 */
// NOTE(review): this span appears to interleave the pre-change and post-change
// lines of the commit (duplicate declarations of partialListing and
// remainingEntries, two return statements) - confirm against the real file.
static DirectoryListing toDirectoryListing(final Map<?, ?> json) {
if (json == null) {
return null;
}
final List<?> list = JsonUtilClient.getList(json,
"partialListing");
// The listing is nested under "DirectoryListing", and its "partialListing"
// entry is decoded by toHdfsFileStatusArray.
final Map<?, ?> listing = getMap(json, "DirectoryListing");
final Map<?, ?> partialListing = getMap(listing, "partialListing");
HdfsFileStatus[] fileStatuses = toHdfsFileStatusArray(partialListing);
HdfsFileStatus[] partialListing = new HdfsFileStatus[list.size()];
int i = 0;
for (Object o : list) {
final Map<?, ?> m = (Map<?, ?>) o;
partialListing[i++] = toFileStatus(m, false);
}
// -1 is presumably the default getInt returns for a missing key (TODO confirm);
// the checkState below rejects it.
int remainingEntries = getInt(json, "remainingEntries", -1);
int remainingEntries = getInt(listing, "remainingEntries", -1);
Preconditions.checkState(remainingEntries != -1,
"remainingEntries was not set");
return new DirectoryListing(partialListing, remainingEntries);
return new DirectoryListing(fileStatuses, remainingEntries);
}
/** Convert a Json map to an ExtendedBlock object. */
@ -210,6 +223,15 @@ class JsonUtilClient {
}
}
/**
 * Look up a key in a Json map and return its value only if that value is
 * itself a map.
 *
 * @param m the Json map to read from; may be null
 * @param key the key to look up
 * @return the nested map stored under the key, or null when the given map is
 *         null, the key is absent, or the value is not a map
 */
static Map<?, ?> getMap(Map<?, ?> m, String key) {
  // Lookups are chained (one getMap result is passed to the next), so a null
  // input yields null instead of a NullPointerException raised in here.
  if (m == null) {
    return null;
  }
  final Object value = m.get(key);
  return (value instanceof Map<?, ?>) ? (Map<?, ?>) value : null;
}
/** Convert a Json map to a DatanodeInfo object. */
static DatanodeInfo toDatanodeInfo(final Map<?, ?> m)
throws IOException {

View File

@ -1491,20 +1491,13 @@ public class WebHdfsFileSystem extends FileSystem
return new FsPathResponseRunner<FileStatus[]>(op, f) {
/**
 * Decode the Json response into an array of FileStatus, passing each decoded
 * entry through makeQualified together with f.
 */
// NOTE(review): this span appears to interleave the pre-change and post-change
// lines of the commit (two loops, unbalanced braces) - confirm against the
// real file.
@Override
FileStatus[] decodeResponse(Map<?,?> json) {
final Map<?, ?> rootmap =
(Map<?, ?>)json.get(FileStatus.class.getSimpleName() + "es");
final List<?> array = JsonUtilClient.getList(rootmap,
FileStatus.class.getSimpleName());
//convert FileStatus
assert array != null;
final FileStatus[] statuses = new FileStatus[array.size()];
int i = 0;
for (Object object : array) {
final Map<?, ?> m = (Map<?, ?>) object;
statuses[i++] = makeQualified(JsonUtilClient.toFileStatus(m, false),
f);
// Json decoding is delegated to JsonUtilClient.toHdfsFileStatusArray.
HdfsFileStatus[] hdfsStatuses =
JsonUtilClient.toHdfsFileStatusArray(json);
final FileStatus[] statuses = new FileStatus[hdfsStatuses.length];
for (int i = 0; i < hdfsStatuses.length; i++) {
statuses[i] = makeQualified(hdfsStatuses[i], f);
}
return statuses;
}
}.run();

View File

@ -232,32 +232,42 @@ public class JsonUtil {
return m;
}
/**
 * Build the Json map for a DirectoryListing: its partial listing serialized
 * through toJsonMap under "partialListing", and its remaining entry count
 * under "remainingEntries".
 */
private static Map<String, Object> toJson(final DirectoryListing listing)
    throws IOException {
  // FileStatus[] serialized as a FileStatuses map
  final Object partialListing = toJsonMap(listing.getPartialListing());
  // Plain integer count
  final Object remainingEntries = listing.getRemainingEntries();

  final Map<String, Object> fields = new TreeMap<>();
  fields.put("partialListing", partialListing);
  fields.put("remainingEntries", remainingEntries);
  return fields;
}
/**
 * Convert a DirectoryListing to a Json string.
 * Returns null when the given listing is null.
 */
// NOTE(review): this span appears to interleave the pre-change and post-change
// lines of the commit (two return statements) - confirm against the real file.
public static String toJsonString(final DirectoryListing listing) throws
IOException {
if (listing == null) {
return null;
}
final Map<String, Object> m = new TreeMap<>();
m.put("partialListing", toJsonArray(listing.getPartialListing()));
m.put("remainingEntries", listing.getRemainingEntries());
return MAPPER.writeValueAsString(m);
// The map built by toJson is passed on together with DirectoryListing.class.
return toJsonString(DirectoryListing.class, toJson(listing));
}
/**
 * Convert an HdfsFileStatus[] to its Json form.
 * Returns null when the given array is null.
 */
// NOTE(review): this span appears to interleave the pre-change (toJsonArray)
// and post-change (toJsonMap) lines of the commit (two signatures, two return
// statements) - confirm against the real file.
private static Object[] toJsonArray(HdfsFileStatus[] statuses) throws
private static Map<String, Object> toJsonMap(HdfsFileStatus[] statuses) throws
IOException {
if (statuses == null) {
return null;
}
if (statuses.length == 0) {
return EMPTY_OBJECT_ARRAY;
}
final Object[] a = new Object[statuses.length];
// Nested layout: "FileStatuses" -> "FileStatus" -> array of per-status Json maps.
final Map<String, Object> fileStatuses = new TreeMap<>();
final Map<String, Object> fileStatus = new TreeMap<>();
fileStatuses.put("FileStatuses", fileStatus);
final Object[] array = new Object[statuses.length];
fileStatus.put("FileStatus", array);
for (int i = 0; i < statuses.length; i++) {
a[i] = toJsonMap(statuses[i]);
array[i] = toJsonMap(statuses[i]);
}
return a;
return fileStatuses;
}
/** Convert a LocatedBlock[] to a Json array. */

View File

@ -597,15 +597,15 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).listStatu
curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=<CHILD>"
The client receives a response with a batch of [`FileStatuses` JSON object](#FileStatuses_JSON_Schema), as well as iteration information:
The client receives a response with a [`DirectoryListing` JSON object](#DirectoryListing_JSON_Schema), which contains a [`FileStatuses` JSON object](#FileStatuses_JSON_Schema), as well as iteration information:
HTTP/1.1 200 OK
Cache-Control: no-cache
Expires: Tue, 30 Aug 2016 16:42:16 GMT
Date: Tue, 30 Aug 2016 16:42:16 GMT
Expires: Thu, 08 Sep 2016 03:40:38 GMT
Date: Thu, 08 Sep 2016 03:40:38 GMT
Pragma: no-cache
Expires: Tue, 30 Aug 2016 16:42:16 GMT
Date: Tue, 30 Aug 2016 16:42:16 GMT
Expires: Thu, 08 Sep 2016 03:40:38 GMT
Date: Thu, 08 Sep 2016 03:40:38 GMT
Pragma: no-cache
Content-Type: application/json
X-FRAME-OPTIONS: SAMEORIGIN
@ -613,56 +613,61 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).listStatu
Server: Jetty(6.1.26)
{
"partialListing": [
{
"accessTime": 0,
"blockSize": 0,
"childrenNum": 0,
"fileId": 16389,
"group": "supergroup",
"length": 0,
"modificationTime": 1472575493064,
"owner": "andrew",
"pathSuffix": "anotherdir",
"permission": "755",
"replication": 0,
"storagePolicy": 0,
"type": "DIRECTORY"
"DirectoryListing": {
"partialListing": {
"FileStatuses": {
"FileStatus": [
{
"accessTime": 0,
"blockSize": 0,
"childrenNum": 0,
"fileId": 16387,
"group": "supergroup",
"length": 0,
"modificationTime": 1473305882563,
"owner": "andrew",
"pathSuffix": "bardir",
"permission": "755",
"replication": 0,
"storagePolicy": 0,
"type": "DIRECTORY"
},
{
"accessTime": 1473305896945,
"blockSize": 1024,
"childrenNum": 0,
"fileId": 16388,
"group": "supergroup",
"length": 0,
"modificationTime": 1473305896965,
"owner": "andrew",
"pathSuffix": "bazfile",
"permission": "644",
"replication": 3,
"storagePolicy": 0,
"type": "FILE"
}
]
}
},
{
"accessTime": 0,
"blockSize": 0,
"childrenNum": 0,
"fileId": 16386,
"group": "supergroup",
"length": 0,
"modificationTime": 1472575274776,
"owner": "andrew",
"pathSuffix": "somedir",
"permission": "755",
"replication": 0,
"storagePolicy": 0,
"type": "DIRECTORY"
}
],
"remainingEntries": 1
"remainingEntries": 2
}
}
If `remainingEntries` is non-zero, there are additional entries in the directory.
To query the next batch, set the `startAfter` parameter to the `pathSuffix` of the last item returned in the current batch. For example:
curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=somedir"
curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=bazfile"
Which will return the next batch of directory entries:
HTTP/1.1 200 OK
Cache-Control: no-cache
Expires: Tue, 30 Aug 2016 16:46:23 GMT
Date: Tue, 30 Aug 2016 16:46:23 GMT
Expires: Thu, 08 Sep 2016 03:43:20 GMT
Date: Thu, 08 Sep 2016 03:43:20 GMT
Pragma: no-cache
Expires: Tue, 30 Aug 2016 16:46:23 GMT
Date: Tue, 30 Aug 2016 16:46:23 GMT
Expires: Thu, 08 Sep 2016 03:43:20 GMT
Date: Thu, 08 Sep 2016 03:43:20 GMT
Pragma: no-cache
Content-Type: application/json
X-FRAME-OPTIONS: SAMEORIGIN
@ -670,24 +675,45 @@ Which will return the next batch of directory entries:
Server: Jetty(6.1.26)
{
"partialListing": [
{
"accessTime": 1472575333568,
"blockSize": 1024,
"childrenNum": 0,
"fileId": 16388,
"group": "supergroup",
"length": 224,
"modificationTime": 1472575334222,
"owner": "andrew",
"pathSuffix": "somefile",
"permission": "644",
"replication": 3,
"storagePolicy": 0,
"type": "FILE"
}
],
"remainingEntries": 0
"DirectoryListing": {
"partialListing": {
"FileStatuses": {
"FileStatus": [
{
"accessTime": 0,
"blockSize": 0,
"childrenNum": 0,
"fileId": 16386,
"group": "supergroup",
"length": 0,
"modificationTime": 1473305878951,
"owner": "andrew",
"pathSuffix": "foodir",
"permission": "755",
"replication": 0,
"storagePolicy": 0,
"type": "DIRECTORY"
},
{
"accessTime": 1473305902864,
"blockSize": 1024,
"childrenNum": 0,
"fileId": 16389,
"group": "supergroup",
"length": 0,
"modificationTime": 1473305902878,
"owner": "andrew",
"pathSuffix": "quxfile",
"permission": "644",
"replication": 3,
"storagePolicy": 0,
"type": "FILE"
}
]
}
},
"remainingEntries": 0
}
}
Batch size is controlled by the `dfs.ls.limit` option on the NameNode.
@ -1672,6 +1698,41 @@ A `FileStatuses` JSON object represents an array of `FileStatus` JSON objects.
See also: [`FileStatus` Properties](#FileStatus_Properties), [`LISTSTATUS`](#List_a_Directory), [FileStatus](../../api/org/apache/hadoop/fs/FileStatus.html)
### DirectoryListing JSON Schema
A `DirectoryListing` JSON object represents a batch of directory entries while iteratively listing a directory. It contains a `FileStatuses` JSON object as well as iteration information.
```json
{
"name" : "DirectoryListing",
"properties":
{
"DirectoryListing":
{
"type" : "object",
"properties":
{
"partialListing":
{
"description": "A partial directory listing",
"type" : "object", // A FileStatuses object
"required" : true
},
"remainingEntries":
{
"description": "Number of remaining entries",
"type" : "integer",
"required" : true
}
}
}
}
}
```
See also: [`FileStatuses` JSON Schema](#FileStatuses_JSON_Schema), [`LISTSTATUS_BATCH`](#Iteratively_List_a_Directory), [FileStatus](../../api/org/apache/hadoop/fs/FileStatus.html)
### Long JSON Schema
```json