diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchListingOperations.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchListingOperations.java new file mode 100644 index 00000000000..f72b1e288eb --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchListingOperations.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Interface filesystems MAY implement to offer a batched list. + * If implemented, filesystems SHOULD declare + * {@link CommonPathCapabilities#FS_EXPERIMENTAL_BATCH_LISTING} to be a supported + * path capability. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface BatchListingOperations { + + /** + * Batched listing API that returns {@link PartialListing}s for the + * passed Paths. + * + * @param paths List of paths to list. + * @return RemoteIterator that returns corresponding PartialListings. 
+ * @throws IOException failure + */ + RemoteIterator<PartialListing<FileStatus>> batchedListStatusIterator( + List<Path> paths) throws IOException; + + /** + * Batched listing API that returns {@link PartialListing}s for the passed + * Paths. The PartialListing will contain {@link LocatedFileStatus} entries + * with locations. + * + * @param paths List of paths to list. + * @return RemoteIterator that returns corresponding PartialListings. + * @throws IOException failure + */ + RemoteIterator<PartialListing<LocatedFileStatus>> + batchedListLocatedStatusIterator( + List<Path> paths) throws IOException; + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java index 31e6bac0cce..fb46ef81e36 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs; +import org.apache.hadoop.classification.InterfaceStability; + /** * Common path capabilities. */ @@ -123,4 +125,10 @@ public final class CommonPathCapabilities { */ public static final String FS_XATTRS = "fs.capability.paths.xattrs"; + /** + * Probe for support for {@link BatchListingOperations}. 
+ */ + @InterfaceStability.Unstable + public static final String FS_EXPERIMENTAL_BATCH_LISTING = + "fs.capability.batch.listing"; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index b2d744d663c..358db744e65 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -2227,33 +2227,6 @@ public abstract class FileSystem extends Configured return new DirListingIterator<>(p); } - /** - * Batched listing API that returns {@link PartialListing}s for the - * passed Paths. - * - * @param paths List of paths to list. - * @return RemoteIterator that returns corresponding PartialListings. - * @throws IOException - */ - public RemoteIterator<PartialListing<FileStatus>> batchedListStatusIterator( - final List<Path> paths) throws IOException { - throw new UnsupportedOperationException("Not implemented"); - } - - /** - * Batched listing API that returns {@link PartialListing}s for the passed - * Paths. The PartialListing will contain {@link LocatedFileStatus} entries - * with locations. - * - * @param paths List of paths to list. - * @return RemoteIterator that returns corresponding PartialListings. - * @throws IOException - */ - public RemoteIterator<PartialListing<LocatedFileStatus>> batchedListLocatedStatusIterator( - final List<Path> paths) throws IOException { - throw new UnsupportedOperationException("Not implemented"); - } - /** * List the statuses and block locations of the files in the given path. 
* Does not guarantee to return the iterator that traverses statuses diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartialListing.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartialListing.java index 58b66124901..043f84612dc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartialListing.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartialListing.java @@ -35,7 +35,7 @@ import java.util.List; * {@link #get()} will throw an Exception if there was a failure. */ @InterfaceAudience.Public -@InterfaceStability.Stable +@InterfaceStability.Unstable public class PartialListing<T> { private final Path listedPath; private final List<T> partialListing; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java index 9161be31484..f0057a6c6d9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java @@ -27,7 +27,6 @@ import java.lang.reflect.Modifier; import java.net.URI; import java.util.EnumSet; import java.util.Iterator; -import java.util.List; import org.apache.commons.logging.Log; import org.apache.hadoop.conf.Configuration; @@ -106,10 +105,6 @@ public class TestFilterFileSystem { public FileStatus[] listStatusBatch(Path f, byte[] token); public FileStatus[] listStatus(Path[] files); public FileStatus[] listStatus(Path[] files, PathFilter filter); - public RemoteIterator<PartialListing<LocatedFileStatus>> batchedListLocatedStatusIterator( - final List<Path> paths) throws IOException; - public RemoteIterator<PartialListing<FileStatus>> batchedListStatusIterator( - final List<Path> paths) throws IOException; public FileStatus[] globStatus(Path pathPattern); public FileStatus[] globStatus(Path pathPattern, 
PathFilter filter); public Iterator listFiles(Path path, diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java index e1d49f20cf6..20976338391 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java @@ -125,10 +125,6 @@ public class TestHarFileSystem { public FileStatus[] listStatusBatch(Path f, byte[] token); public FileStatus[] listStatus(Path[] files); public FileStatus[] listStatus(Path[] files, PathFilter filter); - public RemoteIterator<PartialListing<LocatedFileStatus>> batchedListLocatedStatusIterator( - final List<Path> paths) throws IOException; - public RemoteIterator<PartialListing<FileStatus>> batchedListStatusIterator( - final List<Path> paths) throws IOException; public FileStatus[] globStatus(Path pathPattern); public FileStatus[] globStatus(Path pathPattern, PathFilter filter); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index 74b6ae39bee..d1babe3280c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -29,9 +29,11 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.crypto.key.KeyProviderTokenIssuer; +import org.apache.hadoop.fs.BatchListingOperations; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.BlockStoragePolicySpi; import org.apache.hadoop.fs.CacheFlag; +import org.apache.hadoop.fs.CommonPathCapabilities; import 
org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; @@ -129,6 +131,8 @@ import java.util.Map; import java.util.NoSuchElementException; import java.util.Optional; +import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; + /**************************************************************** * Implementation of the abstract FileSystem for the DFS system. * This object is the way end-user code interacts with a Hadoop @@ -138,7 +142,7 @@ import java.util.Optional; @InterfaceAudience.LimitedPrivate({ "MapReduce", "HBase" }) @InterfaceStability.Unstable public class DistributedFileSystem extends FileSystem - implements KeyProviderTokenIssuer { + implements KeyProviderTokenIssuer, BatchListingOperations { private Path workingDir; private URI uri; @@ -3575,6 +3579,15 @@ public class DistributedFileSystem extends FileSystem if (cap.isPresent()) { return cap.get(); } + // this switch is for features which are in the DFS client but not + // (yet/ever) in the WebHDFS API. 
+ switch (validatePathCapabilityArgs(path, capability)) { + case CommonPathCapabilities.FS_EXPERIMENTAL_BATCH_LISTING: + return true; + default: + // fall through + } + return super.hasPathCapability(p, capability); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBatchedListDirectories.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBatchedListDirectories.java index f0b62bb45e0..081a0c5e916 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBatchedListDirectories.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBatchedListDirectories.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs; import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -253,6 +254,13 @@ public class TestBatchedListDirectories { } } + @Test + public void testDFSHasCapability() throws Throwable { + assertTrue("FS does not declare PathCapability support", + dfs.hasPathCapability(new Path("/"), + CommonPathCapabilities.FS_EXPERIMENTAL_BATCH_LISTING)); + } + private void listFilesInternal(int numFiles) throws Exception { List<Path> paths = FILE_PATHS.subList(0, numFiles); List<FileStatus> statuses = getStatuses(paths); @@ -384,7 +392,8 @@ public class TestBatchedListDirectories { @Override public Void run() throws Exception { // try renew with long name - FileSystem fs = FileSystem.get(cluster.getURI(), conf); + DistributedFileSystem fs = (DistributedFileSystem) + FileSystem.get(cluster.getURI(), conf); RemoteIterator<PartialListing<FileStatus>> it = fs.batchedListStatusIterator(paths); PartialListing<FileStatus> listing = it.next();