From f965464f369dc75ce00d50492704cbfc451da364 Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Wed, 2 Dec 2020 12:37:08 -0600 Subject: [PATCH] Fix empty directory handling (#10319) Co-authored-by: Atul Mohan --- .../indexer/hadoop/FSSpideringIterator.java | 39 ++++++++++----- .../hadoop/FSSpideringIteratorTest.java | 47 ++++++++++++++++++- 2 files changed, 74 insertions(+), 12 deletions(-) diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/hadoop/FSSpideringIterator.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/hadoop/FSSpideringIterator.java index a07d46b6f2f..9c76cdd6c55 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/hadoop/FSSpideringIterator.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/hadoop/FSSpideringIterator.java @@ -29,6 +29,8 @@ import java.util.Iterator; import java.util.NoSuchElementException; /** + * An iterator that walks through the file tree for a given {@link Path} and returns {@link FileStatus} for every + * file encountered within the hierarchy. */ public class FSSpideringIterator implements Iterator { @@ -60,8 +62,9 @@ public class FSSpideringIterator implements Iterator private final FileSystem fs; private final FileStatus[] statii; + private FileStatus nextStatus = null; - FSSpideringIterator statuses = null; + private FSSpideringIterator statuses = null; int index = 0; public FSSpideringIterator( @@ -76,29 +79,43 @@ public class FSSpideringIterator implements Iterator @Override public boolean hasNext() { - if (statuses != null && !statuses.hasNext()) { - statuses = null; - index++; - } - return index < statii.length; + fetchNextIfNeeded(); + return nextStatus != null; } @Override public FileStatus next() { - while (hasNext()) { - if (statii[index].isDir()) { + fetchNextIfNeeded(); + if (nextStatus == null) { + throw new NoSuchElementException(); + } + FileStatus result = nextStatus; + nextStatus = null; + return result; + } + + private void fetchNextIfNeeded() + { + + while (nextStatus == null && index < statii.length) { + if (statii[index].isDirectory()) { if (statuses == null) { statuses = spiderPathPropagateExceptions(fs, statii[index].getPath()); } else if (statuses.hasNext()) { - return statuses.next(); + nextStatus = statuses.next(); + } else { + if (index == statii.length - 1) { + return; + } + statuses = null; + index++; } } else { ++index; - return statii[index - 1]; + nextStatus = statii[index - 1]; } } - throw new NoSuchElementException(); } @Override diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/FSSpideringIteratorTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/FSSpideringIteratorTest.java index fb52090389f..7df958d6ca2 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/FSSpideringIteratorTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/hadoop/FSSpideringIteratorTest.java @@ -40,7 +40,7 @@ import java.util.List; public class FSSpideringIteratorTest { @Test - public void testIterator() + public void testNonEmptyDirs() { String[] testFiles = {"file1", "file2", "file3", "file4", "file5"}; @@ -92,4 +92,49 @@ public class FSSpideringIteratorTest } } } + + @Test + public void testEmptyDirs() + { + File baseDir = FileUtils.createTempDir(); + + try { + new File(baseDir, "dir1").mkdir(); + + new File(baseDir, "dir2/subDir1").mkdirs(); + new File(baseDir, "dir2/subDir2").mkdirs(); + + new File(baseDir, "dir3/subDir1").mkdirs(); + + List files = Lists.newArrayList( + Iterables.transform( + FSSpideringIterator.spiderIterable( + FileSystem.getLocal(new Configuration()), + new Path(baseDir.toString()) + ), + new Function() + { + @Override + public String apply(@Nullable FileStatus input) + { + return input.getPath().getName(); + } + } + ) + ); + + Assert.assertTrue(files.isEmpty()); + } + catch (IOException e) { + throw new RuntimeException(e); + } + finally { + try { + FileUtils.deleteDirectory(baseDir); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + } }