LUCENE-6241: FSDirectory.listAll doesn't filter out subdirectories anymore

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1659621 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2015-02-13 17:30:54 +00:00
parent 2919db8728
commit 04e297adee
16 changed files with 47 additions and 39 deletions

View File

@ -86,6 +86,11 @@ Optimizations
positions lazily if the phrase query is in a conjunction with other queries.
(Robert Muir, Adrien Grand)
* LUCENE-6241: FSDirectory.listAll() doesn't filter out subdirectories anymore,
for faster performance. Subdirectories don't matter to Lucene. If you need to
filter out non-index files with some custom usage, you may want to look at
the IndexFileNames class. (Robert Muir)
API Changes
* LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()

View File

@ -43,7 +43,7 @@ import org.apache.lucene.util.IOUtils;
public abstract class Directory implements Closeable {
/**
* Returns an array of strings, one for each file in the directory.
* Returns an array of strings, one for each entry in the directory.
*
* @throws IOException in case of IO error
*/

View File

@ -166,19 +166,14 @@ public abstract class FSDirectory extends BaseDirectory {
}
}
/** Lists all files (not subdirectories) in the
/** Lists all files (including subdirectories) in the
* directory.
*
* @throws IOException if there was an I/O error during listing */
public static String[] listAll(Path dir) throws IOException {
List<String> entries = new ArrayList<>();
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir, new DirectoryStream.Filter<Path>() {
@Override
public boolean accept(Path entry) throws IOException {
return !Files.isDirectory(entry); // filter out entries that are definitely directories.
}
})) {
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
for (Path path : stream) {
entries.add(path.getFileName().toString());
}
@ -187,9 +182,6 @@ public abstract class FSDirectory extends BaseDirectory {
return entries.toArray(new String[entries.size()]);
}
/** Lists all files (not subdirectories) in the
* directory.
* @see #listAll(Path) */
@Override
public String[] listAll() throws IOException {
ensureOpen();

View File

@ -19,6 +19,7 @@ package org.apache.lucene.store;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -87,14 +88,16 @@ public class RAMDirectory extends BaseDirectory implements Accountable {
* @param dir a <code>Directory</code> value
* @exception IOException if an error occurs
*/
public RAMDirectory(Directory dir, IOContext context) throws IOException {
public RAMDirectory(FSDirectory dir, IOContext context) throws IOException {
this(dir, false, context);
}
private RAMDirectory(Directory dir, boolean closeDir, IOContext context) throws IOException {
private RAMDirectory(FSDirectory dir, boolean closeDir, IOContext context) throws IOException {
this();
for (String file : dir.listAll()) {
copyFrom(dir, file, file, context);
if (!Files.isDirectory(dir.getDirectory().resolve(file))) {
copyFrom(dir, file, file, context);
}
}
if (closeDir) {
dir.close();

View File

@ -394,7 +394,7 @@ public class TestAddIndexes extends LuceneTestCase {
setMergePolicy(newLogMergePolicy(4))
);
writer.addIndexes(aux, new MockDirectoryWrapper(random(), new RAMDirectory(aux, newIOContext(random()))));
writer.addIndexes(aux, new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(aux)));
assertEquals(1060, writer.maxDoc());
assertEquals(1000, writer.getDocCount(0));
writer.close();
@ -436,7 +436,7 @@ public class TestAddIndexes extends LuceneTestCase {
if (VERBOSE) {
System.out.println("\nTEST: now addIndexes");
}
writer.addIndexes(aux, new MockDirectoryWrapper(random(), new RAMDirectory(aux, newIOContext(random()))));
writer.addIndexes(aux, new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(aux)));
assertEquals(1020, writer.maxDoc());
assertEquals(1000, writer.getDocCount(0));
writer.close();
@ -686,7 +686,7 @@ public class TestAddIndexes extends LuceneTestCase {
final Directory[] dirs = new Directory[NUM_COPY];
for(int k=0;k<NUM_COPY;k++)
dirs[k] = new MockDirectoryWrapper(random(), new RAMDirectory(dir, newIOContext(random())));
dirs[k] = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(dir));
int j=0;

View File

@ -42,7 +42,6 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@ -525,7 +524,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
if (VERBOSE) {
System.out.println("TEST: cycle");
}
MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new RAMDirectory(startDir, newIOContext(random())));
MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(startDir));
dir.setPreventDoubleWrite(false);
dir.setAllowRandomFileNotFoundException(false);
// test uses IW unref'ed helper which is unaware of retries

View File

@ -982,7 +982,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
if (VERBOSE) {
System.out.println("TEST: iter " + i);
}
MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new RAMDirectory(startDir, newIOContext(random())));
MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(startDir));
conf = newIndexWriterConfig(new MockAnalyzer(random()))
.setMergeScheduler(new ConcurrentMergeScheduler());
((ConcurrentMergeScheduler) conf.getMergeScheduler()).setSuppressExceptions();

View File

@ -255,7 +255,7 @@ public class TestIndexWriterOnDiskFull extends LuceneTestCase {
}
// Make a new dir that will enforce disk usage:
MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new RAMDirectory(startDir, newIOContext(random())));
MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(startDir));
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setOpenMode(OpenMode.APPEND)
.setMergePolicy(newLogMergePolicy(false)));

View File

@ -463,7 +463,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
try {
final Directory[] dirs = new Directory[numDirs];
for (int k = 0; k < numDirs; k++)
dirs[k] = new MockDirectoryWrapper(random(), new RAMDirectory(addDir, newIOContext(random())));
dirs[k] = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(addDir));
//int j = 0;
//while (true) {
// System.out.println(Thread.currentThread().getName() + ": iter
@ -723,7 +723,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
final Directory[] dirs = new Directory[10];
for (int i=0;i<10;i++) {
dirs[i] = new MockDirectoryWrapper(random(), new RAMDirectory(dir1, newIOContext(random())));
dirs[i] = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(dir1));
}
DirectoryReader r = writer.getReader();

View File

@ -34,9 +34,9 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/** tests for writing term vectors */
public class TestTermVectorsWriter extends LuceneTestCase {
@ -413,7 +413,7 @@ public class TestTermVectorsWriter extends LuceneTestCase {
.setMergeScheduler(new SerialMergeScheduler())
.setMergePolicy(new LogDocMergePolicy()));
Directory[] indexDirs = {new MockDirectoryWrapper(random(), new RAMDirectory(dir, newIOContext(random())))};
Directory[] indexDirs = {new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(dir))};
writer.addIndexes(indexDirs);
writer.forceMerge(1);
writer.close();

View File

@ -30,9 +30,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
@ -70,7 +68,7 @@ public class TestBoolean2 extends LuceneTestCase {
searcher.setSimilarity(new DefaultSimilarity());
// Make big index
dir2 = new MockDirectoryWrapper(random(), new RAMDirectory(directory, IOContext.DEFAULT));
dir2 = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(directory));
// First multiply small test index:
mulFactor = 1;
@ -82,7 +80,7 @@ public class TestBoolean2 extends LuceneTestCase {
if (VERBOSE) {
System.out.println("\nTEST: cycle...");
}
final Directory copy = new MockDirectoryWrapper(random(), new RAMDirectory(dir2, IOContext.DEFAULT));
final Directory copy = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(dir2));
RandomIndexWriter w = new RandomIndexWriter(random(), dir2);
w.addIndexes(copy);
docCount = w.maxDoc();

View File

@ -142,7 +142,7 @@ public class TestDirectory extends BaseDirectoryTestCase {
Path path = createTempDir("testsubdir");
try {
Files.createDirectory(path.resolve("subdir"));
Directory fsDir = new SimpleFSDirectory(path);
FSDirectory fsDir = new SimpleFSDirectory(path);
assertEquals(0, new RAMDirectory(fsDir, newIOContext(random())).listAll().length);
} finally {
IOUtils.rm(path);

View File

@ -71,10 +71,10 @@ public class TestRAMDirectory extends BaseDirectoryTestCase {
// LUCENE-1468
public void testCopySubdir() throws Throwable {
Path path = createTempDir("testsubdir");
Directory fsDir = null;
FSDirectory fsDir = null;
try {
Files.createDirectory(path.resolve("subdir"));
fsDir = newFSDirectory(path);
fsDir = new SimpleFSDirectory(path);
assertEquals(0, new RAMDirectory(fsDir, newIOContext(random())).listAll().length);
} finally {
IOUtils.close(fsDir);
@ -85,7 +85,7 @@ public class TestRAMDirectory extends BaseDirectoryTestCase {
public void testRAMDirectory () throws IOException {
Path indexDir = buildIndex();
Directory dir = newFSDirectory(indexDir);
FSDirectory dir = new SimpleFSDirectory(indexDir);
MockDirectoryWrapper ramDir = new MockDirectoryWrapper(random(), new RAMDirectory(dir, newIOContext(random())));
// close the underlying directory
@ -118,7 +118,7 @@ public class TestRAMDirectory extends BaseDirectoryTestCase {
Path indexDir = buildIndex();
Directory dir = newFSDirectory(indexDir);
FSDirectory dir = new SimpleFSDirectory(indexDir);
final MockDirectoryWrapper ramDir = new MockDirectoryWrapper(random(), new RAMDirectory(dir, newIOContext(random())));
dir.close();

View File

@ -48,6 +48,7 @@ import java.util.zip.ZipInputStream;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
@ -100,7 +101,9 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Assert;
@ -1235,6 +1238,15 @@ public final class TestUtil {
}
}
/**
 * Builds an in-memory copy of the given directory.
 *
 * <p>A fresh {@link RAMDirectory} is created and every name reported by
 * {@code dir.listAll()} is copied into it under the same name, using
 * {@link IOContext#DEFAULT}.
 *
 * <p>NOTE(review): every listed entry is copied unconditionally; if the source
 * directory reports non-file entries, {@code copyFrom} presumably fails for
 * them -- confirm against the callers' directory implementations.
 *
 * @param dir the directory whose entries are copied
 * @return a new {@link RAMDirectory} holding a copy of each listed entry
 * @throws IOException if listing the source or copying an entry fails
 */
public static RAMDirectory ramCopyOf(Directory dir) throws IOException {
  final RAMDirectory copy = new RAMDirectory();
  final String[] names = dir.listAll();
  for (int i = 0; i < names.length; i++) {
    final String name = names[i];
    // Source and destination names are intentionally identical.
    copy.copyFrom(dir, name, name, IOContext.DEFAULT);
  }
  return copy;
}
/** List of characters that match {@link Character#isWhitespace} */
public static final char[] WHITESPACE_CHARACTERS = new char[] {
// :TODO: is this list exhaustive?

View File

@ -221,7 +221,8 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
public static long sizeOf(Directory directory, String file) throws IOException {
try {
return directory.fileLength(file);
} catch (FileNotFoundException | NoSuchFileException e) {
} catch (IOException e) {
// could be a race, file no longer exists, access denied, is a directory, etc.
return 0;
}
}

View File

@ -168,13 +168,11 @@ public class HdfsDirectory extends BaseDirectory {
return new String[] {};
}
for (FileStatus status : listStatus) {
if (!status.isDirectory()) {
files.add(status.getPath().getName());
}
files.add(status.getPath().getName());
}
return getNormalNames(files);
}
/**
 * Returns the value of the {@code hdfsDirPath} field.
 *
 * <p>NOTE(review): presumably this is the HDFS location backing this
 * directory -- confirm against the constructor, which is not visible here.
 *
 * @return the {@code hdfsDirPath} held by this instance
 */
public Path getHdfsDirPath() {
return hdfsDirPath;
}