From 4a0a5664dd44623df86e73969dafd83a25347428 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Fri, 5 Dec 2008 17:03:13 +0000 Subject: [PATCH] LUCENE-1468: switch Directory.list() to Directory.listAll(), which does no filtering of returned array git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@723789 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 8 ++ .../benchmark/byTask/TestPerfTasksLogic.java | 3 +- .../lucene/index/DirectoryIndexReader.java | 4 +- .../apache/lucene/index/IndexFileDeleter.java | 8 +- .../org/apache/lucene/index/SegmentInfo.java | 7 +- .../org/apache/lucene/index/SegmentInfos.java | 13 ++- .../org/apache/lucene/store/Directory.java | 94 ++++++++++++------- .../org/apache/lucene/store/FSDirectory.java | 44 ++++++++- .../store/NoSuchDirectoryException.java | 31 ++++++ .../org/apache/lucene/store/RAMDirectory.java | 5 +- .../index/TestBackwardsCompatibility.java | 2 +- .../lucene/index/TestDeletionPolicy.java | 4 +- .../apache/lucene/index/TestFieldsReader.java | 3 + .../lucene/index/TestIndexFileDeleter.java | 6 +- .../apache/lucene/index/TestIndexReader.java | 20 +++- .../lucene/index/TestIndexReaderReopen.java | 2 +- .../apache/lucene/index/TestIndexWriter.java | 45 +++++++-- .../lucene/index/TestIndexWriterDelete.java | 4 +- .../index/TestIndexWriterMergePolicy.java | 4 +- .../org/apache/lucene/index/TestOmitTf.java | 2 +- .../lucene/store/TestBufferedIndexInput.java | 5 + .../apache/lucene/store/TestDirectory.java | 57 +++++++++++ 22 files changed, 290 insertions(+), 81 deletions(-) create mode 100644 src/java/org/apache/lucene/store/NoSuchDirectoryException.java diff --git a/CHANGES.txt b/CHANGES.txt index abb3d6ac050..afdbf6ef497 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -38,6 +38,14 @@ API Changes These methods can be used to avoid additional calls to doc(). (Michael Busch) +6. LUCENE-1468: Deprecate Directory.list(), which sometimes (in + FSDirectory) filters out files that don't look like index files, in + favor of new Directory.listAll(), which does no filtering. Also, + listAll() will never return null; instead, it throws an IOException + (or subclass). Specifically, FSDirectory.listAll() will throw the + newly added NoSuchDirectoryException if the directory does not + exist. (Marcel Reutegger, Mike McCandless) + Bug fixes 1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals() diff --git a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index 32dbcd343df..a3f31b22691 100755 --- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -24,7 +24,6 @@ import java.io.BufferedReader; import java.util.List; import java.util.Iterator; -import org.apache.lucene.benchmark.byTask.Benchmark; import org.apache.lucene.benchmark.byTask.feeds.DocData; import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException; import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker; @@ -712,7 +711,7 @@ public class TestPerfTasksLogic extends TestCase { ir.close(); // Make sure we have 3 segments: - final String[] files = benchmark.getRunData().getDirectory().list(); + final String[] files = benchmark.getRunData().getDirectory().listAll(); int cfsCount = 0; for(int i=0;i prefixLength && Character.isDigit(fileName.charAt(prefixLength)) && fileName.startsWith(prefix)) { + if (filter.accept(null, fileName) && fileName.length() > prefixLength && Character.isDigit(fileName.charAt(prefixLength)) && fileName.startsWith(prefix)) { files.add(fileName); } } diff --git a/src/java/org/apache/lucene/index/SegmentInfos.java b/src/java/org/apache/lucene/index/SegmentInfos.java index a6ae54ee6ed..a5c4815c4ba 100644 --- a/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/src/java/org/apache/lucene/index/SegmentInfos.java @@ -17,11 +17,13 @@ package org.apache.lucene.index; * limitations under the License. */ +import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.ChecksumIndexOutput; import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.NoSuchDirectoryException; import java.io.File; import java.io.FileNotFoundException; @@ -129,8 +131,11 @@ final class SegmentInfos extends Vector { * @param directory -- directory to search for the latest segments_N file */ public static long getCurrentSegmentGeneration(Directory directory) throws IOException { - String[] files = directory.list(); - return getCurrentSegmentGeneration(files); + try { + return getCurrentSegmentGeneration(directory.listAll()); + } catch (NoSuchDirectoryException nsde) { + return -1; + } } /** @@ -558,9 +563,9 @@ final class SegmentInfos extends Vector { long genA = -1; if (directory != null) - files = directory.list(); + files = directory.listAll(); else - files = fileDirectory.list(); + files = FSDirectory.listAll(fileDirectory); if (files != null) genA = getCurrentSegmentGeneration(files); diff --git a/src/java/org/apache/lucene/store/Directory.java b/src/java/org/apache/lucene/store/Directory.java index 0147cd5c11b..fac54473044 100644 --- a/src/java/org/apache/lucene/store/Directory.java +++ b/src/java/org/apache/lucene/store/Directory.java @@ -19,6 +19,8 @@ package org.apache.lucene.store; import java.io.IOException; +import org.apache.lucene.index.IndexFileNameFilter; + /** A Directory is a flat list of files. Files may be written once, when they * are created. Once a file is created it may only be opened for read, or * deleted. Random access is permitted both when reading and writing. @@ -43,14 +45,29 @@ public abstract class Directory { * this Directory instance). */ protected LockFactory lockFactory; - /** Returns an array of strings, one for each file in the - * directory. This method may return null (for example for - * {@link FSDirectory} if the underlying directory doesn't - * exist in the filesystem or there are permissions - * problems).*/ + /** @deprecated For some Directory implementations ({@link + * FSDirectory}, and its subclasses), this method + * silently filters its results to include only index + * files. Please use {@link #listAll} instead, which + * does no filtering. */ public abstract String[] list() throws IOException; + /** Returns an array of strings, one for each file in the + * directory. Unlike {@link #list} this method does no + * filtering of the contents in a directory, and it will + * never return null (throws IOException instead). + * + * Currently this method simply fallsback to {@link + * #list} for Directory impls outside of Lucene's core & + * contrib, but in 3.0 that method will be removed and + * this method will become abstract. */ + public String[] listAll() + throws IOException + { + return list(); + } + /** Returns true iff a file with the given name exists. */ public abstract boolean fileExists(String name) throws IOException; @@ -173,48 +190,55 @@ public abstract class Directory { * are undefined and you could easily hit a * FileNotFoundException. * + *

NOTE: this method only copies files that look + * like index files (ie, have extensions matching the + * known extensions of index files). + * * @param src source directory * @param dest destination directory * @param closeDirSrc if true, call {@link #close()} method on source directory * @throws IOException */ public static void copy(Directory src, Directory dest, boolean closeDirSrc) throws IOException { - final String[] files = src.list(); + final String[] files = src.listAll(); - if (files == null) - throw new IOException("cannot read directory " + src + ": list() returned null"); + IndexFileNameFilter filter = IndexFileNameFilter.getFilter(); - byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE]; - for (int i = 0; i < files.length; i++) { - IndexOutput os = null; - IndexInput is = null; + byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE]; + for (int i = 0; i < files.length; i++) { + + if (!filter.accept(null, files[i])) + continue; + + IndexOutput os = null; + IndexInput is = null; + try { + // create file in dest directory + os = dest.createOutput(files[i]); + // read current file + is = src.openInput(files[i]); + // and copy to dest directory + long len = is.length(); + long readCount = 0; + while (readCount < len) { + int toRead = readCount + BufferedIndexOutput.BUFFER_SIZE > len ? (int)(len - readCount) : BufferedIndexOutput.BUFFER_SIZE; + is.readBytes(buf, 0, toRead); + os.writeBytes(buf, toRead); + readCount += toRead; + } + } finally { + // graceful cleanup try { - // create file in dest directory - os = dest.createOutput(files[i]); - // read current file - is = src.openInput(files[i]); - // and copy to dest directory - long len = is.length(); - long readCount = 0; - while (readCount < len) { - int toRead = readCount + BufferedIndexOutput.BUFFER_SIZE > len ? (int)(len - readCount) : BufferedIndexOutput.BUFFER_SIZE; - is.readBytes(buf, 0, toRead); - os.writeBytes(buf, toRead); - readCount += toRead; - } + if (os != null) + os.close(); } finally { - // graceful cleanup - try { - if (os != null) - os.close(); - } finally { - if (is != null) - is.close(); - } + if (is != null) + is.close(); } } - if(closeDirSrc) - src.close(); + } + if(closeDirSrc) + src.close(); } /** diff --git a/src/java/org/apache/lucene/store/FSDirectory.java b/src/java/org/apache/lucene/store/FSDirectory.java index d4a6b99bcb4..ca32d85ce4d 100644 --- a/src/java/org/apache/lucene/store/FSDirectory.java +++ b/src/java/org/apache/lucene/store/FSDirectory.java @@ -18,6 +18,7 @@ package org.apache.lucene.store; */ import java.io.File; +import java.io.FilenameFilter; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; @@ -242,6 +243,7 @@ public class FSDirectory extends Directory { return dir; } + /** @deprecated */ private void create() throws IOException { if (directory.exists()) { String[] files = directory.list(IndexFileNameFilter.getFilter()); // clear old files @@ -265,9 +267,6 @@ public class FSDirectory extends Directory { final void createDir() throws IOException { if (!checked) { - if (directory.exists() && !directory.isDirectory()) - throw new IOException(directory + " not a directory"); - if (!directory.exists()) if (!directory.mkdirs()) throw new IOException("Cannot create directory: " + directory); @@ -304,6 +303,9 @@ public class FSDirectory extends Directory { directory = path; + if (directory.exists() && !directory.isDirectory()) + throw new NoSuchDirectoryException("file '" + directory + "' exists but is not a directory"); + boolean doClearLockID = false; if (lockFactory == null) { @@ -356,12 +358,46 @@ public class FSDirectory extends Directory { } } - /** Returns an array of strings, one for each Lucene index file in the directory. */ + /** Lists all files (not subdirectories) in the + * directory. This method never returns null (throws + * {@link IOException} instead). + * + * @throws NoSuchDirectoryException if the directory + * does not exist, or does exist but is not a + * directory. + * @throws IOException if list() returns null */ + public static String[] listAll(File dir) throws IOException { + if (!dir.exists()) + throw new NoSuchDirectoryException("directory '" + dir + "' does not exist"); + else if (!dir.isDirectory()) + throw new NoSuchDirectoryException("file '" + dir + "' exists but is not a directory"); + + // Exclude subdirs + String[] result = dir.list(new FilenameFilter() { + public boolean accept(File dir, String file) { + return !new File(dir, file).isDirectory(); + } + }); + + if (result == null) + throw new IOException("directory '" + dir + "' exists and is a directory, but cannot be listed: list() returned null"); + + return result; + } + public String[] list() { ensureOpen(); return directory.list(IndexFileNameFilter.getFilter()); } + /** Lists all files (not subdirectories) in the + * directory. + * @see #listAll(File) */ + public String[] listAll() throws IOException { + ensureOpen(); + return listAll(directory); + } + /** Returns true iff a file with the given name exists. */ public boolean fileExists(String name) { ensureOpen(); diff --git a/src/java/org/apache/lucene/store/NoSuchDirectoryException.java b/src/java/org/apache/lucene/store/NoSuchDirectoryException.java new file mode 100644 index 00000000000..ef46f490534 --- /dev/null +++ b/src/java/org/apache/lucene/store/NoSuchDirectoryException.java @@ -0,0 +1,31 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.FileNotFoundException; + +/** + * This exception is thrown when you try to list a + * non-existent directory. + */ + +public class NoSuchDirectoryException extends FileNotFoundException { + public NoSuchDirectoryException(String message) { + super(message); + } +} diff --git a/src/java/org/apache/lucene/store/RAMDirectory.java b/src/java/org/apache/lucene/store/RAMDirectory.java index 95723e1db40..85fe2699e61 100644 --- a/src/java/org/apache/lucene/store/RAMDirectory.java +++ b/src/java/org/apache/lucene/store/RAMDirectory.java @@ -95,8 +95,11 @@ public class RAMDirectory extends Directory implements Serializable { this(FSDirectory.getDirectory(dir), true); } - /** Returns an array of strings, one for each file in the directory. */ public synchronized final String[] list() { + return listAll(); + } + + public synchronized final String[] listAll() { ensureOpen(); Set fileNames = fileMap.keySet(); String[] result = new String[fileNames.size()]; diff --git a/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index fe5d1f7e13a..9655271d200 100644 --- a/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -447,7 +447,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase "segments_3", "segments.gen"}; - String[] actual = dir.list(); + String[] actual = dir.listAll(); Arrays.sort(expected); Arrays.sort(actual); if (!Arrays.equals(expected, actual)) { diff --git a/src/test/org/apache/lucene/index/TestDeletionPolicy.java b/src/test/org/apache/lucene/index/TestDeletionPolicy.java index 21aa21a6fc8..03793ecdff7 100644 --- a/src/test/org/apache/lucene/index/TestDeletionPolicy.java +++ b/src/test/org/apache/lucene/index/TestDeletionPolicy.java @@ -333,10 +333,10 @@ public class TestDeletionPolicy extends LuceneTestCase // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: - int preCount = dir.list().length; + int preCount = dir.listAll().length; writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.LIMITED); writer.close(); - int postCount = dir.list().length; + int postCount = dir.listAll().length; assertTrue(postCount < preCount); } } diff --git a/src/test/org/apache/lucene/index/TestFieldsReader.java b/src/test/org/apache/lucene/index/TestFieldsReader.java index 6277dc319cc..cf12c0dde47 100644 --- a/src/test/org/apache/lucene/index/TestFieldsReader.java +++ b/src/test/org/apache/lucene/index/TestFieldsReader.java @@ -315,6 +315,9 @@ public class TestFieldsReader extends LuceneTestCase { public String[] list() throws IOException { return fsDir.list(); } + public String[] listAll() throws IOException { + return fsDir.listAll(); + } public boolean fileExists(String name) throws IOException { return fsDir.fileExists(name); } diff --git a/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/src/test/org/apache/lucene/index/TestIndexFileDeleter.java index b9ba1051454..9cc38cd7c0f 100644 --- a/src/test/org/apache/lucene/index/TestIndexFileDeleter.java +++ b/src/test/org/apache/lucene/index/TestIndexFileDeleter.java @@ -65,7 +65,7 @@ public class TestIndexFileDeleter extends LuceneTestCase // Now, artificially create an extra .del file & extra // .s0 file: - String[] files = dir.list(); + String[] files = dir.listAll(); /* for(int j=0;j 1 but got " + gen, gen > 1); - String[] files = dir.list(); + String[] files = dir.listAll(); for(int i=0;i lastNumFile); @@ -4254,4 +4254,31 @@ public class TestIndexWriter extends LuceneTestCase r.close(); dir.close(); } + + // LUCENE-1468 -- make sure opening an IndexWriter with + // create=true does not remove non-index files + + public void testOtherFiles() throws Throwable { + File indexDir = new File(System.getProperty("tempDir"), "otherfiles"); + Directory dir = new FSDirectory(indexDir, null); + try { + // Create my own random file: + + IndexOutput out = dir.createOutput("myrandomfile"); + out.writeByte((byte) 42); + out.close(); + + new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED).close(); + + assertTrue(dir.fileExists("myrandomfile")); + + // Make sure this does not copy myrandomfile: + Directory dir2 = new RAMDirectory(dir); + assertTrue(!dir2.fileExists("myrandomfile")); + + } finally { + dir.close(); + _TestUtil.rmDir(indexDir); + } + } } diff --git a/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/src/test/org/apache/lucene/index/TestIndexWriterDelete.java index 9432cac4395..4a058938713 100644 --- a/src/test/org/apache/lucene/index/TestIndexWriterDelete.java +++ b/src/test/org/apache/lucene/index/TestIndexWriterDelete.java @@ -668,11 +668,11 @@ public class TestIndexWriterDelete extends LuceneTestCase { } } - String[] startFiles = dir.list(); + String[] startFiles = dir.listAll(); SegmentInfos infos = new SegmentInfos(); infos.read(dir); new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null); - String[] endFiles = dir.list(); + String[] endFiles = dir.listAll(); if (!Arrays.equals(startFiles, endFiles)) { fail("docswriter abort() failed to delete unreferenced files:\n before delete:\n " diff --git a/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java b/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java index d0d38c14274..10f92fcd69f 100755 --- a/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java +++ b/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java @@ -239,7 +239,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { assertTrue(numSegments < mergeFactor); } - String[] files = writer.getDirectory().list(); + String[] files = writer.getDirectory().listAll(); int segmentCfsCount = 0; for (int i = 0; i < files.length; i++) { if (files[i].endsWith(".cfs")) { @@ -249,6 +249,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { assertEquals(segmentCount, segmentCfsCount); } + /* private void printSegmentDocCounts(IndexWriter writer) { int segmentCount = writer.getSegmentCount(); System.out.println("" + segmentCount + " segments total"); @@ -257,4 +258,5 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { + " docs"); } } + */ } diff --git a/src/test/org/apache/lucene/index/TestOmitTf.java b/src/test/org/apache/lucene/index/TestOmitTf.java index 4dbc08acebf..bc8b06b7099 100644 --- a/src/test/org/apache/lucene/index/TestOmitTf.java +++ b/src/test/org/apache/lucene/index/TestOmitTf.java @@ -197,7 +197,7 @@ public class TestOmitTf extends LuceneTestCase { } private void assertNoPrx(Directory dir) throws Throwable { - final String[] files = dir.list(); + final String[] files = dir.listAll(); for(int i=0;i