LUCENE-4190: restrict allowed filenames to reduce risk of deleting non-lucene file from the index directory

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1366881 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-07-29 17:59:49 +00:00
parent 85b13adcd4
commit 08baaf03e5
6 changed files with 136 additions and 53 deletions

View File

@ -116,6 +116,12 @@ Bug Fixes
* LUCENE-4245: Make IndexWriter#close() and MergeScheduler#close()
non-interruptible. (Mark Miller, Uwe Schindler)
* LUCENE-4190: restrict allowed filenames that a codec may create to
the patterns recognized by IndexFileNames. This also fixes
IndexWriter to only delete files matching this pattern from an index
directory, to reduce risk when the wrong index path is accidentally
passed to IndexWriter (Robert Muir, Mike McCandless)
Changes in Runtime Behavior
* LUCENE-4109: Enable position increments in the flexible queryparser by default.

View File

@ -25,6 +25,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NoSuchDirectoryException;
@ -146,57 +147,61 @@ final class IndexFileDeleter {
// it means the directory is empty, so ignore it.
files = new String[0];
}
for (String fileName : files) {
if (!fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
// Add this file to refCounts with initial count 0:
getRefCount(fileName);
if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
// This is a commit (segments or segments_N), and
// it's valid (<= the max gen). Load it, then
// incref all files it refers to:
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "init: load commit \"" + fileName + "\"");
}
SegmentInfos sis = new SegmentInfos();
try {
sis.read(directory, fileName);
} catch (FileNotFoundException e) {
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth
// between machines, it's very likely that the
// dir listing will be stale and will claim a
// file segments_X exists when in fact it
// doesn't. So, we catch this and handle it
// as if the file does not exist
if (currentSegmentsFile != null) {
Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
for (String fileName : files) {
m.reset(fileName);
if (!fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)
&& (m.matches() || fileName.startsWith(IndexFileNames.SEGMENTS))) {
// Add this file to refCounts with initial count 0:
getRefCount(fileName);
if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
// This is a commit (segments or segments_N), and
// it's valid (<= the max gen). Load it, then
// incref all files it refers to:
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
infoStream.message("IFD", "init: load commit \"" + fileName + "\"");
}
sis = null;
} catch (IOException e) {
if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen && directory.fileLength(fileName) > 0) {
throw e;
} else {
// Most likely we are opening an index that
// has an aborted "future" commit, so suppress
// exc in this case
SegmentInfos sis = new SegmentInfos();
try {
sis.read(directory, fileName);
} catch (FileNotFoundException e) {
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth
// between machines, it's very likely that the
// dir listing will be stale and will claim a
// file segments_X exists when in fact it
// doesn't. So, we catch this and handle it
// as if the file does not exist
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
}
sis = null;
} catch (IOException e) {
if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen && directory.fileLength(fileName) > 0) {
throw e;
} else {
// Most likely we are opening an index that
// has an aborted "future" commit, so suppress
// exc in this case
sis = null;
}
}
}
if (sis != null) {
final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
if (sis.getGeneration() == segmentInfos.getGeneration()) {
currentCommitPoint = commitPoint;
}
commits.add(commitPoint);
incRef(sis, true);
if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) {
lastSegmentInfos = sis;
if (sis != null) {
final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
if (sis.getGeneration() == segmentInfos.getGeneration()) {
currentCommitPoint = commitPoint;
}
commits.add(commitPoint);
incRef(sis, true);
if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) {
lastSegmentInfos = sis;
}
}
}
}

View File

@ -17,6 +17,8 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.util.regex.Pattern;
import org.apache.lucene.codecs.Codec;
// TODO: put all files under codec and remove all the static extensions here
@ -189,4 +191,8 @@ public final class IndexFileNames {
}
return filename;
}
// All files created by codecs much match this pattern (we
// check this in SegmentInfo.java):
static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[a-z0-9]+(_.*)?\\..*");
}

View File

@ -24,6 +24,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.store.Directory;
@ -242,16 +243,31 @@ public final class SegmentInfo {
private Set<String> setFiles;
public void setFiles(Set<String> files) {
checkFileNames(files);
setFiles = files;
sizeInBytes = -1;
}
public void addFiles(Collection<String> files) {
checkFileNames(files);
setFiles.addAll(files);
sizeInBytes = -1;
}
public void addFile(String file) {
checkFileNames(Collections.singleton(file));
setFiles.add(file);
sizeInBytes = -1;
}
private void checkFileNames(Collection<String> files) {
Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
for (String file : files) {
m.reset(file);
if (!m.matches()) {
throw new IllegalArgumentException("invalid codec filename '" + file + "', must match: " + IndexFileNames.CODEC_FILE_PATTERN.pattern());
}
}
}
/**

View File

@ -128,13 +128,13 @@ public class TestDoc extends LuceneTestCase {
printSegment(out, si2);
writer.close();
SegmentInfoPerCommit siMerge = merge(directory, si1, si2, "merge", false);
SegmentInfoPerCommit siMerge = merge(directory, si1, si2, "_merge", false);
printSegment(out, siMerge);
SegmentInfoPerCommit siMerge2 = merge(directory, si1, si2, "merge2", false);
SegmentInfoPerCommit siMerge2 = merge(directory, si1, si2, "_merge2", false);
printSegment(out, siMerge2);
SegmentInfoPerCommit siMerge3 = merge(directory, siMerge, siMerge2, "merge3", false);
SegmentInfoPerCommit siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", false);
printSegment(out, siMerge3);
directory.close();
@ -163,13 +163,13 @@ public class TestDoc extends LuceneTestCase {
printSegment(out, si2);
writer.close();
siMerge = merge(directory, si1, si2, "merge", true);
siMerge = merge(directory, si1, si2, "_merge", true);
printSegment(out, siMerge);
siMerge2 = merge(directory, si1, si2, "merge2", true);
siMerge2 = merge(directory, si1, si2, "_merge2", true);
printSegment(out, siMerge2);
siMerge3 = merge(directory, siMerge, siMerge2, "merge3", true);
siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", true);
printSegment(out, siMerge3);
directory.close();

View File

@ -46,6 +46,7 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.LockObtainFailedException;
@ -1836,4 +1837,53 @@ public class TestIndexWriter extends LuceneTestCase {
w.close();
dir.close();
}
//LUCENE-1468 -- make sure opening an IndexWriter with
// create=true does not remove non-index files
public void testOtherFiles() throws Throwable {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
iw.addDocument(new Document());
iw.close();
try {
// Create my own random file:
IndexOutput out = dir.createOutput("myrandomfile", newIOContext(random()));
out.writeByte((byte) 42);
out.close();
new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))).close();
assertTrue(dir.fileExists("myrandomfile"));
} finally {
dir.close();
}
}
// here we do better, there is no current segments file, so we don't delete anything.
// however, if you actually go and make a commit, the next time you run indexwriter
// this file will be gone.
public void testOtherFiles2() throws Throwable {
Directory dir = newDirectory();
try {
// Create my own random file:
IndexOutput out = dir.createOutput("_a.frq", newIOContext(random()));
out.writeByte((byte) 42);
out.close();
new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))).close();
assertTrue(dir.fileExists("_a.frq"));
IndexWriter iw = new IndexWriter(dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
iw.addDocument(new Document());
iw.close();
assertFalse(dir.fileExists("_a.frq"));
} finally {
dir.close();
}
}
}