LUCENE-3626: Make PKIndexSplitter and MultiPassIndexSplitter work per segment

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1212894 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2011-12-10 22:07:32 +00:00
parent 764059147f
commit ad7cb17bc9
4 changed files with 73 additions and 19 deletions
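
In broad strokes: both splitters previously wrapped the whole input index in a SlowMultiReaderWrapper and fed a single composite reader to IndexWriter.addIndexes(); after this change they enumerate the atomic sub-readers (the segments) and wrap each one individually. A minimal sketch of the new pattern, assuming the trunk API of this era (the class name and comments are illustrative, not part of the commit):

    import java.io.IOException;
    import java.util.ArrayList;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.util.ReaderUtil;

    final class PerSegmentSketch {
      // Gather the atomic sub-readers of a (possibly composite) reader, wrap
      // each one, and hand all wrappers to addIndexes() in a single call.
      static void addPerSegment(IndexWriter writer, IndexReader reader) throws IOException {
        final ArrayList<IndexReader> subs = new ArrayList<IndexReader>();
        new ReaderUtil.Gather(reader) {
          @Override
          protected void add(int base, IndexReader r) {
            subs.add(r); // a real caller would wrap r in a per-segment wrapper here
          }
        }.run();
        writer.addIndexes(subs.toArray(new IndexReader[subs.size()]));
      }
    }

This sidesteps SlowMultiReaderWrapper entirely, so addIndexes() can copy each segment directly.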

org/apache/lucene/index/MultiPassIndexSplitter.java

@@ -21,12 +21,12 @@ import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
import org.apache.lucene.index.IndexWriter; // javadoc
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;
/**
@@ -174,15 +174,60 @@ public class MultiPassIndexSplitter {
}
/**
-   * This class pretends that it can write deletions to the underlying index.
-   * Instead, deletions are buffered in a bitset and overlaid with the original
-   * list of deletions.
+   * This class emulates deletions on the underlying index.
*/
-  public static final class FakeDeleteIndexReader extends FilterIndexReader {
+  private static final class FakeDeleteIndexReader extends MultiReader {
+    public FakeDeleteIndexReader(IndexReader reader) throws IOException {
+      super(initSubReaders(reader), false /* dont close */);
+    }
+
+    private static IndexReader[] initSubReaders(IndexReader reader) throws IOException {
+      final ArrayList<IndexReader> subs = new ArrayList<IndexReader>();
+      new ReaderUtil.Gather(reader) {
+        @Override
+        protected void add(int base, IndexReader r) {
+          subs.add(new FakeDeleteAtomicIndexReader(r));
+        }
+      }.run();
+      return subs.toArray(new IndexReader[subs.size()]);
+    }
+    public void deleteDocument(int docID) {
+      final int i = readerIndex(docID);
+      ((FakeDeleteAtomicIndexReader) subReaders[i]).deleteDocument(docID - starts[i]);
+    }
+
+    public void undeleteAll() {
+      for (IndexReader r : subReaders) {
+        ((FakeDeleteAtomicIndexReader) r).undeleteAll();
+      }
+    }
+
+    // override this as MultiReader precalculates the number of deletions
+    // (this method is never used by MultiPassIndexSplitter)
+    @Override
+    public int numDocs() {
+      int n = 0;
+      for (int i = 0; i < subReaders.length; i++)
+        n += subReaders[i].numDocs();
+      return n;
+    }
+
+    // override this as MultiReader precalculates the number of deletions
+    // (this method is never used by MultiPassIndexSplitter)
+    @Override
+    public boolean hasDeletions() {
+      return (maxDoc() != numDocs());
+    }
+  }
+
+  private static final class FakeDeleteAtomicIndexReader extends FilterIndexReader {
FixedBitSet liveDocs;
-    public FakeDeleteIndexReader(IndexReader in) {
-      super(new SlowMultiReaderWrapper(in));
+    public FakeDeleteAtomicIndexReader(IndexReader reader) {
+      super(reader);
undeleteAll(); // initialize main bitset
}
@@ -191,7 +236,7 @@ public class MultiPassIndexSplitter {
return liveDocs.cardinality();
}
-    void undeleteAll() {
+    public void undeleteAll() {
final int maxDoc = in.maxDoc();
liveDocs = new FixedBitSet(in.maxDoc());
if (in.hasDeletions()) {
@@ -207,7 +252,7 @@ public class MultiPassIndexSplitter {
}
}
-    void deleteDocument(int n) {
+    public void deleteDocument(int n) {
liveDocs.clear(n);
}
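
What makes the "fake" deletes work: nothing is ever written to the underlying index. Each segment's live documents are copied into a FixedBitSet, and a deletion merely clears a bit in that overlay, which is also why undeleteAll() can rebuild the set cheaply. A condensed sketch of the overlay idea (assuming IndexReader.getLiveDocs() of this era, which returns null when a segment has no deletions; docID is whatever document we want gone):

    // Build the overlay: start with every doc live, then carry over the
    // segment's real deletions; a fake delete is one cleared bit.
    final FixedBitSet liveDocs = new FixedBitSet(in.maxDoc());
    liveDocs.set(0, in.maxDoc());
    final Bits oldLiveDocs = in.getLiveDocs();
    if (oldLiveDocs != null) {
      for (int i = 0; i < in.maxDoc(); i++) {
        if (!oldLiveDocs.get(i)) liveDocs.clear(i);
      }
    }
    liveDocs.clear(docID); // "delete" docID without touching the index files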

org/apache/lucene/index/PKIndexSplitter.java

@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
@@ -28,6 +29,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;
/**
@@ -99,9 +101,14 @@ public class PKIndexSplitter {
private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
boolean success = false;
-    IndexWriter w = new IndexWriter(target, config);
+    final IndexWriter w = new IndexWriter(target, config);
try {
-      w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
+      final AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
+      final IndexReader[] subReaders = new IndexReader[leaves.length];
+      for (int i = 0; i < leaves.length; i++) {
+        subReaders[i] = new DocumentFilteredAtomicIndexReader(leaves[i], preserveFilter, negateFilter);
+      }
+      w.addIndexes(subReaders);
success = true;
} finally {
if (success) {
@@ -112,17 +119,16 @@ public class PKIndexSplitter {
}
}
-  public static class DocumentFilteredIndexReader extends FilterIndexReader {
+  private static class DocumentFilteredAtomicIndexReader extends FilterIndexReader {
final Bits liveDocs;
final int numDocs;
-    public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
-      super(new SlowMultiReaderWrapper(reader));
+    public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, boolean negateFilter) throws IOException {
+      super(context.reader);
final int maxDoc = in.maxDoc();
final FixedBitSet bits = new FixedBitSet(maxDoc);
// ignore livedocs here, as we filter them later:
-      final DocIdSet docs = preserveFilter.getDocIdSet((AtomicReaderContext) in.getTopReaderContext(), null);
+      final DocIdSet docs = preserveFilter.getDocIdSet(context, null);
if (docs != null) {
final DocIdSetIterator it = docs.iterator();
if (it != null) {
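
For context, a hypothetical end-to-end use of the class (the constructor shape is assumed from this era's API and may differ; sourceDir, dir1, and dir2 are placeholder Directory instances): documents on one side of the primary-key mid term land in the first output directory, everything else in the second.

    // Hypothetical usage: docs with id < "0005000" go to dir1, the rest to dir2.
    PKIndexSplitter splitter = new PKIndexSplitter(
        Version.LUCENE_40, sourceDir, dir1, dir2, new Term("id", "0005000"));
    splitter.split();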

org/apache/lucene/index/TestMultiPassIndexSplitter.java

@@ -33,13 +33,14 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
public void setUp() throws Exception {
super.setUp();
dir = newDirectory();
-    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
Document doc;
for (int i = 0; i < NUM_DOCS; i++) {
doc = new Document();
doc.add(newField("id", i + "", StringField.TYPE_STORED));
doc.add(newField("f", i + " " + i, TextField.TYPE_STORED));
w.addDocument(doc);
+      if (i%3==0) w.commit();
}
w.commit();
w.deleteDocuments(new Term("id", "" + (NUM_DOCS-1)));
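
The setUp() change is what makes the new code path observable: with the old newLogMergePolicy() the writer could merge everything down to one segment, and a per-segment splitter run on a single-segment index proves nothing. NoMergePolicy keeps every flushed segment, and the commit every third document forces several flushes. The pattern in isolation (makeDoc() is a hypothetical document factory, not part of the test):

    // Force a multi-segment index: disable merging and commit periodically.
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
    for (int i = 0; i < NUM_DOCS; i++) {
      w.addDocument(makeDoc(i));   // hypothetical helper
      if (i % 3 == 0) w.commit();  // each commit seals the current segment
    }
    w.commit();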

org/apache/lucene/index/TestPKIndexSplitter.java

@@ -37,14 +37,16 @@ public class TestPKIndexSplitter extends LuceneTestCase {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-        .setOpenMode(OpenMode.CREATE));
+        .setOpenMode(OpenMode.CREATE).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
for (int x = 0; x < 11; x++) {
Document doc = createDocument(x, "1", 3, format);
w.addDocument(doc);
+      if (x%3==0) w.commit();
}
for (int x = 11; x < 20; x++) {
Document doc = createDocument(x, "2", 3, format);
w.addDocument(doc);
+      if (x%3==0) w.commit();
}
w.close();
@@ -55,7 +57,7 @@ public class TestPKIndexSplitter extends LuceneTestCase {
// delete some documents
w = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-      .setOpenMode(OpenMode.APPEND));
+      .setOpenMode(OpenMode.APPEND).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
w.deleteDocuments(midTerm);
w.deleteDocuments(new Term("id", format.format(2)));
w.close();
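
Finally, a hypothetical driver for the other splitter touched here (the split() signature is assumed from this era's API; sourceDir and the out* directories are placeholders): MultiPassIndexSplitter makes one pass per output, fake-deleting every document outside the current part and then copying what remains.

    // Split the source into three parts of contiguous doc ranges (seq=true);
    // with seq=false documents would be assigned round-robin instead.
    IndexReader input = IndexReader.open(sourceDir);
    try {
      new MultiPassIndexSplitter().split(Version.LUCENE_40, input,
          new Directory[] { out1, out2, out3 }, true);
    } finally {
      input.close();
    }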