LUCENE-3626: Make PKIndexSplitter and MultiPassIndexSplitter work per segment

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1212894 13f79535-47bb-0310-9956-ffa450edef68
Author: Uwe Schindler
Date:   2011-12-10 22:07:32 +00:00
Parent: 764059147f
Commit: ad7cb17bc9

4 changed files with 73 additions and 19 deletions

MultiPassIndexSplitter.java

@@ -21,12 +21,12 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 
-import org.apache.lucene.index.IndexWriter; // javadoc
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.Version;
 
 /**
@@ -174,15 +174,60 @@ public class MultiPassIndexSplitter {
   }
 
   /**
-   * This class pretends that it can write deletions to the underlying index.
+   * This class emulates deletions on the underlying index.
+   * Instead, deletions are buffered in a bitset and overlaid with the original
+   * list of deletions.
    */
-  public static final class FakeDeleteIndexReader extends FilterIndexReader {
+  private static final class FakeDeleteIndexReader extends MultiReader {
+
+    public FakeDeleteIndexReader(IndexReader reader) throws IOException {
+      super(initSubReaders(reader), false /* dont close */);
+    }
+
+    private static IndexReader[] initSubReaders(IndexReader reader) throws IOException {
+      final ArrayList<IndexReader> subs = new ArrayList<IndexReader>();
+      new ReaderUtil.Gather(reader) {
+        @Override
+        protected void add(int base, IndexReader r) {
+          subs.add(new FakeDeleteAtomicIndexReader(r));
+        }
+      }.run();
+      return subs.toArray(new IndexReader[subs.size()]);
+    }
+
+    public void deleteDocument(int docID) {
+      final int i = readerIndex(docID);
+      ((FakeDeleteAtomicIndexReader) subReaders[i]).deleteDocument(docID - starts[i]);
+    }
+
+    public void undeleteAll() {
+      for (IndexReader r : subReaders) {
+        ((FakeDeleteAtomicIndexReader) r).undeleteAll();
+      }
+    }
+
+    // override this as MultiReader precalculates the number of deletions
+    // (this method is never used by MultiPassIndexSplitter)
+    @Override
+    public int numDocs() {
+      int n = 0;
+      for (int i = 0; i < subReaders.length; i++)
+        n += subReaders[i].numDocs();
+      return n;
+    }
+
+    // override this as MultiReader precalculates the number of deletions
+    // (this method is never used by MultiPassIndexSplitter)
+    @Override
+    public boolean hasDeletions() {
+      return (maxDoc() != numDocs());
+    }
+  }
+
+  private static final class FakeDeleteAtomicIndexReader extends FilterIndexReader {
     FixedBitSet liveDocs;
 
-    public FakeDeleteIndexReader(IndexReader in) {
-      super(new SlowMultiReaderWrapper(in));
+    public FakeDeleteAtomicIndexReader(IndexReader reader) {
+      super(reader);
       undeleteAll(); // initialize main bitset
     }
@@ -191,7 +236,7 @@ public class MultiPassIndexSplitter {
       return liveDocs.cardinality();
     }
 
-    void undeleteAll() {
+    public void undeleteAll() {
       final int maxDoc = in.maxDoc();
       liveDocs = new FixedBitSet(in.maxDoc());
       if (in.hasDeletions()) {
@@ -207,7 +252,7 @@ public class MultiPassIndexSplitter {
       }
     }
 
-    void deleteDocument(int n) {
+    public void deleteDocument(int n) {
      liveDocs.clear(n);
    }
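
The core of this change is that FakeDeleteIndexReader no longer flattens the index through SlowMultiReaderWrapper; it gathers the atomic (per-segment) readers with ReaderUtil.Gather, wraps each in a FakeDeleteAtomicIndexReader, and exposes them through a MultiReader, mapping global docIDs to segment-local ones via readerIndex() and starts[]. The gathering pattern in isolation looks roughly like the following sketch (SegmentGatherExample and segmentsOf are invented names for illustration, not part of the commit):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ReaderUtil;

// Hypothetical helper, not part of the commit: collects the atomic
// (segment-level) sub-readers of a composite reader, the same way
// FakeDeleteIndexReader.initSubReaders does above.
public class SegmentGatherExample {

  public static IndexReader[] segmentsOf(IndexReader reader) throws IOException {
    final List<IndexReader> subs = new ArrayList<IndexReader>();
    new ReaderUtil.Gather(reader) {
      @Override
      protected void add(int base, IndexReader r) {
        // base is the docID offset of segment r within the composite reader;
        // a per-segment wrapper would be created here instead of adding r directly
        subs.add(r);
      }
    }.run();
    return subs.toArray(new IndexReader[subs.size()]);
  }
}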

PKIndexSplitter.java

@@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
@@ -28,6 +29,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.Version;
 
 /**
@@ -99,9 +101,14 @@ public class PKIndexSplitter {
   private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
     boolean success = false;
-    IndexWriter w = new IndexWriter(target, config);
+    final IndexWriter w = new IndexWriter(target, config);
     try {
-      w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
+      final AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
+      final IndexReader[] subReaders = new IndexReader[leaves.length];
+      for (int i = 0; i < leaves.length; i++) {
+        subReaders[i] = new DocumentFilteredAtomicIndexReader(leaves[i], preserveFilter, negateFilter);
+      }
+      w.addIndexes(subReaders);
       success = true;
     } finally {
       if (success) {
@@ -112,17 +119,16 @@ public class PKIndexSplitter {
     }
   }
 
-  public static class DocumentFilteredIndexReader extends FilterIndexReader {
+  private static class DocumentFilteredAtomicIndexReader extends FilterIndexReader {
     final Bits liveDocs;
     final int numDocs;
 
-    public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
-      super(new SlowMultiReaderWrapper(reader));
+    public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, boolean negateFilter) throws IOException {
+      super(context.reader);
       final int maxDoc = in.maxDoc();
       final FixedBitSet bits = new FixedBitSet(maxDoc);
       // ignore livedocs here, as we filter them later:
-      final DocIdSet docs = preserveFilter.getDocIdSet((AtomicReaderContext) in.getTopReaderContext(), null);
+      final DocIdSet docs = preserveFilter.getDocIdSet(context, null);
       if (docs != null) {
         final DocIdSetIterator it = docs.iterator();
         if (it != null) {
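
On the PKIndexSplitter side, createIndex no longer feeds a single SlowMultiReaderWrapper to IndexWriter.addIndexes; it wraps every AtomicReaderContext leaf in a DocumentFilteredAtomicIndexReader and adds them in one call, so the Filter is evaluated per segment. A rough sketch of that per-segment filter evaluation, using the same API calls as the code above (SegmentFilterBitsExample and keptDocs are hypothetical names):

import java.io.IOException;

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.FixedBitSet;

// Hypothetical helper, not part of the commit: materializes the documents a
// Filter keeps within a single segment as a FixedBitSet.
public class SegmentFilterBitsExample {

  public static FixedBitSet keptDocs(Filter preserveFilter, AtomicReaderContext context) throws IOException {
    final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc());
    // passing null acceptDocs ignores existing deletions, as
    // DocumentFilteredAtomicIndexReader does; deletions are intersected later
    final DocIdSet docs = preserveFilter.getDocIdSet(context, null);
    if (docs != null) {
      final DocIdSetIterator it = docs.iterator();
      if (it != null) {
        int doc;
        while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          bits.set(doc); // segment-local docID
        }
      }
    }
    return bits;
  }
}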

TestMultiPassIndexSplitter.java

@@ -33,13 +33,14 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
   public void setUp() throws Exception {
     super.setUp();
     dir = newDirectory();
-    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     Document doc;
     for (int i = 0; i < NUM_DOCS; i++) {
       doc = new Document();
       doc.add(newField("id", i + "", StringField.TYPE_STORED));
       doc.add(newField("f", i + " " + i, TextField.TYPE_STORED));
       w.addDocument(doc);
+      if (i%3==0) w.commit();
     }
     w.commit();
     w.deleteDocuments(new Term("id", "" + (NUM_DOCS-1)));
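
Both tests switch the merge policy to NoMergePolicy.COMPOUND_FILES and commit every third document, so the index really contains several segments and the new per-segment code paths are exercised instead of collapsing back to a single segment. A rough sketch of that setup on its own, assuming a LuceneTestCase-style environment (newDirectory(), newIndexWriterConfig(), newField(), random and TEST_VERSION_CURRENT as used above, with the usual org.apache.lucene imports):

// Hypothetical fragment, not part of the commit: build a small multi-segment index.
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
    new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
for (int i = 0; i < 10; i++) {
  Document doc = new Document();
  doc.add(newField("id", Integer.toString(i), StringField.TYPE_STORED));
  w.addDocument(doc);
  if (i % 3 == 0) w.commit(); // each commit flushes a segment; nothing merges them away
}
w.close();

IndexReader r = IndexReader.open(dir);
// with merging disabled the index should now consist of more than one segment
assertTrue(ReaderUtil.leaves(r.getTopReaderContext()).length > 1);
r.close();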

TestPKIndexSplitter.java

@@ -37,14 +37,16 @@ public class TestPKIndexSplitter extends LuceneTestCase {
     Directory dir = newDirectory();
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-        .setOpenMode(OpenMode.CREATE));
+        .setOpenMode(OpenMode.CREATE).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     for (int x = 0; x < 11; x++) {
       Document doc = createDocument(x, "1", 3, format);
       w.addDocument(doc);
+      if (x%3==0) w.commit();
     }
     for (int x = 11; x < 20; x++) {
       Document doc = createDocument(x, "2", 3, format);
       w.addDocument(doc);
+      if (x%3==0) w.commit();
     }
     w.close();
@@ -55,7 +57,7 @@ public class TestPKIndexSplitter extends LuceneTestCase {
     // delete some documents
     w = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-        .setOpenMode(OpenMode.APPEND));
+        .setOpenMode(OpenMode.APPEND).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     w.deleteDocuments(midTerm);
     w.deleteDocuments(new Term("id", format.format(2)));
     w.close();
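
After a split, the output directories can be sanity-checked independently of how many segments the input had: the splitters copy only live documents, so the new indexes should report no deletions, and every surviving document of the source index should end up in exactly one half. A sketch of such a check (dir1, dir2 and originalLiveDocCount stand in for the test's own state; assertions assume a JUnit/LuceneTestCase context):

// Hypothetical sanity check, not part of the commit.
IndexReader left = IndexReader.open(dir1);
IndexReader right = IndexReader.open(dir2);
try {
  // the split indexes are written without deletions
  assertEquals(left.maxDoc(), left.numDocs());
  assertEquals(right.maxDoc(), right.numDocs());
  // every live document of the source index ends up in exactly one half
  assertEquals(originalLiveDocCount, left.numDocs() + right.numDocs());
} finally {
  left.close();
  right.close();
}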