mirror of https://github.com/apache/lucene.git

commit ad7cb17bc9 (parent 764059147f)

LUCENE-3626: Make PKIndexSplitter and MultiPassIndexSplitter work per segment

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1212894 13f79535-47bb-0310-9956-ffa450edef68
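For orientation, this is roughly how MultiPassIndexSplitter is driven from user code. The split(Version, IndexReader, Directory[], boolean) signature and the directory paths below are assumptions based on the contrib API of this era, not part of the commit:

  import java.io.File;

  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.MultiPassIndexSplitter;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.FSDirectory;
  import org.apache.lucene.util.Version;

  // Split one source index into three parts. The splitter copies documents by
  // marking everything outside the current part as deleted in a wrapper reader
  // and handing that reader to IndexWriter.addIndexes().
  public class SplitExample {
    public static void main(String[] args) throws Exception {
      IndexReader input = IndexReader.open(FSDirectory.open(new File("src-index")));
      Directory[] outputs = new Directory[] {
        FSDirectory.open(new File("part-0")),
        FSDirectory.open(new File("part-1")),
        FSDirectory.open(new File("part-2")),
      };
      new MultiPassIndexSplitter().split(Version.LUCENE_40, input, outputs, false); // false = round-robin assignment
      input.close();
    }
  }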
MultiPassIndexSplitter.java:

@@ -21,12 +21,12 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.index.IndexWriter; // javadoc
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;

/**
@@ -174,15 +174,60 @@ public class MultiPassIndexSplitter {
  }

  /**
-  * This class pretends that it can write deletions to the underlying index.
-  * Instead, deletions are buffered in a bitset and overlaid with the original
-  * list of deletions.
+  * This class emulates deletions on the underlying index.
   */
- public static final class FakeDeleteIndexReader extends FilterIndexReader {
+ private static final class FakeDeleteIndexReader extends MultiReader {

+   public FakeDeleteIndexReader(IndexReader reader) throws IOException {
+     super(initSubReaders(reader), false /* dont close */);
+   }
+
+   private static IndexReader[] initSubReaders(IndexReader reader) throws IOException {
+     final ArrayList<IndexReader> subs = new ArrayList<IndexReader>();
+     new ReaderUtil.Gather(reader) {
+       @Override
+       protected void add(int base, IndexReader r) {
+         subs.add(new FakeDeleteAtomicIndexReader(r));
+       }
+     }.run();
+     return subs.toArray(new IndexReader[subs.size()]);
+   }
+
+   public void deleteDocument(int docID) {
+     final int i = readerIndex(docID);
+     ((FakeDeleteAtomicIndexReader) subReaders[i]).deleteDocument(docID - starts[i]);
+   }
+
+   public void undeleteAll() {
+     for (IndexReader r : subReaders) {
+       ((FakeDeleteAtomicIndexReader) r).undeleteAll();
+     }
+   }
+
+   // override this as MultiReader precalculates the number of deletions
+   // (this method is never used by MultiPassIndexSplitter)
+   @Override
+   public int numDocs() {
+     int n = 0;
+     for (int i = 0; i < subReaders.length; i++)
+       n += subReaders[i].numDocs();
+     return n;
+   }
+
+   // override this as MultiReader precalculates the number of deletions
+   // (this method is never used by MultiPassIndexSplitter)
+   @Override
+   public boolean hasDeletions() {
+     return (maxDoc() != numDocs());
+   }
+
+ }
+
+ private static final class FakeDeleteAtomicIndexReader extends FilterIndexReader {
    FixedBitSet liveDocs;

-   public FakeDeleteIndexReader(IndexReader in) {
-     super(new SlowMultiReaderWrapper(in));
+   public FakeDeleteAtomicIndexReader(IndexReader reader) {
+     super(reader);
      undeleteAll(); // initialize main bitset
    }
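The FakeDeleteIndexReader above is the per-segment heart of the change: instead of flattening the input with SlowMultiReaderWrapper, it gathers every atomic sub-reader and wraps each one. A minimal standalone sketch of that gathering pattern, grounded in the initSubReaders() code shown above (the class and method names here are illustrative):

  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.List;

  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.util.ReaderUtil;

  // Collect the atomic (segment-level) sub-readers of a possibly composite reader.
  // ReaderUtil.Gather calls add(base, r) once per atomic sub-reader; `base` is the
  // sub-reader's starting doc id within the composite reader.
  final class SegmentGatherExample {
    static IndexReader[] gatherSegments(IndexReader reader) throws IOException {
      final List<IndexReader> subs = new ArrayList<IndexReader>();
      new ReaderUtil.Gather(reader) {
        @Override
        protected void add(int base, IndexReader r) {
          subs.add(r); // a splitter would wrap r here, e.g. to overlay fake deletions
        }
      }.run();
      return subs.toArray(new IndexReader[subs.size()]);
    }
  }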
@@ -191,7 +236,7 @@ public class MultiPassIndexSplitter {
      return liveDocs.cardinality();
    }

-   void undeleteAll() {
+   public void undeleteAll() {
      final int maxDoc = in.maxDoc();
      liveDocs = new FixedBitSet(in.maxDoc());
      if (in.hasDeletions()) {

@@ -207,7 +252,7 @@ public class MultiPassIndexSplitter {
        }
      }

-   void deleteDocument(int n) {
+   public void deleteDocument(int n) {
      liveDocs.clear(n);
    }
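The surrounding split() method is not part of this diff. Roughly, it drives FakeDeleteIndexReader as sketched below: for every output partition it marks all documents that do not belong to that partition as deleted, copies the reader into a fresh IndexWriter, and then clears the fake deletions for the next pass. The helper below is an illustrative reconstruction written as if it lived inside MultiPassIndexSplitter; the real method also supports a sequential (contiguous ranges) mode and differs in its details:

  // Round-robin splitting sketch (assumed shape, not the committed split() code).
  private void splitRoundRobin(Version version, IndexReader input, Directory[] outputs) throws IOException {
    final FakeDeleteIndexReader in = new FakeDeleteIndexReader(input);
    final int maxDoc = in.maxDoc();
    for (int part = 0; part < outputs.length; part++) {
      in.undeleteAll();
      for (int doc = 0; doc < maxDoc; doc++) {
        if (doc % outputs.length != part) {
          in.deleteDocument(doc); // fake deletion, recorded in the per-segment bitsets
        }
      }
      IndexWriter w = new IndexWriter(outputs[part],
          new IndexWriterConfig(version, null).setOpenMode(OpenMode.CREATE));
      try {
        w.addIndexes(in); // only documents still "live" in the wrapper are copied
      } finally {
        w.close();
      }
    }
  }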
PKIndexSplitter.java:

@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;

import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;

@@ -28,6 +29,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;

/**
@@ -99,9 +101,14 @@ public class PKIndexSplitter {

  private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
    boolean success = false;
-   IndexWriter w = new IndexWriter(target, config);
+   final IndexWriter w = new IndexWriter(target, config);
    try {
-     w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
+     final AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
+     final IndexReader[] subReaders = new IndexReader[leaves.length];
+     for (int i = 0; i < leaves.length; i++) {
+       subReaders[i] = new DocumentFilteredAtomicIndexReader(leaves[i], preserveFilter, negateFilter);
+     }
+     w.addIndexes(subReaders);
      success = true;
    } finally {
      if (success) {
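For completeness, a hedged usage sketch of PKIndexSplitter itself. The constructor taking (Version, Directory, Directory, Directory, Term) and the parameterless split() call reflect the contrib API of this era; treat the exact signatures, the "id" field, and the paths below as assumptions rather than part of this commit:

  import java.io.File;

  import org.apache.lucene.index.PKIndexSplitter;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.FSDirectory;
  import org.apache.lucene.util.Version;

  // Split an index in two around a primary-key term: documents whose "id" sorts
  // below the mid term go to dir1, the remaining documents go to dir2.
  public class PKSplitExample {
    public static void main(String[] args) throws Exception {
      Directory input = FSDirectory.open(new File("src-index"));
      Directory dir1 = FSDirectory.open(new File("below-mid"));
      Directory dir2 = FSDirectory.open(new File("mid-and-above"));
      PKIndexSplitter splitter =
          new PKIndexSplitter(Version.LUCENE_40, input, dir1, dir2, new Term("id", "0000500"));
      splitter.split();
    }
  }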
@@ -112,17 +119,16 @@ public class PKIndexSplitter {
    }
  }

- public static class DocumentFilteredIndexReader extends FilterIndexReader {
+ private static class DocumentFilteredAtomicIndexReader extends FilterIndexReader {
    final Bits liveDocs;
    final int numDocs;

-   public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
-     super(new SlowMultiReaderWrapper(reader));
-
+   public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, boolean negateFilter) throws IOException {
+     super(context.reader);
      final int maxDoc = in.maxDoc();
      final FixedBitSet bits = new FixedBitSet(maxDoc);
      // ignore livedocs here, as we filter them later:
-     final DocIdSet docs = preserveFilter.getDocIdSet((AtomicReaderContext) in.getTopReaderContext(), null);
+     final DocIdSet docs = preserveFilter.getDocIdSet(context, null);
      if (docs != null) {
        final DocIdSetIterator it = docs.iterator();
        if (it != null) {
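The constructor above is cut off at the hunk boundary; the remaining logic, not shown here, presumably ORs the filter's matches into the bitset, optionally negates it for the second output, and drops documents the segment has already deleted. A self-contained sketch of that per-segment pattern; the class, method, and variable names are illustrative, not copied from the patch:

  import java.io.IOException;

  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.search.DocIdSet;
  import org.apache.lucene.search.DocIdSetIterator;
  import org.apache.lucene.util.Bits;
  import org.apache.lucene.util.FixedBitSet;

  // Build the "kept documents" bitset for one segment: set a bit for every filter
  // match, optionally keep the complement instead, then clear bits for documents
  // the segment has already deleted.
  final class FilterToLiveDocs {
    static FixedBitSet buildLiveDocs(IndexReader segmentReader, DocIdSet matches, boolean negate) throws IOException {
      final int maxDoc = segmentReader.maxDoc();
      final FixedBitSet bits = new FixedBitSet(maxDoc);
      if (matches != null) {
        final DocIdSetIterator it = matches.iterator();
        if (it != null) {
          int doc;
          while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            bits.set(doc);
          }
        }
      }
      if (negate) {
        // keep the complement, i.e. the documents destined for the other output index
        for (int i = 0; i < maxDoc; i++) {
          if (bits.get(i)) bits.clear(i); else bits.set(i);
        }
      }
      final Bits liveDocs = segmentReader.getLiveDocs(); // null when the segment has no deletions
      if (liveDocs != null) {
        for (int i = 0; i < maxDoc; i++) {
          if (!liveDocs.get(i)) {
            bits.clear(i);
          }
        }
      }
      return bits;
    }
  }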
TestMultiPassIndexSplitter.java:

@@ -33,13 +33,14 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
  public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
-   IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+   IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
    Document doc;
    for (int i = 0; i < NUM_DOCS; i++) {
      doc = new Document();
      doc.add(newField("id", i + "", StringField.TYPE_STORED));
      doc.add(newField("f", i + " " + i, TextField.TYPE_STORED));
      w.addDocument(doc);
+     if (i%3==0) w.commit();
    }
    w.commit();
    w.deleteDocuments(new Term("id", "" + (NUM_DOCS-1)));
TestPKIndexSplitter.java:

@@ -37,14 +37,16 @@ public class TestPKIndexSplitter extends LuceneTestCase {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-       .setOpenMode(OpenMode.CREATE));
+       .setOpenMode(OpenMode.CREATE).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
    for (int x = 0; x < 11; x++) {
      Document doc = createDocument(x, "1", 3, format);
      w.addDocument(doc);
+     if (x%3==0) w.commit();
    }
    for (int x = 11; x < 20; x++) {
      Document doc = createDocument(x, "2", 3, format);
      w.addDocument(doc);
+     if (x%3==0) w.commit();
    }
    w.close();
@@ -55,7 +57,7 @@ public class TestPKIndexSplitter extends LuceneTestCase {
    // delete some documents
    w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-       .setOpenMode(OpenMode.APPEND));
+       .setOpenMode(OpenMode.APPEND).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
    w.deleteDocuments(midTerm);
    w.deleteDocuments(new Term("id", format.format(2)));
    w.close();
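All of the test changes follow one pattern: the writer gets NoMergePolicy.COMPOUND_FILES and commits every third document, so the test index really contains several segments and the new per-segment code paths are exercised instead of being collapsed into a single segment by merges. A condensed sketch of that setup outside the test framework (Version.LUCENE_40 stands in for TEST_VERSION_CURRENT, and the analyzer, directory, and fields are placeholders):

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.NoMergePolicy;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.util.Version;

  // Build a deliberately multi-segment index: NoMergePolicy keeps segments from
  // being merged away, and a commit every third document starts a new segment.
  final class MultiSegmentIndexSetup {
    static void fill(Directory dir, Analyzer analyzer, int numDocs) throws Exception {
      IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer)
          .setMergePolicy(NoMergePolicy.COMPOUND_FILES);
      IndexWriter w = new IndexWriter(dir, conf);
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        // ... add the fields for document i here ...
        w.addDocument(doc);
        if (i % 3 == 0) {
          w.commit(); // flushes, creating a segment boundary
        }
      }
      w.close();
    }
  }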