mirror of https://github.com/apache/lucene.git
LUCENE-3626: Make PKIndexSplitter and MultiPassIndexSplitter work per segment
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1212894 13f79535-47bb-0310-9956-ffa450edef68
parent 764059147f
commit ad7cb17bc9
MultiPassIndexSplitter.java:

@@ -21,12 +21,12 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 
-import org.apache.lucene.index.IndexWriter; // javadoc
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.Version;
 
 /**
@@ -174,15 +174,60 @@ public class MultiPassIndexSplitter {
   }
 
   /**
-   * This class pretends that it can write deletions to the underlying index.
-   * Instead, deletions are buffered in a bitset and overlaid with the original
-   * list of deletions.
+   * This class emulates deletions on the underlying index.
    */
-  public static final class FakeDeleteIndexReader extends FilterIndexReader {
+  private static final class FakeDeleteIndexReader extends MultiReader {
+
+    public FakeDeleteIndexReader(IndexReader reader) throws IOException {
+      super(initSubReaders(reader), false /* dont close */);
+    }
+
+    private static IndexReader[] initSubReaders(IndexReader reader) throws IOException {
+      final ArrayList<IndexReader> subs = new ArrayList<IndexReader>();
+      new ReaderUtil.Gather(reader) {
+        @Override
+        protected void add(int base, IndexReader r) {
+          subs.add(new FakeDeleteAtomicIndexReader(r));
+        }
+      }.run();
+      return subs.toArray(new IndexReader[subs.size()]);
+    }
+
+    public void deleteDocument(int docID) {
+      final int i = readerIndex(docID);
+      ((FakeDeleteAtomicIndexReader) subReaders[i]).deleteDocument(docID - starts[i]);
+    }
+
+    public void undeleteAll() {
+      for (IndexReader r : subReaders) {
+        ((FakeDeleteAtomicIndexReader) r).undeleteAll();
+      }
+    }
+
+    // override this as MultiReader precalculates the number of deletions
+    // (this method is never used by MultiPassIndexSplitter)
+    @Override
+    public int numDocs() {
+      int n = 0;
+      for (int i = 0; i < subReaders.length; i++)
+        n += subReaders[i].numDocs();
+      return n;
+    }
+
+    // override this as MultiReader precalculates the number of deletions
+    // (this method is never used by MultiPassIndexSplitter)
+    @Override
+    public boolean hasDeletions() {
+      return (maxDoc() != numDocs());
+    }
+
+  }
+
+  private static final class FakeDeleteAtomicIndexReader extends FilterIndexReader {
     FixedBitSet liveDocs;
 
-    public FakeDeleteIndexReader(IndexReader in) {
-      super(new SlowMultiReaderWrapper(in));
+    public FakeDeleteAtomicIndexReader(IndexReader reader) {
+      super(reader);
       undeleteAll(); // initialize main bitset
     }
 
@@ -191,7 +236,7 @@ public class MultiPassIndexSplitter {
       return liveDocs.cardinality();
     }
 
-    void undeleteAll() {
+    public void undeleteAll() {
      final int maxDoc = in.maxDoc();
      liveDocs = new FixedBitSet(in.maxDoc());
      if (in.hasDeletions()) {
@@ -207,7 +252,7 @@ public class MultiPassIndexSplitter {
      }
    }
 
-    void deleteDocument(int n) {
+    public void deleteDocument(int n) {
      liveDocs.clear(n);
    }
 
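Note: the heart of the MultiPassIndexSplitter change is ReaderUtil.Gather, which recursively visits every atomic (single-segment) sub-reader of a possibly composite reader, replacing the old approach of flattening everything through SlowMultiReaderWrapper. Below is a minimal sketch of that traversal, written against the 4.0-era API used in this diff; the GatherExample class and collectSegments helper are our names, for illustration only.

    import java.io.IOException;
    import java.util.ArrayList;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.util.ReaderUtil;

    public final class GatherExample {
      // Collect one handle per segment. 'base' is the segment's starting
      // docID within the composite reader; that offset is what the
      // deleteDocument() above uses (via starts[i]) to translate a global
      // docID into a per-segment one.
      public static IndexReader[] collectSegments(IndexReader reader) throws IOException {
        final ArrayList<IndexReader> subs = new ArrayList<IndexReader>();
        new ReaderUtil.Gather(reader) {
          @Override
          protected void add(int base, IndexReader r) {
            subs.add(r); // r is atomic: it has no sub-readers of its own
          }
        }.run();
        return subs.toArray(new IndexReader[subs.size()]);
      }
    }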
PKIndexSplitter.java:

@@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
@@ -28,6 +29,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.Version;
 
 /**
@@ -99,9 +101,14 @@ public class PKIndexSplitter {
 
   private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
     boolean success = false;
-    IndexWriter w = new IndexWriter(target, config);
+    final IndexWriter w = new IndexWriter(target, config);
     try {
-      w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
+      final AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
+      final IndexReader[] subReaders = new IndexReader[leaves.length];
+      for (int i = 0; i < leaves.length; i++) {
+        subReaders[i] = new DocumentFilteredAtomicIndexReader(leaves[i], preserveFilter, negateFilter);
+      }
+      w.addIndexes(subReaders);
       success = true;
     } finally {
       if (success) {
@@ -112,17 +119,16 @@ public class PKIndexSplitter {
       }
     }
 
-  public static class DocumentFilteredIndexReader extends FilterIndexReader {
+  private static class DocumentFilteredAtomicIndexReader extends FilterIndexReader {
     final Bits liveDocs;
     final int numDocs;
 
-    public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
-      super(new SlowMultiReaderWrapper(reader));
-
+    public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, boolean negateFilter) throws IOException {
+      super(context.reader);
       final int maxDoc = in.maxDoc();
       final FixedBitSet bits = new FixedBitSet(maxDoc);
       // ignore livedocs here, as we filter them later:
-      final DocIdSet docs = preserveFilter.getDocIdSet((AtomicReaderContext) in.getTopReaderContext(), null);
+      final DocIdSet docs = preserveFilter.getDocIdSet(context, null);
       if (docs != null) {
         final DocIdSetIterator it = docs.iterator();
         if (it != null) {
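Note: PKIndexSplitter follows the same pattern. Instead of wrapping the source in SlowMultiReaderWrapper and filtering one huge composite reader, it resolves the reader's leaves and hands IndexWriter.addIndexes one filtered reader per segment. The following is a condensed, commented restatement of the new createIndex body above, not a standalone program (DocumentFilteredAtomicIndexReader is a private inner class, and the success/close handling is omitted):

    // One filtered sub-reader per segment, merged into the target index in a
    // single addIndexes call. No global docID remapping is needed, because
    // each leaf is already a self-contained segment, and the preserveFilter
    // is evaluated per leaf via its AtomicReaderContext.
    final AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
    final IndexReader[] subReaders = new IndexReader[leaves.length];
    for (int i = 0; i < leaves.length; i++) {
      // overlays the filter's bitset as that segment's live docs
      subReaders[i] = new DocumentFilteredAtomicIndexReader(leaves[i], preserveFilter, negateFilter);
    }
    w.addIndexes(subReaders); // copies the surviving docs segment by segment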
TestMultiPassIndexSplitter.java:

@@ -33,13 +33,14 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
   public void setUp() throws Exception {
     super.setUp();
     dir = newDirectory();
-    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     Document doc;
     for (int i = 0; i < NUM_DOCS; i++) {
       doc = new Document();
       doc.add(newField("id", i + "", StringField.TYPE_STORED));
       doc.add(newField("f", i + " " + i, TextField.TYPE_STORED));
       w.addDocument(doc);
+      if (i%3==0) w.commit();
     }
     w.commit();
     w.deleteDocuments(new Term("id", "" + (NUM_DOCS-1)));
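Note: the test changes are what make the new per-segment code path observable. NoMergePolicy.COMPOUND_FILES disables merging entirely, and committing every third document flushes separate segments, so the fixture index is guaranteed to stay multi-segment instead of collapsing back to one segment under a merge policy. A sketch of the resulting setup follows; dir, random, NUM_DOCS, newIndexWriterConfig, and MockAnalyzer come from Lucene's test framework as shown above, and makeDoc is a hypothetical stand-in for the field population in the real test:

    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setMergePolicy(NoMergePolicy.COMPOUND_FILES)); // never merge segments away
    for (int i = 0; i < NUM_DOCS; i++) {
      w.addDocument(makeDoc(i)); // hypothetical helper; see the real fields above
      if (i % 3 == 0) {
        w.commit(); // each commit flushes a new segment that will never be merged
      }
    }
    w.commit();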
TestPKIndexSplitter.java:

@@ -37,14 +37,16 @@ public class TestPKIndexSplitter extends LuceneTestCase {
     Directory dir = newDirectory();
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
       TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-      .setOpenMode(OpenMode.CREATE));
+      .setOpenMode(OpenMode.CREATE).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     for (int x = 0; x < 11; x++) {
       Document doc = createDocument(x, "1", 3, format);
       w.addDocument(doc);
+      if (x%3==0) w.commit();
     }
     for (int x = 11; x < 20; x++) {
       Document doc = createDocument(x, "2", 3, format);
       w.addDocument(doc);
+      if (x%3==0) w.commit();
     }
     w.close();
 
@@ -55,7 +57,7 @@ public class TestPKIndexSplitter extends LuceneTestCase {
     // delete some documents
     w = new IndexWriter(dir, newIndexWriterConfig(
       TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-      .setOpenMode(OpenMode.APPEND));
+      .setOpenMode(OpenMode.APPEND).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     w.deleteDocuments(midTerm);
     w.deleteDocuments(new Term("id", format.format(2)));
     w.close();
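Note: to close, here is a standalone illustration (plain Java, not a Lucene API) of the docID arithmetic that per-segment splitting relies on, as used by FakeDeleteIndexReader.deleteDocument above: starts[i] holds the first global docID of segment i, so a global docID maps to segment readerIndex(docID) at local position docID - starts[i]. The array contents and class name are invented for the demo.

    public final class DocIdMappingDemo {
      // starts[i] = first global docID of segment i; last slot is maxDoc
      static final int[] starts = {0, 4, 9, 12};

      // Linear scan for clarity; MultiReader does this lookup with a
      // binary search internally.
      static int readerIndex(int docID) {
        for (int i = starts.length - 2; i >= 0; i--) {
          if (docID >= starts[i]) return i;
        }
        throw new IllegalArgumentException("docID out of range: " + docID);
      }

      public static void main(String[] args) {
        int docID = 6;
        int seg = readerIndex(docID);    // 1, since starts[1]=4 <= 6 < starts[2]=9
        int local = docID - starts[seg]; // 2: the third document inside segment 1
        System.out.println("segment " + seg + ", local docID " + local);
      }
    }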