From ad7cb17bc9ec7b5b564d15baa38b52abc8e8b143 Mon Sep 17 00:00:00 2001
From: Uwe Schindler
Date: Sat, 10 Dec 2011 22:07:32 +0000
Subject: [PATCH] LUCENE-3626: Make PKIndexSplitter and MultiPassIndexSplitter work per segment

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1212894 13f79535-47bb-0310-9956-ffa450edef68
---
 .../lucene/index/MultiPassIndexSplitter.java | 63 ++++++++++++++++---
 .../apache/lucene/index/PKIndexSplitter.java | 20 +++---
 .../index/TestMultiPassIndexSplitter.java    |  3 +-
 .../lucene/index/TestPKIndexSplitter.java    |  6 +-
 4 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
index c356dea3336..1c83d93368b 100644
--- a/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
@@ -21,12 +21,12 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 
-import org.apache.lucene.index.IndexWriter; // javadoc
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.Version;
 
 /**
@@ -174,15 +174,60 @@ public class MultiPassIndexSplitter {
   }
 
   /**
-   * This class pretends that it can write deletions to the underlying index.
-   * Instead, deletions are buffered in a bitset and overlaid with the original
-   * list of deletions.
+   * This class emulates deletions on the underlying index.
    */
-  public static final class FakeDeleteIndexReader extends FilterIndexReader {
+  private static final class FakeDeleteIndexReader extends MultiReader {
+
+    public FakeDeleteIndexReader(IndexReader reader) throws IOException {
+      super(initSubReaders(reader), false /* dont close */);
+    }
+
+    private static IndexReader[] initSubReaders(IndexReader reader) throws IOException {
+      final ArrayList<IndexReader> subs = new ArrayList<IndexReader>();
+      new ReaderUtil.Gather(reader) {
+        @Override
+        protected void add(int base, IndexReader r) {
+          subs.add(new FakeDeleteAtomicIndexReader(r));
+        }
+      }.run();
+      return subs.toArray(new IndexReader[subs.size()]);
+    }
+
+    public void deleteDocument(int docID) {
+      final int i = readerIndex(docID);
+      ((FakeDeleteAtomicIndexReader) subReaders[i]).deleteDocument(docID - starts[i]);
+    }
+
+    public void undeleteAll() {
+      for (IndexReader r : subReaders) {
+        ((FakeDeleteAtomicIndexReader) r).undeleteAll();
+      }
+    }
+
+    // override this as MultiReader precalculates the number of deletions
+    // (this method is never used by MultiPassIndexSplitter)
+    @Override
+    public int numDocs() {
+      int n = 0;
+      for (int i = 0; i < subReaders.length; i++)
+        n += subReaders[i].numDocs();
+      return n;
+    }
+
+    // override this as MultiReader precalculates the number of deletions
+    // (this method is never used by MultiPassIndexSplitter)
+    @Override
+    public boolean hasDeletions() {
+      return (maxDoc() != numDocs());
+    }
+
+  }
+
+  private static final class FakeDeleteAtomicIndexReader extends FilterIndexReader {
     FixedBitSet liveDocs;
 
-    public FakeDeleteIndexReader(IndexReader in) {
-      super(new SlowMultiReaderWrapper(in));
+    public FakeDeleteAtomicIndexReader(IndexReader reader) {
+      super(reader);
       undeleteAll(); // initialize main bitset
     }
 
@@ -191,7 +236,7 @@ public class MultiPassIndexSplitter {
       return liveDocs.cardinality();
     }
 
-    void undeleteAll() {
+    public void undeleteAll() {
      final int maxDoc = in.maxDoc();
       liveDocs = new FixedBitSet(in.maxDoc());
       if (in.hasDeletions()) {
@@ -207,7 +252,7 @@ public class MultiPassIndexSplitter {
       }
     }
 
-    void deleteDocument(int n) {
+    public void deleteDocument(int n) {
       liveDocs.clear(n);
     }
 
diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java
index c84a5fcfa87..38e9f03305f 100644
--- a/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
@@ -28,6 +29,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.Version;
 
 /**
@@ -99,9 +101,14 @@ public class PKIndexSplitter {
 
   private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
     boolean success = false;
-    IndexWriter w = new IndexWriter(target, config);
+    final IndexWriter w = new IndexWriter(target, config);
     try {
-      w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
+      final AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
+      final IndexReader[] subReaders = new IndexReader[leaves.length];
+      for (int i = 0; i < leaves.length; i++) {
+        subReaders[i] = new DocumentFilteredAtomicIndexReader(leaves[i], preserveFilter, negateFilter);
+      }
+      w.addIndexes(subReaders);
       success = true;
     } finally {
       if (success) {
@@ -112,17 +119,16 @@ public class PKIndexSplitter {
     }
   }
 
-  public static class DocumentFilteredIndexReader extends FilterIndexReader {
+  private static class DocumentFilteredAtomicIndexReader extends FilterIndexReader {
     final Bits liveDocs;
     final int numDocs;
 
-    public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
-      super(new SlowMultiReaderWrapper(reader));
-
+    public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, boolean negateFilter) throws IOException {
+      super(context.reader);
       final int maxDoc = in.maxDoc();
       final FixedBitSet bits = new FixedBitSet(maxDoc);
       // ignore livedocs here, as we filter them later:
-      final DocIdSet docs = preserveFilter.getDocIdSet((AtomicReaderContext) in.getTopReaderContext(), null);
+      final DocIdSet docs = preserveFilter.getDocIdSet(context, null);
       if (docs != null) {
         final DocIdSetIterator it = docs.iterator();
         if (it != null) {
diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
index 2082624870e..54c58ad1824 100644
--- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
+++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
@@ -33,13 +33,14 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
   public void setUp() throws Exception {
     super.setUp();
     dir = newDirectory();
-    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     Document doc;
     for (int i = 0; i < NUM_DOCS; i++) {
       doc = new Document();
       doc.add(newField("id", i + "", StringField.TYPE_STORED));
       doc.add(newField("f", i + " " + i, TextField.TYPE_STORED));
       w.addDocument(doc);
+      if (i%3==0) w.commit();
     }
     w.commit();
     w.deleteDocuments(new Term("id", "" + (NUM_DOCS-1)));
diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java
index 25dab63c430..b5fdfb4a982 100644
--- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java
+++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java
@@ -37,14 +37,16 @@ public class TestPKIndexSplitter extends LuceneTestCase {
     Directory dir = newDirectory();
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-        .setOpenMode(OpenMode.CREATE));
+        .setOpenMode(OpenMode.CREATE).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     for (int x = 0; x < 11; x++) {
       Document doc = createDocument(x, "1", 3, format);
       w.addDocument(doc);
+      if (x%3==0) w.commit();
     }
     for (int x = 11; x < 20; x++) {
       Document doc = createDocument(x, "2", 3, format);
       w.addDocument(doc);
+      if (x%3==0) w.commit();
     }
     w.close();
 
@@ -55,7 +57,7 @@ public class TestPKIndexSplitter extends LuceneTestCase {
     // delete some documents
     w = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))
-        .setOpenMode(OpenMode.APPEND));
+        .setOpenMode(OpenMode.APPEND).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     w.deleteDocuments(midTerm);
     w.deleteDocuments(new Term("id", format.format(2)));
     w.close();
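
Note (not part of the patch): both splitters now enumerate the index one segment at a time instead of flattening it through SlowMultiReaderWrapper. The sketch below illustrates the two per-segment idioms the change relies on, using only the API visible in the diff above (ReaderUtil.Gather, ReaderUtil.leaves, IndexReader.getTopReaderContext, the public AtomicReaderContext.reader field). The class PerSegmentSketch and its method names are illustrative only and exist nowhere in the patch.

// Illustrative sketch against the Lucene trunk API of this era; not part of the commit.
import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.ReaderUtil;

final class PerSegmentSketch {

  // Callback-style gathering of atomic sub-readers, the idiom used by
  // MultiPassIndexSplitter.FakeDeleteIndexReader.initSubReaders():
  static IndexReader[] gatherSubReaders(IndexReader reader) throws IOException {
    final ArrayList<IndexReader> subs = new ArrayList<IndexReader>();
    new ReaderUtil.Gather(reader) {
      @Override
      protected void add(int base, IndexReader r) {
        subs.add(r); // the patch wraps each r in a FakeDeleteAtomicIndexReader here
      }
    }.run();
    return subs.toArray(new IndexReader[subs.size()]);
  }

  // Leaf-context enumeration, the idiom used by PKIndexSplitter.createIndex():
  static void visitLeaves(IndexReader reader) throws IOException {
    final AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
    for (AtomicReaderContext leaf : leaves) {
      // the patch builds one DocumentFilteredAtomicIndexReader per leaf and
      // passes the whole array to IndexWriter.addIndexes(...)
      System.out.println("segment maxDoc=" + leaf.reader.maxDoc());
    }
  }
}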