LUCENE-6634: PKIndexSplitter now takes a Query instead of a Filter.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1688365 13f79535-47bb-0310-9956-ffa450edef68
Adrien Grand 2015-06-30 06:50:50 +00:00
parent 4b758ee3fe
commit a7d1ee7002
2 changed files with 26 additions and 22 deletions
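
From the caller's perspective the API change is small: the split criterion no longer needs a QueryWrapperFilter around it. A minimal sketch of the new usage (the index paths, the "id" field, and the "5000" split point are illustrative, not from this patch):

import java.nio.file.Paths;

import org.apache.lucene.index.PKIndexSplitter;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class SplitByPK {
  public static void main(String[] args) throws Exception {
    Directory input = FSDirectory.open(Paths.get("index"));      // index to split
    Directory dir1  = FSDirectory.open(Paths.get("index-low"));  // receives docs matching the query
    Directory dir2  = FSDirectory.open(Paths.get("index-high")); // receives all remaining docs

    // Before this commit the criterion had to be a Filter:
    //   new QueryWrapperFilter(new TermRangeQuery("id", null, new BytesRef("5000"), true, false))
    // Now the Query is passed directly:
    PKIndexSplitter splitter = new PKIndexSplitter(input, dir1, dir2,
        new TermRangeQuery("id", null, new BytesRef("5000"), true, false));
    splitter.split();
  }
}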

lucene/CHANGES.txt

@@ -145,6 +145,9 @@ API Changes
   parameter. Live docs are now always checked on top of these APIs.
   (Adrien Grand)
 
+* LUCENE-6634: PKIndexSplitter now takes a Query instead of a Filter to decide
+  how to split an index. (Adrien Grand)
+
 Bug fixes
 
 * LUCENE-6500: ParallelCompositeReader did not always call

lucene/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java

@@ -21,11 +21,11 @@ import java.io.IOException;
 import java.util.List;
 
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Bits;
@@ -33,11 +33,11 @@ import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 
 /**
- * Split an index based on a {@link Filter}.
+ * Split an index based on a {@link Query}.
  */
 public class PKIndexSplitter {
-  private final Filter docsInFirstIndex;
+  private final Query docsInFirstIndex;
   private final Directory input;
   private final Directory dir1;
   private final Directory dir2;
@@ -45,19 +45,19 @@ public class PKIndexSplitter {
   private final IndexWriterConfig config2;
 
   /**
-   * Split an index based on a {@link Filter}. All documents that match the filter
+   * Split an index based on a {@link Query}. All documents that match the query
    * are sent to dir1, remaining ones to dir2.
    */
-  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
+  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Query docsInFirstIndex) {
     this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(), newDefaultConfig());
   }
 
   private static IndexWriterConfig newDefaultConfig() {
     return new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE);
   }
 
   public PKIndexSplitter(Directory input, Directory dir1,
-      Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
+      Directory dir2, Query docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
     this.input = input;
     this.dir1 = dir1;
     this.dir2 = dir2;
@@ -73,13 +73,13 @@ public class PKIndexSplitter {
    */
   public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
     this(input, dir1, dir2,
-        new QueryWrapperFilter(new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false)));
+        new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false));
   }
 
   public PKIndexSplitter(Directory input, Directory dir1,
       Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
     this(input, dir1, dir2,
-        new QueryWrapperFilter(new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false)), config1, config2);
+        new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2);
   }
 
   public void split() throws IOException {
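
The two Term-based convenience constructors above now build the TermRangeQuery directly instead of wrapping it in a QueryWrapperFilter. Spelled out with the arguments named, the criterion looks roughly like this (a sketch; the method wrapper is mine, not from the patch):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;

class MidTermQueryExample {
  static Query docsInFirstIndex(Term midTerm) {
    // All documents whose primary key sorts strictly below midTerm match and
    // are sent to dir1; midTerm itself and everything above it lands in dir2.
    return new TermRangeQuery(
        midTerm.field(),  // the primary-key field
        null,             // no lower bound
        midTerm.bytes(),  // upper bound: the mid term
        true,             // includeLower, vacuous with a null lower bound
        false);           // includeUpper = false, so midTerm itself is excluded
  }
}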
@@ -99,15 +99,19 @@ public class PKIndexSplitter {
     }
   }
 
-  private void createIndex(IndexWriterConfig config, Directory target, DirectoryReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
+  private void createIndex(IndexWriterConfig config, Directory target, DirectoryReader reader, Query preserveFilter, boolean negateFilter) throws IOException {
     boolean success = false;
     final IndexWriter w = new IndexWriter(target, config);
     try {
+      final IndexSearcher searcher = new IndexSearcher(reader);
+      searcher.setQueryCache(null);
+      final boolean needsScores = false; // scores are not needed, only matching docs
+      final Weight preserveWeight = searcher.createNormalizedWeight(preserveFilter, needsScores);
       final List<LeafReaderContext> leaves = reader.leaves();
       final CodecReader[] subReaders = new CodecReader[leaves.size()];
       int i = 0;
       for (final LeafReaderContext ctx : leaves) {
-        subReaders[i++] = new DocumentFilteredLeafIndexReader(ctx, preserveFilter, negateFilter);
+        subReaders[i++] = new DocumentFilteredLeafIndexReader(ctx, preserveWeight, negateFilter);
       }
       w.addIndexes(subReaders);
       success = true;
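
For readers who have not used the Weight API: createNormalizedWeight compiles the Query once against the searcher, and the resulting Weight then hands out one Scorer per segment. In this version of Lucene a Scorer is a DocIdSetIterator (the patch assigns weight.scorer(context) to one directly), so it plays exactly the role the Filter's DocIdSet used to. A self-contained sketch of the pattern, assuming an already-open reader and query (class and method names are mine):

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;

final class MatchVisitor {
  static void visitMatches(DirectoryReader reader, Query query) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // as in the patch: no caching, each segment is read once
    Weight weight = searcher.createNormalizedWeight(query, false); // false: no scores needed
    for (LeafReaderContext ctx : reader.leaves()) {
      Scorer scorer = weight.scorer(ctx); // null when nothing matches in this segment
      if (scorer == null) {
        continue;
      }
      for (int doc = scorer.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = scorer.nextDoc()) {
        // doc is segment-local; add ctx.docBase for an index-wide id
        System.out.println(ctx.docBase + doc);
      }
    }
  }
}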
@@ -124,18 +128,15 @@ public class PKIndexSplitter {
     final Bits liveDocs;
     final int numDocs;
 
-    public DocumentFilteredLeafIndexReader(LeafReaderContext context, Filter preserveFilter, boolean negateFilter) throws IOException {
+    public DocumentFilteredLeafIndexReader(LeafReaderContext context, Weight preserveWeight, boolean negateFilter) throws IOException {
       // our cast is ok, since we open the Directory.
       super((CodecReader) context.reader());
       final int maxDoc = in.maxDoc();
       final FixedBitSet bits = new FixedBitSet(maxDoc);
       // ignore livedocs here, as we filter them later:
-      final DocIdSet docs = preserveFilter.getDocIdSet(context, null);
-      if (docs != null) {
-        final DocIdSetIterator it = docs.iterator();
-        if (it != null) {
-          bits.or(it);
-        }
+      final DocIdSetIterator preserveIt = preserveWeight.scorer(context);
+      if (preserveIt != null) {
+        bits.or(preserveIt);
       }
       if (negateFilter) {
         bits.flip(0, maxDoc);
@@ -145,14 +146,14 @@ public class PKIndexSplitter {
         final Bits oldLiveDocs = in.getLiveDocs();
         assert oldLiveDocs != null;
         final DocIdSetIterator it = new BitSetIterator(bits, 0L); // the cost is not useful here
-        for (int i = it.nextDoc(); i < maxDoc; i = it.nextDoc()) {
+        for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) {
           if (!oldLiveDocs.get(i)) {
             // we can safely modify the current bit, as the iterator already stepped over it:
             bits.clear(i);
           }
         }
       }
 
       this.liveDocs = bits;
       this.numDocs = bits.cardinality();
     }
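
One last note on the loop fix in the final hunk: nextDoc() returns DocIdSetIterator.NO_MORE_DOCS (which is Integer.MAX_VALUE) once the iterator is exhausted, so the old "i < maxDoc" condition happened to terminate as well; comparing against the sentinel is simply the documented contract for DocIdSetIterator. The canonical loop as a standalone sketch (the bit positions are illustrative):

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

public class CanonicalLoop {
  public static void main(String[] args) throws Exception {
    FixedBitSet bits = new FixedBitSet(64); // 64 docs, all initially clear
    bits.set(3);
    bits.set(17);
    DocIdSetIterator it = new BitSetIterator(bits, 0L); // the cost hint is unused here
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      System.out.println(doc); // prints 3, then 17
    }
  }
}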