mirror of https://github.com/apache/lucene.git

LUCENE-6634: PKIndexSplitter now takes a Query instead of a Filter.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1688365 13f79535-47bb-0310-9956-ffa450edef68

parent 4b758ee3fe
commit a7d1ee7002
CHANGES.txt:

@@ -145,6 +145,9 @@ API Changes
   parameter. Live docs are now always checked on top of these APIs.
   (Adrien Grand)
 
+* LUCENE-6634: PKIndexSplitter now takes a Query instead of a Filter to decide
+  how to split an index. (Adrien Grand)
+
 Bug fixes
 
 * LUCENE-6500: ParallelCompositeReader did not always call
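Under the new API, any Query can drive the split. A minimal usage sketch (the class name SplitByQuery, the directory paths, the field name "id", and the boundary value "5000" are hypothetical, for illustration only; the constructor and split() are as defined in the diff below):

import java.nio.file.Paths;

import org.apache.lucene.index.PKIndexSplitter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class SplitByQuery {
  public static void main(String[] args) throws Exception {
    Directory input = FSDirectory.open(Paths.get("index"));       // source index (hypothetical path)
    Directory dir1  = FSDirectory.open(Paths.get("index-part1")); // receives docs matching the query
    Directory dir2  = FSDirectory.open(Paths.get("index-part2")); // receives all remaining docs

    // Primary keys below "5000" go to dir1, the rest to dir2.
    Query docsInFirstIndex = new TermRangeQuery("id", null, new BytesRef("5000"), true, false);
    new PKIndexSplitter(input, dir1, dir2, docsInFirstIndex).split();
  }
}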
PKIndexSplitter.java:

@@ -21,11 +21,11 @@ import java.io.IOException;
 import java.util.List;
 
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Bits;
@@ -33,11 +33,11 @@ import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 
 /**
- * Split an index based on a {@link Filter}.
+ * Split an index based on a {@link Query}.
  */
 
 public class PKIndexSplitter {
-  private final Filter docsInFirstIndex;
+  private final Query docsInFirstIndex;
   private final Directory input;
   private final Directory dir1;
   private final Directory dir2;
@@ -45,19 +45,19 @@ public class PKIndexSplitter {
   private final IndexWriterConfig config2;
 
   /**
-   * Split an index based on a {@link Filter}. All documents that match the filter
+   * Split an index based on a {@link Query}. All documents that match the query
    * are sent to dir1, remaining ones to dir2.
    */
-  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
+  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Query docsInFirstIndex) {
     this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(), newDefaultConfig());
   }
 
   private static IndexWriterConfig newDefaultConfig() {
     return new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE);
   }
 
   public PKIndexSplitter(Directory input, Directory dir1,
-      Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
+      Directory dir2, Query docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
     this.input = input;
     this.dir1 = dir1;
     this.dir2 = dir2;
@@ -73,13 +73,13 @@ public class PKIndexSplitter {
    */
   public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
     this(input, dir1, dir2,
-        new QueryWrapperFilter(new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false)));
+        new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false));
   }
 
   public PKIndexSplitter(Directory input, Directory dir1,
       Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
     this(input, dir1, dir2,
-        new QueryWrapperFilter(new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false)), config1, config2);
+        new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2);
   }
 
   public void split() throws IOException {
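The midTerm convenience constructors show the simplification directly: the range no longer needs to be wrapped in a QueryWrapperFilter. A before/after sketch of the call site above (midTerm is a Term, as in those constructors; the variable names are illustrative):

// Before (Filter-based API): the range query had to be wrapped into a Filter
Filter filterBased = new QueryWrapperFilter(
    new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false));

// After (Query-based API): the range query is passed directly; it matches every
// document whose primary-key term sorts strictly below midTerm
Query queryBased = new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false);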
@@ -99,15 +99,19 @@ public class PKIndexSplitter {
     }
   }
 
-  private void createIndex(IndexWriterConfig config, Directory target, DirectoryReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
+  private void createIndex(IndexWriterConfig config, Directory target, DirectoryReader reader, Query preserveFilter, boolean negateFilter) throws IOException {
     boolean success = false;
     final IndexWriter w = new IndexWriter(target, config);
     try {
+      final IndexSearcher searcher = new IndexSearcher(reader);
+      searcher.setQueryCache(null);
+      final boolean needsScores = false; // scores are not needed, only matching docs
+      final Weight preserveWeight = searcher.createNormalizedWeight(preserveFilter, needsScores);
       final List<LeafReaderContext> leaves = reader.leaves();
       final CodecReader[] subReaders = new CodecReader[leaves.size()];
       int i = 0;
       for (final LeafReaderContext ctx : leaves) {
-        subReaders[i++] = new DocumentFilteredLeafIndexReader(ctx, preserveFilter, negateFilter);
+        subReaders[i++] = new DocumentFilteredLeafIndexReader(ctx, preserveWeight, negateFilter);
       }
       w.addIndexes(subReaders);
       success = true;
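createIndex() now compiles the query into a Weight once per split via IndexSearcher.createNormalizedWeight (with the query cache disabled, since every segment is visited exactly once) and reuses that Weight to get a scorer on each leaf. A standalone sketch of this pattern against the API as of this commit, where Scorer still extends DocIdSetIterator (the helper walkMatches is hypothetical):

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;

public class WalkMatches {
  // Visit the ids of all documents matching `query`, one segment at a time, without scoring.
  static void walkMatches(DirectoryReader reader, Query query) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // each segment is visited exactly once, so caching buys nothing
    Weight weight = searcher.createNormalizedWeight(query, false); // false: scores not needed
    for (LeafReaderContext ctx : reader.leaves()) {
      Scorer scorer = weight.scorer(ctx); // null when no document in this segment matches
      if (scorer != null) {
        for (int doc = scorer.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = scorer.nextDoc()) {
          System.out.println("match: " + (ctx.docBase + doc)); // segment-local id + docBase = global id
        }
      }
    }
  }
}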
@@ -124,18 +128,15 @@ public class PKIndexSplitter {
     final Bits liveDocs;
     final int numDocs;
 
-    public DocumentFilteredLeafIndexReader(LeafReaderContext context, Filter preserveFilter, boolean negateFilter) throws IOException {
+    public DocumentFilteredLeafIndexReader(LeafReaderContext context, Weight preserveWeight, boolean negateFilter) throws IOException {
       // our cast is ok, since we open the Directory.
       super((CodecReader) context.reader());
       final int maxDoc = in.maxDoc();
       final FixedBitSet bits = new FixedBitSet(maxDoc);
       // ignore livedocs here, as we filter them later:
-      final DocIdSet docs = preserveFilter.getDocIdSet(context, null);
-      if (docs != null) {
-        final DocIdSetIterator it = docs.iterator();
-        if (it != null) {
-          bits.or(it);
-        }
+      final DocIdSetIterator preserveIt = preserveWeight.scorer(context);
+      if (preserveIt != null) {
+        bits.or(preserveIt);
       }
       if (negateFilter) {
         bits.flip(0, maxDoc);
@@ -145,14 +146,14 @@ public class PKIndexSplitter {
         final Bits oldLiveDocs = in.getLiveDocs();
         assert oldLiveDocs != null;
         final DocIdSetIterator it = new BitSetIterator(bits, 0L); // the cost is not useful here
-        for (int i = it.nextDoc(); i < maxDoc; i = it.nextDoc()) {
+        for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) {
           if (!oldLiveDocs.get(i)) {
             // we can safely modify the current bit, as the iterator already stepped over it:
             bits.clear(i);
           }
         }
       }
 
       this.liveDocs = bits;
       this.numDocs = bits.cardinality();
     }
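The loop-condition change in this last hunk is a small hardening: BitSetIterator.nextDoc() returns DocIdSetIterator.NO_MORE_DOCS (Integer.MAX_VALUE) once exhausted, so the old "i < maxDoc" test happened to terminate as well, but comparing against NO_MORE_DOCS is the documented iteration idiom. A self-contained sketch (the class name IterateSetBits is hypothetical):

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

public class IterateSetBits {
  public static void main(String[] args) throws Exception {
    FixedBitSet bits = new FixedBitSet(10);
    bits.set(3);
    bits.set(7);
    // nextDoc() yields each set bit in order and NO_MORE_DOCS when done,
    // which is why "i != NO_MORE_DOCS" is the correct termination test:
    DocIdSetIterator it = new BitSetIterator(bits, 0L); // cost hint, unused here
    for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) {
      System.out.println("set bit: " + i); // prints 3, then 7
    }
  }
}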