LUCENE-10002: Add FixedBitSetCollector and corresponding collector manager to test framework (#766)

Some tests collect matching docs in a FixedBitSet. As part of the effort to move such tests to IndexSearcher#search(Query, CollectorManager) (LUCENE-10002), this commit adds a new FixedBitSetCollector class that provides this functionality, along with a createManager method that returns a corresponding CollectorManager.
This commit is contained in:
Luca Cavanna 2022-03-30 16:14:39 +02:00 committed by GitHub
parent 2a3e5ca07f
commit 66bbc95586
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 95 additions and 230 deletions

View File

@ -31,7 +31,6 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
@ -39,9 +38,8 @@ import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.search.FixedBitSetCollector;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Bits;
@ -697,29 +695,7 @@ public abstract class BaseSpatialTestCase extends LuceneTestCase {
}
private FixedBitSet searchIndex(IndexSearcher s, Query query, int maxDoc) throws IOException {
final FixedBitSet hits = new FixedBitSet(maxDoc);
s.search(
query,
new SimpleCollector() {
private int docBase;
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
@Override
protected void doSetNextReader(LeafReaderContext context) {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
return hits;
return s.search(query, FixedBitSetCollector.createManager(maxDoc));
}
protected abstract Validator getValidator();

View File

@ -51,7 +51,6 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
@ -61,9 +60,11 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.search.FixedBitSetCollector;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.bkd.BKDConfig;
@ -572,28 +573,8 @@ public class TestPointQueries extends LuceneTestCase {
System.out.println(Thread.currentThread().getName() + ": using query: " + query);
}
final BitSet hits = new BitSet();
s.search(
query,
new SimpleCollector() {
private int docBase;
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
final FixedBitSet hits =
s.search(query, FixedBitSetCollector.createManager(r.maxDoc()));
if (VERBOSE) {
System.out.println(
@ -870,28 +851,8 @@ public class TestPointQueries extends LuceneTestCase {
System.out.println(Thread.currentThread().getName() + ": using query: " + query);
}
final BitSet hits = new BitSet();
s.search(
query,
new SimpleCollector() {
private int docBase;
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
final FixedBitSet hits =
s.search(query, FixedBitSetCollector.createManager(r.maxDoc()));
if (VERBOSE) {
System.out.println(

View File

@ -29,7 +29,6 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanQuery;
@ -37,15 +36,13 @@ import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.analysis.MockTokenFilter;
import org.apache.lucene.tests.analysis.MockTokenizer;
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.search.FixedBitSetCollector;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.FixedBitSet;
@ -123,32 +120,9 @@ public class TestHighlighterPhrase extends LuceneTestCase {
},
0,
true);
final FixedBitSet bitset = new FixedBitSet(indexReader.maxDoc());
indexSearcher.search(
phraseQuery,
new SimpleCollector() {
private int baseDoc;
@Override
public void collect(int i) {
bitset.set(this.baseDoc + i);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
this.baseDoc = context.docBase;
}
@Override
public void setScorer(Scorable scorer) {
// Do Nothing
}
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
});
final FixedBitSet bitset =
indexSearcher.search(
phraseQuery, FixedBitSetCollector.createManager(indexReader.maxDoc()));
assertEquals(1, bitset.cardinality());
final int maxDoc = indexReader.maxDoc();
final Highlighter highlighter =

View File

@ -24,10 +24,7 @@ import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.prefix.NumberRangePrefixTreeStrategy.Facets;
@ -36,9 +33,9 @@ import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
import org.apache.lucene.tests.search.FixedBitSetCollector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.junit.Before;
import org.junit.Test;
import org.locationtech.spatial4j.shape.Shape;
@ -219,28 +216,8 @@ public class TestNumberRangeFacets extends StrategyTestCase {
}
private Bits searchForDocBits(Query query) throws IOException {
FixedBitSet bitSet = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
indexSearcher.search(
query,
new SimpleCollector() {
int leafDocBase;
@Override
public void collect(int doc) throws IOException {
bitSet.set(leafDocBase + doc);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
leafDocBase = context.docBase;
}
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
});
return bitSet;
return indexSearcher.search(
query, FixedBitSetCollector.createManager(indexSearcher.getIndexReader().maxDoc()));
}
private void preQueryHavoc() {

View File

@ -45,7 +45,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
@ -56,8 +55,6 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.spatial3d.geom.GeoArea;
import org.apache.lucene.spatial3d.geom.GeoAreaFactory;
import org.apache.lucene.spatial3d.geom.GeoBBoxFactory;
@ -76,6 +73,7 @@ import org.apache.lucene.spatial3d.geom.XYZSolidFactory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.geo.GeoTestUtil;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.search.FixedBitSetCollector;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.DocIdSetBuilder;
@ -1041,29 +1039,7 @@ public class TestGeo3DPoint extends LuceneTestCase {
System.err.println(" using query: " + query);
}
final FixedBitSet hits = new FixedBitSet(r.maxDoc());
s.search(
query,
new SimpleCollector() {
private int docBase;
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
final FixedBitSet hits = s.search(query, FixedBitSetCollector.createManager(r.maxDoc()));
if (VERBOSE) {
System.err.println(" hitCount: " + hits.cardinality());

View File

@ -47,7 +47,6 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
@ -59,13 +58,12 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.search.FixedBitSetCollector;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Bits;
@ -1289,29 +1287,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
}
private FixedBitSet searchIndex(IndexSearcher s, Query query, int maxDoc) throws IOException {
final FixedBitSet hits = new FixedBitSet(maxDoc);
s.search(
query,
new SimpleCollector() {
private int docBase;
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
@Override
protected void doSetNextReader(LeafReaderContext context) {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
return hits;
return s.search(query, FixedBitSetCollector.createManager(maxDoc));
}
private void buildError(

View File

@ -47,7 +47,6 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
@ -58,13 +57,12 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.search.FixedBitSetCollector;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Bits;
@ -1176,29 +1174,7 @@ public abstract class BaseXYPointTestCase extends LuceneTestCase {
}
private FixedBitSet searchIndex(IndexSearcher s, Query query, int maxDoc) throws IOException {
final FixedBitSet hits = new FixedBitSet(maxDoc);
s.search(
query,
new SimpleCollector() {
private int docBase;
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
@Override
protected void doSetNextReader(LeafReaderContext context) {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
return hits;
return s.search(query, FixedBitSetCollector.createManager(maxDoc));
}
private void buildError(

View File

@ -16,7 +16,6 @@
*/
package org.apache.lucene.tests.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
@ -28,7 +27,6 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
@ -36,8 +34,6 @@ import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
@ -300,27 +296,7 @@ public abstract class BaseRangeFieldQueryTestCase extends LuceneTestCase {
System.out.println(" query=" + query);
}
final FixedBitSet hits = new FixedBitSet(maxDoc);
s.search(
query,
new SimpleCollector() {
private int docBase;
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
});
final FixedBitSet hits = s.search(query, FixedBitSetCollector.createManager(maxDoc));
NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for (int docID = 0; docID < maxDoc; ++docID) {

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.tests.search;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.FixedBitSet;
/**
 * A {@link SimpleCollector} that records every collected document in a {@link FixedBitSet}.
 *
 * <p>Each collected doc id is offset by the doc base of the leaf currently being visited before
 * the corresponding bit is set. Scores are not requested.
 */
public class FixedBitSetCollector extends SimpleCollector {
  /** Bits set for every document collected so far. */
  private final FixedBitSet hits;

  /** Doc base of the leaf most recently passed to {@link #doSetNextReader}. */
  private int leafDocBase;

  /**
   * Builds a collector backed by a bit set created with the given size.
   *
   * @param maxDoc size passed to the {@link FixedBitSet} constructor
   */
  FixedBitSetCollector(int maxDoc) {
    hits = new FixedBitSet(maxDoc);
  }

  @Override
  public ScoreMode scoreMode() {
    return ScoreMode.COMPLETE_NO_SCORES;
  }

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    // Remember the offset to apply to the doc ids collected for this leaf.
    this.leafDocBase = context.docBase;
  }

  @Override
  public void collect(int doc) throws IOException {
    final int shiftedDoc = leafDocBase + doc;
    hits.set(shiftedDoc);
  }

  /**
   * Returns a {@link CollectorManager} whose collectors each accumulate matching docs in their own
   * {@link FixedBitSet}, and whose reduce step ORs those bit sets into a single, newly allocated
   * {@link FixedBitSet}.
   *
   * @param maxDoc size used for every bit set created by the manager
   * @return a manager producing {@link FixedBitSetCollector}s and reducing them to one bit set
   */
  public static CollectorManager<FixedBitSetCollector, FixedBitSet> createManager(int maxDoc) {
    return new CollectorManager<>() {
      @Override
      public FixedBitSet reduce(Collection<FixedBitSetCollector> collectors) {
        // Start from an empty set so that no collector's own bit set is mutated.
        final FixedBitSet merged = new FixedBitSet(maxDoc);
        collectors.forEach(each -> merged.or(each.hits));
        return merged;
      }

      @Override
      public FixedBitSetCollector newCollector() {
        return new FixedBitSetCollector(maxDoc);
      }
    };
  }
}