mirror of https://github.com/apache/lucene.git
LUCENE-4571: improve minShouldMatch testing
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459521 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c6bc3fdd28
commit
ba7fabb680
|
@ -0,0 +1,236 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.AtomicReader;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.BooleanQuery.BooleanWeight;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||||
|
|
||||||
|
/** tests BooleanScorer2's minShouldMatch */
|
||||||
|
@SuppressCodecs({"Lucene40", "Lucene41"})
|
||||||
|
public class TestMinShouldMatch2 extends LuceneTestCase {
|
||||||
|
Directory dir;
|
||||||
|
DirectoryReader r;
|
||||||
|
AtomicReader reader;
|
||||||
|
IndexSearcher searcher;
|
||||||
|
|
||||||
|
static final String alwaysTerms[] = { "a" };
|
||||||
|
static final String commonTerms[] = { "b", "c", "d" };
|
||||||
|
static final String mediumTerms[] = { "e", "f", "g" };
|
||||||
|
static final String rareTerms[] = { "h", "i", "j" };
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
dir = newDirectory();
|
||||||
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||||
|
final int numDocs = 1000;
|
||||||
|
for (int i = 0; i < numDocs; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
|
||||||
|
addSome(doc, alwaysTerms);
|
||||||
|
|
||||||
|
if (random().nextInt(100) < 90) {
|
||||||
|
addSome(doc, commonTerms);
|
||||||
|
}
|
||||||
|
if (random().nextInt(100) < 50) {
|
||||||
|
addSome(doc, mediumTerms);
|
||||||
|
}
|
||||||
|
if (random().nextInt(100) < 10) {
|
||||||
|
addSome(doc, rareTerms);
|
||||||
|
}
|
||||||
|
iw.addDocument(doc);
|
||||||
|
}
|
||||||
|
iw.forceMerge(1);
|
||||||
|
iw.close();
|
||||||
|
r = DirectoryReader.open(dir);
|
||||||
|
reader = getOnlySegmentReader(r);
|
||||||
|
searcher = new IndexSearcher(reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addSome(Document doc, String values[]) {
|
||||||
|
List<String> list = Arrays.asList(values);
|
||||||
|
Collections.shuffle(list, random());
|
||||||
|
int howMany = _TestUtil.nextInt(random(), 1, list.size());
|
||||||
|
for (int i = 0; i < howMany; i++) {
|
||||||
|
doc.add(new StringField("field", list.get(i), Field.Store.NO));
|
||||||
|
doc.add(new SortedSetDocValuesField("dv", new BytesRef(list.get(i))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Scorer scorer(String values[], int minShouldMatch, boolean slow) throws Exception {
|
||||||
|
BooleanQuery bq = new BooleanQuery();
|
||||||
|
for (String value : values) {
|
||||||
|
bq.add(new TermQuery(new Term("field", value)), BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
bq.setMinimumNumberShouldMatch(minShouldMatch);
|
||||||
|
|
||||||
|
BooleanWeight weight = (BooleanWeight) searcher.createNormalizedWeight(bq);
|
||||||
|
|
||||||
|
if (slow) {
|
||||||
|
return new SlowMinShouldMatchScorer(weight, reader.getSortedSetDocValues("dv"), reader.maxDoc());
|
||||||
|
} else {
|
||||||
|
return weight.scorer(reader.getContext(), true, false, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertNext(Scorer expected, Scorer actual) throws Exception {
|
||||||
|
if (actual == null) {
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, expected.nextDoc());
|
||||||
|
}
|
||||||
|
int doc;
|
||||||
|
while ((doc = expected.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
assertEquals(doc, actual.nextDoc());
|
||||||
|
}
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, actual.nextDoc());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertAdvance(Scorer expected, Scorer actual, int amount) throws Exception {
|
||||||
|
if (actual == null) {
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, expected.nextDoc());
|
||||||
|
}
|
||||||
|
int prevDoc = 0;
|
||||||
|
int doc;
|
||||||
|
while ((doc = expected.advance(prevDoc+amount)) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
assertEquals(doc, actual.advance(prevDoc+amount));
|
||||||
|
prevDoc = doc;
|
||||||
|
}
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, actual.advance(prevDoc+amount));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** simple test for next() */
|
||||||
|
public void testNext() throws Exception {
|
||||||
|
Scorer expected = scorer(new String[] { "b", "f", "j" }, 2, true);
|
||||||
|
Scorer actual = scorer(new String[] { "b", "f", "j" }, 2, false);
|
||||||
|
assertNext(expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** simple test for advance() */
|
||||||
|
public void testAdvance() throws Exception {
|
||||||
|
Scorer expected = scorer(new String[] { "b", "f", "j" }, 2, true);
|
||||||
|
Scorer actual = scorer(new String[] { "b", "f", "j" }, 2, false);
|
||||||
|
assertAdvance(expected, actual, 25);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: more tests
|
||||||
|
|
||||||
|
// a slow min-should match scorer that uses a docvalues field.
|
||||||
|
// later, we can make debugging easier as it can record the set of ords it currently matched
|
||||||
|
// and e.g. print out their values and so on for the document
|
||||||
|
static class SlowMinShouldMatchScorer extends Scorer {
|
||||||
|
int currentDoc = -1; // current docid
|
||||||
|
int currentMatched = -1; // current number of terms matched
|
||||||
|
|
||||||
|
final SortedSetDocValues dv;
|
||||||
|
final int maxDoc;
|
||||||
|
|
||||||
|
final Set<Long> ords = new HashSet<Long>();
|
||||||
|
final int minNrShouldMatch;
|
||||||
|
|
||||||
|
SlowMinShouldMatchScorer(BooleanWeight weight, SortedSetDocValues dv, int maxDoc) {
|
||||||
|
super(weight);
|
||||||
|
this.dv = dv;
|
||||||
|
this.maxDoc = maxDoc;
|
||||||
|
BooleanQuery bq = (BooleanQuery) weight.getQuery();
|
||||||
|
this.minNrShouldMatch = bq.getMinimumNumberShouldMatch();
|
||||||
|
for (BooleanClause clause : bq.getClauses()) {
|
||||||
|
assert !clause.isProhibited();
|
||||||
|
assert !clause.isRequired();
|
||||||
|
Term term = ((TermQuery)clause.getQuery()).getTerm();
|
||||||
|
long ord = dv.lookupTerm(term.bytes());
|
||||||
|
if (ord >= 0) {
|
||||||
|
boolean success = ords.add(ord);
|
||||||
|
assert success; // no dups
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float score() throws IOException {
|
||||||
|
return 1.0f; // bogus
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int freq() throws IOException {
|
||||||
|
return currentMatched;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return currentDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
assert currentDoc != NO_MORE_DOCS;
|
||||||
|
for (currentDoc = currentDoc+1; currentDoc < maxDoc; currentDoc++) {
|
||||||
|
currentMatched = 0;
|
||||||
|
dv.setDocument(currentDoc);
|
||||||
|
long ord;
|
||||||
|
while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
|
||||||
|
if (ords.contains(ord)) {
|
||||||
|
currentMatched++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (currentMatched >= minNrShouldMatch) {
|
||||||
|
return currentDoc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return currentDoc = NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
int doc;
|
||||||
|
while ((doc = nextDoc()) < target) {
|
||||||
|
}
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return maxDoc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue