diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java new file mode 100644 index 00000000000..d2b738f64d0 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -0,0 +1,236 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery.BooleanWeight; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; + +/** tests BooleanScorer2's minShouldMatch */ +@SuppressCodecs({"Lucene40", "Lucene41"}) +public class TestMinShouldMatch2 extends LuceneTestCase { + Directory dir; + DirectoryReader r; + AtomicReader reader; + IndexSearcher searcher; + + static final String alwaysTerms[] = { "a" }; + static final String commonTerms[] = { "b", "c", "d" }; + static final String mediumTerms[] = { "e", "f", "g" }; + static final String rareTerms[] = { "h", "i", "j" }; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + final int numDocs = 1000; + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + + addSome(doc, alwaysTerms); + + if (random().nextInt(100) < 90) { + addSome(doc, commonTerms); + } + if (random().nextInt(100) < 50) { + addSome(doc, mediumTerms); + } + if (random().nextInt(100) < 10) { + addSome(doc, rareTerms); + } + iw.addDocument(doc); + } + iw.forceMerge(1); + iw.close(); + r = DirectoryReader.open(dir); + reader = getOnlySegmentReader(r); + searcher = new IndexSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + dir.close(); + super.tearDown(); + } + + private void addSome(Document doc, String values[]) { + List list = Arrays.asList(values); + Collections.shuffle(list, random()); + int howMany = _TestUtil.nextInt(random(), 1, list.size()); + for (int i = 0; i < howMany; i++) { + doc.add(new StringField("field", list.get(i), Field.Store.NO)); + doc.add(new SortedSetDocValuesField("dv", new BytesRef(list.get(i)))); + } + } + + private Scorer scorer(String values[], int minShouldMatch, boolean slow) throws Exception { + BooleanQuery bq = new BooleanQuery(); + for (String value : values) { + bq.add(new TermQuery(new Term("field", value)), BooleanClause.Occur.SHOULD); + } + bq.setMinimumNumberShouldMatch(minShouldMatch); + + BooleanWeight weight = (BooleanWeight) searcher.createNormalizedWeight(bq); + + if (slow) { + return new SlowMinShouldMatchScorer(weight, reader.getSortedSetDocValues("dv"), reader.maxDoc()); + } else { + return weight.scorer(reader.getContext(), true, false, null); + } + } + + private void assertNext(Scorer expected, Scorer actual) throws Exception { + if (actual == null) { + assertEquals(DocIdSetIterator.NO_MORE_DOCS, expected.nextDoc()); + } + int doc; + while ((doc = expected.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + assertEquals(doc, actual.nextDoc()); + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, actual.nextDoc()); + } + + private void assertAdvance(Scorer expected, Scorer actual, int amount) throws Exception { + if (actual == null) { + assertEquals(DocIdSetIterator.NO_MORE_DOCS, expected.nextDoc()); + } + int prevDoc = 0; + int doc; + while ((doc = expected.advance(prevDoc+amount)) != DocIdSetIterator.NO_MORE_DOCS) { + assertEquals(doc, actual.advance(prevDoc+amount)); + prevDoc = doc; + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, actual.advance(prevDoc+amount)); + } + + /** simple test for next() */ + public void testNext() throws Exception { + Scorer expected = scorer(new String[] { "b", "f", "j" }, 2, true); + Scorer actual = scorer(new String[] { "b", "f", "j" }, 2, false); + assertNext(expected, actual); + } + + /** simple test for advance() */ + public void testAdvance() throws Exception { + Scorer expected = scorer(new String[] { "b", "f", "j" }, 2, true); + Scorer actual = scorer(new String[] { "b", "f", "j" }, 2, false); + assertAdvance(expected, actual, 25); + } + + // TODO: more tests + + // a slow min-should match scorer that uses a docvalues field. + // later, we can make debugging easier as it can record the set of ords it currently matched + // and e.g. print out their values and so on for the document + static class SlowMinShouldMatchScorer extends Scorer { + int currentDoc = -1; // current docid + int currentMatched = -1; // current number of terms matched + + final SortedSetDocValues dv; + final int maxDoc; + + final Set ords = new HashSet(); + final int minNrShouldMatch; + + SlowMinShouldMatchScorer(BooleanWeight weight, SortedSetDocValues dv, int maxDoc) { + super(weight); + this.dv = dv; + this.maxDoc = maxDoc; + BooleanQuery bq = (BooleanQuery) weight.getQuery(); + this.minNrShouldMatch = bq.getMinimumNumberShouldMatch(); + for (BooleanClause clause : bq.getClauses()) { + assert !clause.isProhibited(); + assert !clause.isRequired(); + Term term = ((TermQuery)clause.getQuery()).getTerm(); + long ord = dv.lookupTerm(term.bytes()); + if (ord >= 0) { + boolean success = ords.add(ord); + assert success; // no dups + } + } + } + + @Override + public float score() throws IOException { + return 1.0f; // bogus + } + + @Override + public int freq() throws IOException { + return currentMatched; + } + + @Override + public int docID() { + return currentDoc; + } + + @Override + public int nextDoc() throws IOException { + assert currentDoc != NO_MORE_DOCS; + for (currentDoc = currentDoc+1; currentDoc < maxDoc; currentDoc++) { + currentMatched = 0; + dv.setDocument(currentDoc); + long ord; + while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + if (ords.contains(ord)) { + currentMatched++; + } + } + if (currentMatched >= minNrShouldMatch) { + return currentDoc; + } + } + return currentDoc = NO_MORE_DOCS; + } + + @Override + public int advance(int target) throws IOException { + int doc; + while ((doc = nextDoc()) < target) { + } + return doc; + } + + @Override + public long cost() { + return maxDoc; + } + } +}