LUCENE-4571: improve minShouldMatch testing

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459521 13f79535-47bb-0310-9956-ffa450edef68
2013-03-21 20:50:30 +00:00 · 2013-03-21 20:50:30 +00:00 · ba7fabb680
parent c6bc3fdd28
commit ba7fabb680
1 changed files with 236 additions and 0 deletions
--- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java
@ -0,0 +1,236 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery.BooleanWeight;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+
+/** tests BooleanScorer2's minShouldMatch */
+@SuppressCodecs({"Lucene40", "Lucene41"})
+public class TestMinShouldMatch2 extends LuceneTestCase {
+  Directory dir;
+  DirectoryReader r;
+  AtomicReader reader;
+  IndexSearcher searcher;
+  
+  static final String alwaysTerms[] = { "a" };
+  static final String commonTerms[] = { "b", "c", "d" };
+  static final String mediumTerms[] = { "e", "f", "g" };
+  static final String rareTerms[]   = { "h", "i", "j" };
+  
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    final int numDocs = 1000;
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      
+      addSome(doc, alwaysTerms);
+      
+      if (random().nextInt(100) < 90) {
+        addSome(doc, commonTerms);
+      }
+      if (random().nextInt(100) < 50) {
+        addSome(doc, mediumTerms);
+      }
+      if (random().nextInt(100) < 10) {
+        addSome(doc, rareTerms);
+      }
+      iw.addDocument(doc);
+    }
+    iw.forceMerge(1);
+    iw.close();
+    r = DirectoryReader.open(dir);
+    reader = getOnlySegmentReader(r);
+    searcher = new IndexSearcher(reader);
+  }
+  
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    dir.close();
+    super.tearDown();
+  }
+  
+  private void addSome(Document doc, String values[]) {
+    List<String> list = Arrays.asList(values);
+    Collections.shuffle(list, random());
+    int howMany = _TestUtil.nextInt(random(), 1, list.size());
+    for (int i = 0; i < howMany; i++) {
+      doc.add(new StringField("field", list.get(i), Field.Store.NO));
+      doc.add(new SortedSetDocValuesField("dv", new BytesRef(list.get(i))));
+    }
+  }
+  
+  private Scorer scorer(String values[], int minShouldMatch, boolean slow) throws Exception {
+    BooleanQuery bq = new BooleanQuery();
+    for (String value : values) {
+      bq.add(new TermQuery(new Term("field", value)), BooleanClause.Occur.SHOULD);
+    }
+    bq.setMinimumNumberShouldMatch(minShouldMatch);
+
+    BooleanWeight weight = (BooleanWeight) searcher.createNormalizedWeight(bq);
+    
+    if (slow) {
+      return new SlowMinShouldMatchScorer(weight, reader.getSortedSetDocValues("dv"), reader.maxDoc());
+    } else {
+      return weight.scorer(reader.getContext(), true, false, null);
+    }
+  }
+  
+  private void assertNext(Scorer expected, Scorer actual) throws Exception {
+    if (actual == null) {
+      assertEquals(DocIdSetIterator.NO_MORE_DOCS, expected.nextDoc());
+    }
+    int doc;
+    while ((doc = expected.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      assertEquals(doc, actual.nextDoc());
+    }
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, actual.nextDoc());
+  }
+  
+  private void assertAdvance(Scorer expected, Scorer actual, int amount) throws Exception {
+    if (actual == null) {
+      assertEquals(DocIdSetIterator.NO_MORE_DOCS, expected.nextDoc());
+    }
+    int prevDoc = 0;
+    int doc;
+    while ((doc = expected.advance(prevDoc+amount)) != DocIdSetIterator.NO_MORE_DOCS) {
+      assertEquals(doc, actual.advance(prevDoc+amount));
+      prevDoc = doc;
+    }
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, actual.advance(prevDoc+amount));
+  }
+  
+  /** simple test for next() */
+  public void testNext() throws Exception {
+    Scorer expected = scorer(new String[] { "b", "f", "j" }, 2, true);
+    Scorer actual = scorer(new String[] { "b", "f", "j" }, 2, false);
+    assertNext(expected, actual);
+  }
+  
+  /** simple test for advance() */
+  public void testAdvance() throws Exception {
+    Scorer expected = scorer(new String[] { "b", "f", "j" }, 2, true);
+    Scorer actual = scorer(new String[] { "b", "f", "j" }, 2, false);
+    assertAdvance(expected, actual, 25);
+  }
+  
+  // TODO: more tests
+  
+  // a slow min-should match scorer that uses a docvalues field.
+  // later, we can make debugging easier as it can record the set of ords it currently matched
+  // and e.g. print out their values and so on for the document
+  static class SlowMinShouldMatchScorer extends Scorer {
+    int currentDoc = -1;     // current docid
+    int currentMatched = -1; // current number of terms matched
+    
+    final SortedSetDocValues dv;
+    final int maxDoc;
+
+    final Set<Long> ords = new HashSet<Long>();
+    final int minNrShouldMatch;
+
+    SlowMinShouldMatchScorer(BooleanWeight weight, SortedSetDocValues dv, int maxDoc) {
+      super(weight);
+      this.dv = dv;
+      this.maxDoc = maxDoc;
+      BooleanQuery bq = (BooleanQuery) weight.getQuery();
+      this.minNrShouldMatch = bq.getMinimumNumberShouldMatch();
+      for (BooleanClause clause : bq.getClauses()) {
+        assert !clause.isProhibited();
+        assert !clause.isRequired();
+        Term term = ((TermQuery)clause.getQuery()).getTerm();
+        long ord = dv.lookupTerm(term.bytes());
+        if (ord >= 0) {
+          boolean success = ords.add(ord);
+          assert success; // no dups
+        }
+      }
+    }
+
+    @Override
+    public float score() throws IOException {
+      return 1.0f; // bogus
+    }
+
+    @Override
+    public int freq() throws IOException {
+      return currentMatched;
+    }
+
+    @Override
+    public int docID() {
+      return currentDoc;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      assert currentDoc != NO_MORE_DOCS;
+      for (currentDoc = currentDoc+1; currentDoc < maxDoc; currentDoc++) {
+        currentMatched = 0;
+        dv.setDocument(currentDoc);
+        long ord;
+        while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+          if (ords.contains(ord)) {
+            currentMatched++;
+          }
+        }
+        if (currentMatched >= minNrShouldMatch) {
+          return currentDoc;
+        }
+      }
+      return currentDoc = NO_MORE_DOCS;
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      int doc;
+      while ((doc = nextDoc()) < target) {
+      }
+      return doc;
+    }
+
+    @Override
+    public long cost() {
+      return maxDoc;
+    }
+  }
+}