LUCENE-2190: add CustomScoreQuery.setNextReader so subclass knows when we advance to a new sub-reader

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@896962 13f79535-47bb-0310-9956-ffa450edef68
2010-01-07 18:52:23 +00:00 · 2010-01-07 18:52:23 +00:00 · 4fb13e4a59
parent be9e1bf1bd
commit 4fb13e4a59
7 changed files with 143 additions and 43 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -53,6 +53,11 @@ API Changes
  Since the removal of compressed fields, Store can only be YES, so
  it's not necessary to specify.  (Erik Hatcher via Mike McCandless)

+* LUCENE-2190: Added setNextReader method to CustomScoreQuery, which
+  is necessary with per-segment searching to notify the subclass
+  which reader the int doc, passed to customScore, refers to.  (Paul
+  chez Jamespot via Mike McCandless)
+
 Bug fixes

 * LUCENE-2092: BooleanQuery was ignoring disableCoord in its hashCode
--- a/src/java/org/apache/lucene/search/function/CustomScoreQuery.java
+++ b/src/java/org/apache/lucene/search/function/CustomScoreQuery.java
@ -207,6 +207,9 @@ public class CustomScoreQuery extends Query {
   * <pre>
   *     ModifiedScore = subQueryScore * valSrcScore
   * </pre>
+   *
+   * <p><b>NOTE</b>: The doc is relative to the current
+   * reader, last passed to {@link #setNextReader}.
   * 
   * @param doc id of scored doc. 
   * @param subQueryScore score of that doc by the subQuery.
@ -217,6 +220,15 @@ public class CustomScoreQuery extends Query {
    return subQueryScore * valSrcScore;
  }

+  /**
+   * Called when the scoring switches to another reader.
+   * 
+   * @param reader
+   *          next IndexReader
+   */
+  public void setNextReader(IndexReader reader) throws IOException {
+  }
+
  /**
   * Explain the custom score.
   * Whenever overriding {@link #customScore(int, float, float[])}, 
@ -385,7 +397,6 @@ public class CustomScoreQuery extends Query {
   * A scorer that applies a (callback) function on scores of the subQuery.
   */
  private class CustomScorer extends Scorer {
-    private final CustomWeight weight;
    private final float qWeight;
    private Scorer subQueryScorer;
    private Scorer[] valSrcScorers;
@ -396,12 +407,12 @@ public class CustomScoreQuery extends Query {
    private CustomScorer(Similarity similarity, IndexReader reader, CustomWeight w,
        Scorer subQueryScorer, Scorer[] valSrcScorers) throws IOException {
      super(similarity);
-      this.weight = w;
      this.qWeight = w.getValue();
      this.subQueryScorer = subQueryScorer;
      this.valSrcScorers = valSrcScorers;
      this.reader = reader;
      this.vScores = new float[valSrcScorers.length];
+      setNextReader(reader);
    }

    @Override
--- a/src/test/org/apache/lucene/search/QueryUtils.java
+++ b/src/test/org/apache/lucene/search/QueryUtils.java
@ -5,8 +5,6 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
-import java.util.ArrayList;
-import java.util.List;

 import junit.framework.Assert;

@ -17,7 +15,6 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.ReaderUtil;

 /**
 * Copyright 2005 Apache Software Foundation
@ -283,6 +280,8 @@ public class QueryUtils {
        // FUTURE: ensure scorer.doc()==-1

        final float maxDiff = 1e-5f;
+        final IndexReader lastReader[] = {null};
+
        s.search(q, new Collector() {
          private Scorer sc;
          private IndexReader reader;
@ -338,7 +337,18 @@ public class QueryUtils {
          }

          @Override
-          public void setNextReader(IndexReader reader, int docBase) {
+          public void setNextReader(IndexReader reader, int docBase) throws IOException {
+            // confirm that skipping beyond the last doc, on the
+            // previous reader, hits NO_MORE_DOCS
+            if (lastReader[0] != null) {
+              final IndexReader previousReader = lastReader[0];
+              Weight w = q.weight(new IndexSearcher(previousReader));
+              Scorer scorer = w.scorer(previousReader, true, false);
+              if (scorer != null) {
+                boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
+                Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
+              }
+            }
            this.reader = reader;
            this.scorer = null;
            lastDoc[0] = -1;
@ -350,19 +360,15 @@ public class QueryUtils {
          }
        });

-        List<IndexReader> readerList = new ArrayList<IndexReader>();
-        ReaderUtil.gatherSubReaders(readerList, s.getIndexReader());
-        IndexReader[] readers =  readerList.toArray(new IndexReader[0]);
-        for(int i = 0; i < readers.length; i++) {
-          IndexReader reader = readers[i];
-          Weight w = q.weight(s);
-          Scorer scorer = w.scorer(reader, true, false);
-          
+        if (lastReader[0] != null) {
+          // confirm that skipping beyond the last doc, on the
+          // previous reader, hits NO_MORE_DOCS
+          final IndexReader previousReader = lastReader[0];
+          Weight w = q.weight(new IndexSearcher(previousReader));
+          Scorer scorer = w.scorer(previousReader, true, false);
          if (scorer != null) {
            boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
-      
-            if (more && lastDoc[0] != -1) 
-              Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
+            Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
          }
        }
      }
@ -373,6 +379,8 @@ public class QueryUtils {
    //System.out.println("checkFirstSkipTo: "+q);
    final float maxDiff = 1e-5f;
    final int lastDoc[] = {-1};
+    final IndexReader lastReader[] = {null};
+
    s.search(q,new Collector() {
      private Scorer scorer;
      private IndexReader reader;
@ -400,9 +408,22 @@ public class QueryUtils {
          throw new RuntimeException(e);
        }
      }
+
      @Override
-      public void setNextReader(IndexReader reader, int docBase) {
-        this.reader = reader;
+      public void setNextReader(IndexReader reader, int docBase) throws IOException {
+        // confirm that skipping beyond the last doc, on the
+        // previous reader, hits NO_MORE_DOCS
+        if (lastReader[0] != null) {
+          final IndexReader previousReader = lastReader[0];
+          Weight w = q.weight(new IndexSearcher(previousReader));
+          Scorer scorer = w.scorer(previousReader, true, false);
+          if (scorer != null) {
+            boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
+            Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
+          }
+        }
+
+        this.reader = lastReader[0] = reader;
        lastDoc[0] = -1;
      }
      @Override
@ -410,22 +431,17 @@ public class QueryUtils {
        return false;
      }
    });
-    
-    List<IndexReader> readerList = new ArrayList<IndexReader>();
-    ReaderUtil.gatherSubReaders(readerList, s.getIndexReader());
-    IndexReader[] readers = readerList.toArray(new IndexReader[0]);
-    for(int i = 0; i < readers.length; i++) {
-      IndexReader reader = readers[i];
-      Weight w = q.weight(s);
-      Scorer scorer = w.scorer(reader, true, false);
-      
+
+    if (lastReader[0] != null) {
+      // confirm that skipping beyond the last doc, on the
+      // previous reader, hits NO_MORE_DOCS
+      final IndexReader previousReader = lastReader[0];
+      Weight w = q.weight(new IndexSearcher(previousReader));
+      Scorer scorer = w.scorer(previousReader, true, false);
      if (scorer != null) {
        boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
-  
-        if (more && lastDoc[0] != -1) 
-          Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
+        Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
      }
    }
-
  }
 }
--- a/src/test/org/apache/lucene/search/function/FunctionTestSetup.java
+++ b/src/test/org/apache/lucene/search/function/FunctionTestSetup.java
@ -67,9 +67,19 @@ public class FunctionTestSetup extends LuceneTestCaseJ4 {
          "So here we are, with a perhaps much less interesting ",
          "text for the test, but oh much much safer. ",
  };
+  
+  protected Directory dir;
+  protected Analyzer anlzr;
+  
+  private final boolean doMultiSegment;

-  protected Directory dir = null;
-  protected Analyzer anlzr = null;
+  public FunctionTestSetup(boolean doMultiSegment) {
+    this.doMultiSegment = doMultiSegment;
+  }
+
+  public FunctionTestSetup() {
+    this(false);
+  }

  @Override
  @After
@ -100,7 +110,10 @@ public class FunctionTestSetup extends LuceneTestCaseJ4 {
      addDoc(iw, i);
      done[i] = true;
      i = (i + 4) % N_DOCS;
-      remaining--;
+      if (doMultiSegment && remaining % 3 == 0) {
+        iw.commit();
+      }
+      remaining --;
    }
    iw.close();
  }
--- a/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java
+++ b/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java
@ -29,12 +29,18 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;

+import org.apache.lucene.index.IndexReader;
+
 /**
 * Test CustomScoreQuery search.
 */
@SuppressWarnings({"MagicNumber"})
 public class TestCustomScoreQuery extends FunctionTestSetup {

+  /* @override constructor */
+  public TestCustomScoreQuery() {
+    super(true);
+  }

  /**
   * Test that CustomScoreQuery of Type.BYTE returns the expected scores.
@ -73,7 +79,7 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
    // INT field can be parsed as float
    doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.FLOAT, 1.0);
    doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.FLOAT, 5.0);
-    // same values, but in flot format
+    // same values, but in float format
    doTestCustomScore(FLOAT_FIELD, FieldScoreQuery.Type.FLOAT, 1.0);
    doTestCustomScore(FLOAT_FIELD, FieldScoreQuery.Type.FLOAT, 6.0);
  }
@ -133,6 +139,8 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
      }
      if (valSrcScores.length == 1) {
        return subQueryScore + valSrcScores[0];
+        // confirm that skipping beyond the last doc, on the
+        // previous reader, hits NO_MORE_DOCS
      }
      return (subQueryScore + valSrcScores[0]) * valSrcScores[1]; // we know there are two
    }
@ -157,6 +165,44 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
    }
  }

+  private final class CustomExternalQuery extends CustomScoreQuery {
+    private IndexReader reader;
+    private int[] values;
+
+    public float customScore(int doc, float subScore, float valSrcScore) {
+      assertTrue(doc <= reader.maxDoc());
+      return (float) values[doc];
+    }
+
+    public void setNextReader(IndexReader r) throws IOException {
+      reader = r;
+      values = FieldCache.DEFAULT.getInts(r, INT_FIELD);
+    }
+
+    public CustomExternalQuery(Query q) {
+      super(q);
+    }
+  }
+
+  public void testCustomExternalQuery() throws Exception {
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, TEXT_FIELD,anlzr); 
+    String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
+    Query q1 = qp.parse(qtxt); 
+    
+    final Query q = new CustomExternalQuery(q1);
+    log(q);
+
+    IndexSearcher s = new IndexSearcher(dir);
+    TopDocs hits = s.search(q, 1000);
+    assertEquals(N_DOCS, hits.totalHits);
+    for(int i=0;i<N_DOCS;i++) {
+      final int doc = hits.scoreDocs[i].doc;
+      final float score = hits.scoreDocs[i].score;
+      assertEquals("doc=" + doc, (float) 1+(4*doc) % N_DOCS, score, 0.0001);
+    }
+    s.close();
+  }
+  
  // Test that FieldScoreQuery returns docs with expected score.
  private void doTestCustomScore(String field, FieldScoreQuery.Type tp, double dboost) throws Exception, ParseException {
    float boost = (float) dboost;
--- a/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java
+++ b/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java
@ -18,7 +18,6 @@ package org.apache.lucene.search.function;
 */

 import java.util.HashMap;
-import java.util.Map;

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.IndexSearcher;
@ -43,6 +42,11 @@ import static org.junit.Assert.*;
@SuppressWarnings({"UseOfSystemOutOrSystemErr"})
 public class TestFieldScoreQuery extends FunctionTestSetup {

+  /* @override constructor */
+  public TestFieldScoreQuery() {
+    super(true);
+  }
+
  /** Test that FieldScoreQuery of Type.BYTE returns docs in expected order. */
  @Test
  public void testRankByte () throws Exception {
@ -174,7 +178,7 @@ public class TestFieldScoreQuery extends FunctionTestSetup {
    expectedArrayTypes.put(FieldScoreQuery.Type.FLOAT, new float[0]);
    
    IndexSearcher s = new IndexSearcher(dir, true);
-    Object innerArray = null;
+    Object[] innerArray = new Object[s.getIndexReader().getSequentialSubReaders().length];

    boolean warned = false; // print warning once.
    for (int i=0; i<10; i++) {
@ -186,16 +190,16 @@ public class TestFieldScoreQuery extends FunctionTestSetup {
        IndexReader reader = readers[j];
        try {
          if (i == 0) {
-            innerArray = q.valSrc.getValues(reader).getInnerArray();
-            log(i + ".  compare: " + innerArray.getClass() + " to "
+            innerArray[j] = q.valSrc.getValues(reader).getInnerArray();
+            log(i + ".  compare: " + innerArray[j].getClass() + " to "
                + expectedArrayTypes.get(tp).getClass());
            assertEquals(
                "field values should be cached in the correct array type!",
-                innerArray.getClass(), expectedArrayTypes.get(tp).getClass());
+                innerArray[j].getClass(), expectedArrayTypes.get(tp).getClass());
          } else {
-            log(i + ".  compare: " + innerArray + " to "
+            log(i + ".  compare: " + innerArray[j] + " to "
                + q.valSrc.getValues(reader).getInnerArray());
-            assertSame("field values should be cached and reused!", innerArray,
+            assertSame("field values should be cached and reused!", innerArray[j],
                q.valSrc.getValues(reader).getInnerArray());
          }
        } catch (UnsupportedOperationException e) {
--- a/src/test/org/apache/lucene/search/function/TestOrdValues.java
+++ b/src/test/org/apache/lucene/search/function/TestOrdValues.java
@ -37,6 +37,11 @@ import org.junit.Test;
@SuppressWarnings({"UseOfSystemOutOrSystemErr"})
 public class TestOrdValues extends FunctionTestSetup {

+  /* @override constructor */
+  public TestOrdValues() {
+    super(false);
+  }
+
  /**
   * Test OrdFieldSource
   */