mirror of https://github.com/apache/lucene.git
Fix SpanScorer next()/skipTo() incompatability: LUCENE-413
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@391337 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
286f4f5f07
commit
040e9c6c9a
|
@ -16,6 +16,9 @@ Bug fixes
|
||||||
IndexWriter. These could be set in Lucene 1.4 using a system property.
|
IndexWriter. These could be set in Lucene 1.4 using a system property.
|
||||||
This feature had been removed without adding the corresponding
|
This feature had been removed without adding the corresponding
|
||||||
getter/setter methods. (Daniel Naber)
|
getter/setter methods. (Daniel Naber)
|
||||||
|
|
||||||
|
4. LUCENE-413: Fixed ArrayIndexOutOfBoundsException exceptions
|
||||||
|
when using SpanQueries. (Paul Elschot via Yonik Seeley)
|
||||||
|
|
||||||
1.9.1
|
1.9.1
|
||||||
|
|
||||||
|
|
|
@ -207,7 +207,7 @@ public class DisjunctionSumScorer extends Scorer {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (target <= currentDoc) {
|
if (target <= currentDoc) {
|
||||||
target = currentDoc + 1;
|
return true;
|
||||||
}
|
}
|
||||||
do {
|
do {
|
||||||
Scorer top = (Scorer) scorerQueue.top();
|
Scorer top = (Scorer) scorerQueue.top();
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package org.apache.lucene.search.spans;
|
package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copyright 2004 The Apache Software Foundation
|
* Copyright 2006 The Apache Software Foundation
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
* you may not use this file except in compliance with the License.
|
* you may not use this file except in compliance with the License.
|
||||||
|
@ -43,6 +43,7 @@ class SpanScorer extends Scorer {
|
||||||
this.norms = norms;
|
this.norms = norms;
|
||||||
this.weight = weight;
|
this.weight = weight;
|
||||||
this.value = weight.getValue();
|
this.value = weight.getValue();
|
||||||
|
doc = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean next() throws IOException {
|
public boolean next() throws IOException {
|
||||||
|
@ -50,19 +51,35 @@ class SpanScorer extends Scorer {
|
||||||
more = spans.next();
|
more = spans.next();
|
||||||
firstTime = false;
|
firstTime = false;
|
||||||
}
|
}
|
||||||
|
return setFreqCurrentDoc();
|
||||||
|
}
|
||||||
|
|
||||||
if (!more) return false;
|
public boolean skipTo(int target) throws IOException {
|
||||||
|
if (firstTime) {
|
||||||
|
more = spans.skipTo(target);
|
||||||
|
firstTime = false;
|
||||||
|
}
|
||||||
|
if (! more) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (spans.doc() < target) { // setFreqCurrentDoc() leaves spans.doc() ahead
|
||||||
|
more = spans.skipTo(target);
|
||||||
|
}
|
||||||
|
return setFreqCurrentDoc();
|
||||||
|
}
|
||||||
|
|
||||||
freq = 0.0f;
|
private boolean setFreqCurrentDoc() throws IOException {
|
||||||
|
if (! more) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
doc = spans.doc();
|
doc = spans.doc();
|
||||||
|
freq = 0.0f;
|
||||||
while (more && doc == spans.doc()) {
|
while (more && doc == spans.doc()) {
|
||||||
int matchLength = spans.end() - spans.start();
|
int matchLength = spans.end() - spans.start();
|
||||||
freq += getSimilarity().sloppyFreq(matchLength);
|
freq += getSimilarity().sloppyFreq(matchLength);
|
||||||
more = spans.next();
|
more = spans.next();
|
||||||
}
|
}
|
||||||
|
return more || (freq != 0);
|
||||||
return more || freq != 0.0f;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int doc() { return doc; }
|
public int doc() { return doc; }
|
||||||
|
@ -72,22 +89,6 @@ class SpanScorer extends Scorer {
|
||||||
return raw * Similarity.decodeNorm(norms[doc]); // normalize
|
return raw * Similarity.decodeNorm(norms[doc]); // normalize
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean skipTo(int target) throws IOException {
|
|
||||||
more = spans.skipTo(target);
|
|
||||||
|
|
||||||
if (!more) return false;
|
|
||||||
|
|
||||||
freq = 0.0f;
|
|
||||||
doc = spans.doc();
|
|
||||||
|
|
||||||
while (more && spans.doc() == target) {
|
|
||||||
freq += getSimilarity().sloppyFreq(spans.end() - spans.start());
|
|
||||||
more = spans.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
return more || freq != 0.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Explanation explain(final int doc) throws IOException {
|
public Explanation explain(final int doc) throws IOException {
|
||||||
Explanation tfExplanation = new Explanation();
|
Explanation tfExplanation = new Explanation();
|
||||||
|
|
||||||
|
|
|
@ -25,11 +25,7 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
|
||||||
import org.apache.lucene.search.Hits;
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
@ -44,6 +40,8 @@ public class TestSpansAdvanced extends TestCase {
|
||||||
// location to the index
|
// location to the index
|
||||||
protected Directory mDirectory;;
|
protected Directory mDirectory;;
|
||||||
|
|
||||||
|
protected IndexSearcher searcher;
|
||||||
|
|
||||||
// field names in the index
|
// field names in the index
|
||||||
private final static String FIELD_ID = "ID";
|
private final static String FIELD_ID = "ID";
|
||||||
protected final static String FIELD_TEXT = "TEXT";
|
protected final static String FIELD_TEXT = "TEXT";
|
||||||
|
@ -52,7 +50,6 @@ public class TestSpansAdvanced extends TestCase {
|
||||||
* Initializes the tests by adding 4 identical documents to the index.
|
* Initializes the tests by adding 4 identical documents to the index.
|
||||||
*/
|
*/
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
|
|
||||||
super.setUp();
|
super.setUp();
|
||||||
|
|
||||||
// create test index
|
// create test index
|
||||||
|
@ -63,10 +60,11 @@ public class TestSpansAdvanced extends TestCase {
|
||||||
addDocument(writer, "3", "I think it should work.");
|
addDocument(writer, "3", "I think it should work.");
|
||||||
addDocument(writer, "4", "I think it should work.");
|
addDocument(writer, "4", "I think it should work.");
|
||||||
writer.close();
|
writer.close();
|
||||||
|
searcher = new IndexSearcher(mDirectory);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void tearDown() throws Exception {
|
protected void tearDown() throws Exception {
|
||||||
|
searcher.close();
|
||||||
mDirectory.close();
|
mDirectory.close();
|
||||||
mDirectory = null;
|
mDirectory = null;
|
||||||
}
|
}
|
||||||
|
@ -94,7 +92,7 @@ public class TestSpansAdvanced extends TestCase {
|
||||||
*/
|
*/
|
||||||
public void testBooleanQueryWithSpanQueries() throws IOException {
|
public void testBooleanQueryWithSpanQueries() throws IOException {
|
||||||
|
|
||||||
doTestBooleanQueryWithSpanQueries(0.3884282f);
|
doTestBooleanQueryWithSpanQueries(searcher,0.3884282f);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -102,57 +100,63 @@ public class TestSpansAdvanced extends TestCase {
|
||||||
*
|
*
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
protected void doTestBooleanQueryWithSpanQueries(final float expectedScore) throws IOException {
|
protected void doTestBooleanQueryWithSpanQueries(IndexSearcher s, final float expectedScore) throws IOException {
|
||||||
|
|
||||||
final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "work"));
|
final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "work"));
|
||||||
final BooleanQuery query = new BooleanQuery();
|
final BooleanQuery query = new BooleanQuery();
|
||||||
query.add(spanQuery, BooleanClause.Occur.MUST);
|
query.add(spanQuery, BooleanClause.Occur.MUST);
|
||||||
query.add(spanQuery, BooleanClause.Occur.MUST);
|
query.add(spanQuery, BooleanClause.Occur.MUST);
|
||||||
final Hits hits = executeQuery(query);
|
|
||||||
final String[] expectedIds = new String[] { "1", "2", "3", "4" };
|
final String[] expectedIds = new String[] { "1", "2", "3", "4" };
|
||||||
final float[] expectedScores = new float[] { expectedScore, expectedScore, expectedScore, expectedScore };
|
final float[] expectedScores = new float[] { expectedScore, expectedScore, expectedScore, expectedScore };
|
||||||
assertHits(hits, "two span queries", expectedIds, expectedScores);
|
assertHits(s, query, "two span queries", expectedIds, expectedScores);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Executes the query and throws an assertion if the results don't match the
|
|
||||||
* expectedHits.
|
|
||||||
*
|
|
||||||
* @param query the query to execute
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
protected Hits executeQuery(final Query query) throws IOException {
|
|
||||||
|
|
||||||
final IndexSearcher searcher = new IndexSearcher(mDirectory);
|
|
||||||
final Hits hits = searcher.search(query);
|
|
||||||
searcher.close();
|
|
||||||
return hits;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks to see if the hits are what we expected.
|
* Checks to see if the hits are what we expected.
|
||||||
*
|
*
|
||||||
* @param hits the search results
|
* @param query the query to execute
|
||||||
* @param description the description of the search
|
* @param description the description of the search
|
||||||
* @param expectedIds the expected document ids of the hits
|
* @param expectedIds the expected document ids of the hits
|
||||||
* @param expectedScores the expected scores of the hits
|
* @param expectedScores the expected scores of the hits
|
||||||
*
|
*
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
protected void assertHits(final Hits hits, final String description, final String[] expectedIds,
|
protected static void assertHits(Searcher s, Query query, final String description, final String[] expectedIds,
|
||||||
final float[] expectedScores) throws IOException {
|
final float[] expectedScores) throws IOException {
|
||||||
|
final float tolerance = 1e-5f;
|
||||||
|
|
||||||
|
// Hits hits = searcher.search(query);
|
||||||
|
// hits normalizes and throws things off if one score is greater than 1.0
|
||||||
|
TopDocs topdocs = s.search(query,null,10000);
|
||||||
|
|
||||||
|
/*****
|
||||||
// display the hits
|
// display the hits
|
||||||
/*System.out.println(hits.length() + " hits for search: \"" + description + '\"');
|
System.out.println(hits.length() + " hits for search: \"" + description + '\"');
|
||||||
for (int i = 0; i < hits.length(); i++) {
|
for (int i = 0; i < hits.length(); i++) {
|
||||||
System.out.println(" " + FIELD_ID + ':' + hits.doc(i).get(FIELD_ID) + " (score:" + hits.score(i) + ')');
|
System.out.println(" " + FIELD_ID + ':' + hits.doc(i).get(FIELD_ID) + " (score:" + hits.score(i) + ')');
|
||||||
}*/
|
}
|
||||||
|
*****/
|
||||||
|
|
||||||
// did we get the hits we expected
|
// did we get the hits we expected
|
||||||
assertEquals(expectedIds.length, hits.length());
|
assertEquals(expectedIds.length, topdocs.totalHits);
|
||||||
for (int i = 0; i < hits.length(); i++) {
|
for (int i = 0; i < topdocs.totalHits; i++) {
|
||||||
assertTrue(expectedIds[i].equals(hits.doc(i).get(FIELD_ID)));
|
//System.out.println(i + " exp: " + expectedIds[i]);
|
||||||
assertEquals(expectedScores[i], hits.score(i), 0);
|
//System.out.println(i + " field: " + hits.doc(i).get(FIELD_ID));
|
||||||
|
|
||||||
|
int id = topdocs.scoreDocs[i].doc;
|
||||||
|
float score = topdocs.scoreDocs[i].score;
|
||||||
|
Document doc = s.doc(id);
|
||||||
|
assertEquals(expectedIds[i], doc.get(FIELD_ID));
|
||||||
|
boolean scoreEq = Math.abs(expectedScores[i] - score) < tolerance;
|
||||||
|
if (!scoreEq) {
|
||||||
|
System.out.println(i + " warning, expected score: " + expectedScores[i] + ", actual " + score);
|
||||||
|
System.out.println(s.explain(query,id));
|
||||||
|
}
|
||||||
|
assertEquals(expectedScores[i], score, tolerance);
|
||||||
|
assertEquals(s.explain(query,id).getValue(), score, tolerance);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
|
@ -22,10 +22,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
|
||||||
import org.apache.lucene.search.Hits;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
* Some expanded tests to make sure my patch doesn't break other SpanTermQuery
|
* Some expanded tests to make sure my patch doesn't break other SpanTermQuery
|
||||||
|
@ -34,7 +31,7 @@ import org.apache.lucene.search.Query;
|
||||||
* @author Reece Wilton
|
* @author Reece Wilton
|
||||||
*/
|
*/
|
||||||
public class TestSpansAdvanced2 extends TestSpansAdvanced {
|
public class TestSpansAdvanced2 extends TestSpansAdvanced {
|
||||||
|
IndexSearcher searcher2;
|
||||||
/**
|
/**
|
||||||
* Initializes the tests by adding documents to the index.
|
* Initializes the tests by adding documents to the index.
|
||||||
*/
|
*/
|
||||||
|
@ -48,6 +45,9 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
|
||||||
addDocument(writer, "C", "It shouldn't.");
|
addDocument(writer, "C", "It shouldn't.");
|
||||||
addDocument(writer, "D", "Should we, should we, should we.");
|
addDocument(writer, "D", "Should we, should we, should we.");
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
|
// re-open the searcher since we added more docs
|
||||||
|
searcher2 = new IndexSearcher(mDirectory);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -69,11 +69,10 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
|
||||||
public void testSingleSpanQuery() throws IOException {
|
public void testSingleSpanQuery() throws IOException {
|
||||||
|
|
||||||
final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
|
final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
|
||||||
final Hits hits = executeQuery(spanQuery);
|
|
||||||
final String[] expectedIds = new String[] { "B", "D", "1", "2", "3", "4", "A" };
|
final String[] expectedIds = new String[] { "B", "D", "1", "2", "3", "4", "A" };
|
||||||
final float[] expectedScores = new float[] { 0.625f, 0.45927936f, 0.35355338f, 0.35355338f, 0.35355338f,
|
final float[] expectedScores = new float[] { 0.625f, 0.45927936f, 0.35355338f, 0.35355338f, 0.35355338f,
|
||||||
0.35355338f, 0.26516503f, };
|
0.35355338f, 0.26516503f, };
|
||||||
assertHits(hits, "single span query", expectedIds, expectedScores);
|
assertHits(searcher2, spanQuery, "single span query", expectedIds, expectedScores);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -88,10 +87,11 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
|
||||||
final BooleanQuery query = new BooleanQuery();
|
final BooleanQuery query = new BooleanQuery();
|
||||||
query.add(spanQuery1, BooleanClause.Occur.MUST);
|
query.add(spanQuery1, BooleanClause.Occur.MUST);
|
||||||
query.add(spanQuery2, BooleanClause.Occur.MUST);
|
query.add(spanQuery2, BooleanClause.Occur.MUST);
|
||||||
final Hits hits = executeQuery(query);
|
final String[] expectedIds = new String[] { "D", "A" };
|
||||||
final String[] expectedIds = new String[] { "A", "D" };
|
// these values were pre LUCENE-413
|
||||||
final float[] expectedScores = new float[] { 0.93163157f, 0.20698164f };
|
// final float[] expectedScores = new float[] { 0.93163157f, 0.20698164f };
|
||||||
assertHits(hits, "multiple different span queries", expectedIds, expectedScores);
|
final float[] expectedScores = new float[] { 1.0191123f, 0.93163157f };
|
||||||
|
assertHits(searcher2, query, "multiple different span queries", expectedIds, expectedScores);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -101,6 +101,6 @@ public class TestSpansAdvanced2 extends TestSpansAdvanced {
|
||||||
*/
|
*/
|
||||||
public void testBooleanQueryWithSpanQueries() throws IOException {
|
public void testBooleanQueryWithSpanQueries() throws IOException {
|
||||||
|
|
||||||
doTestBooleanQueryWithSpanQueries(0.73500174f);
|
doTestBooleanQueryWithSpanQueries(searcher2, 0.73500174f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue