mirror of https://github.com/apache/lucene.git

LUCENE-6758: don't let queries over nonexistent fields screw up querynorm

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1701895 13f79535-47bb-0310-9956-ffa450edef68

parent 50bf071600
commit 921c285350
@@ -28,6 +28,9 @@ New Features
   length computations, to avoid skew from documents that don't have the field.
   (Ahmet Arslan via Robert Muir)
 
+* LUCENE-6758: Use docCount+1 for DefaultSimilarity's IDF, so that queries
+  containing nonexistent fields won't screw up querynorm. (Terry Smith, Robert Muir)
+
 * SOLR-7876: The QueryTimeout interface now has a isTimeoutEnabled method
   that can return false to exit from ExitableDirectoryReader wrapping at
   the point fields() is called. (yonik)
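For context on why the +1 matters: with the old formula, a query term over a field that exists in no document sees docCount = 0 and docFreq = 0, so idf = log(0/1) + 1 = -Infinity, and that infinite weight poisons the query normalization for every other clause in the query. A minimal sketch contrasting the two formulas (the helper names are illustrative, not part of the patch):

    // hypothetical before/after helpers mirroring DefaultSimilarity.idf()
    static float oldIdf(long docFreq, long docCount) {
      return (float) (Math.log(docCount / (double) (docFreq + 1)) + 1.0);
    }

    static float newIdf(long docFreq, long docCount) {
      return (float) (Math.log((docCount + 1) / (double) (docFreq + 1)) + 1.0);
    }

    // nonexistent field: docFreq = 0, docCount = 0
    // oldIdf(0, 0) -> -Infinity (squares to +Infinity inside the query norm)
    // newIdf(0, 0) -> 1.0       (a harmless neutral weight)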
@@ -25,6 +25,7 @@ import org.apache.lucene.classification.utils.ConfusionMatrixGenerator;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.BM25Similarity;
 import org.apache.lucene.search.similarities.LMDirichletSimilarity;
 import org.apache.lucene.util.BytesRef;
 import org.junit.Test;
@@ -42,7 +43,7 @@ public class KNearestNeighborClassifierTest extends ClassificationTestBase<BytesRef> {
       leafReader = getSampleIndex(analyzer);
       checkCorrectClassification(new KNearestNeighborClassifier(leafReader, null, analyzer, null, 1, 0, 0, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
       checkCorrectClassification(new KNearestNeighborClassifier(leafReader, new LMDirichletSimilarity(), analyzer, null, 1, 0, 0, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
-      ClassificationResult<BytesRef> resultDS = checkCorrectClassification(new KNearestNeighborClassifier(leafReader, null, analyzer, null, 3, 2, 1, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
+      ClassificationResult<BytesRef> resultDS = checkCorrectClassification(new KNearestNeighborClassifier(leafReader, new BM25Similarity(), analyzer, null, 3, 2, 1, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
       ClassificationResult<BytesRef> resultLMS = checkCorrectClassification(new KNearestNeighborClassifier(leafReader, new LMDirichletSimilarity(), analyzer, null, 3, 2, 1, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
       assertTrue(resultDS.getScore() != resultLMS.getScore());
     } finally {
@@ -133,10 +133,10 @@ public class DefaultSimilarity extends TFIDFSimilarity {
     return 1;
   }
 
-  /** Implemented as <code>log(docCount/(docFreq+1)) + 1</code>. */
+  /** Implemented as <code>log((docCount+1)/(docFreq+1)) + 1</code>. */
   @Override
   public float idf(long docFreq, long docCount) {
-    return (float)(Math.log(docCount/(double)(docFreq+1)) + 1.0);
+    return (float)(Math.log((docCount+1)/(double)(docFreq+1)) + 1.0);
   }
 
   /**
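The new constant can be sanity-checked against the test updates further down. In TestValueSources the term "test" occurs in the "text" field of both indexed documents (docFreq = 2, docCount = 2), so the expected IDF moves from log(2/3) + 1 ≈ 0.5945349 to log(3/3) + 1 = 1.0, matching the updated assertHits values. As a sketch:

    // assumes docFreq = 2, docCount = 2, as in the TestValueSources IDF case below
    double before = Math.log(2 / 3.0) + 1;       // ≈ 0.5945349 (old formula)
    double after  = Math.log((2 + 1) / 3.0) + 1; // = 1.0 (new formula)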
@@ -346,7 +346,7 @@ import org.apache.lucene.util.BytesRef;
  * </td>
  * <td valign="middle" align="center">
  * <table summary="inverse document frequency computation">
- * <tr><td align="center" style="text-align: center"><small>docCount</small></td></tr>
+ * <tr><td align="center" style="text-align: center"><small>docCount+1</small></td></tr>
  * <tr><td align="center" style="text-align: center">–––––––––</td></tr>
  * <tr><td align="center" style="text-align: center"><small>docFreq+1</small></td></tr>
  * </table>
@@ -330,11 +330,11 @@ public class TestPhraseQuery extends LuceneTestCase {
     assertEquals(3, hits.length);
     // Make sure that those matches where the terms appear closer to
     // each other get a higher score:
-    assertEquals(0.71, hits[0].score, 0.01);
+    assertEquals(1.0, hits[0].score, 0.01);
     assertEquals(0, hits[0].doc);
-    assertEquals(0.44, hits[1].score, 0.01);
+    assertEquals(0.62, hits[1].score, 0.01);
     assertEquals(1, hits[1].doc);
-    assertEquals(0.31, hits[2].score, 0.01);
+    assertEquals(0.43, hits[2].score, 0.01);
     assertEquals(2, hits[2].doc);
     QueryUtils.check(random(), query,searcher);
     reader.close();
@@ -121,17 +121,6 @@ public class TestTermScorer extends LuceneTestCase {
     // The scores should be the same
     assertTrue(doc0.score + " does not equal: " + doc5.score,
         doc0.score == doc5.score);
-    /*
-     * Score should be (based on Default Sim.: All floats are approximate tf = 1
-     * numDocs = 6 docFreq(all) = 2 idf = ln(6/3) + 1 = 1.693147 idf ^ 2 =
-     * 2.8667 boost = 1 lengthNorm = 1 //there is 1 term in every document coord
-     * = 1 sumOfSquaredWeights = (idf * boost) ^ 2 = 1.693147 ^ 2 = 2.8667
-     * queryNorm = 1 / (sumOfSquaredWeights)^0.5 = 1 /(1.693147) = 0.590
-     *
-     * score = 1 * 2.8667 * 1 * 1 * 0.590 = 1.69
-     */
-    assertTrue(doc0.score + " does not equal: " + 1.6931472f,
-        doc0.score == 1.6931472f);
   }
 
   public void testNext() throws Exception {
@@ -145,10 +134,8 @@ public class TestTermScorer extends LuceneTestCase {
     Scorer ts = weight.scorer(context);
     assertTrue("next did not return a doc",
         ts.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-    assertTrue("score is not correct", ts.score() == 1.6931472f);
     assertTrue("next did not return a doc",
         ts.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-    assertTrue("score is not correct", ts.score() == 1.6931472f);
     assertTrue("next returned a doc and it should not have",
         ts.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
   }
@@ -17,9 +17,52 @@ package org.apache.lucene.search.similarities;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 
 public class TestDefaultSimilarity extends LuceneTestCase {
+  private Directory directory;
+  private IndexReader indexReader;
+  private IndexSearcher indexSearcher;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    directory = newDirectory();
+    try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig())) {
+      Document document = new Document();
+      document.add(new StringField("test", "hit", Store.NO));
+      indexWriter.addDocument(document);
+      indexWriter.commit();
+    }
+    indexReader = DirectoryReader.open(directory);
+    indexSearcher = newSearcher(indexReader);
+    indexSearcher.setSimilarity(new DefaultSimilarity());
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    IOUtils.close(indexReader, directory);
+    super.tearDown();
+  }
+
   // Javadocs give this as an example so we test to make sure it's correct:
   public void testPrecisionLoss() throws Exception {
@@ -27,4 +70,92 @@ public class TestDefaultSimilarity extends LuceneTestCase {
     float v = sim.decodeNormValue(sim.encodeNormValue(.89f));
     assertEquals(0.875f, v, 0.0001f);
   }
+
+  public void testHit() throws IOException {
+    Query query = new TermQuery(new Term("test", "hit"));
+    TopDocs topDocs = indexSearcher.search(query, 1);
+    assertEquals(1, topDocs.totalHits);
+    assertEquals(1, topDocs.scoreDocs.length);
+    assertTrue(topDocs.scoreDocs[0].score != 0);
+  }
+
+  public void testMiss() throws IOException {
+    Query query = new TermQuery(new Term("test", "miss"));
+    TopDocs topDocs = indexSearcher.search(query, 1);
+    assertEquals(0, topDocs.totalHits);
+  }
+
+  public void testEmpty() throws IOException {
+    Query query = new TermQuery(new Term("empty", "miss"));
+    TopDocs topDocs = indexSearcher.search(query, 1);
+    assertEquals(0, topDocs.totalHits);
+  }
+
+  public void testBQHit() throws IOException {
+    Query query = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("test", "hit")), Occur.SHOULD)
+        .build();
+    TopDocs topDocs = indexSearcher.search(query, 1);
+    assertEquals(1, topDocs.totalHits);
+    assertEquals(1, topDocs.scoreDocs.length);
+    assertTrue(topDocs.scoreDocs[0].score != 0);
+  }
+
+  public void testBQHitOrMiss() throws IOException {
+    Query query = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("test", "hit")), Occur.SHOULD)
+        .add(new TermQuery(new Term("test", "miss")), Occur.SHOULD)
+        .build();
+    TopDocs topDocs = indexSearcher.search(query, 1);
+    assertEquals(1, topDocs.totalHits);
+    assertEquals(1, topDocs.scoreDocs.length);
+    assertTrue(topDocs.scoreDocs[0].score != 0);
+  }
+
+  public void testBQHitOrEmpty() throws IOException {
+    Query query = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term("test", "hit")), Occur.SHOULD)
+        .add(new TermQuery(new Term("empty", "miss")), Occur.SHOULD)
+        .build();
+    TopDocs topDocs = indexSearcher.search(query, 1);
+    assertEquals(1, topDocs.totalHits);
+    assertEquals(1, topDocs.scoreDocs.length);
+    assertTrue(topDocs.scoreDocs[0].score != 0);
+  }
+
+  public void testDMQHit() throws IOException {
+    Query query = new DisjunctionMaxQuery(
+        Arrays.asList(
+            new TermQuery(new Term("test", "hit"))),
+        0);
+    TopDocs topDocs = indexSearcher.search(query, 1);
+    assertEquals(1, topDocs.totalHits);
+    assertEquals(1, topDocs.scoreDocs.length);
+    assertTrue(topDocs.scoreDocs[0].score != 0);
+  }
+
+  public void testDMQHitOrMiss() throws IOException {
+    Query query = new DisjunctionMaxQuery(
+        Arrays.asList(
+            new TermQuery(new Term("test", "hit")),
+            new TermQuery(new Term("test", "miss"))),
+        0);
+    TopDocs topDocs = indexSearcher.search(query, 1);
+    assertEquals(1, topDocs.totalHits);
+    assertEquals(1, topDocs.scoreDocs.length);
+    assertTrue(topDocs.scoreDocs[0].score != 0);
+  }
+
+  public void testDMQHitOrEmpty() throws IOException {
+    Query query = new DisjunctionMaxQuery(
+        Arrays.asList(
+            new TermQuery(new Term("test", "hit")),
+            new TermQuery(new Term("empty", "miss"))),
+        0);
+    TopDocs topDocs = indexSearcher.search(query, 1);
+    assertEquals(1, topDocs.totalHits);
+    assertEquals(1, topDocs.scoreDocs.length);
+    assertTrue(topDocs.scoreDocs[0].score != 0);
+  }
 }
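The testBQHitOrEmpty and testDMQHitOrEmpty cases above are the regression tests for this issue: a clause on the nonexistent "empty" field contributes to the query norm even though it can never match. A rough sketch of the arithmetic, assuming DefaultSimilarity with the single indexed document:

    // "hit" clause: docFreq = 1, docCount = 1; "empty" clause: docFreq = 0, docCount = 0
    double idfHit      = Math.log((1 + 1) / 2.0) + 1; // 1.0
    double idfEmptyOld = Math.log(0 / 1.0) + 1;       // -Infinity before this commit
    double idfEmptyNew = Math.log((0 + 1) / 1.0) + 1; // 1.0 after it

    // queryNorm = 1 / sqrt(sum of squared clause weights); an infinite term zeroes it,
    // which is what dragged the real hit's score down with it
    double normOld = 1 / Math.sqrt(idfHit * idfHit + idfEmptyOld * idfEmptyOld); // 0.0
    double normNew = 1 / Math.sqrt(idfHit * idfHit + idfEmptyNew * idfEmptyNew); // ≈ 0.707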
@@ -1,168 +0,0 @@
-package org.apache.lucene.search.spans;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.document.Field;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenFilter;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.StoredDocument;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.similarities.DefaultSimilarity;
-import org.apache.lucene.store.Directory;
-
-/*******************************************************************************
- * Tests the span query bug in Lucene. It demonstrates that SpanTermQuerys don't
- * work correctly in a BooleanQuery.
- *
- */
-public class TestSpansAdvanced extends LuceneTestCase {
-
-  // location to the index
-  protected Directory mDirectory;
-  protected IndexReader reader;
-  protected IndexSearcher searcher;
-
-  // field names in the index
-  private final static String FIELD_ID = "ID";
-  protected final static String FIELD_TEXT = "TEXT";
-
-  /**
-   * Initializes the tests by adding 4 identical documents to the index.
-   */
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-    // create test index
-    mDirectory = newDirectory();
-    final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory,
-        newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
-            .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity()));
-    addDocument(writer, "1", "I think it should work.");
-    addDocument(writer, "2", "I think it should work.");
-    addDocument(writer, "3", "I think it should work.");
-    addDocument(writer, "4", "I think it should work.");
-    reader = writer.getReader();
-    writer.close();
-    searcher = newSearcher(reader);
-    searcher.setSimilarity(new DefaultSimilarity());
-  }
-
-  @Override
-  public void tearDown() throws Exception {
-    reader.close();
-    mDirectory.close();
-    mDirectory = null;
-    super.tearDown();
-  }
-
-  /**
-   * Adds the document to the index.
-   *
-   * @param writer the Lucene index writer
-   * @param id the unique id of the document
-   * @param text the text of the document
-   */
-  protected void addDocument(final RandomIndexWriter writer, final String id,
-      final String text) throws IOException {
-
-    final Document document = new Document();
-    document.add(newStringField(FIELD_ID, id, Field.Store.YES));
-    document.add(newTextField(FIELD_TEXT, text, Field.Store.YES));
-    writer.addDocument(document);
-  }
-
-  /**
-   * Tests two span queries.
-   */
-  public void testBooleanQueryWithSpanQueries() throws IOException {
-
-    doTestBooleanQueryWithSpanQueries(searcher, 0.54932045f);
-  }
-
-  /**
-   * Tests two span queries.
-   */
-  protected void doTestBooleanQueryWithSpanQueries(IndexSearcher s,
-      final float expectedScore) throws IOException {
-
-    final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "work"));
-    final BooleanQuery.Builder query = new BooleanQuery.Builder();
-    query.add(spanQuery, BooleanClause.Occur.MUST);
-    query.add(spanQuery, BooleanClause.Occur.MUST);
-    final String[] expectedIds = new String[] {"1", "2", "3", "4"};
-    final float[] expectedScores = new float[] {expectedScore, expectedScore,
-        expectedScore, expectedScore};
-    assertHits(s, query.build(), "two span queries", expectedIds, expectedScores);
-  }
-
-  /**
-   * Checks to see if the hits are what we expected.
-   *
-   * @param query the query to execute
-   * @param description the description of the search
-   * @param expectedIds the expected document ids of the hits
-   * @param expectedScores the expected scores of the hits
-   */
-  protected static void assertHits(IndexSearcher s, Query query,
-      final String description, final String[] expectedIds,
-      final float[] expectedScores) throws IOException {
-    QueryUtils.check(random(), query, s);
-
-    final float tolerance = 1e-5f;
-
-    // Hits hits = searcher.search(query);
-    // hits normalizes and throws things off if one score is greater than 1.0
-    TopDocs topdocs = s.search(query, 10000);
-
-    /*****
-     * // display the hits System.out.println(hits.length() +
-     * " hits for search: \"" + description + '\"'); for (int i = 0; i <
-     * hits.length(); i++) { System.out.println("  " + FIELD_ID + ':' +
-     * hits.doc(i).get(FIELD_ID) + " (score:" + hits.score(i) + ')'); }
-     *****/
-
-    // did we get the hits we expected
-    assertEquals(expectedIds.length, topdocs.totalHits);
-    for (int i = 0; i < topdocs.totalHits; i++) {
-      // System.out.println(i + " exp: " + expectedIds[i]);
-      // System.out.println(i + " field: " + hits.doc(i).get(FIELD_ID));
-
-      int id = topdocs.scoreDocs[i].doc;
-      float score = topdocs.scoreDocs[i].score;
-      StoredDocument doc = s.doc(id);
-      assertEquals(expectedIds[i], doc.get(FIELD_ID));
-      boolean scoreEq = Math.abs(expectedScores[i] - score) < tolerance;
-      if (!scoreEq) {
-        System.out.println(i + " warning, expected score: " + expectedScores[i]
-            + ", actual " + score);
-        System.out.println(s.explain(query, id));
-      }
-      assertEquals(expectedScores[i], score, tolerance);
-      assertEquals(s.explain(query, id).getValue(), score, tolerance);
-    }
-  }
-
-}
@@ -1,121 +0,0 @@
-package org.apache.lucene.search.spans;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenFilter;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.similarities.DefaultSimilarity;
-
-/*******************************************************************************
- * Some expanded tests to make sure my patch doesn't break other SpanTermQuery
- * functionality.
- *
- */
-public class TestSpansAdvanced2 extends TestSpansAdvanced {
-  IndexSearcher searcher2;
-  IndexReader reader2;
-
-  /**
-   * Initializes the tests by adding documents to the index.
-   */
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-
-    // create test index
-    final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory,
-        newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
-            .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())
-            .setSimilarity(new DefaultSimilarity()));
-    addDocument(writer, "A", "Should we, could we, would we?");
-    addDocument(writer, "B", "It should. Should it?");
-    addDocument(writer, "C", "It shouldn't.");
-    addDocument(writer, "D", "Should we, should we, should we.");
-    reader2 = writer.getReader();
-    writer.close();
-
-    // re-open the searcher since we added more docs
-    searcher2 = newSearcher(reader2);
-    searcher2.setSimilarity(new DefaultSimilarity());
-  }
-
-  @Override
-  public void tearDown() throws Exception {
-    reader2.close();
-    super.tearDown();
-  }
-
-  /**
-   * Verifies that the index has the correct number of documents.
-   */
-  public void testVerifyIndex() throws Exception {
-    final IndexReader reader = DirectoryReader.open(mDirectory);
-    assertEquals(8, reader.numDocs());
-    reader.close();
-  }
-
-  /**
-   * Tests a single span query that matches multiple documents.
-   */
-  public void testSingleSpanQuery() throws IOException {
-
-    final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
-    final String[] expectedIds = new String[] {"B", "D", "1", "2", "3", "4",
-        "A"};
-    final float[] expectedScores = new float[] {0.8838834f, 0.64951903f,
-        0.5f, 0.5f, 0.5f, 0.5f, 0.375f,};
-    assertHits(searcher2, spanQuery, "single span query", expectedIds,
-        expectedScores);
-  }
-
-  /**
-   * Tests a single span query that matches multiple documents.
-   */
-  public void testMultipleDifferentSpanQueries() throws IOException {
-
-    final Query spanQuery1 = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
-    final Query spanQuery2 = new SpanTermQuery(new Term(FIELD_TEXT, "we"));
-    final BooleanQuery.Builder query = new BooleanQuery.Builder();
-    query.add(spanQuery1, BooleanClause.Occur.MUST);
-    query.add(spanQuery2, BooleanClause.Occur.MUST);
-    final String[] expectedIds = new String[] {"D", "A"};
-    // these values were pre LUCENE-413
-    // final float[] expectedScores = new float[] { 0.93163157f, 0.20698164f };
-    final float[] expectedScores = new float[] {1.44124233f, 1.31752586f};
-    assertHits(searcher2, query.build(), "multiple different span queries",
-        expectedIds, expectedScores);
-  }
-
-  /**
-   * Tests two span queries.
-   */
-  @Override
-  public void testBooleanQueryWithSpanQueries() throws IOException {
-
-    doTestBooleanQueryWithSpanQueries(searcher2, 1.0394494f);
-  }
-}
@@ -226,7 +226,7 @@ public class TestValueSources extends LuceneTestCase {
     try {
       searcher.setSimilarity(new DefaultSimilarity());
       ValueSource vs = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
-      assertHits(new FunctionQuery(vs), new float[] { 0.5945349f, 0.5945349f });
+      assertHits(new FunctionQuery(vs), new float[] { 1.0f, 1.0f });
       assertAllExist(vs);
     } finally {
       searcher.setSimilarity(saved);
@@ -398,7 +398,7 @@ public class TestValueSources extends LuceneTestCase {
     searcher.setSimilarity(new DefaultSimilarity());
 
     ValueSource vs = new QueryValueSource(new TermQuery(new Term("string","bar")), 42F);
-    assertHits(new FunctionQuery(vs), new float[] { 42F, 1F });
+    assertHits(new FunctionQuery(vs), new float[] { 42F, 1.4054651F });
 
     // valuesource should exist only for things matching the term query
     // sanity check via quick & dirty wrapper arround tf
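The 1.4054651F here also falls out of the formula change, assuming (an inference from the values, not stated in the diff) that this index holds two documents with the "string" field, one containing "bar" (docFreq = 1, docCount = 2), and that a lone TermQuery's score reduces to its idf once query-normalized:

    double oldExpected = Math.log(2 / 2.0) + 1;       // = 1.0, the old assertHits value
    double newExpected = Math.log((2 + 1) / 2.0) + 1; // ≈ 1.4054651, the new one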
@@ -87,8 +87,8 @@ public class DisMaxRequestHandlerTest extends SolrTestCaseJ4 {
             req("cool stuff")
             ,"//*[@numFound='3']"
             ,"//result/doc[1]/int[@name='id'][.='42']"
-            ,"//result/doc[2]/int[@name='id'][.='8675309']"
-            ,"//result/doc[3]/int[@name='id'][.='666']"
+            ,"//result/doc[2]/int[@name='id'][.='666']"
+            ,"//result/doc[3]/int[@name='id'][.='8675309']"
             );
 
     assertQ("multi qf",
@@ -1014,15 +1014,15 @@ public class StatsComponentTest extends AbstractSolrTestCase {
     assertQ("functions over a query",
             req("q","*:*", "stats", "true",
                 "stats.field", "{!lucene key=k}foo_t:cow")
-            // scores are: 1.0, 0.625, 0.5, & "missing"
-            , kpre + "double[@name='min'][.='0.5']"
-            , kpre + "double[@name='max'][.='1.0']"
-            , kpre + "double[@name='sum'][.='2.125']"
+            // TODO: change to not rely on exact scores
+            , kpre + "double[@name='min'][.='0.6115717887878418']"
+            , kpre + "double[@name='max'][.='1.2231435775756836']"
+            , kpre + "double[@name='sum'][.='2.5991801023483276']"
             , kpre + "long[@name='count'][.='3']"
             , kpre + "long[@name='missing'][.='1']"
-            , kpre + "double[@name='sumOfSquares'][.='1.640625']"
-            , kpre + "double[@name='mean'][.='0.7083333333333334']"
-            , kpre + "double[@name='stddev'][.='0.2602082499332666']"
+            , kpre + "double[@name='sumOfSquares'][.='2.4545065967701163']"
+            , kpre + "double[@name='mean'][.='0.8663933674494425']"
+            , kpre + "double[@name='stddev'][.='0.3182720497380833']"
             );
 
   }
@@ -79,10 +79,6 @@ public class TestDefaultStatsCache extends BaseDistributedSearchTestCase {
       if (clients.size() == 1) {
         // only one shard
         assertEquals(controlScore, shardScore);
-      } else {
-        assertTrue("control:" + controlScore.floatValue() + " shard:"
-            + shardScore.floatValue(),
-            controlScore.floatValue() > shardScore.floatValue());
       }
     }