LUCENE-9537: Add initial Indri search engine functionality to Lucene

This commit is contained in:
cammiemw 2021-01-29 14:47:24 -05:00 committed by GitHub
parent e4cede0e8c
commit 9cc5c9b798
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 832 additions and 10 deletions

View File

@ -1,4 +1,4 @@
Lucene Change Log
Lucene Change Log
For more information on past and future Lucene versions, please see:
http://s.apache.org/luceneversions
@ -84,6 +84,17 @@ API Changes
* LUCENE-9646: Set BM25Similarity discountOverlaps via the constructor (Patrick Marty via Bruno Roustant)
* LUCENE-9537: Added smoothingScore method and default implementation to
Scorable abstract class. The smoothing score allows scorers to calculate a
score for a document where the search term or subquery is not present. The
smoothing score acts like an idf so that documents that do not have terms or
subqueries that are more frequent in the index are not penalized as much as
documents that do not have less frequent terms or subqueries and prevents
scores which are the product or terms or subqueries from going to zero. Added
the implementation of the Indri AND and the IndriDirichletSimilarity from the
academic Indri search engine: http://www.lemurproject.org/indri.php.
(Cameron VandenBerg)
Improvements
* LUCENE-9687: Hunspell support improvements: add SpellChecker API, support default encoding and

View File

@ -184,8 +184,8 @@ public class IndexSearcher {
/**
* Runs searches for each segment separately, using the provided Executor. NOTE: if you are using
* {@link NIOFSDirectory}, do not use the shutdownNow method of ExecutorService as this uses
* Thread.interrupt under-the-hood which can silently close file descriptors (see <a
* href="https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
* Thread.interrupt under-the-hood which can silently close file descriptors (see <a href=
* "https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
*
* @lucene.experimental
*/
@ -199,8 +199,8 @@ public class IndexSearcher {
* <p>Given a non-<code>null</code> {@link Executor} this method runs searches for each segment
* separately, using the provided Executor. NOTE: if you are using {@link NIOFSDirectory}, do not
* use the shutdownNow method of ExecutorService as this uses Thread.interrupt under-the-hood
* which can silently close file descriptors (see <a
* href="https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
* which can silently close file descriptors (see <a href=
* "https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
*
* @see IndexReaderContext
* @see IndexReader#getContext()
@ -729,7 +729,7 @@ public class IndexSearcher {
throws IOException {
// TODO: should we make this
// threaded...? the Collector could be sync'd?
// threaded...? the Collector could be sync'd?
// always use single thread:
for (LeafReaderContext ctx : leaves) { // search each subreader
final LeafCollector leafCollector;

View File

@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.List;
/** A Query that matches documents matching combinations of subqueries. */
public class IndriAndQuery extends IndriQuery {
public IndriAndQuery(List<BooleanClause> clauses) {
super(clauses);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
IndriAndQuery query = this;
return new IndriAndWeight(query, searcher, ScoreMode.TOP_SCORES, boost);
}
}

View File

@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.List;
/**
* Combines scores of subscorers. If a subscorer does not contain the docId, a smoothing score is
* calculated for that document/subscorer combination.
*/
public class IndriAndScorer extends IndriDisjunctionScorer {
protected IndriAndScorer(Weight weight, List<Scorer> subScorers, ScoreMode scoreMode, float boost)
throws IOException {
super(weight, subScorers, scoreMode, boost);
}
@Override
public float score(List<Scorer> subScorers) throws IOException {
int docId = this.docID();
return scoreDoc(subScorers, docId);
}
@Override
public float smoothingScore(List<Scorer> subScorers, int docId) throws IOException {
return scoreDoc(subScorers, docId);
}
private float scoreDoc(List<Scorer> subScorers, int docId) throws IOException {
double score = 0;
double boostSum = 0.0;
for (Scorer scorer : subScorers) {
if (scorer instanceof IndriScorer) {
IndriScorer indriScorer = (IndriScorer) scorer;
int scorerDocId = indriScorer.docID();
// If the query exists in the document, score the document
// Otherwise, compute a smoothing score, which acts like an idf
// for subqueries/terms
double tempScore = 0;
if (docId == scorerDocId) {
tempScore = indriScorer.score();
} else {
tempScore = indriScorer.smoothingScore(docId);
}
tempScore *= indriScorer.getBoost();
score += tempScore;
boostSum += indriScorer.getBoost();
}
}
if (boostSum == 0) {
return 0;
} else {
return (float) (score / boostSum);
}
}
}

View File

@ -0,0 +1,123 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
/** The Weight for IndriAndQuery, used to normalize, score and explain these queries. */
public class IndriAndWeight extends Weight {
private final IndriAndQuery query;
private final ArrayList<Weight> weights;
private final ScoreMode scoreMode;
private final float boost;
public IndriAndWeight(
IndriAndQuery query, IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
super(query);
this.query = query;
this.boost = boost;
this.scoreMode = scoreMode;
weights = new ArrayList<>();
for (BooleanClause c : query) {
Weight w = searcher.createWeight(c.getQuery(), scoreMode, 1.0f);
weights.add(w);
}
}
private Scorer getScorer(LeafReaderContext context) throws IOException {
List<Scorer> subScorers = new ArrayList<>();
for (Weight w : weights) {
Scorer scorer = w.scorer(context);
if (scorer != null) {
subScorers.add(scorer);
}
}
if (subScorers.isEmpty()) {
return null;
}
Scorer scorer = subScorers.get(0);
if (subScorers.size() > 1) {
scorer = new IndriAndScorer(this, subScorers, scoreMode, boost);
}
return scorer;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
return getScorer(context);
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
Scorer scorer = getScorer(context);
if (scorer != null) {
BulkScorer bulkScorer = new DefaultBulkScorer(scorer);
return bulkScorer;
}
return null;
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
for (Weight w : weights) {
if (w.isCacheable(ctx) == false) return false;
}
return true;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
List<Explanation> subs = new ArrayList<>();
boolean fail = false;
Iterator<BooleanClause> cIter = query.iterator();
for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext(); ) {
Weight w = wIter.next();
BooleanClause c = cIter.next();
Explanation e = w.explain(context, doc);
if (e.isMatch()) {
subs.add(e);
} else if (c.isRequired()) {
subs.add(
Explanation.noMatch(
"no match on required clause (" + c.getQuery().toString() + ")", e));
fail = true;
}
}
if (fail) {
return Explanation.noMatch(
"Failure to meet condition(s) of required/prohibited clause(s)", subs);
} else {
Scorer scorer = scorer(context);
if (scorer != null) {
int advanced = scorer.iterator().advance(doc);
assert advanced == doc;
return Explanation.match(scorer.score(), "sum of:", subs);
} else {
return Explanation.noMatch(
"Failure to meet condition(s) of required/prohibited clause(s)", subs);
}
}
}
}

View File

@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.List;
/**
* The Indri implemenation of a disjunction scorer which stores the subscorers for the child
* queries. The score and smoothingScore methods use the list of all subscorers and not just the
* matches so that a smoothingScore can be calculated if there is not an exact match.
*/
public abstract class IndriDisjunctionScorer extends IndriScorer {
private final List<Scorer> subScorersList;
private final DisiPriorityQueue subScorers;
private final DocIdSetIterator approximation;
protected IndriDisjunctionScorer(
Weight weight, List<Scorer> subScorersList, ScoreMode scoreMode, float boost) {
super(weight, boost);
this.subScorersList = subScorersList;
this.subScorers = new DisiPriorityQueue(subScorersList.size());
for (Scorer scorer : subScorersList) {
final DisiWrapper w = new DisiWrapper(scorer);
this.subScorers.add(w);
}
this.approximation = new DisjunctionDISIApproximation(this.subScorers);
}
@Override
public DocIdSetIterator iterator() {
return approximation;
}
@Override
public float getMaxScore(int upTo) throws IOException {
return 0;
}
public List<Scorer> getSubMatches() throws IOException {
return subScorersList;
}
abstract float score(List<Scorer> subScorers) throws IOException;
public abstract float smoothingScore(List<Scorer> subScorers, int docId) throws IOException;
@Override
public float score() throws IOException {
return score(getSubMatches());
}
@Override
public float smoothingScore(int docId) throws IOException {
return smoothingScore(getSubMatches(), docId);
}
@Override
public int docID() {
return subScorers.top().doc;
}
}

View File

@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
/**
* A Basic abstract query that all IndriQueries can extend to implement toString, equals,
* getClauses, and iterator.
*/
public abstract class IndriQuery extends Query implements Iterable<BooleanClause> {
private List<BooleanClause> clauses;
public IndriQuery(List<BooleanClause> clauses) {
this.clauses = clauses;
}
@Override
public abstract Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException;
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
int i = 0;
for (BooleanClause c : this) {
buffer.append(c.getOccur().toString());
Query subQuery = c.getQuery();
if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens
buffer.append("(");
buffer.append(subQuery.toString(field));
buffer.append(")");
} else {
buffer.append(subQuery.toString(field));
}
if (i != clauses.size() - 1) {
buffer.append(" ");
}
i += 1;
}
return buffer.toString();
}
@Override
public boolean equals(Object o) {
return sameClassAs(o) && equalsTo(getClass().cast(o));
}
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
private boolean equalsTo(IndriQuery other) {
return clauses.equals(other.clauses);
}
@Override
public int hashCode() {
int hashCode = Objects.hash(clauses);
if (hashCode == 0) {
hashCode = 1;
}
return hashCode;
}
@Override
public Iterator<BooleanClause> iterator() {
return clauses.iterator();
}
public List<BooleanClause> getClauses() {
return this.clauses;
}
}

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
/**
* The Indri parent scorer that stores the boost so that IndriScorers can use the boost outside of
* the term.
*/
public abstract class IndriScorer extends Scorer {
private float boost;
protected IndriScorer(Weight weight, float boost) {
super(weight);
this.boost = boost;
}
@Override
public abstract DocIdSetIterator iterator();
@Override
public abstract float getMaxScore(int upTo) throws IOException;
@Override
public abstract float score() throws IOException;
public abstract float smoothingScore(int docId) throws IOException;
@Override
public abstract int docID();
public float getBoost() {
return this.boost;
}
}

View File

@ -27,6 +27,21 @@ public abstract class Scorable {
/** Returns the score of the current document matching the query. */
public abstract float score() throws IOException;
/**
* Returns the smoothing score of the current document matching the query. This score is used when
* the query/term does not appear in the document, and behaves like an idf. The smoothing score is
* particularly important when the Scorer returns a product of probabilities so that the document
* score does not go to zero when one probability is zero. This can return 0 or a smoothing score.
*
* <p>Smoothing scores are described in many papers, including: Metzler, D. and Croft, W. B. ,
* "Combining the Language Model and Inference Network Approaches to Retrieval," Information
* Processing and Management Special Issue on Bayesian Networks and Information Retrieval, 40(5),
* pp.735-750.
*/
public float smoothingScore(int docId) throws IOException {
return 0f;
}
/** Returns the doc ID that is currently being scored. */
public abstract int docID();

View File

@ -75,6 +75,11 @@ public final class TermScorer extends Scorer {
return docScorer.score(postingsEnum.docID(), postingsEnum.freq());
}
@Override
public float smoothingScore(int docId) throws IOException {
return docScorer.score(docId, 0);
}
@Override
public int advanceShallow(int target) throws IOException {
return impactsDisi.advanceShallow(target);

View File

@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.similarities;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.search.Explanation;
/**
* Bayesian smoothing using Dirichlet priors as implemented in the Indri Search engine
* (http://www.lemurproject.org/indri.php). Indri Dirichelet Smoothing!
*
* <pre class="prettyprint">
* tf_E + mu*P(t|D) P(t|E)= documentLength + documentMu
* mu*P(t|C) + tf_D where P(t|D)= doclen + mu
* </pre>
*
* <p>A larger value for mu, produces more smoothing. Smoothing is most important for short
* documents where the probabilities are more granular.
*/
public class IndriDirichletSimilarity extends LMSimilarity {
/** The &mu; parameter. */
private final float mu;
/** Instantiates the similarity with the provided &mu; parameter. */
public IndriDirichletSimilarity(CollectionModel collectionModel, float mu) {
super(collectionModel);
this.mu = mu;
}
/** Instantiates the similarity with the provided &mu; parameter. */
public IndriDirichletSimilarity(float mu) {
this.mu = mu;
}
/** Instantiates the similarity with the default &mu; value of 2000. */
public IndriDirichletSimilarity(CollectionModel collectionModel) {
this(collectionModel, 2000);
}
/** Instantiates the similarity with the default &mu; value of 2000. */
public IndriDirichletSimilarity() {
this(new IndriCollectionModel(), 2000);
}
@Override
protected double score(BasicStats stats, double freq, double docLen) {
double collectionProbability = ((LMStats) stats).getCollectionProbability();
double score = (freq + (mu * collectionProbability)) / (docLen + mu);
return (Math.log(score));
}
@Override
protected void explain(List<Explanation> subs, BasicStats stats, double freq, double docLen) {
if (stats.getBoost() != 1.0f) {
subs.add(Explanation.match(stats.getBoost(), "boost"));
}
subs.add(Explanation.match(mu, "mu"));
double collectionProbability = ((LMStats) stats).getCollectionProbability();
Explanation weightExpl =
Explanation.match(
(float) Math.log((freq + (mu * collectionProbability)) / (docLen + mu)), "term weight");
subs.add(weightExpl);
subs.add(Explanation.match((float) Math.log(mu / (docLen + mu)), "document norm"));
super.explain(subs, stats, freq, docLen);
}
/** Returns the &mu; parameter. */
public float getMu() {
return mu;
}
public String getName() {
return String.format(Locale.ROOT, "IndriDirichlet(%f)", getMu());
}
/**
* Models {@code p(w|C)} as the number of occurrences of the term in the collection, divided by
* the total number of tokens {@code + 1}.
*/
public static class IndriCollectionModel implements CollectionModel {
/** Sole constructor: parameter-free */
public IndriCollectionModel() {}
@Override
public double computeProbability(BasicStats stats) {
return ((double) stats.getTotalTermFreq()) / ((double) stats.getNumberOfFieldTokens());
}
@Override
public String getName() {
return null;
}
}
}

View File

@ -0,0 +1,165 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.IndriDirichletSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestIndriAndQuery extends LuceneTestCase {
/** threshold for comparing floats */
public static final float SCORE_COMP_THRESH = 0.0000f;
public Similarity sim = new IndriDirichletSimilarity();
public Directory index;
public IndexReader r;
public IndexSearcher s;
@Override
public void setUp() throws Exception {
super.setUp();
index = newDirectory();
RandomIndexWriter writer =
new RandomIndexWriter(
random(),
index,
newIndexWriterConfig(
new MockAnalyzer(
random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET))
.setSimilarity(sim)
.setMergePolicy(newLogMergePolicy()));
// Query is "President Washington"
{
Document d1 = new Document();
d1.add(newField("id", "d1", TextField.TYPE_STORED));
d1.add(
newTextField(
"body", "President Washington was the first leader of the US", Field.Store.YES));
writer.addDocument(d1);
}
{
Document d2 = new Document();
d2.add(newField("id", "d2", TextField.TYPE_STORED));
d2.add(
newTextField(
"body",
"The president is head of the executive branch of government",
Field.Store.YES));
writer.addDocument(d2);
}
{
Document d3 = new Document();
d3.add(newField("id", "d3", TextField.TYPE_STORED));
d3.add(
newTextField(
"body", "George Washington was a general in the Revolutionary War", Field.Store.YES));
writer.addDocument(d3);
}
{
Document d4 = new Document();
d4.add(newField("id", "d4", TextField.TYPE_STORED));
d4.add(newTextField("body", "A company or college can have a president", Field.Store.YES));
writer.addDocument(d4);
}
writer.forceMerge(1);
r = getOnlyLeafReader(writer.getReader());
writer.close();
s = new IndexSearcher(r);
s.setSimilarity(sim);
}
@Override
public void tearDown() throws Exception {
r.close();
index.close();
super.tearDown();
}
public void testSimpleQuery1() throws Exception {
BooleanClause clause1 = new BooleanClause(tq("body", "george"), Occur.SHOULD);
BooleanClause clause2 = new BooleanClause(tq("body", "washington"), Occur.SHOULD);
IndriAndQuery q = new IndriAndQuery(Arrays.asList(clause1, clause2));
ScoreDoc[] h = s.search(q, 1000).scoreDocs;
try {
assertEquals("2 docs should match " + q.toString(), 2, h.length);
} catch (Error e) {
printHits("testSimpleEqualScores1", h, s);
throw e;
}
}
public void testSimpleQuery2() throws Exception {
BooleanClause clause1 = new BooleanClause(tq("body", "president"), Occur.SHOULD);
BooleanClause clause2 = new BooleanClause(tq("body", "washington"), Occur.SHOULD);
IndriAndQuery q = new IndriAndQuery(Arrays.asList(clause1, clause2));
ScoreDoc[] h = s.search(q, 1000).scoreDocs;
try {
assertEquals("all docs should match " + q.toString(), 4, h.length);
} catch (Error e) {
printHits("testSimpleEqualScores1", h, s);
throw e;
}
}
/** macro */
protected Query tq(String f, String t) {
return new TermQuery(new Term(f, t));
}
/** macro */
protected Query tq(String f, String t, float b) {
Query q = tq(f, t);
return new BoostQuery(q, b);
}
protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception {
System.err.println("------- " + test + " -------");
for (int i = 0; i < h.length; i++) {
Document d = searcher.doc(h[i].doc);
float score = h[i].score;
System.err.println("#" + i + ": " + score + " - " + d.get("body"));
}
}
}

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.similarities;
import java.util.Random;
public class TestIndriDirichletSimilarity extends BaseSimilarityTestCase {
@Override
protected Similarity getSimilarity(Random random) {
// smoothing parameter mu, unbounded
final float mu;
switch (random.nextInt(4)) {
case 0:
// minimum value
mu = 0;
break;
case 1:
// tiny value
mu = Float.MIN_VALUE;
break;
case 2:
// maximum value
// we just limit the test to "reasonable" mu values but don't enforce
// this anywhere.
mu = Integer.MAX_VALUE;
break;
default:
// random value
mu = Integer.MAX_VALUE * random.nextFloat();
break;
}
return new IndriDirichletSimilarity(mu);
}
}

View File

@ -368,7 +368,9 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
float score = scorer.score(freq, norm);
// check that score isn't infinite or negative
assertTrue("infinite/NaN score: " + score, Float.isFinite(score));
assertTrue("negative score: " + score, score >= 0);
if (!(similarity instanceof IndriDirichletSimilarity)) {
assertTrue("negative score: " + score, score >= 0);
}
assertTrue("greater than maxScore: " + score + ">" + maxScore, score <= maxScore);
// check explanation matches
Explanation explanation =
@ -395,7 +397,9 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
float prevScore = scorer.score(prevFreq, norm);
// check that score isn't infinite or negative
assertTrue(Float.isFinite(prevScore));
assertTrue(prevScore >= 0);
if (!(similarity instanceof IndriDirichletSimilarity)) {
assertTrue(prevScore >= 0);
}
// check explanation matches
Explanation prevExplanation =
scorer.explain(
@ -419,7 +423,9 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
float prevNormScore = scorer.score(freq, norm - 1);
// check that score isn't infinite or negative
assertTrue(Float.isFinite(prevNormScore));
assertTrue(prevNormScore >= 0);
if (!(similarity instanceof IndriDirichletSimilarity)) {
assertTrue(prevNormScore >= 0);
}
// check explanation matches
Explanation prevNormExplanation =
scorer.explain(
@ -459,7 +465,9 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
float prevTermScore = prevTermScorer.score(freq, norm);
// check that score isn't infinite or negative
assertTrue(Float.isFinite(prevTermScore));
assertTrue(prevTermScore >= 0);
if (!(similarity instanceof IndriDirichletSimilarity)) {
assertTrue(prevTermScore >= 0);
}
// check explanation matches
Explanation prevTermExplanation =
prevTermScorer.explain(

View File

@ -33,6 +33,9 @@ public class TestFeature extends SolrTestCase {
// the FilterFeatureScorer may simply inherit Scorer's default implementation
if (scorerClassMethod.getName().equals("twoPhaseIterator")) continue;
// the FilterFeatureScorer may simply inherit Scorer's default implementation
if (scorerClassMethod.getName().equals("smoothingScore")) continue;
// the FilterFeatureScorer's implementation does not influence its parent Weight
if (scorerClassMethod.getName().equals("getWeight")) continue;