mirror of https://github.com/apache/lucene.git
LUCENE-5867: Add a BooleanSimilarity.
This commit is contained in:
parent
c415bc8d1d
commit
3e15233b23
|
@ -55,7 +55,10 @@ Other
|
|||
* LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward)
|
||||
|
||||
======================= Lucene 6.4.0 =======================
|
||||
(No Changes)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
|
||||
|
||||
Improvements
|
||||
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.similarities;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Simple similarity that gives terms a score that is equal to their query
|
||||
* boost. This similarity is typically used with disabled norms since neither
|
||||
* document statistics nor index statistics are used for scoring. That said,
|
||||
* if norms are enabled, they will be computed the same way as
|
||||
* {@link SimilarityBase} and {@link BM25Similarity} with
|
||||
* {@link SimilarityBase#setDiscountOverlaps(boolean) discounted overlaps}
|
||||
* so that the {@link Similarity} can be changed after the index has been
|
||||
* created.
|
||||
*/
|
||||
public class BooleanSimilarity extends Similarity {
|
||||
|
||||
private static final Similarity BM25_SIM = new BM25Similarity();
|
||||
|
||||
/** Sole constructor */
|
||||
public BooleanSimilarity() {}
|
||||
|
||||
@Override
|
||||
public long computeNorm(FieldInvertState state) {
|
||||
return BM25_SIM.computeNorm(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
return new BooleanWeight(boost);
|
||||
}
|
||||
|
||||
private static class BooleanWeight extends SimWeight {
|
||||
final float boost;
|
||||
|
||||
BooleanWeight(float boost) {
|
||||
this.boost = boost;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
|
||||
final float boost = ((BooleanWeight) weight).boost;
|
||||
|
||||
return new SimScorer() {
|
||||
|
||||
@Override
|
||||
public float score(int doc, float freq) throws IOException {
|
||||
return boost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(int doc, Explanation freq) throws IOException {
|
||||
Explanation queryBoostExpl = Explanation.match(boost, "query boost");
|
||||
return Explanation.match(
|
||||
queryBoostExpl.getValue(),
|
||||
"score(" + getClass().getSimpleName() + ", doc=" + doc + "), computed from:",
|
||||
queryBoostExpl);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float computeSlopFactor(int distance) {
|
||||
return 1f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
|
||||
return 1f;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.similarities;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestBooleanSimilarity extends LuceneTestCase {
|
||||
|
||||
public void testTermScoreIsEqualToBoost() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir,
|
||||
newIndexWriterConfig());
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("foo", "bar", Store.NO));
|
||||
doc.add(new StringField("foo", "baz", Store.NO));
|
||||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new StringField("foo", "bar", Store.NO));
|
||||
doc.add(new StringField("foo", "bar", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
DirectoryReader reader = w.getReader();
|
||||
w.close();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
searcher.setSimilarity(new BooleanSimilarity());
|
||||
TopDocs topDocs = searcher.search(new TermQuery(new Term("foo", "bar")), 2);
|
||||
assertEquals(2, topDocs.totalHits);
|
||||
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
|
||||
assertEquals(1f, topDocs.scoreDocs[1].score, 0f);
|
||||
|
||||
topDocs = searcher.search(new TermQuery(new Term("foo", "baz")), 1);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
|
||||
|
||||
topDocs = searcher.search(new BoostQuery(new TermQuery(new Term("foo", "baz")), 3f), 1);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertEquals(3f, topDocs.scoreDocs[0].score, 0f);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testPhraseScoreIsEqualToBoost() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir,
|
||||
newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("foo", "bar baz quux", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
DirectoryReader reader = w.getReader();
|
||||
w.close();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
searcher.setSimilarity(new BooleanSimilarity());
|
||||
|
||||
PhraseQuery query = new PhraseQuery(2, "foo", "bar", "quux");
|
||||
|
||||
TopDocs topDocs = searcher.search(query, 2);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
|
||||
|
||||
topDocs = searcher.search(new BoostQuery(query, 7), 2);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertEquals(7f, topDocs.scoreDocs[0].score, 0f);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSameNormsAsBM25() {
|
||||
BooleanSimilarity sim1 = new BooleanSimilarity();
|
||||
BM25Similarity sim2 = new BM25Similarity();
|
||||
sim2.setDiscountOverlaps(true);
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
final int length = TestUtil.nextInt(random(), 1, 100);
|
||||
final int position = random().nextInt(length);
|
||||
final int numOverlaps = random().nextInt(50);
|
||||
final float boost = random().nextFloat() * 10;
|
||||
FieldInvertState state = new FieldInvertState("foo", position, length, numOverlaps, 100, boost);
|
||||
assertEquals(
|
||||
sim2.computeNorm(state),
|
||||
sim1.computeNorm(state),
|
||||
0f);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -91,6 +91,7 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
|
|||
allSims = new ArrayList<>();
|
||||
allSims.add(new ClassicSimilarity());
|
||||
allSims.add(new BM25Similarity());
|
||||
allSims.add(new BooleanSimilarity());
|
||||
for (BasicModel basicModel : BASIC_MODELS) {
|
||||
for (AfterEffect afterEffect : AFTER_EFFECTS) {
|
||||
for (Normalization normalization : NORMALIZATIONS) {
|
||||
|
|
|
@ -36,7 +36,6 @@ import org.apache.lucene.codecs.lucene70.Lucene70Codec;
|
|||
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
|
||||
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
|
||||
import org.apache.lucene.index.RandomCodec;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.RandomSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
|
@ -213,7 +212,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
|
|||
TimeZone randomTimeZone = randomTimeZone(random());
|
||||
timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
|
||||
TimeZone.setDefault(timeZone);
|
||||
similarity = random().nextBoolean() ? new ClassicSimilarity() : new RandomSimilarity(random());
|
||||
similarity = new RandomSimilarity(random());
|
||||
|
||||
// Check codec restrictions once at class level.
|
||||
try {
|
||||
|
|
Loading…
Reference in New Issue