mirror of https://github.com/apache/lucene.git
LUCENE-8638: remove LegacyBM25Similarity
This commit is contained in:
parent
b8210dee7a
commit
eeb296ce90
|
@ -22,13 +22,12 @@ System Requirements
|
|||
|
||||
API Changes
|
||||
|
||||
* LUCENE-8638: Remove deprecated methods in FST for lookup by output.
|
||||
* LUCENE-8638: Remove many deprecated methods and classes including FST.lookupByOutput(),
|
||||
LegacyBM25Similarity and Jaspell suggester.
|
||||
|
||||
* LUCENE-8638: haversin() expressions function now returns its result in meters rather than
|
||||
kilometers.
|
||||
|
||||
* LUCENE-8638: Jaspell suggester removed.
|
||||
|
||||
* LUCENE-8982: Separate out native code to another module to allow cpp
|
||||
build with gradle. This also changes the name of the native "posix-support"
|
||||
library to LuceneNativeIO. (Zachary Chen, Dawid Weiss)
|
||||
|
|
|
@ -1,118 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.misc.search.similarity;
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
||||
/**
|
||||
* Similarity that behaves like {@link BM25Similarity} while also applying the k1+1 factor to the
|
||||
* numerator of the scoring formula
|
||||
*
|
||||
* @see BM25Similarity
|
||||
* @deprecated {@link BM25Similarity} should be used instead
|
||||
*/
|
||||
@Deprecated
|
||||
public final class LegacyBM25Similarity extends Similarity {
|
||||
|
||||
private final BM25Similarity bm25Similarity;
|
||||
|
||||
/**
|
||||
* BM25 with these default values:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code k1 = 1.2}
|
||||
* <li>{@code b = 0.75}
|
||||
* <li>{@code discountOverlaps = true}
|
||||
* </ul>
|
||||
*/
|
||||
public LegacyBM25Similarity() {
|
||||
this.bm25Similarity = new BM25Similarity();
|
||||
}
|
||||
|
||||
/**
|
||||
* BM25 with the supplied parameter values.
|
||||
*
|
||||
* @param k1 Controls non-linear term frequency normalization (saturation).
|
||||
* @param b Controls to what degree document length normalizes tf values.
|
||||
* @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is not
|
||||
* within the range {@code [0..1]}
|
||||
*/
|
||||
public LegacyBM25Similarity(float k1, float b) {
|
||||
this.bm25Similarity = new BM25Similarity(k1, b);
|
||||
}
|
||||
|
||||
/**
|
||||
* BM25 with the supplied parameter values.
|
||||
*
|
||||
* @param k1 Controls non-linear term frequency normalization (saturation).
|
||||
* @param b Controls to what degree document length normalizes tf values.
|
||||
* @param discountOverlaps True if overlap tokens (tokens with a position of increment of zero)
|
||||
* are discounted from the document's length.
|
||||
* @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is not
|
||||
* within the range {@code [0..1]}
|
||||
*/
|
||||
public LegacyBM25Similarity(float k1, float b, boolean discountOverlaps) {
|
||||
this.bm25Similarity = new BM25Similarity(k1, b, discountOverlaps);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long computeNorm(FieldInvertState state) {
|
||||
return bm25Similarity.computeNorm(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimScorer scorer(
|
||||
float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
return bm25Similarity.scorer(boost * (1 + bm25Similarity.getK1()), collectionStats, termStats);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the <code>k1</code> parameter
|
||||
*
|
||||
* @see #LegacyBM25Similarity(float, float)
|
||||
*/
|
||||
public final float getK1() {
|
||||
return bm25Similarity.getK1();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the <code>b</code> parameter
|
||||
*
|
||||
* @see #LegacyBM25Similarity(float, float)
|
||||
*/
|
||||
public final float getB() {
|
||||
return bm25Similarity.getB();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if overlap tokens are discounted from the document's length.
|
||||
*
|
||||
* @see #LegacyBM25Similarity(float, float, boolean)
|
||||
*/
|
||||
public boolean getDiscountOverlaps() {
|
||||
return bm25Similarity.getDiscountOverlaps();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return bm25Similarity.toString();
|
||||
}
|
||||
}
|
|
@ -1,143 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.misc.search.similarity;
|
||||
|
||||
import java.util.Random;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.BaseSimilarityTestCase;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
||||
@Deprecated
|
||||
public class TestLegacyBM25Similarity extends BaseSimilarityTestCase {
|
||||
|
||||
public void testIllegalK1() {
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new LegacyBM25Similarity(Float.POSITIVE_INFINITY, 0.75f);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("illegal k1 value"));
|
||||
|
||||
expected =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new LegacyBM25Similarity(-1, 0.75f);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("illegal k1 value"));
|
||||
|
||||
expected =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new LegacyBM25Similarity(Float.NaN, 0.75f);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("illegal k1 value"));
|
||||
}
|
||||
|
||||
public void testIllegalB() {
|
||||
IllegalArgumentException expected =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new LegacyBM25Similarity(1.2f, 2f);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("illegal b value"));
|
||||
|
||||
expected =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new LegacyBM25Similarity(1.2f, -1f);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("illegal b value"));
|
||||
|
||||
expected =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new LegacyBM25Similarity(1.2f, Float.POSITIVE_INFINITY);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("illegal b value"));
|
||||
|
||||
expected =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new LegacyBM25Similarity(1.2f, Float.NaN);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("illegal b value"));
|
||||
}
|
||||
|
||||
public void testDefaults() {
|
||||
LegacyBM25Similarity legacyBM25Similarity = new LegacyBM25Similarity();
|
||||
BM25Similarity bm25Similarity = new BM25Similarity();
|
||||
assertEquals(bm25Similarity.getB(), legacyBM25Similarity.getB(), 0f);
|
||||
assertEquals(bm25Similarity.getK1(), legacyBM25Similarity.getK1(), 0f);
|
||||
}
|
||||
|
||||
public void testToString() {
|
||||
LegacyBM25Similarity legacyBM25Similarity = new LegacyBM25Similarity();
|
||||
BM25Similarity bm25Similarity = new BM25Similarity();
|
||||
assertEquals(bm25Similarity.toString(), legacyBM25Similarity.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Similarity getSimilarity(Random random) {
|
||||
return new LegacyBM25Similarity(randomK1(random), randomB(random));
|
||||
}
|
||||
|
||||
private static float randomK1(Random random) {
|
||||
// term frequency normalization parameter k1
|
||||
switch (random.nextInt(4)) {
|
||||
case 0:
|
||||
// minimum value
|
||||
return 0;
|
||||
case 1:
|
||||
// tiny value
|
||||
return Float.MIN_VALUE;
|
||||
case 2:
|
||||
// maximum value
|
||||
// upper bounds on individual term's score is 43.262806 * (k1 + 1) * boost
|
||||
// we just limit the test to "reasonable" k1 values but don't enforce this anywhere.
|
||||
return Integer.MAX_VALUE;
|
||||
default:
|
||||
// random value
|
||||
return Integer.MAX_VALUE * random.nextFloat();
|
||||
}
|
||||
}
|
||||
|
||||
private static float randomB(Random random) {
|
||||
// length normalization parameter b [0 .. 1]
|
||||
switch (random.nextInt(4)) {
|
||||
case 0:
|
||||
// minimum value
|
||||
return 0;
|
||||
case 1:
|
||||
// tiny value
|
||||
return Float.MIN_VALUE;
|
||||
case 2:
|
||||
// maximum value
|
||||
return 1;
|
||||
default:
|
||||
// random value
|
||||
return random.nextFloat();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue