mirror of
https://github.com/apache/lucene.git
synced 2025-02-12 21:15:19 +00:00
LUCENE-7395, SOLR-9315: Fix PerFieldSimilarityWrapper to also delegate query norm and coordination factor using a default similarity added as ctor param
This commit is contained in:
parent
1244928262
commit
22d24969f5
@ -40,6 +40,10 @@ New Features
|
||||
Polygon instances from a standard GeoJSON string (Robert Muir, Mike
|
||||
McCandless)
|
||||
|
||||
* LUCENE-7395: PerFieldSimilarityWrapper requires a default similarity
|
||||
for calculating query norm and coordination factor in Lucene 6.x.
|
||||
Lucene 7 will no longer have those factors. (Uwe Schindler, Sascha Markus)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
|
||||
@ -57,6 +61,10 @@ Bug Fixes
|
||||
* LUCENE-7391: Fix performance regression in MemoryIndex's fields() introduced
|
||||
in Lucene 6. (Steve Mason via David Smiley)
|
||||
|
||||
* LUCENE-7395, SOLR-9315: Fix PerFieldSimilarityWrapper to also delegate query
|
||||
norm and coordination factor using a default similarity added as ctor param.
|
||||
(Uwe Schindler, Sascha Markus)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7323: Compound file writing now verifies the incoming
|
||||
|
@ -29,16 +29,54 @@ import org.apache.lucene.search.TermStatistics;
|
||||
* <p>
|
||||
* Subclasses should implement {@link #get(String)} to return an appropriate
|
||||
* Similarity (for example, using field-specific parameter values) for the field.
|
||||
* <p>
|
||||
* For Lucene 6, you should pass a default similarity that is used for all non
|
||||
* field-specific methods. From Lucene 7 on, this is no longer required.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class PerFieldSimilarityWrapper extends Similarity {
|
||||
|
||||
/** Default similarity used for query norm and coordination factors. */
|
||||
protected final Similarity defaultSim;
|
||||
|
||||
/**
|
||||
* Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.)
|
||||
* Constructor taking a default similarity for all non-field specific calculations.
|
||||
* @param defaultSim is used for all non field-specific calculations, like
|
||||
* {@link #queryNorm(float)} and {@link #coord(int, int)}.
|
||||
*/
|
||||
public PerFieldSimilarityWrapper() {}
|
||||
public PerFieldSimilarityWrapper(Similarity defaultSim) {
|
||||
this.defaultSim = defaultSim;
|
||||
}
|
||||
|
||||
/**
|
||||
* Backwards compatibility constructor for 6.x series that creates a per-field
|
||||
* similarity where all non field-specific methods return a constant (1).
|
||||
* <p>
|
||||
* From Lucene 7 on, this will become the default constructor again, because coordination
|
||||
* factors and query normalization will be removed.
|
||||
* @deprecated Use {@link #PerFieldSimilarityWrapper(Similarity)} and specify a default similarity for non field-specific calculations.
|
||||
*/
|
||||
@Deprecated
|
||||
public PerFieldSimilarityWrapper() {
|
||||
// a fake similarity that is only used to return the default of 1 for queryNorm and coord.
|
||||
this(new Similarity() {
// Only the three abstract, field-specific methods are implemented here, and each one
// throws AssertionError: this placeholder is presumably never asked to score a field,
// since computeNorm/computeWeight/simScorer of the wrapper go through get(String).
// coord and queryNorm are not overridden, so the inherited implementations are used.
|
||||
@Override
|
||||
public long computeNorm(FieldInvertState state) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
|
||||
throw new AssertionError();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public final long computeNorm(FieldInvertState state) {
|
||||
@ -59,6 +97,16 @@ public abstract class PerFieldSimilarityWrapper extends Similarity {
|
||||
return perFieldWeight.delegate.simScorer(perFieldWeight.delegateWeight, context);
|
||||
}
|
||||
|
||||
/**
* Returns the coordination factor computed by the default similarity
* ({@code defaultSim}) for the given overlap counts. This value is not
* looked up per field; the call is forwarded unchanged.
*/
@Override
|
||||
public final float coord(int overlap, int maxOverlap) {
|
||||
return defaultSim.coord(overlap, maxOverlap);
|
||||
}
|
||||
|
||||
/**
* Returns the query normalization factor computed by the default similarity
* ({@code defaultSim}). This value is not looked up per field; the call is
* forwarded unchanged.
*/
@Override
|
||||
public final float queryNorm(float valueForNormalization) {
|
||||
return defaultSim.queryNorm(valueForNormalization);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link Similarity} for scoring a field.
|
||||
*/
|
||||
|
@ -80,11 +80,8 @@ public class TestCustomNorms extends LuceneTestCase {
|
||||
}
|
||||
|
||||
public class MySimProvider extends PerFieldSimilarityWrapper {
|
||||
Similarity delegate = new ClassicSimilarity();
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
return delegate.queryNorm(sumOfSquaredWeights);
|
||||
public MySimProvider() {
|
||||
super(new ClassicSimilarity());
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -92,14 +89,9 @@ public class TestCustomNorms extends LuceneTestCase {
|
||||
if (floatTestField.equals(field)) {
|
||||
return new FloatEncodingBoostSimilarity();
|
||||
} else {
|
||||
return delegate;
|
||||
return defaultSim;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return delegate.coord(overlap, maxOverlap);
|
||||
}
|
||||
}
|
||||
|
||||
public static class FloatEncodingBoostSimilarity extends Similarity {
|
||||
|
@ -154,12 +154,8 @@ public class TestNorms extends LuceneTestCase {
|
||||
|
||||
|
||||
public class MySimProvider extends PerFieldSimilarityWrapper {
|
||||
Similarity delegate = new ClassicSimilarity();
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
|
||||
return delegate.queryNorm(sumOfSquaredWeights);
|
||||
public MySimProvider() {
|
||||
super(new ClassicSimilarity());
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -167,14 +163,9 @@ public class TestNorms extends LuceneTestCase {
|
||||
if (byteTestField.equals(field)) {
|
||||
return new ByteEncodingBoostSimilarity();
|
||||
} else {
|
||||
return delegate;
|
||||
return defaultSim;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return delegate.coord(overlap, maxOverlap);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -71,23 +71,13 @@ public class TestDocValuesScoring extends LuceneTestCase {
|
||||
final Similarity base = searcher1.getSimilarity(true);
|
||||
// boosting
|
||||
IndexSearcher searcher2 = newSearcher(ir, false);
|
||||
searcher2.setSimilarity(new PerFieldSimilarityWrapper() {
|
||||
searcher2.setSimilarity(new PerFieldSimilarityWrapper(base) {
|
||||
final Similarity fooSim = new BoostingSimilarity(base, "foo_boost");
|
||||
|
||||
@Override
|
||||
public Similarity get(String field) {
|
||||
return "foo".equals(field) ? fooSim : base;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return base.coord(overlap, maxOverlap);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
return base.queryNorm(sumOfSquaredWeights);
|
||||
}
|
||||
});
|
||||
|
||||
// in this case, we searched on field "foo". first document should have 2x the score.
|
||||
|
@ -89,9 +89,13 @@ public class TestSimilarityProvider extends LuceneTestCase {
|
||||
}
|
||||
|
||||
private class ExampleSimilarityProvider extends PerFieldSimilarityWrapper {
|
||||
private Similarity sim1 = new Sim1();
|
||||
private Similarity sim2 = new Sim2();
|
||||
private final Similarity sim1 = new Sim1();
|
||||
private final Similarity sim2 = new Sim2();
|
||||
|
||||
public ExampleSimilarityProvider() {
|
||||
super(new Sim1());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity get(String field) {
|
||||
if (field.equals("foo")) {
|
||||
|
@ -259,13 +259,11 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
|
||||
Directory dir = newDirectory();
|
||||
Directory taxoDir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc.setSimilarity(new PerFieldSimilarityWrapper() {
|
||||
final Similarity sim = new ClassicSimilarity();
|
||||
|
||||
iwc.setSimilarity(new PerFieldSimilarityWrapper(new ClassicSimilarity()) {
|
||||
@Override
|
||||
public Similarity get(String name) {
|
||||
assertEquals("field", name);
|
||||
return sim;
|
||||
return defaultSim;
|
||||
}
|
||||
});
|
||||
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
|
||||
|
@ -91,7 +91,7 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
||||
final SweetSpotSimilarity ssB = new SweetSpotSimilarity();
|
||||
ssB.setLengthNormFactors(5,8,0.1f, false);
|
||||
|
||||
Similarity sp = new PerFieldSimilarityWrapper() {
|
||||
Similarity sp = new PerFieldSimilarityWrapper(ss) {
|
||||
@Override
|
||||
public Similarity get(String field) {
|
||||
if (field.equals("bar"))
|
||||
|
@ -31,41 +31,54 @@ import java.util.Random;
|
||||
* for the same field.
|
||||
*/
|
||||
public class RandomSimilarity extends PerFieldSimilarityWrapper {
|
||||
final ClassicSimilarity defaultSim = new ClassicSimilarity();
|
||||
final List<Similarity> knownSims;
|
||||
Map<String,Similarity> previousMappings = new HashMap<>();
|
||||
final Map<String,Similarity> previousMappings = new HashMap<>();
|
||||
final int perFieldSeed;
|
||||
final int coordType; // 0 = no coord, 1 = coord, 2 = crazy coord
|
||||
final boolean shouldQueryNorm;
|
||||
|
||||
public RandomSimilarity(Random random) {
|
||||
super(new ClassicSimilarity() {
|
||||
final int coordType = random.nextInt(3); // 0 = no coord, 1 = coord, 2 = crazy coord
|
||||
final boolean shouldQueryNorm = random.nextBoolean();
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
if (coordType == 0) {
|
||||
return 1.0f;
|
||||
} else if (coordType == 1) {
|
||||
return super.coord(overlap, maxOverlap);
|
||||
} else {
|
||||
return overlap / ((float)maxOverlap + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
if (shouldQueryNorm) {
|
||||
return super.queryNorm(sumOfSquaredWeights);
|
||||
} else {
|
||||
return 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String toString() {
|
||||
final String coordMethod;
|
||||
if (coordType == 0) {
|
||||
coordMethod = "no";
|
||||
} else if (coordType == 1) {
|
||||
coordMethod = "yes";
|
||||
} else {
|
||||
coordMethod = "crazy";
|
||||
}
|
||||
return "queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod;
|
||||
}
|
||||
|
||||
});
|
||||
perFieldSeed = random.nextInt();
|
||||
coordType = random.nextInt(3);
|
||||
shouldQueryNorm = random.nextBoolean();
|
||||
knownSims = new ArrayList<>(allSims);
|
||||
Collections.shuffle(knownSims, random);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
if (coordType == 0) {
|
||||
return 1.0f;
|
||||
} else if (coordType == 1) {
|
||||
return defaultSim.coord(overlap, maxOverlap);
|
||||
} else {
|
||||
return overlap / ((float)maxOverlap + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
if (shouldQueryNorm) {
|
||||
return defaultSim.queryNorm(sumOfSquaredWeights);
|
||||
} else {
|
||||
return 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized Similarity get(String field) {
|
||||
assert field != null;
|
||||
@ -138,14 +151,6 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
|
||||
|
||||
@Override
|
||||
public synchronized String toString() {
|
||||
final String coordMethod;
|
||||
if (coordType == 0) {
|
||||
coordMethod = "no";
|
||||
} else if (coordType == 1) {
|
||||
coordMethod = "yes";
|
||||
} else {
|
||||
coordMethod = "crazy";
|
||||
}
|
||||
return "RandomSimilarity(queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod + "): " + previousMappings.toString();
|
||||
return "RandomSimilarity(" + defaultSim + "): " + previousMappings.toString();
|
||||
}
|
||||
}
|
||||
|
@ -132,16 +132,15 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
|
||||
}
|
||||
}
|
||||
assert null != defaultSim;
|
||||
final Similarity defaultSimilarity = defaultSim;
|
||||
similarity = new PerFieldSimilarityWrapper() {
|
||||
similarity = new PerFieldSimilarityWrapper(defaultSim) {
|
||||
@Override
|
||||
public Similarity get(String name) {
|
||||
FieldType fieldType = core.getLatestSchema().getFieldTypeNoEx(name);
|
||||
if (fieldType == null) {
|
||||
return defaultSimilarity;
|
||||
return defaultSim;
|
||||
} else {
|
||||
Similarity similarity = fieldType.getSimilarity();
|
||||
return similarity == null ? defaultSimilarity : similarity;
|
||||
return similarity == null ? defaultSim : similarity;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user