LUCENE-7395, SOLR-9315: Fix PerFieldSimilarityWrapper to also delegate query norm and coordination factor using a default similarity added as ctor param

This commit is contained in:
Uwe Schindler 2016-07-27 10:06:52 +02:00
parent 1244928262
commit 22d24969f5
10 changed files with 118 additions and 83 deletions

View File

@ -40,6 +40,10 @@ New Features
Polygon instances from a standard GeoJSON string (Robert Muir, Mike
McCandless)
* LUCENE-7395: PerFieldSimilarityWrapper requires a default similarity
for calculating query norm and coordination factor in Lucene 6.x.
Lucene 7 will no longer have those factors. (Uwe Schindler, Sascha Markus)
Bug Fixes
* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
@ -57,6 +61,10 @@ Bug Fixes
* LUCENE-7391: Fix performance regression in MemoryIndex's fields() introduced
in Lucene 6. (Steve Mason via David Smiley)
* LUCENE-7395, SOLR-9315: Fix PerFieldSimilarityWrapper to also delegate query
norm and coordination factor using a default similarity added as ctor param.
(Uwe Schindler, Sascha Markus)
Improvements
* LUCENE-7323: Compound file writing now verifies the incoming

View File

@ -29,16 +29,54 @@ import org.apache.lucene.search.TermStatistics;
* <p>
* Subclasses should implement {@link #get(String)} to return an appropriate
* Similarity (for example, using field-specific parameter values) for the field.
* <p>
* For Lucene 6, you should pass a default similarity that is used for all non
* field-specific methods. From Lucene 7 on, this is no longer required.
*
* @lucene.experimental
*/
public abstract class PerFieldSimilarityWrapper extends Similarity {
/** Default similarity used for query norm and coordination factors. */
protected final Similarity defaultSim;
/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
* Constructor taking a default similarity for all non-field specific calculations.
* @param defaultSim is used for all non field-specific calculations, like
* {@link #queryNorm(float)} and {@link #coord(int, int)}.
*/
public PerFieldSimilarityWrapper() {}
/**
 * Creates a per-field wrapper whose non field-specific calculations are
 * answered by the given similarity.
 *
 * @param defaultSim similarity used by {@link #queryNorm(float)} and
 *        {@link #coord(int, int)}; must not be {@code null}
 * @throws NullPointerException if {@code defaultSim} is {@code null}
 */
public PerFieldSimilarityWrapper(Similarity defaultSim) {
  // Fail fast: coord() and queryNorm() dereference this field unconditionally,
  // so a null would otherwise only surface when one of them is first called.
  this.defaultSim = java.util.Objects.requireNonNull(defaultSim, "defaultSim must not be null");
}
/**
* Backwards compatibility constructor for the 6.x series that creates a per-field
* similarity where all non field-specific methods return a constant (1).
* <p>
* From Lucene 7 on, this no-argument form is intended to become the regular
* constructor again, because coordination factors and query normalization
* will be removed.
* @deprecated specify a default similarity for non field-specific calculations.
*/
@Deprecated
public PerFieldSimilarityWrapper() {
// A placeholder similarity that is only used to return the default of 1 for
// queryNorm and coord; it deliberately does not override either of them.
this(new Similarity() {
// The wrapper consults its default similarity only in coord() and queryNorm(),
// so the three abstract methods below are not expected to be invoked; they
// throw AssertionError to flag any such call as a programming error.
@Override
public long computeNorm(FieldInvertState state) {
throw new AssertionError();
}
@Override
public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new AssertionError();
}
@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
throw new AssertionError();
}
});
}
@Override
public final long computeNorm(FieldInvertState state) {
@ -59,6 +97,16 @@ public abstract class PerFieldSimilarityWrapper extends Similarity {
return perFieldWeight.delegate.simScorer(perFieldWeight.delegateWeight, context);
}
/**
 * Returns the coordination factor computed by the default similarity;
 * this calculation is not field-specific.
 */
@Override
public final float coord(int overlap, int maxOverlap) {
  final float coordFactor = defaultSim.coord(overlap, maxOverlap);
  return coordFactor;
}
/**
 * Returns the query normalization value computed by the default similarity;
 * this calculation is not field-specific.
 */
@Override
public final float queryNorm(float valueForNormalization) {
  final float norm = defaultSim.queryNorm(valueForNormalization);
  return norm;
}
/**
* Returns a {@link Similarity} for scoring a field.
*/

View File

@ -80,11 +80,8 @@ public class TestCustomNorms extends LuceneTestCase {
}
public class MySimProvider extends PerFieldSimilarityWrapper {
Similarity delegate = new ClassicSimilarity();
@Override
public float queryNorm(float sumOfSquaredWeights) {
return delegate.queryNorm(sumOfSquaredWeights);
public MySimProvider() {
super(new ClassicSimilarity());
}
@Override
@ -92,14 +89,9 @@ public class TestCustomNorms extends LuceneTestCase {
if (floatTestField.equals(field)) {
return new FloatEncodingBoostSimilarity();
} else {
return delegate;
return defaultSim;
}
}
@Override
public float coord(int overlap, int maxOverlap) {
return delegate.coord(overlap, maxOverlap);
}
}
public static class FloatEncodingBoostSimilarity extends Similarity {

View File

@ -154,12 +154,8 @@ public class TestNorms extends LuceneTestCase {
public class MySimProvider extends PerFieldSimilarityWrapper {
Similarity delegate = new ClassicSimilarity();
@Override
public float queryNorm(float sumOfSquaredWeights) {
return delegate.queryNorm(sumOfSquaredWeights);
public MySimProvider() {
super(new ClassicSimilarity());
}
@Override
@ -167,14 +163,9 @@ public class TestNorms extends LuceneTestCase {
if (byteTestField.equals(field)) {
return new ByteEncodingBoostSimilarity();
} else {
return delegate;
return defaultSim;
}
}
@Override
public float coord(int overlap, int maxOverlap) {
return delegate.coord(overlap, maxOverlap);
}
}

View File

@ -71,23 +71,13 @@ public class TestDocValuesScoring extends LuceneTestCase {
final Similarity base = searcher1.getSimilarity(true);
// boosting
IndexSearcher searcher2 = newSearcher(ir, false);
searcher2.setSimilarity(new PerFieldSimilarityWrapper() {
searcher2.setSimilarity(new PerFieldSimilarityWrapper(base) {
final Similarity fooSim = new BoostingSimilarity(base, "foo_boost");
@Override
public Similarity get(String field) {
return "foo".equals(field) ? fooSim : base;
}
@Override
public float coord(int overlap, int maxOverlap) {
return base.coord(overlap, maxOverlap);
}
@Override
public float queryNorm(float sumOfSquaredWeights) {
return base.queryNorm(sumOfSquaredWeights);
}
});
// in this case, we searched on field "foo". first document should have 2x the score.

View File

@ -89,9 +89,13 @@ public class TestSimilarityProvider extends LuceneTestCase {
}
private class ExampleSimilarityProvider extends PerFieldSimilarityWrapper {
private Similarity sim1 = new Sim1();
private Similarity sim2 = new Sim2();
private final Similarity sim1 = new Sim1();
private final Similarity sim2 = new Sim2();
public ExampleSimilarityProvider() {
super(new Sim1());
}
@Override
public Similarity get(String field) {
if (field.equals("foo")) {

View File

@ -259,13 +259,11 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(new PerFieldSimilarityWrapper() {
final Similarity sim = new ClassicSimilarity();
iwc.setSimilarity(new PerFieldSimilarityWrapper(new ClassicSimilarity()) {
@Override
public Similarity get(String name) {
assertEquals("field", name);
return sim;
return defaultSim;
}
});
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

View File

@ -91,7 +91,7 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
final SweetSpotSimilarity ssB = new SweetSpotSimilarity();
ssB.setLengthNormFactors(5,8,0.1f, false);
Similarity sp = new PerFieldSimilarityWrapper() {
Similarity sp = new PerFieldSimilarityWrapper(ss) {
@Override
public Similarity get(String field) {
if (field.equals("bar"))

View File

@ -31,41 +31,54 @@ import java.util.Random;
* for the same field.
*/
public class RandomSimilarity extends PerFieldSimilarityWrapper {
final ClassicSimilarity defaultSim = new ClassicSimilarity();
final List<Similarity> knownSims;
Map<String,Similarity> previousMappings = new HashMap<>();
final Map<String,Similarity> previousMappings = new HashMap<>();
final int perFieldSeed;
final int coordType; // 0 = no coord, 1 = coord, 2 = crazy coord
final boolean shouldQueryNorm;
public RandomSimilarity(Random random) {
super(new ClassicSimilarity() {
final int coordType = random.nextInt(3); // 0 = no coord, 1 = coord, 2 = crazy coord
final boolean shouldQueryNorm = random.nextBoolean();
/**
 * Coordination factor chosen by {@code coordType}: a constant 1 for type 0,
 * the superclass value for type 1, and {@code overlap / (maxOverlap + 1)}
 * for any other type.
 */
@Override
public float coord(int overlap, int maxOverlap) {
  switch (coordType) {
    case 0:
      return 1.0f;
    case 1:
      return super.coord(overlap, maxOverlap);
    default:
      return overlap / ((float) maxOverlap + 1);
  }
}
/**
 * Query normalization: the superclass value when {@code shouldQueryNorm}
 * is set, otherwise a constant 1.
 */
@Override
public float queryNorm(float sumOfSquaredWeights) {
  if (!shouldQueryNorm) {
    return 1.0f;
  }
  return super.queryNorm(sumOfSquaredWeights);
}
/**
 * Describes the randomly chosen settings: whether query normalization is
 * applied and which coordination variant ("no", "yes" or "crazy") is used.
 */
@Override
public synchronized String toString() {
  final String coordLabel;
  switch (coordType) {
    case 0:
      coordLabel = "no";
      break;
    case 1:
      coordLabel = "yes";
      break;
    default:
      coordLabel = "crazy";
      break;
  }
  return "queryNorm=" + shouldQueryNorm + ",coord=" + coordLabel;
}
});
perFieldSeed = random.nextInt();
coordType = random.nextInt(3);
shouldQueryNorm = random.nextBoolean();
knownSims = new ArrayList<>(allSims);
Collections.shuffle(knownSims, random);
}
@Override
public float coord(int overlap, int maxOverlap) {
if (coordType == 0) {
return 1.0f;
} else if (coordType == 1) {
return defaultSim.coord(overlap, maxOverlap);
} else {
return overlap / ((float)maxOverlap + 1);
}
}
@Override
public float queryNorm(float sumOfSquaredWeights) {
if (shouldQueryNorm) {
return defaultSim.queryNorm(sumOfSquaredWeights);
} else {
return 1.0f;
}
}
@Override
public synchronized Similarity get(String field) {
assert field != null;
@ -138,14 +151,6 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
@Override
public synchronized String toString() {
final String coordMethod;
if (coordType == 0) {
coordMethod = "no";
} else if (coordType == 1) {
coordMethod = "yes";
} else {
coordMethod = "crazy";
}
return "RandomSimilarity(queryNorm=" + shouldQueryNorm + ",coord=" + coordMethod + "): " + previousMappings.toString();
return "RandomSimilarity(" + defaultSim + "): " + previousMappings.toString();
}
}

View File

@ -132,16 +132,15 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
}
}
assert null != defaultSim;
final Similarity defaultSimilarity = defaultSim;
similarity = new PerFieldSimilarityWrapper() {
similarity = new PerFieldSimilarityWrapper(defaultSim) {
@Override
public Similarity get(String name) {
FieldType fieldType = core.getLatestSchema().getFieldTypeNoEx(name);
if (fieldType == null) {
return defaultSimilarity;
return defaultSim;
} else {
Similarity similarity = fieldType.getSimilarity();
return similarity == null ? defaultSimilarity : similarity;
return similarity == null ? defaultSim : similarity;
}
}
};