PR 13757 follow-up: add missing with-discountOverlaps Similarity constructor variants, CHANGES.txt entries (#13845)

This commit is contained in:
Christine Poerschke 2024-10-04 17:08:35 +01:00 committed by GitHub
parent a4a6cfb39c
commit dab731175c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 81 additions and 8 deletions

View File

@ -33,7 +33,7 @@ Other
API Changes API Changes
--------------------- ---------------------
(No changes) * GITHUB#13845: Add missing with-discountOverlaps Similarity constructor variants. (Pierre Salagnac, Christine Poerschke, Robert Muir)
New Features New Features
--------------------- ---------------------
@ -376,6 +376,9 @@ API Changes
* GITHUB#13568, GITHUB#13750: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions * GITHUB#13568, GITHUB#13750: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions
or drill-down. (Egor Potemkin) or drill-down. (Egor Potemkin)
* GITHUB#13757: For similarities, provide default computeNorm implementation and remove remaining discountOverlaps setters.
(Christine Poerschke, Adrien Grand, Robert Muir)
New Features New Features
--------------------- ---------------------

View File

@ -44,13 +44,26 @@ public abstract class Axiomatic extends SimilarityBase {
protected final int queryLen; protected final int queryLen;
/** /**
* Constructor setting all Axiomatic hyperparameters * Constructor setting all Axiomatic hyperparameters and using default discountOverlaps value.
* *
* @param s hyperparam for the growth function * @param s hyperparam for the growth function
* @param queryLen the query length * @param queryLen the query length
* @param k hyperparam for the primitive weighting function * @param k hyperparam for the primitive weighting function
*/ */
public Axiomatic(float s, int queryLen, float k) { public Axiomatic(float s, int queryLen, float k) {
this(true, s, queryLen, k);
}
/**
* Constructor setting all Axiomatic hyperparameters and the discountOverlaps value.
*
* @param discountOverlaps true if overlap tokens should not impact document length for scoring.
* @param s hyperparam for the growth function
* @param queryLen the query length
* @param k hyperparam for the primitive weighting function
*/
public Axiomatic(boolean discountOverlaps, float s, int queryLen, float k) {
super(discountOverlaps);
if (Float.isFinite(s) == false || Float.isNaN(s) || s < 0 || s > 1) { if (Float.isFinite(s) == false || Float.isNaN(s) || s < 0 || s > 1) {
throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1"); throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1");
} }

View File

@ -46,11 +46,23 @@ public class DFISimilarity extends SimilarityBase {
private final Independence independence; private final Independence independence;
/** /**
* Create DFI with the specified divergence from independence measure * Create DFI with the specified divergence from independence measure and using default
* discountOverlaps value.
* *
* @param independenceMeasure measure of divergence from independence * @param independenceMeasure measure of divergence from independence
*/ */
public DFISimilarity(Independence independenceMeasure) { public DFISimilarity(Independence independenceMeasure) {
this(independenceMeasure, true);
}
/**
* Create DFI with the specified independence measure and discountOverlaps value.
*
* @param independenceMeasure measure of divergence from independence
* @param discountOverlaps true if overlap tokens should not impact document length for scoring.
*/
public DFISimilarity(Independence independenceMeasure, boolean discountOverlaps) {
super(discountOverlaps);
this.independence = independenceMeasure; this.independence = independenceMeasure;
} }

View File

@ -83,7 +83,7 @@ public class DFRSimilarity extends SimilarityBase {
protected final Normalization normalization; protected final Normalization normalization;
/** /**
* Creates DFRSimilarity from the three components. * Creates DFRSimilarity from the three components and using default discountOverlaps value.
* *
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead * <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}. * pass {@link NoNormalization}.
@ -98,7 +98,7 @@ public class DFRSimilarity extends SimilarityBase {
} }
/** /**
* Creates DFRSimilarity from the three components. * Creates DFRSimilarity from the three components and with the specified discountOverlaps value.
* *
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead * <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}. * pass {@link NoNormalization}.

View File

@ -76,7 +76,7 @@ public class IBSimilarity extends SimilarityBase {
protected final Normalization normalization; protected final Normalization normalization;
/** /**
* Creates IBSimilarity from the three components. * Creates IBSimilarity from the three components and using default discountOverlaps value.
* *
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead * <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}. * pass {@link NoNormalization}.
@ -86,6 +86,26 @@ public class IBSimilarity extends SimilarityBase {
* @param normalization term frequency normalization * @param normalization term frequency normalization
*/ */
public IBSimilarity(Distribution distribution, Lambda lambda, Normalization normalization) { public IBSimilarity(Distribution distribution, Lambda lambda, Normalization normalization) {
this(distribution, lambda, normalization, true);
}
/**
* Creates IBSimilarity from the three components and with the specified discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
*
* @param distribution probabilistic distribution modeling term occurrence
* @param lambda distribution's &lambda;<sub>w</sub> parameter
* @param normalization term frequency normalization
* @param discountOverlaps true if overlap tokens should not impact document length for scoring.
*/
public IBSimilarity(
Distribution distribution,
Lambda lambda,
Normalization normalization,
boolean discountOverlaps) {
super(discountOverlaps);
this.distribution = distribution; this.distribution = distribution;
this.lambda = lambda; this.lambda = lambda;
this.normalization = normalization; this.normalization = normalization;

View File

@ -37,6 +37,13 @@ public class IndriDirichletSimilarity extends LMSimilarity {
/** The &mu; parameter. */ /** The &mu; parameter. */
private final float mu; private final float mu;
/**
 * Instantiates the similarity with the provided parameters.
 *
 * @param collectionModel collection language model, forwarded to the {@link LMSimilarity}
 *     constructor
 * @param discountOverlaps true if overlap tokens should not impact document length for scoring.
 * @param mu the &mu; parameter
 */
public IndriDirichletSimilarity(
CollectionModel collectionModel, boolean discountOverlaps, float mu) {
super(collectionModel, discountOverlaps);
// NOTE(review): mu is stored without a range check here, whereas LMDirichletSimilarity
// rejects negative or non-finite values -- confirm this difference is intended.
this.mu = mu;
}
/** Instantiates the similarity with the provided &mu; parameter. */ /** Instantiates the similarity with the provided &mu; parameter. */
public IndriDirichletSimilarity(CollectionModel collectionModel, float mu) { public IndriDirichletSimilarity(CollectionModel collectionModel, float mu) {
super(collectionModel); super(collectionModel);

View File

@ -39,7 +39,13 @@ public class LMDirichletSimilarity extends LMSimilarity {
/** Instantiates the similarity with the provided &mu; parameter. */ /** Instantiates the similarity with the provided &mu; parameter. */
public LMDirichletSimilarity(CollectionModel collectionModel, float mu) { public LMDirichletSimilarity(CollectionModel collectionModel, float mu) {
super(collectionModel); this(collectionModel, true, mu);
}
/** Instantiates the similarity with the provided parameters. */
public LMDirichletSimilarity(
CollectionModel collectionModel, boolean discountOverlaps, float mu) {
super(collectionModel, discountOverlaps);
if (Float.isFinite(mu) == false || mu < 0) { if (Float.isFinite(mu) == false || mu < 0) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"illegal mu value: " + mu + ", must be a non-negative finite value"); "illegal mu value: " + mu + ", must be a non-negative finite value");

View File

@ -43,7 +43,13 @@ public class LMJelinekMercerSimilarity extends LMSimilarity {
/** Instantiates with the specified collectionModel and &lambda; parameter. */ /** Instantiates with the specified collectionModel and &lambda; parameter. */
public LMJelinekMercerSimilarity(CollectionModel collectionModel, float lambda) { public LMJelinekMercerSimilarity(CollectionModel collectionModel, float lambda) {
super(collectionModel); this(collectionModel, true, lambda);
}
/** Instantiates with the specified collectionModel and parameters. */
public LMJelinekMercerSimilarity(
CollectionModel collectionModel, boolean discountOverlaps, float lambda) {
super(collectionModel, discountOverlaps);
if (Float.isNaN(lambda) || lambda <= 0 || lambda > 1) { if (Float.isNaN(lambda) || lambda <= 0 || lambda > 1) {
throw new IllegalArgumentException("lambda must be in the range (0 .. 1]"); throw new IllegalArgumentException("lambda must be in the range (0 .. 1]");
} }

View File

@ -43,6 +43,12 @@ public abstract class LMSimilarity extends SimilarityBase {
/** Creates a new instance with the specified collection language model. */ /** Creates a new instance with the specified collection language model. */
public LMSimilarity(CollectionModel collectionModel) { public LMSimilarity(CollectionModel collectionModel) {
this(collectionModel, true);
}
/** Creates a new instance with the specified collection language model and discountOverlaps. */
public LMSimilarity(CollectionModel collectionModel, boolean discountOverlaps) {
super(discountOverlaps);
this.collectionModel = collectionModel; this.collectionModel = collectionModel;
} }