From 4236da27d1b1cbced6c3fed4b3d3094fe796fa7e Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Sat, 19 Nov 2016 08:28:25 +0100 Subject: [PATCH] LUCENE-7466 - added axiomatic similarity, patch from Peilin Yang --- .../lucene/search/similarities/Axiomatic.java | 159 ++++++++++++++++++ .../search/similarities/AxiomaticF1EXP.java | 95 +++++++++++ .../search/similarities/AxiomaticF1LOG.java | 88 ++++++++++ .../search/similarities/AxiomaticF2EXP.java | 94 +++++++++++ .../search/similarities/AxiomaticF2LOG.java | 86 ++++++++++ .../search/similarities/AxiomaticF3EXP.java | 94 +++++++++++ .../search/similarities/AxiomaticF3LOG.java | 83 +++++++++ .../similarities/TestAxiomaticSimilarity.java | 86 ++++++++++ 8 files changed, 785 insertions(+) create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java new file mode 100644 index 00000000000..9c2854c3bbd --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + + +import java.util.List; + +import org.apache.lucene.search.Explanation; + +/** + * Axiomatic approaches for IR. From Hui Fang and Chengxiang Zhai + * 2005. An Exploration of Axiomatic Approaches to Information Retrieval. + * In Proceedings of the 28th annual international ACM SIGIR + * conference on Research and development in information retrieval + * (SIGIR '05). ACM, New York, NY, USA, 480-487. + *

+ * There are a family of models. All of them are based on BM25, + * Pivoted Document Length Normalization and Language model with + * Dirichlet prior. Some components (e.g. Term Frequency, + * Inverted Document Frequency) in the original models are modified + * so that they follow some axiomatic constraints. + *

+ * + * @lucene.experimental + */ +public abstract class Axiomatic extends SimilarityBase { + /** + * hyperparam for the growth function + */ + protected final float s; + + /** + * hyperparam for the primitive weighthing function + */ + protected final float k; + + /** + * the query length + */ + protected final int queryLen; + + /** + * Constructor setting all Axiomatic hyperparameters + * @param s hyperparam for the growth function + * @param queryLen the query length + * @param k hyperparam for the primitive weighting function + */ + public Axiomatic(float s, int queryLen, float k) { + if (Float.isFinite(s) == false || Float.isNaN(s) || s < 0 || s > 1) { + throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1"); + } + if (Float.isFinite(k) == false || Float.isNaN(k) || k < 0 || k > 1) { + throw new IllegalArgumentException("illegal k value: " + k + ", must be between 0 and 1"); + } + if (queryLen < 0 || queryLen > Integer.MAX_VALUE) { + throw new IllegalArgumentException("illegal query length value: " + + queryLen + ", must be larger 0 and smaller than MAX_INT"); + } + this.s = s; + this.queryLen = queryLen; + this.k = k; + } + + /** + * Constructor setting only s, letting k and queryLen to default + * @param s hyperparam for the growth function + */ + public Axiomatic(float s) { + this(s, 1, 0.35f); + } + + /** + * Constructor setting s and queryLen, letting k to default + * @param s hyperparam for the growth function + * @param queryLen the query length + */ + public Axiomatic(float s, int queryLen) { + this(s, queryLen, 0.35f); + } + + /** + * Default constructor + */ + public Axiomatic() { + this(0.25f, 1, 0.35f); + } + + @Override + public float score(BasicStats stats, float freq, float docLen) { + return tf(stats, freq, docLen) + * ln(stats, freq, docLen) + * tfln(stats, freq, docLen) + * idf(stats, freq, docLen) + - gamma(stats, freq, docLen); + } + + @Override + protected void explain(List subs, BasicStats stats, int doc, + float freq, float docLen) { + if (stats.getBoost() != 1.0f) { + subs.add(Explanation.match(stats.getBoost(), "boost")); + } + + subs.add(Explanation.match(this.k, "k")); + subs.add(Explanation.match(this.s, "s")); + subs.add(Explanation.match(this.queryLen, "queryLen")); + subs.add(Explanation.match(tf(stats, freq, docLen), "tf")); + subs.add(Explanation.match(ln(stats, freq, docLen), "ln")); + subs.add(Explanation.match(tfln(stats, freq, docLen), "tfln")); + subs.add(Explanation.match(idf(stats, freq, docLen), "idf")); + subs.add(Explanation.match(gamma(stats, freq, docLen), "gamma")); + super.explain(subs, stats, doc, freq, docLen); + } + + /** + * Name of the axiomatic method. + */ + @Override + public abstract String toString(); + + /** + * compute the term frequency component + */ + protected abstract float tf(BasicStats stats, float freq, float docLen); + + /** + * compute the document length component + */ + protected abstract float ln(BasicStats stats, float freq, float docLen); + + /** + * compute the mixed term frequency and document length component + */ + protected abstract float tfln(BasicStats stats, float freq, float docLen); + + /** + * compute the inverted document frequency component + */ + protected abstract float idf(BasicStats stats, float freq, float docLen); + + /** + * compute the gamma component (only for F3EXp and F3LOG) + */ + protected abstract float gamma(BasicStats stats, float freq, float docLen); +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java new file mode 100644 index 00000000000..62317fdf73f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +/** + * F1EXP is defined as Sum(tf(term_doc_freq)*ln(docLen)*IDF(term)) + * where IDF(t) = pow((N+1)/df(t), k) N=total num of docs, df=doc freq + * + * @lucene.experimental + */ +public class AxiomaticF1EXP extends Axiomatic { + /** + * Constructor setting s and k, letting queryLen to default + * @param s hyperparam for the growth function + * @param k hyperparam for the primitive weighting function + */ + public AxiomaticF1EXP(float s, float k) { + super(s, 1, k); + } + + /** + * Constructor setting s only, letting k and queryLen to default + * @param s hyperparam for the growth function + */ + public AxiomaticF1EXP(float s) { + this(s, 0.35f); + } + + /** + * Default constructor + */ + public AxiomaticF1EXP() { + super(); + } + + @Override + public String toString() { + return "F1EXP"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + if (freq <= 0.0) return 0f; + return (float) (1 + Math.log(1 + Math.log(freq))); + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return (stats.getAvgFieldLength() + this.s) / (stats.getAvgFieldLength() + docLen * this.s); + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the inverted document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.pow((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq(), this.k); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return 0f; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java new file mode 100644 index 00000000000..7cce2be4e95 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +/** + * F1LOG is defined as Sum(tf(term_doc_freq)*ln(docLen)*IDF(term)) + * where IDF(t) = ln((N+1)/df(t)) N=total num of docs, df=doc freq + * + * @lucene.experimental + */ +public class AxiomaticF1LOG extends Axiomatic { + + /** + * Constructor setting s only, letting k and queryLen to default + * + * @param s hyperparam for the growth function + */ + public AxiomaticF1LOG(float s) { + super(s); + } + + /** + * Default constructor + */ + public AxiomaticF1LOG() { + super(); + } + + @Override + public String toString() { + return "F1LOG"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + if (freq <= 0.0) return 0f; + return (float) (1 + Math.log(1 + Math.log(freq))); + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return (stats.getAvgFieldLength() + this.s) / (stats.getAvgFieldLength() + docLen * this.s); + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the inverted document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.log((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq()); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return 0f; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java new file mode 100644 index 00000000000..f9bc98a4d27 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +/** + * F2EXP is defined as Sum(tfln(term_doc_freq, docLen)*IDF(term)) + * where IDF(t) = pow((N+1)/df(t), k) N=total num of docs, df=doc freq + * + * @lucene.experimental + */ +public class AxiomaticF2EXP extends Axiomatic { + /** + * Constructor setting s and k, letting queryLen to default + * @param s hyperparam for the growth function + * @param k hyperparam for the primitive weighting function + */ + public AxiomaticF2EXP(float s, float k) { + super(s, 1, k); + } + + /** + * Constructor setting s only, letting k and queryLen to default + * @param s hyperparam for the growth function + */ + public AxiomaticF2EXP(float s) { + this(s, 0.35f); + } + + /** + * Default constructor + */ + public AxiomaticF2EXP() { + super(); + } + + @Override + public String toString() { + return "F2EXP"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return freq / (freq + this.s + this.s * docLen / stats.getAvgFieldLength()); + } + + /** + * compute the inverted document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.pow((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq(), this.k); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return 0f; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java new file mode 100644 index 00000000000..fee2000bec3 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +/** + * F2EXP is defined as Sum(tfln(term_doc_freq, docLen)*IDF(term)) + * where IDF(t) = ln((N+1)/df(t)) N=total num of docs, df=doc freq + * + * @lucene.experimental + */ +public class AxiomaticF2LOG extends Axiomatic { + /** + * Constructor setting s only, letting k and queryLen to default + * + * @param s hyperparam for the growth function + */ + public AxiomaticF2LOG(float s) { + super(s); + } + + /** + * Default constructor + */ + public AxiomaticF2LOG() { + super(); + } + + @Override + public String toString() { + return "F2LOG"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return freq / (freq + this.s + this.s * docLen / stats.getAvgFieldLength()); + } + + /** + * compute the inverted document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.log((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq()); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return 0f; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java new file mode 100644 index 00000000000..c20194ac28a --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +/** + * F2EXP is defined as Sum(tf(term_doc_freq)*IDF(term)-gamma(docLen, queryLen)) + * where IDF(t) = pow((N+1)/df(t), k) N=total num of docs, df=doc freq + * gamma(docLen, queryLen) = (docLen-queryLen)*queryLen*s/avdl + * + * @lucene.experimental + */ +public class AxiomaticF3EXP extends Axiomatic { + + /** + * Constructor setting all Axiomatic hyperparameters + * + * @param s hyperparam for the growth function + * @param queryLen the query length + * @param k hyperparam for the primitive weighting function + */ + public AxiomaticF3EXP(float s, int queryLen, float k) { + super(s, queryLen, k); + } + + /** + * Constructor setting s and queryLen, letting k to default + * + * @param s hyperparam for the growth function + * @param queryLen the query length + */ + public AxiomaticF3EXP(float s, int queryLen) { + this(s, queryLen, 0.35f); + } + + @Override + public String toString() { + return "F3EXP"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + if (freq <= 0.0) return 0f; + return (float) (1 + Math.log(1 + Math.log(freq))); + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the inverted document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.pow((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq(), this.k); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return (docLen - this.queryLen) * this.s * this.queryLen / stats.getAvgFieldLength(); + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java new file mode 100644 index 00000000000..a9d82aded39 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +/** + * F2EXP is defined as Sum(tf(term_doc_freq)*IDF(term)-gamma(docLen, queryLen)) + * where IDF(t) = ln((N+1)/df(t)) N=total num of docs, df=doc freq + * gamma(docLen, queryLen) = (docLen-queryLen)*queryLen*s/avdl + * + * @lucene.experimental + */ +public class AxiomaticF3LOG extends Axiomatic { + + /** + * Constructor setting s and queryLen, letting k to default + * + * @param s hyperparam for the growth function + * @param queryLen the query length + */ + public AxiomaticF3LOG(float s, int queryLen) { + super(s, queryLen); + } + + @Override + public String toString() { + return "F3LOG"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + if (freq <= 0.0) return 0f; + return (float) (1 + Math.log(1 + Math.log(freq))); + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the inverted document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.log((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq()); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return (docLen - this.queryLen) * this.s * this.queryLen / stats.getAvgFieldLength(); + } +} \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java new file mode 100644 index 00000000000..44c7e1d9d16 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +import org.apache.lucene.util.LuceneTestCase; + +public class TestAxiomaticSimilarity extends LuceneTestCase { + + public void testSaneNormValues() { + Axiomatic sim = new AxiomaticF2EXP(); + for (int i = 0; i < 256; i++) { + float len = sim.decodeNormValue((byte) i); + assertFalse("negative len: " + len + ", byte=" + i, len < 0.0f); + assertFalse("inf len: " + len + ", byte=" + i, Float.isInfinite(len)); + assertFalse("nan len for byte=" + i, Float.isNaN(len)); + if (i > 0) { + assertTrue("len is not decreasing: " + len + ",byte=" + i, len < sim.decodeNormValue((byte) (i - 1))); + } + } + } + + public void testIllegalS() { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(Float.POSITIVE_INFINITY, 0.1f); + }); + assertTrue(expected.getMessage().contains("illegal s value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(-1, 0.1f); + }); + assertTrue(expected.getMessage().contains("illegal s value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(Float.NaN, 0.1f); + }); + assertTrue(expected.getMessage().contains("illegal s value")); + } + + public void testIllegalK() { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, 2f); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, -1f); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, Float.POSITIVE_INFINITY); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, Float.NaN); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + } + + public void testIllegalQL() { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF3EXP(0.35f, -1); + }); + assertTrue(expected.getMessage().contains("illegal query length value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, Integer.MAX_VALUE + 1); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + } +}