+ * There are a family of models. All of them are based on BM25,
+ * Pivoted Document Length Normalization and Language model with
+ * Dirichlet prior. Some components (e.g. Term Frequency,
+ * Inverted Document Frequency) in the original models are modified
+ * so that they follow some axiomatic constraints.
+ *
+ *
+ * @lucene.experimental
+ */
+public abstract class Axiomatic extends SimilarityBase {
+ /**
+ * hyperparam for the growth function
+ */
+ protected final float s;
+
+ /**
+ * hyperparam for the primitive weighthing function
+ */
+ protected final float k;
+
+ /**
+ * the query length
+ */
+ protected final int queryLen;
+
+ /**
+ * Constructor setting all Axiomatic hyperparameters
+ * @param s hyperparam for the growth function
+ * @param queryLen the query length
+ * @param k hyperparam for the primitive weighting function
+ */
+ public Axiomatic(float s, int queryLen, float k) {
+ if (Float.isFinite(s) == false || Float.isNaN(s) || s < 0 || s > 1) {
+ throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1");
+ }
+ if (Float.isFinite(k) == false || Float.isNaN(k) || k < 0 || k > 1) {
+ throw new IllegalArgumentException("illegal k value: " + k + ", must be between 0 and 1");
+ }
+ if (queryLen < 0 || queryLen > Integer.MAX_VALUE) {
+ throw new IllegalArgumentException("illegal query length value: "
+ + queryLen + ", must be larger 0 and smaller than MAX_INT");
+ }
+ this.s = s;
+ this.queryLen = queryLen;
+ this.k = k;
+ }
+
+ /**
+ * Constructor setting only s, letting k and queryLen to default
+ * @param s hyperparam for the growth function
+ */
+ public Axiomatic(float s) {
+ this(s, 1, 0.35f);
+ }
+
+ /**
+ * Constructor setting s and queryLen, letting k to default
+ * @param s hyperparam for the growth function
+ * @param queryLen the query length
+ */
+ public Axiomatic(float s, int queryLen) {
+ this(s, queryLen, 0.35f);
+ }
+
+ /**
+ * Default constructor
+ */
+ public Axiomatic() {
+ this(0.25f, 1, 0.35f);
+ }
+
+ @Override
+ public float score(BasicStats stats, float freq, float docLen) {
+ return tf(stats, freq, docLen)
+ * ln(stats, freq, docLen)
+ * tfln(stats, freq, docLen)
+ * idf(stats, freq, docLen)
+ - gamma(stats, freq, docLen);
+ }
+
+ @Override
+ protected void explain(List subs, BasicStats stats, int doc,
+ float freq, float docLen) {
+ if (stats.getBoost() != 1.0f) {
+ subs.add(Explanation.match(stats.getBoost(), "boost"));
+ }
+
+ subs.add(Explanation.match(this.k, "k"));
+ subs.add(Explanation.match(this.s, "s"));
+ subs.add(Explanation.match(this.queryLen, "queryLen"));
+ subs.add(Explanation.match(tf(stats, freq, docLen), "tf"));
+ subs.add(Explanation.match(ln(stats, freq, docLen), "ln"));
+ subs.add(Explanation.match(tfln(stats, freq, docLen), "tfln"));
+ subs.add(Explanation.match(idf(stats, freq, docLen), "idf"));
+ subs.add(Explanation.match(gamma(stats, freq, docLen), "gamma"));
+ super.explain(subs, stats, doc, freq, docLen);
+ }
+
+ /**
+ * Name of the axiomatic method.
+ */
+ @Override
+ public abstract String toString();
+
+ /**
+ * compute the term frequency component
+ */
+ protected abstract float tf(BasicStats stats, float freq, float docLen);
+
+ /**
+ * compute the document length component
+ */
+ protected abstract float ln(BasicStats stats, float freq, float docLen);
+
+ /**
+ * compute the mixed term frequency and document length component
+ */
+ protected abstract float tfln(BasicStats stats, float freq, float docLen);
+
+ /**
+ * compute the inverted document frequency component
+ */
+ protected abstract float idf(BasicStats stats, float freq, float docLen);
+
+ /**
+ * compute the gamma component (only for F3EXp and F3LOG)
+ */
+ protected abstract float gamma(BasicStats stats, float freq, float docLen);
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
new file mode 100644
index 00000000000..62317fdf73f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+/**
+ * F1EXP is defined as Sum(tf(term_doc_freq)*ln(docLen)*IDF(term))
+ * where IDF(t) = pow((N+1)/df(t), k) N=total num of docs, df=doc freq
+ *
+ * @lucene.experimental
+ */
+public class AxiomaticF1EXP extends Axiomatic {
+ /**
+ * Constructor setting s and k, letting queryLen to default
+ * @param s hyperparam for the growth function
+ * @param k hyperparam for the primitive weighting function
+ */
+ public AxiomaticF1EXP(float s, float k) {
+ super(s, 1, k);
+ }
+
+ /**
+ * Constructor setting s only, letting k and queryLen to default
+ * @param s hyperparam for the growth function
+ */
+ public AxiomaticF1EXP(float s) {
+ this(s, 0.35f);
+ }
+
+ /**
+ * Default constructor
+ */
+ public AxiomaticF1EXP() {
+ super();
+ }
+
+ @Override
+ public String toString() {
+ return "F1EXP";
+ }
+
+ /**
+ * compute the term frequency component
+ */
+ @Override
+ protected float tf(BasicStats stats, float freq, float docLen) {
+ if (freq <= 0.0) return 0f;
+ return (float) (1 + Math.log(1 + Math.log(freq)));
+ }
+
+ /**
+ * compute the document length component
+ */
+ @Override
+ protected float ln(BasicStats stats, float freq, float docLen) {
+ return (stats.getAvgFieldLength() + this.s) / (stats.getAvgFieldLength() + docLen * this.s);
+ }
+
+ /**
+ * compute the mixed term frequency and document length component
+ */
+ @Override
+ protected float tfln(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the inverted document frequency component
+ */
+ @Override
+ protected float idf(BasicStats stats, float freq, float docLen) {
+ return (float) Math.pow((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq(), this.k);
+ }
+
+ /**
+ * compute the gamma component
+ */
+ @Override
+ protected float gamma(BasicStats stats, float freq, float docLen) {
+ return 0f;
+ }
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
new file mode 100644
index 00000000000..7cce2be4e95
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+/**
+ * F1LOG is defined as Sum(tf(term_doc_freq)*ln(docLen)*IDF(term))
+ * where IDF(t) = ln((N+1)/df(t)) N=total num of docs, df=doc freq
+ *
+ * @lucene.experimental
+ */
+public class AxiomaticF1LOG extends Axiomatic {
+
+ /**
+ * Constructor setting s only, letting k and queryLen to default
+ *
+ * @param s hyperparam for the growth function
+ */
+ public AxiomaticF1LOG(float s) {
+ super(s);
+ }
+
+ /**
+ * Default constructor
+ */
+ public AxiomaticF1LOG() {
+ super();
+ }
+
+ @Override
+ public String toString() {
+ return "F1LOG";
+ }
+
+ /**
+ * compute the term frequency component
+ */
+ @Override
+ protected float tf(BasicStats stats, float freq, float docLen) {
+ if (freq <= 0.0) return 0f;
+ return (float) (1 + Math.log(1 + Math.log(freq)));
+ }
+
+ /**
+ * compute the document length component
+ */
+ @Override
+ protected float ln(BasicStats stats, float freq, float docLen) {
+ return (stats.getAvgFieldLength() + this.s) / (stats.getAvgFieldLength() + docLen * this.s);
+ }
+
+ /**
+ * compute the mixed term frequency and document length component
+ */
+ @Override
+ protected float tfln(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the inverted document frequency component
+ */
+ @Override
+ protected float idf(BasicStats stats, float freq, float docLen) {
+ return (float) Math.log((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq());
+ }
+
+ /**
+ * compute the gamma component
+ */
+ @Override
+ protected float gamma(BasicStats stats, float freq, float docLen) {
+ return 0f;
+ }
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java
new file mode 100644
index 00000000000..f9bc98a4d27
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+/**
+ * F2EXP is defined as Sum(tfln(term_doc_freq, docLen)*IDF(term))
+ * where IDF(t) = pow((N+1)/df(t), k) N=total num of docs, df=doc freq
+ *
+ * @lucene.experimental
+ */
+public class AxiomaticF2EXP extends Axiomatic {
+ /**
+ * Constructor setting s and k, letting queryLen to default
+ * @param s hyperparam for the growth function
+ * @param k hyperparam for the primitive weighting function
+ */
+ public AxiomaticF2EXP(float s, float k) {
+ super(s, 1, k);
+ }
+
+ /**
+ * Constructor setting s only, letting k and queryLen to default
+ * @param s hyperparam for the growth function
+ */
+ public AxiomaticF2EXP(float s) {
+ this(s, 0.35f);
+ }
+
+ /**
+ * Default constructor
+ */
+ public AxiomaticF2EXP() {
+ super();
+ }
+
+ @Override
+ public String toString() {
+ return "F2EXP";
+ }
+
+ /**
+ * compute the term frequency component
+ */
+ @Override
+ protected float tf(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the document length component
+ */
+ @Override
+ protected float ln(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the mixed term frequency and document length component
+ */
+ @Override
+ protected float tfln(BasicStats stats, float freq, float docLen) {
+ return freq / (freq + this.s + this.s * docLen / stats.getAvgFieldLength());
+ }
+
+ /**
+ * compute the inverted document frequency component
+ */
+ @Override
+ protected float idf(BasicStats stats, float freq, float docLen) {
+ return (float) Math.pow((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq(), this.k);
+ }
+
+ /**
+ * compute the gamma component
+ */
+ @Override
+ protected float gamma(BasicStats stats, float freq, float docLen) {
+ return 0f;
+ }
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java
new file mode 100644
index 00000000000..fee2000bec3
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+/**
+ * F2EXP is defined as Sum(tfln(term_doc_freq, docLen)*IDF(term))
+ * where IDF(t) = ln((N+1)/df(t)) N=total num of docs, df=doc freq
+ *
+ * @lucene.experimental
+ */
+public class AxiomaticF2LOG extends Axiomatic {
+ /**
+ * Constructor setting s only, letting k and queryLen to default
+ *
+ * @param s hyperparam for the growth function
+ */
+ public AxiomaticF2LOG(float s) {
+ super(s);
+ }
+
+ /**
+ * Default constructor
+ */
+ public AxiomaticF2LOG() {
+ super();
+ }
+
+ @Override
+ public String toString() {
+ return "F2LOG";
+ }
+
+ /**
+ * compute the term frequency component
+ */
+ @Override
+ protected float tf(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the document length component
+ */
+ @Override
+ protected float ln(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the mixed term frequency and document length component
+ */
+ @Override
+ protected float tfln(BasicStats stats, float freq, float docLen) {
+ return freq / (freq + this.s + this.s * docLen / stats.getAvgFieldLength());
+ }
+
+ /**
+ * compute the inverted document frequency component
+ */
+ @Override
+ protected float idf(BasicStats stats, float freq, float docLen) {
+ return (float) Math.log((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq());
+ }
+
+ /**
+ * compute the gamma component
+ */
+ @Override
+ protected float gamma(BasicStats stats, float freq, float docLen) {
+ return 0f;
+ }
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
new file mode 100644
index 00000000000..c20194ac28a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+/**
+ * F2EXP is defined as Sum(tf(term_doc_freq)*IDF(term)-gamma(docLen, queryLen))
+ * where IDF(t) = pow((N+1)/df(t), k) N=total num of docs, df=doc freq
+ * gamma(docLen, queryLen) = (docLen-queryLen)*queryLen*s/avdl
+ *
+ * @lucene.experimental
+ */
+public class AxiomaticF3EXP extends Axiomatic {
+
+ /**
+ * Constructor setting all Axiomatic hyperparameters
+ *
+ * @param s hyperparam for the growth function
+ * @param queryLen the query length
+ * @param k hyperparam for the primitive weighting function
+ */
+ public AxiomaticF3EXP(float s, int queryLen, float k) {
+ super(s, queryLen, k);
+ }
+
+ /**
+ * Constructor setting s and queryLen, letting k to default
+ *
+ * @param s hyperparam for the growth function
+ * @param queryLen the query length
+ */
+ public AxiomaticF3EXP(float s, int queryLen) {
+ this(s, queryLen, 0.35f);
+ }
+
+ @Override
+ public String toString() {
+ return "F3EXP";
+ }
+
+ /**
+ * compute the term frequency component
+ */
+ @Override
+ protected float tf(BasicStats stats, float freq, float docLen) {
+ if (freq <= 0.0) return 0f;
+ return (float) (1 + Math.log(1 + Math.log(freq)));
+ }
+
+ /**
+ * compute the document length component
+ */
+ @Override
+ protected float ln(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the mixed term frequency and document length component
+ */
+ @Override
+ protected float tfln(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the inverted document frequency component
+ */
+ @Override
+ protected float idf(BasicStats stats, float freq, float docLen) {
+ return (float) Math.pow((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq(), this.k);
+ }
+
+ /**
+ * compute the gamma component
+ */
+ @Override
+ protected float gamma(BasicStats stats, float freq, float docLen) {
+ return (docLen - this.queryLen) * this.s * this.queryLen / stats.getAvgFieldLength();
+ }
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
new file mode 100644
index 00000000000..a9d82aded39
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+/**
+ * F2EXP is defined as Sum(tf(term_doc_freq)*IDF(term)-gamma(docLen, queryLen))
+ * where IDF(t) = ln((N+1)/df(t)) N=total num of docs, df=doc freq
+ * gamma(docLen, queryLen) = (docLen-queryLen)*queryLen*s/avdl
+ *
+ * @lucene.experimental
+ */
+public class AxiomaticF3LOG extends Axiomatic {
+
+ /**
+ * Constructor setting s and queryLen, letting k to default
+ *
+ * @param s hyperparam for the growth function
+ * @param queryLen the query length
+ */
+ public AxiomaticF3LOG(float s, int queryLen) {
+ super(s, queryLen);
+ }
+
+ @Override
+ public String toString() {
+ return "F3LOG";
+ }
+
+ /**
+ * compute the term frequency component
+ */
+ @Override
+ protected float tf(BasicStats stats, float freq, float docLen) {
+ if (freq <= 0.0) return 0f;
+ return (float) (1 + Math.log(1 + Math.log(freq)));
+ }
+
+ /**
+ * compute the document length component
+ */
+ @Override
+ protected float ln(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the mixed term frequency and document length component
+ */
+ @Override
+ protected float tfln(BasicStats stats, float freq, float docLen) {
+ return 1f;
+ }
+
+ /**
+ * compute the inverted document frequency component
+ */
+ @Override
+ protected float idf(BasicStats stats, float freq, float docLen) {
+ return (float) Math.log((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq());
+ }
+
+ /**
+ * compute the gamma component
+ */
+ @Override
+ protected float gamma(BasicStats stats, float freq, float docLen) {
+ return (docLen - this.queryLen) * this.s * this.queryLen / stats.getAvgFieldLength();
+ }
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java
index 977af5387e6..6c5ea15aac5 100644
--- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java
@@ -51,8 +51,8 @@ import org.apache.lucene.search.TermQuery;
* are provided so that the generated queries can be customized.
*/
public class QueryBuilder {
- private Analyzer analyzer;
- private boolean enablePositionIncrements = true;
+ protected Analyzer analyzer;
+ protected boolean enablePositionIncrements = true;
/** Creates a new QueryBuilder using the given analyzer. */
public QueryBuilder(Analyzer analyzer) {
@@ -186,9 +186,12 @@ public class QueryBuilder {
/**
* Creates a query from the analysis chain.
*
- * Expert: this is more useful for subclasses such as queryparsers.
+ * Expert: this is more useful for subclasses such as queryparsers.
* If using this class directly, just use {@link #createBooleanQuery(String, String)}
- * and {@link #createPhraseQuery(String, String)}
+ * and {@link #createPhraseQuery(String, String)}. This is a complex method and
+ * it is usually not necessary to override it in a subclass; instead, override
+ * methods like {@link #newBooleanQuery}, etc., if possible.
+ *
* @param analyzer analyzer used for this query
* @param operator default boolean operator used for this query
* @param field field to create queries against
@@ -265,7 +268,7 @@ public class QueryBuilder {
/**
* Creates simple term query from the cached tokenstream contents
*/
- private Query analyzeTerm(String field, TokenStream stream) throws IOException {
+ protected Query analyzeTerm(String field, TokenStream stream) throws IOException {
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
stream.reset();
@@ -279,7 +282,7 @@ public class QueryBuilder {
/**
* Creates simple boolean query from the cached tokenstream contents
*/
- private Query analyzeBoolean(String field, TokenStream stream) throws IOException {
+ protected Query analyzeBoolean(String field, TokenStream stream) throws IOException {
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
stream.reset();
@@ -291,7 +294,7 @@ public class QueryBuilder {
return newSynonymQuery(terms.toArray(new Term[terms.size()]));
}
- private void add(BooleanQuery.Builder q, List current, BooleanClause.Occur operator) {
+ protected void add(BooleanQuery.Builder q, List current, BooleanClause.Occur operator) {
if (current.isEmpty()) {
return;
}
@@ -305,7 +308,7 @@ public class QueryBuilder {
/**
* Creates complex boolean query from the cached tokenstream contents
*/
- private Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator) throws IOException {
+ protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator) throws IOException {
BooleanQuery.Builder q = newBooleanQuery();
List currentQuery = new ArrayList<>();
@@ -328,7 +331,7 @@ public class QueryBuilder {
/**
* Creates simple phrase query from the cached tokenstream contents
*/
- private Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
+ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.setSlop(slop);
@@ -352,7 +355,7 @@ public class QueryBuilder {
/**
* Creates complex phrase query from the cached tokenstream contents
*/
- private Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
+ protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
MultiPhraseQuery.Builder mpqb = newMultiPhraseQueryBuilder();
mpqb.setSlop(slop);
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java
new file mode 100644
index 00000000000..44c7e1d9d16
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestAxiomaticSimilarity extends LuceneTestCase {
+
+ public void testSaneNormValues() {
+ Axiomatic sim = new AxiomaticF2EXP();
+ for (int i = 0; i < 256; i++) {
+ float len = sim.decodeNormValue((byte) i);
+ assertFalse("negative len: " + len + ", byte=" + i, len < 0.0f);
+ assertFalse("inf len: " + len + ", byte=" + i, Float.isInfinite(len));
+ assertFalse("nan len for byte=" + i, Float.isNaN(len));
+ if (i > 0) {
+ assertTrue("len is not decreasing: " + len + ",byte=" + i, len < sim.decodeNormValue((byte) (i - 1)));
+ }
+ }
+ }
+
+ public void testIllegalS() {
+ IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+ new AxiomaticF2EXP(Float.POSITIVE_INFINITY, 0.1f);
+ });
+ assertTrue(expected.getMessage().contains("illegal s value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new AxiomaticF2EXP(-1, 0.1f);
+ });
+ assertTrue(expected.getMessage().contains("illegal s value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new AxiomaticF2EXP(Float.NaN, 0.1f);
+ });
+ assertTrue(expected.getMessage().contains("illegal s value"));
+ }
+
+ public void testIllegalK() {
+ IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+ new AxiomaticF2EXP(0.35f, 2f);
+ });
+ assertTrue(expected.getMessage().contains("illegal k value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new AxiomaticF2EXP(0.35f, -1f);
+ });
+ assertTrue(expected.getMessage().contains("illegal k value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new AxiomaticF2EXP(0.35f, Float.POSITIVE_INFINITY);
+ });
+ assertTrue(expected.getMessage().contains("illegal k value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new AxiomaticF2EXP(0.35f, Float.NaN);
+ });
+ assertTrue(expected.getMessage().contains("illegal k value"));
+ }
+
+ public void testIllegalQL() {
+ IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+ new AxiomaticF3EXP(0.35f, -1);
+ });
+ assertTrue(expected.getMessage().contains("illegal query length value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new AxiomaticF2EXP(0.35f, Integer.MAX_VALUE + 1);
+ });
+ assertTrue(expected.getMessage().contains("illegal k value"));
+ }
+}
diff --git a/lucene/site/changes/changes2html.pl b/lucene/site/changes/changes2html.pl
index d71f2962655..b3f8fdb58c5 100755
--- a/lucene/site/changes/changes2html.pl
+++ b/lucene/site/changes/changes2html.pl
@@ -23,9 +23,8 @@
use strict;
use warnings;
+use XML::Simple;
-# JIRA REST API documentation:
-my $project_info_url = 'https://issues.apache.org/jira/rest/api/2/project';
my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
my $github_pull_request_prefix = 'https://github.com/apache/lucene-solr/pull/';
my $bugzilla_url_prefix = 'http://issues.apache.org/bugzilla/show_bug.cgi?id=';
@@ -45,7 +44,7 @@ my @releases = ();
my @lines = ; # Get all input at once
#
-# Cmdline args: (only from Solr)
+# Cmdline args: (only from Solr)
#
my $product = $ARGV[0];
my %release_dates = &setup_release_dates($ARGV[1]);
@@ -804,10 +803,6 @@ sub get_release_date {
# Handle '1.2 RC6', which should be '1.2 final'
$release = '1.2 final' if ($release eq '1.2 RC6');
- if (not exists($release_dates{$release})) {
- $release =~ s/\.0\.0/\.0/;
- }
-
$reldate = ( exists($release_dates{$release})
? $release_dates{$release}
: 'unknown');
@@ -825,60 +820,46 @@ sub get_release_date {
# Returns a list of alternating release names and dates, for use in populating
# the %release_dates hash.
#
-# Pulls release dates via the JIRA REST API. JIRA does not list
-# X.Y RCZ releases independently from releases X.Y, so the RC dates
-# as well as those named "final" are included below.
+# Pulls release dates from the project DOAP file.
#
sub setup_release_dates {
my %release_dates;
my $file = shift;
- if (uc($product) eq 'LUCENE') {
- %release_dates
- = ( '0.01' => '2000-03-30', '0.04' => '2000-04-19',
- '1.0' => '2000-10-04', '1.01b' => '2001-06-02',
- '1.2 RC1' => '2001-10-02', '1.2 RC2' => '2001-10-19',
- '1.2 RC3' => '2002-01-27', '1.2 RC4' => '2002-02-14',
- '1.2 RC5' => '2002-05-14', '1.2 final' => '2002-06-13',
- '1.3 RC1' => '2003-03-24', '1.3 RC2' => '2003-10-22',
- '1.3 RC3' => '2003-11-25', '1.3 final' => '2003-12-26',
- '1.4 RC1' => '2004-03-29', '1.4 RC2' => '2004-03-30',
- '1.4 RC3' => '2004-05-11', '1.4 final' => '2004-07-01',
- '1.4.1' => '2004-08-02', '1.4.2' => '2004-10-01',
- '1.4.3' => '2004-12-07', '1.9 RC1' => '2006-02-21',
- '1.9 final' => '2006-02-27', '1.9.1' => '2006-03-02',
- '2.0.0' => '2006-05-26', '2.1.0' => '2007-02-14',
- '2.2.0' => '2007-06-19', '2.3.0' => '2008-01-21',
- '2.3.1' => '2008-02-22', '2.3.2' => '2008-05-05',
- '2.4.0' => '2008-10-06', '2.4.1' => '2009-03-09',
- '2.9.0' => '2009-09-23', '2.9.1' => '2009-11-06',
- '3.0.0' => '2009-11-25');
- }
- my $project_info_json = readFile($file);
- my $project_info = json2perl($project_info_json);
- for my $version (@{$project_info->{versions}}) {
- if ($version->{releaseDate}) {
- my $date = substr($version->{releaseDate}, 0, 10);
- my $version_name = $version->{name};
- $release_dates{$version->{name}} = $date;
- if ($version_name =~ /^\d+\.\d+$/) {
- my $full_version_name = "$version->{name}.0";
- $release_dates{$full_version_name} = $date;
+ my $project_info = XMLin($file)->{Project};
+ my $version;
+ my $date;
+ for my $release (@{$project_info->{release}}) {
+ $version = $release->{Version};
+ if ($version->{created}) {
+ $date = normalize_date($version->{created});
+ my $version_name = $version->{revision};
+ $release_dates{$version->{revision}} = $date;
+ if ($version_name =~ /^([1-9]\d*\.\d+)([^.0-9].*|$)/) {
+ my $padded_version_name = "$1.0$2"; # Alias w/trailing ".0"
+ $release_dates{$padded_version_name} = $date;
+ } elsif ($version_name =~ /\.0(?=[^.0-9]|$)/) {
+ my $trimmed_version_name = $version_name;
+ $trimmed_version_name =~ s/\.0(?=[^.0-9]|$)//; # Alias w/o trailing ".0"
+ $release_dates{$trimmed_version_name} = $date;
}
}
}
return %release_dates;
}
-sub readFile {
- my $file = shift;
- open(F, '<'.$file) || die "could not open $file: $!";
- local $/ = undef;
- my $project_info_json = ;
- close(F);
- return $project_info_json;
+#
+# normalize_date
+#
+# Left-zero-pads month and day-of-month to 2 digits in dates of format YYYY-(M)M-(D)D
+#
+sub normalize_date {
+ my $date = shift;
+ my ($year, $month, $dom) = $date =~ /^(2\d\d\d)-(\d+)-(\d+)$/;
+ return sprintf("%04d-%02d-%02d", $year, $month, $dom);
}
+
#
# setup_month_regex
#
@@ -1038,23 +1019,4 @@ sub setup_lucene_bugzilla_jira_map {
36628 => 432);
}
-#
-# json2perl
-#
-# Converts a JSON string to the equivalent Perl data structure
-#
-sub json2perl {
- my $json_string = shift;
- $json_string =~ s/(:\s*)(true|false)/$1"$2"/g;
- $json_string =~ s/":/",/g;
- $json_string =~ s/\'/\\'/g;
- $json_string =~ s/\"/\'/g;
- my $project_info = eval $json_string;
- die "ERROR eval'ing munged JSON string ||$json_string||: $@\n"
- if ($@);
- die "ERROR empty value after eval'ing JSON string ||$json_string||\n"
- unless $project_info;
- return $project_info;
-}
-
1;
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9ba9eb8abbd..6c2b99c4af8 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -170,6 +170,8 @@ Bug Fixes
(Mark Miller, Michael Sun)
* SOLR-9729: JDBCStream improvements (Kevin Risden)
+
+* SOLR-9626: new Admin UI now also highlights matched terms in the Analysis screen. (Alexandre Rafalovitch)
Other Changes
----------------------
diff --git a/solr/build.xml b/solr/build.xml
index b426d79a40e..4c52b031d37 100644
--- a/solr/build.xml
+++ b/solr/build.xml
@@ -483,7 +483,7 @@
+ changes.product="solr"/>
-
+
diff --git a/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java b/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
index 462c6561870..ae1ea4703ad 100644
--- a/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
+++ b/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
@@ -188,10 +188,10 @@ class JSONWriter extends TextResponseWriter {
}
/** Represents a NamedList directly as a JSON Object (essentially a Map)
- * repeating any keys if they are repeated in the NamedList. null is mapped
- * to "".
+ * repeating any keys if they are repeated in the NamedList.
+ * null key is mapped to "".
*/
- // NamedList("a"=1,"bar"="foo",null=3) => {"a":1,"bar":"foo","":3}
+ // NamedList("a"=1,"bar"="foo",null=3,null=null) => {"a":1,"bar":"foo","":3,"":null}
protected void writeNamedListAsMapWithDups(String name, NamedList val) throws IOException {
int sz = val.size();
writeMapOpener(sz);
@@ -214,7 +214,7 @@ class JSONWriter extends TextResponseWriter {
}
// Represents a NamedList directly as an array of JSON objects...
- // NamedList("a"=1,"b"=2,null=3) => [{"a":1},{"b":2},3]
+ // NamedList("a"=1,"b"=2,null=3,null=null) => [{"a":1},{"b":2},3,null]
protected void writeNamedListAsArrMap(String name, NamedList val) throws IOException {
int sz = val.size();
indent();
@@ -249,7 +249,7 @@ class JSONWriter extends TextResponseWriter {
}
// Represents a NamedList directly as an array of JSON objects...
- // NamedList("a"=1,"b"=2,null=3) => [["a",1],["b",2],[null,3]]
+ // NamedList("a"=1,"b"=2,null=3,null=null) => [["a",1],["b",2],[null,3],[null,null]]
protected void writeNamedListAsArrArr(String name, NamedList val) throws IOException {
int sz = val.size();
indent();
@@ -293,7 +293,7 @@ class JSONWriter extends TextResponseWriter {
// Represents a NamedList directly as an array with keys/values
// interleaved.
- // NamedList("a"=1,"b"=2,null=3) => ["a",1,"b",2,null,3]
+ // NamedList("a"=1,"b"=2,null=3,null=null) => ["a",1,"b",2,null,3,null,null]
protected void writeNamedListAsFlat(String name, NamedList val) throws IOException {
int sz = val.size();
writeArrayOpener(sz*2);
@@ -676,7 +676,7 @@ class JSONWriter extends TextResponseWriter {
/**
* Writes NamedLists directly as an array of NamedValuePair JSON objects...
- * NamedList("a"=1,"b"=2,null=3) => [{"name":"a","int":1},{"name":"b","int":2},{"int":3}]
+ * NamedList("a"=1,"b"=2,null=3,null=null) => [{"name":"a","int":1},{"name":"b","int":2},{"int":3},{"null":null}]
* NamedList("a"=1,"bar"="foo",null=3.4f) => [{"name":"a","int":1},{"name":"bar","str":"foo"},{"float":3.4}]
*/
class ArrayOfNamedValuePairJSONWriter extends JSONWriter {
diff --git a/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java b/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java
index 076d322437f..a056016d5b8 100644
--- a/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java
+++ b/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java
@@ -98,6 +98,7 @@ public class JSONWriterTest extends SolrTestCaseJ4 {
NamedList nl = new NamedList();
nl.add("data1", "he\u2028llo\u2029!"); // make sure that 2028 and 2029 are both escaped (they are illegal in javascript)
nl.add(null, 42);
+ nl.add(null, null);
rsp.add("nl", nl);
rsp.add("byte", Byte.valueOf((byte)-3));
@@ -108,15 +109,15 @@ public class JSONWriterTest extends SolrTestCaseJ4 {
final String expectedNLjson;
if (namedListStyle == JSONWriter.JSON_NL_FLAT) {
- expectedNLjson = "\"nl\":[\"data1\",\"he\\u2028llo\\u2029!\",null,42]";
+ expectedNLjson = "\"nl\":[\"data1\",\"he\\u2028llo\\u2029!\",null,42,null,null]";
} else if (namedListStyle == JSONWriter.JSON_NL_MAP) {
- expectedNLjson = "\"nl\":{\"data1\":\"he\\u2028llo\\u2029!\",\"\":42}";
+ expectedNLjson = "\"nl\":{\"data1\":\"he\\u2028llo\\u2029!\",\"\":42,\"\":null}";
} else if (namedListStyle == JSONWriter.JSON_NL_ARROFARR) {
- expectedNLjson = "\"nl\":[[\"data1\",\"he\\u2028llo\\u2029!\"],[null,42]]";
+ expectedNLjson = "\"nl\":[[\"data1\",\"he\\u2028llo\\u2029!\"],[null,42],[null,null]]";
} else if (namedListStyle == JSONWriter.JSON_NL_ARROFMAP) {
- expectedNLjson = "\"nl\":[{\"data1\":\"he\\u2028llo\\u2029!\"},42]";
+ expectedNLjson = "\"nl\":[{\"data1\":\"he\\u2028llo\\u2029!\"},42,null]";
} else if (namedListStyle == JSONWriter.JSON_NL_ARROFNVP) {
- expectedNLjson = "\"nl\":[{\"name\":\"data1\",\"str\":\"he\\u2028llo\\u2029!\"},{\"int\":42}]";
+ expectedNLjson = "\"nl\":[{\"name\":\"data1\",\"str\":\"he\\u2028llo\\u2029!\"},{\"int\":42},{\"null\":null}]";
} else {
expectedNLjson = null;
fail("unexpected namedListStyle="+namedListStyle);
diff --git a/solr/webapp/web/js/angular/controllers/analysis.js b/solr/webapp/web/js/angular/controllers/analysis.js
index ccd556aa087..48ec36995f7 100644
--- a/solr/webapp/web/js/angular/controllers/analysis.js
+++ b/solr/webapp/web/js/angular/controllers/analysis.js
@@ -76,9 +76,17 @@ solrAdminApp.controller('AnalysisController',
for (key in tokenhash) {
if (key == "match" || key=="positionHistory") {
- //@ todo do something
+ //skip, to not display these keys in the UI
} else {
- token.keys.push({name:key, value:tokenhash[key]});
+ var tokenInfo = new Object();
+ tokenInfo.name = key;
+ tokenInfo.value = tokenhash[key];
+ if ('text' === key || 'raw_bytes' === key ) {
+ if (tokenhash.match) {
+ tokenInfo.extraclass = 'match'; //to highlight matching text strings
+ }
+ }
+ token.keys.push(tokenInfo);
}
}
tokens.push(token);
diff --git a/solr/webapp/web/partials/analysis.html b/solr/webapp/web/partials/analysis.html
index 3c1b4565dc5..23527f7351c 100644
--- a/solr/webapp/web/partials/analysis.html
+++ b/solr/webapp/web/partials/analysis.html
@@ -106,7 +106,7 @@ limitations under the License.
|