From f42cc2a8c38b8da7fc9a28a5de09b386973fc69f Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Fri, 18 Nov 2016 15:57:35 +0000 Subject: [PATCH 1/7] SOLR-9782: for json.nl expand test coverage and comments w.r.t. NamedList(null=null) --- .../apache/solr/response/JSONResponseWriter.java | 14 +++++++------- .../org/apache/solr/response/JSONWriterTest.java | 11 ++++++----- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java b/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java index 462c6561870..ae1ea4703ad 100644 --- a/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java +++ b/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java @@ -188,10 +188,10 @@ class JSONWriter extends TextResponseWriter { } /** Represents a NamedList directly as a JSON Object (essentially a Map) - * repeating any keys if they are repeated in the NamedList. null is mapped - * to "". + * repeating any keys if they are repeated in the NamedList. + * null key is mapped to "". */ - // NamedList("a"=1,"bar"="foo",null=3) => {"a":1,"bar":"foo","":3} + // NamedList("a"=1,"bar"="foo",null=3,null=null) => {"a":1,"bar":"foo","":3,"":null} protected void writeNamedListAsMapWithDups(String name, NamedList val) throws IOException { int sz = val.size(); writeMapOpener(sz); @@ -214,7 +214,7 @@ class JSONWriter extends TextResponseWriter { } // Represents a NamedList directly as an array of JSON objects... - // NamedList("a"=1,"b"=2,null=3) => [{"a":1},{"b":2},3] + // NamedList("a"=1,"b"=2,null=3,null=null) => [{"a":1},{"b":2},3,null] protected void writeNamedListAsArrMap(String name, NamedList val) throws IOException { int sz = val.size(); indent(); @@ -249,7 +249,7 @@ class JSONWriter extends TextResponseWriter { } // Represents a NamedList directly as an array of JSON objects... 
- // NamedList("a"=1,"b"=2,null=3) => [["a",1],["b",2],[null,3]] + // NamedList("a"=1,"b"=2,null=3,null=null) => [["a",1],["b",2],[null,3],[null,null]] protected void writeNamedListAsArrArr(String name, NamedList val) throws IOException { int sz = val.size(); indent(); @@ -293,7 +293,7 @@ class JSONWriter extends TextResponseWriter { // Represents a NamedList directly as an array with keys/values // interleaved. - // NamedList("a"=1,"b"=2,null=3) => ["a",1,"b",2,null,3] + // NamedList("a"=1,"b"=2,null=3,null=null) => ["a",1,"b",2,null,3,null,null] protected void writeNamedListAsFlat(String name, NamedList val) throws IOException { int sz = val.size(); writeArrayOpener(sz*2); @@ -676,7 +676,7 @@ class JSONWriter extends TextResponseWriter { /** * Writes NamedLists directly as an array of NamedValuePair JSON objects... - * NamedList("a"=1,"b"=2,null=3) => [{"name":"a","int":1},{"name":"b","int":2},{"int":3}] + * NamedList("a"=1,"b"=2,null=3,null=null) => [{"name":"a","int":1},{"name":"b","int":2},{"int":3},{"null":null}] * NamedList("a"=1,"bar"="foo",null=3.4f) => [{"name":"a","int":1},{"name":"bar","str":"foo"},{"float":3.4}] */ class ArrayOfNamedValuePairJSONWriter extends JSONWriter { diff --git a/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java b/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java index 076d322437f..a056016d5b8 100644 --- a/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java +++ b/solr/core/src/test/org/apache/solr/response/JSONWriterTest.java @@ -98,6 +98,7 @@ public class JSONWriterTest extends SolrTestCaseJ4 { NamedList nl = new NamedList(); nl.add("data1", "he\u2028llo\u2029!"); // make sure that 2028 and 2029 are both escaped (they are illegal in javascript) nl.add(null, 42); + nl.add(null, null); rsp.add("nl", nl); rsp.add("byte", Byte.valueOf((byte)-3)); @@ -108,15 +109,15 @@ public class JSONWriterTest extends SolrTestCaseJ4 { final String expectedNLjson; if (namedListStyle == 
JSONWriter.JSON_NL_FLAT) { - expectedNLjson = "\"nl\":[\"data1\",\"he\\u2028llo\\u2029!\",null,42]"; + expectedNLjson = "\"nl\":[\"data1\",\"he\\u2028llo\\u2029!\",null,42,null,null]"; } else if (namedListStyle == JSONWriter.JSON_NL_MAP) { - expectedNLjson = "\"nl\":{\"data1\":\"he\\u2028llo\\u2029!\",\"\":42}"; + expectedNLjson = "\"nl\":{\"data1\":\"he\\u2028llo\\u2029!\",\"\":42,\"\":null}"; } else if (namedListStyle == JSONWriter.JSON_NL_ARROFARR) { - expectedNLjson = "\"nl\":[[\"data1\",\"he\\u2028llo\\u2029!\"],[null,42]]"; + expectedNLjson = "\"nl\":[[\"data1\",\"he\\u2028llo\\u2029!\"],[null,42],[null,null]]"; } else if (namedListStyle == JSONWriter.JSON_NL_ARROFMAP) { - expectedNLjson = "\"nl\":[{\"data1\":\"he\\u2028llo\\u2029!\"},42]"; + expectedNLjson = "\"nl\":[{\"data1\":\"he\\u2028llo\\u2029!\"},42,null]"; } else if (namedListStyle == JSONWriter.JSON_NL_ARROFNVP) { - expectedNLjson = "\"nl\":[{\"name\":\"data1\",\"str\":\"he\\u2028llo\\u2029!\"},{\"int\":42}]"; + expectedNLjson = "\"nl\":[{\"name\":\"data1\",\"str\":\"he\\u2028llo\\u2029!\"},{\"int\":42},{\"null\":null}]"; } else { expectedNLjson = null; fail("unexpected namedListStyle="+namedListStyle); From 4236da27d1b1cbced6c3fed4b3d3094fe796fa7e Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Sat, 19 Nov 2016 08:28:25 +0100 Subject: [PATCH 2/7] LUCENE-7466 - added axiomatic similarity, patch from Peilin Yang --- .../lucene/search/similarities/Axiomatic.java | 159 ++++++++++++++++++ .../search/similarities/AxiomaticF1EXP.java | 95 +++++++++++ .../search/similarities/AxiomaticF1LOG.java | 88 ++++++++++ .../search/similarities/AxiomaticF2EXP.java | 94 +++++++++++ .../search/similarities/AxiomaticF2LOG.java | 86 ++++++++++ .../search/similarities/AxiomaticF3EXP.java | 94 +++++++++++ .../search/similarities/AxiomaticF3LOG.java | 83 +++++++++ .../similarities/TestAxiomaticSimilarity.java | 86 ++++++++++ 8 files changed, 785 insertions(+) create mode 100644 
lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java create mode 100644 lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java new file mode 100644 index 00000000000..9c2854c3bbd --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search.similarities; + + +import java.util.List; + +import org.apache.lucene.search.Explanation; + +/** + * Axiomatic approaches for IR. From Hui Fang and Chengxiang Zhai + * 2005. An Exploration of Axiomatic Approaches to Information Retrieval. + * In Proceedings of the 28th annual international ACM SIGIR + * conference on Research and development in information retrieval + * (SIGIR '05). ACM, New York, NY, USA, 480-487. + *

+ * There is a family of models. All of them are based on BM25, + * Pivoted Document Length Normalization and the Language Model with + * Dirichlet prior. Some components (e.g. Term Frequency, + * Inverse Document Frequency) in the original models are modified + * so that they follow some axiomatic constraints. + *

+ * + * @lucene.experimental + */ +public abstract class Axiomatic extends SimilarityBase { + /** + * hyperparam for the growth function + */ + protected final float s; + + /** + * hyperparam for the primitive weighting function + */ + protected final float k; + + /** + * the query length + */ + protected final int queryLen; + + /** + * Constructor setting all Axiomatic hyperparameters + * @param s hyperparam for the growth function + * @param queryLen the query length + * @param k hyperparam for the primitive weighting function + */ + public Axiomatic(float s, int queryLen, float k) { + if (Float.isFinite(s) == false || Float.isNaN(s) || s < 0 || s > 1) { + throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1"); + } + if (Float.isFinite(k) == false || Float.isNaN(k) || k < 0 || k > 1) { + throw new IllegalArgumentException("illegal k value: " + k + ", must be between 0 and 1"); + } + if (queryLen < 0 || queryLen > Integer.MAX_VALUE) { + throw new IllegalArgumentException("illegal query length value: " + + queryLen + ", must be larger 0 and smaller than MAX_INT"); + } + this.s = s; + this.queryLen = queryLen; + this.k = k; + } + + /** + * Constructor setting only s, leaving k and queryLen at their defaults + * @param s hyperparam for the growth function + */ + public Axiomatic(float s) { + this(s, 1, 0.35f); + } + + /** + * Constructor setting s and queryLen, leaving k at its default + * @param s hyperparam for the growth function + * @param queryLen the query length + */ + public Axiomatic(float s, int queryLen) { + this(s, queryLen, 0.35f); + } + + /** + * Default constructor + */ + public Axiomatic() { + this(0.25f, 1, 0.35f); + } + + @Override + public float score(BasicStats stats, float freq, float docLen) { + return tf(stats, freq, docLen) + * ln(stats, freq, docLen) + * tfln(stats, freq, docLen) + * idf(stats, freq, docLen) + - gamma(stats, freq, docLen); + } + + @Override + protected void explain(List subs, BasicStats stats, int
doc, + float freq, float docLen) { + if (stats.getBoost() != 1.0f) { + subs.add(Explanation.match(stats.getBoost(), "boost")); + } + + subs.add(Explanation.match(this.k, "k")); + subs.add(Explanation.match(this.s, "s")); + subs.add(Explanation.match(this.queryLen, "queryLen")); + subs.add(Explanation.match(tf(stats, freq, docLen), "tf")); + subs.add(Explanation.match(ln(stats, freq, docLen), "ln")); + subs.add(Explanation.match(tfln(stats, freq, docLen), "tfln")); + subs.add(Explanation.match(idf(stats, freq, docLen), "idf")); + subs.add(Explanation.match(gamma(stats, freq, docLen), "gamma")); + super.explain(subs, stats, doc, freq, docLen); + } + + /** + * Name of the axiomatic method. + */ + @Override + public abstract String toString(); + + /** + * compute the term frequency component + */ + protected abstract float tf(BasicStats stats, float freq, float docLen); + + /** + * compute the document length component + */ + protected abstract float ln(BasicStats stats, float freq, float docLen); + + /** + * compute the mixed term frequency and document length component + */ + protected abstract float tfln(BasicStats stats, float freq, float docLen); + + /** + * compute the inverse document frequency component + */ + protected abstract float idf(BasicStats stats, float freq, float docLen); + + /** + * compute the gamma component (only for F3EXP and F3LOG) + */ + protected abstract float gamma(BasicStats stats, float freq, float docLen); +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java new file mode 100644 index 00000000000..62317fdf73f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +/** + * F1EXP is defined as Sum(tf(term_doc_freq)*ln(docLen)*IDF(term)) + * where IDF(t) = pow((N+1)/df(t), k) N=total num of docs, df=doc freq + * + * @lucene.experimental + */ +public class AxiomaticF1EXP extends Axiomatic { + /** + * Constructor setting s and k, letting queryLen to default + * @param s hyperparam for the growth function + * @param k hyperparam for the primitive weighting function + */ + public AxiomaticF1EXP(float s, float k) { + super(s, 1, k); + } + + /** + * Constructor setting s only, letting k and queryLen to default + * @param s hyperparam for the growth function + */ + public AxiomaticF1EXP(float s) { + this(s, 0.35f); + } + + /** + * Default constructor + */ + public AxiomaticF1EXP() { + super(); + } + + @Override + public String toString() { + return "F1EXP"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + if (freq <= 0.0) return 0f; + return (float) (1 + Math.log(1 + Math.log(freq))); + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return (stats.getAvgFieldLength() + this.s) / (stats.getAvgFieldLength() + docLen * 
this.s); + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the inverted document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.pow((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq(), this.k); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return 0f; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java new file mode 100644 index 00000000000..7cce2be4e95 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search.similarities; + +/** + * F1LOG is defined as Sum(tf(term_doc_freq)*ln(docLen)*IDF(term)) + * where IDF(t) = ln((N+1)/df(t)) N=total num of docs, df=doc freq + * + * @lucene.experimental + */ +public class AxiomaticF1LOG extends Axiomatic { + + /** + * Constructor setting s only, letting k and queryLen to default + * + * @param s hyperparam for the growth function + */ + public AxiomaticF1LOG(float s) { + super(s); + } + + /** + * Default constructor + */ + public AxiomaticF1LOG() { + super(); + } + + @Override + public String toString() { + return "F1LOG"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + if (freq <= 0.0) return 0f; + return (float) (1 + Math.log(1 + Math.log(freq))); + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return (stats.getAvgFieldLength() + this.s) / (stats.getAvgFieldLength() + docLen * this.s); + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the inverted document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.log((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq()); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return 0f; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java new file mode 100644 index 00000000000..f9bc98a4d27 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2EXP.java @@ -0,0 +1,94 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +/** + * F2EXP is defined as Sum(tfln(term_doc_freq, docLen)*IDF(term)) + * where IDF(t) = pow((N+1)/df(t), k) N=total num of docs, df=doc freq + * + * @lucene.experimental + */ +public class AxiomaticF2EXP extends Axiomatic { + /** + * Constructor setting s and k, letting queryLen to default + * @param s hyperparam for the growth function + * @param k hyperparam for the primitive weighting function + */ + public AxiomaticF2EXP(float s, float k) { + super(s, 1, k); + } + + /** + * Constructor setting s only, letting k and queryLen to default + * @param s hyperparam for the growth function + */ + public AxiomaticF2EXP(float s) { + this(s, 0.35f); + } + + /** + * Default constructor + */ + public AxiomaticF2EXP() { + super(); + } + + @Override + public String toString() { + return "F2EXP"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the mixed term 
frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return freq / (freq + this.s + this.s * docLen / stats.getAvgFieldLength()); + } + + /** + * compute the inverted document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.pow((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq(), this.k); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return 0f; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java new file mode 100644 index 00000000000..fee2000bec3 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF2LOG.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search.similarities; + +/** + * F2LOG is defined as Sum(tfln(term_doc_freq, docLen)*IDF(term)) + * where IDF(t) = ln((N+1)/df(t)) N=total num of docs, df=doc freq + * + * @lucene.experimental + */ +public class AxiomaticF2LOG extends Axiomatic { + /** + * Constructor setting s only, leaving k and queryLen at their defaults + * + * @param s hyperparam for the growth function + */ + public AxiomaticF2LOG(float s) { + super(s); + } + + /** + * Default constructor + */ + public AxiomaticF2LOG() { + super(); + } + + @Override + public String toString() { + return "F2LOG"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return freq / (freq + this.s + this.s * docLen / stats.getAvgFieldLength()); + } + + /** + * compute the inverse document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.log((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq()); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return 0f; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java new file mode 100644 index 00000000000..c20194ac28a --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +/** + * F3EXP is defined as Sum(tf(term_doc_freq)*IDF(term)-gamma(docLen, queryLen)) + * where IDF(t) = pow((N+1)/df(t), k) N=total num of docs, df=doc freq + * gamma(docLen, queryLen) = (docLen-queryLen)*queryLen*s/avdl + * + * @lucene.experimental + */ +public class AxiomaticF3EXP extends Axiomatic { + + /** + * Constructor setting all Axiomatic hyperparameters + * + * @param s hyperparam for the growth function + * @param queryLen the query length + * @param k hyperparam for the primitive weighting function + */ + public AxiomaticF3EXP(float s, int queryLen, float k) { + super(s, queryLen, k); + } + + /** + * Constructor setting s and queryLen, leaving k at its default + * + * @param s hyperparam for the growth function + * @param queryLen the query length + */ + public AxiomaticF3EXP(float s, int queryLen) { + this(s, queryLen, 0.35f); + } + + @Override + public String toString() { + return "F3EXP"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + if (freq <= 0.0) return 0f; + return (float) (1 + Math.log(1 + Math.log(freq))); + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats
stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the inverse document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.pow((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq(), this.k); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return (docLen - this.queryLen) * this.s * this.queryLen / stats.getAvgFieldLength(); + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java new file mode 100644 index 00000000000..a9d82aded39 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.lucene.search.similarities; + +/** + * F3LOG is defined as Sum(tf(term_doc_freq)*IDF(term)-gamma(docLen, queryLen)) + * where IDF(t) = ln((N+1)/df(t)) N=total num of docs, df=doc freq + * gamma(docLen, queryLen) = (docLen-queryLen)*queryLen*s/avdl + * + * @lucene.experimental + */ +public class AxiomaticF3LOG extends Axiomatic { + + /** + * Constructor setting s and queryLen, leaving k at its default + * + * @param s hyperparam for the growth function + * @param queryLen the query length + */ + public AxiomaticF3LOG(float s, int queryLen) { + super(s, queryLen); + } + + @Override + public String toString() { + return "F3LOG"; + } + + /** + * compute the term frequency component + */ + @Override + protected float tf(BasicStats stats, float freq, float docLen) { + if (freq <= 0.0) return 0f; + return (float) (1 + Math.log(1 + Math.log(freq))); + } + + /** + * compute the document length component + */ + @Override + protected float ln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the mixed term frequency and document length component + */ + @Override + protected float tfln(BasicStats stats, float freq, float docLen) { + return 1f; + } + + /** + * compute the inverse document frequency component + */ + @Override + protected float idf(BasicStats stats, float freq, float docLen) { + return (float) Math.log((stats.getNumberOfDocuments() + 1.0) / stats.getDocFreq()); + } + + /** + * compute the gamma component + */ + @Override + protected float gamma(BasicStats stats, float freq, float docLen) { + return (docLen - this.queryLen) * this.s * this.queryLen / stats.getAvgFieldLength(); + } +} \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java new file mode 100644 index 00000000000..44c7e1d9d16 --- /dev/null +++
b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticSimilarity.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.similarities; + +import org.apache.lucene.util.LuceneTestCase; + +public class TestAxiomaticSimilarity extends LuceneTestCase { + + public void testSaneNormValues() { + Axiomatic sim = new AxiomaticF2EXP(); + for (int i = 0; i < 256; i++) { + float len = sim.decodeNormValue((byte) i); + assertFalse("negative len: " + len + ", byte=" + i, len < 0.0f); + assertFalse("inf len: " + len + ", byte=" + i, Float.isInfinite(len)); + assertFalse("nan len for byte=" + i, Float.isNaN(len)); + if (i > 0) { + assertTrue("len is not decreasing: " + len + ",byte=" + i, len < sim.decodeNormValue((byte) (i - 1))); + } + } + } + + public void testIllegalS() { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(Float.POSITIVE_INFINITY, 0.1f); + }); + assertTrue(expected.getMessage().contains("illegal s value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(-1, 0.1f); + }); + assertTrue(expected.getMessage().contains("illegal s value")); + + 
expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(Float.NaN, 0.1f); + }); + assertTrue(expected.getMessage().contains("illegal s value")); + } + + public void testIllegalK() { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, 2f); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, -1f); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, Float.POSITIVE_INFINITY); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, Float.NaN); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + } + + public void testIllegalQL() { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF3EXP(0.35f, -1); + }); + assertTrue(expected.getMessage().contains("illegal query length value")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + new AxiomaticF2EXP(0.35f, Integer.MAX_VALUE + 1); + }); + assertTrue(expected.getMessage().contains("illegal k value")); + } +} From 380b5ca626b396f1231936ca5d581416866f11b1 Mon Sep 17 00:00:00 2001 From: Alexandre Rafalovitch Date: Sun, 20 Nov 2016 22:18:55 +1100 Subject: [PATCH 3/7] SOLR-9626: Add css class, when match key is found --- solr/CHANGES.txt | 2 ++ solr/webapp/web/js/angular/controllers/analysis.js | 12 ++++++++++-- solr/webapp/web/partials/analysis.html | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9ba9eb8abbd..6c2b99c4af8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -170,6 +170,8 @@ Bug Fixes (Mark Miller, Michael Sun) * SOLR-9729: 
JDBCStream improvements (Kevin Risden) + +* SOLR-9626: new Admin UI now also highlights matched terms in the Analysis screen. (Alexandre Rafalovitch) Other Changes ---------------------- diff --git a/solr/webapp/web/js/angular/controllers/analysis.js b/solr/webapp/web/js/angular/controllers/analysis.js index ccd556aa087..48ec36995f7 100644 --- a/solr/webapp/web/js/angular/controllers/analysis.js +++ b/solr/webapp/web/js/angular/controllers/analysis.js @@ -76,9 +76,17 @@ solrAdminApp.controller('AnalysisController', for (key in tokenhash) { if (key == "match" || key=="positionHistory") { - //@ todo do something + //skip, to not display these keys in the UI } else { - token.keys.push({name:key, value:tokenhash[key]}); + var tokenInfo = new Object(); + tokenInfo.name = key; + tokenInfo.value = tokenhash[key]; + if ('text' === key || 'raw_bytes' === key ) { + if (tokenhash.match) { + tokenInfo.extraclass = 'match'; //to highlight matching text strings + } + } + token.keys.push(tokenInfo); } } tokens.push(token); diff --git a/solr/webapp/web/partials/analysis.html b/solr/webapp/web/partials/analysis.html index 3c1b4565dc5..23527f7351c 100644 --- a/solr/webapp/web/partials/analysis.html +++ b/solr/webapp/web/partials/analysis.html @@ -106,7 +106,7 @@ limitations under the License. - + From c3f172a40830b31d005dbb7c6bd518ea236aa5fb Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Sun, 20 Nov 2016 14:21:42 +0100 Subject: [PATCH 4/7] LUCENE-7466 - adjusted changes.txt to reflect added axiomatic sim --- lucene/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index dfbf3182307..9e41067dda5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -65,6 +65,8 @@ New features * LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand) +* LUCENE-7466: Added AxiomaticSimilarity. 
(Peilin Yang via Tommaso Teofili) + Bug Fixes * LUCENE-7547: JapaneseTokenizerFactory was failing to close the From b426838e8f2427cec07133ebfb49b267b570fbc1 Mon Sep 17 00:00:00 2001 From: yonik Date: Sun, 20 Nov 2016 22:56:38 -0500 Subject: [PATCH 5/7] LUCENE-7567: don't clone BooleanClause in builder --- .../java/org/apache/lucene/search/BooleanQuery.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index e67d7f4cc53..2ea0d0e94e9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -109,7 +109,10 @@ public class BooleanQuery extends Query implements Iterable { * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number */ public Builder add(BooleanClause clause) { - add(clause.getQuery(), clause.getOccur()); + if (clauses.size() >= maxClauseCount) { + throw new TooManyClauses(); + } + clauses.add(clause); return this; } @@ -120,11 +123,7 @@ public class BooleanQuery extends Query implements Iterable { * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number */ public Builder add(Query query, Occur occur) { - if (clauses.size() >= maxClauseCount) { - throw new TooManyClauses(); - } - clauses.add(new BooleanClause(query, occur)); - return this; + return add(new BooleanClause(query, occur)); } /** Create a new {@link BooleanQuery} based on the parameters that have From bb3278dd1797a45e06e7c03445ead75bad09828b Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Mon, 21 Nov 2016 06:29:05 -0500 Subject: [PATCH 6/7] LUCENE-7560: make QueryBuilder.analyzeXXX methods protected --- .../org/apache/lucene/util/QueryBuilder.java | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git 
a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index 977af5387e6..6c5ea15aac5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -51,8 +51,8 @@ import org.apache.lucene.search.TermQuery; * are provided so that the generated queries can be customized. */ public class QueryBuilder { - private Analyzer analyzer; - private boolean enablePositionIncrements = true; + protected Analyzer analyzer; + protected boolean enablePositionIncrements = true; /** Creates a new QueryBuilder using the given analyzer. */ public QueryBuilder(Analyzer analyzer) { @@ -186,9 +186,12 @@ public class QueryBuilder { /** * Creates a query from the analysis chain. *

- * Expert: this is more useful for subclasses such as queryparsers. + * Expert: this is more useful for subclasses such as queryparsers. * If using this class directly, just use {@link #createBooleanQuery(String, String)} - * and {@link #createPhraseQuery(String, String)} + * and {@link #createPhraseQuery(String, String)}. This is a complex method and + * it is usually not necessary to override it in a subclass; instead, override + * methods like {@link #newBooleanQuery}, etc., if possible. + * * @param analyzer analyzer used for this query * @param operator default boolean operator used for this query * @param field field to create queries against @@ -265,7 +268,7 @@ public class QueryBuilder { /** * Creates simple term query from the cached tokenstream contents */ - private Query analyzeTerm(String field, TokenStream stream) throws IOException { + protected Query analyzeTerm(String field, TokenStream stream) throws IOException { TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); stream.reset(); @@ -279,7 +282,7 @@ public class QueryBuilder { /** * Creates simple boolean query from the cached tokenstream contents */ - private Query analyzeBoolean(String field, TokenStream stream) throws IOException { + protected Query analyzeBoolean(String field, TokenStream stream) throws IOException { TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); stream.reset(); @@ -291,7 +294,7 @@ public class QueryBuilder { return newSynonymQuery(terms.toArray(new Term[terms.size()])); } - private void add(BooleanQuery.Builder q, List current, BooleanClause.Occur operator) { + protected void add(BooleanQuery.Builder q, List current, BooleanClause.Occur operator) { if (current.isEmpty()) { return; } @@ -305,7 +308,7 @@ public class QueryBuilder { /** * Creates complex boolean query from the cached tokenstream contents */ - private Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator) 
throws IOException { + protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator) throws IOException { BooleanQuery.Builder q = newBooleanQuery(); List currentQuery = new ArrayList<>(); @@ -328,7 +331,7 @@ public class QueryBuilder { /** * Creates simple phrase query from the cached tokenstream contents */ - private Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.setSlop(slop); @@ -352,7 +355,7 @@ public class QueryBuilder { /** * Creates complex phrase query from the cached tokenstream contents */ - private Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { + protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { MultiPhraseQuery.Builder mpqb = newMultiPhraseQueryBuilder(); mpqb.setSlop(slop); From 33ff6cde9be9b9ab9c2e4c8f9dc5221bc998b673 Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Mon, 21 Nov 2016 16:47:40 -0500 Subject: [PATCH 7/7] LUCENE-7543: Make changes-to-html target an offline operation, by moving the Lucene and Solr DOAP RDF files into the Git source repository under dev-tools/doap/ and then pulling release dates from those files, rather than from JIRA. 
--- dev-tools/doap/README.txt | 5 + dev-tools/doap/lucene.rdf | 707 ++++++++++++++++++++++++++++ dev-tools/doap/solr.rdf | 455 ++++++++++++++++++ lucene/CHANGES.txt | 4 + lucene/build.xml | 5 +- lucene/common-build.xml | 5 +- lucene/site/changes/changes2html.pl | 96 ++-- solr/build.xml | 4 +- 8 files changed, 1207 insertions(+), 74 deletions(-) create mode 100644 dev-tools/doap/README.txt create mode 100644 dev-tools/doap/lucene.rdf create mode 100644 dev-tools/doap/solr.rdf diff --git a/dev-tools/doap/README.txt b/dev-tools/doap/README.txt new file mode 100644 index 00000000000..f2a9a9c807a --- /dev/null +++ b/dev-tools/doap/README.txt @@ -0,0 +1,5 @@ +This folder contains the DOAP[1] files for each project. + +Upon release, these files should be updated to include new release details. + +[1] DOAP: https://github.com/edumbill/doap diff --git a/dev-tools/doap/lucene.rdf b/dev-tools/doap/lucene.rdf new file mode 100644 index 00000000000..8f70467443d --- /dev/null +++ b/dev-tools/doap/lucene.rdf @@ -0,0 +1,707 @@ + + + + + + 2001-09-01 + + Apache Lucene Core + + + + Apache Lucene is a high-performance, full-featured text search engine library + Apache Lucene is a high-performance, full-featured text search engine library written entirely in Java. It is a technology suitable for nearly any application that requires full-text search, especially cross-platform. 
+ + + + + Java + + + + + + + + + + + + + + + + + Apache Lucene Team + + + + + + + lucene-6.3.0 + 2016-11-08 + 6.3.0 + + + + + lucene-6.2.1 + 2016-09-20 + 6.2.1 + + + + + lucene-6.2.0 + 2016-08-25 + 6.2.0 + + + + + lucene-6.1.0 + 2016-06-17 + 6.1.0 + + + + + lucene-6.0.1 + 2016-05-28 + 6.0.1 + + + + + lucene-6.0.0 + 2016-04-08 + 6.0.0 + + + + + lucene-5.5.3 + 2016-09-09 + 5.5.3 + + + + + lucene-5.5.2 + 2016-06-25 + 5.5.2 + + + + + lucene-5.5.1 + 2016-05-05 + 5.5.1 + + + + + lucene-5.5.0 + 2016-02-22 + 5.5.0 + + + + + lucene-5.4.1 + 2016-01-23 + 5.4.1 + + + + + lucene-5.4.0 + 2015-12-14 + 5.4.0 + + + + + lucene-5.3.2 + 2016-01-23 + 5.3.2 + + + + + lucene-5.3.1 + 2015-09-24 + 5.3.1 + + + + + lucene-5.3.0 + 2015-08-21 + 5.3.0 + + + + + lucene-5.2.1 + 2015-06-15 + 5.2.1 + + + + + lucene-5.2.0 + 2015-06-07 + 5.2.0 + + + + + lucene-5.1.0 + 2015-04-14 + 5.1.0 + + + + + lucene-5.0.0 + 2015-02-20 + 5.0.0 + + + + + lucene-4.10.4 + 2015-03-03 + 4.10.4 + + + + + lucene-4.10.3 + 2014-12-29 + 4.10.3 + + + + + lucene-4.10.2 + 2014-10-31 + 4.10.2 + + + + + lucene-4.10.1 + 2014-09-29 + 4.10.1 + + + + + lucene-4.10.0 + 2014-09-03 + 4.10.0 + + + + + lucene-4.9.1 + 2014-09-22 + 4.9.1 + + + + + lucene-4.9.0 + 2014-06-25 + 4.9.0 + + + + + lucene-4.8.1 + 2014-05-20 + 4.8.1 + + + + + lucene-4.8.0 + 2014-04-28 + 4.8.0 + + + + + lucene-4.7.2 + 2014-04-15 + 4.7.2 + + + + + lucene-4.7.1 + 2014-04-02 + 4.7.1 + + + + + lucene-4.7.0 + 2014-02-26 + 4.7.0 + + + + + lucene-4.6.1 + 2014-01-28 + 4.6.1 + + + + + lucene-4.6.0 + 2013-11-22 + 4.6.0 + + + + + lucene-4.5.1 + 2013-10-24 + 4.5.1 + + + + + lucene-4.5 + 2013-10-05 + 4.5 + + + + + lucene-4.4 + 2013-07-23 + 4.4 + + + + + lucene-4.3.1 + 2013-06-18 + 4.3.1 + + + + + lucene-4.3 + 2013-05-06 + 4.3 + + + + + lucene-4.2.1 + 2013-04-03 + 4.2.1 + + + + + lucene-4.2 + 2013-03-11 + 4.2 + + + + + lucene-4.1 + 2013-01-22 + 4.1 + + + + + lucene-4.0 + 2012-10-12 + 4.0 + + + + + lucene-4.0-BETA + 2012-08-13 + 4.0-BETA + + + + + lucene-4.0-ALPHA + 2012-07-03 + 
4.0-ALPHA + + + + + lucene-3.6.2 + 2012-12-25 + 3.6.2 + + + + + lucene-3.6.1 + 2012-07-22 + 3.6.1 + + + + + lucene-3.6 + 2012-04-12 + 3.6 + + + + + lucene-3.5 + 2011-11-11 + 3.5 + + + + + lucene-3.4 + 2011-09-15 + 3.4 + + + + + lucene-3.3 + 2011-07-10 + 3.3 + + + + + lucene-3.2 + 2011-06-03 + 3.2 + + + + + lucene-3.1 + 2011-03-31 + 3.1 + + + + + lucene-3.0.3 + 2010-12-03 + 3.0.3 + + + + + lucene-3.0.2 + 2010-06-18 + 3.0.2 + + + + + lucene-3.0.1 + 2010-02-26 + 3.0.1 + + + + + lucene-3.0 + 2009-11-25 + 3.0 + + + + + lucene-2.9.4 + 2010-12-03 + 2.9.4 + + + + + lucene-2.9.3 + 2010-06-18 + 2.9.3 + + + + + lucene-2.9.2 + 2010-02-26 + 2.9.2 + + + + + lucene-2.9.1 + 2009-11-06 + 2.9.1 + + + + + lucene-2.9 + 2009-09-25 + 2.9 + + + + + lucene-2.4.1 + 2009-03-09 + 2.4.1 + + + + + lucene-2.4 + 2008-10-08 + 2.4 + + + + + lucene-2.3.2 + 2008-05-06 + 2.3.2 + + + + + lucene-2.3.1 + 2008-02-22 + 2.3.1 + + + + + lucene-2.3 + 2008-01-23 + 2.3 + + + + + lucene-2.2 + 2007-06-19 + 2.2 + + + + + lucene-2.1 + 2007-02-17 + 2.1 + + + + + lucene-2.0.0 + 2006-05-26 + 2.0.0 + + + + + lucene-1.9.1 + 2006-03-02 + 1.9.1 + + + + + lucene-1.9-final + 2006-02-27 + 1.9 final + + + + + lucene-1.9-rc1 + 2006-02-21 + 1.9 RC1 + + + + + lucene-1.4.3 + 2004-12-07 + 1.4.3 + + + + + lucene-1.4.2 + 2004-10-01 + 1.4.2 + + + + + lucene-1.4.1 + 2004-08-02 + 1.4.1 + + + + + lucene-1.4-final + 2004-07-01 + 1.4 final + + + + + lucene-1.4-rc3 + 2004-05-11 + 1.4 RC3 + + + + + lucene-1.4-rc2 + 2004-03-30 + 1.4 RC2 + + + + + lucene-1.4-rc1 + 2004-03-29 + 1.4 RC1 + + + + + lucene-1.3-final + 2003-12-26 + 1.3 final + + + + + lucene-1.3-rc3 + 2003-11-25 + 1.3 RC3 + + + + + lucene-1.3-rc2 + 2003-10-22 + 1.3 RC2 + + + + + lucene-1.3-rc1 + 2003-03-24 + 1.3 RC1 + + + + + lucene-1.2-final + 2002-06-13 + 1.2 final + + + + + lucene-1.2-rc5 + 2002-05-14 + 1.2 RC5 + + + + + lucene-1.2-rc4 + 2002-02-14 + 1.2 RC4 + + + + + lucene-1.2-rc3 + 2002-01-27 + 1.2 RC3 + + + + + lucene-1.2-rc2 + 2001-10-19 + 1.2 RC2 + + + + + lucene-1.2-rc1 
+ 2001-10-02 + 1.2 RC1 + + + + + lucene-1.01b + 2001-06-02 + 1.01b + + + + + lucene-1.0 + 2000-10-04 + 1.0 + + + + + lucene-0.04 + 2000-04-19 + 0.04 + + + + + lucene-0.01 + 2000-03-30 + 0.01 + + + + diff --git a/dev-tools/doap/solr.rdf b/dev-tools/doap/solr.rdf new file mode 100644 index 00000000000..1483c64da29 --- /dev/null +++ b/dev-tools/doap/solr.rdf @@ -0,0 +1,455 @@ + + + + + + 2006-01-17 + + Apache Solr + + + + Solr is a full-text search server + Solr is an open source enterprise search server based on the Lucene Java search library, with XML/HTTP and JSON, Ruby, and Python APIs, hit highlighting, faceted search, caching, replication, and a web administration interface. + + + + + Java + + + + + + + + + + + + + + + + + Apache Solr Team + + + + + + + solr-6.3.0 + 2016-11-08 + 6.3.0 + + + + + solr-6.2.1 + 2016-09-20 + 6.2.1 + + + + + solr-6.2.0 + 2016-08-25 + 6.2.0 + + + + + solr-6.1.0 + 2016-06-17 + 6.1.0 + + + + + solr-6.0.1 + 2016-05-28 + 6.0.1 + + + + + solr-6.0.0 + 2016-04-08 + 6.0.0 + + + + + solr-5.5.3 + 2016-09-09 + 5.5.3 + + + + + solr-5.5.2 + 2016-06-25 + 5.5.2 + + + + + solr-5.5.1 + 2016-05-05 + 5.5.1 + + + + + solr-5.5.0 + 2016-02-22 + 5.5.0 + + + + + solr-5.4.1 + 2016-01-23 + 5.4.1 + + + + + solr-5.4.0 + 2015-12-14 + 5.4.0 + + + + + solr-5.3.2 + 2016-01-23 + 5.3.2 + + + + + solr-5.3.1 + 2015-09-24 + 5.3.1 + + + + + solr-5.3.0 + 2015-08-21 + 5.3.0 + + + + + solr-5.2.1 + 2015-06-15 + 5.2.1 + + + + + solr-5.2.0 + 2015-06-07 + 5.2.0 + + + + + solr-5.1.0 + 2015-04-14 + 5.1.0 + + + + + solr-5.0.0 + 2015-02-20 + 5.0.0 + + + + + solr-4.10.4 + 2015-03-03 + 4.10.4 + + + + + solr-4.10.3 + 2014-12-29 + 4.10.3 + + + + + solr-4.10.2 + 2014-10-31 + 4.10.2 + + + + + solr-4.10.1 + 2014-09-29 + 4.10.1 + + + + + solr-4.10.0 + 2014-09-03 + 4.10.0 + + + + + solr-4.9.1 + 2014-09-22 + 4.9.1 + + + + + solr-4.9.0 + 2014-06-25 + 4.9.0 + + + + + solr-4.8.1 + 2014-05-20 + 4.8.1 + + + + + solr-4.8.0 + 2014-04-28 + 4.8.0 + + + + + solr-4.7.2 + 2014-04-15 + 4.7.2 + + + + + 
solr-4.7.1 + 2014-04-02 + 4.7.1 + + + + + solr-4.7.0 + 2014-02-26 + 4.7.0 + + + + + solr-4.6.1 + 2014-01-28 + 4.6.1 + + + + + solr-4.6.0 + 2013-11-22 + 4.6.0 + + + + + solr-4.5.1 + 2013-10-24 + 4.5.1 + + + + + solr-4.5 + 2013-10-05 + 4.5 + + + + + solr-4.4 + 2013-07-23 + 4.4 + + + + + solr-4.3.1 + 2013-06-18 + 4.3.1 + + + + + solr-4.3 + 2013-05-06 + 4.3 + + + + + solr-4.2.1 + 2013-04-03 + 4.2.1 + + + + + solr-4.2 + 2013-03-11 + 4.2 + + + + + solr-4.1 + 2013-01-22 + 4.1 + + + + + solr-4.0 + 2012-10-12 + 4.0 + + + + + solr-4.0-BETA + 2012-08-13 + 4.0-BETA + + + + + solr-4.0-ALPHA + 2012-07-03 + 4.0-ALPHA + + + + + solr-3.6.2 + 2012-12-25 + 3.6.2 + + + + + solr-3.6.1 + 2012-07-22 + 3.6.1 + + + + + solr-3.6 + 2012-04-12 + 3.6 + + + + + solr-3.5 + 2011-11-11 + 3.5 + + + + + solr-3.4 + 2011-09-15 + 3.4 + + + + + solr-3.3 + 2011-07-10 + 3.3 + + + + + solr-3.2 + 2011-06-03 + 3.2 + + + + + solr-3.1 + 2011-03-31 + 3.1 + + + + + solr-1.4.1 + 2010-06-25 + 1.4.1 + + + + + solr-1.4.0 + 2009-11-10 + 1.4.0 + + + + + solr-1.3.0 + 2008-09-16 + 1.3.0 + + + + + solr-1.2.0 + 2007-06-06 + 1.2.0 + + + + + solr-1.1.0 + 2006-12-22 + 1.1.0 + + + + diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9e41067dda5..ca39e65d889 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -131,6 +131,10 @@ Build * LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman) +* LUCENE-7543: Make changes-to-html target an offline operation, by moving the + Lucene and Solr DOAP RDF files into the Git source repository under + dev-tools/doap/ and then pulling release dates from those files, rather than + from JIRA.
(Mano Kovacs, hossman, Steve Rowe) ======================= Lucene 6.3.0 ======================= diff --git a/lucene/build.xml b/lucene/build.xml index 11f4644467d..8b73ca622dd 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -357,13 +357,14 @@ + changes.product="lucene"/> + @@ -478,7 +479,7 @@ - + diff --git a/lucene/common-build.xml b/lucene/common-build.xml index d92c4529014..d2db40bbd18 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -2504,18 +2504,17 @@ ${ant.project.name}.test.dependencies=${test.classpath.list} + - - + diff --git a/lucene/site/changes/changes2html.pl b/lucene/site/changes/changes2html.pl index d71f2962655..b3f8fdb58c5 100755 --- a/lucene/site/changes/changes2html.pl +++ b/lucene/site/changes/changes2html.pl @@ -23,9 +23,8 @@ use strict; use warnings; +use XML::Simple; -# JIRA REST API documentation: -my $project_info_url = 'https://issues.apache.org/jira/rest/api/2/project'; my $jira_url_prefix = 'http://issues.apache.org/jira/browse/'; my $github_pull_request_prefix = 'https://github.com/apache/lucene-solr/pull/'; my $bugzilla_url_prefix = 'http://issues.apache.org/bugzilla/show_bug.cgi?id='; @@ -45,7 +44,7 @@ my @releases = (); my @lines = ; # Get all input at once # -# Cmdline args: (only from Solr) +# Cmdline args: (only from Solr) # my $product = $ARGV[0]; my %release_dates = &setup_release_dates($ARGV[1]); @@ -804,10 +803,6 @@ sub get_release_date { # Handle '1.2 RC6', which should be '1.2 final' $release = '1.2 final' if ($release eq '1.2 RC6'); - if (not exists($release_dates{$release})) { - $release =~ s/\.0\.0/\.0/; - } - $reldate = ( exists($release_dates{$release}) ? $release_dates{$release} : 'unknown'); @@ -825,60 +820,46 @@ sub get_release_date { # Returns a list of alternating release names and dates, for use in populating # the %release_dates hash. # -# Pulls release dates via the JIRA REST API. 
JIRA does not list -# X.Y RCZ releases independently from releases X.Y, so the RC dates -# as well as those named "final" are included below. +# Pulls release dates from the project DOAP file. # sub setup_release_dates { my %release_dates; my $file = shift; - if (uc($product) eq 'LUCENE') { - %release_dates - = ( '0.01' => '2000-03-30', '0.04' => '2000-04-19', - '1.0' => '2000-10-04', '1.01b' => '2001-06-02', - '1.2 RC1' => '2001-10-02', '1.2 RC2' => '2001-10-19', - '1.2 RC3' => '2002-01-27', '1.2 RC4' => '2002-02-14', - '1.2 RC5' => '2002-05-14', '1.2 final' => '2002-06-13', - '1.3 RC1' => '2003-03-24', '1.3 RC2' => '2003-10-22', - '1.3 RC3' => '2003-11-25', '1.3 final' => '2003-12-26', - '1.4 RC1' => '2004-03-29', '1.4 RC2' => '2004-03-30', - '1.4 RC3' => '2004-05-11', '1.4 final' => '2004-07-01', - '1.4.1' => '2004-08-02', '1.4.2' => '2004-10-01', - '1.4.3' => '2004-12-07', '1.9 RC1' => '2006-02-21', - '1.9 final' => '2006-02-27', '1.9.1' => '2006-03-02', - '2.0.0' => '2006-05-26', '2.1.0' => '2007-02-14', - '2.2.0' => '2007-06-19', '2.3.0' => '2008-01-21', - '2.3.1' => '2008-02-22', '2.3.2' => '2008-05-05', - '2.4.0' => '2008-10-06', '2.4.1' => '2009-03-09', - '2.9.0' => '2009-09-23', '2.9.1' => '2009-11-06', - '3.0.0' => '2009-11-25'); - } - my $project_info_json = readFile($file); - my $project_info = json2perl($project_info_json); - for my $version (@{$project_info->{versions}}) { - if ($version->{releaseDate}) { - my $date = substr($version->{releaseDate}, 0, 10); - my $version_name = $version->{name}; - $release_dates{$version->{name}} = $date; - if ($version_name =~ /^\d+\.\d+$/) { - my $full_version_name = "$version->{name}.0"; - $release_dates{$full_version_name} = $date; + my $project_info = XMLin($file)->{Project}; + my $version; + my $date; + for my $release (@{$project_info->{release}}) { + $version = $release->{Version}; + if ($version->{created}) { + $date = normalize_date($version->{created}); + my $version_name = $version->{revision}; + 
$release_dates{$version->{revision}} = $date; + if ($version_name =~ /^([1-9]\d*\.\d+)([^.0-9].*|$)/) { + my $padded_version_name = "$1.0$2"; # Alias w/trailing ".0" + $release_dates{$padded_version_name} = $date; + } elsif ($version_name =~ /\.0(?=[^.0-9]|$)/) { + my $trimmed_version_name = $version_name; + $trimmed_version_name =~ s/\.0(?=[^.0-9]|$)//; # Alias w/o trailing ".0" + $release_dates{$trimmed_version_name} = $date; } } } return %release_dates; } -sub readFile { - my $file = shift; - open(F, '<'.$file) || die "could not open $file: $!"; - local $/ = undef; - my $project_info_json = ; - close(F); - return $project_info_json; +# +# normalize_date +# +# Left-zero-pads month and day-of-month to 2 digits in dates of format YYYY-(M)M-(D)D +# +sub normalize_date { + my $date = shift; + my ($year, $month, $dom) = $date =~ /^(2\d\d\d)-(\d+)-(\d+)$/; + return sprintf("%04d-%02d-%02d", $year, $month, $dom); } + # # setup_month_regex # @@ -1038,23 +1019,4 @@ sub setup_lucene_bugzilla_jira_map { 36628 => 432); } -# -# json2perl -# -# Converts a JSON string to the equivalent Perl data structure -# -sub json2perl { - my $json_string = shift; - $json_string =~ s/(:\s*)(true|false)/$1"$2"/g; - $json_string =~ s/":/",/g; - $json_string =~ s/\'/\\'/g; - $json_string =~ s/\"/\'/g; - my $project_info = eval $json_string; - die "ERROR eval'ing munged JSON string ||$json_string||: $@\n" - if ($@); - die "ERROR empty value after eval'ing JSON string ||$json_string||\n" - unless $project_info; - return $project_info; -} - 1; diff --git a/solr/build.xml b/solr/build.xml index b426d79a40e..4c52b031d37 100644 --- a/solr/build.xml +++ b/solr/build.xml @@ -483,7 +483,7 @@ + changes.product="solr"/> - +

{{value.value}}