SOLR-8270: Change implicit default Similarity to use BM25 when luceneMatchVersion >= 6

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1713902 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2015-11-11 17:48:40 +00:00
parent 9f9a5d50f0
commit da18097f85
14 changed files with 234 additions and 146 deletions

View File

@ -51,14 +51,20 @@ Upgrading from Solr 5.x
to allow for multiple full query results (DocLists) per Solr request.
TransformContext was rendered redundant and was removed. (yonik)
* DefaultSimilarityFactory has been removed. If you currently have DefaultSimilarityFactory explicitly
referenced in your schema.xml, edit your config to use the functionally identical ClassicSimilarityFactory.
See SOLR-8239 for more details.
* SchemaSimilarityFactory has been modified to use BM25Similarity as the default for fieldTypes that
do not explicitly declare a Similarity. The legacy behavior of using ClassicSimilarity as the
default will occur if the luceneMatchVersion for the collection is less than 6.0. See SOLR-8261 for
more details.
* Several changes have been made regarding the "Similarity" used in Solr, in order to provide
better default behavior for new users. There are 3 key impacts of these changes on existing
users who upgrade:
* DefaultSimilarityFactory has been removed. If you currently have DefaultSimilarityFactory explicitly
referenced in your schema.xml, edit your config to use the functionally identical ClassicSimilarityFactory.
See SOLR-8239 for more details.
* The implicit default Similarity used when no <similarity/> is configured in schema.xml has
been changed to BM25SimilarityFactory. Users who wish to preserve backward-compatible behavior should
either explicitly configure ClassicSimilarityFactory, or ensure that the luceneMatchVersion
for the collection is less than 6.0. See SOLR-8270 for details.
* SchemaSimilarityFactory has been modified to use BM25Similarity as the default for fieldTypes that
do not explicitly declare a Similarity. The legacy behavior of using ClassicSimilarity as the
default will occur if the luceneMatchVersion for the collection is less than 6.0. See SOLR-8261 for
more details.
Detailed Change List
----------------------
@ -144,6 +150,8 @@ Other Changes
* SOLR-8258: Change default hdfs tlog replication factor from 1 to 3. (Mark Miller)
* SOLR-8270: Change implicit default Similarity to use BM25 when luceneMatchVersion >= 6 (hossman)
================== 5.4.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

View File

@ -65,6 +65,7 @@ import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SchemaXmlWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.similarities.BM25SimilarityFactory;
import org.apache.solr.search.similarities.ClassicSimilarityFactory;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.plugin.SolrCoreAware;
@ -497,9 +498,13 @@ public class IndexSchema {
Node node = (Node) xpath.evaluate(expression, document, XPathConstants.NODE);
similarityFactory = readSimilarity(loader, node);
if (similarityFactory == null) {
similarityFactory = new ClassicSimilarityFactory();
final NamedList similarityParams = new NamedList();
Version luceneVersion = getDefaultLuceneMatchVersion();
if (getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_6_0_0)) {
similarityFactory = new BM25SimilarityFactory();
} else {
similarityFactory = new ClassicSimilarityFactory();
}
final NamedList similarityParams = new NamedList();
similarityFactory.init(SolrParams.toSolrParams(similarityParams));
} else {
isExplicitSimilarity = true;

View File

@ -382,7 +382,35 @@ valued. -->
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
<!-- for testing tfidf functions, see TestFunctionQuery.testTFIDFFunctions -->
<dynamicField name="*_tfidf" type="tfidf_text" indexed="true" stored="true" />
<!-- Text field type that explicitly declares solr.ClassicSimilarityFactory rather than
     inheriting whatever similarity the schema would otherwise apply to it. -->
<fieldType name="tfidf_text" class="solr.TextField" positionIncrementGap="100">
<similarity class="solr.ClassicSimilarityFactory" />
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<dynamicField name="*_extf" type="file"/>
<dynamicField name="*_extfs" type="sfile"/>
@ -403,4 +431,6 @@ valued. -->
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>text</defaultSearchField>
<similarity class="solr.SchemaSimilarityFactory"/>
</schema>

View File

@ -72,31 +72,30 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
@Test
public void testGroupingGroupSortingScore_basic() {
assertU(add(doc("id", "1","name", "author1", "title", "a book title", "group_i", "1")));
assertU(add(doc("id", "2","name", "author1", "title", "the title", "group_i", "2")));
assertU(add(doc("id", "3","name", "author2", "title", "a book title", "group_i", "1")));
assertU(add(doc("id", "4","name", "author2", "title", "title", "group_i", "2")));
assertU(add(doc("id", "5","name", "author3", "title", "the title of a title", "group_i", "1")));
assertU(add(doc("id", "1", "id_i", "1", "name", "author1", "title", "a book title", "group_i", "1")));
assertU(add(doc("id", "2", "id_i", "2", "name", "author1", "title", "the title", "group_i", "2")));
assertU(add(doc("id", "3", "id_i", "3", "name", "author2", "title", "a book title", "group_i", "1")));
assertU(add(doc("id", "4", "id_i", "4", "name", "author2", "title", "title", "group_i", "2")));
assertU(add(doc("id", "5", "id_i", "5", "name", "author3", "title", "the title of a title", "group_i", "1")));
assertU(commit());
assertQ(req("q","title:title", "group", "true", "group.field","name")
// function based query for predictable scores not affected by similarity
assertQ(req("q","{!func}id_i", "group", "true", "group.field","name", "fl", "id, score")
,"//lst[@name='grouped']/lst[@name='name']"
,"*[count(//arr[@name='groups']/lst) = 3]"
,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author3']"
,"//arr[@name='groups']/lst[1]/result[@numFound='1']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='5']"
,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author1']"
,"//arr[@name='groups']/lst[3]/result[@numFound='2']"
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='2']"
,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
// ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);
assertQ(req("q", "title:title", "group", "true", "group.field", "group_i")
@ -115,71 +114,76 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
@Test
public void testGroupingGroupSortingScore_withTotalGroupCount() {
assertU(add(doc("id", "1","name", "author1", "title", "a book title", "group_i", "1")));
assertU(add(doc("id", "2","name", "author1", "title", "the title", "group_i", "2")));
assertU(add(doc("id", "3","name", "author2", "title", "a book title", "group_i", "1")));
assertU(add(doc("id", "4","name", "author2", "title", "title", "group_i", "2")));
assertU(add(doc("id", "5","name", "author3", "title", "the title of a title", "group_i", "1")));
assertU(add(doc("id", "1", "id_i", "1", "name", "author1", "title", "a book title", "group_i", "1")));
assertU(add(doc("id", "2", "id_i", "2", "name", "author1", "title", "the title", "group_i", "2")));
assertU(add(doc("id", "3", "id_i", "3", "name", "author2", "title", "a book title", "group_i", "1")));
assertU(add(doc("id", "4", "id_i", "4", "name", "author2", "title", "title", "group_i", "2")));
assertU(add(doc("id", "5", "id_i", "5", "name", "author3", "title", "the title of a title", "group_i", "1")));
assertU(commit());
assertQ(req("q","title:title", "group", "true", "group.field","name", "group.ngroups", "true")
// function based query for predictable scores not affected by similarity
assertQ(req("q","{!func}id_i", "group", "true", "group.field","name", "group.ngroups", "true")
,"//lst[@name='grouped']/lst[@name='name']"
,"//lst[@name='grouped']/lst[@name='name']/int[@name='matches'][.='5']"
,"//lst[@name='grouped']/lst[@name='name']/int[@name='ngroups'][.='3']"
,"*[count(//arr[@name='groups']/lst) = 3]"
,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author3']"
,"//arr[@name='groups']/lst[1]/result[@numFound='1']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='5']"
,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author1']"
,"//arr[@name='groups']/lst[3]/result[@numFound='2']"
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='2']"
,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);
assertQ(req("q", "title:title", "group", "true", "group.field", "group_i", "group.ngroups", "true")
// function based query for predictable scores not affected by similarity
assertQ(req("q", "{!func}id_i", "group", "true", "group.field", "group_i", "group.ngroups", "true")
, "//lst[@name='grouped']/lst[@name='group_i']/int[@name='matches'][.='5']"
, "//lst[@name='grouped']/lst[@name='group_i']/int[@name='ngroups'][.='2']"
, "*[count(//arr[@name='groups']/lst) = 2]"
, "//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
, "//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='1']"
, "//arr[@name='groups']/lst[1]/result[@numFound='3']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='5']"
, "//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='2']"
, "//arr[@name='groups']/lst[2]/result[@numFound='2']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
, "//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
, "//arr[@name='groups']/lst[2]/result[@numFound='3']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
);
}
@Test
public void testGroupingGroupSortingScore_basicWithGroupSortEqualToSort() {
assertU(add(doc("id", "1","name", "author1", "title", "a book title")));
assertU(add(doc("id", "2","name", "author1", "title", "the title")));
assertU(add(doc("id", "3","name", "author2", "title", "a book title")));
assertU(add(doc("id", "4","name", "author2", "title", "title")));
assertU(add(doc("id", "5","name", "author3", "title", "the title of a title")));
assertU(add(doc("id", "1", "id_i", "1", "name", "author1", "title", "a book title")));
assertU(add(doc("id", "2", "id_i", "2", "name", "author1", "title", "the title")));
assertU(add(doc("id", "3", "id_i", "3", "name", "author2", "title", "a book title")));
assertU(add(doc("id", "4", "id_i", "4", "name", "author2", "title", "title")));
assertU(add(doc("id", "5", "id_i", "5", "name", "author3", "title", "the title of a title")));
assertU(commit());
assertQ(req("q", "title:title", "group", "true", "group.field", "name", "sort", "score desc", "group.sort", "score desc")
, "//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
, "//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
// function based query for predictable scores not affected by similarity
assertQ(req("q", "{!func}id_i", "group", "true", "group.field", "name",
"sort", "score desc", "group.sort", "score desc")
, "//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author3']"
, "//arr[@name='groups']/lst[1]/result[@numFound='1']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='5']"
, "//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
, "//arr[@name='groups']/lst[2]/result[@numFound='2']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
, "//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author1']"
, "//arr[@name='groups']/lst[3]/result[@numFound='2']"
, "//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='2']"
, "//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
// ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
, "//arr[@name='groups']/lst[3]/result[@numFound='1']"
, "//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);
}

View File

@ -454,9 +454,9 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
assertU(adoc("id", "6", "title", "XXXX XXXX", "str_s1", "z"));
assertU(adoc("id", "7", "title", "AAAA", "str_s1", "a"));
assertU(adoc("id", "8", "title", "QQQQ", "str_s1", "q"));
assertU(adoc("id", "9", "title", "QQQQ QQQQ", "str_s1", "r"));
assertU(adoc("id", "10", "title", "QQQQ QQQQ QQQQ", "str_s1", "s"));
assertU(adoc("id", "8", "title", " QQQQ trash trash", "str_s1", "q"));
assertU(adoc("id", "9", "title", " QQQQ QQQQ trash", "str_s1", "r"));
assertU(adoc("id", "10", "title", "QQQQ QQQQ QQQQ ", "str_s1", "s"));
assertU(commit());
@ -498,21 +498,21 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
"indent", "true",
CommonParams.FL, "id, score")
, "//*[@numFound='3']"
, "//result/doc[1]/str[@name='id'][.='8']"
, "//result/doc[1]/str[@name='id'][.='10']"
, "//result/doc[2]/str[@name='id'][.='9']"
, "//result/doc[3]/str[@name='id'][.='10']"
, "//result/doc[3]/str[@name='id'][.='8']"
);
assertQ("", req(CommonParams.Q, "QQQQ", CommonParams.QT, "/elevate",
QueryElevationParams.MARK_EXCLUDES, "true",
"indent", "true",
CommonParams.FL, "id, score, [excluded]")
, "//*[@numFound='3']"
, "//result/doc[1]/str[@name='id'][.='8']"
, "//result/doc[1]/str[@name='id'][.='10']"
, "//result/doc[2]/str[@name='id'][.='9']"
, "//result/doc[3]/str[@name='id'][.='10']",
"//result/doc[1]/bool[@name='[excluded]'][.='false']",
, "//result/doc[3]/str[@name='id'][.='8']",
"//result/doc[1]/bool[@name='[excluded]'][.='true']",
"//result/doc[2]/bool[@name='[excluded]'][.='false']",
"//result/doc[3]/bool[@name='[excluded]'][.='true']"
"//result/doc[3]/bool[@name='[excluded]'][.='false']"
);
} finally {
delete();
@ -523,9 +523,9 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
public void testSorting() throws Exception {
try {
init("schema12.xml");
assertU(adoc("id", "a", "title", "ipod", "str_s1", "a"));
assertU(adoc("id", "b", "title", "ipod ipod", "str_s1", "b"));
assertU(adoc("id", "c", "title", "ipod ipod ipod", "str_s1", "c"));
assertU(adoc("id", "a", "title", "ipod trash trash", "str_s1", "a"));
assertU(adoc("id", "b", "title", "ipod ipod trash", "str_s1", "b"));
assertU(adoc("id", "c", "title", "ipod ipod ipod ", "str_s1", "c"));
assertU(adoc("id", "x", "title", "boosted", "str_s1", "x"));
assertU(adoc("id", "y", "title", "boosted boosted", "str_s1", "y"));
@ -546,9 +546,9 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
assertQ("Make sure standard sort works as expected", req
, "//*[@numFound='3']"
, "//result/doc[1]/str[@name='id'][.='a']"
, "//result/doc[1]/str[@name='id'][.='c']"
, "//result/doc[2]/str[@name='id'][.='b']"
, "//result/doc[3]/str[@name='id'][.='c']"
, "//result/doc[3]/str[@name='id'][.='a']"
);
// Explicitly set what gets boosted
@ -562,9 +562,9 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
, "//result/doc[1]/str[@name='id'][.='x']"
, "//result/doc[2]/str[@name='id'][.='y']"
, "//result/doc[3]/str[@name='id'][.='z']"
, "//result/doc[4]/str[@name='id'][.='a']"
, "//result/doc[4]/str[@name='id'][.='c']"
, "//result/doc[5]/str[@name='id'][.='b']"
, "//result/doc[6]/str[@name='id'][.='c']"
, "//result/doc[6]/str[@name='id'][.='a']"
);
booster.elevationCache.clear();
@ -576,8 +576,8 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.='a']"
, "//result/doc[2]/str[@name='id'][.='x']"
, "//result/doc[3]/str[@name='id'][.='b']"
, "//result/doc[4]/str[@name='id'][.='c']"
, "//result/doc[3]/str[@name='id'][.='c']"
, "//result/doc[4]/str[@name='id'][.='b']"
);
// Test reverse sort
@ -585,10 +585,11 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args));
assertQ("All four should make it", req
, "//*[@numFound='4']"
// NOTE REVERSED doc[X] indices
, "//result/doc[4]/str[@name='id'][.='a']"
, "//result/doc[3]/str[@name='id'][.='x']"
, "//result/doc[2]/str[@name='id'][.='b']"
, "//result/doc[1]/str[@name='id'][.='c']"
, "//result/doc[2]/str[@name='id'][.='c']"
, "//result/doc[1]/str[@name='id'][.='b']"
);
// Try normal sort by 'id'
@ -643,8 +644,8 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
assertQ(null, req
, "//*[@numFound='3']"
, "//result/doc[1]/str[@name='id'][.='x']"
, "//result/doc[2]/str[@name='id'][.='b']"
, "//result/doc[3]/str[@name='id'][.='c']"
, "//result/doc[2]/str[@name='id'][.='c']"
, "//result/doc[3]/str[@name='id'][.='b']"
);
@ -659,8 +660,8 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
, "//result/doc[1]/str[@name='id'][.='x']"
, "//result/doc[2]/str[@name='id'][.='y']"
, "//result/doc[3]/str[@name='id'][.='z']"
, "//result/doc[4]/str[@name='id'][.='a']"
, "//result/doc[5]/str[@name='id'][.='c']"
, "//result/doc[4]/str[@name='id'][.='c']"
, "//result/doc[5]/str[@name='id'][.='a']"
);
args.put(QueryElevationParams.IDS, "x,z,y");

View File

@ -993,7 +993,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
assertU(adoc("id", "1", "a_f", "2.3", "b_f", "9.7", "foo_t", "how now brown cow"));
assertU(adoc("id", "2", "a_f", "4.5", "b_f", "8.6", "foo_t", "cow cow cow cow"));
assertU(adoc("id", "3", "a_f", "5.6", "b_f", "7.5", "foo_t", "red fox"));
assertU(adoc("id", "3", "a_f", "5.6", "b_f", "7.5", "foo_t", "red fox")); // no cow
assertU(adoc("id", "4", "a_f", "6.7", "b_f", "6.3", "foo_t", "red cow"));
assertU(commit());
@ -1011,20 +1011,21 @@ public class StatsComponentTest extends AbstractSolrTestCase {
, kpre + "double[@name='stddev'][.='10.622007151430441']"
);
// force constant score for matches so we aren't dependent on similarity
final float constScore = 4.2F;
final double expectedScore = (double) constScore;
assertQ("functions over a query",
req("q","*:*", "stats", "true",
"stats.field", "{!lucene key=k}foo_t:cow")
// TODO: change to not rely on exact scores
, kpre + "double[@name='min'][.='0.6115717887878418']"
, kpre + "double[@name='max'][.='1.2231435775756836']"
, kpre + "double[@name='sum'][.='2.5991801023483276']"
"stats.field", "{!lucene key=k}foo_t:cow^=" + constScore)
, kpre + "double[@name='min'][.='" + expectedScore + "']"
, kpre + "double[@name='max'][.='" + expectedScore + "']"
, kpre + "double[@name='sum'][.='" + (3D * expectedScore) + "']"
, kpre + "long[@name='count'][.='3']"
, kpre + "long[@name='missing'][.='1']"
, kpre + "double[@name='sumOfSquares'][.='2.4545065967701163']"
, kpre + "double[@name='mean'][.='0.8663933674494425']"
, kpre + "double[@name='stddev'][.='0.3182720497380833']"
, kpre + "double[@name='sumOfSquares'][.='" + (3D * Math.pow(expectedScore, 2D)) + "']"
, kpre + "double[@name='mean'][.='" + expectedScore + "']"
, kpre + "double[@name='stddev'][.='0.0']"
);
}
/**

View File

@ -24,7 +24,7 @@ public class TestSchemaSimilarityResource extends SolrRestletTestBase {
public void testGetSchemaSimilarity() throws Exception {
assertQ("/schema/similarity?indent=on&wt=xml",
"count(/response/lst[@name='similarity']) = 1",
"/response/lst[@name='similarity']/str[@name='class'][.='org.apache.solr.search.similarities.ClassicSimilarityFactory']");
"/response/lst[@name='similarity']/str[@name='class'][.='org.apache.solr.search.similarities.BM25SimilarityFactory']");
}
}

View File

@ -91,6 +91,9 @@ public class ChangedSchemaMergeTest extends SolrTestCaseJ4 {
changed.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
changed.getUpdateHandler().commit(new CommitUpdateCommand(req, true));
} catch (Throwable e) {
SolrCore.log.error("Test exception, logging so not swallowed if there is a (finally) shutdown exception: " + e.getMessage(), e);
throw e;
} finally {
if (cc != null) cc.shutdown();
}

View File

@ -701,7 +701,6 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "foo bar",
"qf", "phrase_sw",
"pf", "phrase_sw^10",
"bf", "boost_d",
"fl", "score,*",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='s0']");
@ -710,7 +709,6 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "foo bar",
"qf", "phrase_sw",
"pf2", "phrase_sw^10",
"bf", "boost_d",
"fl", "score,*",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='s0']");
@ -719,7 +717,6 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "a b bar",
"qf", "phrase_sw",
"pf3", "phrase_sw^10",
"bf", "boost_d",
"fl", "score,*",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='s2']");
@ -729,7 +726,6 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"qf", "phrase_sw",
"pf2", "phrase_sw^10",
"ps", "2",
"bf", "boost_d",
"fl", "score,*",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='s0']");
@ -739,7 +735,6 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"qf", "phrase_sw",
"pf3", "phrase_sw^10",
"ps", "3",
"bf", "boost_d",
"fl", "score,*",
"debugQuery", "true",
"defType", "edismax"),
@ -772,31 +767,31 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
assertQ(
"ps2 not working",
req("q", "bar foo", "qf", "phrase_sw", "pf2", "phrase_sw^10", "ps2",
"2", "bf", "boost_d", "fl", "score,*", "defType", "edismax"),
"2", "fl", "score,*", "defType", "edismax"),
"//doc[1]/str[@name='id'][.='s0']");
assertQ(
"Specifying slop in pf2 param not working",
req("q", "bar foo", "qf", "phrase_sw", "pf2", "phrase_sw~2^10", "bf",
"boost_d", "fl", "score,*", "defType", "edismax"),
req("q", "bar foo", "qf", "phrase_sw", "pf2", "phrase_sw~2^10",
"fl", "score,*", "defType", "edismax"),
"//doc[1]/str[@name='id'][.='s0']");
assertQ(
"Slop in ps2 parameter should override ps",
req("q", "bar foo", "qf", "phrase_sw", "pf2", "phrase_sw^10", "ps",
"0", "ps2", "2", "bf", "boost_d", "fl", "score,*", "defType",
"0", "ps2", "2", "fl", "score,*", "defType",
"edismax"), "//doc[1]/str[@name='id'][.='s0']");
assertQ(
"ps3 not working",
req("q", "a bar foo", "qf", "phrase_sw", "pf3", "phrase_sw^10", "ps3",
"3", "bf", "boost_d", "fl", "score,*", "defType", "edismax"),
"3", "fl", "score,*", "defType", "edismax"),
"//doc[1]/str[@name='id'][.='s1']");
assertQ(
"Specifying slop in pf3 param not working",
req("q", "a bar foo", "qf", "phrase_sw", "pf3", "phrase_sw~3^10", "bf",
"boost_d", "fl", "score,*", "defType", "edismax"),
req("q", "a bar foo", "qf", "phrase_sw", "pf3", "phrase_sw~3^10",
"fl", "score,*", "defType", "edismax"),
"//doc[1]/str[@name='id'][.='s1']");
assertQ("ps2 should not override slop specified inline in pf2",

View File

@ -425,8 +425,11 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
//Test range query embedded in larger query
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=6}");
params.add("q", "*:* OR test_ti:[0 TO 2000]");
params.add("rqq", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
// function query for predictable scores (relative to id) independent of similarity
params.add("q", "{!func}id");
// constant score for each clause (unique per doc) for predictable scores independent of similarity
// NOTE: biased in favor of doc id == 2
params.add("rqq", "id:1^=10 id:2^=40 id:3^=30 id:4^=40 id:5^=50 id:6^=60");
params.add("fl", "id,score");
params.add("start", "0");
params.add("rows", "6");
@ -435,9 +438,9 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
"//result/doc[1]/float[@name='id'][.='6.0']",
"//result/doc[2]/float[@name='id'][.='5.0']",
"//result/doc[3]/float[@name='id'][.='4.0']",
"//result/doc[4]/float[@name='id'][.='2.0']",
"//result/doc[5]/float[@name='id'][.='1.0']",
"//result/doc[6]/float[@name='id'][.='3.0']"
"//result/doc[4]/float[@name='id'][.='2.0']", // reranked out of orig order
"//result/doc[5]/float[@name='id'][.='3.0']",
"//result/doc[6]/float[@name='id'][.='1.0']"
);

View File

@ -304,7 +304,7 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
);
assertJQ(req("q","+filter(*:*)^=10 +filter(id:1)", "fl","id,score", "sort","id asc")
,"/response/docs/[0]/score==1.0" // normalization reduces to 1
,"/response/docs/[0]/score==10.0"
);
}

View File

@ -28,6 +28,7 @@ import java.util.Random;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
@ -326,28 +327,10 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
assertQ(req("fl","*,score","q", "{!func}docfreq($field,$value)", "fq","id:6", "field","a_t", "value","cow"), "//float[@name='score']='3.0'");
assertQ(req("fl","*,score","q", "{!func}termfreq(a_t,cow)", "fq","id:6"), "//float[@name='score']='5.0'");
TFIDFSimilarity similarity = new ClassicSimilarity();
// make sure it doesn't get a NPE if no terms are present in a field.
assertQ(req("fl","*,score","q", "{!func}termfreq(nofield_t,cow)", "fq","id:6"), "//float[@name='score']='0.0'");
assertQ(req("fl","*,score","q", "{!func}docfreq(nofield_t,cow)", "fq","id:6"), "//float[@name='score']='0.0'");
assertQ(req("fl","*,score","q", "{!func}idf(nofield_t,cow)", "fq","id:6"),
"//float[@name='score']='" + similarity.idf(0,6) + "'");
assertQ(req("fl","*,score","q", "{!func}tf(nofield_t,cow)", "fq","id:6"),
"//float[@name='score']='" + similarity.tf(0) + "'");
assertQ(req("fl","*,score","q", "{!func}idf(a_t,cow)", "fq","id:6"),
"//float[@name='score']='" + similarity.idf(3,6) + "'");
assertQ(req("fl","*,score","q", "{!func}tf(a_t,cow)", "fq","id:6"),
"//float[@name='score']='" + similarity.tf(5) + "'");
FieldInvertState state = new FieldInvertState("a_t");
state.setBoost(1.0f);
state.setLength(4);
long norm = similarity.computeNorm(state);
float nrm = similarity.decodeNormValue((byte) norm);
assertQ(req("fl","*,score","q", "{!func}norm(a_t)", "fq","id:2"),
"//float[@name='score']='" + nrm + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte
// test that ord and rord are working on a global index basis, not just
// at the segment level (since Lucene 2.9 has switched to per-segment searching)
assertQ(req("fl","*,score","q", "{!func}ord(id)", "fq","id:6"), "//float[@name='score']='5.0'");
@ -374,7 +357,7 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
}
assertU(commit());
assertU(adoc("id","120", "text","batman superman")); // in a smaller segment
assertU(adoc("id","121", "text","superman"));
assertU(adoc("id","121", "text","superman junkterm"));
assertU(commit());
// superman has a higher df (thus lower idf) in one segment, but reversed in the complete index
@ -408,7 +391,7 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
// OK
}
// test that sorting by function weights correctly. superman should sort higher than batman due to idf of the whole index
// test that sorting by function query weights correctly. superman should sort higher than batman due to idf of the whole index
assertQ(req("q", "*:*", "fq","id:120 OR id:121", "sort","{!func v=$sortfunc} desc", "sortfunc","query($qq)", "qq","text:(batman OR superman)")
,"*//doc[1]/float[.='120.0']"
@ -416,6 +399,50 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
);
}
/**
 * Compares the scores produced by the {@code idf()}, {@code tf()} and {@code norm()} function
 * queries on {@code *_tfidf} fields with the values computed directly by the
 * {@link TFIDFSimilarity} that the schema reports for the {@code a_tfidf} field type.
 */
public void testTFIDFFunctions() {
  clearIndex();

  // Expected values are derived from the field type's own similarity, so it must be TF-IDF based.
  final Similarity configured = h.getCore().getLatestSchema().getFieldType("a_tfidf").getSimilarity();
  assertNotNull("Test needs *_tfidf to use a TFIDFSimilarity ... who broke the config?", configured);
  assertTrue("Test needs *_tfidf to use a TFIDFSimilarity ... who broke the config: " + configured.getClass(),
             configured instanceof TFIDFSimilarity);
  final TFIDFSimilarity tfidf = (TFIDFSimilarity) configured;

  // Three separate commits, so the index ends up with more than one segment.
  assertU(adoc("id","1", "a_tdt","2009-08-31T12:10:10.123Z", "b_tdt","2009-08-31T12:10:10.124Z"));
  assertU(adoc("id","2", "a_tfidf","how now brown cow"));
  assertU(commit());
  assertU(adoc("id","3", "a_tfidf","brown cow"));
  assertU(adoc("id","4"));
  assertU(commit());
  assertU(adoc("id","5"));
  assertU(adoc("id","6", "a_tfidf","cow cow cow cow cow"));
  assertU(commit());

  // make sure it doesn't get a NPE if no terms are present in a field.
  final String idfWithoutTerms = "//float[@name='score']='" + tfidf.idf(0,6) + "'";
  assertQ(req("fl","*,score","q", "{!func}idf(nofield_tfidf,cow)", "fq","id:6"), idfWithoutTerms);
  final String tfWithoutTerms = "//float[@name='score']='" + tfidf.tf(0) + "'";
  assertQ(req("fl","*,score","q", "{!func}tf(nofield_tfidf,cow)", "fq","id:6"), tfWithoutTerms);

  // fields with real values
  final String idfOfCow = "//float[@name='score']='" + tfidf.idf(3,6) + "'";
  assertQ(req("fl","*,score","q", "{!func}idf(a_tfidf,cow)", "fq","id:6"), idfOfCow);
  final String tfOfCow = "//float[@name='score']='" + tfidf.tf(5) + "'";
  assertQ(req("fl","*,score","q", "{!func}tf(a_tfidf,cow)", "fq","id:6"), tfOfCow);

  // Build the expected norm for a four-token field the same way the similarity encodes it.
  final FieldInvertState invertState = new FieldInvertState("a_tfidf");
  invertState.setBoost(1.0f);
  invertState.setLength(4);
  final long encodedNorm = tfidf.computeNorm(invertState);
  final float expectedNorm = tfidf.decodeNormValue((byte) encodedNorm);
  // sqrt(4)==2 and is exactly representable when quantized to a byte
  assertQ(req("fl","*,score","q", "{!func}norm(a_tfidf)", "fq","id:2"),
          "//float[@name='score']='" + expectedNorm + "'");
}
/**
* test collection-level term stats (new in 4.x indexes)
*/

View File

@ -18,6 +18,7 @@ package org.apache.solr.search.similarities;
*/
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.util.Version;
import org.junit.After;
@ -26,6 +27,7 @@ import org.junit.After;
* (ie: no similarity configured in schema.xml at all) is consistent with
* expectations based on the luceneMatchVersion
* @see <a href="https://issues.apache.org/jira/browse/SOLR-5561">SOLR-5561</a>
* @see <a href="https://issues.apache.org/jira/browse/SOLR-8057">SOLR-8057</a>
*/
public class TestNonDefinedSimilarityFactory extends BaseSimilarityTestCase {
@ -34,10 +36,19 @@ public class TestNonDefinedSimilarityFactory extends BaseSimilarityTestCase {
deleteCore();
}
public void testCurrent() throws Exception {
/**
 * With no {@code tests.luceneMatchVersion} override in place, the similarity resolved for the
 * {@code text} field must be a {@link BM25Similarity} whose {@code b} parameter is 0.75.
 */
public void testCurrentBM25() throws Exception {
  // no sys prop set, rely on LATEST
  initCore("solrconfig-basic.xml","schema-tiny.xml");
  final BM25Similarity bm25 = getSimilarity("text", BM25Similarity.class);
  final float actualB = bm25.getB();
  assertEquals(0.75F, actualB, 0.0F);
}
/**
 * With {@code tests.luceneMatchVersion} forced to a pre-6.0 value, the similarity resolved for
 * the {@code text} field must be a {@link ClassicSimilarity} with overlap discounting enabled.
 *
 * @throws Exception if the core cannot be initialized
 */
public void testClassic() throws Exception {
  // any value below 6.0 should have this behavior
  System.setProperty("tests.luceneMatchVersion", "5.3");
  try {
    initCore("solrconfig-basic.xml","schema-tiny.xml");
    ClassicSimilarity sim = getSimilarity("text", ClassicSimilarity.class);
    assertEquals(true, sim.getDiscountOverlaps());
  } finally {
    // Clear the override even when initCore or the assertion fails, so a failure here
    // cannot leave the property set for whatever runs next in this JVM.
    System.clearProperty("tests.luceneMatchVersion");
  }
}
}

View File

@ -240,7 +240,7 @@ public class SchemaTest extends RestTestBase {
new SchemaRequest.GlobalSimilarity();
SchemaResponse.GlobalSimilarityResponse globalSimilarityResponse = globalSimilarityRequest.process(getSolrClient());
assertValidSchemaResponse(globalSimilarityResponse);
assertEquals("org.apache.solr.search.similarities.ClassicSimilarityFactory",
assertEquals("org.apache.solr.search.similarities.BM25SimilarityFactory",
globalSimilarityResponse.getSimilarity().get("class"));
}