From ca040402d9470969d7f8fe81c5bf4125e9344cde Mon Sep 17 00:00:00 2001 From: Alessandro Benedetti Date: Tue, 24 Nov 2020 10:11:08 +0000 Subject: [PATCH] SOLR-15015: added support to parametric Interleaving algorithm (#2096) --- solr/CHANGES.txt | 2 + .../solr/ltr/interleaving/Interleaving.java | 5 +- .../solr/ltr/search/LTRQParserPlugin.java | 10 +- .../apache/solr/ltr/TestLTRQParserPlugin.java | 28 --- .../TestLTRQParserInterleaving.java | 206 ++++++++++++++++++ solr/solr-ref-guide/src/learning-to-rank.adoc | 8 + 6 files changed, 229 insertions(+), 30 deletions(-) create mode 100644 solr/contrib/ltr/src/test/org/apache/solr/ltr/interleaving/TestLTRQParserInterleaving.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 8db9795c02a..479dba0c772 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -183,6 +183,8 @@ Improvements * SOLR-14977 : ContainerPlugins should be configurable with custom objects (noble, ab) +* SOLR-15015 : Add interleaving algorithm parameter support in Learning To Rank (Alessandro Benedetti) + Optimizations --------------------- * SOLR-14975: Optimize CoreContainer.getAllCoreNames, getLoadedCoreNames and getCoreDescriptors. 
(Bruno Roustant) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/interleaving/Interleaving.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/interleaving/Interleaving.java index 1038acad77f..db044439ce0 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/interleaving/Interleaving.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/interleaving/Interleaving.java @@ -18,6 +18,7 @@ package org.apache.solr.ltr.interleaving; import org.apache.lucene.search.ScoreDoc; +import org.apache.solr.common.SolrException; import org.apache.solr.ltr.interleaving.algorithms.TeamDraftInterleaving; /** @@ -36,8 +37,10 @@ public interface Interleaving { static Interleaving getImplementation(String algorithm) { switch(algorithm) { case TEAM_DRAFT: - default: return new TeamDraftInterleaving(); + default: + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Unknown Interleaving algorithm: " + algorithm); } } } \ No newline at end of file diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java index 51009f7dcf2..d8832ffa233 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java @@ -78,6 +78,13 @@ public class LTRQParserPlugin extends QParserPlugin implements ResourceLoaderAwa **/ public static final String RERANK_DOCS = "reRankDocs"; + /** query parser plugin: default interleaving algorithm **/ + public static final String DEFAULT_INTERLEAVING_ALGORITHM = Interleaving.TEAM_DRAFT; + + /** query parser plugin:the param that selects the interleaving algorithm to use **/ + public static final String INTERLEAVING_ALGORITHM = "interleavingAlgorithm"; + + @Override @SuppressWarnings({"unchecked"}) public void init(@SuppressWarnings("rawtypes") NamedList args) { @@ -209,8 +216,9 @@ public class LTRQParserPlugin extends QParserPlugin 
implements ResourceLoaderAwa SolrQueryRequestContextUtils.setScoringQueries(req, new LTRScoringQuery[] { rerankingQuery }); return new LTRQuery(rerankingQuery, reRankDocs); } else { + String interleavingAlgorithm = localParams.get(INTERLEAVING_ALGORITHM, DEFAULT_INTERLEAVING_ALGORITHM); SolrQueryRequestContextUtils.setScoringQueries(req, rerankingQueries); - return new LTRInterleavingQuery(Interleaving.getImplementation(Interleaving.TEAM_DRAFT),rerankingQueries, reRankDocs); + return new LTRInterleavingQuery(Interleaving.getImplementation(interleavingAlgorithm), rerankingQueries, reRankDocs); } } } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java index e6132102ac0..6730b9b5b8c 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java @@ -51,20 +51,6 @@ public class TestLTRQParserPlugin extends TestRerankBase { assert (res.contains("Must provide one or two models in the request")); } - @Test - public void interleavingLtrTooManyModelsTest() throws Exception { - final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "*, score"); - query.add("rows", "4"); - query.add("fv", "true"); - query.add("rq", "{!ltr model=modelA model=modelB model=C reRankDocs=100}"); - - final String res = restTestHarness.query("/query" + query.toQueryString()); - assert (res.contains("Must provide one or two models in the request")); - } - @Test public void ltrModelIdDoesNotExistTest() throws Exception { final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}"; @@ -93,20 +79,6 @@ public class TestLTRQParserPlugin extends TestRerankBase { assert (res.contains("the model 0 is empty")); } - @Test - public void 
interleavingLtrModelIsEmptyTest() throws Exception { - final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "*, score"); - query.add("rows", "4"); - query.add("fv", "true"); - query.add("rq", "{!ltr model=6029760550880411648 model=\"\" reRankDocs=100}"); - - final String res = restTestHarness.query("/query" + query.toQueryString()); - assert (res.contains("the model 1 is empty")); - } - @Test public void ltrBadRerankDocsTest() throws Exception { final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}"; diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/interleaving/TestLTRQParserInterleaving.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/interleaving/TestLTRQParserInterleaving.java new file mode 100644 index 00000000000..ebaeae1e1c3 --- /dev/null +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/interleaving/TestLTRQParserInterleaving.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.ltr.interleaving; + +import java.util.Random; + +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.ltr.TestRerankBase; +import org.apache.solr.ltr.feature.SolrFeature; +import org.apache.solr.ltr.interleaving.algorithms.TeamDraftInterleaving; +import org.apache.solr.ltr.model.LinearModel; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestLTRQParserInterleaving extends TestRerankBase { + @Before + public void before() throws Exception { + setuptest(false); + + assertU(adoc("id", "1", "title", "w1", "description", "w5", "popularity", + "1")); + assertU(adoc("id", "2", "title", "w2 2asd asdd didid", "description", + "w2 2asd asdd didid", "popularity", "2")); + assertU(adoc("id", "3", "title", "w1", "description", "w5", "popularity", + "3")); + assertU(adoc("id", "4", "title", "w1", "description", "w1", "popularity", + "6")); + assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity", + "5")); + assertU(adoc("id", "6", "title", "w6 w2", "description", "w1 w2", + "popularity", "6")); + assertU(adoc("id", "7", "title", "w1 w2 w3 w4 w5", "description", + "w6 w2 w3 w4 w5 w8", "popularity", "88888")); + assertU(adoc("id", "8", "title", "w1 w1 w1 w2 w2 w8", "description", + "w1 w1 w1 w2 w2 w5", "popularity", "88888")); + assertU(commit()); + + loadFeaturesAndModelsForInterleaving(); + } + + @After + public void after() throws Exception { + aftertest(); + } + + protected void loadFeaturesAndModelsForInterleaving() throws Exception { + loadFeature("featureA1", SolrFeature.class.getName(), + "{\"fq\":[\"{!terms f=popularity}88888\"]}"); + loadFeature("featureA2", SolrFeature.class.getName(), + "{\"fq\":[\"{!terms f=title}${user_query}\"]}"); + loadFeature("featureAB", SolrFeature.class.getName(), + "{\"fq\":[\"{!terms f=title}${user_query}\"]}"); + loadFeature("featureB1", SolrFeature.class.getName(), + "{\"fq\":[\"{!terms f=popularity}6\"]}"); + 
loadFeature("featureB2", SolrFeature.class.getName(), + "{\"fq\":[\"{!terms f=description}${user_query}\"]}"); + loadFeature("featureC1", SolrFeature.class.getName(),"featureStore2", + "{\"fq\":[\"{!terms f=popularity}6\"]}"); + loadFeature("featureC2", SolrFeature.class.getName(),"featureStore2", + "{\"fq\":[\"{!terms f=popularity}1\"]}"); + + loadModel("modelA", LinearModel.class.getName(), + new String[]{"featureA1", "featureA2", "featureAB"}, + "{\"weights\":{\"featureA1\":3.0, \"featureA2\":9.0, \"featureAB\":27.0}}"); + + loadModel("modelB", LinearModel.class.getName(), + new String[]{"featureB1", "featureB2", "featureAB"}, + "{\"weights\":{\"featureB1\":2.0, \"featureB2\":4.0, \"featureAB\":8.0}}"); + + loadModel("modelC", LinearModel.class.getName(), + new String[]{"featureC1", "featureC2"},"featureStore2", + "{\"weights\":{\"featureC1\":5.0, \"featureC2\":25.0}}"); + } + + @Test + public void interleavingLtrNotSupportedAlgorithmTest() throws Exception { + final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}"; + final SolrQuery query = new SolrQuery(); + query.setQuery(solrQuery); + query.add("fl", "*, score"); + query.add("rows", "4"); + query.add("fv", "true"); + query.add("rq", "{!ltr model=modelA model=modelB interleavingAlgorithm=unsupported reRankDocs=100}"); + + final String res = restTestHarness.query("/query" + query.toQueryString()); + assert (res.contains("Unknown Interleaving algorithm: unsupported")); + } + + @Test + public void interleavingLtrTooManyModelsTest() throws Exception { + final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}"; + final SolrQuery query = new SolrQuery(); + query.setQuery(solrQuery); + query.add("fl", "*, score"); + query.add("rows", "4"); + query.add("fv", "true"); + query.add("rq", "{!ltr model=modelA model=modelB model=C reRankDocs=100}"); + + final String res = restTestHarness.query("/query" + query.toQueryString()); + assert (res.contains("Must 
provide one or two models in the request")); + } + + @Test + public void interleavingLtrModelIsEmptyTest() throws Exception { + final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}"; + final SolrQuery query = new SolrQuery(); + query.setQuery(solrQuery); + query.add("fl", "*, score"); + query.add("rows", "4"); + query.add("fv", "true"); + query.add("rq", "{!ltr model=modelA model=\"\" reRankDocs=100}"); + + final String res = restTestHarness.query("/query" + query.toQueryString()); + assert (res.contains("the model 1 is empty")); + } + + @Test + public void defaultAlgorithm_shouldApplyTeamDraftInterleaving() throws Exception { + TeamDraftInterleaving.setRANDOM(new Random(10101010));//Random Boolean Choices Generation from Seed: [0,1,1] + + final SolrQuery query = new SolrQuery(); + query.setQuery("*:*"); + query.add("fl", "*, score,interleavingPick:[interleaving]"); + query.add("rows", "10"); + query.add("debugQuery", "true"); + query.add("fq", "{!terms f=title}w1"); // 1,3,4,7,8 + query.add("rq", + "{!ltr model=modelA model=modelB reRankDocs=10 efi.user_query='w5'}"); + + /* + Doc1 = "featureB2=1.0", ScoreA(0), ScoreB(4) + Doc3 = "featureB2=1.0", ScoreA(0), ScoreB(4) + Doc4 = "featureB1=1.0", ScoreA(0), ScoreB(2) + Doc7 ="featureA1=1.0,featureA2=1.0,featureAB=1.0,featureB2=1.0", ScoreA(39), ScoreB(12) + Doc8 = "featureA1=1.0,featureB2=1.0", ScoreA(3), ScoreB(4) + ModelARerankedList = [7,8,1,3,4] + ModelBRerankedList = [7,1,3,8,4] + + Random Boolean Choices Generation from Seed: [0,1,1] + */ + String[] expectedInterleavingPicks = new String[]{"modelA", "modelB", "modelB", "modelA", "modelB"}; + int[] expectedInterleaved = new int[]{7, 1, 3, 8, 4}; + + String[] tests = new String[11]; + tests[0] = "/response/numFound/==5"; + for (int i = 1; i <= 5; i++) { + tests[i] = "/response/docs/[" + (i - 1) + "]/id==\"" + expectedInterleaved[(i - 1)] + "\""; + tests[i + 5] = "/response/docs/[" + (i - 1) + "]/interleavingPick==" + 
expectedInterleavingPicks[(i - 1)]; + } + assertJQ("/query" + query.toQueryString(), tests); + + } + + @Test + public void teamDraftAlgorithm_shouldApplyTeamDraftInterleaving() throws Exception { + TeamDraftInterleaving.setRANDOM(new Random(10101010));//Random Boolean Choices Generation from Seed: [0,1,1] + + final SolrQuery query = new SolrQuery(); + query.setQuery("*:*"); + query.add("fl", "*, score,interleavingPick:[interleaving]"); + query.add("rows", "10"); + query.add("debugQuery", "true"); + query.add("fq", "{!terms f=title}w1"); // 1,3,4,7,8 + query.add("rq", + "{!ltr model=modelA model=modelB reRankDocs=10 interleavingAlgorithm=TeamDraft efi.user_query='w5'}"); + + /* + Doc1 = "featureB2=1.0", ScoreA(0), ScoreB(4) + Doc3 = "featureB2=1.0", ScoreA(0), ScoreB(4) + Doc4 = "featureB1=1.0", ScoreA(0), ScoreB(2) + Doc7 ="featureA1=1.0,featureA2=1.0,featureAB=1.0,featureB2=1.0", ScoreA(39), ScoreB(12) + Doc8 = "featureA1=1.0,featureB2=1.0", ScoreA(3), ScoreB(4) + ModelARerankedList = [7,8,1,3,4] + ModelBRerankedList = [7,1,3,8,4] + + Random Boolean Choices Generation from Seed: [0,1,1] + */ + String[] expectedInterleavingPicks = new String[]{"modelA", "modelB", "modelB", "modelA", "modelB"}; + int[] expectedInterleaved = new int[]{7, 1, 3, 8, 4}; + + String[] tests = new String[11]; + tests[0] = "/response/numFound/==5"; + for (int i = 1; i <= 5; i++) { + tests[i] = "/response/docs/[" + (i - 1) + "]/id==\"" + expectedInterleaved[(i - 1)] + "\""; + tests[i + 5] = "/response/docs/[" + (i - 1) + "]/interleavingPick==" + expectedInterleavingPicks[(i - 1)]; + } + assertJQ("/query" + query.toQueryString(), tests); + + } + +} diff --git a/solr/solr-ref-guide/src/learning-to-rank.adoc b/solr/solr-ref-guide/src/learning-to-rank.adoc index 481f3b76902..f17072aab69 100644 --- a/solr/solr-ref-guide/src/learning-to-rank.adoc +++ b/solr/solr-ref-guide/src/learning-to-rank.adoc @@ -329,6 +329,14 @@ The output XML will include the model picked for each search result, resembling 
}} ---- +=== Running a Rerank Query with Interleaving Using a Specific Algorithm +To rerank the results of a query by interleaving two models with a specific algorithm, add the `interleavingAlgorithm` local parameter to the `ltr` query parser, for example: + +[source,text] +http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=myModelA model=myModelB reRankDocs=100 interleavingAlgorithm=TeamDraft}&fl=id,score + +Currently the only supported algorithm (and the default) is `TeamDraft`. + === External Feature Information The {solr-javadocs}/contrib/ltr/org/apache/solr/ltr/feature/ValueFeature.html[ValueFeature] and {solr-javadocs}/contrib/ltr/org/apache/solr/ltr/feature/SolrFeature.html[SolrFeature] classes support the use of external feature information, `efi` for short.