From 23762d50150e61715c6dc07cca626ad790dbec4a Mon Sep 17 00:00:00 2001 From: Varun Thacker Date: Wed, 5 Aug 2015 13:30:53 +0000 Subject: [PATCH] SOLR-7818: Distributed stats is only calculated with the terms that are present in the last shard of a distributed request git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1694210 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 ++ .../solr/search/stats/ExactStatsCache.java | 10 +++-- .../solr/search/stats/TestDistribIDF.java | 44 +++++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 848ea5c7968..14a5e026eb1 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -275,6 +275,9 @@ Bug Fixes * SOLR-7756: ExactStatsCache and LRUStatsCache will throw an NPE when a term is not present on a shard. (Varun Thacker, Anshum Gupta) +* SOLR-7818: Distributed stats is only calculated with the terms that are present in the last shard + of a distributed request. (Varun Thacker, Anshum Gupta) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java b/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java index 84bfd2cccc3..f39a35948ae 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java +++ b/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java @@ -17,6 +17,7 @@ package org.apache.solr.search.stats; * limitations under the License. */ +import com.google.common.collect.Lists; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; @@ -91,6 +92,7 @@ public class ExactStatsCache extends StatsCache { @Override public void mergeToGlobalStats(SolrQueryRequest req, List responses) { + Set allTerms = new HashSet<>(); for (ShardResponse r : responses) { LOG.debug("Merging to global stats, shard={}, response={}", r.getShard(), r.getSolrResponse().getResponse()); String shard = r.getShard(); @@ -103,16 +105,16 @@ public class ExactStatsCache extends StatsCache { addToPerShardTermStats(req, shard, termStatsString); } List terms = nl.getAll(TERMS_KEY); - if (terms != null) { - req.getContext().put(TERMS_KEY, terms); - } - + allTerms.addAll(terms); String colStatsString = (String) nl.get(COL_STATS_KEY); Map colStats = StatsUtil.colStatsMapFromString(colStatsString); if (colStats != null) { addToPerShardColStats(req, shard, colStats); } } + if (allTerms.size() > 0) { + req.getContext().put(TERMS_KEY, Lists.newArrayList(allTerms)); + } if (LOG.isDebugEnabled()) printStats(req); } diff --git a/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java b/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java index 6af890de44f..f6f73caf0f9 100644 --- a/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java +++ b/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java @@ -138,6 +138,50 @@ public class TestDistribIDF extends SolrTestCaseJ4 { } } + @Test + public void testMultiCollectionQuery() throws Exception { + // collection1 and collection2 are collections which have distributed idf enabled + // collection1_local and collection2_local don't have distributed idf available + // Only one doc has cat:football in each collection + // When doing queries across collections we want to test that the query takes into account + // distributed idf for the collection=collection1,collection2 query. + // The way we verify is that score should be the same when querying across collection1 and collection2 + // But should be different when querying across collection1_local and collection2_local + // since the idf is calculated per shard + + createCollection("collection1", "conf1"); + createCollection("collection1_local", "conf2"); + createCollection("collection2", "conf1"); + createCollection("collection2_local", "conf2"); + + addDocsRandomly(); + + //Test against all nodes + for (JettySolrRunner jettySolrRunner : solrCluster.getJettySolrRunners()) { + SolrClient solrClient = new HttpSolrClient(jettySolrRunner.getBaseUrl().toString()); + SolrClient solrClient_local = new HttpSolrClient(jettySolrRunner.getBaseUrl().toString()); + + SolrQuery query = new SolrQuery("cat:football"); + query.setFields("*,score").add("collection", "collection1,collection2"); + QueryResponse queryResponse = solrClient.query("collection1", query); + assertEquals(2, queryResponse.getResults().getNumFound()); + float score1 = (float) queryResponse.getResults().get(0).get("score"); + float score2 = (float) queryResponse.getResults().get(1).get("score"); + assertEquals("Doc1 score=" + score1 + " Doc2 score=" + score2, 0, Float.compare(score1, score2)); + + + query = new SolrQuery("cat:football"); + query.setFields("*,score").add("collection", "collection1_local,collection2_local"); + queryResponse = solrClient_local.query("collection1_local", query); + assertEquals(2, queryResponse.getResults().getNumFound()); + assertEquals(2, queryResponse.getResults().get(0).get("id")); + assertEquals(1, queryResponse.getResults().get(1).get("id")); + float score1_local = (float) queryResponse.getResults().get(0).get("score"); + float score2_local = (float) queryResponse.getResults().get(1).get("score"); + assertEquals("Doc1 score=" + score1_local + " Doc2 score=" + score2_local, 1, Float.compare(score1_local, score2_local)); + } + } + private void createCollection(String name, String config) throws Exception { createCollection(name, config, CompositeIdRouter.NAME); }