SOLR-7756: ExactStatsCache and LRUStatsCache will throw an NPE when a term is not present on a shard

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1694182 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Varun Thacker 2015-08-05 11:40:12 +00:00
parent 2de2e0a16f
commit 4b83947e1c
6 changed files with 279 additions and 73 deletions

View File

@ -272,6 +272,9 @@ Bug Fixes
* SOLR-6357: Allow delete documents by doing a score join query. (Mikhail Khludnev, Timothy Potter)
* SOLR-7756: ExactStatsCache and LRUStatsCache will throw an NPE when a term is not present on a shard.
(Varun Thacker, Anshum Gupta)
Optimizations
----------------------

View File

@ -53,8 +53,7 @@ import java.util.Set;
* query terms (and collection statistics for term fields).
*/
public class ExactStatsCache extends StatsCache {
private static final Logger LOG = LoggerFactory
.getLogger(ExactStatsCache.class);
private static final Logger LOG = LoggerFactory.getLogger(ExactStatsCache.class);
// experimenting with strategy that takes more RAM, but also doesn't share memory
// across threads
@ -91,8 +90,7 @@ public class ExactStatsCache extends StatsCache {
}
@Override
public void mergeToGlobalStats(SolrQueryRequest req,
List<ShardResponse> responses) {
public void mergeToGlobalStats(SolrQueryRequest req, List<ShardResponse> responses) {
for (ShardResponse r : responses) {
LOG.debug("Merging to global stats, shard={}, response={}", r.getShard(), r.getSolrResponse().getResponse());
String shard = r.getShard();
@ -101,7 +99,6 @@ public class ExactStatsCache extends StatsCache {
// TODO: nl == null if not all shards respond (no server hosting shard)
String termStatsString = (String) nl.get(TERM_STATS_KEY);
if (termStatsString != null) {
addToPerShardTermStats(req, shard, termStatsString);
}
@ -109,23 +106,20 @@ public class ExactStatsCache extends StatsCache {
if (terms != null) {
req.getContext().put(TERMS_KEY, terms);
}
String colStatsString = (String) nl.get(COL_STATS_KEY);
if (colStatsString != null) {
Map<String,CollectionStats> colStats = StatsUtil
.colStatsMapFromString(colStatsString);
Map<String,CollectionStats> colStats = StatsUtil.colStatsMapFromString(colStatsString);
if (colStats != null) {
addToPerShardColStats(req, shard, colStats);
}
}
}
if (LOG.isDebugEnabled()) printStats(req);
}
protected void addToPerShardColStats(SolrQueryRequest req, String shard,
Map<String,CollectionStats> colStats) {
protected void addToPerShardColStats(SolrQueryRequest req, String shard, Map<String,CollectionStats> colStats) {
Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) req.getContext().get(PER_SHARD_COL_STATS);
if (perShardColStats == null) {
perShardColStats = new HashMap<String,Map<String,CollectionStats>>();
perShardColStats = new HashMap<>();
req.getContext().put(PER_SHARD_COL_STATS, perShardColStats);
}
perShardColStats.put(shard, colStats);
@ -144,12 +138,11 @@ public class ExactStatsCache extends StatsCache {
}
protected void addToPerShardTermStats(SolrQueryRequest req, String shard, String termStatsString) {
Map<String,TermStats> termStats = StatsUtil
.termStatsMapFromString(termStatsString);
Map<String,TermStats> termStats = StatsUtil.termStatsMapFromString(termStatsString);
if (termStats != null) {
Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().get(PER_SHARD_TERM_STATS);
if (perShardTermStats == null) {
perShardTermStats = new HashMap<String,Map<String,TermStats>>();
perShardTermStats = new HashMap<>();
req.getContext().put(PER_SHARD_TERM_STATS, perShardTermStats);
}
perShardTermStats.put(shard, termStats);
@ -160,11 +153,11 @@ public class ExactStatsCache extends StatsCache {
public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
Query q = rb.getQuery();
try {
HashSet<Term> terms = new HashSet<Term>();
HashSet<Term> terms = new HashSet<>();
searcher.createNormalizedWeight(q, true).extractTerms(terms);
IndexReaderContext context = searcher.getTopReaderContext();
HashMap<String,TermStats> statsMap = new HashMap<String,TermStats>();
HashMap<String,CollectionStats> colMap = new HashMap<String,CollectionStats>();
HashMap<String,TermStats> statsMap = new HashMap<>();
HashMap<String,CollectionStats> colMap = new HashMap<>();
for (Term t : terms) {
TermContext termContext = TermContext.build(context, t);
@ -176,21 +169,22 @@ public class ExactStatsCache extends StatsCache {
statsMap.put(t.toString(), new TermStats(t.field(), tst));
rb.rsp.add(TERMS_KEY, t.toString());
if (!colMap.containsKey(t.field())) { // collection stats for this field
colMap.put(
t.field(),
new CollectionStats(searcher.localCollectionStatistics(t.field())));
colMap.put(t.field(), new CollectionStats(searcher.localCollectionStatistics(t.field())));
}
}
if (statsMap.size() != 0 && colMap.size() != 0) { //Don't add empty keys
String termStatsString = StatsUtil.termStatsMapToString(statsMap);
rb.rsp.add(TERM_STATS_KEY, termStatsString);
String colStatsString = StatsUtil.colStatsMapToString(colMap);
rb.rsp.add(COL_STATS_KEY, colStatsString);
if (LOG.isDebugEnabled()) {
LOG.debug("termStats=" + termStatsString + ", collectionStats="
+ colStatsString + ", terms=" + terms + ", numDocs=" + searcher.maxDoc());
}
}
String termStatsString = StatsUtil.termStatsMapToString(statsMap);
rb.rsp.add(TERM_STATS_KEY, termStatsString);
String colStatsString = StatsUtil.colStatsMapToString(colMap);
rb.rsp.add(COL_STATS_KEY, colStatsString);
if (LOG.isDebugEnabled()) {
LOG.debug("termStats=" + termStatsString + ", collectionStats="
+ colStatsString + ", terms=" + terms + ", numDocs="
+ searcher.maxDoc());
}
} catch (IOException e) {
LOG.error("Error collecting local stats, query='" + q.toString() + "'", e);
throw new SolrException(ErrorCode.SERVER_ERROR, "Error collecting local stats.", e);
@ -203,13 +197,13 @@ public class ExactStatsCache extends StatsCache {
ModifiableSolrParams params = outgoing.params;
List<String> terms = (List<String>) rb.req.getContext().get(TERMS_KEY);
if (terms != null) {
Set<String> fields = new HashSet<String>();
Set<String> fields = new HashSet<>();
for (String t : terms) {
String[] fv = t.split(":");
fields.add(fv[0]);
}
Map<String,TermStats> globalTermStats = new HashMap<String,TermStats>();
Map<String,CollectionStats> globalColStats = new HashMap<String,CollectionStats>();
Map<String,TermStats> globalTermStats = new HashMap<>();
Map<String,CollectionStats> globalColStats = new HashMap<>();
// aggregate collection stats, only for the field in terms
for (String shard : rb.shards) {
@ -266,7 +260,7 @@ public class ExactStatsCache extends StatsCache {
perShardTermStats = Collections.emptyMap();
}
Map<String,TermStats> cache = perShardTermStats.get(shard);
return cache.get(t);
return (cache != null) ? cache.get(t) : null; //Term doesn't exist in shard
}
@Override
@ -274,8 +268,7 @@ public class ExactStatsCache extends StatsCache {
String globalTermStats = req.getParams().get(TERM_STATS_KEY);
String globalColStats = req.getParams().get(COL_STATS_KEY);
if (globalColStats != null) {
Map<String,CollectionStats> colStats = StatsUtil
.colStatsMapFromString(globalColStats);
Map<String,CollectionStats> colStats = StatsUtil.colStatsMapFromString(globalColStats);
if (colStats != null) {
for (Entry<String,CollectionStats> e : colStats.entrySet()) {
addToGlobalColStats(req, e);
@ -284,8 +277,7 @@ public class ExactStatsCache extends StatsCache {
}
LOG.debug("Global collection stats={}", globalColStats);
if (globalTermStats == null) return;
Map<String,TermStats> termStats = StatsUtil
.termStatsMapFromString(globalTermStats);
Map<String,TermStats> termStats = StatsUtil.termStatsMapFromString(globalTermStats);
if (termStats != null) {
for (Entry<String,TermStats> e : termStats.entrySet()) {
addToGlobalTermStats(req, e);
@ -297,7 +289,7 @@ public class ExactStatsCache extends StatsCache {
Entry<String,CollectionStats> e) {
Map<String,CollectionStats> currentGlobalColStats = (Map<String,CollectionStats>) req.getContext().get(CURRENT_GLOBAL_COL_STATS);
if (currentGlobalColStats == null) {
currentGlobalColStats = new HashMap<String,CollectionStats>();
currentGlobalColStats = new HashMap<>();
req.getContext().put(CURRENT_GLOBAL_COL_STATS, currentGlobalColStats);
}
currentGlobalColStats.put(e.getKey(), e.getValue());
@ -306,7 +298,7 @@ public class ExactStatsCache extends StatsCache {
protected void addToGlobalTermStats(SolrQueryRequest req, Entry<String,TermStats> e) {
Map<String,TermStats> currentGlobalTermStats = (Map<String,TermStats>) req.getContext().get(CURRENT_GLOBAL_TERM_STATS);
if (currentGlobalTermStats == null) {
currentGlobalTermStats = new HashMap<String,TermStats>();
currentGlobalTermStats = new HashMap<>();
req.getContext().put(CURRENT_GLOBAL_TERM_STATS, currentGlobalTermStats);
}
currentGlobalTermStats.put(e.getKey(), e.getValue());

View File

@ -54,8 +54,7 @@ import org.slf4j.LoggerFactory;
* that is updated with the global statistics on every request.
*/
public class LRUStatsCache extends ExactStatsCache {
private static final Logger LOG = LoggerFactory
.getLogger(LRUStatsCache.class);
private static final Logger LOG = LoggerFactory.getLogger(LRUStatsCache.class);
// local stats obtained from shard servers
private final Map<String,SolrCache<String,TermStats>> perShardTermStats = new ConcurrentHashMap<>();
@ -88,21 +87,18 @@ public class LRUStatsCache extends ExactStatsCache {
}
@Override
protected void addToPerShardColStats(SolrQueryRequest req, String shard,
Map<String,CollectionStats> colStats) {
protected void addToPerShardColStats(SolrQueryRequest req, String shard, Map<String,CollectionStats> colStats) {
perShardColStats.put(shard, colStats);
}
@Override
protected Map<String,CollectionStats> getPerShardColStats(ResponseBuilder rb,
String shard) {
protected Map<String,CollectionStats> getPerShardColStats(ResponseBuilder rb, String shard) {
return perShardColStats.get(shard);
}
@Override
protected void addToPerShardTermStats(SolrQueryRequest req, String shard, String termStatsString) {
Map<String,TermStats> termStats = StatsUtil
.termStatsMapFromString(termStatsString);
Map<String,TermStats> termStats = StatsUtil.termStatsMapFromString(termStatsString);
if (termStats != null) {
SolrCache<String,TermStats> cache = perShardTermStats.get(shard);
if (cache == null) { // initialize
@ -119,12 +115,11 @@ public class LRUStatsCache extends ExactStatsCache {
@Override
protected TermStats getPerShardTermStats(SolrQueryRequest req, String t, String shard) {
SolrCache<String,TermStats> cache = perShardTermStats.get(shard);
return cache.get(t);
return (cache != null) ? cache.get(t) : null; //Term doesn't exist in shard
}
@Override
protected void addToGlobalColStats(SolrQueryRequest req,
Entry<String,CollectionStats> e) {
protected void addToGlobalColStats(SolrQueryRequest req, Entry<String,CollectionStats> e) {
currentGlobalColStats.put(e.getKey(), e.getValue());
}
@ -137,8 +132,7 @@ public class LRUStatsCache extends ExactStatsCache {
private final SolrCache<String,TermStats> termStatsCache;
private final Map<String,CollectionStats> colStatsCache;
public LRUStatsSource(SolrCache<String,TermStats> termStatsCache,
Map<String,CollectionStats> colStatsCache) {
public LRUStatsSource(SolrCache<String,TermStats> termStatsCache, Map<String,CollectionStats> colStatsCache) {
this.termStatsCache = termStatsCache;
this.colStatsCache = colStatsCache;
}

View File

@ -195,7 +195,7 @@ public class StatsUtil {
if (data == null || data.trim().length() == 0) {
return null;
}
Map<String,TermStats> map = new HashMap<String,TermStats>();
Map<String,TermStats> map = new HashMap<>();
String[] entries = data.split("!");
for (String es : entries) {
TermStats termStats = termStatsFromString(es, null);

View File

@ -18,8 +18,12 @@
<schema name="minimal" version="1.1">
<types>
<fieldType name="string" class="solr.StrField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
</types>
<fields>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="_root_" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<dynamicField name="*" type="string" indexed="true" stored="true" />
</fields>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,213 @@
package org.apache.solr.search.stats;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.AbstractDistribZkTestBase;
import org.apache.solr.cloud.MiniSolrCloudCluster;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.CompositeIdRouter;
import org.apache.solr.common.cloud.ImplicitDocRouter;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.ShardParams;
import org.junit.Test;
public class TestDistribIDF extends SolrTestCaseJ4 {
private MiniSolrCloudCluster solrCluster;
@Override
public void setUp() throws Exception {
if (random().nextBoolean()) {
System.setProperty("solr.statsCache", ExactStatsCache.class.getName());
} else {
System.setProperty("solr.statsCache", LRUStatsCache.class.getName());
}
super.setUp();
final File solrXml = getFile("solr").toPath().resolve("solr-no-core.xml").toFile();
solrCluster = new MiniSolrCloudCluster(3, createTempDir().toFile(), solrXml, buildJettyConfig("/solr"));
// set some system properties for use by tests
System.setProperty("solr.test.sys.prop1", "propone");
System.setProperty("solr.test.sys.prop2", "proptwo");
File configDir = getFile("solr").toPath().resolve("collection1/conf").toFile();
solrCluster.uploadConfigDir(configDir, "conf1");
configDir = getFile("solr").toPath().resolve("configsets/configset-2/conf").toFile();
solrCluster.uploadConfigDir(configDir, "conf2");
}
@Override
public void tearDown() throws Exception {
solrCluster.shutdown();
System.clearProperty("solr.statsCache");
System.clearProperty("solr.test.sys.prop1");
System.clearProperty("solr.test.sys.prop2");
super.tearDown();
}
@Test
public void testSimpleQuery() throws Exception {
//3 shards. 3rd shard won't have any data.
createCollection("onecollection", "conf1", ImplicitDocRouter.NAME);
createCollection("onecollection_local", "conf2", ImplicitDocRouter.NAME);
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", 1);
doc.setField("cat", "football");
doc.addField(ShardParams._ROUTE_, "a");
solrCluster.getSolrClient().add("onecollection", doc);
solrCluster.getSolrClient().add("onecollection_local", doc);
doc = new SolrInputDocument();
doc.setField("id", 2);
doc.setField("cat", "football");
doc.addField(ShardParams._ROUTE_, "b");
solrCluster.getSolrClient().add("onecollection", doc);
solrCluster.getSolrClient().add("onecollection_local", doc);
int nDocs = TestUtil.nextInt(random(), 10, 100);
for (int i=0; i<nDocs; i++) {
doc = new SolrInputDocument();
doc.setField("id", 3 + i);
String cat = TestUtil.randomSimpleString(random());
if (!cat.equals("football")) { //Making sure no other document has the query term in it.
doc.setField("cat", cat);
if (rarely()) { //Put most documents in shard b so that 'football' becomes 'rare' in shard b
doc.addField(ShardParams._ROUTE_, "a");
} else {
doc.addField(ShardParams._ROUTE_, "b");
}
solrCluster.getSolrClient().add("onecollection", doc);
solrCluster.getSolrClient().add("onecollection_local", doc);
}
}
solrCluster.getSolrClient().commit("onecollection");
solrCluster.getSolrClient().commit("onecollection_local");
//Test against all nodes
for (JettySolrRunner jettySolrRunner : solrCluster.getJettySolrRunners()) {
SolrClient solrClient = new HttpSolrClient(jettySolrRunner.getBaseUrl().toString());
SolrClient solrClient_local = new HttpSolrClient(jettySolrRunner.getBaseUrl().toString());
SolrQuery query = new SolrQuery("cat:football");
query.setFields("*,score");
QueryResponse queryResponse = solrClient.query("onecollection", query);
assertEquals(2, queryResponse.getResults().getNumFound());
float score1 = (float) queryResponse.getResults().get(0).get("score");
float score2 = (float) queryResponse.getResults().get(1).get("score");
assertEquals("Doc1 score=" + score1 + " Doc2 score=" + score2, 0, Float.compare(score1, score2));
query = new SolrQuery("cat:football");
query.setShowDebugInfo(true);
query.setFields("*,score");
queryResponse = solrClient_local.query("onecollection_local", query);
assertEquals(2, queryResponse.getResults().getNumFound());
assertEquals(2, queryResponse.getResults().get(0).get("id"));
assertEquals(1, queryResponse.getResults().get(1).get("id"));
float score1_local = (float) queryResponse.getResults().get(0).get("score");
float score2_local = (float) queryResponse.getResults().get(1).get("score");
assertEquals("Doc1 score=" + score1_local + " Doc2 score=" + score2_local, 1, Float.compare(score1_local, score2_local));
}
}
private void createCollection(String name, String config) throws Exception {
createCollection(name, config, CompositeIdRouter.NAME);
}
private void createCollection(String name, String config, String router) throws Exception {
CollectionAdminResponse response;
if (router.equals(ImplicitDocRouter.NAME)) {
CollectionAdminRequest.Create create = new CollectionAdminRequest.Create();
create.setConfigName(config);
create.setCollectionName(name);
create.setReplicationFactor(1);
create.setMaxShardsPerNode(1);
create.setRouterName(router);
create.setShards("a,b,c");
response = create.process(solrCluster.getSolrClient());
} else {
CollectionAdminRequest.Create create = new CollectionAdminRequest.Create();
create.setConfigName(config);
create.setCollectionName(name);
create.setNumShards(2);
create.setReplicationFactor(1);
create.setMaxShardsPerNode(1);
response = create.process(solrCluster.getSolrClient());
}
if (response.getStatus() != 0 || response.getErrorMessages() != null) {
fail("Could not create collection. Response" + response.toString());
}
ZkStateReader zkStateReader = solrCluster.getSolrClient().getZkStateReader();
AbstractDistribZkTestBase.waitForRecoveriesToFinish(name, zkStateReader, false, true, 100);
}
private void addDocsRandomly() throws IOException, SolrServerException {
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", 1);
doc.setField("cat", "football");
solrCluster.getSolrClient().add("collection1", doc);
solrCluster.getSolrClient().add("collection1_local", doc);
doc = new SolrInputDocument();
doc.setField("id", 2);
doc.setField("cat", "football");
solrCluster.getSolrClient().add("collection2", doc);
solrCluster.getSolrClient().add("collection2_local", doc);
int nDocs = TestUtil.nextInt(random(), 10, 100);
int collection1Count = 1;
int collection2Count = 1;
for (int i=0; i<nDocs; i++) {
doc = new SolrInputDocument();
doc.setField("id", 3 + i);
String cat = TestUtil.randomSimpleString(random());
if (!cat.equals("football")) { //Making sure no other document has the query term in it.
doc.setField("cat", cat);
if (rarely()) { //Put most documents in collection2* so that 'football' becomes 'rare' in collection2*
solrCluster.getSolrClient().add("collection1", doc);
solrCluster.getSolrClient().add("collection1_local", doc);
collection1Count++;
} else {
solrCluster.getSolrClient().add("collection2", doc);
solrCluster.getSolrClient().add("collection2_local", doc);
collection2Count++;
}
}
}
log.info("numDocs={}. collection1Count={} collection2Count={}", nDocs, collection1Count, collection2Count);
solrCluster.getSolrClient().commit("collection1");
solrCluster.getSolrClient().commit("collection2");
solrCluster.getSolrClient().commit("collection1_local");
solrCluster.getSolrClient().commit("collection2_local");
}
}