SOLR-8419: TermVectorComponent: fix accidental inclusion of docs when distrib.singlePass. Remove 'uniqueKeyField' from response. distrib now requires a schema unique key.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1720714 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2015-12-18 02:15:56 +00:00
parent 3972c0e9eb
commit 46392dde41
4 changed files with 44 additions and 34 deletions

View File

@ -268,6 +268,9 @@ Bug Fixes
and then that replica is brought down, it will think it is up-to-date when
restarted. (shalin, Mark Miller, yonik)
* SOLR-8419: TermVectorComponent for distributed search when distrib.singlePass could include term
vectors for documents that matched the query yet weren't in the returned documents. (David Smiley)
Other Changes
----------------------
@ -345,6 +348,9 @@ Other Changes
* SOLR-8279: Add a new test fault injection approach and a new SolrCloud test that stops and starts the cluster
while indexing data and with random faults. (Mark Miller)
* SOLR-8419: TermVectorComponent for distributed search now requires a uniqueKey in the schema. Also, it no longer
returns "uniqueKeyField" in the response. (David Smiley)
================== 5.4.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

View File

@ -83,9 +83,12 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
public static final String COMPONENT_NAME = "tv";
protected NamedList initParams;
public static final String TERM_VECTORS = "termVectors";
private static final String TV_KEY_WARNINGS = "warnings";
protected NamedList initParams;
/**
* Helper method for determining the list of fields that we should
* try to find term vectors on.
@ -147,7 +150,6 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
String uniqFieldName = null;
if (keyField != null) {
uniqFieldName = keyField.getName();
termVectors.add("uniqueKeyFieldName", uniqFieldName);
}
FieldOptions allFields = new FieldOptions();
@ -182,7 +184,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
//we have specific fields to retrieve, or no fields
for (String field : fields) {
// workarround SOLR-3523
// workaround SOLR-3523
if (null == field || "score".equals(field)) continue;
// we don't want to issue warnings about the uniqueKey field
@ -226,29 +228,24 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
}
} //else, deal with all fields
// NOTE: currently all typs of warnings are schema driven, and garunteed
// NOTE: currently all types of warnings are schema driven, and guaranteed
// to be consistent across all shards - if additional types of warnings
// are added that might be differnet between shards, finishStage() needs
// are added that might be different between shards, finishStage() needs
// to be changed to account for that.
boolean hasWarnings = false;
if (!noTV.isEmpty()) {
warnings.add("noTermVectors", noTV);
hasWarnings = true;
}
if (!noPos.isEmpty()) {
warnings.add("noPositions", noPos);
hasWarnings = true;
}
if (!noOff.isEmpty()) {
warnings.add("noOffsets", noOff);
hasWarnings = true;
}
if (!noPay.isEmpty()) {
warnings.add("noPayloads", noPay);
hasWarnings = true;
}
if (hasWarnings) {
termVectors.add("warnings", warnings);
if (warnings.size() > 0) {
termVectors.add(TV_KEY_WARNINGS, warnings);
}
DocListAndSet listAndSet = rb.getResults();
@ -442,7 +439,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
public void finishStage(ResponseBuilder rb) {
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
NamedList<Object> termVectors = new NamedList<>();
NamedList<Object> termVectorsNL = new NamedList<>();
Map.Entry<String, Object>[] arr = new NamedList.NamedListEntry[rb.resultIds.size()];
for (ShardRequest sreq : rb.finished) {
@ -451,15 +448,19 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
}
for (ShardResponse srsp : sreq.responses) {
NamedList<Object> nl = (NamedList<Object>)srsp.getSolrResponse().getResponse().get(TERM_VECTORS);
// Add metadata (that which isn't a uniqueKey value):
Object warningsNL = nl.get(TV_KEY_WARNINGS);
// assume if that if warnings is already present; we don't need to merge.
if (warningsNL != null && termVectorsNL.indexOf(TV_KEY_WARNINGS, 0) < 0) {
termVectorsNL.add(TV_KEY_WARNINGS, warningsNL);
}
// UniqueKey data
for (int i=0; i < nl.size(); i++) {
String key = nl.getName(i);
ShardDoc sdoc = rb.resultIds.get(key);
if (null == sdoc) {
// metadata, only need from one node, leave in order
if (termVectors.indexOf(key,0) < 0) {
termVectors.add(key, nl.getVal(i));
}
} else {
if (sdoc != null) {// can be null when rb.onePassDistributedQuery
int idx = sdoc.positionInResponse;
arr[idx] = new NamedList.NamedListEntry<>(key, nl.getVal(i));
}
@ -467,8 +468,8 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
}
}
// remove nulls in case not all docs were able to be retrieved
termVectors.addAll(SolrPluginUtils.removeNulls(arr, new NamedList<Object>()));
rb.rsp.add(TERM_VECTORS, termVectors);
SolrPluginUtils.removeNulls(arr, termVectorsNL);
rb.rsp.add(TERM_VECTORS, termVectorsNL);
}
}

View File

@ -20,6 +20,7 @@ package org.apache.solr.handler.component;
import org.apache.lucene.util.Constants;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.TermVectorParams;
import org.junit.BeforeClass;
import org.junit.Test;
@ -188,7 +189,15 @@ public class TermVectorComponentDistributedTest extends BaseDistributedSearchTes
"q", q,
TermVectorComponent.COMPONENT_NAME, "true",
TermVectorParams.ALL, "true");
query("sort", "id desc",
"qt",tv,
"q", q,
"rows", 1,
ShardParams.DISTRIB_SINGLE_PASS, "true",
TermVectorComponent.COMPONENT_NAME, "true",
TermVectorParams.ALL, "true");
// per field stuff
query("sort", "id desc",

View File

@ -130,8 +130,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
);
// tv.fl diff from fl
assertJQ(req("json.nl","map",
@ -143,8 +142,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
TermVectorParams.TF, "true")
,"/termVectors=={'0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
);
// multi-valued tv.fl
assertJQ(req("json.nl","map",
@ -157,8 +155,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
TermVectorParams.TF, "true")
,"/termVectors=={'0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
);
// re-use fl glob
assertJQ(req("json.nl","map",
@ -172,8 +169,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
);
// re-use fl, ignore things we can't handle
assertJQ(req("json.nl","map",
@ -184,8 +180,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
TermVectorParams.TF, "true")
,"/termVectors=={'0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
);
// re-use (multi-valued) fl, ignore things we can't handle
assertJQ(req("json.nl","map",
@ -197,8 +192,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
TermVectorParams.TF, "true")
,"/termVectors=={'0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
);
}