SOLR-8419: TermVectorComponent: fix accidental inclusion of docs when distrib.singlePass. Remove 'uniqueKeyField' from response. distrib now requires a schema unique key.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1720714 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2015-12-18 02:15:56 +00:00
parent 3972c0e9eb
commit 46392dde41
4 changed files with 44 additions and 34 deletions

View File

@ -268,6 +268,9 @@ Bug Fixes
and then that replica is brought down, it will think it is up-to-date when and then that replica is brought down, it will think it is up-to-date when
restarted. (shalin, Mark Miller, yonik) restarted. (shalin, Mark Miller, yonik)
* SOLR-8419: TermVectorComponent for distributed search when distrib.singlePass could include term
vectors for documents that matched the query yet weren't in the returned documents. (David Smiley)
Other Changes Other Changes
---------------------- ----------------------
@ -345,6 +348,9 @@ Other Changes
* SOLR-8279: Add a new test fault injection approach and a new SolrCloud test that stops and starts the cluster * SOLR-8279: Add a new test fault injection approach and a new SolrCloud test that stops and starts the cluster
while indexing data and with random faults. (Mark Miller) while indexing data and with random faults. (Mark Miller)
* SOLR-8419: TermVectorComponent for distributed search now requires a uniqueKey in the schema. Also, it no longer
returns "uniqueKeyField" in the response. (David Smiley)
================== 5.4.0 ================== ================== 5.4.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

View File

@ -83,9 +83,12 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
public static final String COMPONENT_NAME = "tv"; public static final String COMPONENT_NAME = "tv";
protected NamedList initParams;
public static final String TERM_VECTORS = "termVectors"; public static final String TERM_VECTORS = "termVectors";
private static final String TV_KEY_WARNINGS = "warnings";
protected NamedList initParams;
/** /**
* Helper method for determining the list of fields that we should * Helper method for determining the list of fields that we should
* try to find term vectors on. * try to find term vectors on.
@ -147,7 +150,6 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
String uniqFieldName = null; String uniqFieldName = null;
if (keyField != null) { if (keyField != null) {
uniqFieldName = keyField.getName(); uniqFieldName = keyField.getName();
termVectors.add("uniqueKeyFieldName", uniqFieldName);
} }
FieldOptions allFields = new FieldOptions(); FieldOptions allFields = new FieldOptions();
@ -182,7 +184,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
//we have specific fields to retrieve, or no fields //we have specific fields to retrieve, or no fields
for (String field : fields) { for (String field : fields) {
// workarround SOLR-3523 // workaround SOLR-3523
if (null == field || "score".equals(field)) continue; if (null == field || "score".equals(field)) continue;
// we don't want to issue warnings about the uniqueKey field // we don't want to issue warnings about the uniqueKey field
@ -226,29 +228,24 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
} }
} //else, deal with all fields } //else, deal with all fields
// NOTE: currently all typs of warnings are schema driven, and garunteed // NOTE: currently all types of warnings are schema driven, and guaranteed
// to be consistent across all shards - if additional types of warnings // to be consistent across all shards - if additional types of warnings
// are added that might be differnet between shards, finishStage() needs // are added that might be different between shards, finishStage() needs
// to be changed to account for that. // to be changed to account for that.
boolean hasWarnings = false;
if (!noTV.isEmpty()) { if (!noTV.isEmpty()) {
warnings.add("noTermVectors", noTV); warnings.add("noTermVectors", noTV);
hasWarnings = true;
} }
if (!noPos.isEmpty()) { if (!noPos.isEmpty()) {
warnings.add("noPositions", noPos); warnings.add("noPositions", noPos);
hasWarnings = true;
} }
if (!noOff.isEmpty()) { if (!noOff.isEmpty()) {
warnings.add("noOffsets", noOff); warnings.add("noOffsets", noOff);
hasWarnings = true;
} }
if (!noPay.isEmpty()) { if (!noPay.isEmpty()) {
warnings.add("noPayloads", noPay); warnings.add("noPayloads", noPay);
hasWarnings = true;
} }
if (hasWarnings) { if (warnings.size() > 0) {
termVectors.add("warnings", warnings); termVectors.add(TV_KEY_WARNINGS, warnings);
} }
DocListAndSet listAndSet = rb.getResults(); DocListAndSet listAndSet = rb.getResults();
@ -442,7 +439,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
public void finishStage(ResponseBuilder rb) { public void finishStage(ResponseBuilder rb) {
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) { if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
NamedList<Object> termVectors = new NamedList<>(); NamedList<Object> termVectorsNL = new NamedList<>();
Map.Entry<String, Object>[] arr = new NamedList.NamedListEntry[rb.resultIds.size()]; Map.Entry<String, Object>[] arr = new NamedList.NamedListEntry[rb.resultIds.size()];
for (ShardRequest sreq : rb.finished) { for (ShardRequest sreq : rb.finished) {
@ -451,15 +448,19 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
} }
for (ShardResponse srsp : sreq.responses) { for (ShardResponse srsp : sreq.responses) {
NamedList<Object> nl = (NamedList<Object>)srsp.getSolrResponse().getResponse().get(TERM_VECTORS); NamedList<Object> nl = (NamedList<Object>)srsp.getSolrResponse().getResponse().get(TERM_VECTORS);
// Add metadata (that which isn't a uniqueKey value):
Object warningsNL = nl.get(TV_KEY_WARNINGS);
// assume if that if warnings is already present; we don't need to merge.
if (warningsNL != null && termVectorsNL.indexOf(TV_KEY_WARNINGS, 0) < 0) {
termVectorsNL.add(TV_KEY_WARNINGS, warningsNL);
}
// UniqueKey data
for (int i=0; i < nl.size(); i++) { for (int i=0; i < nl.size(); i++) {
String key = nl.getName(i); String key = nl.getName(i);
ShardDoc sdoc = rb.resultIds.get(key); ShardDoc sdoc = rb.resultIds.get(key);
if (null == sdoc) { if (sdoc != null) {// can be null when rb.onePassDistributedQuery
// metadata, only need from one node, leave in order
if (termVectors.indexOf(key,0) < 0) {
termVectors.add(key, nl.getVal(i));
}
} else {
int idx = sdoc.positionInResponse; int idx = sdoc.positionInResponse;
arr[idx] = new NamedList.NamedListEntry<>(key, nl.getVal(i)); arr[idx] = new NamedList.NamedListEntry<>(key, nl.getVal(i));
} }
@ -467,8 +468,8 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
} }
} }
// remove nulls in case not all docs were able to be retrieved // remove nulls in case not all docs were able to be retrieved
termVectors.addAll(SolrPluginUtils.removeNulls(arr, new NamedList<Object>())); SolrPluginUtils.removeNulls(arr, termVectorsNL);
rb.rsp.add(TERM_VECTORS, termVectors); rb.rsp.add(TERM_VECTORS, termVectorsNL);
} }
} }

View File

@ -20,6 +20,7 @@ package org.apache.solr.handler.component;
import org.apache.lucene.util.Constants; import org.apache.lucene.util.Constants;
import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.TermVectorParams; import org.apache.solr.common.params.TermVectorParams;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -189,6 +190,14 @@ public class TermVectorComponentDistributedTest extends BaseDistributedSearchTes
TermVectorComponent.COMPONENT_NAME, "true", TermVectorComponent.COMPONENT_NAME, "true",
TermVectorParams.ALL, "true"); TermVectorParams.ALL, "true");
query("sort", "id desc",
"qt",tv,
"q", q,
"rows", 1,
ShardParams.DISTRIB_SINGLE_PASS, "true",
TermVectorComponent.COMPONENT_NAME, "true",
TermVectorParams.ALL, "true");
// per field stuff // per field stuff
query("sort", "id desc", query("sort", "id desc",

View File

@ -130,8 +130,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
" 'uniqueKeyFieldName':'id'}"
); );
// tv.fl diff from fl // tv.fl diff from fl
assertJQ(req("json.nl","map", assertJQ(req("json.nl","map",
@ -143,8 +142,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
TermVectorParams.TF, "true") TermVectorParams.TF, "true")
,"/termVectors=={'0':{'uniqueKey':'0'," + ,"/termVectors=={'0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
" 'uniqueKeyFieldName':'id'}"
); );
// multi-valued tv.fl // multi-valued tv.fl
assertJQ(req("json.nl","map", assertJQ(req("json.nl","map",
@ -157,8 +155,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
TermVectorParams.TF, "true") TermVectorParams.TF, "true")
,"/termVectors=={'0':{'uniqueKey':'0'," + ,"/termVectors=={'0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
" 'uniqueKeyFieldName':'id'}"
); );
// re-use fl glob // re-use fl glob
assertJQ(req("json.nl","map", assertJQ(req("json.nl","map",
@ -172,8 +169,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
" 'uniqueKeyFieldName':'id'}"
); );
// re-use fl, ignore things we can't handle // re-use fl, ignore things we can't handle
assertJQ(req("json.nl","map", assertJQ(req("json.nl","map",
@ -184,8 +180,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
TermVectorParams.TF, "true") TermVectorParams.TF, "true")
,"/termVectors=={'0':{'uniqueKey':'0'," + ,"/termVectors=={'0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
" 'uniqueKeyFieldName':'id'}"
); );
// re-use (multi-valued) fl, ignore things we can't handle // re-use (multi-valued) fl, ignore things we can't handle
assertJQ(req("json.nl","map", assertJQ(req("json.nl","map",
@ -197,8 +192,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
TermVectorParams.TF, "true") TermVectorParams.TF, "true")
,"/termVectors=={'0':{'uniqueKey':'0'," + ,"/termVectors=={'0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
" 'uniqueKeyFieldName':'id'}"
); );
} }