mirror of https://github.com/apache/lucene.git
SOLR-8419: TermVectorComponent: fix accidental inclusion of docs when distrib.singlePass. Remove 'uniqueKeyField' from response. distrib now requires a schema unique key.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1720714 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3972c0e9eb
commit
46392dde41
|
@ -268,6 +268,9 @@ Bug Fixes
|
|||
and then that replica is brought down, it will think it is up-to-date when
|
||||
restarted. (shalin, Mark Miller, yonik)
|
||||
|
||||
* SOLR-8419: TermVectorComponent for distributed search when distrib.singlePass could include term
|
||||
vectors for documents that matched the query yet weren't in the returned documents. (David Smiley)
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
@ -345,6 +348,9 @@ Other Changes
|
|||
* SOLR-8279: Add a new test fault injection approach and a new SolrCloud test that stops and starts the cluster
|
||||
while indexing data and with random faults. (Mark Miller)
|
||||
|
||||
* SOLR-8419: TermVectorComponent for distributed search now requires a uniqueKey in the schema. Also, it no longer
|
||||
returns "uniqueKeyField" in the response. (David Smiley)
|
||||
|
||||
================== 5.4.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
||||
|
|
|
@ -83,9 +83,12 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
public static final String COMPONENT_NAME = "tv";
|
||||
|
||||
protected NamedList initParams;
|
||||
public static final String TERM_VECTORS = "termVectors";
|
||||
|
||||
private static final String TV_KEY_WARNINGS = "warnings";
|
||||
|
||||
protected NamedList initParams;
|
||||
|
||||
/**
|
||||
* Helper method for determining the list of fields that we should
|
||||
* try to find term vectors on.
|
||||
|
@ -147,7 +150,6 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
String uniqFieldName = null;
|
||||
if (keyField != null) {
|
||||
uniqFieldName = keyField.getName();
|
||||
termVectors.add("uniqueKeyFieldName", uniqFieldName);
|
||||
}
|
||||
|
||||
FieldOptions allFields = new FieldOptions();
|
||||
|
@ -182,7 +184,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
//we have specific fields to retrieve, or no fields
|
||||
for (String field : fields) {
|
||||
|
||||
// workarround SOLR-3523
|
||||
// workaround SOLR-3523
|
||||
if (null == field || "score".equals(field)) continue;
|
||||
|
||||
// we don't want to issue warnings about the uniqueKey field
|
||||
|
@ -226,29 +228,24 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
} //else, deal with all fields
|
||||
|
||||
// NOTE: currently all typs of warnings are schema driven, and garunteed
|
||||
// NOTE: currently all types of warnings are schema driven, and guaranteed
|
||||
// to be consistent across all shards - if additional types of warnings
|
||||
// are added that might be differnet between shards, finishStage() needs
|
||||
// are added that might be different between shards, finishStage() needs
|
||||
// to be changed to account for that.
|
||||
boolean hasWarnings = false;
|
||||
if (!noTV.isEmpty()) {
|
||||
warnings.add("noTermVectors", noTV);
|
||||
hasWarnings = true;
|
||||
}
|
||||
if (!noPos.isEmpty()) {
|
||||
warnings.add("noPositions", noPos);
|
||||
hasWarnings = true;
|
||||
}
|
||||
if (!noOff.isEmpty()) {
|
||||
warnings.add("noOffsets", noOff);
|
||||
hasWarnings = true;
|
||||
}
|
||||
if (!noPay.isEmpty()) {
|
||||
warnings.add("noPayloads", noPay);
|
||||
hasWarnings = true;
|
||||
}
|
||||
if (hasWarnings) {
|
||||
termVectors.add("warnings", warnings);
|
||||
if (warnings.size() > 0) {
|
||||
termVectors.add(TV_KEY_WARNINGS, warnings);
|
||||
}
|
||||
|
||||
DocListAndSet listAndSet = rb.getResults();
|
||||
|
@ -442,7 +439,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
public void finishStage(ResponseBuilder rb) {
|
||||
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
|
||||
|
||||
NamedList<Object> termVectors = new NamedList<>();
|
||||
NamedList<Object> termVectorsNL = new NamedList<>();
|
||||
Map.Entry<String, Object>[] arr = new NamedList.NamedListEntry[rb.resultIds.size()];
|
||||
|
||||
for (ShardRequest sreq : rb.finished) {
|
||||
|
@ -451,15 +448,19 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
for (ShardResponse srsp : sreq.responses) {
|
||||
NamedList<Object> nl = (NamedList<Object>)srsp.getSolrResponse().getResponse().get(TERM_VECTORS);
|
||||
|
||||
// Add metadata (that which isn't a uniqueKey value):
|
||||
Object warningsNL = nl.get(TV_KEY_WARNINGS);
|
||||
// assume if that if warnings is already present; we don't need to merge.
|
||||
if (warningsNL != null && termVectorsNL.indexOf(TV_KEY_WARNINGS, 0) < 0) {
|
||||
termVectorsNL.add(TV_KEY_WARNINGS, warningsNL);
|
||||
}
|
||||
|
||||
// UniqueKey data
|
||||
for (int i=0; i < nl.size(); i++) {
|
||||
String key = nl.getName(i);
|
||||
ShardDoc sdoc = rb.resultIds.get(key);
|
||||
if (null == sdoc) {
|
||||
// metadata, only need from one node, leave in order
|
||||
if (termVectors.indexOf(key,0) < 0) {
|
||||
termVectors.add(key, nl.getVal(i));
|
||||
}
|
||||
} else {
|
||||
if (sdoc != null) {// can be null when rb.onePassDistributedQuery
|
||||
int idx = sdoc.positionInResponse;
|
||||
arr[idx] = new NamedList.NamedListEntry<>(key, nl.getVal(i));
|
||||
}
|
||||
|
@ -467,8 +468,8 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
}
|
||||
// remove nulls in case not all docs were able to be retrieved
|
||||
termVectors.addAll(SolrPluginUtils.removeNulls(arr, new NamedList<Object>()));
|
||||
rb.rsp.add(TERM_VECTORS, termVectors);
|
||||
SolrPluginUtils.removeNulls(arr, termVectorsNL);
|
||||
rb.rsp.add(TERM_VECTORS, termVectorsNL);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.solr.handler.component;
|
|||
import org.apache.lucene.util.Constants;
|
||||
|
||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||
import org.apache.solr.common.params.ShardParams;
|
||||
import org.apache.solr.common.params.TermVectorParams;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -189,6 +190,14 @@ public class TermVectorComponentDistributedTest extends BaseDistributedSearchTes
|
|||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.ALL, "true");
|
||||
|
||||
query("sort", "id desc",
|
||||
"qt",tv,
|
||||
"q", q,
|
||||
"rows", 1,
|
||||
ShardParams.DISTRIB_SINGLE_PASS, "true",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.ALL, "true");
|
||||
|
||||
// per field stuff
|
||||
|
||||
query("sort", "id desc",
|
||||
|
|
|
@ -130,8 +130,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||
);
|
||||
// tv.fl diff from fl
|
||||
assertJQ(req("json.nl","map",
|
||||
|
@ -143,8 +142,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||
);
|
||||
// multi-valued tv.fl
|
||||
assertJQ(req("json.nl","map",
|
||||
|
@ -157,8 +155,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||
);
|
||||
// re-use fl glob
|
||||
assertJQ(req("json.nl","map",
|
||||
|
@ -172,8 +169,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||
);
|
||||
// re-use fl, ignore things we can't handle
|
||||
assertJQ(req("json.nl","map",
|
||||
|
@ -184,8 +180,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||
);
|
||||
// re-use (multi-valued) fl, ignore things we can't handle
|
||||
assertJQ(req("json.nl","map",
|
||||
|
@ -197,8 +192,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||
);
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue