mirror of https://github.com/apache/lucene.git
SOLR-8419: TermVectorComponent: fix accidental inclusion of docs when distrib.singlePass. Remove 'uniqueKeyField' from response. distrib now requires a schema unique key.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1720714 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3972c0e9eb
commit
46392dde41
|
@ -268,6 +268,9 @@ Bug Fixes
|
||||||
and then that replica is brought down, it will think it is up-to-date when
|
and then that replica is brought down, it will think it is up-to-date when
|
||||||
restarted. (shalin, Mark Miller, yonik)
|
restarted. (shalin, Mark Miller, yonik)
|
||||||
|
|
||||||
|
* SOLR-8419: TermVectorComponent for distributed search when distrib.singlePass could include term
|
||||||
|
vectors for documents that matched the query yet weren't in the returned documents. (David Smiley)
|
||||||
|
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
|
@ -345,6 +348,9 @@ Other Changes
|
||||||
* SOLR-8279: Add a new test fault injection approach and a new SolrCloud test that stops and starts the cluster
|
* SOLR-8279: Add a new test fault injection approach and a new SolrCloud test that stops and starts the cluster
|
||||||
while indexing data and with random faults. (Mark Miller)
|
while indexing data and with random faults. (Mark Miller)
|
||||||
|
|
||||||
|
* SOLR-8419: TermVectorComponent for distributed search now requires a uniqueKey in the schema. Also, it no longer
|
||||||
|
returns "uniqueKeyField" in the response. (David Smiley)
|
||||||
|
|
||||||
================== 5.4.0 ==================
|
================== 5.4.0 ==================
|
||||||
|
|
||||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
||||||
|
|
|
@ -83,9 +83,12 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
||||||
|
|
||||||
public static final String COMPONENT_NAME = "tv";
|
public static final String COMPONENT_NAME = "tv";
|
||||||
|
|
||||||
protected NamedList initParams;
|
|
||||||
public static final String TERM_VECTORS = "termVectors";
|
public static final String TERM_VECTORS = "termVectors";
|
||||||
|
|
||||||
|
private static final String TV_KEY_WARNINGS = "warnings";
|
||||||
|
|
||||||
|
protected NamedList initParams;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper method for determining the list of fields that we should
|
* Helper method for determining the list of fields that we should
|
||||||
* try to find term vectors on.
|
* try to find term vectors on.
|
||||||
|
@ -147,7 +150,6 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
||||||
String uniqFieldName = null;
|
String uniqFieldName = null;
|
||||||
if (keyField != null) {
|
if (keyField != null) {
|
||||||
uniqFieldName = keyField.getName();
|
uniqFieldName = keyField.getName();
|
||||||
termVectors.add("uniqueKeyFieldName", uniqFieldName);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
FieldOptions allFields = new FieldOptions();
|
FieldOptions allFields = new FieldOptions();
|
||||||
|
@ -182,7 +184,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
||||||
//we have specific fields to retrieve, or no fields
|
//we have specific fields to retrieve, or no fields
|
||||||
for (String field : fields) {
|
for (String field : fields) {
|
||||||
|
|
||||||
// workarround SOLR-3523
|
// workaround SOLR-3523
|
||||||
if (null == field || "score".equals(field)) continue;
|
if (null == field || "score".equals(field)) continue;
|
||||||
|
|
||||||
// we don't want to issue warnings about the uniqueKey field
|
// we don't want to issue warnings about the uniqueKey field
|
||||||
|
@ -226,29 +228,24 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
||||||
}
|
}
|
||||||
} //else, deal with all fields
|
} //else, deal with all fields
|
||||||
|
|
||||||
// NOTE: currently all typs of warnings are schema driven, and garunteed
|
// NOTE: currently all types of warnings are schema driven, and guaranteed
|
||||||
// to be consistent across all shards - if additional types of warnings
|
// to be consistent across all shards - if additional types of warnings
|
||||||
// are added that might be differnet between shards, finishStage() needs
|
// are added that might be different between shards, finishStage() needs
|
||||||
// to be changed to account for that.
|
// to be changed to account for that.
|
||||||
boolean hasWarnings = false;
|
|
||||||
if (!noTV.isEmpty()) {
|
if (!noTV.isEmpty()) {
|
||||||
warnings.add("noTermVectors", noTV);
|
warnings.add("noTermVectors", noTV);
|
||||||
hasWarnings = true;
|
|
||||||
}
|
}
|
||||||
if (!noPos.isEmpty()) {
|
if (!noPos.isEmpty()) {
|
||||||
warnings.add("noPositions", noPos);
|
warnings.add("noPositions", noPos);
|
||||||
hasWarnings = true;
|
|
||||||
}
|
}
|
||||||
if (!noOff.isEmpty()) {
|
if (!noOff.isEmpty()) {
|
||||||
warnings.add("noOffsets", noOff);
|
warnings.add("noOffsets", noOff);
|
||||||
hasWarnings = true;
|
|
||||||
}
|
}
|
||||||
if (!noPay.isEmpty()) {
|
if (!noPay.isEmpty()) {
|
||||||
warnings.add("noPayloads", noPay);
|
warnings.add("noPayloads", noPay);
|
||||||
hasWarnings = true;
|
|
||||||
}
|
}
|
||||||
if (hasWarnings) {
|
if (warnings.size() > 0) {
|
||||||
termVectors.add("warnings", warnings);
|
termVectors.add(TV_KEY_WARNINGS, warnings);
|
||||||
}
|
}
|
||||||
|
|
||||||
DocListAndSet listAndSet = rb.getResults();
|
DocListAndSet listAndSet = rb.getResults();
|
||||||
|
@ -442,7 +439,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
||||||
public void finishStage(ResponseBuilder rb) {
|
public void finishStage(ResponseBuilder rb) {
|
||||||
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
|
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
|
||||||
|
|
||||||
NamedList<Object> termVectors = new NamedList<>();
|
NamedList<Object> termVectorsNL = new NamedList<>();
|
||||||
Map.Entry<String, Object>[] arr = new NamedList.NamedListEntry[rb.resultIds.size()];
|
Map.Entry<String, Object>[] arr = new NamedList.NamedListEntry[rb.resultIds.size()];
|
||||||
|
|
||||||
for (ShardRequest sreq : rb.finished) {
|
for (ShardRequest sreq : rb.finished) {
|
||||||
|
@ -451,15 +448,19 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
||||||
}
|
}
|
||||||
for (ShardResponse srsp : sreq.responses) {
|
for (ShardResponse srsp : sreq.responses) {
|
||||||
NamedList<Object> nl = (NamedList<Object>)srsp.getSolrResponse().getResponse().get(TERM_VECTORS);
|
NamedList<Object> nl = (NamedList<Object>)srsp.getSolrResponse().getResponse().get(TERM_VECTORS);
|
||||||
|
|
||||||
|
// Add metadata (that which isn't a uniqueKey value):
|
||||||
|
Object warningsNL = nl.get(TV_KEY_WARNINGS);
|
||||||
|
// assume if that if warnings is already present; we don't need to merge.
|
||||||
|
if (warningsNL != null && termVectorsNL.indexOf(TV_KEY_WARNINGS, 0) < 0) {
|
||||||
|
termVectorsNL.add(TV_KEY_WARNINGS, warningsNL);
|
||||||
|
}
|
||||||
|
|
||||||
|
// UniqueKey data
|
||||||
for (int i=0; i < nl.size(); i++) {
|
for (int i=0; i < nl.size(); i++) {
|
||||||
String key = nl.getName(i);
|
String key = nl.getName(i);
|
||||||
ShardDoc sdoc = rb.resultIds.get(key);
|
ShardDoc sdoc = rb.resultIds.get(key);
|
||||||
if (null == sdoc) {
|
if (sdoc != null) {// can be null when rb.onePassDistributedQuery
|
||||||
// metadata, only need from one node, leave in order
|
|
||||||
if (termVectors.indexOf(key,0) < 0) {
|
|
||||||
termVectors.add(key, nl.getVal(i));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
int idx = sdoc.positionInResponse;
|
int idx = sdoc.positionInResponse;
|
||||||
arr[idx] = new NamedList.NamedListEntry<>(key, nl.getVal(i));
|
arr[idx] = new NamedList.NamedListEntry<>(key, nl.getVal(i));
|
||||||
}
|
}
|
||||||
|
@ -467,8 +468,8 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// remove nulls in case not all docs were able to be retrieved
|
// remove nulls in case not all docs were able to be retrieved
|
||||||
termVectors.addAll(SolrPluginUtils.removeNulls(arr, new NamedList<Object>()));
|
SolrPluginUtils.removeNulls(arr, termVectorsNL);
|
||||||
rb.rsp.add(TERM_VECTORS, termVectors);
|
rb.rsp.add(TERM_VECTORS, termVectorsNL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.solr.handler.component;
|
||||||
import org.apache.lucene.util.Constants;
|
import org.apache.lucene.util.Constants;
|
||||||
|
|
||||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||||
|
import org.apache.solr.common.params.ShardParams;
|
||||||
import org.apache.solr.common.params.TermVectorParams;
|
import org.apache.solr.common.params.TermVectorParams;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
@ -189,6 +190,14 @@ public class TermVectorComponentDistributedTest extends BaseDistributedSearchTes
|
||||||
TermVectorComponent.COMPONENT_NAME, "true",
|
TermVectorComponent.COMPONENT_NAME, "true",
|
||||||
TermVectorParams.ALL, "true");
|
TermVectorParams.ALL, "true");
|
||||||
|
|
||||||
|
query("sort", "id desc",
|
||||||
|
"qt",tv,
|
||||||
|
"q", q,
|
||||||
|
"rows", 1,
|
||||||
|
ShardParams.DISTRIB_SINGLE_PASS, "true",
|
||||||
|
TermVectorComponent.COMPONENT_NAME, "true",
|
||||||
|
TermVectorParams.ALL, "true");
|
||||||
|
|
||||||
// per field stuff
|
// per field stuff
|
||||||
|
|
||||||
query("sort", "id desc",
|
query("sort", "id desc",
|
||||||
|
|
|
@ -130,8 +130,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
||||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||||
" 'uniqueKeyFieldName':'id'}"
|
|
||||||
);
|
);
|
||||||
// tv.fl diff from fl
|
// tv.fl diff from fl
|
||||||
assertJQ(req("json.nl","map",
|
assertJQ(req("json.nl","map",
|
||||||
|
@ -143,8 +142,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
||||||
TermVectorParams.TF, "true")
|
TermVectorParams.TF, "true")
|
||||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||||
" 'uniqueKeyFieldName':'id'}"
|
|
||||||
);
|
);
|
||||||
// multi-valued tv.fl
|
// multi-valued tv.fl
|
||||||
assertJQ(req("json.nl","map",
|
assertJQ(req("json.nl","map",
|
||||||
|
@ -157,8 +155,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
||||||
TermVectorParams.TF, "true")
|
TermVectorParams.TF, "true")
|
||||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||||
" 'uniqueKeyFieldName':'id'}"
|
|
||||||
);
|
);
|
||||||
// re-use fl glob
|
// re-use fl glob
|
||||||
assertJQ(req("json.nl","map",
|
assertJQ(req("json.nl","map",
|
||||||
|
@ -172,8 +169,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
||||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||||
" 'uniqueKeyFieldName':'id'}"
|
|
||||||
);
|
);
|
||||||
// re-use fl, ignore things we can't handle
|
// re-use fl, ignore things we can't handle
|
||||||
assertJQ(req("json.nl","map",
|
assertJQ(req("json.nl","map",
|
||||||
|
@ -184,8 +180,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
||||||
TermVectorParams.TF, "true")
|
TermVectorParams.TF, "true")
|
||||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||||
" 'uniqueKeyFieldName':'id'}"
|
|
||||||
);
|
);
|
||||||
// re-use (multi-valued) fl, ignore things we can't handle
|
// re-use (multi-valued) fl, ignore things we can't handle
|
||||||
assertJQ(req("json.nl","map",
|
assertJQ(req("json.nl","map",
|
||||||
|
@ -197,8 +192,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
||||||
TermVectorParams.TF, "true")
|
TermVectorParams.TF, "true")
|
||||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}"
|
||||||
" 'uniqueKeyFieldName':'id'}"
|
|
||||||
);
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue