mirror of https://github.com/apache/lucene.git
SOLR-3229: Fixed TermVectorComponent to work with distributed search
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1370870 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e528549a3b
commit
38c1b46817
|
@ -26,6 +26,11 @@ $Id$
|
|||
|
||||
================== 4.0.0 ==================
|
||||
|
||||
Upgrading from Solr 4.0.0-BETA
|
||||
----------------------
|
||||
|
||||
In order to better support distributed search mode, the TermVectorComponent's response format has been changed so that if the schema defines a uniqueKeyField, then that field value is used as the "key" for each document in it's response section, instead of the internal lucene doc id. Users w/o a uniqueKeyField will continue to see the same response format. See SOLR-3229 for more details.
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
@ -33,6 +38,7 @@ Bug Fixes
|
|||
to tlog flags not being cleared when no updates were buffered during a previous
|
||||
replication. (Markus Jelsma, Mark Miller, yonik)
|
||||
|
||||
* SOLR-3229: Fixed TermVectorComponent to work with distributed search
|
||||
|
||||
================== 4.0.0-BETA ===================
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ import java.util.Set;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -141,6 +142,15 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
NamedList<Object> termVectors = new NamedList<Object>();
|
||||
rb.rsp.add(TERM_VECTORS, termVectors);
|
||||
|
||||
IndexSchema schema = rb.req.getSchema();
|
||||
SchemaField keyField = schema.getUniqueKeyField();
|
||||
String uniqFieldName = null;
|
||||
if (keyField != null) {
|
||||
uniqFieldName = keyField.getName();
|
||||
termVectors.add("uniqueKeyFieldName", uniqFieldName);
|
||||
}
|
||||
|
||||
FieldOptions allFields = new FieldOptions();
|
||||
//figure out what options we have, and try to get the appropriate vector
|
||||
allFields.termFreq = params.getBool(TermVectorParams.TF, false);
|
||||
|
@ -158,8 +168,6 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
allFields.tfIdf = true;
|
||||
}
|
||||
|
||||
//use this to validate our fields
|
||||
IndexSchema schema = rb.req.getSchema();
|
||||
//Build up our per field mapping
|
||||
Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
|
||||
NamedList<List<String>> warnings = new NamedList<List<String>>();
|
||||
|
@ -175,6 +183,11 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
// workarround SOLR-3523
|
||||
if (null == field || "score".equals(field)) continue;
|
||||
|
||||
// we don't want to issue warnings about the uniqueKey field
|
||||
// since it can cause lots of confusion in distributed requests
|
||||
// where the uniqueKey field is injected into the fl for merging
|
||||
final boolean fieldIsUniqueKey = field.equals(uniqFieldName);
|
||||
|
||||
SchemaField sf = schema.getFieldOrNull(field);
|
||||
if (sf != null) {
|
||||
if (sf.storeTermVector()) {
|
||||
|
@ -190,15 +203,15 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
|
||||
//Validate these are even an option
|
||||
option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
|
||||
if (option.positions && !sf.storeTermPositions()){
|
||||
if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey){
|
||||
noPos.add(field);
|
||||
}
|
||||
option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
|
||||
if (option.offsets && !sf.storeTermOffsets()){
|
||||
if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey){
|
||||
noOff.add(field);
|
||||
}
|
||||
} else {//field doesn't have term vectors
|
||||
noTV.add(field);
|
||||
if (!fieldIsUniqueKey) noTV.add(field);
|
||||
}
|
||||
} else {
|
||||
//field doesn't exist
|
||||
|
@ -206,6 +219,11 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
}
|
||||
} //else, deal with all fields
|
||||
|
||||
// NOTE: currently all typs of warnings are schema driven, and garunteed
|
||||
// to be consistent across all shards - if additional types of warnings
|
||||
// are added that might be differnet between shards, finishStage() needs
|
||||
// to be changed to account for that.
|
||||
boolean hasWarnings = false;
|
||||
if (!noTV.isEmpty()) {
|
||||
warnings.add("noTermVectors", noTV);
|
||||
|
@ -236,11 +254,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
IndexReader reader = searcher.getIndexReader();
|
||||
//the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
|
||||
SchemaField keyField = schema.getUniqueKeyField();
|
||||
String uniqFieldName = null;
|
||||
if (keyField != null) {
|
||||
uniqFieldName = keyField.getName();
|
||||
}
|
||||
|
||||
//Only load the id field to get the uniqueKey of that
|
||||
//field
|
||||
|
||||
|
@ -277,7 +291,6 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
while (iter.hasNext()) {
|
||||
Integer docId = iter.next();
|
||||
NamedList<Object> docNL = new NamedList<Object>();
|
||||
termVectors.add("doc-" + docId, docNL);
|
||||
|
||||
if (keyField != null) {
|
||||
reader.document(docId, getUniqValue);
|
||||
|
@ -286,9 +299,13 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
uniqVal = uniqValues.get(0);
|
||||
uniqValues.clear();
|
||||
docNL.add("uniqueKey", uniqVal);
|
||||
termVectors.add("uniqueKeyFieldName", uniqFieldName);
|
||||
termVectors.add(uniqVal, docNL);
|
||||
}
|
||||
} else {
|
||||
// support for schemas w/o a unique key,
|
||||
termVectors.add("doc-" + docId, docNL);
|
||||
}
|
||||
|
||||
if ( null != fields ) {
|
||||
for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
|
||||
final String field = entry.getKey();
|
||||
|
@ -394,46 +411,6 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int distributedProcess(ResponseBuilder rb) throws IOException {
|
||||
int result = ResponseBuilder.STAGE_DONE;
|
||||
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
|
||||
//Go ask each shard for it's vectors
|
||||
// for each shard, collect the documents for that shard.
|
||||
HashMap<String, Collection<ShardDoc>> shardMap = new HashMap<String, Collection<ShardDoc>>();
|
||||
for (ShardDoc sdoc : rb.resultIds.values()) {
|
||||
Collection<ShardDoc> shardDocs = shardMap.get(sdoc.shard);
|
||||
if (shardDocs == null) {
|
||||
shardDocs = new ArrayList<ShardDoc>();
|
||||
shardMap.put(sdoc.shard, shardDocs);
|
||||
}
|
||||
shardDocs.add(sdoc);
|
||||
}
|
||||
// Now create a request for each shard to retrieve the stored fields
|
||||
for (Collection<ShardDoc> shardDocs : shardMap.values()) {
|
||||
ShardRequest sreq = new ShardRequest();
|
||||
sreq.purpose = ShardRequest.PURPOSE_GET_FIELDS;
|
||||
|
||||
sreq.shards = new String[]{shardDocs.iterator().next().shard};
|
||||
|
||||
sreq.params = new ModifiableSolrParams();
|
||||
|
||||
// add original params
|
||||
sreq.params.add(rb.req.getParams());
|
||||
sreq.params.remove(CommonParams.Q);//remove the query
|
||||
ArrayList<String> ids = new ArrayList<String>(shardDocs.size());
|
||||
for (ShardDoc shardDoc : shardDocs) {
|
||||
ids.add(shardDoc.id.toString());
|
||||
}
|
||||
sreq.params.add(TermVectorParams.DOC_IDS, StrUtils.join(ids, ','));
|
||||
|
||||
rb.addRequest(this, sreq);
|
||||
}
|
||||
result = ResponseBuilder.STAGE_DONE;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static int getDocFreq(IndexReader reader, String field, BytesRef term) {
|
||||
int result = 1;
|
||||
try {
|
||||
|
@ -449,6 +426,40 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishStage(ResponseBuilder rb) {
|
||||
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
|
||||
|
||||
NamedList termVectors = new NamedList<Object>();
|
||||
Map.Entry<String, Object>[] arr = new NamedList.NamedListEntry[rb.resultIds.size()];
|
||||
|
||||
for (ShardRequest sreq : rb.finished) {
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) == 0 || !sreq.params.getBool(COMPONENT_NAME, false)) {
|
||||
continue;
|
||||
}
|
||||
for (ShardResponse srsp : sreq.responses) {
|
||||
NamedList<Object> nl = (NamedList<Object>)srsp.getSolrResponse().getResponse().get(TERM_VECTORS);
|
||||
for (int i=0; i < nl.size(); i++) {
|
||||
String key = nl.getName(i);
|
||||
ShardDoc sdoc = rb.resultIds.get(key);
|
||||
if (null == sdoc) {
|
||||
// metadata, only need from one node, leave in order
|
||||
if (termVectors.indexOf(key,0) < 0) {
|
||||
termVectors.add(key, nl.getVal(i));
|
||||
}
|
||||
} else {
|
||||
int idx = sdoc.positionInResponse;
|
||||
arr[idx] = new NamedList.NamedListEntry<Object>(key, nl.getVal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// remove nulls in case not all docs were able to be retrieved
|
||||
termVectors.addAll(SolrPluginUtils.removeNulls(new NamedList<Object>(arr)));
|
||||
rb.rsp.add(TERM_VECTORS, termVectors);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////// NamedListInitializedPlugin methods //////////////////////
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||
import org.apache.solr.common.params.TermVectorParams;
|
||||
|
||||
public class TermVectorComponentDistributedTest extends BaseDistributedSearchTestCase {
|
||||
|
||||
public void doTest() throws Exception {
|
||||
|
||||
handle.clear();
|
||||
handle.put("QTime", SKIPVAL);
|
||||
handle.put("timestamp", SKIPVAL);
|
||||
handle.put("maxScore", SKIPVAL);
|
||||
handle.put("score", SKIPVAL);
|
||||
handle.put("[docid]", SKIPVAL);
|
||||
|
||||
// SOLR-3720: TODO: TVC doesn't "merge" df and idf .. should it?
|
||||
handle.put("df", SKIPVAL);
|
||||
handle.put("tf-idf", SKIPVAL);
|
||||
|
||||
index("id", "0",
|
||||
"test_posofftv", "This is a title and another title",
|
||||
"test_basictv", "This is a title and another title",
|
||||
"test_notv", "This is a title and another title",
|
||||
"test_postv", "This is a title and another title",
|
||||
"test_offtv", "This is a title and another title"
|
||||
);
|
||||
index("id", "1",
|
||||
"test_posofftv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_basictv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_notv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_postv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_offtv", "The quick reb fox jumped over the lazy brown dogs."
|
||||
);
|
||||
|
||||
index("id", "2",
|
||||
"test_posofftv", "This is a document",
|
||||
"test_basictv", "This is a document",
|
||||
"test_notv", "This is a document",
|
||||
"test_postv", "This is a document",
|
||||
"test_offtv", "This is a document"
|
||||
);
|
||||
index("id", "3",
|
||||
"test_posofftv", "another document",
|
||||
"test_basictv", "another document",
|
||||
"test_notv", "another document",
|
||||
"test_postv", "another document",
|
||||
"test_offtv", "another document"
|
||||
);
|
||||
//bunch of docs that are variants on blue
|
||||
index("id", "4",
|
||||
"test_posofftv", "blue",
|
||||
"test_basictv", "blue",
|
||||
"test_notv", "blue",
|
||||
"test_postv", "blue",
|
||||
"test_offtv", "blue"
|
||||
);
|
||||
index("id", "5",
|
||||
"test_posofftv", "blud",
|
||||
"test_basictv", "blud",
|
||||
"test_notv", "blud",
|
||||
"test_postv", "blud",
|
||||
"test_offtv", "blud"
|
||||
);
|
||||
index("id", "6",
|
||||
"test_posofftv", "boue",
|
||||
"test_basictv", "boue",
|
||||
"test_notv", "boue",
|
||||
"test_postv", "boue",
|
||||
"test_offtv", "boue"
|
||||
);
|
||||
index("id", "7",
|
||||
"test_posofftv", "glue",
|
||||
"test_basictv", "glue",
|
||||
"test_notv", "glue",
|
||||
"test_postv", "glue",
|
||||
"test_offtv", "glue"
|
||||
);
|
||||
index("id", "8",
|
||||
"test_posofftv", "blee",
|
||||
"test_basictv", "blee",
|
||||
"test_notv", "blee",
|
||||
"test_postv", "blee",
|
||||
"test_offtv", "blee"
|
||||
);
|
||||
index("id", "9",
|
||||
"test_posofftv", "blah",
|
||||
"test_basictv", "blah",
|
||||
"test_notv", "blah",
|
||||
"test_postv", "blah",
|
||||
"test_offtv", "blah"
|
||||
);
|
||||
|
||||
commit();
|
||||
|
||||
final String tv = "tvrh";
|
||||
|
||||
for (String q : new String[] {"id:0", "id:7", "id:[3 TO 6]", "*:*"}) {
|
||||
|
||||
query("sort","id desc",
|
||||
"qt",tv,
|
||||
"shards.qt",tv,
|
||||
"q", q,
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true");
|
||||
|
||||
// tv.fl diff from fl
|
||||
query("sort", "id asc",
|
||||
"qt",tv,
|
||||
"shards.qt",tv,
|
||||
"q", q,
|
||||
"fl", "*,score",
|
||||
"tv.fl", "test_basictv,test_offtv",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true");
|
||||
|
||||
// multi-valued tv.fl
|
||||
query("sort", "id asc",
|
||||
"qt",tv,
|
||||
"shards.qt",tv,
|
||||
"q", q,
|
||||
"fl", "*,score",
|
||||
"tv.fl", "test_basictv",
|
||||
"tv.fl","test_offtv",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true");
|
||||
// re-use fl glob
|
||||
query("sort", "id desc",
|
||||
"qt",tv,
|
||||
"shards.qt",tv,
|
||||
"q", q,
|
||||
"fl", "*,score",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true");
|
||||
// re-use fl, ignore things we can't handle
|
||||
query("sort", "id desc",
|
||||
"qt",tv,
|
||||
"shards.qt",tv,
|
||||
"q", q,
|
||||
"fl", "score,test_basictv,[docid],test_postv,val:sum(3,4)",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true");
|
||||
|
||||
// re-use (multi-valued) fl, ignore things we can't handle
|
||||
query("sort", "id desc",
|
||||
"qt",tv,
|
||||
"shards.qt",tv,
|
||||
"q", q,
|
||||
"fl", "score,test_basictv",
|
||||
"fl", "[docid],test_postv,val:sum(3,4)",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true");
|
||||
|
||||
// test some other options
|
||||
|
||||
query("sort", "id asc",
|
||||
"qt",tv,
|
||||
"shards.qt",tv,
|
||||
"q", q,
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true",
|
||||
TermVectorParams.DF, "true",
|
||||
TermVectorParams.OFFSETS, "true",
|
||||
TermVectorParams.POSITIONS, "true",
|
||||
TermVectorParams.TF_IDF, "true");
|
||||
|
||||
query("sort", "id desc",
|
||||
"qt",tv,
|
||||
"shards.qt",tv,
|
||||
"q", q,
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.ALL, "true");
|
||||
|
||||
// per field stuff
|
||||
|
||||
query("sort", "id desc",
|
||||
"qt",tv,
|
||||
"shards.qt",tv,
|
||||
"q", q,
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true",
|
||||
TermVectorParams.DF, "true",
|
||||
TermVectorParams.OFFSETS, "true",
|
||||
TermVectorParams.POSITIONS, "true",
|
||||
TermVectorParams.TF_IDF, "true",
|
||||
TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv",
|
||||
"f.test_posofftv." + TermVectorParams.POSITIONS, "false",
|
||||
"f.test_offtv." + TermVectorParams.OFFSETS, "false",
|
||||
"f.test_basictv." + TermVectorParams.DF, "false",
|
||||
"f.test_basictv." + TermVectorParams.TF, "false",
|
||||
"f.test_basictv." + TermVectorParams.TF_IDF, "false");
|
||||
}
|
||||
}
|
||||
}
|
|
@ -121,7 +121,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testBasics() throws Exception {
|
||||
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
|
@ -136,7 +136,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"tv.fl", "test_basictv,test_offtv",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
|
@ -150,7 +150,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"tv.fl","test_offtv",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
|
@ -162,7 +162,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"fl", "*,score",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
|
@ -176,7 +176,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"fl", "score,test_basictv,[docid],test_postv,val:sum(3,4)",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
|
@ -189,7 +189,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"fl", "[docid],test_postv,val:sum(3,4)",
|
||||
TermVectorComponent.COMPONENT_NAME, "true",
|
||||
TermVectorParams.TF, "true")
|
||||
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
|
||||
,"/termVectors=={'0':{'uniqueKey':'0'," +
|
||||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
|
@ -201,12 +201,12 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
public void testOptions() throws Exception {
|
||||
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
|
||||
, TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true")
|
||||
,"/termVectors/doc-0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':1}, 'df':2, 'tf-idf':0.5}"
|
||||
,"/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':1}, 'df':2, 'tf-idf':0.5}"
|
||||
);
|
||||
|
||||
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
|
||||
, TermVectorParams.ALL, "true")
|
||||
,"/termVectors/doc-0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':1}, 'df':2, 'tf-idf':0.5}"
|
||||
,"/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':1}, 'df':2, 'tf-idf':0.5}"
|
||||
);
|
||||
|
||||
// test each combination at random
|
||||
|
@ -217,7 +217,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
{ TermVectorParams.POSITIONS, "'positions':{'position':1}" },
|
||||
{ TermVectorParams.DF, "'df':2" },
|
||||
{ TermVectorParams.TF_IDF, "'tf-idf':0.5" } };
|
||||
StringBuilder expected = new StringBuilder("/termVectors/doc-0/test_posofftv/anoth=={");
|
||||
StringBuilder expected = new StringBuilder("/termVectors/0/test_posofftv/anoth=={");
|
||||
boolean first = true;
|
||||
for (int i = 0; i < options.length; i++) {
|
||||
final boolean use = random().nextBoolean();
|
||||
|
@ -248,59 +248,13 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
,"f.test_basictv." + TermVectorParams.TF, "false"
|
||||
,"f.test_basictv." + TermVectorParams.TF_IDF, "false"
|
||||
)
|
||||
,"/termVectors/doc-0/test_basictv=={'anoth':{},'titl':{}}"
|
||||
,"/termVectors/doc-0/test_postv/anoth=={'tf':1, 'positions':{'position':1}, 'df':2, 'tf-idf':0.5}"
|
||||
,"/termVectors/doc-0/test_offtv/anoth=={'tf':1, 'df':2, 'tf-idf':0.5}"
|
||||
,"/termVectors/0/test_basictv=={'anoth':{},'titl':{}}"
|
||||
,"/termVectors/0/test_postv/anoth=={'tf':1, 'positions':{'position':1}, 'df':2, 'tf-idf':0.5}"
|
||||
,"/termVectors/0/test_offtv/anoth=={'tf':1, 'df':2, 'tf-idf':0.5}"
|
||||
,"/termVectors/warnings=={ 'noTermVectors':['test_notv'], 'noPositions':['test_basictv', 'test_offtv'], 'noOffsets':['test_basictv', 'test_postv']}"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// TODO: this test is really fragile since it pokes around in solr's guts and makes many assumptions.
|
||||
// it should be rewritten to use the real distributed interface
|
||||
@Test
|
||||
public void testDistributed() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
TermVectorComponent tvComp = (TermVectorComponent) core.getSearchComponent("tvComponent");
|
||||
assertTrue("tvComp is null and it shouldn't be", tvComp != null);
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CommonParams.Q, "id:0");
|
||||
params.add(CommonParams.QT, "tvrh");
|
||||
params.add(TermVectorParams.TF, "true");
|
||||
params.add(TermVectorParams.DF, "true");
|
||||
params.add(TermVectorParams.OFFSETS, "true");
|
||||
params.add(TermVectorParams.POSITIONS, "true");
|
||||
params.add(TermVectorComponent.COMPONENT_NAME, "true");
|
||||
|
||||
ResponseBuilder rb = new ResponseBuilder(new LocalSolrQueryRequest(core, params), new SolrQueryResponse(), (List)Arrays.asList(tvComp));
|
||||
rb.stage = ResponseBuilder.STAGE_GET_FIELDS;
|
||||
rb.shards = new String[]{"localhost:0", "localhost:1", "localhost:2", "localhost:3"};//we don't actually call these, since we are going to invoke distributedProcess directly
|
||||
rb.resultIds = new HashMap<Object, ShardDoc>();
|
||||
|
||||
rb.outgoing = new ArrayList<ShardRequest>();
|
||||
//one doc per shard, but make sure there are enough docs to go around
|
||||
for (int i = 0; i < rb.shards.length; i++){
|
||||
ShardDoc doc = new ShardDoc();
|
||||
doc.id = i; //must be a valid doc that was indexed.
|
||||
doc.score = 1 - (i / (float)rb.shards.length);
|
||||
doc.positionInResponse = i;
|
||||
doc.shard = rb.shards[i];
|
||||
doc.orderInShard = 0;
|
||||
rb.resultIds.put(doc.id, doc);
|
||||
}
|
||||
|
||||
int result = tvComp.distributedProcess(rb);
|
||||
assertTrue(result + " does not equal: " + ResponseBuilder.STAGE_DONE, result == ResponseBuilder.STAGE_DONE);
|
||||
//one outgoing per shard
|
||||
assertTrue("rb.outgoing Size: " + rb.outgoing.size() + " is not: " + rb.shards.length, rb.outgoing.size() == rb.shards.length);
|
||||
for (ShardRequest request : rb.outgoing) {
|
||||
ModifiableSolrParams solrParams = request.params;
|
||||
log.info("Shard: " + Arrays.asList(request.shards) + " Params: " + solrParams);
|
||||
}
|
||||
|
||||
rb.req.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue