SOLR-3537: Fixed TermVectorComponent so that it will not fail if the fl param contains globs or psuedo-fields

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1349012 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2012-06-11 21:08:48 +00:00
parent 2ac3eb27c4
commit 91b1e22e99
3 changed files with 128 additions and 9 deletions

View File

@ -491,6 +491,9 @@ Bug Fixes
* SOLR-2923: IllegalArgumentException when using useFilterForSortedQuery on an
empty index. (Adrien Grand via Mark Miller)
* SOLR-3537: Fixed TermVectorComponent so that it will not fail if the fl
param contains globs or psuedo-fields (hossman)
Other Changes
----------------------

View File

@ -1,9 +1,13 @@
package org.apache.solr.handler.component;
import java.io.IOException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -27,6 +31,7 @@ import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.SolrIndexSearcher;
@ -81,6 +86,51 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
protected NamedList initParams;
public static final String TERM_VECTORS = "termVectors";
/**
* Helper method for determining the list of fields that we should
* try to find term vectors on.
* <p>
* Does simple (non-glob-supporting) parsing on the
* {@link TermVectorParams#FIELDS} param if specified, otherwise it returns
* the concrete field values specified in {@link CommonParams.FL} --
* ignoring functions, transformers, or literals.
* </p>
* <p>
* If "fl=*" is used, or neither param is specified, then <code>null</code>
* will be returned. If the empty set is returned, it means the "fl"
* specified consisted entirely of things that are not real fields
* (ie: functions, transformers, partial-globs, score, etc...) and not
* supported by this component.
* </p>
*/
private Set<String> getFields(ResponseBuilder rb) {
SolrParams params = rb.req.getParams();
String[] fldLst = params.getParams(TermVectorParams.FIELDS);
if (null == fldLst || 0 == fldLst.length ||
(1 == fldLst.length && 0 == fldLst[0].length())) {
// no tv.fl, parse the main fl
ReturnFields rf = new ReturnFields
(params.getParams(CommonParams.FL), rb.req);
if (rf.wantsAllFields()) {
return null;
}
Set<String> fieldNames = rf.getLuceneFieldNames();
return (null != fieldNames) ?
fieldNames :
// return empty set indicating no fields should be used
Collections.<String>emptySet();
}
// otherwise us the raw fldList as is, no special parsing or globs
Set<String> fieldNames = new LinkedHashSet<String>();
for (String fl : fldLst) {
fieldNames.addAll(Arrays.asList(SolrPluginUtils.split(fl)));
}
return fieldNames;
}
@Override
public void process(ResponseBuilder rb) throws IOException {
@ -108,11 +158,6 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
allFields.tfIdf = true;
}
String fldLst = params.get(TermVectorParams.FIELDS);
if (fldLst == null) {
fldLst = params.get(CommonParams.FL);
}
//use this to validate our fields
IndexSchema schema = rb.req.getSchema();
//Build up our per field mapping
@ -122,10 +167,14 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
List<String> noPos = new ArrayList<String>();
List<String> noOff = new ArrayList<String>();
//we have specific fields to retrieve
if (fldLst != null) {
String [] fields = SolrPluginUtils.split(fldLst);
Set<String> fields = getFields(rb);
if ( null != fields ) {
//we have specific fields to retrieve, or no fields
for (String field : fields) {
// workarround SOLR-3523
if (null == field || "score".equals(field)) continue;
SchemaField sf = schema.getFieldOrNull(field);
if (sf != null) {
if (sf.storeTermVector()) {
@ -240,7 +289,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
termVectors.add("uniqueKeyFieldName", uniqFieldName);
}
}
if (!fieldOptions.isEmpty()) {
if ( null != fields ) {
for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
final String field = entry.getKey();
final Terms vector = reader.getTermVector(docId, field);

View File

@ -128,6 +128,73 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
// tv.fl diff from fl
assertJQ(req("json.nl","map",
"qt",tv,
"q", "id:0",
"fl", "*,score",
"tv.fl", "test_basictv,test_offtv",
TermVectorComponent.COMPONENT_NAME, "true",
TermVectorParams.TF, "true")
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
// multi-valued tv.fl
assertJQ(req("json.nl","map",
"qt",tv,
"q", "id:0",
"fl", "*,score",
"tv.fl", "test_basictv",
"tv.fl","test_offtv",
TermVectorComponent.COMPONENT_NAME, "true",
TermVectorParams.TF, "true")
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
// re-use fl glob
assertJQ(req("json.nl","map",
"qt",tv,
"q", "id:0",
"fl", "*,score",
TermVectorComponent.COMPONENT_NAME, "true",
TermVectorParams.TF, "true")
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
// re-use fl, ignore things we can't handle
assertJQ(req("json.nl","map",
"qt",tv,
"q", "id:0",
"fl", "score,test_basictv,[docid],test_postv,val:sum(3,4)",
TermVectorComponent.COMPONENT_NAME, "true",
TermVectorParams.TF, "true")
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
// re-use (multi-valued) fl, ignore things we can't handle
assertJQ(req("json.nl","map",
"qt",tv,
"q", "id:0",
"fl", "score,test_basictv",
"fl", "[docid],test_postv,val:sum(3,4)",
TermVectorComponent.COMPONENT_NAME, "true",
TermVectorParams.TF, "true")
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
}
@Test