mirror of https://github.com/apache/lucene.git
SOLR-1556: added per field capabilities to TermVectorComponent, also error msgs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@960204 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f8056de0b0
commit
5ba4331613
|
@ -188,6 +188,12 @@ New Features
|
|||
* SOLR-1974: Add LimitTokenCountFilterFactory. (koji)
|
||||
|
||||
* SOLR-1966: QueryElevationComponent can now return just the included results in the elevation file (gsingers, yonik)
|
||||
|
||||
* SOLR-1556: TermVectorComponent now supports per field overrides. Also, it now throws an error
|
||||
if passed in fields do not exist and warnings
|
||||
if fields that do not have term vector options (termVectors, offsets, positions)
|
||||
that align with the schema declaration. It also
|
||||
will now return warnings about (gsingers)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.document.SetBasedFieldSelector;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermVectorMapper;
|
||||
import org.apache.lucene.index.TermVectorOffsetInfo;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
|
@ -19,9 +21,11 @@ import org.apache.solr.common.util.NamedList;
|
|||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.DocListAndSet;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -33,6 +37,7 @@ import java.util.HashMap;
|
|||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
|
@ -78,27 +83,90 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
NamedList termVectors = new NamedList();
|
||||
rb.rsp.add(TERM_VECTORS, termVectors);
|
||||
FieldOptions allFields = new FieldOptions();
|
||||
//figure out what options we have, and try to get the appropriate vector
|
||||
boolean termFreq = params.getBool(TermVectorParams.TF, false);
|
||||
boolean positions = params.getBool(TermVectorParams.POSITIONS, false);
|
||||
boolean offsets = params.getBool(TermVectorParams.OFFSETS, false);
|
||||
boolean docFreq = params.getBool(TermVectorParams.DF, false);
|
||||
boolean tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
|
||||
allFields.termFreq = params.getBool(TermVectorParams.TF, false);
|
||||
allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
|
||||
allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
|
||||
allFields.docFreq = params.getBool(TermVectorParams.DF, false);
|
||||
allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
|
||||
//boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
|
||||
//short cut to all values.
|
||||
boolean all = params.getBool(TermVectorParams.ALL, false);
|
||||
if (all == true){
|
||||
termFreq = true;
|
||||
positions = true;
|
||||
offsets = true;
|
||||
docFreq = true;
|
||||
tfIdf = true;
|
||||
if (all == true) {
|
||||
allFields.termFreq = true;
|
||||
allFields.positions = true;
|
||||
allFields.offsets = true;
|
||||
allFields.docFreq = true;
|
||||
allFields.tfIdf = true;
|
||||
}
|
||||
|
||||
String[] fields = params.getParams(TermVectorParams.FIELDS);
|
||||
if (fields == null) {
|
||||
fields = params.getParams(CommonParams.FL);
|
||||
String fldLst = params.get(TermVectorParams.FIELDS);
|
||||
if (fldLst == null) {
|
||||
fldLst = params.get(CommonParams.FL);
|
||||
}
|
||||
|
||||
//use this to validate our fields
|
||||
IndexSchema schema = rb.req.getSchema();
|
||||
//Build up our per field mapping
|
||||
Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
|
||||
NamedList warnings = new NamedList();
|
||||
List<String> noTV = new ArrayList<String>();
|
||||
List<String> noPos = new ArrayList<String>();
|
||||
List<String> noOff = new ArrayList<String>();
|
||||
|
||||
//we have specific fields to retrieve
|
||||
if (fldLst != null) {
|
||||
String [] fields = SolrPluginUtils.split(fldLst);
|
||||
for (String field : fields) {
|
||||
SchemaField sf = schema.getFieldOrNull(field);
|
||||
if (sf != null) {
|
||||
if (sf.storeTermVector()) {
|
||||
FieldOptions option = fieldOptions.get(field);
|
||||
if (option == null) {
|
||||
option = new FieldOptions();
|
||||
option.fieldName = field;
|
||||
fieldOptions.put(field, option);
|
||||
}
|
||||
//get the per field mappings
|
||||
option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
|
||||
option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
|
||||
option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
|
||||
//Validate these are even an option
|
||||
option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
|
||||
if (option.positions == true && sf.storeTermPositions() == false){
|
||||
noPos.add(field);
|
||||
}
|
||||
option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
|
||||
if (option.offsets == true && sf.storeTermOffsets() == false){
|
||||
noOff.add(field);
|
||||
}
|
||||
} else {//field doesn't have term vectors
|
||||
noTV.add(field);
|
||||
}
|
||||
} else {
|
||||
//field doesn't exist
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
|
||||
}
|
||||
}
|
||||
} //else, deal with all fields
|
||||
boolean hasWarnings = false;
|
||||
if (noTV.isEmpty() == false) {
|
||||
warnings.add("noTermVectors", noTV);
|
||||
hasWarnings = true;
|
||||
}
|
||||
if (noPos.isEmpty() == false) {
|
||||
warnings.add("noPositions", noPos);
|
||||
hasWarnings = true;
|
||||
}
|
||||
if (noOff.isEmpty() == false) {
|
||||
warnings.add("noOffsets", noOff);
|
||||
hasWarnings = true;
|
||||
}
|
||||
if (hasWarnings == true) {
|
||||
termVectors.add("warnings", warnings);
|
||||
}
|
||||
|
||||
DocListAndSet listAndSet = rb.getResults();
|
||||
List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
|
||||
Iterator<Integer> iter;
|
||||
|
@ -112,22 +180,43 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
IndexReader reader = searcher.getReader();
|
||||
//the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
|
||||
TVMapper mapper = new TVMapper(fields, reader, termFreq, positions, offsets, docFreq, tfIdf);
|
||||
IndexSchema schema = rb.req.getSchema();
|
||||
String uniqFieldName = schema.getUniqueKeyField().getName();
|
||||
//Only load the id field
|
||||
SchemaField keyField = schema.getUniqueKeyField();
|
||||
String uniqFieldName = null;
|
||||
if (keyField != null) {
|
||||
uniqFieldName = keyField.getName();
|
||||
}
|
||||
//Only load the id field to get the uniqueKey of that field
|
||||
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.singleton(uniqFieldName), Collections.<String>emptySet());
|
||||
TVMapper mapper = new TVMapper(reader);
|
||||
mapper.fieldOptions = allFields; //this will only stay set if fieldOptions.isEmpty() (in other words, only if the user didn't set any fields)
|
||||
while (iter.hasNext()) {
|
||||
Integer docId = iter.next();
|
||||
NamedList docNL = new NamedList();
|
||||
termVectors.add("doc-" + docId, docNL);
|
||||
mapper.docNL = docNL;
|
||||
Document document = reader.document(docId, fieldSelector);
|
||||
String uniqId = document.get(uniqFieldName);
|
||||
docNL.add("uniqueKey", uniqId);
|
||||
reader.getTermFreqVector(docId, mapper);
|
||||
termVectors.add("doc-" + docId, docNL);
|
||||
|
||||
if (keyField != null) {
|
||||
Document document = reader.document(docId, fieldSelector);
|
||||
Fieldable uniqId = document.getField(uniqFieldName);
|
||||
String uniqVal = null;
|
||||
if (uniqId != null) {
|
||||
uniqVal = keyField.getType().storedToReadable(uniqId);
|
||||
}
|
||||
if (uniqVal != null) {
|
||||
docNL.add("uniqueKey", uniqVal);
|
||||
termVectors.add("uniqueKeyFieldName", uniqFieldName);
|
||||
}
|
||||
}
|
||||
if (fieldOptions.isEmpty() == false) {
|
||||
for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
|
||||
mapper.fieldOptions = entry.getValue();
|
||||
reader.getTermFreqVector(docId, entry.getKey(), mapper);
|
||||
}
|
||||
} else {
|
||||
//deal with all fields by using the allFieldMapper
|
||||
reader.getTermFreqVector(docId, mapper);
|
||||
}
|
||||
}
|
||||
termVectors.add("uniqueKeyFieldName", uniqFieldName);
|
||||
}
|
||||
|
||||
private List<Integer> getInts(String[] vals) {
|
||||
|
@ -186,43 +275,27 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
|
||||
private static class TVMapper extends TermVectorMapper {
|
||||
private NamedList docNL;
|
||||
private IndexReader reader;
|
||||
private Set<String> fields;
|
||||
private boolean termFreq, positions, offsets, docFreq, tfIdf;
|
||||
private NamedList docNL;
|
||||
|
||||
//needs to be set for each new field
|
||||
FieldOptions fieldOptions;
|
||||
|
||||
//internal vars not passed in by construction
|
||||
private boolean map, useOffsets, usePositions;
|
||||
private boolean useOffsets, usePositions;
|
||||
//private Map<String, Integer> idfCache;
|
||||
private NamedList fieldNL;
|
||||
private Term currentTerm;
|
||||
|
||||
/**
|
||||
*
|
||||
* @param fields
|
||||
* @param reader
|
||||
* @param termFreq
|
||||
* @param positions true if the TVM should try to get position info from the Term Vector, assuming it is present
|
||||
* @param offsets true if the TVM should try to get offset info from the Term Vector, assuming it is present
|
||||
* @param docFreq
|
||||
* @param tfIdf
|
||||
*/
|
||||
public TVMapper(String[] fields, IndexReader reader, boolean termFreq, boolean positions, boolean offsets, boolean docFreq, boolean tfIdf) {
|
||||
|
||||
public TVMapper(IndexReader reader) {
|
||||
this.reader = reader;
|
||||
this.fields = fields != null ? new HashSet<String>(Arrays.asList(fields)) : Collections.<String>emptySet();
|
||||
this.termFreq = termFreq;
|
||||
this.positions = positions;
|
||||
this.offsets = offsets;
|
||||
this.docFreq = docFreq;
|
||||
this.tfIdf = tfIdf;
|
||||
|
||||
}
|
||||
|
||||
public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
|
||||
if (map == true && fieldNL != null) {
|
||||
NamedList termInfo = new NamedList();
|
||||
NamedList termInfo = new NamedList();
|
||||
fieldNL.add(term, termInfo);
|
||||
if (termFreq == true) {
|
||||
if (fieldOptions.termFreq == true) {
|
||||
termInfo.add("tf", frequency);
|
||||
}
|
||||
if (useOffsets == true) {
|
||||
|
@ -237,18 +310,17 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
if (usePositions == true) {
|
||||
NamedList positionsNL = new NamedList();
|
||||
for (int i = 0; i < positions.length; i++) {
|
||||
positionsNL.add("position", positions[i]);
|
||||
positionsNL.add("position", positions[i]);
|
||||
}
|
||||
termInfo.add("positions", positionsNL);
|
||||
}
|
||||
if (docFreq == true) {
|
||||
if (fieldOptions.docFreq == true) {
|
||||
termInfo.add("df", getDocFreq(term));
|
||||
}
|
||||
if (tfIdf == true){
|
||||
if (fieldOptions.tfIdf == true) {
|
||||
double tfIdfVal = ((double) frequency) / getDocFreq(term);
|
||||
termInfo.add("tf-idf", tfIdfVal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int getDocFreq(String term) {
|
||||
|
@ -270,29 +342,23 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
|
||||
|
||||
if (docFreq == true && reader != null) {
|
||||
if (fieldOptions.docFreq == true && reader != null) {
|
||||
this.currentTerm = new Term(field);
|
||||
}
|
||||
useOffsets = storeOffsets && offsets;
|
||||
usePositions = storePositions && positions;
|
||||
if (fields.isEmpty() || fields.contains(field)) {
|
||||
map = true;
|
||||
fieldNL = new NamedList();
|
||||
docNL.add(field, fieldNL);
|
||||
} else {
|
||||
map = false;
|
||||
fieldNL = null;
|
||||
}
|
||||
useOffsets = storeOffsets && fieldOptions.offsets;
|
||||
usePositions = storePositions && fieldOptions.positions;
|
||||
fieldNL = new NamedList();
|
||||
docNL.add(field, fieldNL);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isIgnoringPositions() {
|
||||
return this.positions == false; // if we are not interested in positions, then return true telling Lucene to skip loading them
|
||||
return fieldOptions.positions == false; // if we are not interested in positions, then return true telling Lucene to skip loading them
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isIgnoringOffsets() {
|
||||
return this.offsets == false; // if we are not interested in offsets, then return true telling Lucene to skip loading them
|
||||
return fieldOptions.offsets == false; // if we are not interested in offsets, then return true telling Lucene to skip loading them
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -301,6 +367,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
|
||||
//////////////////////// NamedListInitializedPlugin methods //////////////////////
|
||||
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
super.init(args);
|
||||
|
@ -327,3 +394,8 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
return "A Component for working with Term Vectors";
|
||||
}
|
||||
}
|
||||
|
||||
class FieldOptions {
|
||||
String fieldName;
|
||||
boolean termFreq, positions, offsets, docFreq, tfIdf;
|
||||
}
|
|
@ -1,7 +1,6 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
|
@ -19,6 +18,7 @@ import static org.junit.Assert.*;
|
|||
import java.util.HashMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -46,18 +46,77 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml","schema.xml");
|
||||
|
||||
assertNull(h.validateUpdate(adoc("id", "0", "test_posofftv", "This is a title and another title")));
|
||||
assertNull(h.validateUpdate(adoc("id", "1", "test_posofftv",
|
||||
"The quick reb fox jumped over the lazy brown dogs.")));
|
||||
assertNull(h.validateUpdate(adoc("id", "2", "test_posofftv", "This is a document")));
|
||||
assertNull(h.validateUpdate(adoc("id", "3", "test_posofftv", "another document")));
|
||||
assertNull(h.validateUpdate(adoc("id", "0",
|
||||
"test_posofftv", "This is a title and another title",
|
||||
"test_basictv", "This is a title and another title",
|
||||
"test_notv", "This is a title and another title",
|
||||
"test_postv", "This is a title and another title",
|
||||
"test_offtv", "This is a title and another title"
|
||||
)));
|
||||
assertNull(h.validateUpdate(adoc("id", "1",
|
||||
"test_posofftv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_basictv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_notv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_postv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_offtv", "The quick reb fox jumped over the lazy brown dogs."
|
||||
)));
|
||||
assertNull(h.validateUpdate(adoc("id", "2",
|
||||
"test_posofftv", "This is a document",
|
||||
"test_basictv", "This is a document",
|
||||
"test_notv", "This is a document",
|
||||
"test_postv", "This is a document",
|
||||
"test_offtv", "This is a document"
|
||||
)));
|
||||
assertNull(h.validateUpdate(adoc("id", "3",
|
||||
"test_posofftv", "another document",
|
||||
"test_basictv", "another document",
|
||||
"test_notv", "another document",
|
||||
"test_postv", "another document",
|
||||
"test_offtv", "another document"
|
||||
)));
|
||||
//bunch of docs that are variants on blue
|
||||
assertNull(h.validateUpdate(adoc("id", "4", "test_posofftv", "blue")));
|
||||
assertNull(h.validateUpdate(adoc("id", "5", "test_posofftv", "blud")));
|
||||
assertNull(h.validateUpdate(adoc("id", "6", "test_posofftv", "boue")));
|
||||
assertNull(h.validateUpdate(adoc("id", "7", "test_posofftv", "glue")));
|
||||
assertNull(h.validateUpdate(adoc("id", "8", "test_posofftv", "blee")));
|
||||
assertNull(h.validateUpdate(adoc("id", "9", "test_posofftv", "blah")));
|
||||
assertNull(h.validateUpdate(adoc("id", "4",
|
||||
"test_posofftv", "blue",
|
||||
"test_basictv", "blue",
|
||||
"test_notv", "blue",
|
||||
"test_postv", "blue",
|
||||
"test_offtv", "blue"
|
||||
)));
|
||||
assertNull(h.validateUpdate(adoc("id", "5",
|
||||
"test_posofftv", "blud",
|
||||
"test_basictv", "blud",
|
||||
"test_notv", "blud",
|
||||
"test_postv", "blud",
|
||||
"test_offtv", "blud"
|
||||
)));
|
||||
assertNull(h.validateUpdate(adoc("id", "6",
|
||||
"test_posofftv", "boue",
|
||||
"test_basictv", "boue",
|
||||
"test_notv", "boue",
|
||||
"test_postv", "boue",
|
||||
"test_offtv", "boue"
|
||||
)));
|
||||
assertNull(h.validateUpdate(adoc("id", "7",
|
||||
"test_posofftv", "glue",
|
||||
"test_basictv", "glue",
|
||||
"test_notv", "glue",
|
||||
"test_postv", "glue",
|
||||
"test_offtv", "glue"
|
||||
)));
|
||||
assertNull(h.validateUpdate(adoc("id", "8",
|
||||
"test_posofftv", "blee",
|
||||
"test_basictv", "blee",
|
||||
"test_notv", "blee",
|
||||
"test_postv", "blee",
|
||||
"test_offtv", "blee"
|
||||
)));
|
||||
assertNull(h.validateUpdate(adoc("id", "9",
|
||||
"test_posofftv", "blah",
|
||||
"test_basictv", "blah",
|
||||
"test_notv", "blah",
|
||||
"test_postv", "blah",
|
||||
"test_offtv", "blah"
|
||||
)));
|
||||
|
||||
assertNull(h.validateUpdate(commit()));
|
||||
}
|
||||
|
@ -80,10 +139,10 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
NamedList values = rsp.getValues();
|
||||
NamedList termVectors = (NamedList) values.get(TermVectorComponent.TERM_VECTORS);
|
||||
assertTrue("termVectors is null and it shouldn't be", termVectors != null);
|
||||
// System.out.println("TVs:" + termVectors);
|
||||
System.out.println("TVs:" + termVectors);
|
||||
NamedList doc = (NamedList) termVectors.getVal(0);
|
||||
assertTrue("doc is null and it shouldn't be", doc != null);
|
||||
assertTrue(doc.size() + " does not equal: " + 2, doc.size() == 2);
|
||||
assertEquals(doc.size(), 5);
|
||||
NamedList field = (NamedList) doc.get("test_posofftv");
|
||||
assertTrue("field is null and it shouldn't be", field != null);
|
||||
assertTrue(field.size() + " does not equal: " + 2, field.size() == 2);
|
||||
|
@ -127,7 +186,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
// System.out.println("TVs: " + termVectors);
|
||||
NamedList doc = (NamedList) termVectors.getVal(0);
|
||||
assertTrue("doc is null and it shouldn't be", doc != null);
|
||||
assertTrue(doc.size() + " does not equal: " + 2, doc.size() == 2);
|
||||
assertEquals(doc.size(), 5);
|
||||
NamedList offtv = (NamedList) doc.get("test_posofftv");
|
||||
assertTrue("offtv is null and it shouldn't be", offtv != null);
|
||||
assertTrue("offtv Size: " + offtv.size() + " is not: " + 2, offtv.size() == 2);
|
||||
|
@ -144,8 +203,114 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
Double tfIdf = (Double) another.get("tf-idf");
|
||||
assertTrue("tfIdf is null and it shouldn't be", tfIdf != null);
|
||||
assertTrue(tfIdf + " does not equal: " + 0.5, tfIdf == 0.5);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
<field name="test_basictv" type="text" termVectors="true"/>
|
||||
<field name="test_notv" type="text" termVectors="false"/>
|
||||
<field name="test_postv" type="text" termVectors="true" termPositions="true"/>
|
||||
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
|
||||
<field name="test_posofftv" type="text" termVectors="true"
|
||||
termPositions="true" termOffsets="true"/>
|
||||
*/
|
||||
@Test
|
||||
public void testPerField() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
SearchComponent tvComp = core.getSearchComponent("tvComponent");
|
||||
assertTrue("tvComp is null and it shouldn't be", tvComp != null);
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CommonParams.Q, "id:0");
|
||||
params.add(CommonParams.QT, "tvrh");
|
||||
params.add(TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv");
|
||||
params.add(TermVectorParams.TF, "true");
|
||||
params.add(TermVectorParams.DF, "true");
|
||||
params.add(TermVectorParams.OFFSETS, "true");
|
||||
params.add(TermVectorParams.POSITIONS, "true");
|
||||
params.add(TermVectorParams.TF_IDF, "true");
|
||||
params.add(TermVectorComponent.COMPONENT_NAME, "true");
|
||||
//per field
|
||||
params.add("f.test_posofftv." + TermVectorParams.POSITIONS, "false");
|
||||
params.add("f.test_offtv." + TermVectorParams.OFFSETS, "false");
|
||||
params.add("f.test_basictv." + TermVectorParams.DF, "false");
|
||||
params.add("f.test_basictv." + TermVectorParams.TF, "false");
|
||||
params.add("f.test_basictv." + TermVectorParams.TF_IDF, "false");
|
||||
SolrRequestHandler handler = core.getRequestHandler("tvrh");
|
||||
SolrQueryResponse rsp;
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
NamedList values = rsp.getValues();
|
||||
NamedList termVectors = (NamedList) values.get(TermVectorComponent.TERM_VECTORS);
|
||||
assertTrue("termVectors is null and it shouldn't be", termVectors != null);
|
||||
System.out.println("TVs: " + termVectors);
|
||||
NamedList doc = (NamedList) termVectors.get("doc-0");
|
||||
assertTrue("doc is null and it shouldn't be", doc != null);
|
||||
assertEquals(doc.size(), 5);
|
||||
NamedList vec;
|
||||
NamedList another;
|
||||
NamedList offsets;
|
||||
NamedList pos;
|
||||
Integer df;
|
||||
Double val;
|
||||
vec = (NamedList) doc.get("test_posofftv");
|
||||
assertNotNull(vec);
|
||||
assertEquals(vec.size(), 2);
|
||||
another = (NamedList) vec.get("anoth");
|
||||
offsets = (NamedList) another.get("offsets");
|
||||
assertNotNull(offsets);
|
||||
assertTrue(offsets.size() > 0);
|
||||
pos = (NamedList) another.get("positions");
|
||||
//positions should be null, since we turned them off
|
||||
assertNull(pos);
|
||||
df = (Integer) another.get("df");
|
||||
assertNotNull(df);
|
||||
assertTrue(df == 2);
|
||||
val = (Double) another.get("tf-idf");
|
||||
assertTrue("tfIdf is null and it shouldn't be", val != null);
|
||||
assertTrue(val + " does not equal: " + 0.5, val == 0.5);
|
||||
//Try out the other fields, too
|
||||
vec = (NamedList) doc.get("test_offtv");
|
||||
assertNotNull(vec);
|
||||
assertEquals(vec.size(), 2);
|
||||
another = (NamedList) vec.get("anoth");
|
||||
offsets = (NamedList) another.get("offsets");
|
||||
assertNull(offsets);
|
||||
pos = (NamedList) another.get("positions");
|
||||
//positions should be null, since we turned them off
|
||||
assertNull(vec.toString(), pos);
|
||||
df = (Integer) another.get("df");
|
||||
assertNotNull(df);
|
||||
assertTrue(df == 2);
|
||||
val = (Double) another.get("tf-idf");
|
||||
assertTrue("tfIdf is null and it shouldn't be", val != null);
|
||||
assertTrue(val + " does not equal: " + 0.5, val == 0.5);
|
||||
vec = (NamedList) doc.get("test_basictv");
|
||||
assertNotNull(vec);
|
||||
assertEquals(vec.size(), 2);
|
||||
another = (NamedList) vec.get("anoth");
|
||||
offsets = (NamedList) another.get("offsets");
|
||||
assertNull(offsets);
|
||||
pos = (NamedList) another.get("positions");
|
||||
assertNull(pos);
|
||||
df = (Integer) another.get("df");
|
||||
assertNull(df);
|
||||
val = (Double) another.get("tf-idf");
|
||||
assertNull(val);
|
||||
val = (Double) another.get("tf");
|
||||
assertNull(val);
|
||||
//Now validate we have error messages
|
||||
NamedList warnings = (NamedList) termVectors.get("warnings");
|
||||
assertNotNull(warnings);
|
||||
List<String> theList;
|
||||
theList = (List<String>) warnings.get("noTermVectors");
|
||||
assertNotNull(theList);
|
||||
assertEquals(theList.size(), 1);
|
||||
theList = (List<String>) warnings.get("noPositions");
|
||||
assertNotNull(theList);
|
||||
assertEquals(theList.size(), 2);
|
||||
theList = (List<String>) warnings.get("noOffsets");
|
||||
assertNotNull(theList);
|
||||
assertEquals(theList.size(), 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -165,14 +330,14 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
NamedList values = rsp.getValues();
|
||||
NamedList termVectors = (NamedList) values.get(TermVectorComponent.TERM_VECTORS);
|
||||
assertTrue("termVectors is null and it shouldn't be", termVectors != null);
|
||||
NamedList doc = (NamedList) termVectors.getVal(0);
|
||||
assertTrue("doc is null and it shouldn't be", doc != null);
|
||||
assertTrue(doc.size() + " does not equal: " + 1, doc.size() == 1);
|
||||
Exception exception = rsp.getException();
|
||||
assertNotNull(exception);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testDistributed() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
|
|
Loading…
Reference in New Issue