SOLR-5911: term vector payload support

in schema & TermVectorComponent & LukeRequestHandler

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1669492 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2015-03-27 02:01:38 +00:00
parent 493d3fd2f0
commit ef0209189e
15 changed files with 175 additions and 79 deletions

View File

@ -230,6 +230,9 @@ New Features
* SOLR-7240: '/' redirects to '/solr/' for convenience (Martijn Koster, hossman)
* SOLR-5911: Added payload support for term vectors. New "termPayloads" option for fields
/ types in the schema, and "tv.payloads" param for the term vector component.
(Mike McCandless, David Smiley)
Bug Fixes
----------------------

View File

@ -20,16 +20,37 @@ package org.apache.solr.handler.admin;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
@ -48,18 +69,18 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.update.SolrIndexWriter;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.CopyField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
import static org.apache.lucene.index.IndexOptions.DOCS;
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
/**
* This handler exposes the internal lucene index. It is inspired by and
@ -185,6 +206,7 @@ public class LukeRequestHandler extends RequestHandlerBase
flags.append( (f != null && f.fieldType().storeTermVectors()) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorOffsets()) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorPositions()) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorPayloads()) ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
flags.append( (f != null && DOCS == opts ) ?
@ -223,6 +245,7 @@ public class LukeRequestHandler extends RequestHandlerBase
flags.append( (f != null && f.storeTermVector() ) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermPayloads() ) ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
flags.append( (f != null &&
f.omitTermFreqAndPositions() ) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );

View File

@ -11,10 +11,10 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
@ -24,6 +24,7 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.TermVectorParams;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
@ -58,7 +59,7 @@ import org.apache.solr.util.plugin.SolrCoreAware;
* Return term vectors for the documents in a query result set.
* <p>
* Info available:
* term, frequency, position, offset, IDF.
* term, frequency, position, offset, payloads, IDF.
* <p>
* <b>Note</b> Returning IDF can be expensive.
*
@ -153,6 +154,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
allFields.termFreq = params.getBool(TermVectorParams.TF, false);
allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
allFields.payloads = params.getBool(TermVectorParams.PAYLOADS, false);
allFields.docFreq = params.getBool(TermVectorParams.DF, false);
allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
//boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
@ -161,6 +163,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
allFields.termFreq = true;
allFields.positions = true;
allFields.offsets = true;
allFields.payloads = true;
allFields.docFreq = true;
allFields.tfIdf = true;
}
@ -171,6 +174,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
List<String> noTV = new ArrayList<>();
List<String> noPos = new ArrayList<>();
List<String> noOff = new ArrayList<>();
List<String> noPay = new ArrayList<>();
Set<String> fields = getFields(rb);
if ( null != fields ) {
@ -207,6 +211,10 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey){
noOff.add(field);
}
option.payloads = params.getFieldBool(field, TermVectorParams.PAYLOADS, allFields.payloads);
if (option.payloads && !sf.storeTermPayloads() && !fieldIsUniqueKey){
noPay.add(field);
}
} else {//field doesn't have term vectors
if (!fieldIsUniqueKey) noTV.add(field);
}
@ -234,6 +242,10 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
warnings.add("noOffsets", noOff);
hasWarnings = true;
}
if (!noPay.isEmpty()) {
warnings.add("noPayloads", noPay);
hasWarnings = true;
}
if (hasWarnings) {
termVectors.add("warnings", warnings);
}
@ -341,22 +353,27 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
termInfo.add("tf", freq);
}
dpEnum = termsEnum.postings(null, dpEnum, PostingsEnum.ALL);
boolean useOffsets = false;
boolean usePositions = false;
int dpEnumFlags = 0;
dpEnumFlags |= fieldOptions.positions ? PostingsEnum.POSITIONS : 0;
//payloads require offsets
dpEnumFlags |= (fieldOptions.offsets || fieldOptions.payloads) ? PostingsEnum.OFFSETS : 0;
dpEnumFlags |= fieldOptions.payloads ? PostingsEnum.PAYLOADS : 0;
dpEnum = termsEnum.postings(null, dpEnum, dpEnumFlags);
boolean atNextDoc = false;
if (dpEnum != null) {
dpEnum.nextDoc();
usePositions = fieldOptions.positions;
useOffsets = fieldOptions.offsets;
atNextDoc = true;
}
NamedList<Integer> positionsNL = null;
NamedList<Number> theOffsets = null;
if (atNextDoc && dpEnumFlags != 0) {
NamedList<Integer> positionsNL = null;
NamedList<Number> theOffsets = null;
NamedList<String> thePayloads = null;
if (usePositions || useOffsets) {
for (int i = 0; i < freq; i++) {
final int pos = dpEnum.nextPosition();
if (usePositions && pos >= 0) {
if (fieldOptions.positions && pos >= 0) {
if (positionsNL == null) {
positionsNL = new NamedList<>();
termInfo.add("positions", positionsNL);
@ -364,19 +381,24 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
positionsNL.add("position", pos);
}
if (useOffsets && theOffsets == null) {
if (dpEnum.startOffset() == -1) {
useOffsets = false;
} else {
int startOffset = fieldOptions.offsets ? dpEnum.startOffset() : -1;
if (startOffset >= 0) {
if (theOffsets == null) {
theOffsets = new NamedList<>();
termInfo.add("offsets", theOffsets);
}
}
if (theOffsets != null) {
theOffsets.add("start", dpEnum.startOffset());
theOffsets.add("end", dpEnum.endOffset());
}
BytesRef payload = fieldOptions.payloads ? dpEnum.getPayload() : null;
if (payload != null) {
if (thePayloads == null) {
thePayloads = new NamedList<>();
termInfo.add("payloads", thePayloads);
}
thePayloads.add("payload", Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
}
}
}
@ -472,5 +494,5 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
/**
* Per-field switches recording which term vector details were requested for
* {@code fieldName}: term frequency, positions, offsets, payloads, document
* frequency and tf-idf.
*/
class FieldOptions {
String fieldName;
boolean termFreq, positions, offsets, docFreq, tfIdf;
boolean termFreq, positions, offsets, payloads, docFreq, tfIdf;
}

View File

@ -17,15 +17,33 @@ package org.apache.solr.schema;
* limitations under the License.
*/
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.EnumFieldSource;
import org.apache.lucene.search.*;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -43,17 +61,6 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
/***
* Field type for support of string values with custom sort order.
*/
@ -389,6 +396,7 @@ public class EnumField extends PrimitiveFieldType {
newType.setStoreTermVectors(field.storeTermVector());
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
newType.setStoreTermVectorPositions(field.storeTermPositions());
newType.setStoreTermVectorPayloads(field.storeTermPayloads());
newType.setNumericType(FieldType.NumericType.INT);
newType.setNumericPrecisionStep(DEFAULT_PRECISION_STEP);

View File

@ -17,8 +17,8 @@
package org.apache.solr.schema;
import java.util.Map;
import java.util.HashMap;
import java.util.Map;
/**
*
@ -52,13 +52,15 @@ public abstract class FieldProperties {
protected final static int STORE_OFFSETS = 0x00004000;
protected final static int DOC_VALUES = 0x00008000;
protected final static int STORE_TERMPAYLOADS = 0x00010000;
static final String[] propertyNames = {
"indexed", "tokenized", "stored",
"binary", "omitNorms", "omitTermFreqAndPositions",
"termVectors", "termPositions", "termOffsets",
"multiValued",
"sortMissingFirst","sortMissingLast","required", "omitPositions",
"storeOffsetsWithPositions", "docValues"
"storeOffsetsWithPositions", "docValues", "termPayloads"
};
static final Map<String,Integer> propertyMap = new HashMap<>();

View File

@ -17,8 +17,6 @@
package org.apache.solr.schema;
import static org.apache.lucene.analysis.util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
@ -67,6 +65,8 @@ import org.apache.solr.search.Sorting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.lucene.analysis.util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;
/**
* Base class for all field types used by an index schema.
*
@ -268,6 +268,7 @@ public abstract class FieldType extends FieldProperties {
newType.setStoreTermVectors(field.storeTermVector());
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
newType.setStoreTermVectorPositions(field.storeTermPositions());
newType.setStoreTermVectorPayloads(field.storeTermPayloads());
return createField(field.getName(), val, newType, boost);
}

View File

@ -28,12 +28,11 @@ import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
@ -178,6 +177,7 @@ public class PreAnalyzedField extends FieldType {
newType.setStoreTermVectors(field.storeTermVector());
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
newType.setStoreTermVectorPositions(field.storeTermPositions());
newType.setStoreTermVectorPayloads(field.storeTermPayloads());
return newType;
}

View File

@ -17,19 +17,18 @@
package org.apache.solr.schema;
import org.apache.solr.common.SolrException;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.search.SortField;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.QParser;
import org.apache.solr.response.TextResponseWriter;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.io.IOException;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.search.SortField;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
/**
* Encapsulates all information about a Field in a Solr Schema
@ -94,6 +93,7 @@ public final class SchemaField extends FieldProperties {
public boolean storeTermVector() { return (properties & STORE_TERMVECTORS)!=0; }
public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
public boolean storeTermPayloads() { return (properties & STORE_TERMPAYLOADS)!=0; }
public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
public boolean omitTermFreqAndPositions() { return (properties & OMIT_TF_POSITIONS)!=0; }
@ -236,7 +236,7 @@ public final class SchemaField extends FieldProperties {
if (on(falseProps,INDEXED)) {
int pp = (INDEXED
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS);
if (on(pp,trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting 'true' field options for non-indexed field:" + props);
}
@ -269,7 +269,7 @@ public final class SchemaField extends FieldProperties {
}
if (on(falseProps,STORE_TERMVECTORS)) {
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS);
if (on(pp,trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting termvector field options:" + props);
}
@ -323,6 +323,7 @@ public final class SchemaField extends FieldProperties {
properties.add(getPropertyName(STORE_TERMVECTORS), storeTermVector());
properties.add(getPropertyName(STORE_TERMPOSITIONS), storeTermPositions());
properties.add(getPropertyName(STORE_TERMOFFSETS), storeTermOffsets());
properties.add(getPropertyName(STORE_TERMPAYLOADS), storeTermPayloads());
properties.add(getPropertyName(OMIT_NORMS), omitNorms());
properties.add(getPropertyName(OMIT_TF_POSITIONS), omitTermFreqAndPositions());
properties.add(getPropertyName(OMIT_POSITIONS), omitPositions());

View File

@ -103,6 +103,16 @@
</analyzer>
</fieldType>
<!-- Text type whose analysis chain ends with TokenOffsetPayloadTokenFilterFactory,
so every token gets a payload; used by fields that test term vector payloads. -->
<fieldtype name="text_payload_tv" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory"/>
</analyzer>
</fieldtype>
<fieldType name="nametext" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
@ -478,6 +488,8 @@
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
<field name="test_posofftv" type="text" termVectors="true"
termPositions="true" termOffsets="true"/>
<field name="test_posoffpaytv" type="text_payload_tv" termVectors="true"
termPositions="true" termOffsets="true" termPayloads="true"/>
<!-- test highlit field settings -->
<field name="test_hlt" type="highlittext" indexed="true"/>

View File

@ -17,6 +17,8 @@
package org.apache.solr;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
@ -26,18 +28,10 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LazyDocument;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext.Context;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.English;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
@ -56,10 +50,7 @@ import org.apache.solr.schema.IndexSchemaFactory;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.update.DirectUpdateHandler2;
import org.apache.solr.util.RefCounted;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
/**
@ -540,6 +531,11 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 {
assertTrue(f.storeTermVector() && f.storeTermPositions() && f.storeTermOffsets());
assertTrue(luf.fieldType().storeTermVectorOffsets() && luf.fieldType().storeTermVectorPositions());
f = ischema.getField("test_posoffpaytv");
luf = f.createField("test", 0f);
assertTrue(f.storeTermVector() && f.storeTermPositions() && f.storeTermOffsets() && f.storeTermPayloads());
assertTrue(luf.fieldType().storeTermVectorOffsets() && luf.fieldType().storeTermVectorPositions() && luf.fieldType().storeTermVectorPayloads());
}
@Test

View File

@ -1,19 +1,13 @@
package org.apache.solr.handler.component;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.SolrCore;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.TermVectorParams;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.TermVectorParams;
import org.junit.BeforeClass;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -42,6 +36,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
initCore("solrconfig.xml","schema.xml");
assertU(adoc("id", "0",
"test_posoffpaytv", "This is a title and another title",
"test_posofftv", "This is a title and another title",
"test_basictv", "This is a title and another title",
"test_notv", "This is a title and another title",
@ -49,6 +44,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
"test_offtv", "This is a title and another title"
));
assertU(adoc("id", "1",
"test_posoffpaytv", "The quick reb fox jumped over the lazy brown dogs.",
"test_posofftv", "The quick reb fox jumped over the lazy brown dogs.",
"test_basictv", "The quick reb fox jumped over the lazy brown dogs.",
"test_notv", "The quick reb fox jumped over the lazy brown dogs.",
@ -56,6 +52,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
"test_offtv", "The quick reb fox jumped over the lazy brown dogs."
));
assertU(adoc("id", "2",
"test_posoffpaytv", "This is a document",
"test_posofftv", "This is a document",
"test_basictv", "This is a document",
"test_notv", "This is a document",
@ -63,6 +60,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
"test_offtv", "This is a document"
));
assertU(adoc("id", "3",
"test_posoffpaytv", "another document",
"test_posofftv", "another document",
"test_basictv", "another document",
"test_notv", "another document",
@ -71,6 +69,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
));
//bunch of docs that are variants on blue
assertU(adoc("id", "4",
"test_posoffpaytv", "blue",
"test_posofftv", "blue",
"test_basictv", "blue",
"test_notv", "blue",
@ -78,6 +77,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
"test_offtv", "blue"
));
assertU(adoc("id", "5",
"test_posoffpaytv", "blud",
"test_posofftv", "blud",
"test_basictv", "blud",
"test_notv", "blud",
@ -85,6 +85,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
"test_offtv", "blud"
));
assertU(adoc("id", "6",
"test_posoffpaytv", "boue",
"test_posofftv", "boue",
"test_basictv", "boue",
"test_notv", "boue",
@ -92,6 +93,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
"test_offtv", "boue"
));
assertU(adoc("id", "7",
"test_posoffpaytv", "glue",
"test_posofftv", "glue",
"test_basictv", "glue",
"test_notv", "glue",
@ -99,6 +101,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
"test_offtv", "glue"
));
assertU(adoc("id", "8",
"test_posoffpaytv", "blee",
"test_posofftv", "blee",
"test_basictv", "blee",
"test_notv", "blee",
@ -106,6 +109,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
"test_offtv", "blee"
));
assertU(adoc("id", "9",
"test_posoffpaytv", "blah",
"test_posofftv", "blah",
"test_basictv", "blah",
"test_notv", "blah",
@ -125,6 +129,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
@ -166,6 +171,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
@ -241,7 +247,8 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
public void testPerField() throws Exception {
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
,TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true"
,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv"
,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv,test_posoffpaytv"
,"f.test_posoffpaytv." + TermVectorParams.PAYLOADS, "false"
,"f.test_posofftv." + TermVectorParams.POSITIONS, "false"
,"f.test_offtv." + TermVectorParams.OFFSETS, "false"
,"f.test_basictv." + TermVectorParams.DF, "false"
@ -255,6 +262,17 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
);
}
@Test
public void testPayloads() throws Exception {
// Requests tf, df, offsets, positions, tf-idf and payloads for the document
// with id 0 and checks the term vector entry for the term 'anoth' in
// test_posoffpaytv.
//
// This field uses TokenOffsetPayloadTokenFilter, which
// stuffs the token's start offset (20) and end offset (27) into the
// payload. The expected value 'AAAAFAAAABs=' is the Base64 form of the
// eight bytes 00 00 00 14 00 00 00 1B, i.e. the two ints 20 and 27:
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
, TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true",
TermVectorParams.PAYLOADS, "true")
,"/termVectors/0/test_posoffpaytv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'payloads':{'payload': 'AAAAFAAAABs='}, 'df':2, 'tf-idf':0.5}"
);
}
}

View File

@ -24,7 +24,7 @@ public class TestFieldResource extends SolrRestletTestBase {
public void testGetField() throws Exception {
assertQ("/schema/fields/test_postv?indent=on&wt=xml&showDefaults=true",
"count(/response/lst[@name='field']) = 1",
"count(/response/lst[@name='field']/*) = 15",
"count(/response/lst[@name='field']/*) = 16",
"/response/lst[@name='field']/str[@name='name'] = 'test_postv'",
"/response/lst[@name='field']/str[@name='type'] = 'text'",
"/response/lst[@name='field']/bool[@name='indexed'] = 'true'",
@ -32,6 +32,7 @@ public class TestFieldResource extends SolrRestletTestBase {
"/response/lst[@name='field']/bool[@name='docValues'] = 'false'",
"/response/lst[@name='field']/bool[@name='termVectors'] = 'true'",
"/response/lst[@name='field']/bool[@name='termPositions'] = 'true'",
"/response/lst[@name='field']/bool[@name='termPayloads'] = 'false'",
"/response/lst[@name='field']/bool[@name='termOffsets'] = 'false'",
"/response/lst[@name='field']/bool[@name='omitNorms'] = 'false'",
"/response/lst[@name='field']/bool[@name='omitTermFreqAndPositions'] = 'false'",
@ -61,6 +62,7 @@ public class TestFieldResource extends SolrRestletTestBase {
"/field/termVectors==true",
"/field/termPositions==true",
"/field/termOffsets==false",
"/field/termPayloads==false",
"/field/omitNorms==false",
"/field/omitTermFreqAndPositions==false",
"/field/omitPositions==false",

View File

@ -93,6 +93,8 @@
This will increase storage costs.
termOffsets: Store offset information with the term vector. This
will increase storage costs.
termPayloads: Store payload information with the term vector. This
will increase storage costs.
required: The field is required. It will throw an error if the
value does not exist
default: a value that should be used if no value is specified

View File

@ -30,6 +30,7 @@ public enum FieldFlag {
TERM_VECTOR_STORED('V', "TermVector Stored"),
TERM_VECTOR_OFFSET('o', "Store Offset With TermVector"),
TERM_VECTOR_POSITION('p', "Store Position With TermVector"),
TERM_VECTOR_PAYLOADS('y', "Store Payloads With TermVector"),
OMIT_NORMS('O', "Omit Norms"),
OMIT_TF('F', "Omit Term Frequencies & Positions"),
OMIT_POSITIONS('P', "Omit Positions"),

View File

@ -35,6 +35,11 @@ public interface TermVectorParams {
* */
public static final String POSITIONS = TV_PREFIX + "positions";
/**
* Return payload information stored with the term vector, if available
*
* */
public static final String PAYLOADS = TV_PREFIX + "payloads";
/**
* Return offset information, if available
* */
public static final String OFFSETS = TV_PREFIX + "offsets";