mirror of https://github.com/apache/lucene.git
SOLR-5911: term vector payload support
in schema & TermVectorComponent & LukeRequestHandler git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1669492 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
493d3fd2f0
commit
ef0209189e
|
@ -230,6 +230,9 @@ New Features
|
|||
|
||||
* SOLR-7240: '/' redirects to '/solr/' for convenience (Martijn Koster, hossman)
|
||||
|
||||
* SOLR-5911: Added payload support for term vectors. New "termPayloads" option for fields
|
||||
/ types in the schema, and "tv.payloads" param for the term vector component.
|
||||
(Mike McCandless, David Smiley)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
|
|
@ -20,16 +20,37 @@ package org.apache.solr.handler.admin;
|
|||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.index.StorableField;
|
||||
import org.apache.lucene.index.StoredDocument;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -48,18 +69,18 @@ import org.apache.solr.common.util.SimpleOrderedMap;
|
|||
import org.apache.solr.handler.RequestHandlerBase;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.CopyField;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.update.SolrIndexWriter;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.CopyField;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.update.SolrIndexWriter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
|
||||
import static org.apache.lucene.index.IndexOptions.DOCS;
|
||||
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
|
||||
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||
|
||||
/**
|
||||
* This handler exposes the internal lucene index. It is inspired by and
|
||||
|
@ -185,6 +206,7 @@ public class LukeRequestHandler extends RequestHandlerBase
|
|||
flags.append( (f != null && f.fieldType().storeTermVectors()) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.fieldType().storeTermVectorOffsets()) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.fieldType().storeTermVectorPositions()) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.fieldType().storeTermVectorPayloads()) ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.fieldType().omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
|
||||
|
||||
flags.append( (f != null && DOCS == opts ) ?
|
||||
|
@ -223,6 +245,7 @@ public class LukeRequestHandler extends RequestHandlerBase
|
|||
flags.append( (f != null && f.storeTermVector() ) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.storeTermPayloads() ) ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
|
||||
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
|
||||
flags.append( (f != null &&
|
||||
f.omitTermFreqAndPositions() ) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
|
||||
|
|
|
@ -11,10 +11,10 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
@ -24,6 +24,7 @@ import org.apache.solr.common.SolrException;
|
|||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.TermVectorParams;
|
||||
import org.apache.solr.common.util.Base64;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
|
@ -58,7 +59,7 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
|||
* Return term vectors for the documents in a query result set.
|
||||
* <p>
|
||||
* Info available:
|
||||
* term, frequency, position, offset, IDF.
|
||||
* term, frequency, position, offset, payloads, IDF.
|
||||
* <p>
|
||||
* <b>Note</b> Returning IDF can be expensive.
|
||||
*
|
||||
|
@ -153,6 +154,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
allFields.termFreq = params.getBool(TermVectorParams.TF, false);
|
||||
allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
|
||||
allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
|
||||
allFields.payloads = params.getBool(TermVectorParams.PAYLOADS, false);
|
||||
allFields.docFreq = params.getBool(TermVectorParams.DF, false);
|
||||
allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
|
||||
//boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
|
||||
|
@ -161,6 +163,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
allFields.termFreq = true;
|
||||
allFields.positions = true;
|
||||
allFields.offsets = true;
|
||||
allFields.payloads = true;
|
||||
allFields.docFreq = true;
|
||||
allFields.tfIdf = true;
|
||||
}
|
||||
|
@ -171,6 +174,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
List<String> noTV = new ArrayList<>();
|
||||
List<String> noPos = new ArrayList<>();
|
||||
List<String> noOff = new ArrayList<>();
|
||||
List<String> noPay = new ArrayList<>();
|
||||
|
||||
Set<String> fields = getFields(rb);
|
||||
if ( null != fields ) {
|
||||
|
@ -207,6 +211,10 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey){
|
||||
noOff.add(field);
|
||||
}
|
||||
option.payloads = params.getFieldBool(field, TermVectorParams.PAYLOADS, allFields.payloads);
|
||||
if (option.payloads && !sf.storeTermPayloads() && !fieldIsUniqueKey){
|
||||
noPay.add(field);
|
||||
}
|
||||
} else {//field doesn't have term vectors
|
||||
if (!fieldIsUniqueKey) noTV.add(field);
|
||||
}
|
||||
|
@ -234,6 +242,10 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
warnings.add("noOffsets", noOff);
|
||||
hasWarnings = true;
|
||||
}
|
||||
if (!noPay.isEmpty()) {
|
||||
warnings.add("noPayloads", noPay);
|
||||
hasWarnings = true;
|
||||
}
|
||||
if (hasWarnings) {
|
||||
termVectors.add("warnings", warnings);
|
||||
}
|
||||
|
@ -341,22 +353,27 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
termInfo.add("tf", freq);
|
||||
}
|
||||
|
||||
dpEnum = termsEnum.postings(null, dpEnum, PostingsEnum.ALL);
|
||||
boolean useOffsets = false;
|
||||
boolean usePositions = false;
|
||||
int dpEnumFlags = 0;
|
||||
dpEnumFlags |= fieldOptions.positions ? PostingsEnum.POSITIONS : 0;
|
||||
//payloads require offsets
|
||||
dpEnumFlags |= (fieldOptions.offsets || fieldOptions.payloads) ? PostingsEnum.OFFSETS : 0;
|
||||
dpEnumFlags |= fieldOptions.payloads ? PostingsEnum.PAYLOADS : 0;
|
||||
dpEnum = termsEnum.postings(null, dpEnum, dpEnumFlags);
|
||||
|
||||
boolean atNextDoc = false;
|
||||
if (dpEnum != null) {
|
||||
dpEnum.nextDoc();
|
||||
usePositions = fieldOptions.positions;
|
||||
useOffsets = fieldOptions.offsets;
|
||||
atNextDoc = true;
|
||||
}
|
||||
|
||||
NamedList<Integer> positionsNL = null;
|
||||
NamedList<Number> theOffsets = null;
|
||||
if (atNextDoc && dpEnumFlags != 0) {
|
||||
NamedList<Integer> positionsNL = null;
|
||||
NamedList<Number> theOffsets = null;
|
||||
NamedList<String> thePayloads = null;
|
||||
|
||||
if (usePositions || useOffsets) {
|
||||
for (int i = 0; i < freq; i++) {
|
||||
final int pos = dpEnum.nextPosition();
|
||||
if (usePositions && pos >= 0) {
|
||||
if (fieldOptions.positions && pos >= 0) {
|
||||
if (positionsNL == null) {
|
||||
positionsNL = new NamedList<>();
|
||||
termInfo.add("positions", positionsNL);
|
||||
|
@ -364,19 +381,24 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
positionsNL.add("position", pos);
|
||||
}
|
||||
|
||||
if (useOffsets && theOffsets == null) {
|
||||
if (dpEnum.startOffset() == -1) {
|
||||
useOffsets = false;
|
||||
} else {
|
||||
int startOffset = fieldOptions.offsets ? dpEnum.startOffset() : -1;
|
||||
if (startOffset >= 0) {
|
||||
if (theOffsets == null) {
|
||||
theOffsets = new NamedList<>();
|
||||
termInfo.add("offsets", theOffsets);
|
||||
}
|
||||
}
|
||||
|
||||
if (theOffsets != null) {
|
||||
theOffsets.add("start", dpEnum.startOffset());
|
||||
theOffsets.add("end", dpEnum.endOffset());
|
||||
}
|
||||
|
||||
BytesRef payload = fieldOptions.payloads ? dpEnum.getPayload() : null;
|
||||
if (payload != null) {
|
||||
if (thePayloads == null) {
|
||||
thePayloads = new NamedList<>();
|
||||
termInfo.add("payloads", thePayloads);
|
||||
}
|
||||
thePayloads.add("payload", Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -472,5 +494,5 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
class FieldOptions {
|
||||
String fieldName;
|
||||
boolean termFreq, positions, offsets, docFreq, tfIdf;
|
||||
boolean termFreq, positions, offsets, payloads, docFreq, tfIdf;
|
||||
}
|
||||
|
|
|
@ -17,15 +17,33 @@ package org.apache.solr.schema;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.StorableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.EnumFieldSource;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.DocValuesRangeQuery;
|
||||
import org.apache.lucene.search.NumericRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
@ -43,17 +61,6 @@ import org.w3c.dom.Node;
|
|||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.*;
|
||||
|
||||
/***
|
||||
* Field type for support of string values with custom sort order.
|
||||
*/
|
||||
|
@ -389,6 +396,7 @@ public class EnumField extends PrimitiveFieldType {
|
|||
newType.setStoreTermVectors(field.storeTermVector());
|
||||
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
|
||||
newType.setStoreTermVectorPositions(field.storeTermPositions());
|
||||
newType.setStoreTermVectorPayloads(field.storeTermPayloads());
|
||||
newType.setNumericType(FieldType.NumericType.INT);
|
||||
newType.setNumericPrecisionStep(DEFAULT_PRECISION_STEP);
|
||||
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -52,13 +52,15 @@ public abstract class FieldProperties {
|
|||
protected final static int STORE_OFFSETS = 0x00004000;
|
||||
protected final static int DOC_VALUES = 0x00008000;
|
||||
|
||||
protected final static int STORE_TERMPAYLOADS = 0x00010000;
|
||||
|
||||
static final String[] propertyNames = {
|
||||
"indexed", "tokenized", "stored",
|
||||
"binary", "omitNorms", "omitTermFreqAndPositions",
|
||||
"termVectors", "termPositions", "termOffsets",
|
||||
"multiValued",
|
||||
"sortMissingFirst","sortMissingLast","required", "omitPositions",
|
||||
"storeOffsetsWithPositions", "docValues"
|
||||
"storeOffsetsWithPositions", "docValues", "termPayloads"
|
||||
};
|
||||
|
||||
static final Map<String,Integer> propertyMap = new HashMap<>();
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import static org.apache.lucene.analysis.util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
@ -67,6 +65,8 @@ import org.apache.solr.search.Sorting;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.lucene.analysis.util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;
|
||||
|
||||
/**
|
||||
* Base class for all field types used by an index schema.
|
||||
*
|
||||
|
@ -268,6 +268,7 @@ public abstract class FieldType extends FieldProperties {
|
|||
newType.setStoreTermVectors(field.storeTermVector());
|
||||
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
|
||||
newType.setStoreTermVectorPositions(field.storeTermPositions());
|
||||
newType.setStoreTermVectorPayloads(field.storeTermPayloads());
|
||||
|
||||
return createField(field.getName(), val, newType, boost);
|
||||
}
|
||||
|
|
|
@ -28,12 +28,11 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.StorableField;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.StorableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedSetSortField;
|
||||
import org.apache.lucene.uninverting.UninvertingReader.Type;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
|
@ -178,6 +177,7 @@ public class PreAnalyzedField extends FieldType {
|
|||
newType.setStoreTermVectors(field.storeTermVector());
|
||||
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
|
||||
newType.setStoreTermVectorPositions(field.storeTermPositions());
|
||||
newType.setStoreTermVectorPayloads(field.storeTermPayloads());
|
||||
return newType;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,19 +17,18 @@
|
|||
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.lucene.index.StorableField;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.search.QParser;
|
||||
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.StorableField;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
|
||||
/**
|
||||
* Encapsulates all information about a Field in a Solr Schema
|
||||
|
@ -94,6 +93,7 @@ public final class SchemaField extends FieldProperties {
|
|||
public boolean storeTermVector() { return (properties & STORE_TERMVECTORS)!=0; }
|
||||
public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
|
||||
public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
|
||||
public boolean storeTermPayloads() { return (properties & STORE_TERMPAYLOADS)!=0; }
|
||||
public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
|
||||
|
||||
public boolean omitTermFreqAndPositions() { return (properties & OMIT_TF_POSITIONS)!=0; }
|
||||
|
@ -236,7 +236,7 @@ public final class SchemaField extends FieldProperties {
|
|||
|
||||
if (on(falseProps,INDEXED)) {
|
||||
int pp = (INDEXED
|
||||
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
|
||||
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS);
|
||||
if (on(pp,trueProps)) {
|
||||
throw new RuntimeException("SchemaField: " + name + " conflicting 'true' field options for non-indexed field:" + props);
|
||||
}
|
||||
|
@ -269,7 +269,7 @@ public final class SchemaField extends FieldProperties {
|
|||
}
|
||||
|
||||
if (on(falseProps,STORE_TERMVECTORS)) {
|
||||
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
|
||||
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS);
|
||||
if (on(pp,trueProps)) {
|
||||
throw new RuntimeException("SchemaField: " + name + " conflicting termvector field options:" + props);
|
||||
}
|
||||
|
@ -323,6 +323,7 @@ public final class SchemaField extends FieldProperties {
|
|||
properties.add(getPropertyName(STORE_TERMVECTORS), storeTermVector());
|
||||
properties.add(getPropertyName(STORE_TERMPOSITIONS), storeTermPositions());
|
||||
properties.add(getPropertyName(STORE_TERMOFFSETS), storeTermOffsets());
|
||||
properties.add(getPropertyName(STORE_TERMPAYLOADS), storeTermPayloads());
|
||||
properties.add(getPropertyName(OMIT_NORMS), omitNorms());
|
||||
properties.add(getPropertyName(OMIT_TF_POSITIONS), omitTermFreqAndPositions());
|
||||
properties.add(getPropertyName(OMIT_POSITIONS), omitPositions());
|
||||
|
|
|
@ -103,6 +103,16 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldtype name="text_payload_tv" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.StandardFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldType name="nametext" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
|
@ -478,6 +488,8 @@
|
|||
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
|
||||
<field name="test_posofftv" type="text" termVectors="true"
|
||||
termPositions="true" termOffsets="true"/>
|
||||
<field name="test_posoffpaytv" type="text_payload_tv" termVectors="true"
|
||||
termPositions="true" termOffsets="true" termPayloads="true"/>
|
||||
|
||||
<!-- test highlit field settings -->
|
||||
<field name="test_hlt" type="highlittext" indexed="true"/>
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
package org.apache.solr;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
|
@ -26,18 +28,10 @@ import java.util.LinkedList;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.LazyDocument;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.StorableField;
|
||||
import org.apache.lucene.index.StoredDocument;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext.Context;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.English;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
|
@ -56,10 +50,7 @@ import org.apache.solr.schema.IndexSchemaFactory;
|
|||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.update.DirectUpdateHandler2;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
|
@ -540,6 +531,11 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 {
|
|||
assertTrue(f.storeTermVector() && f.storeTermPositions() && f.storeTermOffsets());
|
||||
assertTrue(luf.fieldType().storeTermVectorOffsets() && luf.fieldType().storeTermVectorPositions());
|
||||
|
||||
f = ischema.getField("test_posoffpaytv");
|
||||
luf = f.createField("test", 0f);
|
||||
assertTrue(f.storeTermVector() && f.storeTermPositions() && f.storeTermOffsets() && f.storeTermPayloads());
|
||||
assertTrue(luf.fieldType().storeTermVectorOffsets() && luf.fieldType().storeTermVectorPositions() && luf.fieldType().storeTermVectorPayloads());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -1,19 +1,13 @@
|
|||
package org.apache.solr.handler.component;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.TermVectorParams;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.params.TermVectorParams;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -42,6 +36,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
initCore("solrconfig.xml","schema.xml");
|
||||
|
||||
assertU(adoc("id", "0",
|
||||
"test_posoffpaytv", "This is a title and another title",
|
||||
"test_posofftv", "This is a title and another title",
|
||||
"test_basictv", "This is a title and another title",
|
||||
"test_notv", "This is a title and another title",
|
||||
|
@ -49,6 +44,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"test_offtv", "This is a title and another title"
|
||||
));
|
||||
assertU(adoc("id", "1",
|
||||
"test_posoffpaytv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_posofftv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_basictv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
"test_notv", "The quick reb fox jumped over the lazy brown dogs.",
|
||||
|
@ -56,6 +52,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"test_offtv", "The quick reb fox jumped over the lazy brown dogs."
|
||||
));
|
||||
assertU(adoc("id", "2",
|
||||
"test_posoffpaytv", "This is a document",
|
||||
"test_posofftv", "This is a document",
|
||||
"test_basictv", "This is a document",
|
||||
"test_notv", "This is a document",
|
||||
|
@ -63,6 +60,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"test_offtv", "This is a document"
|
||||
));
|
||||
assertU(adoc("id", "3",
|
||||
"test_posoffpaytv", "another document",
|
||||
"test_posofftv", "another document",
|
||||
"test_basictv", "another document",
|
||||
"test_notv", "another document",
|
||||
|
@ -71,6 +69,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
));
|
||||
//bunch of docs that are variants on blue
|
||||
assertU(adoc("id", "4",
|
||||
"test_posoffpaytv", "blue",
|
||||
"test_posofftv", "blue",
|
||||
"test_basictv", "blue",
|
||||
"test_notv", "blue",
|
||||
|
@ -78,6 +77,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"test_offtv", "blue"
|
||||
));
|
||||
assertU(adoc("id", "5",
|
||||
"test_posoffpaytv", "blud",
|
||||
"test_posofftv", "blud",
|
||||
"test_basictv", "blud",
|
||||
"test_notv", "blud",
|
||||
|
@ -85,6 +85,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"test_offtv", "blud"
|
||||
));
|
||||
assertU(adoc("id", "6",
|
||||
"test_posoffpaytv", "boue",
|
||||
"test_posofftv", "boue",
|
||||
"test_basictv", "boue",
|
||||
"test_notv", "boue",
|
||||
|
@ -92,6 +93,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"test_offtv", "boue"
|
||||
));
|
||||
assertU(adoc("id", "7",
|
||||
"test_posoffpaytv", "glue",
|
||||
"test_posofftv", "glue",
|
||||
"test_basictv", "glue",
|
||||
"test_notv", "glue",
|
||||
|
@ -99,6 +101,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"test_offtv", "glue"
|
||||
));
|
||||
assertU(adoc("id", "8",
|
||||
"test_posoffpaytv", "blee",
|
||||
"test_posofftv", "blee",
|
||||
"test_basictv", "blee",
|
||||
"test_notv", "blee",
|
||||
|
@ -106,6 +109,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
"test_offtv", "blee"
|
||||
));
|
||||
assertU(adoc("id", "9",
|
||||
"test_posoffpaytv", "blah",
|
||||
"test_posofftv", "blah",
|
||||
"test_basictv", "blah",
|
||||
"test_notv", "blah",
|
||||
|
@ -125,6 +129,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
);
|
||||
|
@ -166,6 +171,7 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
|
||||
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
|
||||
" 'uniqueKeyFieldName':'id'}"
|
||||
);
|
||||
|
@ -241,7 +247,8 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
public void testPerField() throws Exception {
|
||||
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
|
||||
,TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true"
|
||||
,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv"
|
||||
,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv,test_posoffpaytv"
|
||||
,"f.test_posoffpaytv." + TermVectorParams.PAYLOADS, "false"
|
||||
,"f.test_posofftv." + TermVectorParams.POSITIONS, "false"
|
||||
,"f.test_offtv." + TermVectorParams.OFFSETS, "false"
|
||||
,"f.test_basictv." + TermVectorParams.DF, "false"
|
||||
|
@ -255,6 +262,17 @@ public class TermVectorComponentTest extends SolrTestCaseJ4 {
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPayloads() throws Exception {
|
||||
// This field uses TokenOffsetPayloadTokenFilter, which
|
||||
// stuffs start (20) and end offset (27) into the
|
||||
// payload:
|
||||
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
|
||||
, TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true",
|
||||
TermVectorParams.PAYLOADS, "true")
|
||||
,"/termVectors/0/test_posoffpaytv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'payloads':{'payload': 'AAAAFAAAABs='}, 'df':2, 'tf-idf':0.5}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ public class TestFieldResource extends SolrRestletTestBase {
|
|||
public void testGetField() throws Exception {
|
||||
assertQ("/schema/fields/test_postv?indent=on&wt=xml&showDefaults=true",
|
||||
"count(/response/lst[@name='field']) = 1",
|
||||
"count(/response/lst[@name='field']/*) = 15",
|
||||
"count(/response/lst[@name='field']/*) = 16",
|
||||
"/response/lst[@name='field']/str[@name='name'] = 'test_postv'",
|
||||
"/response/lst[@name='field']/str[@name='type'] = 'text'",
|
||||
"/response/lst[@name='field']/bool[@name='indexed'] = 'true'",
|
||||
|
@ -32,6 +32,7 @@ public class TestFieldResource extends SolrRestletTestBase {
|
|||
"/response/lst[@name='field']/bool[@name='docValues'] = 'false'",
|
||||
"/response/lst[@name='field']/bool[@name='termVectors'] = 'true'",
|
||||
"/response/lst[@name='field']/bool[@name='termPositions'] = 'true'",
|
||||
"/response/lst[@name='field']/bool[@name='termPayloads'] = 'false'",
|
||||
"/response/lst[@name='field']/bool[@name='termOffsets'] = 'false'",
|
||||
"/response/lst[@name='field']/bool[@name='omitNorms'] = 'false'",
|
||||
"/response/lst[@name='field']/bool[@name='omitTermFreqAndPositions'] = 'false'",
|
||||
|
@ -61,6 +62,7 @@ public class TestFieldResource extends SolrRestletTestBase {
|
|||
"/field/termVectors==true",
|
||||
"/field/termPositions==true",
|
||||
"/field/termOffsets==false",
|
||||
"/field/termPayloads==false",
|
||||
"/field/omitNorms==false",
|
||||
"/field/omitTermFreqAndPositions==false",
|
||||
"/field/omitPositions==false",
|
||||
|
|
|
@ -93,6 +93,8 @@
|
|||
This will increase storage costs.
|
||||
termOffsets: Store offset information with the term vector. This
|
||||
will increase storage costs.
|
||||
termPayloads: Store payload information with the term vector. This
|
||||
will increase storage costs.
|
||||
required: The field is required. It will throw an error if the
|
||||
value does not exist
|
||||
default: a value that should be used if no value is specified
|
||||
|
|
|
@ -30,6 +30,7 @@ public enum FieldFlag {
|
|||
TERM_VECTOR_STORED('V', "TermVector Stored"),
|
||||
TERM_VECTOR_OFFSET('o', "Store Offset With TermVector"),
|
||||
TERM_VECTOR_POSITION('p', "Store Position With TermVector"),
|
||||
TERM_VECTOR_PAYLOADS('y', "Store Payloads With TermVector"),
|
||||
OMIT_NORMS('O', "Omit Norms"),
|
||||
OMIT_TF('F', "Omit Term Frequencies & Positions"),
|
||||
OMIT_POSITIONS('P', "Omit Positions"),
|
||||
|
|
|
@ -35,6 +35,11 @@ public interface TermVectorParams {
|
|||
* */
|
||||
public static final String POSITIONS = TV_PREFIX + "positions";
|
||||
/**
|
||||
* Return Term Vector payloads information
|
||||
*
|
||||
* */
|
||||
public static final String PAYLOADS = TV_PREFIX + "payloads";
|
||||
/**
|
||||
* Return offset information, if available
|
||||
* */
|
||||
public static final String OFFSETS = TV_PREFIX + "offsets";
|
||||
|
|
Loading…
Reference in New Issue