Refactor term vector API
This is necessary to allow adding a multi term vector request.
parent a09f217b45
commit 18c71b16b5
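For context, the diff below moves request parsing out of RestTermVectorAction into static members of TermVectorRequest (parseRequest and readTermVectorRequest) and teaches the parser the per-document keys _index, _type, _id and _routing. As a rough sketch of why that enables a multi term vector request, here is a hypothetical follow-up class that reuses the new entry point; MultiTermVectorRequest and its "docs" body format are assumptions for illustration only and are not part of this commit:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.elasticsearch.action.termvector.TermVectorRequest;
    import org.elasticsearch.common.xcontent.XContentParser;

    // Hypothetical follow-up request; only TermVectorRequest.parseRequest(...)
    // and readTermVectorRequest(...) exist in this commit.
    public class MultiTermVectorRequest {

        private final List<TermVectorRequest> requests = new ArrayList<TermVectorRequest>();

        // Assumed body format:
        // { "docs" : [ { "_index" : "idx", "_type" : "t", "_id" : "1", "fields" : [...] }, ... ] }
        public void parse(XContentParser parser) throws IOException {
            XContentParser.Token token;
            String currentFieldName = null;
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (token == XContentParser.Token.START_ARRAY && "docs".equals(currentFieldName)) {
                    while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
                        // parseRequest consumes one object from the array and fills in
                        // _index, _type, _id and _routing along with the usual flags.
                        TermVectorRequest termVectorRequest = new TermVectorRequest();
                        TermVectorRequest.parseRequest(termVectorRequest, parser);
                        requests.add(termVectorRequest);
                    }
                }
            }
        }

        public List<TermVectorRequest> requests() {
            return requests;
        }
    }

Each element of the assumed "docs" array is parsed with the same static method the REST layer now calls, which is exactly the reuse this refactor sets up.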
@@ -19,18 +19,9 @@
 
 package org.elasticsearch.action.termvector;
 
-import static org.apache.lucene.util.ArrayUtil.grow;
+import gnu.trove.impl.Constants;
 import gnu.trove.map.hash.TObjectLongHashMap;
+import org.apache.lucene.index.*;
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.Iterator;
-
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -38,16 +29,22 @@ import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.BytesStreamInput;
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.Iterator;
+
+import static org.apache.lucene.util.ArrayUtil.grow;
 
 /**
 * This class represents the result of a {@link TermVectorRequest}. It works
 * exactly like the {@link Fields} class except for one thing: It can return
 * offsets and payloads even if positions are not present. You must call
 * nextPosition() anyway to move the counter although this method only returns
 * <tt>-1,</tt>, if no positions were returned by the {@link TermVectorRequest}.
-*
+* <p/>
 * The data is stored in two byte arrays ({@code headerRef} and
 * {@code termVectors}, both {@link ByteRef}) that have the following format:
-* <p>
+* <p/>
 * {@code headerRef}: Stores offsets per field in the {@code termVectors} array
 * and some header information as {@link BytesRef}. Format is
 * <ul>
@@ -64,9 +61,9 @@ import org.elasticsearch.common.io.stream.BytesStreamInput;
 * <li>vint: offset in {@code termVectors} for last field</li>
 * </ul>
 * </ul>
-*
+* <p/>
 * termVectors: Stores the actual term vectors as a {@link BytesRef}.
-*
+* <p/>
 * Term vectors for each fields are stored in blocks, one for each field. The
 * offsets in {@code headerRef} are used to find where the block for a field
 * starts. Each block begins with a
@@ -84,14 +81,14 @@ import org.elasticsearch.common.io.stream.BytesStreamInput;
 * <li>vint: number of documents in the shard that has an entry for this field
 * (docCount)</li>
 * </ul>
-*
+* <p/>
 * After that, for each term it stores
 * <ul>
 * <ul>
 * <li>vint: term lengths</li>
 * <li>BytesRef: term name</li>
 * </ul>
-*
+* <p/>
 * If term statistics are requested ({@code hasTermStatistics} is true, see
 * {@code headerRef}):
 * <ul>
@@ -111,7 +108,6 @@ import org.elasticsearch.common.io.stream.BytesStreamInput;
 * <li>BytesRef: payload_freqency (if payloads == true)</li>
 * <ul>
 * </ul> </ul>
-*
 */
 
 public final class TermVectorFields extends Fields {
@@ -122,17 +118,14 @@ public final class TermVectorFields extends Fields {
 final boolean hasFieldStatistic;
 
 /**
-* @param headerRef
+* @param headerRef Stores offsets per field in the {@code termVectors} and some
-* Stores offsets per field in the {@code termVectors} and some
 * header information as {@link BytesRef}.
-*
+* @param termVectors Stores the actual term vectors as a {@link BytesRef}.
-* @param termVectors
-* Stores the actual term vectors as a {@link BytesRef}.
-*
 */
 public TermVectorFields(BytesReference headerRef, BytesReference termVectors) throws IOException {
 BytesStreamInput header = new BytesStreamInput(headerRef);
-fieldMap = new TObjectLongHashMap<String>();
+fieldMap = new TObjectLongHashMap<String>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1);
 
 // here we read the header to fill the field offset map
 String headerString = header.readString();
 assert headerString.equals("TV");
@@ -159,6 +152,9 @@ public final class TermVectorFields extends Fields {
 // first, find where in the termVectors bytes the actual term vector for
 // this field is stored
 Long offset = fieldMap.get(field);
+if (offset.longValue() < 0) {
+return null; // we don't have it.
+}
 final BytesStreamInput perFieldTermVectorInput = new BytesStreamInput(this.termVectors);
 perFieldTermVectorInput.reset();
 perFieldTermVectorInput.skip(offset.longValue());

@@ -19,23 +19,22 @@
 
 package org.elasticsearch.action.termvector;
 
-import java.io.IOException;
-import java.util.EnumSet;
-import java.util.HashSet;
-import java.util.Set;
-
+import com.google.common.collect.Sets;
+import org.elasticsearch.ElasticSearchParseException;
 import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.ValidateActions;
 import org.elasticsearch.action.support.single.shard.SingleShardOperationRequest;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.xcontent.XContentParser;
 
-import com.google.common.collect.Sets;
+import java.io.IOException;
+import java.util.*;
 
 /**
 * Request returning the term vector (doc frequency, positions, offsets) for a
 * document.
-* <p>
+* <p/>
 * Note, the {@link #index()}, {@link #type(String)} and {@link #id(String)} are
 * required.
 */
@@ -49,6 +48,7 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
 
 protected String preference;
 
+// TODO: change to String[]
 private Set<String> selectedFields;
 
 private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads,
@@ -246,6 +246,13 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
 return validationException;
 }
 
+public static TermVectorRequest readTermVectorRequest(StreamInput in) throws IOException {
+TermVectorRequest termVectorRequest = new TermVectorRequest();
+termVectorRequest.readFrom(in);
+return termVectorRequest;
+}
+
+
 @Override
 public void readFrom(StreamInput in) throws IOException {
 super.readFrom(in);
@@ -300,4 +307,60 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
 // the ordinal for encoding! Only append to the end!
 Positions, Offsets, Payloads, FieldStatistics, TermStatistics;
 }
 
+/**
+* populates a request object (pre-populated with defaults) based on a parser.
+*
+* @param termVectorRequest
+* @param parser
+* @throws IOException
+*/
+public static void parseRequest(TermVectorRequest termVectorRequest, XContentParser parser) throws IOException {
+XContentParser.Token token;
+String currentFieldName = null;
+List<String> fields = new ArrayList<String>();
+while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+if (token == XContentParser.Token.FIELD_NAME) {
+currentFieldName = parser.currentName();
+} else if (currentFieldName != null) {
+if (currentFieldName.equals("fields")) {
+
+if (token == XContentParser.Token.START_ARRAY) {
+while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
+fields.add(parser.text());
+}
+} else {
+throw new ElasticSearchParseException(
+"The parameter fields must be given as an array! Use syntax : \"fields\" : [\"field1\", \"field2\",...]");
+}
+} else if (currentFieldName.equals("offsets")) {
+termVectorRequest.offsets(parser.booleanValue());
+} else if (currentFieldName.equals("positions")) {
+termVectorRequest.positions(parser.booleanValue());
+} else if (currentFieldName.equals("payloads")) {
+termVectorRequest.payloads(parser.booleanValue());
+} else if (currentFieldName.equals("term_statistics") || currentFieldName.equals("termStatistics")) {
+termVectorRequest.termStatistics(parser.booleanValue());
+} else if (currentFieldName.equals("field_statistics") || currentFieldName.equals("fieldStatistics")) {
+termVectorRequest.fieldStatistics(parser.booleanValue());
+} else if ("_index".equals(currentFieldName)) { // the following is important for multi request parsing.
+termVectorRequest.index = parser.text();
+} else if ("_type".equals(currentFieldName)) {
+termVectorRequest.type = parser.text();
+} else if ("_id".equals(currentFieldName)) {
+termVectorRequest.id = parser.text();
+} else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) {
+termVectorRequest.routing = parser.text();
+} else {
+throw new ElasticSearchParseException("The parameter " + currentFieldName
++ " is not valid for term vector request!");
+}
+}
+}
+
+if (fields.size() > 0) {
+String[] fieldsAsArray = new String[fields.size()];
+termVectorRequest.selectedFields(fields.toArray(fieldsAsArray));
+}
+}
 }

@@ -320,10 +320,11 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
 this.exists = exists;
 }
 
-public void setFields(Fields fields, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
+public void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
 TermVectorWriter tvw = new TermVectorWriter(this);
-if (fields != null) {
-tvw.setFields(fields, selectedFields, flags, topLevelFields);
+if (termVectorsByField != null) {
+tvw.setFields(termVectorsByField, selectedFields, flags, topLevelFields);
 }
 
 }
@@ -342,4 +343,16 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
 
 }
 
+public String getIndex() {
+return index;
+}
+
+public String getType() {
+return type;
+}
+
+public String getId() {
+return id;
+}
+
 }

@@ -18,22 +18,18 @@
 */
 package org.elasticsearch.action.termvector;
 
+import org.apache.lucene.index.*;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
-import org.elasticsearch.common.bytes.BytesReference;
-import org.elasticsearch.common.io.stream.BytesStreamOutput;
-
 // package only - this is an internal class!
 final class TermVectorWriter {
 final List<String> fields = new ArrayList<String>();
@@ -49,30 +45,30 @@ final class TermVectorWriter {
 response = termVectorResponse;
 }
 
-void setFields(Fields fields, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
+void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
 
 int numFieldsWritten = 0;
 TermsEnum iterator = null;
 DocsAndPositionsEnum docsAndPosEnum = null;
 DocsEnum docsEnum = null;
 TermsEnum topLevelIterator = null;
-for (String field : fields) {
+for (String field : termVectorsByField) {
 if ((selectedFields != null) && (!selectedFields.contains(field))) {
 continue;
 }
 
-Terms terms = fields.terms(field);
+Terms fieldTermVector = termVectorsByField.terms(field);
 Terms topLevelTerms = topLevelFields.terms(field);
 
 topLevelIterator = topLevelTerms.iterator(topLevelIterator);
-boolean positions = flags.contains(Flag.Positions) && terms.hasPositions();
-boolean offsets = flags.contains(Flag.Offsets) && terms.hasOffsets();
-boolean payloads = flags.contains(Flag.Payloads) && terms.hasPayloads();
-startField(field, terms.size(), positions, offsets, payloads);
+boolean positions = flags.contains(Flag.Positions) && fieldTermVector.hasPositions();
+boolean offsets = flags.contains(Flag.Offsets) && fieldTermVector.hasOffsets();
+boolean payloads = flags.contains(Flag.Payloads) && fieldTermVector.hasPayloads();
+startField(field, fieldTermVector.size(), positions, offsets, payloads);
 if (flags.contains(Flag.FieldStatistics)) {
 writeFieldStatistics(topLevelTerms);
 }
-iterator = terms.iterator(iterator);
+iterator = fieldTermVector.iterator(iterator);
 final boolean useDocsAndPos = positions || offsets || payloads;
 while (iterator.next() != null) { // iterate all terms of the
 // current field

@@ -19,36 +19,28 @@
 
 package org.elasticsearch.rest.action.termvector;
 
-import static org.elasticsearch.rest.RestRequest.Method.GET;
-import static org.elasticsearch.rest.RestRequest.Method.POST;
-import static org.elasticsearch.rest.RestStatus.OK;
-import static org.elasticsearch.rest.action.support.RestXContentBuilder.restContentBuilder;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.StringTokenizer;
-
-import org.elasticsearch.ElasticSearchParseException;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.termvector.TermVectorRequest;
 import org.elasticsearch.action.termvector.TermVectorResponse;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.rest.BaseRestHandler;
-import org.elasticsearch.rest.RestChannel;
-import org.elasticsearch.rest.RestController;
-import org.elasticsearch.rest.RestRequest;
-import org.elasticsearch.rest.XContentRestResponse;
-import org.elasticsearch.rest.XContentThrowableRestResponse;
+import org.elasticsearch.rest.*;
+import org.elasticsearch.rest.action.support.RestXContentBuilder;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
 
+import static org.elasticsearch.rest.RestRequest.Method.GET;
+import static org.elasticsearch.rest.RestRequest.Method.POST;
+import static org.elasticsearch.rest.RestStatus.BAD_REQUEST;
+import static org.elasticsearch.rest.RestStatus.OK;
+import static org.elasticsearch.rest.action.support.RestXContentBuilder.restContentBuilder;
 
 /**
 * This class parses the json request and translates it into a
@@ -70,19 +62,24 @@ public class RestTermVectorAction extends BaseRestHandler {
 termVectorRequest.routing(request.param("routing"));
 termVectorRequest.parent(request.param("parent"));
 termVectorRequest.preference(request.param("preference"));
+XContentParser parser = null;
 if (request.hasContent()) {
 try {
-parseRequest(request.content(), termVectorRequest);
+parser = XContentFactory.xContent(request.content()).createParser(request.content());
+TermVectorRequest.parseRequest(termVectorRequest, parser);
+} catch (IOException e) {
+try {
+XContentBuilder builder = RestXContentBuilder.restContentBuilder(request);
+channel.sendResponse(new XContentRestResponse(request, BAD_REQUEST, builder.startObject().field("error", e.getMessage()).endObject()));
+
 } catch (IOException e1) {
-Set<String> selectedFields = termVectorRequest.selectedFields();
-String fieldString = "all";
-if (selectedFields != null) {
-Strings.arrayToDelimitedString(termVectorRequest.selectedFields().toArray(new String[1]), " ");
+logger.warn("Failed to send response", e1);
+return;
+}
+} finally {
+if (parser != null) {
+parser.close();
 }
-logger.error("Something is wrong with your parameters for the term vector request. I am using parameters "
-+ "\n positions :" + termVectorRequest.positions() + "\n offsets :" + termVectorRequest.offsets() + "\n payloads :"
-+ termVectorRequest.payloads() + "\n termStatistics :" + termVectorRequest.termStatistics()
-+ "\n fieldStatistics :" + termVectorRequest.fieldStatistics() + "\nfields " + fieldString, (Object) null);
 }
 }
 readURIParameters(termVectorRequest, request);
@@ -142,47 +139,4 @@ public class RestTermVectorAction extends BaseRestHandler {
 }
 }
 
-static public void parseRequest(BytesReference cont, TermVectorRequest termVectorRequest) throws IOException {
-
-XContentParser parser = XContentFactory.xContent(cont).createParser(cont);
-try {
-XContentParser.Token token;
-String currentFieldName = null;
-List<String> fields = new ArrayList<String>();
-while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
-if (token == XContentParser.Token.FIELD_NAME) {
-currentFieldName = parser.currentName();
-} else if (currentFieldName != null) {
-if (currentFieldName.equals("fields")) {
-
-if (token == XContentParser.Token.START_ARRAY) {
-while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
-fields.add(parser.text());
-}
-} else {
-throw new ElasticSearchParseException(
-"The parameter fields must be given as an array! Use syntax : \"fields\" : [\"field1\", \"field2\",...]");
-}
-} else if (currentFieldName.equals("offsets")) {
-termVectorRequest.offsets(parser.booleanValue());
-} else if (currentFieldName.equals("positions")) {
-termVectorRequest.positions(parser.booleanValue());
-} else if (currentFieldName.equals("payloads")) {
-termVectorRequest.payloads(parser.booleanValue());
-} else if (currentFieldName.equals("term_statistics") || currentFieldName.equals("termStatistics")) {
-termVectorRequest.termStatistics(parser.booleanValue());
-} else if (currentFieldName.equals("field_statistics") || currentFieldName.equals("fieldStatistics")) {
-termVectorRequest.fieldStatistics(parser.booleanValue());
-} else {
-throw new ElasticSearchParseException("The parameter " + currentFieldName
-+ " is not valid for term vector request!");
-}
-}
-}
-String[] fieldsAsArray = new String[fields.size()];
-termVectorRequest.selectedFields(fields.toArray(fieldsAsArray));
-} finally {
-parser.close();
-}
-}
 }

@@ -323,6 +323,11 @@ public abstract class AbstractSharedClusterTest extends ElasticsearchTestCase {
 return client().prepareIndex(index, type).setSource(source).execute().actionGet();
 }
 
+protected IndexResponse index(String index, String type, String id, Map<String, Object> source) {
+return client().prepareIndex(index, type, id).setSource(source).execute().actionGet();
+}
+
+
 protected GetResponse get(String index, String type, String id) {
 return client().prepareGet(index, type, id).execute().actionGet();
 }

@@ -0,0 +1,411 @@
+package org.elasticsearch.test.integration.termvectors;
+
+/*
+* Licensed to ElasticSearch under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. ElasticSearch licenses this
+* file to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
+import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.document.*;
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.elasticsearch.Version;
+import org.elasticsearch.action.termvector.TermVectorRequestBuilder;
+import org.elasticsearch.action.termvector.TermVectorResponse;
+import org.elasticsearch.common.inject.internal.Join;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.indices.IndexMissingException;
+import org.elasticsearch.test.integration.AbstractSharedClusterTest;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.*;
+
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.hamcrest.Matchers.equalTo;
+
+public abstract class AbstractTermVectorTests extends AbstractSharedClusterTest {
+
+protected static class TestFieldSetting {
+final public String name;
+final public boolean storedOffset;
+final public boolean storedPayloads;
+final public boolean storedPositions;
+
+public TestFieldSetting(String name, boolean storedOffset, boolean storedPayloads, boolean storedPositions) {
+this.name = name;
+this.storedOffset = storedOffset;
+this.storedPayloads = storedPayloads;
+this.storedPositions = storedPositions;
+}
+
+public void addToMappings(XContentBuilder mappingsBuilder) throws IOException {
+mappingsBuilder.startObject(name);
+mappingsBuilder.field("type", "string");
+String tv_settings;
+if (storedPositions && storedOffset && storedPayloads) {
+tv_settings = "with_positions_offsets_payloads";
+} else if (storedPositions && storedOffset) {
+tv_settings = "with_positions_offsets";
+} else if (storedPayloads) {
+tv_settings = "with_positions_payloads";
+} else if (storedPositions) {
+tv_settings = "with_positions";
+} else if (storedOffset) {
+tv_settings = "with_offsets";
+} else {
+tv_settings = "yes";
+}
+
+mappingsBuilder.field("term_vector", tv_settings);
+
+if (storedPayloads) {
+mappingsBuilder.field("analyzer", "tv_test");
+}
+
+mappingsBuilder.endObject();
+}
+
+@Override
+public String toString() {
+StringBuilder sb = new StringBuilder("name: ").append(name).append(" tv_with:");
+if (storedPayloads) {
+sb.append("payloads,");
+}
+if (storedOffset) {
+sb.append("offsets,");
+}
+if (storedPositions) {
+sb.append("positions,");
+}
+return sb.toString();
+}
+}
+
+protected static class TestDoc {
+final public String id;
+final public TestFieldSetting[] fieldSettings;
+final public String[] fieldContent;
+public String index = "test";
+public String type = "type1";
+
+public TestDoc(String id, TestFieldSetting[] fieldSettings, String[] fieldContent) {
+this.id = id;
+this.fieldSettings = fieldSettings;
+this.fieldContent = fieldContent;
+}
+
+public TestDoc index(String index) {
+this.index = index;
+return this;
+}
+
+@Override
+public String toString() {
+
+StringBuilder sb = new StringBuilder("index:").append(index).append(" type:").append(type).append(" id:").append(id);
+for (int i = 0; i < fieldSettings.length; i++) {
+TestFieldSetting f = fieldSettings[i];
+sb.append("\n").append("Field: ").append(f).append("\n content:").append(fieldContent[i]);
+}
+sb.append("\n");
+
+return sb.toString();
+}
+}
+
+protected static class TestConfig {
+final public TestDoc doc;
+final public String[] selectedFields;
+final public boolean requestPositions;
+final public boolean requestOffsets;
+final public boolean requestPayloads;
+public Class expectedException = null;
+
+public TestConfig(TestDoc doc, String[] selectedFields, boolean requestPositions, boolean requestOffsets, boolean requestPayloads) {
+this.doc = doc;
+this.selectedFields = selectedFields;
+this.requestPositions = requestPositions;
+this.requestOffsets = requestOffsets;
+this.requestPayloads = requestPayloads;
+}
+
+public TestConfig expectedException(Class exceptionClass) {
+this.expectedException = exceptionClass;
+return this;
+}
+
+@Override
+public String toString() {
+String requested = "";
+if (requestOffsets) {
+requested += "offsets,";
+}
+if (requestPositions) {
+requested += "position,";
+}
+if (requestPayloads) {
+requested += "payload,";
+}
+Locale aLocale = new Locale("en", "US");
+return String.format(aLocale, "(doc: %s\n requested: %s, fields: %s)", doc, requested,
+selectedFields == null ? "NULL" : Join.join(",", selectedFields));
+}
+}
+
+protected void createIndexBasedOnFieldSettings(TestFieldSetting[] fieldSettings, int number_of_shards) throws IOException {
+wipeIndex("test");
+XContentBuilder mappingBuilder = jsonBuilder();
+mappingBuilder.startObject().startObject("type1").startObject("properties");
+for (TestFieldSetting field : fieldSettings) {
+field.addToMappings(mappingBuilder);
+}
+ImmutableSettings.Builder settings = ImmutableSettings.settingsBuilder()
+.put("index.analysis.analyzer.tv_test.tokenizer", "standard")
+.putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase");
+if (number_of_shards > 0) {
+settings.put("number_of_shards", number_of_shards);
+}
+mappingBuilder.endObject().endObject().endObject();
+run(prepareCreate("test").addMapping("type1", mappingBuilder).setSettings(settings));
+
+ensureYellow();
+}
+
+/**
+* Generate test documentsThe returned documents are already indexed.
+*/
+protected TestDoc[] generateTestDocs(int numberOfDocs, TestFieldSetting[] fieldSettings) {
+String[] fieldContentOptions = new String[] { "Generating a random permutation of a sequence (such as when shuffling cards).",
+"Selecting a random sample of a population (important in statistical sampling).",
+"Allocating experimental units via random assignment to a treatment or control condition.",
+"Generating random numbers: see Random number generation.",
+"Transforming a data stream (such as when using a scrambler in telecommunications)." };
+
+String[] contentArray = new String[fieldSettings.length];
+Map<String, Object> docSource = new HashMap<String, Object>();
+TestDoc[] testDocs = new TestDoc[numberOfDocs];
+for (int docId = 0; docId < numberOfDocs; docId++) {
+docSource.clear();
+for (int i = 0; i < contentArray.length; i++) {
+contentArray[i] = fieldContentOptions[randomInt(fieldContentOptions.length - 1)];
+docSource.put(fieldSettings[i].name, contentArray[i]);
+}
+TestDoc doc = new TestDoc(Integer.toString(docId), fieldSettings, contentArray.clone());
+index(doc.index, doc.type, doc.id, docSource);
+testDocs[docId] = doc;
+}
+
+refresh();
+return testDocs;
+
+}
+
+protected TestConfig[] generateTestConfigs(int numberOfTests, TestDoc[] testDocs, TestFieldSetting[] fieldSettings) {
+ArrayList<TestConfig> configs = new ArrayList<TestConfig>();
+for (int i = 0; i < numberOfTests; i++) {
+
+ArrayList<String> selectedFields = null;
+if (randomBoolean()) {
+// used field selection
+selectedFields = new ArrayList<String>();
+if (randomBoolean()) {
+selectedFields.add("Doesnt_exist"); // this will be ignored.
+}
+for (TestFieldSetting field : fieldSettings)
+if (randomBoolean()) {
+selectedFields.add(field.name);
+}
+
+if (selectedFields.size() == 0) {
+selectedFields = null; // 0 length set is not supported.
+}
+
+}
+TestConfig config = new TestConfig(testDocs[randomInt(testDocs.length - 1)], selectedFields == null ? null
+: selectedFields.toArray(new String[] {}), randomBoolean(), randomBoolean(), randomBoolean());
+
+configs.add(config);
+}
+// always adds a test that fails
+configs.add(new TestConfig(new TestDoc("doesnt_exist", new TestFieldSetting[] {}, new String[] {}).index("doesn't_exist"),
+new String[] { "doesnt_exist" }, true, true, true).expectedException(IndexMissingException.class));
+
+refresh();
+
+return configs.toArray(new TestConfig[] {});
+}
+
+protected TestFieldSetting[] getFieldSettings() {
+return new TestFieldSetting[] { new TestFieldSetting("field_with_positions", false, false, true),
+new TestFieldSetting("field_with_offsets", true, false, false),
+new TestFieldSetting("field_with_only_tv", false, false, false),
+new TestFieldSetting("field_with_positions_offsets", false, false, true),
+new TestFieldSetting("field_with_positions_payloads", false, true, true)
+
+};
+}
+
+protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
+
+Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
+for (TestFieldSetting field : testDocs[0].fieldSettings) {
+if (field.storedPayloads) {
+mapping.put(field.name, new Analyzer() {
+@Override
+protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
+TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
+filter = new TypeAsPayloadTokenFilter(filter);
+return new TokenStreamComponents(tokenizer, filter);
+}
+
+});
+}
+}
+PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.CURRENT.luceneVersion), mapping);
+
+Directory dir = new RAMDirectory();
+IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);
+
+conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+IndexWriter writer = new IndexWriter(dir, conf);
+
+for (TestDoc doc : testDocs) {
+Document d = new Document();
+d.add(new Field("id", doc.id, StringField.TYPE_STORED));
+for (int i = 0; i < doc.fieldContent.length; i++) {
+FieldType type = new FieldType(TextField.TYPE_STORED);
+TestFieldSetting fieldSetting = doc.fieldSettings[i];
+
+type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
+type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
+type.setStoreTermVectorPositions(fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
+type.setStoreTermVectors(true);
+type.freeze();
+d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
+}
+writer.updateDocument(new Term("id", doc.id), d);
+writer.commit();
+}
+writer.close();
+
+return DirectoryReader.open(dir);
+}
+
+protected void validateResponse(TermVectorResponse esResponse, Fields luceneFields, TestConfig testConfig) throws IOException {
+TestDoc testDoc = testConfig.doc;
+HashSet<String> selectedFields = testConfig.selectedFields == null ? null : new HashSet<String>(
+Arrays.asList(testConfig.selectedFields));
+Fields esTermVectorFields = esResponse.getFields();
+for (TestFieldSetting field : testDoc.fieldSettings) {
+Terms esTerms = esTermVectorFields.terms(field.name);
+if (selectedFields != null && !selectedFields.contains(field.name)) {
+assertNull(esTerms);
+continue;
+}
+
+assertNotNull(esTerms);
+
+Terms luceneTerms = luceneFields.terms(field.name);
+TermsEnum esTermEnum = esTerms.iterator(null);
+TermsEnum luceneTermEnum = luceneTerms.iterator(null);
+
+while (esTermEnum.next() != null) {
+assertNotNull(luceneTermEnum.next());
+
+assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
+DocsAndPositionsEnum esDocsPosEnum = esTermEnum.docsAndPositions(null, null, 0);
+DocsAndPositionsEnum luceneDocsPosEnum = luceneTermEnum.docsAndPositions(null, null, 0);
+if (luceneDocsPosEnum == null) {
+// test we expect that...
+assertFalse(field.storedOffset);
+assertFalse(field.storedPayloads);
+assertFalse(field.storedPositions);
+continue;
+}
+
+String currentTerm = esTermEnum.term().utf8ToString();
+
+assertThat("Token mismatch for field: " + field.name, currentTerm, equalTo(luceneTermEnum.term().utf8ToString()));
+
+esDocsPosEnum.nextDoc();
+luceneDocsPosEnum.nextDoc();
+
+int freq = esDocsPosEnum.freq();
+assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
+for (int i = 0; i < freq; i++) {
+String failDesc = " (field:" + field.name + " term:" + currentTerm + ")";
+int lucenePos = luceneDocsPosEnum.nextPosition();
+int esPos = esDocsPosEnum.nextPosition();
+if (field.storedPositions && testConfig.requestPositions) {
+assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos));
+} else {
+assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1));
+}
+if (field.storedOffset && testConfig.requestOffsets) {
+assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.startOffset(), equalTo(esDocsPosEnum.startOffset()));
+assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.endOffset(), equalTo(esDocsPosEnum.endOffset()));
+} else {
+assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(), equalTo(-1));
+assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1));
+}
+if (field.storedPayloads && testConfig.requestPayloads) {
+assertThat("Payload test failed" + failDesc, luceneDocsPosEnum.getPayload(), equalTo(esDocsPosEnum.getPayload()));
+} else {
+assertThat("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload(), equalTo(null));
+}
+
+}
+}
+
+assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next());
+
+}
+
+}
+
+protected TermVectorRequestBuilder getRequestForConfig(TestConfig config) {
+return client().prepareTermVector(config.doc.index, config.doc.type, config.doc.id).setPayloads(config.requestPayloads)
+.setOffsets(config.requestOffsets).setPositions(config.requestPositions).setFieldStatistics(true).setTermStatistics(true)
+.setSelectedFields(config.selectedFields);
+
+}
+
+protected Fields getTermVectorsFromLucene(DirectoryReader directoryReader, TestDoc doc) throws IOException {
+IndexSearcher searcher = new IndexSearcher(directoryReader);
+TopDocs search = searcher.search(new TermQuery(new Term("id", doc.id)), 1);
+
+ScoreDoc[] scoreDocs = search.scoreDocs;
+assert (scoreDocs.length == 1);
+return directoryReader.getTermVectors(scoreDocs[0].doc);
+}
+
+}

@@ -25,12 +25,9 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticSearchException;
-import org.elasticsearch.action.termvector.TermVectorRequest;
 import org.elasticsearch.action.termvector.TermVectorRequestBuilder;
 import org.elasticsearch.action.termvector.TermVectorResponse;
 import org.elasticsearch.common.io.BytesStream;
-import org.elasticsearch.common.io.stream.InputStreamStreamInput;
-import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
@@ -38,53 +35,13 @@ import org.elasticsearch.test.integration.AbstractSharedClusterTest;
 import org.hamcrest.Matchers;
 import org.junit.Test;
 
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.util.Random;
 
 import static org.hamcrest.Matchers.equalTo;
 
-public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
+public class GetTermVectorCheckDocFreqTests extends AbstractSharedClusterTest {
 
-@Test
-public void streamRequest() throws IOException {
-
-Random random = getRandom();
-for (int i = 0; i < 10; i++) {
-TermVectorRequest request = new TermVectorRequest("index", "type", "id");
-request.offsets(random.nextBoolean());
-request.fieldStatistics(random.nextBoolean());
-request.payloads(random.nextBoolean());
-request.positions(random.nextBoolean());
-request.termStatistics(random.nextBoolean());
-String parent = random.nextBoolean() ? "someParent" : null;
-request.parent(parent);
-String pref = random.nextBoolean() ? "somePreference" : null;
-request.preference(pref);
-
-// write
-ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
-OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
-request.writeTo(out);
-
-// read
-ByteArrayInputStream esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
-InputStreamStreamInput esBuffer = new InputStreamStreamInput(esInBuffer);
-TermVectorRequest req2 = new TermVectorRequest(null, null, null);
-req2.readFrom(esBuffer);
-
-assertThat(request.offsets(), equalTo(req2.offsets()));
-assertThat(request.fieldStatistics(), equalTo(req2.fieldStatistics()));
-assertThat(request.payloads(), equalTo(req2.payloads()));
-assertThat(request.positions(), equalTo(req2.positions()));
-assertThat(request.termStatistics(), equalTo(req2.termStatistics()));
-assertThat(request.preference(), equalTo(pref));
-assertThat(request.routing(), equalTo(parent));
-
-}
-
-}
 
 @Test
 public void testSimpleTermVectors() throws ElasticSearchException, IOException {

@ -19,271 +19,37 @@
|
||||||
|
|
||||||
package org.elasticsearch.test.integration.termvectors;
|
package org.elasticsearch.test.integration.termvectors;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
|
||||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
|
||||||
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
|
||||||
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
|
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|
||||||
import org.apache.lucene.document.*;
|
|
||||||
import org.apache.lucene.index.*;
|
import org.apache.lucene.index.*;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
|
||||||
import org.apache.lucene.search.TermQuery;
|
|
||||||
import org.apache.lucene.search.TopDocs;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.FSDirectory;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.ElasticSearchException;
|
import org.elasticsearch.ElasticSearchException;
|
||||||
import org.elasticsearch.action.ActionFuture;
|
import org.elasticsearch.action.ActionFuture;
|
||||||
import org.elasticsearch.action.termvector.TermVectorRequest;
|
import org.elasticsearch.action.termvector.TermVectorRequest;
|
||||||
import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
|
|
||||||
import org.elasticsearch.action.termvector.TermVectorRequestBuilder;
|
import org.elasticsearch.action.termvector.TermVectorRequestBuilder;
|
||||||
import org.elasticsearch.action.termvector.TermVectorResponse;
|
import org.elasticsearch.action.termvector.TermVectorResponse;
|
||||||
import org.elasticsearch.common.bytes.BytesArray;
|
|
||||||
import org.elasticsearch.common.bytes.BytesReference;
|
|
||||||
import org.elasticsearch.common.io.stream.InputStreamStreamInput;
|
|
||||||
import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;
|
|
||||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||||
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
|
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
|
||||||
import org.elasticsearch.index.mapper.core.TypeParsers;
|
import org.elasticsearch.index.mapper.core.TypeParsers;
|
||||||
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
|
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
|
||||||
import org.elasticsearch.rest.action.termvector.RestTermVectorAction;
|
|
||||||
import org.elasticsearch.test.integration.AbstractSharedClusterTest;
|
|
||||||
import org.hamcrest.Matchers;
|
import org.hamcrest.Matchers;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows;
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
|
||||||
public class GetTermVectorTests extends AbstractSharedClusterTest {
|
public class GetTermVectorTests extends AbstractTermVectorTests {
|
||||||
|
|
-    @Test
-    public void streamTest() throws Exception {
-
-        TermVectorResponse outResponse = new TermVectorResponse("a", "b", "c");
-        outResponse.setExists(true);
-        writeStandardTermVector(outResponse);
-
-        // write
-        ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
-        OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
-        outResponse.writeTo(out);
-
-        // read
-        ByteArrayInputStream esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
-        InputStreamStreamInput esBuffer = new InputStreamStreamInput(esInBuffer);
-        TermVectorResponse inResponse = new TermVectorResponse("a", "b", "c");
-        inResponse.readFrom(esBuffer);
-
-        // see if correct
-        checkIfStandardTermVector(inResponse);
-
-        outResponse = new TermVectorResponse("a", "b", "c");
-        writeEmptyTermVector(outResponse);
-        // write
-        outBuffer = new ByteArrayOutputStream();
-        out = new OutputStreamStreamOutput(outBuffer);
-        outResponse.writeTo(out);
-
-        // read
-        esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
-        esBuffer = new InputStreamStreamInput(esInBuffer);
-        inResponse = new TermVectorResponse("a", "b", "c");
-        inResponse.readFrom(esBuffer);
-        assertTrue(inResponse.isExists());
-    }
-
-    private void checkIfStandardTermVector(TermVectorResponse inResponse) throws IOException {
-        Fields fields = inResponse.getFields();
-        assertThat(fields.terms("title"), Matchers.notNullValue());
-        assertThat(fields.terms("desc"), Matchers.notNullValue());
-        assertThat(fields.size(), equalTo(2));
-    }
-
-    private void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException {
-        Directory dir = FSDirectory.open(new File("/tmp/foo"));
-        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
-        conf.setOpenMode(OpenMode.CREATE);
-        IndexWriter writer = new IndexWriter(dir, conf);
-        FieldType type = new FieldType(TextField.TYPE_STORED);
-        type.setStoreTermVectorOffsets(true);
-        type.setStoreTermVectorPayloads(false);
-        type.setStoreTermVectorPositions(true);
-        type.setStoreTermVectors(true);
-        type.freeze();
-        Document d = new Document();
-        d.add(new Field("id", "abc", StringField.TYPE_STORED));
-
-        writer.updateDocument(new Term("id", "abc"), d);
-        writer.commit();
-        writer.close();
-        DirectoryReader dr = DirectoryReader.open(dir);
-        IndexSearcher s = new IndexSearcher(dr);
-        TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
-        ScoreDoc[] scoreDocs = search.scoreDocs;
-        int doc = scoreDocs[0].doc;
-        Fields fields = dr.getTermVectors(doc);
-        EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
-        outResponse.setFields(fields, null, flags, fields);
-        outResponse.setExists(true);
-    }
-
-    private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException {
-        Directory dir = FSDirectory.open(new File("/tmp/foo"));
-        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
-        conf.setOpenMode(OpenMode.CREATE);
-        IndexWriter writer = new IndexWriter(dir, conf);
-        FieldType type = new FieldType(TextField.TYPE_STORED);
-        type.setStoreTermVectorOffsets(true);
-        type.setStoreTermVectorPayloads(false);
-        type.setStoreTermVectorPositions(true);
-        type.setStoreTermVectors(true);
-        type.freeze();
-        Document d = new Document();
-        d.add(new Field("id", "abc", StringField.TYPE_STORED));
-        d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
-        d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));
-
-        writer.updateDocument(new Term("id", "abc"), d);
-        writer.commit();
-        writer.close();
-        DirectoryReader dr = DirectoryReader.open(dir);
-        IndexSearcher s = new IndexSearcher(dr);
-        TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
-        ScoreDoc[] scoreDocs = search.scoreDocs;
-        int doc = scoreDocs[0].doc;
-        Fields fields = dr.getTermVectors(doc);
-        EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
-        outResponse.setFields(fields, null, flags, fields);
-    }
-
-    private Fields buildWithLuceneAndReturnFields(String docId, String[] fields, String[] content, boolean[] withPositions,
-            boolean[] withOffsets, boolean[] withPayloads) throws IOException {
-        assert (fields.length == withPayloads.length);
-        assert (content.length == withPayloads.length);
-        assert (withPositions.length == withPayloads.length);
-        assert (withOffsets.length == withPayloads.length);
-
-        Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
-        for (int i = 0; i < withPayloads.length; i++) {
-            if (withPayloads[i]) {
-                mapping.put(fields[i], new Analyzer() {
-                    @Override
-                    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-                        Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
-                        TokenFilter filter = new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer);
-                        filter = new TypeAsPayloadTokenFilter(filter);
-                        return new TokenStreamComponents(tokenizer, filter);
-                    }
-                });
-            }
-        }
-        PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(TEST_VERSION_CURRENT), mapping);
-
-        Directory dir = FSDirectory.open(new File("/tmp/foo"));
-        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, wrapper);
-        conf.setOpenMode(OpenMode.CREATE);
-        IndexWriter writer = new IndexWriter(dir, conf);
-
-        Document d = new Document();
-        for (int i = 0; i < fields.length; i++) {
-            d.add(new Field("id", docId, StringField.TYPE_STORED));
-            FieldType type = new FieldType(TextField.TYPE_STORED);
-            type.setStoreTermVectorOffsets(withOffsets[i]);
-            type.setStoreTermVectorPayloads(withPayloads[i]);
-            type.setStoreTermVectorPositions(withPositions[i] || withOffsets[i] || withPayloads[i]);
-            type.setStoreTermVectors(true);
-            type.freeze();
-            d.add(new Field(fields[i], content[i], type));
-            writer.updateDocument(new Term("id", docId), d);
-            writer.commit();
-        }
-        writer.close();
-
-        DirectoryReader dr = DirectoryReader.open(dir);
-        IndexSearcher s = new IndexSearcher(dr);
-        TopDocs search = s.search(new TermQuery(new Term("id", docId)), 1);
-        ScoreDoc[] scoreDocs = search.scoreDocs;
-        assert (scoreDocs.length == 1);
-        int doc = scoreDocs[0].doc;
-        Fields returnFields = dr.getTermVectors(doc);
-        return returnFields;
-    }
-
-    @Test
-    public void testRestRequestParsing() throws Exception {
-        BytesReference inputBytes = new BytesArray(
-                " {\"fields\" : [\"a\", \"b\",\"c\"], \"offsets\":false, \"positions\":false, \"payloads\":true}");
-        TermVectorRequest tvr = new TermVectorRequest(null, null, null);
-        RestTermVectorAction.parseRequest(inputBytes, tvr);
-        Set<String> fields = tvr.selectedFields();
-        assertThat(fields.contains("a"), equalTo(true));
-        assertThat(fields.contains("b"), equalTo(true));
-        assertThat(fields.contains("c"), equalTo(true));
-        assertThat(tvr.offsets(), equalTo(false));
-        assertThat(tvr.positions(), equalTo(false));
-        assertThat(tvr.payloads(), equalTo(true));
-        String additionalFields = "b,c ,d, e ";
-        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
-        assertThat(tvr.selectedFields().size(), equalTo(5));
-        assertThat(fields.contains("d"), equalTo(true));
-        assertThat(fields.contains("e"), equalTo(true));
-
-        additionalFields = "";
-        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
-
-        inputBytes = new BytesArray(" {\"offsets\":false, \"positions\":false, \"payloads\":true}");
-        tvr = new TermVectorRequest(null, null, null);
-        RestTermVectorAction.parseRequest(inputBytes, tvr);
-        additionalFields = "";
-        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
-        assertThat(tvr.selectedFields(), equalTo(null));
-        additionalFields = "b,c ,d, e ";
-        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
-        assertThat(tvr.selectedFields().size(), equalTo(4));
-    }
-
-    @Test
-    public void testRestRequestParsingThrowsException() throws Exception {
-        BytesReference inputBytes = new BytesArray(
-                " {\"fields\" : \"a, b,c \", \"offsets\":false, \"positions\":false, \"payloads\":true, \"meaningless_term\":2}");
-        TermVectorRequest tvr = new TermVectorRequest(null, null, null);
-        boolean threwException = false;
-        try {
-            RestTermVectorAction.parseRequest(inputBytes, tvr);
-        } catch (Exception e) {
-            threwException = true;
-        }
-        assertThat(threwException, equalTo(true));
-    }

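The serialization tests removed above reappear in the new TermVectorUnitTests class later in this commit. They all follow the same stream round-trip pattern; a minimal sketch of it, using only calls that appear in the tests themselves (the roundTrip helper name is ours, not part of the codebase):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;

    import org.elasticsearch.action.termvector.TermVectorResponse;
    import org.elasticsearch.common.io.stream.InputStreamStreamInput;
    import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;

    class RoundTripSketch {
        // Serialize a response to bytes and read it back into a fresh instance.
        static TermVectorResponse roundTrip(TermVectorResponse original) throws Exception {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            OutputStreamStreamOutput out = new OutputStreamStreamOutput(bytes);
            original.writeTo(out);                       // write the transport format

            InputStreamStreamInput in = new InputStreamStreamInput(
                    new ByteArrayInputStream(bytes.toByteArray()));
            TermVectorResponse copy = new TermVectorResponse("index", "type", "id");
            copy.readFrom(in);                           // read it back
            return copy;                                 // caller asserts fields match
        }
    }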
     @Test
     public void testNoSuchDoc() throws Exception {

-        run(addMapping(prepareCreate("test"), "type1", new Object[] { "field", "type", "string", "term_vector",
-                "with_positions_offsets_payloads" }));
+        run(addMapping(prepareCreate("test"), "type1", new Object[]{"field", "type", "string", "term_vector",
+                "with_positions_offsets_payloads"}));

         ensureYellow();

@@ -346,7 +112,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
     public void testSimpleTermVectors() throws ElasticSearchException, IOException {

         run(addMapping(prepareCreate("test"), "type1",
-                new Object[] { "field", "type", "string", "term_vector", "with_positions_offsets_payloads", "analyzer", "tv_test" })
+                new Object[]{"field", "type", "string", "term_vector", "with_positions_offsets_payloads", "analyzer", "tv_test"})
                 .setSettings(
                         ImmutableSettings.settingsBuilder().put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                                 .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
@@ -359,11 +125,11 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
                     .endObject()).execute().actionGet();
             refresh();
         }
-        String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
-        int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
-        int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
-        int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
-        int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };
+        String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
+        int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
+        int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
+        int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
+        int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};
         for (int i = 0; i < 10; i++) {
             TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)).setPayloads(true)
                     .setOffsets(true).setPositions(true).setSelectedFields();
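For reference, the expected arrays above encode the whitespace analysis of "the quick brown fox jumps over the lazy dog": "brown" is token position 2 with character offsets [10, 15), "dog" is position 8 with offsets [40, 43), and "the" occurs twice, at positions 0 and 6 with offsets [0, 3) and [31, 34).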
@@ -405,9 +171,8 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {

     @Test
     public void testRandomSingleTermVectors() throws ElasticSearchException, IOException {
-        Random random = getRandom();
         FieldType ft = new FieldType();
-        int config = random.nextInt(6);
+        int config = randomInt(6);
         boolean storePositions = false;
         boolean storeOffsets = false;
         boolean storePayloads = false;
@@ -451,7 +216,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {

         String optionString = AbstractFieldMapper.termVectorOptionsToString(ft);
         run(addMapping(prepareCreate("test"), "type1",
-                new Object[] { "field", "type", "string", "term_vector", optionString, "analyzer", "tv_test" }).setSettings(
+                new Object[]{"field", "type", "string", "term_vector", optionString, "analyzer", "tv_test"}).setSettings(
                 ImmutableSettings.settingsBuilder().put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                         .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
         ensureYellow();
@@ -463,15 +228,15 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
                     .endObject()).execute().actionGet();
             refresh();
         }
-        String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
-        int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
-        int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
-        int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
-        int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };
+        String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
+        int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
+        int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
+        int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
+        int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};

-        boolean isPayloadRequested = random.nextBoolean();
-        boolean isOffsetRequested = random.nextBoolean();
-        boolean isPositionsRequested = random.nextBoolean();
+        boolean isPayloadRequested = randomBoolean();
+        boolean isOffsetRequested = randomBoolean();
+        boolean isPositionsRequested = randomBoolean();
         String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested, optionString);
         for (int i = 0; i < 10; i++) {
             TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
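The hunks above swap java.util.Random calls for randomBoolean()/randomInt(), presumably helpers inherited from the shared test base class. A condensed sketch of the resulting pattern, with index, type, and flag names taken from the test:

    boolean isPayloadRequested = randomBoolean();    // helper from the test base class (assumed)
    boolean isOffsetRequested = randomBoolean();
    boolean isPositionsRequested = randomBoolean();
    TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", "0")
            .setPayloads(isPayloadRequested)
            .setOffsets(isOffsetRequested)
            .setPositions(isPositionsRequested);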
@@ -552,147 +317,31 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
     }

     @Test
-    public void testDuellESLucene() throws Exception {
-
-        String[] fieldNames = { "field_that_should_not_be_requested", "field_with_positions", "field_with_offsets", "field_with_only_tv",
-                "field_with_positions_offsets", "field_with_positions_payloads" };
-        run(addMapping(prepareCreate("test"), "type1",
-                new Object[] { fieldNames[0], "type", "string", "term_vector", "with_positions_offsets" },
-                new Object[] { fieldNames[1], "type", "string", "term_vector", "with_positions" },
-                new Object[] { fieldNames[2], "type", "string", "term_vector", "with_offsets" },
-                new Object[] { fieldNames[3], "type", "string", "store_term_vectors", "yes" },
-                new Object[] { fieldNames[4], "type", "string", "term_vector", "with_positions_offsets" },
-                new Object[] { fieldNames[5], "type", "string", "term_vector", "with_positions_payloads", "analyzer", "tv_test" })
-                .setSettings(
-                        ImmutableSettings.settingsBuilder().put("index.analysis.analyzer.tv_test.tokenizer", "standard")
-                                .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
-
-        ensureYellow();
-        // would also work with XContentBuilder xcb = new XContentBuilder();
-
-        // now, create the same thing with lucene and see if the returned stuff
-        // is the same
-
-        String[] fieldContent = { "the quick shard jumps over the stupid brain", "here is another field",
-                "And yet another field withut any use.", "I am out of ideas on what to type here.",
-                "The last field for which offsets are stored but not positons.",
-                "The last field for which offsets are stored but not positons." };
-
-        boolean[] storeOffsets = { true, false, true, false, true, false };
-        boolean[] storePositions = { true, true, false, false, true, true };
-        boolean[] storePayloads = { false, false, false, false, false, true };
-        Map<String, Object> testSource = new HashMap<String, Object>();
-
-        for (int i = 0; i < fieldNames.length; i++) {
-            testSource.put(fieldNames[i], fieldContent[i]);
-        }
-
-        client().prepareIndex("test", "type1", "1").setSource(testSource).execute().actionGet();
-        refresh();
-
-        String[] selectedFields = { fieldNames[1], fieldNames[2], fieldNames[3], fieldNames[4], fieldNames[5] };
-
-        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, false, false, false);
-        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, true, false, false);
-        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, false, true, false);
-        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, true, true, false);
-        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, true, false, true);
-        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, true, true, true);
-    }
-
-    private void testForConfig(String[] fieldNames, String[] fieldContent, boolean[] storeOffsets, boolean[] storePositions,
-            boolean[] storePayloads, String[] selectedFields, boolean withPositions, boolean withOffsets, boolean withPayloads)
-            throws IOException {
-        TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", "1").setPayloads(withPayloads).setOffsets(withOffsets)
-                .setPositions(withPositions).setFieldStatistics(true).setTermStatistics(true).setSelectedFields(selectedFields);
-        TermVectorResponse response = resp.execute().actionGet();
-
-        // build the same with lucene and compare the Fields
-        Fields luceneFields = buildWithLuceneAndReturnFields("1", fieldNames, fieldContent, storePositions, storeOffsets, storePayloads);
-
-        HashMap<String, Boolean> storeOfsetsMap = new HashMap<String, Boolean>();
-        HashMap<String, Boolean> storePositionsMap = new HashMap<String, Boolean>();
-        HashMap<String, Boolean> storePayloadsMap = new HashMap<String, Boolean>();
-        for (int i = 0; i < storePositions.length; i++) {
-            storeOfsetsMap.put(fieldNames[i], storeOffsets[i]);
-            storePositionsMap.put(fieldNames[i], storePositions[i]);
-            storePayloadsMap.put(fieldNames[i], storePayloads[i]);
-        }
-
-        compareLuceneESTermVectorResults(response.getFields(), luceneFields, storePositionsMap, storeOfsetsMap, storePayloadsMap,
-                withPositions, withOffsets, withPayloads, selectedFields);
-    }
-
-    private void compareLuceneESTermVectorResults(Fields fields, Fields luceneFields, HashMap<String, Boolean> storePositionsMap,
-            HashMap<String, Boolean> storeOfsetsMap, HashMap<String, Boolean> storePayloadsMap, boolean getPositions, boolean getOffsets,
-            boolean getPayloads, String[] selectedFields) throws IOException {
-        HashSet<String> selectedFieldsMap = new HashSet<String>(Arrays.asList(selectedFields));
-
-        Iterator<String> luceneFieldNames = luceneFields.iterator();
-        assertThat(luceneFields.size(), equalTo(storeOfsetsMap.size()));
-        assertThat(fields.size(), equalTo(selectedFields.length));
-
-        while (luceneFieldNames.hasNext()) {
-            String luceneFieldName = luceneFieldNames.next();
-            if (!selectedFieldsMap.contains(luceneFieldName))
-                continue;
-            Terms esTerms = fields.terms(luceneFieldName);
-            Terms luceneTerms = luceneFields.terms(luceneFieldName);
-            TermsEnum esTermEnum = esTerms.iterator(null);
-            TermsEnum luceneTermEnum = luceneTerms.iterator(null);
-
-            int numTerms = 0;
-
-            while (esTermEnum.next() != null) {
-                luceneTermEnum.next();
-                assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
-                DocsAndPositionsEnum esDocsPosEnum = esTermEnum.docsAndPositions(null, null, 0);
-                DocsAndPositionsEnum luceneDocsPosEnum = luceneTermEnum.docsAndPositions(null, null, 0);
-                if (luceneDocsPosEnum == null) {
-                    assertThat(storeOfsetsMap.get(luceneFieldName), equalTo(false));
-                    assertThat(storePayloadsMap.get(luceneFieldName), equalTo(false));
-                    assertThat(storePositionsMap.get(luceneFieldName), equalTo(false));
-                    continue;
-                }
-                numTerms++;
-
-                assertThat("failed for field: " + luceneFieldName, esTermEnum.term().utf8ToString(), equalTo(luceneTermEnum.term()
-                        .utf8ToString()));
-                esDocsPosEnum.nextDoc();
-                luceneDocsPosEnum.nextDoc();
-
-                int freq = (int) esDocsPosEnum.freq();
-                assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
-                for (int i = 0; i < freq; i++) {
-                    int lucenePos = luceneDocsPosEnum.nextPosition();
-                    int esPos = esDocsPosEnum.nextPosition();
-                    if (storePositionsMap.get(luceneFieldName) && getPositions) {
-                        assertThat(luceneFieldName, lucenePos, equalTo(esPos));
-                    } else {
-                        assertThat(esPos, equalTo(-1));
-                    }
-                    if (storeOfsetsMap.get(luceneFieldName) && getOffsets) {
-                        assertThat(luceneDocsPosEnum.startOffset(), equalTo(esDocsPosEnum.startOffset()));
-                        assertThat(luceneDocsPosEnum.endOffset(), equalTo(esDocsPosEnum.endOffset()));
-                    } else {
-                        assertThat(esDocsPosEnum.startOffset(), equalTo(-1));
-                        assertThat(esDocsPosEnum.endOffset(), equalTo(-1));
-                    }
-                    if (storePayloadsMap.get(luceneFieldName) && getPayloads) {
-                        assertThat(luceneFieldName, luceneDocsPosEnum.getPayload(), equalTo(esDocsPosEnum.getPayload()));
-                    } else {
-                        assertThat(esDocsPosEnum.getPayload(), equalTo(null));
-                    }
-                }
-            }
-        }
-    }
+    public void testDuelESLucene() throws Exception {
+        TestFieldSetting[] testFieldSettings = getFieldSettings();
+        createIndexBasedOnFieldSettings(testFieldSettings, -1);
+        TestDoc[] testDocs = generateTestDocs(5, testFieldSettings);
+
+        // for (int i=0;i<testDocs.length;i++)
+        //     logger.info("Doc: {}",testDocs[i]);
+        DirectoryReader directoryReader = indexDocsWithLucene(testDocs);
+        TestConfig[] testConfigs = generateTestConfigs(20, testDocs, testFieldSettings);
+
+        for (TestConfig test : testConfigs) {
+            try {
+                TermVectorRequestBuilder request = getRequestForConfig(test);
+                if (test.expectedException != null) {
+                    assertThrows(request, test.expectedException);
+                    continue;
+                }
+
+                TermVectorResponse response = run(request);
+                Fields luceneTermVectors = getTermVectorsFromLucene(directoryReader, test.doc);
+                validateResponse(response, luceneTermVectors, test);
+            } catch (Throwable t) {
+                throw new Exception("Test exception while running " + test.toString(), t);
+            }
+        }
+    }

     @Test
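The rewritten testDuelESLucene delegates indexing and validation to helpers (indexDocsWithLucene, getRequestForConfig, getTermVectorsFromLucene, validateResponse), presumably defined in the new AbstractTermVectorTests base class that GetTermVectorTests now extends. The core check remains the lockstep walk over two Terms instances that the removed compareLuceneESTermVectorResults performed; a minimal sketch of that walk, using the Lucene 4.x enum API seen throughout this diff:

    import java.io.IOException;

    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;

    class TermWalkSketch {
        // Compare an Elasticsearch term vector against one built directly by Lucene:
        // advance both enums together and check each term and its total frequency.
        static void assertSameTerms(Terms esTerms, Terms luceneTerms) throws IOException {
            TermsEnum es = esTerms.iterator(null);
            TermsEnum lucene = luceneTerms.iterator(null);
            while (es.next() != null) {
                lucene.next();
                assert es.term().utf8ToString().equals(lucene.term().utf8ToString());
                assert es.totalTermFreq() == lucene.totalTermFreq();
            }
        }
    }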
TermVectorUnitTests.java (new file)
@@ -0,0 +1,256 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.test.unit.termvectors;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.*;
+import org.apache.lucene.index.*;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
+import org.elasticsearch.action.termvector.TermVectorRequest;
+import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
+import org.elasticsearch.action.termvector.TermVectorResponse;
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.InputStreamStreamInput;
+import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.rest.action.termvector.RestTermVectorAction;
+import org.hamcrest.Matchers;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Set;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class TermVectorUnitTests extends org.elasticsearch.test.integration.ElasticsearchTestCase {
+
+    @Test
+    public void streamResponse() throws Exception {
+        TermVectorResponse outResponse = new TermVectorResponse("a", "b", "c");
+        outResponse.setExists(true);
+        writeStandardTermVector(outResponse);
+
+        // write
+        ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
+        OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
+        outResponse.writeTo(out);
+
+        // read
+        ByteArrayInputStream esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
+        InputStreamStreamInput esBuffer = new InputStreamStreamInput(esInBuffer);
+        TermVectorResponse inResponse = new TermVectorResponse("a", "b", "c");
+        inResponse.readFrom(esBuffer);
+
+        // see if correct
+        checkIfStandardTermVector(inResponse);
+
+        outResponse = new TermVectorResponse("a", "b", "c");
+        writeEmptyTermVector(outResponse);
+        // write
+        outBuffer = new ByteArrayOutputStream();
+        out = new OutputStreamStreamOutput(outBuffer);
+        outResponse.writeTo(out);
+
+        // read
+        esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
+        esBuffer = new InputStreamStreamInput(esInBuffer);
+        inResponse = new TermVectorResponse("a", "b", "c");
+        inResponse.readFrom(esBuffer);
+        assertTrue(inResponse.isExists());
+    }
+
+    private void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException {
+        Directory dir = new RAMDirectory();
+        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
+        conf.setOpenMode(OpenMode.CREATE);
+        IndexWriter writer = new IndexWriter(dir, conf);
+        FieldType type = new FieldType(TextField.TYPE_STORED);
+        type.setStoreTermVectorOffsets(true);
+        type.setStoreTermVectorPayloads(false);
+        type.setStoreTermVectorPositions(true);
+        type.setStoreTermVectors(true);
+        type.freeze();
+        Document d = new Document();
+        d.add(new Field("id", "abc", StringField.TYPE_STORED));
+
+        writer.updateDocument(new Term("id", "abc"), d);
+        writer.commit();
+        writer.close();
+        DirectoryReader dr = DirectoryReader.open(dir);
+        IndexSearcher s = new IndexSearcher(dr);
+        TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
+        ScoreDoc[] scoreDocs = search.scoreDocs;
+        int doc = scoreDocs[0].doc;
+        Fields fields = dr.getTermVectors(doc);
+        EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
+        outResponse.setFields(fields, null, flags, fields);
+        outResponse.setExists(true);
+    }
+
+    private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException {
+        Directory dir = FSDirectory.open(new File("/tmp/foo"));
+        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
+
+        conf.setOpenMode(OpenMode.CREATE);
+        IndexWriter writer = new IndexWriter(dir, conf);
+        FieldType type = new FieldType(TextField.TYPE_STORED);
+        type.setStoreTermVectorOffsets(true);
+        type.setStoreTermVectorPayloads(false);
+        type.setStoreTermVectorPositions(true);
+        type.setStoreTermVectors(true);
+        type.freeze();
+        Document d = new Document();
+        d.add(new Field("id", "abc", StringField.TYPE_STORED));
+        d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
+        d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));
+
+        writer.updateDocument(new Term("id", "abc"), d);
+        writer.commit();
+        writer.close();
+        DirectoryReader dr = DirectoryReader.open(dir);
+        IndexSearcher s = new IndexSearcher(dr);
+        TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
+        ScoreDoc[] scoreDocs = search.scoreDocs;
+        int doc = scoreDocs[0].doc;
+        Fields termVectors = dr.getTermVectors(doc);
+        EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
+        outResponse.setFields(termVectors, null, flags, termVectors);
+    }
+
+    private void checkIfStandardTermVector(TermVectorResponse inResponse) throws IOException {
+        Fields fields = inResponse.getFields();
+        assertThat(fields.terms("title"), Matchers.notNullValue());
+        assertThat(fields.terms("desc"), Matchers.notNullValue());
+        assertThat(fields.size(), equalTo(2));
+    }
+
+    @Test
+    public void testRestRequestParsing() throws Exception {
+        BytesReference inputBytes = new BytesArray(
+                " {\"fields\" : [\"a\", \"b\",\"c\"], \"offsets\":false, \"positions\":false, \"payloads\":true}");
+
+        TermVectorRequest tvr = new TermVectorRequest(null, null, null);
+        XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(inputBytes);
+        TermVectorRequest.parseRequest(tvr, parser);
+
+        Set<String> fields = tvr.selectedFields();
+        assertThat(fields.contains("a"), equalTo(true));
+        assertThat(fields.contains("b"), equalTo(true));
+        assertThat(fields.contains("c"), equalTo(true));
+        assertThat(tvr.offsets(), equalTo(false));
+        assertThat(tvr.positions(), equalTo(false));
+        assertThat(tvr.payloads(), equalTo(true));
+        String additionalFields = "b,c ,d, e ";
+        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
+        assertThat(tvr.selectedFields().size(), equalTo(5));
+        assertThat(fields.contains("d"), equalTo(true));
+        assertThat(fields.contains("e"), equalTo(true));
+
+        additionalFields = "";
+        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
+
+        inputBytes = new BytesArray(" {\"offsets\":false, \"positions\":false, \"payloads\":true}");
+        tvr = new TermVectorRequest(null, null, null);
+        parser = XContentFactory.xContent(XContentType.JSON).createParser(inputBytes);
+        TermVectorRequest.parseRequest(tvr, parser);
+        additionalFields = "";
+        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
+        assertThat(tvr.selectedFields(), equalTo(null));
+        additionalFields = "b,c ,d, e ";
+        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
+        assertThat(tvr.selectedFields().size(), equalTo(4));
+    }
+
+    @Test
+    public void testRequestParsingThrowsException() throws Exception {
+        BytesReference inputBytes = new BytesArray(
+                " {\"fields\" : \"a, b,c \", \"offsets\":false, \"positions\":false, \"payloads\":true, \"meaningless_term\":2}");
+        TermVectorRequest tvr = new TermVectorRequest(null, null, null);
+        boolean threwException = false;
+        try {
+            XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(inputBytes);
+            TermVectorRequest.parseRequest(tvr, parser);
+        } catch (Exception e) {
+            threwException = true;
+        }
+        assertThat(threwException, equalTo(true));
+    }
+
+    @Test
+    public void streamRequest() throws IOException {
+        for (int i = 0; i < 10; i++) {
+            TermVectorRequest request = new TermVectorRequest("index", "type", "id");
+            request.offsets(randomBoolean());
+            request.fieldStatistics(randomBoolean());
+            request.payloads(randomBoolean());
+            request.positions(randomBoolean());
+            request.termStatistics(randomBoolean());
+            String parent = randomBoolean() ? "someParent" : null;
+            request.parent(parent);
+            String pref = randomBoolean() ? "somePreference" : null;
+            request.preference(pref);
+
+            // write
+            ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
+            OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
+            request.writeTo(out);
+
+            // read
+            ByteArrayInputStream esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
+            InputStreamStreamInput esBuffer = new InputStreamStreamInput(esInBuffer);
+            TermVectorRequest req2 = new TermVectorRequest(null, null, null);
+            req2.readFrom(esBuffer);
+
+            assertThat(request.offsets(), equalTo(req2.offsets()));
+            assertThat(request.fieldStatistics(), equalTo(req2.fieldStatistics()));
+            assertThat(request.payloads(), equalTo(req2.payloads()));
+            assertThat(request.positions(), equalTo(req2.positions()));
+            assertThat(request.termStatistics(), equalTo(req2.termStatistics()));
+            assertThat(request.preference(), equalTo(pref));
+            assertThat(request.routing(), equalTo(parent));
+        }
+    }
+}
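The request-parsing path exercised by testRestRequestParsing above accepts any JSON body through an XContentParser. Condensed usage, with a hypothetical body (the JSON keys match those used in the tests):

    BytesReference body = new BytesArray("{\"fields\":[\"title\"], \"positions\":true}");
    TermVectorRequest request = new TermVectorRequest("index", "type", "1");
    XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(body);
    TermVectorRequest.parseRequest(request, parser);  // fills selected fields and flags from the body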