Refactor term vector API
This is necessary to allow adding a multi term vector request
This commit is contained in:
parent a09f217b45
commit 18c71b16b5
@@ -19,18 +19,9 @@
package org.elasticsearch.action.termvector;

import static org.apache.lucene.util.ArrayUtil.grow;
import gnu.trove.impl.Constants;
import gnu.trove.map.hash.TObjectLongHashMap;

import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;

import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

@@ -38,16 +29,22 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamInput;

import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;

import static org.apache.lucene.util.ArrayUtil.grow;

/**
 * This class represents the result of a {@link TermVectorRequest}. It works
 * exactly like the {@link Fields} class except for one thing: it can return
 * offsets and payloads even if positions are not present. You must still call
 * nextPosition() to move the counter, although this method only returns
 * <tt>-1</tt> if no positions were returned by the {@link TermVectorRequest}.
 *
 * <p/>
 * The data is stored in two byte arrays ({@code headerRef} and
 * {@code termVectors}, both {@link BytesRef}) that have the following format:
 * <p>
 * <p/>
 * {@code headerRef}: Stores offsets per field in the {@code termVectors} array
 * and some header information as {@link BytesRef}. Format is
 * <ul>

@@ -64,9 +61,9 @@ import org.elasticsearch.common.io.stream.BytesStreamInput;
 * <li>vint: offset in {@code termVectors} for last field</li>
 * </ul>
 * </ul>
 *
 * <p/>
 * termVectors: Stores the actual term vectors as a {@link BytesRef}.
 *
 * <p/>
 * Term vectors for each field are stored in blocks, one for each field. The
 * offsets in {@code headerRef} are used to find where the block for a field
 * starts. Each block begins with a

@@ -84,14 +81,14 @@ import org.elasticsearch.common.io.stream.BytesStreamInput;
 * <li>vint: number of documents in the shard that have an entry for this field
 * (docCount)</li>
 * </ul>
 *
 * <p/>
 * After that, for each term it stores
 * <ul>
 * <ul>
 * <li>vint: term length</li>
 * <li>BytesRef: term name</li>
 * </ul>
 *
 * <p/>
 * If term statistics are requested ({@code hasTermStatistics} is true, see
 * {@code headerRef}):
 * <ul>

@@ -111,7 +108,6 @@ import org.elasticsearch.common.io.stream.BytesStreamInput;
 * <li>BytesRef: payload_frequency (if payloads == true)</li>
 * <ul>
 * </ul> </ul>
 *
 */

public final class TermVectorFields extends Fields {

@@ -122,17 +118,14 @@ public final class TermVectorFields extends Fields {
    final boolean hasFieldStatistic;

    /**
     * @param headerRef
     *            Stores offsets per field in the {@code termVectors} and some
     * @param headerRef Stores offsets per field in the {@code termVectors} and some
     *            header information as {@link BytesRef}.
     *
     * @param termVectors
     *            Stores the actual term vectors as a {@link BytesRef}.
     *
     * @param termVectors Stores the actual term vectors as a {@link BytesRef}.
     */
    public TermVectorFields(BytesReference headerRef, BytesReference termVectors) throws IOException {
        BytesStreamInput header = new BytesStreamInput(headerRef);
        fieldMap = new TObjectLongHashMap<String>();
        fieldMap = new TObjectLongHashMap<String>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1);

        // here we read the header to fill the field offset map
        String headerString = header.readString();
        assert headerString.equals("TV");

@@ -159,6 +152,9 @@ public final class TermVectorFields extends Fields {
        // first, find where in the termVectors bytes the actual term vector for
        // this field is stored
        Long offset = fieldMap.get(field);
        if (offset.longValue() < 0) {
            return null; // we don't have it.
        }
        final BytesStreamInput perFieldTermVectorInput = new BytesStreamInput(this.termVectors);
        perFieldTermVectorInput.reset();
        perFieldTermVectorInput.skip(offset.longValue());
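The constructor above only shows the magic-string check; the rest of the header walk is elided by the hunk. As a purely hypothetical sketch of the decoding loop it performs (the vint count and the (string, vlong) pairs here are assumptions for illustration, not the commit's exact layout — that lives in the elided javadoc list):

    // Hypothetical sketch only -- NOT the commit's code. Assumes the header is the
    // "TV" marker followed by a vint field count and (field name, offset) pairs.
    BytesStreamInput header = new BytesStreamInput(headerRef);
    assert header.readString().equals("TV");                   // marker, as asserted above
    int numFields = header.readVInt();                         // assumed field count
    for (int i = 0; i < numFields; i++) {
        fieldMap.put(header.readString(), header.readVLong()); // field -> offset into termVectors
    }
    // terms(field) then looks the offset up in fieldMap (-1 means "no entry", see the
    // Trove constructor above) and skips to it, as shown in the last hunk.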
@@ -19,23 +19,22 @@

package org.elasticsearch.action.termvector;

import java.io.IOException;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;

import com.google.common.collect.Sets;
import org.elasticsearch.ElasticSearchParseException;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.ValidateActions;
import org.elasticsearch.action.support.single.shard.SingleShardOperationRequest;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentParser;

import com.google.common.collect.Sets;
import java.io.IOException;
import java.util.*;

/**
 * Request returning the term vector (doc frequency, positions, offsets) for a
 * document.
 * <p>
 * <p/>
 * Note, the {@link #index()}, {@link #type(String)} and {@link #id(String)} are
 * required.
 */

@@ -49,6 +48,7 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorRequest>

    protected String preference;

    // TODO: change to String[]
    private Set<String> selectedFields;

    private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads,

@@ -246,6 +246,13 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorRequest>
        return validationException;
    }

    public static TermVectorRequest readTermVectorRequest(StreamInput in) throws IOException {
        TermVectorRequest termVectorRequest = new TermVectorRequest();
        termVectorRequest.readFrom(in);
        return termVectorRequest;
    }

    @Override
    public void readFrom(StreamInput in) throws IOException {
        super.readFrom(in);

@@ -300,4 +307,60 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorRequest>
        // the ordinal for encoding! Only append to the end!
        Positions, Offsets, Payloads, FieldStatistics, TermStatistics;
    }

    /**
     * Populates a request object (pre-populated with defaults) based on a parser.
     *
     * @param termVectorRequest
     * @param parser
     * @throws IOException
     */
    public static void parseRequest(TermVectorRequest termVectorRequest, XContentParser parser) throws IOException {
        XContentParser.Token token;
        String currentFieldName = null;
        List<String> fields = new ArrayList<String>();
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (currentFieldName != null) {
                if (currentFieldName.equals("fields")) {

                    if (token == XContentParser.Token.START_ARRAY) {
                        while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
                            fields.add(parser.text());
                        }
                    } else {
                        throw new ElasticSearchParseException(
                                "The parameter fields must be given as an array! Use syntax : \"fields\" : [\"field1\", \"field2\",...]");
                    }
                } else if (currentFieldName.equals("offsets")) {
                    termVectorRequest.offsets(parser.booleanValue());
                } else if (currentFieldName.equals("positions")) {
                    termVectorRequest.positions(parser.booleanValue());
                } else if (currentFieldName.equals("payloads")) {
                    termVectorRequest.payloads(parser.booleanValue());
                } else if (currentFieldName.equals("term_statistics") || currentFieldName.equals("termStatistics")) {
                    termVectorRequest.termStatistics(parser.booleanValue());
                } else if (currentFieldName.equals("field_statistics") || currentFieldName.equals("fieldStatistics")) {
                    termVectorRequest.fieldStatistics(parser.booleanValue());
                } else if ("_index".equals(currentFieldName)) { // the following is important for multi request parsing.
                    termVectorRequest.index = parser.text();
                } else if ("_type".equals(currentFieldName)) {
                    termVectorRequest.type = parser.text();
                } else if ("_id".equals(currentFieldName)) {
                    termVectorRequest.id = parser.text();
                } else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) {
                    termVectorRequest.routing = parser.text();
                } else {
                    throw new ElasticSearchParseException("The parameter " + currentFieldName
                            + " is not valid for term vector request!");
                }
            }
        }

        if (fields.size() > 0) {
            String[] fieldsAsArray = new String[fields.size()];
            termVectorRequest.selectedFields(fields.toArray(fieldsAsArray));
        }
    }
}
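For client code, nothing changes: requests are still built through TermVectorRequestBuilder. A usage sketch assembled from the builder calls exercised by this commit's tests (the index/type/id values and field name are illustrative):

    // Each setter maps to one of the Flag values in the enum above.
    TermVectorResponse response = client().prepareTermVector("test", "type1", "0")
            .setPositions(true)        // Flag.Positions
            .setOffsets(true)          // Flag.Offsets
            .setPayloads(true)         // Flag.Payloads
            .setFieldStatistics(true)  // Flag.FieldStatistics
            .setTermStatistics(true)   // Flag.TermStatistics
            .setSelectedFields("field_with_positions_offsets")
            .execute().actionGet();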
@@ -320,10 +320,11 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
        this.exists = exists;
    }

    public void setFields(Fields fields, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
    public void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
        TermVectorWriter tvw = new TermVectorWriter(this);
        if (fields != null) {
            tvw.setFields(fields, selectedFields, flags, topLevelFields);

        if (termVectorsByField != null) {
            tvw.setFields(termVectorsByField, selectedFields, flags, topLevelFields);
        }

    }

@@ -342,4 +343,16 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {

    }

    public String getIndex() {
        return index;
    }

    public String getType() {
        return type;
    }

    public String getId() {
        return id;
    }

}
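The new getters round out the read side of the response. A small sketch of consuming a response through its Lucene Fields view, using only methods that appear elsewhere in this commit (the helper name is illustrative; needs org.apache.lucene.index.Terms/TermsEnum and org.apache.lucene.util.BytesRef):

    // Dump the terms of one field from a TermVectorResponse.
    static void printTerms(TermVectorResponse response, String fieldName) throws IOException {
        Terms terms = response.getFields().terms(fieldName); // null when the field was not returned
        if (terms == null) {
            return;
        }
        TermsEnum iterator = terms.iterator(null);
        BytesRef term;
        while ((term = iterator.next()) != null) {
            System.out.println(term.utf8ToString() + " ttf=" + iterator.totalTermFreq());
        }
    }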
@@ -18,22 +18,18 @@
 */
package org.elasticsearch.action.termvector;

import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;

import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;

// package only - this is an internal class!
final class TermVectorWriter {
    final List<String> fields = new ArrayList<String>();

@@ -49,30 +45,30 @@ final class TermVectorWriter {
        response = termVectorResponse;
    }

    void setFields(Fields fields, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
    void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {

        int numFieldsWritten = 0;
        TermsEnum iterator = null;
        DocsAndPositionsEnum docsAndPosEnum = null;
        DocsEnum docsEnum = null;
        TermsEnum topLevelIterator = null;
        for (String field : fields) {
        for (String field : termVectorsByField) {
            if ((selectedFields != null) && (!selectedFields.contains(field))) {
                continue;
            }

            Terms terms = fields.terms(field);
            Terms fieldTermVector = termVectorsByField.terms(field);
            Terms topLevelTerms = topLevelFields.terms(field);

            topLevelIterator = topLevelTerms.iterator(topLevelIterator);
            boolean positions = flags.contains(Flag.Positions) && terms.hasPositions();
            boolean offsets = flags.contains(Flag.Offsets) && terms.hasOffsets();
            boolean payloads = flags.contains(Flag.Payloads) && terms.hasPayloads();
            startField(field, terms.size(), positions, offsets, payloads);
            boolean positions = flags.contains(Flag.Positions) && fieldTermVector.hasPositions();
            boolean offsets = flags.contains(Flag.Offsets) && fieldTermVector.hasOffsets();
            boolean payloads = flags.contains(Flag.Payloads) && fieldTermVector.hasPayloads();
            startField(field, fieldTermVector.size(), positions, offsets, payloads);
            if (flags.contains(Flag.FieldStatistics)) {
                writeFieldStatistics(topLevelTerms);
            }
            iterator = terms.iterator(iterator);
            iterator = fieldTermVector.iterator(iterator);
            final boolean useDocsAndPos = positions || offsets || payloads;
            while (iterator.next() != null) { // iterate all terms of the
                // current field
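With the parameter renamed to termVectorsByField, the enum-reuse pattern is easier to see. For reference, a minimal sketch of the Lucene 4.x iteration idiom the writer builds on, in isolation (not part of the diff; the per-term body is elided):

    // Sketch of the enum-reuse iteration idiom used by setFields() above.
    static void iterateFields(Fields termVectorsByField) throws IOException {
        TermsEnum iterator = null;
        for (String field : termVectorsByField) {          // Fields iterates over field names
            Terms fieldTermVector = termVectorsByField.terms(field);
            iterator = fieldTermVector.iterator(iterator); // pass the previous enum back for reuse
            while (iterator.next() != null) {              // null ends the term iteration
                // per-term work: totalTermFreq(), docsAndPositions(null, null)
                // when positions/offsets/payloads are stored
            }
        }
    }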
@@ -19,36 +19,28 @@

package org.elasticsearch.rest.action.termvector;

import static org.elasticsearch.rest.RestRequest.Method.GET;
import static org.elasticsearch.rest.RestRequest.Method.POST;
import static org.elasticsearch.rest.RestStatus.OK;
import static org.elasticsearch.rest.action.support.RestXContentBuilder.restContentBuilder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;

import org.elasticsearch.ElasticSearchParseException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.termvector.TermVectorRequest;
import org.elasticsearch.action.termvector.TermVectorResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestChannel;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.XContentRestResponse;
import org.elasticsearch.rest.XContentThrowableRestResponse;
import org.elasticsearch.rest.*;
import org.elasticsearch.rest.action.support.RestXContentBuilder;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.rest.RestRequest.Method.GET;
import static org.elasticsearch.rest.RestRequest.Method.POST;
import static org.elasticsearch.rest.RestStatus.BAD_REQUEST;
import static org.elasticsearch.rest.RestStatus.OK;
import static org.elasticsearch.rest.action.support.RestXContentBuilder.restContentBuilder;

/**
 * This class parses the json request and translates it into a

@@ -70,19 +62,24 @@ public class RestTermVectorAction extends BaseRestHandler {
        termVectorRequest.routing(request.param("routing"));
        termVectorRequest.parent(request.param("parent"));
        termVectorRequest.preference(request.param("preference"));
        XContentParser parser = null;
        if (request.hasContent()) {
            try {
                parseRequest(request.content(), termVectorRequest);
                parser = XContentFactory.xContent(request.content()).createParser(request.content());
                TermVectorRequest.parseRequest(termVectorRequest, parser);
            } catch (IOException e) {
                try {
                    XContentBuilder builder = RestXContentBuilder.restContentBuilder(request);
                    channel.sendResponse(new XContentRestResponse(request, BAD_REQUEST, builder.startObject().field("error", e.getMessage()).endObject()));

                } catch (IOException e1) {
                    Set<String> selectedFields = termVectorRequest.selectedFields();
                    String fieldString = "all";
                    if (selectedFields != null) {
                        Strings.arrayToDelimitedString(termVectorRequest.selectedFields().toArray(new String[1]), " ");
                    logger.warn("Failed to send response", e1);
                    return;
                }
            } finally {
                if (parser != null) {
                    parser.close();
                }
                logger.error("Something is wrong with your parameters for the term vector request. I am using parameters "
                        + "\n positions :" + termVectorRequest.positions() + "\n offsets :" + termVectorRequest.offsets() + "\n payloads :"
                        + termVectorRequest.payloads() + "\n termStatistics :" + termVectorRequest.termStatistics()
                        + "\n fieldStatistics :" + termVectorRequest.fieldStatistics() + "\nfields " + fieldString, (Object) null);
            }
        }
        readURIParameters(termVectorRequest, request);

@@ -142,47 +139,4 @@ public class RestTermVectorAction extends BaseRestHandler {
        }
    }

    static public void parseRequest(BytesReference cont, TermVectorRequest termVectorRequest) throws IOException {

        XContentParser parser = XContentFactory.xContent(cont).createParser(cont);
        try {
            XContentParser.Token token;
            String currentFieldName = null;
            List<String> fields = new ArrayList<String>();
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (currentFieldName != null) {
                    if (currentFieldName.equals("fields")) {

                        if (token == XContentParser.Token.START_ARRAY) {
                            while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
                                fields.add(parser.text());
                            }
                        } else {
                            throw new ElasticSearchParseException(
                                    "The parameter fields must be given as an array! Use syntax : \"fields\" : [\"field1\", \"field2\",...]");
                        }
                    } else if (currentFieldName.equals("offsets")) {
                        termVectorRequest.offsets(parser.booleanValue());
                    } else if (currentFieldName.equals("positions")) {
                        termVectorRequest.positions(parser.booleanValue());
                    } else if (currentFieldName.equals("payloads")) {
                        termVectorRequest.payloads(parser.booleanValue());
                    } else if (currentFieldName.equals("term_statistics") || currentFieldName.equals("termStatistics")) {
                        termVectorRequest.termStatistics(parser.booleanValue());
                    } else if (currentFieldName.equals("field_statistics") || currentFieldName.equals("fieldStatistics")) {
                        termVectorRequest.fieldStatistics(parser.booleanValue());
                    } else {
                        throw new ElasticSearchParseException("The parameter " + currentFieldName
                                + " is not valid for term vector request!");
                    }
                }
            }
            String[] fieldsAsArray = new String[fields.size()];
            termVectorRequest.selectedFields(fields.toArray(fieldsAsArray));
        } finally {
            parser.close();
        }
    }
}
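The parsing logic now lives on TermVectorRequest, with the REST layer only creating and closing the parser. A sketch of driving the new entry point directly, assembled from the calls above and the JSON shape used by the parsing tests later in this commit (the helper name, JSON body, and index/type/id are illustrative):

    static TermVectorRequest parseSketch() throws IOException {
        BytesReference content = new BytesArray(
                "{\"fields\" : [\"a\", \"b\"], \"offsets\" : false, \"positions\" : false, \"payloads\" : true}");
        XContentParser parser = XContentFactory.xContent(content).createParser(content);
        try {
            TermVectorRequest termVectorRequest = new TermVectorRequest("index", "type", "id");
            TermVectorRequest.parseRequest(termVectorRequest, parser); // moved here from RestTermVectorAction
            return termVectorRequest;
        } finally {
            parser.close(); // the REST handler above closes its parser in a finally block, too
        }
    }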
@@ -323,6 +323,11 @@ public abstract class AbstractSharedClusterTest extends ElasticsearchTestCase {
        return client().prepareIndex(index, type).setSource(source).execute().actionGet();
    }

    protected IndexResponse index(String index, String type, String id, Map<String, Object> source) {
        return client().prepareIndex(index, type, id).setSource(source).execute().actionGet();
    }

    protected GetResponse get(String index, String type, String id) {
        return client().prepareGet(index, type, id).execute().actionGet();
    }
@@ -0,0 +1,411 @@
package org.elasticsearch.test.integration.termvectors;

/*
 * Licensed to ElasticSearch under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.Version;
import org.elasticsearch.action.termvector.TermVectorRequestBuilder;
import org.elasticsearch.action.termvector.TermVectorResponse;
import org.elasticsearch.common.inject.internal.Join;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.indices.IndexMissingException;
import org.elasticsearch.test.integration.AbstractSharedClusterTest;

import java.io.IOException;
import java.io.Reader;
import java.util.*;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.equalTo;

public abstract class AbstractTermVectorTests extends AbstractSharedClusterTest {

    protected static class TestFieldSetting {
        final public String name;
        final public boolean storedOffset;
        final public boolean storedPayloads;
        final public boolean storedPositions;

        public TestFieldSetting(String name, boolean storedOffset, boolean storedPayloads, boolean storedPositions) {
            this.name = name;
            this.storedOffset = storedOffset;
            this.storedPayloads = storedPayloads;
            this.storedPositions = storedPositions;
        }

        public void addToMappings(XContentBuilder mappingsBuilder) throws IOException {
            mappingsBuilder.startObject(name);
            mappingsBuilder.field("type", "string");
            String tv_settings;
            if (storedPositions && storedOffset && storedPayloads) {
                tv_settings = "with_positions_offsets_payloads";
            } else if (storedPositions && storedOffset) {
                tv_settings = "with_positions_offsets";
            } else if (storedPayloads) {
                tv_settings = "with_positions_payloads";
            } else if (storedPositions) {
                tv_settings = "with_positions";
            } else if (storedOffset) {
                tv_settings = "with_offsets";
            } else {
                tv_settings = "yes";
            }

            mappingsBuilder.field("term_vector", tv_settings);

            if (storedPayloads) {
                mappingsBuilder.field("analyzer", "tv_test");
            }

            mappingsBuilder.endObject();
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder("name: ").append(name).append(" tv_with:");
            if (storedPayloads) {
                sb.append("payloads,");
            }
            if (storedOffset) {
                sb.append("offsets,");
            }
            if (storedPositions) {
                sb.append("positions,");
            }
            return sb.toString();
        }
    }

    protected static class TestDoc {
        final public String id;
        final public TestFieldSetting[] fieldSettings;
        final public String[] fieldContent;
        public String index = "test";
        public String type = "type1";

        public TestDoc(String id, TestFieldSetting[] fieldSettings, String[] fieldContent) {
            this.id = id;
            this.fieldSettings = fieldSettings;
            this.fieldContent = fieldContent;
        }

        public TestDoc index(String index) {
            this.index = index;
            return this;
        }

        @Override
        public String toString() {

            StringBuilder sb = new StringBuilder("index:").append(index).append(" type:").append(type).append(" id:").append(id);
            for (int i = 0; i < fieldSettings.length; i++) {
                TestFieldSetting f = fieldSettings[i];
                sb.append("\n").append("Field: ").append(f).append("\n content:").append(fieldContent[i]);
            }
            sb.append("\n");

            return sb.toString();
        }
    }

    protected static class TestConfig {
        final public TestDoc doc;
        final public String[] selectedFields;
        final public boolean requestPositions;
        final public boolean requestOffsets;
        final public boolean requestPayloads;
        public Class expectedException = null;

        public TestConfig(TestDoc doc, String[] selectedFields, boolean requestPositions, boolean requestOffsets, boolean requestPayloads) {
            this.doc = doc;
            this.selectedFields = selectedFields;
            this.requestPositions = requestPositions;
            this.requestOffsets = requestOffsets;
            this.requestPayloads = requestPayloads;
        }

        public TestConfig expectedException(Class exceptionClass) {
            this.expectedException = exceptionClass;
            return this;
        }

        @Override
        public String toString() {
            String requested = "";
            if (requestOffsets) {
                requested += "offsets,";
            }
            if (requestPositions) {
                requested += "position,";
            }
            if (requestPayloads) {
                requested += "payload,";
            }
            Locale aLocale = new Locale("en", "US");
            return String.format(aLocale, "(doc: %s\n requested: %s, fields: %s)", doc, requested,
                    selectedFields == null ? "NULL" : Join.join(",", selectedFields));
        }
    }

    protected void createIndexBasedOnFieldSettings(TestFieldSetting[] fieldSettings, int number_of_shards) throws IOException {
        wipeIndex("test");
        XContentBuilder mappingBuilder = jsonBuilder();
        mappingBuilder.startObject().startObject("type1").startObject("properties");
        for (TestFieldSetting field : fieldSettings) {
            field.addToMappings(mappingBuilder);
        }
        ImmutableSettings.Builder settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.analyzer.tv_test.tokenizer", "standard")
                .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase");
        if (number_of_shards > 0) {
            settings.put("number_of_shards", number_of_shards);
        }
        mappingBuilder.endObject().endObject().endObject();
        run(prepareCreate("test").addMapping("type1", mappingBuilder).setSettings(settings));

        ensureYellow();
    }

    /**
     * Generate test documents. The returned documents are already indexed.
     */
    protected TestDoc[] generateTestDocs(int numberOfDocs, TestFieldSetting[] fieldSettings) {
        String[] fieldContentOptions = new String[] { "Generating a random permutation of a sequence (such as when shuffling cards).",
                "Selecting a random sample of a population (important in statistical sampling).",
                "Allocating experimental units via random assignment to a treatment or control condition.",
                "Generating random numbers: see Random number generation.",
                "Transforming a data stream (such as when using a scrambler in telecommunications)." };

        String[] contentArray = new String[fieldSettings.length];
        Map<String, Object> docSource = new HashMap<String, Object>();
        TestDoc[] testDocs = new TestDoc[numberOfDocs];
        for (int docId = 0; docId < numberOfDocs; docId++) {
            docSource.clear();
            for (int i = 0; i < contentArray.length; i++) {
                contentArray[i] = fieldContentOptions[randomInt(fieldContentOptions.length - 1)];
                docSource.put(fieldSettings[i].name, contentArray[i]);
            }
            TestDoc doc = new TestDoc(Integer.toString(docId), fieldSettings, contentArray.clone());
            index(doc.index, doc.type, doc.id, docSource);
            testDocs[docId] = doc;
        }

        refresh();
        return testDocs;

    }

    protected TestConfig[] generateTestConfigs(int numberOfTests, TestDoc[] testDocs, TestFieldSetting[] fieldSettings) {
        ArrayList<TestConfig> configs = new ArrayList<TestConfig>();
        for (int i = 0; i < numberOfTests; i++) {

            ArrayList<String> selectedFields = null;
            if (randomBoolean()) {
                // used field selection
                selectedFields = new ArrayList<String>();
                if (randomBoolean()) {
                    selectedFields.add("Doesnt_exist"); // this will be ignored.
                }
                for (TestFieldSetting field : fieldSettings)
                    if (randomBoolean()) {
                        selectedFields.add(field.name);
                    }

                if (selectedFields.size() == 0) {
                    selectedFields = null; // 0 length set is not supported.
                }

            }
            TestConfig config = new TestConfig(testDocs[randomInt(testDocs.length - 1)], selectedFields == null ? null
                    : selectedFields.toArray(new String[] {}), randomBoolean(), randomBoolean(), randomBoolean());

            configs.add(config);
        }
        // always adds a test that fails
        configs.add(new TestConfig(new TestDoc("doesnt_exist", new TestFieldSetting[] {}, new String[] {}).index("doesn't_exist"),
                new String[] { "doesnt_exist" }, true, true, true).expectedException(IndexMissingException.class));

        refresh();

        return configs.toArray(new TestConfig[] {});
    }

    protected TestFieldSetting[] getFieldSettings() {
        return new TestFieldSetting[] { new TestFieldSetting("field_with_positions", false, false, true),
                new TestFieldSetting("field_with_offsets", true, false, false),
                new TestFieldSetting("field_with_only_tv", false, false, false),
                new TestFieldSetting("field_with_positions_offsets", false, false, true),
                new TestFieldSetting("field_with_positions_payloads", false, true, true)

        };
    }

    protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

        Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
        for (TestFieldSetting field : testDocs[0].fieldSettings) {
            if (field.storedPayloads) {
                mapping.put(field.name, new Analyzer() {
                    @Override
                    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                        Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                        TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                        filter = new TypeAsPayloadTokenFilter(filter);
                        return new TokenStreamComponents(tokenizer, filter);
                    }

                });
            }
        }
        PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.CURRENT.luceneVersion), mapping);

        Directory dir = new RAMDirectory();
        IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);

        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(dir, conf);

        for (TestDoc doc : testDocs) {
            Document d = new Document();
            d.add(new Field("id", doc.id, StringField.TYPE_STORED));
            for (int i = 0; i < doc.fieldContent.length; i++) {
                FieldType type = new FieldType(TextField.TYPE_STORED);
                TestFieldSetting fieldSetting = doc.fieldSettings[i];

                type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
                type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
                type.setStoreTermVectorPositions(fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
                type.setStoreTermVectors(true);
                type.freeze();
                d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
            }
            writer.updateDocument(new Term("id", doc.id), d);
            writer.commit();
        }
        writer.close();

        return DirectoryReader.open(dir);
    }

    protected void validateResponse(TermVectorResponse esResponse, Fields luceneFields, TestConfig testConfig) throws IOException {
        TestDoc testDoc = testConfig.doc;
        HashSet<String> selectedFields = testConfig.selectedFields == null ? null : new HashSet<String>(
                Arrays.asList(testConfig.selectedFields));
        Fields esTermVectorFields = esResponse.getFields();
        for (TestFieldSetting field : testDoc.fieldSettings) {
            Terms esTerms = esTermVectorFields.terms(field.name);
            if (selectedFields != null && !selectedFields.contains(field.name)) {
                assertNull(esTerms);
                continue;
            }

            assertNotNull(esTerms);

            Terms luceneTerms = luceneFields.terms(field.name);
            TermsEnum esTermEnum = esTerms.iterator(null);
            TermsEnum luceneTermEnum = luceneTerms.iterator(null);

            while (esTermEnum.next() != null) {
                assertNotNull(luceneTermEnum.next());

                assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
                DocsAndPositionsEnum esDocsPosEnum = esTermEnum.docsAndPositions(null, null, 0);
                DocsAndPositionsEnum luceneDocsPosEnum = luceneTermEnum.docsAndPositions(null, null, 0);
                if (luceneDocsPosEnum == null) {
                    // test we expect that...
                    assertFalse(field.storedOffset);
                    assertFalse(field.storedPayloads);
                    assertFalse(field.storedPositions);
                    continue;
                }

                String currentTerm = esTermEnum.term().utf8ToString();

                assertThat("Token mismatch for field: " + field.name, currentTerm, equalTo(luceneTermEnum.term().utf8ToString()));

                esDocsPosEnum.nextDoc();
                luceneDocsPosEnum.nextDoc();

                int freq = esDocsPosEnum.freq();
                assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
                for (int i = 0; i < freq; i++) {
                    String failDesc = " (field:" + field.name + " term:" + currentTerm + ")";
                    int lucenePos = luceneDocsPosEnum.nextPosition();
                    int esPos = esDocsPosEnum.nextPosition();
                    if (field.storedPositions && testConfig.requestPositions) {
                        assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos));
                    } else {
                        assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1));
                    }
                    if (field.storedOffset && testConfig.requestOffsets) {
                        assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.startOffset(), equalTo(esDocsPosEnum.startOffset()));
                        assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.endOffset(), equalTo(esDocsPosEnum.endOffset()));
                    } else {
                        assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(), equalTo(-1));
                        assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1));
                    }
                    if (field.storedPayloads && testConfig.requestPayloads) {
                        assertThat("Payload test failed" + failDesc, luceneDocsPosEnum.getPayload(), equalTo(esDocsPosEnum.getPayload()));
                    } else {
                        assertThat("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload(), equalTo(null));
                    }

                }
            }

            assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next());

        }

    }

    protected TermVectorRequestBuilder getRequestForConfig(TestConfig config) {
        return client().prepareTermVector(config.doc.index, config.doc.type, config.doc.id).setPayloads(config.requestPayloads)
                .setOffsets(config.requestOffsets).setPositions(config.requestPositions).setFieldStatistics(true).setTermStatistics(true)
                .setSelectedFields(config.selectedFields);

    }

    protected Fields getTermVectorsFromLucene(DirectoryReader directoryReader, TestDoc doc) throws IOException {
        IndexSearcher searcher = new IndexSearcher(directoryReader);
        TopDocs search = searcher.search(new TermQuery(new Term("id", doc.id)), 1);

        ScoreDoc[] scoreDocs = search.scoreDocs;
        assert (scoreDocs.length == 1);
        return directoryReader.getTermVectors(scoreDocs[0].doc);
    }

}
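The helpers above are meant to be driven together from a subclass. A sketch of that flow, mirroring testDuelESLucene in the next file (the method name, doc/config counts, and the skip of expected failures are illustrative):

    @Test
    public void duelSketch() throws Exception {
        TestFieldSetting[] fieldSettings = getFieldSettings();
        createIndexBasedOnFieldSettings(fieldSettings, -1);       // -1 keeps the default shard count
        TestDoc[] docs = generateTestDocs(5, fieldSettings);      // indexed into the ES cluster
        DirectoryReader luceneReader = indexDocsWithLucene(docs); // same docs in a plain Lucene RAMDirectory
        for (TestConfig config : generateTestConfigs(20, docs, fieldSettings)) {
            if (config.expectedException != null) {
                continue; // the real test asserts the failure instead
            }
            TermVectorResponse esResponse = getRequestForConfig(config).execute().actionGet();
            Fields luceneFields = getTermVectorsFromLucene(luceneReader, config.doc);
            validateResponse(esResponse, luceneFields, config);   // compares ES vs. Lucene term by term
        }
    }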
@@ -25,12 +25,9 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.termvector.TermVectorRequest;
import org.elasticsearch.action.termvector.TermVectorRequestBuilder;
import org.elasticsearch.action.termvector.TermVectorResponse;
import org.elasticsearch.common.io.BytesStream;
import org.elasticsearch.common.io.stream.InputStreamStreamInput;
import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;

@@ -38,53 +35,13 @@ import org.elasticsearch.test.integration.AbstractSharedClusterTest;
import org.hamcrest.Matchers;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Random;

import static org.hamcrest.Matchers.equalTo;

public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
public class GetTermVectorCheckDocFreqTests extends AbstractSharedClusterTest {

    @Test
    public void streamRequest() throws IOException {

        Random random = getRandom();
        for (int i = 0; i < 10; i++) {
            TermVectorRequest request = new TermVectorRequest("index", "type", "id");
            request.offsets(random.nextBoolean());
            request.fieldStatistics(random.nextBoolean());
            request.payloads(random.nextBoolean());
            request.positions(random.nextBoolean());
            request.termStatistics(random.nextBoolean());
            String parent = random.nextBoolean() ? "someParent" : null;
            request.parent(parent);
            String pref = random.nextBoolean() ? "somePreference" : null;
            request.preference(pref);

            // write
            ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
            OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
            request.writeTo(out);

            // read
            ByteArrayInputStream esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
            InputStreamStreamInput esBuffer = new InputStreamStreamInput(esInBuffer);
            TermVectorRequest req2 = new TermVectorRequest(null, null, null);
            req2.readFrom(esBuffer);

            assertThat(request.offsets(), equalTo(req2.offsets()));
            assertThat(request.fieldStatistics(), equalTo(req2.fieldStatistics()));
            assertThat(request.payloads(), equalTo(req2.payloads()));
            assertThat(request.positions(), equalTo(req2.positions()));
            assertThat(request.termStatistics(), equalTo(req2.termStatistics()));
            assertThat(request.preference(), equalTo(pref));
            assertThat(request.routing(), equalTo(parent));

        }

    }

    @Test
    public void testSimpleTermVectors() throws ElasticSearchException, IOException {
@ -19,271 +19,37 @@
|
|||
|
||||
package org.elasticsearch.test.integration.termvectors;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
||||
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.ElasticSearchException;
|
||||
import org.elasticsearch.action.ActionFuture;
|
||||
import org.elasticsearch.action.termvector.TermVectorRequest;
|
||||
import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
|
||||
import org.elasticsearch.action.termvector.TermVectorRequestBuilder;
|
||||
import org.elasticsearch.action.termvector.TermVectorResponse;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.InputStreamStreamInput;
|
||||
import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
|
||||
import org.elasticsearch.index.mapper.core.TypeParsers;
|
||||
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
|
||||
import org.elasticsearch.rest.action.termvector.RestTermVectorAction;
|
||||
import org.elasticsearch.test.integration.AbstractSharedClusterTest;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
public class GetTermVectorTests extends AbstractSharedClusterTest {
|
||||
|
||||
@Test
|
||||
public void streamTest() throws Exception {
|
||||
|
||||
TermVectorResponse outResponse = new TermVectorResponse("a", "b", "c");
|
||||
outResponse.setExists(true);
|
||||
writeStandardTermVector(outResponse);
|
||||
|
||||
// write
|
||||
ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
|
||||
OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
|
||||
outResponse.writeTo(out);
|
||||
|
||||
// read
|
||||
ByteArrayInputStream esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
|
||||
InputStreamStreamInput esBuffer = new InputStreamStreamInput(esInBuffer);
|
||||
TermVectorResponse inResponse = new TermVectorResponse("a", "b", "c");
|
||||
inResponse.readFrom(esBuffer);
|
||||
|
||||
// see if correct
|
||||
checkIfStandardTermVector(inResponse);
|
||||
|
||||
outResponse = new TermVectorResponse("a", "b", "c");
|
||||
writeEmptyTermVector(outResponse);
|
||||
// write
|
||||
outBuffer = new ByteArrayOutputStream();
|
||||
out = new OutputStreamStreamOutput(outBuffer);
|
||||
outResponse.writeTo(out);
|
||||
|
||||
// read
|
||||
esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
|
||||
esBuffer = new InputStreamStreamInput(esInBuffer);
|
||||
inResponse = new TermVectorResponse("a", "b", "c");
|
||||
inResponse.readFrom(esBuffer);
|
||||
assertTrue(inResponse.isExists());
|
||||
public class GetTermVectorTests extends AbstractTermVectorTests {
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void checkIfStandardTermVector(TermVectorResponse inResponse) throws IOException {
|
||||
|
||||
Fields fields = inResponse.getFields();
|
||||
assertThat(fields.terms("title"), Matchers.notNullValue());
|
||||
assertThat(fields.terms("desc"), Matchers.notNullValue());
|
||||
assertThat(fields.size(), equalTo(2));
|
||||
}
|
||||
|
||||
private void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException {
|
||||
|
||||
Directory dir = FSDirectory.open(new File("/tmp/foo"));
|
||||
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
|
||||
conf.setOpenMode(OpenMode.CREATE);
|
||||
IndexWriter writer = new IndexWriter(dir, conf);
|
||||
FieldType type = new FieldType(TextField.TYPE_STORED);
|
||||
type.setStoreTermVectorOffsets(true);
|
||||
type.setStoreTermVectorPayloads(false);
|
||||
type.setStoreTermVectorPositions(true);
|
||||
type.setStoreTermVectors(true);
|
||||
type.freeze();
|
||||
Document d = new Document();
|
||||
d.add(new Field("id", "abc", StringField.TYPE_STORED));
|
||||
|
||||
writer.updateDocument(new Term("id", "abc"), d);
|
||||
writer.commit();
|
||||
writer.close();
|
||||
DirectoryReader dr = DirectoryReader.open(dir);
|
||||
IndexSearcher s = new IndexSearcher(dr);
|
||||
TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
|
||||
ScoreDoc[] scoreDocs = search.scoreDocs;
|
||||
int doc = scoreDocs[0].doc;
|
||||
Fields fields = dr.getTermVectors(doc);
|
||||
EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
|
||||
outResponse.setFields(fields, null, flags, fields);
|
||||
outResponse.setExists(true);
|
||||
|
||||
}
|
||||
|
||||
private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException {
|
||||
|
||||
Directory dir = FSDirectory.open(new File("/tmp/foo"));
|
||||
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
|
||||
conf.setOpenMode(OpenMode.CREATE);
|
||||
IndexWriter writer = new IndexWriter(dir, conf);
|
||||
FieldType type = new FieldType(TextField.TYPE_STORED);
|
||||
type.setStoreTermVectorOffsets(true);
|
||||
type.setStoreTermVectorPayloads(false);
|
||||
type.setStoreTermVectorPositions(true);
|
||||
type.setStoreTermVectors(true);
|
||||
type.freeze();
|
||||
Document d = new Document();
|
||||
d.add(new Field("id", "abc", StringField.TYPE_STORED));
|
||||
d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
|
||||
d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));
|
||||
|
||||
writer.updateDocument(new Term("id", "abc"), d);
|
||||
writer.commit();
|
||||
writer.close();
|
||||
DirectoryReader dr = DirectoryReader.open(dir);
|
||||
IndexSearcher s = new IndexSearcher(dr);
|
||||
TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
|
||||
ScoreDoc[] scoreDocs = search.scoreDocs;
|
||||
int doc = scoreDocs[0].doc;
|
||||
Fields fields = dr.getTermVectors(doc);
|
||||
EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
|
||||
outResponse.setFields(fields, null, flags, fields);
|
||||
|
||||
}
|
||||
|
||||
private Fields buildWithLuceneAndReturnFields(String docId, String[] fields, String[] content, boolean[] withPositions,
|
||||
boolean[] withOffsets, boolean[] withPayloads) throws IOException {
|
||||
assert (fields.length == withPayloads.length);
|
||||
assert (content.length == withPayloads.length);
|
||||
assert (withPositions.length == withPayloads.length);
|
||||
assert (withOffsets.length == withPayloads.length);
|
||||
|
||||
Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
|
||||
for (int i = 0; i < withPayloads.length; i++) {
|
||||
if (withPayloads[i]) {
|
||||
mapping.put(fields[i], new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
TokenFilter filter = new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer);
|
||||
filter = new TypeAsPayloadTokenFilter(filter);
|
||||
return new TokenStreamComponents(tokenizer, filter);
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
}
|
||||
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(TEST_VERSION_CURRENT), mapping);
|
||||
|
||||
Directory dir = FSDirectory.open(new File("/tmp/foo"));
|
||||
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, wrapper);
|
||||
|
||||
conf.setOpenMode(OpenMode.CREATE);
|
||||
IndexWriter writer = new IndexWriter(dir, conf);
|
||||
|
||||
Document d = new Document();
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
d.add(new Field("id", docId, StringField.TYPE_STORED));
|
||||
FieldType type = new FieldType(TextField.TYPE_STORED);
|
||||
type.setStoreTermVectorOffsets(withOffsets[i]);
|
||||
type.setStoreTermVectorPayloads(withPayloads[i]);
|
||||
type.setStoreTermVectorPositions(withPositions[i] || withOffsets[i] || withPayloads[i]);
|
||||
type.setStoreTermVectors(true);
|
||||
type.freeze();
|
||||
d.add(new Field(fields[i], content[i], type));
|
||||
writer.updateDocument(new Term("id", docId), d);
|
||||
writer.commit();
|
||||
}
|
||||
writer.close();
|
||||
|
||||
DirectoryReader dr = DirectoryReader.open(dir);
|
||||
IndexSearcher s = new IndexSearcher(dr);
|
||||
TopDocs search = s.search(new TermQuery(new Term("id", docId)), 1);
|
||||
|
||||
ScoreDoc[] scoreDocs = search.scoreDocs;
|
||||
assert (scoreDocs.length == 1);
|
||||
int doc = scoreDocs[0].doc;
|
||||
Fields returnFields = dr.getTermVectors(doc);
|
||||
return returnFields;
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRestRequestParsing() throws Exception {
|
||||
BytesReference inputBytes = new BytesArray(
|
||||
" {\"fields\" : [\"a\", \"b\",\"c\"], \"offsets\":false, \"positions\":false, \"payloads\":true}");
|
||||
TermVectorRequest tvr = new TermVectorRequest(null, null, null);
|
||||
RestTermVectorAction.parseRequest(inputBytes, tvr);
|
||||
Set<String> fields = tvr.selectedFields();
|
||||
assertThat(fields.contains("a"), equalTo(true));
|
||||
assertThat(fields.contains("b"), equalTo(true));
|
||||
assertThat(fields.contains("c"), equalTo(true));
|
||||
assertThat(tvr.offsets(), equalTo(false));
|
||||
assertThat(tvr.positions(), equalTo(false));
|
||||
assertThat(tvr.payloads(), equalTo(true));
|
||||
String additionalFields = "b,c ,d, e ";
|
||||
RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
|
||||
assertThat(tvr.selectedFields().size(), equalTo(5));
|
||||
assertThat(fields.contains("d"), equalTo(true));
|
||||
assertThat(fields.contains("e"), equalTo(true));
|
||||
|
||||
additionalFields = "";
|
||||
RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
|
||||
|
||||
inputBytes = new BytesArray(" {\"offsets\":false, \"positions\":false, \"payloads\":true}");
|
||||
tvr = new TermVectorRequest(null, null, null);
|
||||
RestTermVectorAction.parseRequest(inputBytes, tvr);
|
||||
additionalFields = "";
|
||||
RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
|
||||
assertThat(tvr.selectedFields(), equalTo(null));
|
||||
additionalFields = "b,c ,d, e ";
|
||||
RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
|
||||
assertThat(tvr.selectedFields().size(), equalTo(4));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRestRequestParsingThrowsException() throws Exception {
|
||||
BytesReference inputBytes = new BytesArray(
|
||||
" {\"fields\" : \"a, b,c \", \"offsets\":false, \"positions\":false, \"payloads\":true, \"meaningless_term\":2}");
|
||||
TermVectorRequest tvr = new TermVectorRequest(null, null, null);
|
||||
boolean threwException = false;
|
||||
try {
|
||||
RestTermVectorAction.parseRequest(inputBytes, tvr);
|
||||
} catch (Exception e) {
|
||||
threwException = true;
|
||||
}
|
||||
assertThat(threwException, equalTo(true));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoSuchDoc() throws Exception {
|
||||
|
||||
run(addMapping(prepareCreate("test"), "type1", new Object[] { "field", "type", "string", "term_vector",
|
||||
"with_positions_offsets_payloads" }));
|
||||
run(addMapping(prepareCreate("test"), "type1", new Object[]{"field", "type", "string", "term_vector",
|
||||
"with_positions_offsets_payloads"}));
|
||||
|
||||
ensureYellow();
|
||||
|
||||
|
@ -346,7 +112,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
|
|||
public void testSimpleTermVectors() throws ElasticSearchException, IOException {
|
||||
|
||||
run(addMapping(prepareCreate("test"), "type1",
|
||||
new Object[] { "field", "type", "string", "term_vector", "with_positions_offsets_payloads", "analyzer", "tv_test" })
|
||||
new Object[]{"field", "type", "string", "term_vector", "with_positions_offsets_payloads", "analyzer", "tv_test"})
|
||||
.setSettings(
|
||||
ImmutableSettings.settingsBuilder().put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
|
||||
.putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
|
||||
|
@ -359,11 +125,11 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
|
|||
.endObject()).execute().actionGet();
|
||||
refresh();
|
||||
}
|
||||
String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
|
||||
int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
|
||||
int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
|
||||
int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
|
||||
int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };
|
||||
String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
|
||||
int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
|
||||
int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
|
||||
int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
|
||||
int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};
|
||||
for (int i = 0; i < 10; i++) {
|
||||
TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i)).setPayloads(true)
|
||||
.setOffsets(true).setPositions(true).setSelectedFields();
|
||||
|
@ -405,9 +171,8 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
|
|||

    @Test
    public void testRandomSingleTermVectors() throws ElasticSearchException, IOException {
        FieldType ft = new FieldType();
        int config = randomInt(6);
        boolean storePositions = false;
        boolean storeOffsets = false;
        boolean storePayloads = false;
@@ -451,7 +216,7 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {

        String optionString = AbstractFieldMapper.termVectorOptionsToString(ft);
        run(addMapping(prepareCreate("test"), "type1",
                new Object[]{"field", "type", "string", "term_vector", optionString, "analyzer", "tv_test"}).setSettings(
                ImmutableSettings.settingsBuilder().put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                        .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
        ensureYellow();

@@ -463,15 +228,15 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
                    .endObject()).execute().actionGet();
            refresh();
        }
        String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
        int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
        int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
        int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
        int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};

        boolean isPayloadRequested = randomBoolean();
        boolean isOffsetRequested = randomBoolean();
        boolean isPositionsRequested = randomBoolean();
        String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested, optionString);
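        // the flag combination is drawn once and reused for all ten documents; infoString
        // presumably captures it so a failing assertion can report which configuration broke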
        for (int i = 0; i < 10; i++) {
            TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
@@ -552,147 +317,31 @@ public class GetTermVectorTests extends AbstractSharedClusterTest {
    }

    @Test
    public void testDuellESLucene() throws Exception {

        String[] fieldNames = { "field_that_should_not_be_requested", "field_with_positions", "field_with_offsets", "field_with_only_tv",
                "field_with_positions_offsets", "field_with_positions_payloads" };
        run(addMapping(prepareCreate("test"), "type1",
                new Object[] { fieldNames[0], "type", "string", "term_vector", "with_positions_offsets" },
                new Object[] { fieldNames[1], "type", "string", "term_vector", "with_positions" },
                new Object[] { fieldNames[2], "type", "string", "term_vector", "with_offsets" },
                new Object[] { fieldNames[3], "type", "string", "store_term_vectors", "yes" },
                new Object[] { fieldNames[4], "type", "string", "term_vector", "with_positions_offsets" },
                new Object[] { fieldNames[5], "type", "string", "term_vector", "with_positions_payloads", "analyzer", "tv_test" })
                .setSettings(
                        ImmutableSettings.settingsBuilder().put("index.analysis.analyzer.tv_test.tokenizer", "standard")
                                .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
        ensureYellow();
        // would also work with XContentBuilder xcb = new XContentBuilder();

        // now, create the same thing with lucene and see if the returned stuff
        // is the same

        String[] fieldContent = { "the quick shard jumps over the stupid brain", "here is another field",
                "And yet another field without any use.", "I am out of ideas on what to type here.",
                "The last field for which offsets are stored but not positions.",
                "The last field for which offsets are stored but not positions." };

        boolean[] storeOffsets = { true, false, true, false, true, false };
        boolean[] storePositions = { true, true, false, false, true, true };
        boolean[] storePayloads = { false, false, false, false, false, true };
        Map<String, Object> testSource = new HashMap<String, Object>();

        for (int i = 0; i < fieldNames.length; i++) {
            testSource.put(fieldNames[i], fieldContent[i]);
        }

        client().prepareIndex("test", "type1", "1").setSource(testSource).execute().actionGet();
        refresh();

        String[] selectedFields = { fieldNames[1], fieldNames[2], fieldNames[3], fieldNames[4], fieldNames[5] };

        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, false, false, false);
        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, true, false, false);
        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, false, true, false);
        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, true, true, false);
        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, true, false, true);
        testForConfig(fieldNames, fieldContent, storeOffsets, storePositions, storePayloads, selectedFields, true, true, true);

    }

    private void testForConfig(String[] fieldNames, String[] fieldContent, boolean[] storeOffsets, boolean[] storePositions,
            boolean[] storePayloads, String[] selectedFields, boolean withPositions, boolean withOffsets, boolean withPayloads)
            throws IOException {
        TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", "1").setPayloads(withPayloads).setOffsets(withOffsets)
                .setPositions(withPositions).setFieldStatistics(true).setTermStatistics(true).setSelectedFields(selectedFields);
        TermVectorResponse response = resp.execute().actionGet();

        // build the same with lucene and compare the Fields
        Fields luceneFields = buildWithLuceneAndReturnFields("1", fieldNames, fieldContent, storePositions, storeOffsets, storePayloads);

        HashMap<String, Boolean> storeOffsetsMap = new HashMap<String, Boolean>();
        HashMap<String, Boolean> storePositionsMap = new HashMap<String, Boolean>();
        HashMap<String, Boolean> storePayloadsMap = new HashMap<String, Boolean>();
        for (int i = 0; i < storePositions.length; i++) {
            storeOffsetsMap.put(fieldNames[i], storeOffsets[i]);
            storePositionsMap.put(fieldNames[i], storePositions[i]);
            storePayloadsMap.put(fieldNames[i], storePayloads[i]);
        }

        compareLuceneESTermVectorResults(response.getFields(), luceneFields, storePositionsMap, storeOffsetsMap, storePayloadsMap,
                withPositions, withOffsets, withPayloads, selectedFields);

    }

    private void compareLuceneESTermVectorResults(Fields fields, Fields luceneFields, HashMap<String, Boolean> storePositionsMap,
            HashMap<String, Boolean> storeOffsetsMap, HashMap<String, Boolean> storePayloadsMap, boolean getPositions, boolean getOffsets,
            boolean getPayloads, String[] selectedFields) throws IOException {
        HashSet<String> selectedFieldsMap = new HashSet<String>(Arrays.asList(selectedFields));

        Iterator<String> luceneFieldNames = luceneFields.iterator();
        assertThat(luceneFields.size(), equalTo(storeOffsetsMap.size()));
        assertThat(fields.size(), equalTo(selectedFields.length));

        while (luceneFieldNames.hasNext()) {
            String luceneFieldName = luceneFieldNames.next();
            if (!selectedFieldsMap.contains(luceneFieldName))
                continue;
            Terms esTerms = fields.terms(luceneFieldName);
            Terms luceneTerms = luceneFields.terms(luceneFieldName);
            TermsEnum esTermEnum = esTerms.iterator(null);
            TermsEnum luceneTermEnum = luceneTerms.iterator(null);

            int numTerms = 0;

            while (esTermEnum.next() != null) {
                luceneTermEnum.next();
                assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
                DocsAndPositionsEnum esDocsPosEnum = esTermEnum.docsAndPositions(null, null, 0);
                DocsAndPositionsEnum luceneDocsPosEnum = luceneTermEnum.docsAndPositions(null, null, 0);
                if (luceneDocsPosEnum == null) {
                    assertThat(storeOffsetsMap.get(luceneFieldName), equalTo(false));
                    assertThat(storePayloadsMap.get(luceneFieldName), equalTo(false));
                    assertThat(storePositionsMap.get(luceneFieldName), equalTo(false));
                    continue;
                }
                numTerms++;

                assertThat("failed for field: " + luceneFieldName, esTermEnum.term().utf8ToString(), equalTo(luceneTermEnum.term()
                        .utf8ToString()));
                esDocsPosEnum.nextDoc();
                luceneDocsPosEnum.nextDoc();

                int freq = (int) esDocsPosEnum.freq();
                assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
                for (int i = 0; i < freq; i++) {

                    int lucenePos = luceneDocsPosEnum.nextPosition();
                    int esPos = esDocsPosEnum.nextPosition();
                    if (storePositionsMap.get(luceneFieldName) && getPositions) {
                        assertThat(luceneFieldName, lucenePos, equalTo(esPos));
                    } else {
                        assertThat(esPos, equalTo(-1));
                    }
                    if (storeOffsetsMap.get(luceneFieldName) && getOffsets) {
                        assertThat(luceneDocsPosEnum.startOffset(), equalTo(esDocsPosEnum.startOffset()));
                        assertThat(luceneDocsPosEnum.endOffset(), equalTo(esDocsPosEnum.endOffset()));
                    } else {
                        assertThat(esDocsPosEnum.startOffset(), equalTo(-1));
                        assertThat(esDocsPosEnum.endOffset(), equalTo(-1));
                    }
                    if (storePayloadsMap.get(luceneFieldName) && getPayloads) {
                        assertThat(luceneFieldName, luceneDocsPosEnum.getPayload(), equalTo(esDocsPosEnum.getPayload()));
                    } else {
                        assertThat(esDocsPosEnum.getPayload(), equalTo(null));
                    }
                }
            }
        }
    }

    @Test
    public void testDuelESLucene() throws Exception {
        TestFieldSetting[] testFieldSettings = getFieldSettings();
        createIndexBasedOnFieldSettings(testFieldSettings, -1);
        TestDoc[] testDocs = generateTestDocs(5, testFieldSettings);

        // for (int i = 0; i < testDocs.length; i++)
        //     logger.info("Doc: {}", testDocs[i]);
        DirectoryReader directoryReader = indexDocsWithLucene(testDocs);
        TestConfig[] testConfigs = generateTestConfigs(20, testDocs, testFieldSettings);

        ensureYellow();

        for (TestConfig test : testConfigs) {
            try {
                TermVectorRequestBuilder request = getRequestForConfig(test);
                if (test.expectedException != null) {
                    assertThrows(request, test.expectedException);
                    continue;
                }

                TermVectorResponse response = run(request);
                Fields luceneTermVectors = getTermVectorsFromLucene(directoryReader, test.doc);
                validateResponse(response, luceneTermVectors, test);
            } catch (Throwable t) {
                throw new Exception("Test exception while running " + test.toString(), t);
            }
        }
    }
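    // A minimal sketch of the TestConfig shape the loop above relies on; only "doc" and
    // "expectedException" are visible in this hunk, the remaining fields are hypothetical:
    //
    //   class TestConfig {
    //       TestDoc doc;                                   // document whose vectors are requested
    //       Class<? extends Throwable> expectedException;  // non-null when the request must fail
    //       // hypothetical: selected fields plus requested positions/offsets/payloads flags,
    //       // which getRequestForConfig would translate onto the TermVectorRequestBuilder
    //   }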

    @Test
@@ -0,0 +1,256 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.test.unit.termvectors;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.action.termvector.TermVectorRequest;
import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
import org.elasticsearch.action.termvector.TermVectorResponse;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.InputStreamStreamInput;
import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.rest.action.termvector.RestTermVectorAction;
import org.hamcrest.Matchers;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.EnumSet;
import java.util.Set;

import static org.hamcrest.Matchers.equalTo;

public class TermVectorUnitTests extends org.elasticsearch.test.integration.ElasticsearchTestCase {

    @Test
    public void streamResponse() throws Exception {

        TermVectorResponse outResponse = new TermVectorResponse("a", "b", "c");
        outResponse.setExists(true);
        writeStandardTermVector(outResponse);

        // write
        ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
        OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
        outResponse.writeTo(out);

        // read
        ByteArrayInputStream esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
        InputStreamStreamInput esBuffer = new InputStreamStreamInput(esInBuffer);
        TermVectorResponse inResponse = new TermVectorResponse("a", "b", "c");
        inResponse.readFrom(esBuffer);

        // see if correct
        checkIfStandardTermVector(inResponse);

        outResponse = new TermVectorResponse("a", "b", "c");
        writeEmptyTermVector(outResponse);
        // write
        outBuffer = new ByteArrayOutputStream();
        out = new OutputStreamStreamOutput(outBuffer);
        outResponse.writeTo(out);

        // read
        esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
        esBuffer = new InputStreamStreamInput(esInBuffer);
        inResponse = new TermVectorResponse("a", "b", "c");
        inResponse.readFrom(esBuffer);
        assertTrue(inResponse.isExists());

    }
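    // writeTo/readFrom round-trips through a plain byte buffer are the standard way to
    // exercise the Streamable contract; streamRequest() below applies the same pattern
    // to the request side.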

    private void writeEmptyTermVector(TermVectorResponse outResponse) throws IOException {

        Directory dir = new RAMDirectory();
        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));
        conf.setOpenMode(OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(dir, conf);
        FieldType type = new FieldType(TextField.TYPE_STORED);
        type.setStoreTermVectorOffsets(true);
        type.setStoreTermVectorPayloads(false);
        type.setStoreTermVectorPositions(true);
        type.setStoreTermVectors(true);
        type.freeze();
        Document d = new Document();
        d.add(new Field("id", "abc", StringField.TYPE_STORED));

        writer.updateDocument(new Term("id", "abc"), d);
        writer.commit();
        writer.close();
        DirectoryReader dr = DirectoryReader.open(dir);
        IndexSearcher s = new IndexSearcher(dr);
        TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
        ScoreDoc[] scoreDocs = search.scoreDocs;
        int doc = scoreDocs[0].doc;
        Fields fields = dr.getTermVectors(doc);
        EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
        outResponse.setFields(fields, null, flags, fields);
        outResponse.setExists(true);

    }
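    // only the untokenized "id" StringField is indexed above and it stores no term vectors,
    // so dr.getTermVectors(doc) presumably returns no vectorized fields; this provides the
    // "empty" response exercised at the end of streamResponse()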

    private void writeStandardTermVector(TermVectorResponse outResponse) throws IOException {

        // use an in-memory directory so the test does not depend on a fixed filesystem path
        Directory dir = new RAMDirectory();
        IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT));

        conf.setOpenMode(OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(dir, conf);
        FieldType type = new FieldType(TextField.TYPE_STORED);
        type.setStoreTermVectorOffsets(true);
        type.setStoreTermVectorPayloads(false);
        type.setStoreTermVectorPositions(true);
        type.setStoreTermVectors(true);
        type.freeze();
        Document d = new Document();
        d.add(new Field("id", "abc", StringField.TYPE_STORED));
        d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
        d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));

        writer.updateDocument(new Term("id", "abc"), d);
        writer.commit();
        writer.close();
        DirectoryReader dr = DirectoryReader.open(dir);
        IndexSearcher s = new IndexSearcher(dr);
        TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
        ScoreDoc[] scoreDocs = search.scoreDocs;
        int doc = scoreDocs[0].doc;
        Fields termVectors = dr.getTermVectors(doc);
        EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
        outResponse.setFields(termVectors, null, flags, termVectors);

    }

    private void checkIfStandardTermVector(TermVectorResponse inResponse) throws IOException {

        Fields fields = inResponse.getFields();
        assertThat(fields.terms("title"), Matchers.notNullValue());
        assertThat(fields.terms("desc"), Matchers.notNullValue());
        assertThat(fields.size(), equalTo(2));
    }

    @Test
    public void testRestRequestParsing() throws Exception {
        BytesReference inputBytes = new BytesArray(
                " {\"fields\" : [\"a\", \"b\",\"c\"], \"offsets\":false, \"positions\":false, \"payloads\":true}");

        TermVectorRequest tvr = new TermVectorRequest(null, null, null);
        XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(inputBytes);
        TermVectorRequest.parseRequest(tvr, parser);

        Set<String> fields = tvr.selectedFields();
        assertThat(fields.contains("a"), equalTo(true));
        assertThat(fields.contains("b"), equalTo(true));
        assertThat(fields.contains("c"), equalTo(true));
        assertThat(tvr.offsets(), equalTo(false));
        assertThat(tvr.positions(), equalTo(false));
        assertThat(tvr.payloads(), equalTo(true));
        String additionalFields = "b,c ,d, e ";
        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
        assertThat(tvr.selectedFields().size(), equalTo(5));
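        // "b" and "c" from the parameter are already selected, so only "d" and "e" are new
        // (entries are presumably trimmed), growing the field set from 3 to 5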
        assertThat(fields.contains("d"), equalTo(true));
        assertThat(fields.contains("e"), equalTo(true));

        additionalFields = "";
        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);

        inputBytes = new BytesArray(" {\"offsets\":false, \"positions\":false, \"payloads\":true}");
        tvr = new TermVectorRequest(null, null, null);
        parser = XContentFactory.xContent(XContentType.JSON).createParser(inputBytes);
        TermVectorRequest.parseRequest(tvr, parser);
        additionalFields = "";
        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
        assertThat(tvr.selectedFields(), equalTo(null));
        additionalFields = "b,c ,d, e ";
        RestTermVectorAction.addFieldStringsFromParameter(tvr, additionalFields);
        assertThat(tvr.selectedFields().size(), equalTo(4));

    }
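    // A minimal sketch of the behaviour addFieldStringsFromParameter appears to have,
    // inferred from the assertions above (the real implementation may differ):
    //
    //   Set<String> merged = new HashSet<String>();
    //   if (request.selectedFields() != null) merged.addAll(request.selectedFields());
    //   for (String field : parameterValue.split(",")) {
    //       String trimmed = field.trim();
    //       if (!trimmed.isEmpty()) merged.add(trimmed);
    //   }
    //   // an empty parameter is a no-op: selectedFields() stays null if nothing was selected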

    @Test
    public void testRequestParsingThrowsException() throws Exception {
        BytesReference inputBytes = new BytesArray(
                " {\"fields\" : \"a, b,c \", \"offsets\":false, \"positions\":false, \"payloads\":true, \"meaningless_term\":2}");
        TermVectorRequest tvr = new TermVectorRequest(null, null, null);
        boolean threwException = false;
        try {
            XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(inputBytes);
            TermVectorRequest.parseRequest(tvr, parser);
        } catch (Exception e) {
            threwException = true;
        }
        assertThat(threwException, equalTo(true));

    }

    @Test
    public void streamRequest() throws IOException {

        for (int i = 0; i < 10; i++) {
            TermVectorRequest request = new TermVectorRequest("index", "type", "id");
            request.offsets(randomBoolean());
            request.fieldStatistics(randomBoolean());
            request.payloads(randomBoolean());
            request.positions(randomBoolean());
            request.termStatistics(randomBoolean());
            String parent = randomBoolean() ? "someParent" : null;
            request.parent(parent);
            String pref = randomBoolean() ? "somePreference" : null;
            request.preference(pref);

            // write
            ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
            OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
            request.writeTo(out);

            // read
            ByteArrayInputStream esInBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
            InputStreamStreamInput esBuffer = new InputStreamStreamInput(esInBuffer);
            TermVectorRequest req2 = new TermVectorRequest(null, null, null);
            req2.readFrom(esBuffer);

            assertThat(request.offsets(), equalTo(req2.offsets()));
            assertThat(request.fieldStatistics(), equalTo(req2.fieldStatistics()));
            assertThat(request.payloads(), equalTo(req2.payloads()));
            assertThat(request.positions(), equalTo(req2.positions()));
            assertThat(request.termStatistics(), equalTo(req2.termStatistics()));
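            // the deserialized copy must preserve the preference, and parent is expected
            // to double as the routing value when no explicit routing was set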
            assertThat(req2.preference(), equalTo(pref));
            assertThat(req2.routing(), equalTo(parent));

        }

    }
}