Make parsing of URI and body parameters consistent with the single term vector API

Not all URI parameters were parsed for the multi term vector request. This commit
makes sure that all of them are parsed and applied to every request that is created
for a multi term vector request.

To simplify both the code and the JSON request, the request body now supports
two ways of using multi term vectors:

1. Give all parameters for each document requested in the docs array like this:

```
{
   "docs": [
      {
         "_index": "testidx",
         "_type": "test",
         "_id": "2",
         "terms": [
            "fox"
         ],
         "term_statistics": true
      },
      {
         "_index": "testidx",
         "_type": "test",
         "_id": "1",
         "terms": [
            "quick",
            "brown"
         ],
         "term_statistics": false
      }
   ]
}
```

2. Define a list of ids and give the parameters that apply to all of them in a separate "parameters" object, like this:

```
{
   "ids": [
      "1",
      "2"
   ],
   "parameters": {
      "_index": "testidx",
      "_type": "test",
      "terms": [
         "brown"
      ]
   }
}
```

URI parameters act as global defaults in both cases. They are overridden
by parameter definitions in the body.

Also, this commit adds the missing setParent(..) and setPreference(..) to TermVectorRequestBuilder.
This commit is contained in:
Britta Weber 2013-10-28 16:48:30 +01:00
parent 6e1a04b370
commit 3be5f3345e
14 changed files with 156 additions and 114 deletions

View File

@ -25,7 +25,6 @@ import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.ValidateActions;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
@ -51,22 +50,6 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
return this;
}
/**
* Sets the preference to execute the search. Defaults to randomize across
* shards. Can be set to <tt>_local</tt> to prefer local shards,
* <tt>_primary</tt> to execute only on primary shards, or a custom value,
* which guarantees that the same order will be used across different
* requests.
*/
public MultiTermVectorsRequest preference(String preference) {
this.preference = preference;
return this;
}
public String preference() {
return this.preference;
}
@Override
public ActionRequestValidationException validate() {
ActionRequestValidationException validationException = null;
@ -85,49 +68,16 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
return validationException;
}
public void add(@Nullable String defaultIndex, @Nullable String defaultType, @Nullable String[] defaultFields, byte[] data, int from,
int length) throws Exception {
add(defaultIndex, defaultType, defaultFields, new BytesArray(data, from, length));
}
public void add(@Nullable String defaultIndex, @Nullable String defaultType, @Nullable String[] defaultFields, BytesReference data)
public void add(TermVectorRequest template, BytesReference data)
throws Exception {
XContentParser parser = XContentFactory.xContent(data).createParser(data);
try {
XContentParser.Token token;
String currentFieldName = null;
boolean offsets = true;
boolean offsetsFound = false;
boolean positions = true;
boolean positionsFound = false;
boolean payloads = true;
boolean payloadsFound = false;
boolean termStatistics = false;
boolean termStatisticsFound = false;
boolean fieldStatistics = true;
boolean fieldStatisticsFound = false;
List<String> ids = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.VALUE_BOOLEAN) {
if (currentFieldName.equals("offsets")) {
offsets = parser.booleanValue();
offsetsFound = true;
} else if (currentFieldName.equals("positions")) {
positions = parser.booleanValue();
positionsFound = true;
} else if (currentFieldName.equals("payloads")) {
payloads = parser.booleanValue();
payloadsFound = true;
} else if (currentFieldName.equals("term_statistics") || currentFieldName.equals("termStatistics")) {
termStatistics = parser.booleanValue();
termStatisticsFound = true;
} else if (currentFieldName.equals("field_statistics") || currentFieldName.equals("fieldStatistics")) {
fieldStatistics = parser.booleanValue();
fieldStatisticsFound = true;
} else {
throw new ElasticSearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
}
} else if (token == XContentParser.Token.START_ARRAY) {
if ("docs".equals(currentFieldName)) {
@ -135,52 +85,39 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
if (token != XContentParser.Token.START_OBJECT) {
throw new ElasticSearchIllegalArgumentException("docs array element should include an object");
}
TermVectorRequest termVectorRequest = new TermVectorRequest(defaultIndex, defaultType, null);
TermVectorRequest termVectorRequest = new TermVectorRequest(template);
TermVectorRequest.parseRequest(termVectorRequest, parser);
if (defaultFields != null) {
termVectorRequest.selectedFields(defaultFields.clone());
}
add(termVectorRequest);
}
} else if ("ids".equals(currentFieldName)) {
ids = new ArrayList<String>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (!token.isValue()) {
throw new ElasticSearchIllegalArgumentException("ids array element should only contain ids");
}
TermVectorRequest tvr = new TermVectorRequest(defaultIndex, defaultType, parser.text());
if (defaultFields != null) {
tvr.selectedFields(defaultFields.clone());
}
add(tvr);
ids.add(parser.text());
}
} else {
throw new ElasticSearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
throw new ElasticSearchParseException(
"No parameter named " + currentFieldName + "and type ARRAY");
}
} else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) {
if ("parameters".equals(currentFieldName)) {
TermVectorRequest.parseRequest(template, parser);
} else {
throw new ElasticSearchParseException(
"No parameter named " + currentFieldName + "and type OBJECT");
}
} else if (currentFieldName != null) {
throw new ElasticSearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
}
}
for (int i = 0; i < requests.size(); i++) {
TermVectorRequest curRequest = requests.get(i);
if (offsetsFound) {
curRequest.offsets(offsets);
if (ids != null) {
for (String id : ids) {
TermVectorRequest curRequest = new TermVectorRequest(template);
curRequest.id(id);
requests.add(curRequest);
}
if (payloadsFound) {
curRequest.payloads(payloads);
}
if (fieldStatisticsFound) {
curRequest.fieldStatistics(fieldStatistics);
}
if (positionsFound) {
curRequest.positions(positions);
}
if (termStatisticsFound) {
curRequest.termStatistics(termStatistics);
}
requests.set(i, curRequest);
}
} finally {
parser.close();

View File

@ -49,16 +49,6 @@ public class MultiTermVectorsRequestBuilder extends ActionRequestBuilder<MultiTe
return this;
}
/**
* Sets the preference to execute the search. Defaults to randomize across shards. Can be set to
* <tt>_local</tt> to prefer local shards, <tt>_primary</tt> to execute only on primary shards, or
* a custom value, which guarantees that the same order will be used across different requests.
*/
public MultiTermVectorsRequestBuilder setPreference(String preference) {
request.preference(preference);
return this;
}
@Override
protected void doExecute(ActionListener<MultiTermVectorsResponse> listener) {
((Client) client).multiTermVectors(request, listener);

View File

@ -350,7 +350,7 @@ public final class TermVectorFields extends Fields {
public int getDocCount() throws IOException {
return docCount;
}
@Override
public boolean hasFreqs() {
return true;

View File

@ -54,7 +54,7 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads,
Flag.FieldStatistics);
TermVectorRequest() {
public TermVectorRequest() {
}
/**
@ -67,6 +67,23 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
this.id = id;
this.type = type;
}
/**
* Copy constructor: creates a new term vector request initialised from
* another one. The index, type, id, preference and routing of
* {@code other} are taken over as they are. The flags and the selected
* fields (if any) are copied into new collections, so that changing them
* on the new request does not change {@code other}.
*
* @param other the request whose settings are copied
*/
public TermVectorRequest(TermVectorRequest other) {
super(other.index());
this.id = other.id();
this.type = other.type();
// clone so that both requests do not share one mutable flag set
this.flagsEnum = other.getFlags().clone();
this.preference = other.preference();
this.routing = other.routing();
// selectedFields stays unset when the other request has none
if (other.selectedFields != null) {
this.selectedFields = new HashSet<String>(other.selectedFields);
}
}
public EnumSet<Flag> getFlags() {
return flagsEnum;
@ -85,6 +102,13 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
public String id() {
return id;
}
/**
* Sets the id of the document the term vector is requested for.
*
* @param id the document id
*/
public void id(String id) {
this.id = id;
}
/**
* @return The routing for this request.

View File

@ -43,7 +43,27 @@ public class TermVectorRequestBuilder extends ActionRequestBuilder<TermVectorReq
request.routing(routing);
return this;
}
/**
* Sets the parent id of the document the term vector is requested for by passing it on to
* {@code request.parent(parent)}.
* <p>
* NOTE(review): this comment used to say the value is "only used for routing with delete
* requests", which looks copied from the delete request builder. Presumably the parent id
* is only used as routing here as well - confirm against the request's parent(..) method.
*
* @param parent the parent id
* @return this builder, so that calls can be chained
*/
public TermVectorRequestBuilder setParent(String parent) {
request.parent(parent);
return this;
}
/**
* Sets the preference to execute the search. Defaults to randomize across shards. Can be set to
* <tt>_local</tt> to prefer local shards, <tt>_primary</tt> to execute only on primary shards, or
* a custom value, which guarantees that the same order will be used across different requests.
*
* @param preference the preference value, passed on to {@code request.preference(preference)}
* @return this builder, so that calls can be chained
*/
public TermVectorRequestBuilder setPreference(String preference) {
request.preference(preference);
return this;
}
public TermVectorRequestBuilder setOffsets(boolean offsets) {
request.offsets(offsets);
return this;

View File

@ -22,8 +22,8 @@ package org.elasticsearch.rest.action.termvector;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
import org.elasticsearch.action.termvector.MultiTermVectorsResponse;
import org.elasticsearch.action.termvector.TermVectorRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
@ -49,18 +49,14 @@ public class RestMultiTermVectorsAction extends BaseRestHandler {
@Override
public void handleRequest(final RestRequest request, final RestChannel channel) {
MultiTermVectorsRequest multiTermVectorsRequest = new MultiTermVectorsRequest();
multiTermVectorsRequest.listenerThreaded(false);
multiTermVectorsRequest.preference(request.param("preference"));
String[] sFields = null;
String sField = request.param("fields");
if (sField != null) {
sFields = Strings.splitStringByCommaToArray(sField);
}
TermVectorRequest template = new TermVectorRequest();
RestTermVectorAction.readURIParameters(template, request);
try {
multiTermVectorsRequest.add(request.param("index"), request.param("type"), sFields, request.content());
multiTermVectorsRequest.add(template, request.content());
} catch (Throwable t) {
try {
channel.sendResponse(new XContentThrowableRestResponse(request, t));

View File

@ -59,9 +59,6 @@ public class RestTermVectorAction extends BaseRestHandler {
public void handleRequest(final RestRequest request, final RestChannel channel) {
TermVectorRequest termVectorRequest = new TermVectorRequest(request.param("index"), request.param("type"), request.param("id"));
termVectorRequest.routing(request.param("routing"));
termVectorRequest.parent(request.param("parent"));
termVectorRequest.preference(request.param("preference"));
XContentParser parser = null;
if (request.hasContent()) {
try {
@ -114,6 +111,9 @@ public class RestTermVectorAction extends BaseRestHandler {
termVectorRequest.offsets(request.paramAsBoolean("offsets", termVectorRequest.offsets()));
termVectorRequest.positions(request.paramAsBoolean("positions", termVectorRequest.positions()));
termVectorRequest.payloads(request.paramAsBoolean("payloads", termVectorRequest.payloads()));
termVectorRequest.routing(request.param("routing"));
termVectorRequest.parent(request.param("parent"));
termVectorRequest.preference(request.param("preference"));
termVectorRequest.termStatistics(request.paramAsBoolean("termStatistics", termVectorRequest.termStatistics()));
termVectorRequest.termStatistics(request.paramAsBoolean("term_statistics", termVectorRequest.termStatistics()));
termVectorRequest.fieldStatistics(request.paramAsBoolean("fieldStatistics", termVectorRequest.fieldStatistics()));

View File

@ -1,4 +1,4 @@
package org.elasticsearch.termvectors;
package org.elasticsearch.action.termvector;
/*
* Licensed to ElasticSearch under one

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.termvectors;
package org.elasticsearch.action.termvector;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.termvectors;
package org.elasticsearch.action.termvector;
import com.carrotsearch.hppc.ObjectIntOpenHashMap;
import org.apache.lucene.analysis.payloads.PayloadHelper;

View File

@ -1,4 +1,4 @@
package org.elasticsearch.termvectors;
package org.elasticsearch.action.termvector;
/*
* Licensed to ElasticSearch under one
* or more contributor license agreements. See the NOTICE file

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.termvectors;
package org.elasticsearch.action.termvector;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
@ -28,11 +28,10 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.elasticsearch.action.termvector.TermVectorRequest;
import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
import org.elasticsearch.action.termvector.TermVectorResponse;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.io.stream.InputStreamStreamInput;
import org.elasticsearch.common.io.stream.OutputStreamStreamOutput;
import org.elasticsearch.common.xcontent.XContentFactory;
@ -51,6 +50,7 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
import static org.hamcrest.Matchers.equalTo;
@ -288,5 +288,41 @@ public class TermVectorUnitTests extends ElasticsearchLuceneTestCase {
String ftOpts = AbstractFieldMapper.termVectorOptionsToString(ft);
assertThat(ftOpts, equalTo("with_offsets"));
}
/**
 * Parses each of the two multi term vector JSON fixtures from the classpath
 * into a fresh {@link MultiTermVectorsRequest}, using an empty
 * {@link TermVectorRequest} as template, and checks the parsed requests.
 */
@Test
public void testMultiParser() throws Exception {
    String[] fixtures = {
            "/org/elasticsearch/action/termvector/multiRequest1.json",
            "/org/elasticsearch/action/termvector/multiRequest2.json"
    };
    for (String fixture : fixtures) {
        BytesReference content = new BytesArray(Streams.copyToBytesFromClasspath(fixture));
        MultiTermVectorsRequest parsed = new MultiTermVectorsRequest();
        parsed.add(new TermVectorRequest(), content);
        checkParsedParameters(parsed);
    }
}
/**
 * Asserts that a parsed multi term vector request contains exactly one
 * request for each of the ids "1" and "2", and that every request targets
 * index "testidx" / type "test", selects the fields a, b and c, has
 * payloads, positions, offsets and field statistics switched off and term
 * statistics switched on.
 *
 * @param request the parsed multi request to check
 */
void checkParsedParameters(MultiTermVectorsRequest request) {
    Set<String> ids = new HashSet<String>();
    ids.add("1");
    ids.add("2");
    Set<String> fields = new HashSet<String>();
    fields.add("a");
    fields.add("b");
    fields.add("c");
    // Without this check the loop below would pass vacuously if the parser
    // produced no requests at all.
    assertThat(request.requests.size(), equalTo(ids.size()));
    Set<String> seenIds = new HashSet<String>();
    for (TermVectorRequest singleRequest : request.requests) {
        assertThat(singleRequest.index(), equalTo("testidx"));
        assertThat(singleRequest.type(), equalTo("test"));
        assertThat(singleRequest.payloads(), equalTo(false));
        assertThat(singleRequest.positions(), equalTo(false));
        assertThat(singleRequest.offsets(), equalTo(false));
        assertThat(singleRequest.termStatistics(), equalTo(true));
        assertThat(singleRequest.fieldStatistics(), equalTo(false));
        assertThat(singleRequest.selectedFields(), equalTo(fields));
        seenIds.add(singleRequest.id());
    }
    // Every expected id must have been parsed, and no other id.
    assertThat(seenIds, equalTo(ids));
}
}

View File

@ -0,0 +1,13 @@
{
"ids": ["1","2"],
"parameters": {
"field_statistics": false,
"term_statistics": true,
"payloads":false,
"offsets":false,
"positions":false,
"fields":["a","b","c"],
"_index": "testidx",
"_type":"test"
}
}

View File

@ -0,0 +1,26 @@
{
"docs": [
{
"_id": "1",
"field_statistics": false,
"term_statistics": true,
"payloads": false,
"offsets": false,
"positions": false,
"fields":["a","b","c"],
"_index": "testidx",
"_type": "test"
},
{
"_id": "2",
"field_statistics": false,
"term_statistics": true,
"payloads": false,
"offsets": false,
"positions": false,
"fields":["a","b","c"],
"_index": "testidx",
"_type": "test"
}
]
}