[SPEC,TEST,FIX] add spec and tests for termvector api and fix inconsistencies

- index and type were not read from the uri with the _mtermvectors api
- ids were not read from the uri parameters with the _mtermvectors api
Britta Weber 2014-01-20 13:23:27 +01:00
parent f2fb114a3e
commit cae5eb479a
6 changed files with 370 additions and 44 deletions
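For context, the practical effect of the fix is that a request such as GET /testidx/testtype/_mtermvectors?ids=testing_document now picks up index, type and the ids from the URI. The following is a rough Java sketch of the request object the REST layer ends up building, using only constructors and setters that appear in this diff (the index, type and id values are the placeholders from the tests below; BytesArray merely stands in for the raw request body):

    import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
    import org.elasticsearch.action.termvector.TermVectorRequest;
    import org.elasticsearch.common.bytes.BytesArray;

    public class MTermVectorsUriSketch {
        public static void main(String[] args) throws Exception {
            // the template carries everything taken from the URI; each
            // per-document request is later copied from it
            TermVectorRequest template = new TermVectorRequest();
            template.index("testidx");
            template.type("testtype");

            MultiTermVectorsRequest request = new MultiTermVectorsRequest();
            // ids from the ?ids= parameter; before this commit they were ignored
            request.ids(new String[]{"testing_document"});
            // an empty body is allowed because ids can now come from the URI alone
            request.add(template, new BytesArray(""));
        }
    }

The empty-body case relies on the new data.length() > 0 guard added to MultiTermVectorsRequest.add() further down.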


@@ -0,0 +1,89 @@
{
"termvectors.get_multi_termvectors" : {
"documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
"methods" : ["GET", "POST"],
"url" : {
"path" : "/_mtermvectors",
"paths" : ["/_mtermvectors", "/{index}/_mtermvectors", "/{index}/{type}/_mtermvectors"],
"parts" : {
"index" : {
"type" : "string",
"description" : "The index in which the document resides.",
"required" : true
},
"type" : {
"type" : "string",
"description" : "The type of the document.",
"required" : true
},
"id" : {
"type" : "string",
"description" : "The id of the document.",
"required" : true
}
},
"params" : {
"ids" : {
"type" : "list",
"description" : "A comma-separated list of documents ids. You must define ids as parameter or set \"ids\" or \"docs\" in the request body",
"required" : false
},
"term_statistics" : {
"type" : "boolean",
"description" : "Specifies if total term frequency and document frequency should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"default" : false,
"required" : false
},
"field_statistics" : {
"type" : "boolean",
"description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"default" : true,
"required" : false
},
"fields" : {
"type" : "list",
"description" : "A comma-separated list of fields to return. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"required" : false
},
"offsets" : {
"type" : "boolean",
"description" : "Specifies if term offsets should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"default" : true,
"required" : false
},
"positions" : {
"type" : "boolean",
"description" : "Specifies if term positions should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"default" : true,
"required" : false
},
"payloads" : {
"type" : "boolean",
"description" : "Specifies if term payloads should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"default" : true,
"required" : false
},
"preference" : {
"type" : "string",
"description" : "Specify the node or shard the operation should be performed on (default: random) .Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"required" : false
},
"routing" : {
"type" : "string",
"description" : "Specific routing value. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"required" : false
},
"parent" : {
"type" : "string",
"description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"required" : false
}
}
},
"body" : {
"description" : "Define ids, parameters or a list of parameters per document here. You must at least provide a list of document ids. See documentation.",
"required" : false
}
}
}
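To make the ids/params wording above concrete, here is a rough sketch of a body that combines the "ids" shortcut with shared "parameters", fed through MultiTermVectorsRequest.add() as in the Java changes further down. The body string is purely illustrative, and the inner "fields" and "term_statistics" options are assumed to be accepted by TermVectorRequest.parseRequest():

    import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
    import org.elasticsearch.action.termvector.TermVectorRequest;
    import org.elasticsearch.common.bytes.BytesArray;

    public class MTermVectorsBodySketch {
        public static void main(String[] args) throws Exception {
            // index and type act as defaults for every id listed in the body
            TermVectorRequest template = new TermVectorRequest();
            template.index("testidx");
            template.type("testtype");

            // "ids" and "parameters" are top-level keys handled by
            // MultiTermVectorsRequest.add() (alongside "docs"); the nested
            // options inside "parameters" are assumptions
            String body = "{\"ids\": [\"1\", \"2\"], "
                    + "\"parameters\": {\"fields\": [\"text\"], \"term_statistics\": true}}";

            MultiTermVectorsRequest request = new MultiTermVectorsRequest();
            request.add(template, new BytesArray(body));
        }
    }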


@@ -0,0 +1,83 @@
{
"termvectors.get_termvector" : {
"documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
"methods" : ["GET", "POST"],
"url" : {
"path" : "/{index}/{type}/{id}/_termvector",
"paths" : ["/{index}/{type}/{id}/_termvector"],
"parts" : {
"index" : {
"type" : "string",
"description" : "The index in which the document resides.",
"required" : true
},
"type" : {
"type" : "string",
"description" : "The type of the document.",
"required" : true
},
"id" : {
"type" : "string",
"description" : "The id of the document.",
"required" : true
}
},
"params": {
"term_statistics" : {
"type" : "boolean",
"description" : "Specifies if total term frequency and document frequency should be returned.",
"default" : false,
"required" : false
},
"field_statistics" : {
"type" : "boolean",
"description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned.",
"default" : true,
"required" : false
},
"fields" : {
"type" : "list",
"description" : "A comma-separated list of fields to return.",
"required" : false
},
"offsets" : {
"type" : "boolean",
"description" : "Specifies if term offsets should be returned.",
"default" : true,
"required" : false
},
"positions" : {
"type" : "boolean",
"description" : "Specifies if term positions should be returned.",
"default" : true,
"required" : false
},
"payloads" : {
"type" : "boolean",
"description" : "Specifies if term payloads should be returned.",
"default" : true,
"required" : false
},
"preference" : {
"type" : "string",
"description" : "Specify the node or shard the operation should be performed on (default: random).",
"required" : false
},
"routing" : {
"type" : "string",
"description" : "Specific routing value.",
"required" : false
},
"parent": {
"type" : "string",
"description" : "Parent id of documents.",
"required" : false
}
}
},
"body": {
"description" : "Define parameters. See documentation.",
"required" : false
}
}
}
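For comparison with the multi-document API above, a rough sketch of the single-document request this spec describes; the three-argument constructor and the option setters are assumptions about TermVectorRequest's public API rather than something shown in this diff:

    import org.elasticsearch.action.termvector.TermVectorRequest;

    public class TermVectorSketch {
        public static void main(String[] args) {
            // one document addressed by index/type/id, as in the path above
            TermVectorRequest request = new TermVectorRequest("testidx", "testtype", "testing_document");
            // mirror the spec's parameters; setter names are assumed
            request.selectedFields("text");
            request.offsets(true);
            request.positions(true);
            request.payloads(true);
            request.termStatistics(true);
            request.fieldStatistics(true);
        }
    }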


@@ -0,0 +1,101 @@
setup:
- do:
indices.create:
index: testidx
body:
mappings:
testtype: {
"properties": {
"text": {
"type" : "string",
"term_vector" : "with_positions_offsets"
}
}
}
- do:
index:
index: testidx
type: testtype
id: testing_document
body:
{"text" : "The quick brown fox is brown."}
- do:
indices.refresh: {}
---
"Basic tests for multi termvector get":
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"body" : {
"docs": [
{
"_index" : "testidx",
"_type" : "testtype",
"_id" : "testing_document"
}
]
}
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"body" : {
"docs": [
{
"_index" : "testidx",
"_type" : "testtype",
"_id" : "testing_document"
}
]
}
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"index" : "testidx"
"body" : {
"docs": [
{
"_type" : "testtype",
"_id" : "testing_document"
}
]
}
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"index" : "testidx"
"type" : "testtype"
"body" : {
"docs": [
{
"_id" : "testing_document"
}
]
}
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"index" : "testidx"
"type" : "testtype"
"ids" : ["testing_document"]
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}


@@ -0,0 +1,38 @@
setup:
- do:
indices.create:
index: testidx
body:
mappings:
testtype: {
"properties": {
"text": {
"type" : "string",
"term_vector" : "with_positions_offsets"
}
}
}
- do:
index:
index: testidx
type: testtype
id: testing_document
body:
{"text" : "The quick brown fox is brown."}
- do:
indices.refresh: {}
---
"Basic tests for termvectors get":
- do:
termvectors.get_termvector:
index: testidx
type: testtype
id: testing_document
"term_statistics" : true
- match: {term_vectors.text.field_statistics.sum_doc_freq: 5}
- match: {term_vectors.text.terms.brown.doc_freq: 1}
- match: {term_vectors.text.terms.brown.tokens.0.start_offset: 10}


@@ -30,16 +30,21 @@ import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.rest.RestRequest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsRequest> {
String preference;
List<TermVectorRequest> requests = new ArrayList<TermVectorRequest>();
final Set<String> ids = new HashSet<String>();
public MultiTermVectorsRequest add(TermVectorRequest termVectorRequest) {
requests.add(termVectorRequest);
return this;
@@ -70,57 +75,57 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
public void add(TermVectorRequest template, BytesReference data)
throws Exception {
XContentParser parser = XContentFactory.xContent(data).createParser(data);
try {
XContentParser.Token token;
String currentFieldName = null;
List<String> ids = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.START_ARRAY) {
if ("docs".equals(currentFieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token != XContentParser.Token.START_OBJECT) {
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
XContentParser.Token token;
String currentFieldName = null;
if (data.length() > 0) {
XContentParser parser = XContentFactory.xContent(data).createParser(data);
try {
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.START_ARRAY) {
if ("docs".equals(currentFieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token != XContentParser.Token.START_OBJECT) {
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
}
TermVectorRequest termVectorRequest = new TermVectorRequest(template);
TermVectorRequest.parseRequest(termVectorRequest, parser);
add(termVectorRequest);
}
TermVectorRequest termVectorRequest = new TermVectorRequest(template);
TermVectorRequest.parseRequest(termVectorRequest, parser);
add(termVectorRequest);
}
} else if ("ids".equals(currentFieldName)) {
ids = new ArrayList<String>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (!token.isValue()) {
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
} else if ("ids".equals(currentFieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (!token.isValue()) {
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
}
ids.add(parser.text());
}
ids.add(parser.text());
} else {
throw new ElasticsearchParseException(
"No parameter named " + currentFieldName + "and type ARRAY");
}
} else {
throw new ElasticsearchParseException(
"No parameter named " + currentFieldName + "and type ARRAY");
} else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) {
if ("parameters".equals(currentFieldName)) {
TermVectorRequest.parseRequest(template, parser);
} else {
throw new ElasticsearchParseException(
"No parameter named " + currentFieldName + "and type OBJECT");
}
} else if (currentFieldName != null) {
throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
}
} else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) {
if ("parameters".equals(currentFieldName)) {
TermVectorRequest.parseRequest(template, parser);
} else {
throw new ElasticsearchParseException(
"No parameter named " + currentFieldName + "and type OBJECT");
}
} else if (currentFieldName != null) {
throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
}
}
if (ids != null) {
for (String id : ids) {
TermVectorRequest curRequest = new TermVectorRequest(template);
curRequest.id(id);
requests.add(curRequest);
}
finally {
parser.close();
}
} finally {
parser.close();
}
for (String id : ids) {
TermVectorRequest curRequest = new TermVectorRequest(template);
curRequest.id(id);
requests.add(curRequest);
}
}
@@ -144,4 +149,10 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
termVectorRequest.writeTo(out);
}
}
public void ids(String[] ids) {
for (String id : ids) {
this.ids.add(id.replaceAll("\\s", ""));
}
}
}
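The new ids(String[]) setter above strips whitespace from every id, which pairs with the comma-delimited ?ids= handling added to RestMultiTermVectorsAction below. A rough sketch of that flow (the parameter value is made up):

    import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
    import org.elasticsearch.common.Strings;

    public class IdsParamSketch {
        public static void main(String[] args) {
            // what a client might send as ?ids=testing_document, other_document
            String idsParam = "testing_document, other_document";
            MultiTermVectorsRequest request = new MultiTermVectorsRequest();
            // commaDelimitedListToStringArray splits on commas; ids() then drops
            // the leftover whitespace around each id
            request.ids(Strings.commaDelimitedListToStringArray(idsParam));
        }
    }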


@@ -24,6 +24,7 @@ import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
import org.elasticsearch.action.termvector.MultiTermVectorsResponse;
import org.elasticsearch.action.termvector.TermVectorRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -53,8 +54,11 @@ public class RestMultiTermVectorsAction extends BaseRestHandler {
MultiTermVectorsRequest multiTermVectorsRequest = new MultiTermVectorsRequest();
multiTermVectorsRequest.listenerThreaded(false);
TermVectorRequest template = new TermVectorRequest();
template.index(request.param("index"));
template.type(request.param("type"));
RestTermVectorAction.readURIParameters(template, request);
multiTermVectorsRequest.ids(Strings.commaDelimitedListToStringArray(request.param("ids")));
try {
multiTermVectorsRequest.add(template, request.content());
} catch (Throwable t) {