[SPEC,TEST,FIX] add spec and tests for termvector api and fix inconsistencies

- index and type were not read from the uri with the _mtermvectors api
- ids were not read from the uri parameters with the _mtermvectors api
This commit is contained in:
Britta Weber 2014-01-20 13:23:27 +01:00
parent f2fb114a3e
commit cae5eb479a
6 changed files with 370 additions and 44 deletions

View File

@ -0,0 +1,89 @@
{
  "termvectors.get_multi_termvectors" : {
    "documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
    "methods" : ["GET", "POST"],
    "url" : {
      "path" : "/_mtermvectors",
      "paths" : ["/_mtermvectors", "/{index}/_mtermvectors", "/{index}/{type}/_mtermvectors"],
      "parts" : {
        "index" : {
          "type" : "string",
          "description" : "The index in which the document resides.",
          "required" : false
        },
        "type" : {
          "type" : "string",
          "description" : "The type of the document.",
          "required" : false
        },
        "id" : {
          "type" : "string",
          "description" : "The id of the document.",
          "required" : false
        }
      },
      "params" : {
        "ids" : {
          "type" : "list",
          "description" : "A comma-separated list of document ids. You must define ids as a parameter or set \"ids\" or \"docs\" in the request body.",
          "required" : false
        },
        "term_statistics" : {
          "type" : "boolean",
          "description" : "Specifies if total term frequency and document frequency should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
          "default" : false,
          "required" : false
        },
        "field_statistics" : {
          "type" : "boolean",
          "description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
          "default" : true,
          "required" : false
        },
        "fields" : {
          "type" : "list",
          "description" : "A comma-separated list of fields to return. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
          "required" : false
        },
        "offsets" : {
          "type" : "boolean",
          "description" : "Specifies if term offsets should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
          "default" : true,
          "required" : false
        },
        "positions" : {
          "type" : "boolean",
          "description" : "Specifies if term positions should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
          "default" : true,
          "required" : false
        },
        "payloads" : {
          "type" : "boolean",
          "description" : "Specifies if term payloads should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
          "default" : true,
          "required" : false
        },
        "preference" : {
          "type" : "string",
          "description" : "Specify the node or shard the operation should be performed on (default: random). Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
          "required" : false
        },
        "routing" : {
          "type" : "string",
          "description" : "Specific routing value. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
          "required" : false
        },
        "parent" : {
          "type" : "string",
          "description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
          "required" : false
        }
      }
    },
    "body" : {
      "description" : "Define ids, parameters or a list of parameters per document here. You must at least provide a list of document ids. See documentation.",
      "required" : false
    }
  }
}

View File

@ -0,0 +1,83 @@
{
"termvectors.get_termvector" : {
"documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
"methods" : ["GET", "POST"],
"url" : {
"path" : "/{index}/{type}/{id}/_termvector",
"paths" : ["/{index}/{type}/{id}/_termvector"],
"parts" : {
"index" : {
"type" : "string",
"description" : "The index in which the document resides.",
"required" : true
},
"type" : {
"type" : "string",
"description" : "The type of the document.",
"required" : true
},
"id" : {
"type" : "string",
"description" : "The id of the document.",
"required" : true
}
},
"params": {
"term_statistics" : {
"type" : "boolean",
"description" : "Specifies if total term frequency and document frequency should be returned.",
"default" : false,
"required" : false
},
"field_statistics" : {
"type" : "boolean",
"description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned.",
"default" : true,
"required" : false
},
"fields" : {
"type" : "list",
"description" : "A comma-separated list of fields to return.",
"required" : false
},
"offsets" : {
"type" : "boolean",
"description" : "Specifies if term offsets should be returned.",
"default" : true,
"required" : false
},
"positions" : {
"type" : "boolean",
"description" : "Specifies if term positions should be returned.",
"default" : true,
"required" : false
},
"payloads" : {
"type" : "boolean",
"description" : "Specifies if term payloads should be returned.",
"default" : true,
"required" : false
},
"preference" : {
"type" : "string",
"description" : "Specify the node or shard the operation should be performed on (default: random).",
"required" : false
},
"routing" : {
"type" : "string",
"description" : "Specific routing value.",
"required" : false
},
"parent": {
"type" : "string",
"description" : "Parent id of documents.",
"required" : false
}
}
},
"body": {
"description" : "Define parameters. See documentation.",
"required" : false
}
}
}

View File

@ -0,0 +1,101 @@
# Fixture for the multi termvector tests in this file: one index, one mapped
# type and one document that every request below reads back.
setup:
# Create index "testidx"; the "testtype" mapping declares field "text" with
# term_vector set to "with_positions_offsets".
- do:
indices.create:
index: testidx
body:
mappings:
testtype: {
"properties": {
"text": {
"type" : "string",
"term_vector" : "with_positions_offsets"
}
}
}
# Index the single document "testing_document"; its text contains the word
# "brown" twice, which the term_freq/ttf assertions below rely on.
- do:
index:
index: testidx
type: testtype
id: testing_document
body:
{"text" : "The quick brown fox is brown."}
# Refresh all indices, presumably so the document indexed above is visible to
# the requests that follow.
- do:
indices.refresh: {}
---
"Basic tests for multi termvector get":
# Every request below asks for term statistics and expects the same answer for
# the term "brown" in the first returned document: term_freq 2 and ttf 2.
# What varies is where index, type and id are supplied.
#
# Variant 1: index, type and id are all given per document in the body.
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"body" : {
"docs": [
{
"_index" : "testidx",
"_type" : "testtype",
"_id" : "testing_document"
}
]
}
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
# NOTE(review): this request and its two assertions are identical to variant 1
# above; it looks like an accidental duplicate - confirm whether a different
# variant was intended here.
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"body" : {
"docs": [
{
"_index" : "testidx",
"_type" : "testtype",
"_id" : "testing_document"
}
]
}
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
# Variant 2: index is passed as a request argument; type and id stay in the body.
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"index" : "testidx"
"body" : {
"docs": [
{
"_type" : "testtype",
"_id" : "testing_document"
}
]
}
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
# Variant 3: index and type are passed as request arguments; only the id is in
# the body.
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"index" : "testidx"
"type" : "testtype"
"body" : {
"docs": [
{
"_id" : "testing_document"
}
]
}
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
# Variant 4: no body at all; index, type and the list of ids are all passed as
# request arguments.
- do:
termvectors.get_multi_termvectors:
"term_statistics" : true
"index" : "testidx"
"type" : "testtype"
"ids" : ["testing_document"]
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}

View File

@ -0,0 +1,38 @@
# Fixture for the single-document termvector test in this file: one index, one
# mapped type and one document.
setup:
# Create index "testidx"; the "testtype" mapping declares field "text" with
# term_vector set to "with_positions_offsets".
- do:
indices.create:
index: testidx
body:
mappings:
testtype: {
"properties": {
"text": {
"type" : "string",
"term_vector" : "with_positions_offsets"
}
}
}
# Index the single document "testing_document" that the test reads back.
- do:
index:
index: testidx
type: testtype
id: testing_document
body:
{"text" : "The quick brown fox is brown."}
# Refresh all indices, presumably so the document indexed above is visible to
# the request that follows.
- do:
indices.refresh: {}
---
"Basic tests for termvectors get":
# Fetch the term vector of the one indexed document, with term statistics
# switched on, then check three values from the response.
- do:
termvectors.get_termvector:
index: testidx
type: testtype
id: testing_document
"term_statistics" : true
# The text has five distinct words (the, quick, brown, fox, is) in a single
# document, which matches a sum_doc_freq of 5 - assuming the analyzer keeps
# all of them; TODO confirm.
- match: {term_vectors.text.field_statistics.sum_doc_freq: 5}
# Only one document exists, so "brown" has a document frequency of 1.
- match: {term_vectors.text.terms.brown.doc_freq: 1}
# The first "brown" starts at character offset 10 of "The quick brown fox is brown.".
- match: {term_vectors.text.terms.brown.tokens.0.start_offset: 10}

View File

@ -30,16 +30,21 @@ import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.rest.RestRequest;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set;
public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsRequest> { public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsRequest> {
String preference; String preference;
List<TermVectorRequest> requests = new ArrayList<TermVectorRequest>(); List<TermVectorRequest> requests = new ArrayList<TermVectorRequest>();
final Set<String> ids = new HashSet<String>();
public MultiTermVectorsRequest add(TermVectorRequest termVectorRequest) { public MultiTermVectorsRequest add(TermVectorRequest termVectorRequest) {
requests.add(termVectorRequest); requests.add(termVectorRequest);
return this; return this;
@ -70,11 +75,12 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
public void add(TermVectorRequest template, BytesReference data) public void add(TermVectorRequest template, BytesReference data)
throws Exception { throws Exception {
XContentParser parser = XContentFactory.xContent(data).createParser(data);
try {
XContentParser.Token token; XContentParser.Token token;
String currentFieldName = null; String currentFieldName = null;
List<String> ids = null; if (data.length() > 0) {
XContentParser parser = XContentFactory.xContent(data).createParser(data);
try {
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) { if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName(); currentFieldName = parser.currentName();
@ -90,7 +96,6 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
add(termVectorRequest); add(termVectorRequest);
} }
} else if ("ids".equals(currentFieldName)) { } else if ("ids".equals(currentFieldName)) {
ids = new ArrayList<String>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (!token.isValue()) { if (!token.isValue()) {
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids"); throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
@ -112,17 +117,17 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported"); throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
} }
} }
if (ids != null) { }
finally {
parser.close();
}
}
for (String id : ids) { for (String id : ids) {
TermVectorRequest curRequest = new TermVectorRequest(template); TermVectorRequest curRequest = new TermVectorRequest(template);
curRequest.id(id); curRequest.id(id);
requests.add(curRequest); requests.add(curRequest);
} }
} }
} finally {
parser.close();
}
}
@Override @Override
public void readFrom(StreamInput in) throws IOException { public void readFrom(StreamInput in) throws IOException {
@ -144,4 +149,10 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
termVectorRequest.writeTo(out); termVectorRequest.writeTo(out);
} }
} }
/**
 * Registers document ids on this request, for example ids that were passed
 * as a request parameter instead of in the request body.
 * <p>
 * All whitespace characters are removed from each id before it is added to
 * this request's id set, so ids that become equal after stripping collapse
 * into a single entry. A {@code null} array and {@code null} elements are
 * skipped instead of raising a {@link NullPointerException}.
 *
 * @param ids the ids to register; may be {@code null} or empty
 */
public void ids(String[] ids) {
    if (ids == null) {
        // Nothing was supplied; leave the id set untouched.
        return;
    }
    for (String id : ids) {
        if (id == null) {
            continue;
        }
        // Removes every whitespace character, not only leading/trailing ones,
        // e.g. "1, 2" split on commas yields " 2", which is stored as "2".
        this.ids.add(id.replaceAll("\\s", ""));
    }
}
} }

View File

@ -24,6 +24,7 @@ import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
import org.elasticsearch.action.termvector.MultiTermVectorsResponse; import org.elasticsearch.action.termvector.MultiTermVectorsResponse;
import org.elasticsearch.action.termvector.TermVectorRequest; import org.elasticsearch.action.termvector.TermVectorRequest;
import org.elasticsearch.client.Client; import org.elasticsearch.client.Client;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
@ -53,7 +54,10 @@ public class RestMultiTermVectorsAction extends BaseRestHandler {
MultiTermVectorsRequest multiTermVectorsRequest = new MultiTermVectorsRequest(); MultiTermVectorsRequest multiTermVectorsRequest = new MultiTermVectorsRequest();
multiTermVectorsRequest.listenerThreaded(false); multiTermVectorsRequest.listenerThreaded(false);
TermVectorRequest template = new TermVectorRequest(); TermVectorRequest template = new TermVectorRequest();
template.index(request.param("index"));
template.type(request.param("type"));
RestTermVectorAction.readURIParameters(template, request); RestTermVectorAction.readURIParameters(template, request);
multiTermVectorsRequest.ids(Strings.commaDelimitedListToStringArray(request.param("ids")));
try { try {
multiTermVectorsRequest.add(template, request.content()); multiTermVectorsRequest.add(template, request.content());