[SPEC,TEST,FIX] add spec and tests for termvector api and fix inconsistencies
- index and type were not read from the uri with the _mtermvectors api - ids were not read from the uri parameters with the _mtermvectors api
This commit is contained in:
parent
f2fb114a3e
commit
cae5eb479a
|
@ -0,0 +1,89 @@
|
|||
{
|
||||
"termvectors.get_multi_termvectors" : {
|
||||
"documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
|
||||
"methods" : ["GET", "POST"],
|
||||
"url" : {
|
||||
"path" : "/_mtermvectors",
|
||||
"paths" : ["/_mtermvectors", "/{index}/_mtermvectors", "/{index}/{type}/_mtermvectors"],
|
||||
"parts" : {
|
||||
"index" : {
|
||||
"type" : "string",
|
||||
"description" : "The index in which the document resides.",
|
||||
"required" : true
|
||||
},
|
||||
"type" : {
|
||||
"type" : "string",
|
||||
"description" : "The type of the document.",
|
||||
"required" : true
|
||||
},
|
||||
"id" : {
|
||||
"type" : "string",
|
||||
"description" : "The id of the document.",
|
||||
"required" : true
|
||||
}
|
||||
},
|
||||
"params" : {
|
||||
"ids" : {
|
||||
"type" : "list",
|
||||
"description" : "A comma-separated list of document ids. You must define ids as a parameter or set \"ids\" or \"docs\" in the request body",
|
||||
"required" : false
|
||||
},
|
||||
"term_statistics" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if total term frequency and document frequency should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"default" : false,
|
||||
"required" : false
|
||||
},
|
||||
"field_statistics" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"default" : true,
|
||||
"required" : false
|
||||
},
|
||||
"fields" : {
|
||||
"type" : "list",
|
||||
"description" : "A comma-separated list of fields to return. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"required" : false
|
||||
},
|
||||
"offsets" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if term offsets should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"default" : true,
|
||||
"required" : false
|
||||
},
|
||||
"positions" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if term positions should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"default" : true,
|
||||
"required" : false
|
||||
},
|
||||
"payloads" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if term payloads should be returned. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"default" : true,
|
||||
"required" : false
|
||||
},
|
||||
"preference" : {
|
||||
"type" : "string",
|
||||
"description" : "Specify the node or shard the operation should be performed on (default: random). Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"required" : false
|
||||
},
|
||||
"routing" : {
|
||||
"type" : "string",
|
||||
"description" : "Specific routing value. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"required" : false
|
||||
},
|
||||
"parent" : {
|
||||
"type" : "string",
|
||||
"description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"required" : false
|
||||
}
|
||||
}
|
||||
},
|
||||
"body" : {
|
||||
"description" : "Define ids, parameters or a list of parameters per document here. You must at least provide a list of document ids. See documentation.",
|
||||
"required" : false
|
||||
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
{
|
||||
"termvectors.get_termvector" : {
|
||||
"documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
|
||||
"methods" : ["GET", "POST"],
|
||||
"url" : {
|
||||
"path" : "/{index}/{type}/{id}/_termvector",
|
||||
"paths" : ["/{index}/{type}/{id}/_termvector"],
|
||||
"parts" : {
|
||||
"index" : {
|
||||
"type" : "string",
|
||||
"description" : "The index in which the document resides.",
|
||||
"required" : true
|
||||
},
|
||||
"type" : {
|
||||
"type" : "string",
|
||||
"description" : "The type of the document.",
|
||||
"required" : true
|
||||
},
|
||||
"id" : {
|
||||
"type" : "string",
|
||||
"description" : "The id of the document.",
|
||||
"required" : true
|
||||
}
|
||||
},
|
||||
"params": {
|
||||
"term_statistics" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if total term frequency and document frequency should be returned.",
|
||||
"default" : false,
|
||||
"required" : false
|
||||
},
|
||||
"field_statistics" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned.",
|
||||
"default" : true,
|
||||
"required" : false
|
||||
},
|
||||
"fields" : {
|
||||
"type" : "list",
|
||||
"description" : "A comma-separated list of fields to return.",
|
||||
"required" : false
|
||||
},
|
||||
"offsets" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if term offsets should be returned.",
|
||||
"default" : true,
|
||||
"required" : false
|
||||
},
|
||||
"positions" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if term positions should be returned.",
|
||||
"default" : true,
|
||||
"required" : false
|
||||
},
|
||||
"payloads" : {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if term payloads should be returned.",
|
||||
"default" : true,
|
||||
"required" : false
|
||||
},
|
||||
"preference" : {
|
||||
"type" : "string",
|
||||
"description" : "Specify the node or shard the operation should be performed on (default: random).",
|
||||
"required" : false
|
||||
},
|
||||
"routing" : {
|
||||
"type" : "string",
|
||||
"description" : "Specific routing value.",
|
||||
"required" : false
|
||||
},
|
||||
"parent": {
|
||||
"type" : "string",
|
||||
"description" : "Parent id of documents.",
|
||||
"required" : false
|
||||
}
|
||||
}
|
||||
},
|
||||
"body": {
|
||||
"description" : "Define parameters. See documentation.",
|
||||
"required" : false
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
setup:
|
||||
- do:
|
||||
indices.create:
|
||||
index: testidx
|
||||
body:
|
||||
mappings:
|
||||
testtype: {
|
||||
"properties": {
|
||||
"text": {
|
||||
"type" : "string",
|
||||
"term_vector" : "with_positions_offsets"
|
||||
}
|
||||
}
|
||||
}
|
||||
- do:
|
||||
index:
|
||||
index: testidx
|
||||
type: testtype
|
||||
id: testing_document
|
||||
body:
|
||||
{"text" : "The quick brown fox is brown."}
|
||||
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
---
|
||||
"Basic tests for multi termvector get":
|
||||
|
||||
- do:
|
||||
termvectors.get_multi_termvectors:
|
||||
"term_statistics" : true
|
||||
"body" : {
|
||||
"docs": [
|
||||
{
|
||||
"_index" : "testidx",
|
||||
"_type" : "testtype",
|
||||
"_id" : "testing_document"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
||||
|
||||
- do:
|
||||
termvectors.get_multi_termvectors:
|
||||
"term_statistics" : true
|
||||
"body" : {
|
||||
"docs": [
|
||||
{
|
||||
"_index" : "testidx",
|
||||
"_type" : "testtype",
|
||||
"_id" : "testing_document"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
||||
|
||||
- do:
|
||||
termvectors.get_multi_termvectors:
|
||||
"term_statistics" : true
|
||||
"index" : "testidx"
|
||||
"body" : {
|
||||
"docs": [
|
||||
{
|
||||
"_type" : "testtype",
|
||||
"_id" : "testing_document"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
||||
|
||||
- do:
|
||||
termvectors.get_multi_termvectors:
|
||||
"term_statistics" : true
|
||||
"index" : "testidx"
|
||||
"type" : "testtype"
|
||||
"body" : {
|
||||
"docs": [
|
||||
{
|
||||
"_id" : "testing_document"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
||||
|
||||
- do:
|
||||
termvectors.get_multi_termvectors:
|
||||
"term_statistics" : true
|
||||
"index" : "testidx"
|
||||
"type" : "testtype"
|
||||
"ids" : ["testing_document"]
|
||||
|
||||
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
|
@ -0,0 +1,38 @@
|
|||
setup:
|
||||
- do:
|
||||
indices.create:
|
||||
index: testidx
|
||||
body:
|
||||
mappings:
|
||||
testtype: {
|
||||
"properties": {
|
||||
"text": {
|
||||
"type" : "string",
|
||||
"term_vector" : "with_positions_offsets"
|
||||
}
|
||||
}
|
||||
}
|
||||
- do:
|
||||
index:
|
||||
index: testidx
|
||||
type: testtype
|
||||
id: testing_document
|
||||
body:
|
||||
{"text" : "The quick brown fox is brown."}
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
---
|
||||
"Basic tests for termvectors get":
|
||||
|
||||
- do:
|
||||
termvectors.get_termvector:
|
||||
index: testidx
|
||||
type: testtype
|
||||
id: testing_document
|
||||
"term_statistics" : true
|
||||
|
||||
|
||||
- match: {term_vectors.text.field_statistics.sum_doc_freq: 5}
|
||||
- match: {term_vectors.text.terms.brown.doc_freq: 1}
|
||||
- match: {term_vectors.text.terms.brown.tokens.0.start_offset: 10}
|
|
@ -30,16 +30,21 @@ import org.elasticsearch.common.io.stream.StreamInput;
|
|||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.rest.RestRequest;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsRequest> {
|
||||
|
||||
String preference;
|
||||
List<TermVectorRequest> requests = new ArrayList<TermVectorRequest>();
|
||||
|
||||
final Set<String> ids = new HashSet<String>();
|
||||
|
||||
public MultiTermVectorsRequest add(TermVectorRequest termVectorRequest) {
|
||||
requests.add(termVectorRequest);
|
||||
return this;
|
||||
|
@ -70,57 +75,57 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
|
|||
|
||||
public void add(TermVectorRequest template, BytesReference data)
|
||||
throws Exception {
|
||||
XContentParser parser = XContentFactory.xContent(data).createParser(data);
|
||||
try {
|
||||
XContentParser.Token token;
|
||||
String currentFieldName = null;
|
||||
List<String> ids = null;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
} else if (token == XContentParser.Token.START_ARRAY) {
|
||||
|
||||
if ("docs".equals(currentFieldName)) {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token != XContentParser.Token.START_OBJECT) {
|
||||
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
|
||||
XContentParser.Token token;
|
||||
String currentFieldName = null;
|
||||
if (data.length() > 0) {
|
||||
XContentParser parser = XContentFactory.xContent(data).createParser(data);
|
||||
try {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
} else if (token == XContentParser.Token.START_ARRAY) {
|
||||
|
||||
if ("docs".equals(currentFieldName)) {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token != XContentParser.Token.START_OBJECT) {
|
||||
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
|
||||
}
|
||||
TermVectorRequest termVectorRequest = new TermVectorRequest(template);
|
||||
TermVectorRequest.parseRequest(termVectorRequest, parser);
|
||||
add(termVectorRequest);
|
||||
}
|
||||
TermVectorRequest termVectorRequest = new TermVectorRequest(template);
|
||||
TermVectorRequest.parseRequest(termVectorRequest, parser);
|
||||
add(termVectorRequest);
|
||||
}
|
||||
} else if ("ids".equals(currentFieldName)) {
|
||||
ids = new ArrayList<String>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (!token.isValue()) {
|
||||
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
|
||||
} else if ("ids".equals(currentFieldName)) {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (!token.isValue()) {
|
||||
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
|
||||
}
|
||||
ids.add(parser.text());
|
||||
}
|
||||
ids.add(parser.text());
|
||||
} else {
|
||||
throw new ElasticsearchParseException(
|
||||
"No parameter named " + currentFieldName + "and type ARRAY");
|
||||
}
|
||||
} else {
|
||||
throw new ElasticsearchParseException(
|
||||
"No parameter named " + currentFieldName + "and type ARRAY");
|
||||
} else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) {
|
||||
if ("parameters".equals(currentFieldName)) {
|
||||
TermVectorRequest.parseRequest(template, parser);
|
||||
} else {
|
||||
throw new ElasticsearchParseException(
|
||||
"No parameter named " + currentFieldName + "and type OBJECT");
|
||||
}
|
||||
} else if (currentFieldName != null) {
|
||||
throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
|
||||
}
|
||||
} else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) {
|
||||
if ("parameters".equals(currentFieldName)) {
|
||||
TermVectorRequest.parseRequest(template, parser);
|
||||
} else {
|
||||
throw new ElasticsearchParseException(
|
||||
"No parameter named " + currentFieldName + "and type OBJECT");
|
||||
}
|
||||
} else if (currentFieldName != null) {
|
||||
throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
|
||||
}
|
||||
}
|
||||
if (ids != null) {
|
||||
for (String id : ids) {
|
||||
TermVectorRequest curRequest = new TermVectorRequest(template);
|
||||
curRequest.id(id);
|
||||
requests.add(curRequest);
|
||||
}
|
||||
finally {
|
||||
parser.close();
|
||||
}
|
||||
} finally {
|
||||
parser.close();
|
||||
}
|
||||
for (String id : ids) {
|
||||
TermVectorRequest curRequest = new TermVectorRequest(template);
|
||||
curRequest.id(id);
|
||||
requests.add(curRequest);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -144,4 +149,10 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
|
|||
termVectorRequest.writeTo(out);
|
||||
}
|
||||
}
|
||||
|
||||
public void ids(String[] ids) {
|
||||
for (String id : ids) {
|
||||
this.ids.add(id.replaceAll("\\s", ""));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
|
|||
import org.elasticsearch.action.termvector.MultiTermVectorsResponse;
|
||||
import org.elasticsearch.action.termvector.TermVectorRequest;
|
||||
import org.elasticsearch.client.Client;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
|
@ -53,8 +54,11 @@ public class RestMultiTermVectorsAction extends BaseRestHandler {
|
|||
MultiTermVectorsRequest multiTermVectorsRequest = new MultiTermVectorsRequest();
|
||||
multiTermVectorsRequest.listenerThreaded(false);
|
||||
TermVectorRequest template = new TermVectorRequest();
|
||||
template.index(request.param("index"));
|
||||
template.type(request.param("type"));
|
||||
RestTermVectorAction.readURIParameters(template, request);
|
||||
|
||||
multiTermVectorsRequest.ids(Strings.commaDelimitedListToStringArray(request.param("ids")));
|
||||
|
||||
try {
|
||||
multiTermVectorsRequest.add(template, request.content());
|
||||
} catch (Throwable t) {
|
||||
|
|
Loading…
Reference in New Issue