[SPEC,TEST,FIX] add spec and tests for termvector api and fix inconsistencies
- index and type were not read from the uri with the _mtermvectors api - ids were not read from the uri parameters with the _mtermvectors api
This commit is contained in:
parent
f2fb114a3e
commit
cae5eb479a
|
@ -0,0 +1,89 @@
|
||||||
|
{
|
||||||
|
"termvectors.get_multi_termvectors" : {
|
||||||
|
"documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
|
||||||
|
"methods" : ["GET", "POST"],
|
||||||
|
"url" : {
|
||||||
|
"path" : "/_mtermvectors",
|
||||||
|
"paths" : ["/_mtermvectors", "/{index}/_mtermvectors", "/{index}/{type}/_mtermvectors"],
|
||||||
|
"parts" : {
|
||||||
|
"index" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The index in which the document resides.",
|
||||||
|
"required" : true
|
||||||
|
},
|
||||||
|
"type" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The type of the document.",
|
||||||
|
"required" : true
|
||||||
|
},
|
||||||
|
"id" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The id of the document.",
|
||||||
|
"required" : true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"params" : {
|
||||||
|
"ids" : {
|
||||||
|
"type" : "list",
|
||||||
|
"description" : "A comma-separated list of document ids. You must define ids as a parameter or set \"ids\" or \"docs\" in the request body",
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"term_statistics" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if total term frequency and document frequency should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
|
||||||
|
"default" : false,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"field_statistics" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
|
||||||
|
"default" : true,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"fields" : {
|
||||||
|
"type" : "list",
|
||||||
|
"description" : "A comma-separated list of fields to return. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"offsets" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if term offsets should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
|
||||||
|
"default" : true,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"positions" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if term positions should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
|
||||||
|
"default" : true,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"payloads" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if term payloads should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
|
||||||
|
"default" : true,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"preference" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Specify the node or shard the operation should be performed on (default: random). Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"routing" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Specific routing value. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"parent" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
|
||||||
|
"required" : false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"body" : {
|
||||||
|
"description" : "Define ids, parameters or a list of parameters per document here. You must at least provide a list of document ids. See documentation.",
|
||||||
|
"required" : false
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,83 @@
|
||||||
|
{
|
||||||
|
"termvectors.get_termvector" : {
|
||||||
|
"documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
|
||||||
|
"methods" : ["GET", "POST"],
|
||||||
|
"url" : {
|
||||||
|
"path" : "/{index}/{type}/{id}/_termvector",
|
||||||
|
"paths" : ["/{index}/{type}/{id}/_termvector"],
|
||||||
|
"parts" : {
|
||||||
|
"index" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The index in which the document resides.",
|
||||||
|
"required" : true
|
||||||
|
},
|
||||||
|
"type" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The type of the document.",
|
||||||
|
"required" : true
|
||||||
|
},
|
||||||
|
"id" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "The id of the document.",
|
||||||
|
"required" : true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"params": {
|
||||||
|
"term_statistics" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if total term frequency and document frequency should be returned.",
|
||||||
|
"default" : false,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"field_statistics" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned.",
|
||||||
|
"default" : true,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"fields" : {
|
||||||
|
"type" : "list",
|
||||||
|
"description" : "A comma-separated list of fields to return.",
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"offsets" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if term offsets should be returned.",
|
||||||
|
"default" : true,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"positions" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if term positions should be returned.",
|
||||||
|
"default" : true,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"payloads" : {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if term payloads should be returned.",
|
||||||
|
"default" : true,
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"preference" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Specify the node or shard the operation should be performed on (default: random).",
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"routing" : {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Specific routing value.",
|
||||||
|
"required" : false
|
||||||
|
},
|
||||||
|
"parent": {
|
||||||
|
"type" : "string",
|
||||||
|
"description" : "Parent id of documents.",
|
||||||
|
"required" : false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"body": {
|
||||||
|
"description" : "Define parameters. See documentation.",
|
||||||
|
"required" : false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,101 @@
|
||||||
|
setup:
|
||||||
|
- do:
|
||||||
|
indices.create:
|
||||||
|
index: testidx
|
||||||
|
body:
|
||||||
|
mappings:
|
||||||
|
testtype: {
|
||||||
|
"properties": {
|
||||||
|
"text": {
|
||||||
|
"type" : "string",
|
||||||
|
"term_vector" : "with_positions_offsets"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- do:
|
||||||
|
index:
|
||||||
|
index: testidx
|
||||||
|
type: testtype
|
||||||
|
id: testing_document
|
||||||
|
body:
|
||||||
|
{"text" : "The quick brown fox is brown."}
|
||||||
|
|
||||||
|
- do:
|
||||||
|
indices.refresh: {}
|
||||||
|
|
||||||
|
---
|
||||||
|
"Basic tests for multi termvector get":
|
||||||
|
|
||||||
|
- do:
|
||||||
|
termvectors.get_multi_termvectors:
|
||||||
|
"term_statistics" : true
|
||||||
|
"body" : {
|
||||||
|
"docs": [
|
||||||
|
{
|
||||||
|
"_index" : "testidx",
|
||||||
|
"_type" : "testtype",
|
||||||
|
"_id" : "testing_document"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
||||||
|
|
||||||
|
- do:
|
||||||
|
termvectors.get_multi_termvectors:
|
||||||
|
"term_statistics" : true
|
||||||
|
"body" : {
|
||||||
|
"docs": [
|
||||||
|
{
|
||||||
|
"_index" : "testidx",
|
||||||
|
"_type" : "testtype",
|
||||||
|
"_id" : "testing_document"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
||||||
|
|
||||||
|
- do:
|
||||||
|
termvectors.get_multi_termvectors:
|
||||||
|
"term_statistics" : true
|
||||||
|
"index" : "testidx"
|
||||||
|
"body" : {
|
||||||
|
"docs": [
|
||||||
|
{
|
||||||
|
"_type" : "testtype",
|
||||||
|
"_id" : "testing_document"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
||||||
|
|
||||||
|
- do:
|
||||||
|
termvectors.get_multi_termvectors:
|
||||||
|
"term_statistics" : true
|
||||||
|
"index" : "testidx"
|
||||||
|
"type" : "testtype"
|
||||||
|
"body" : {
|
||||||
|
"docs": [
|
||||||
|
{
|
||||||
|
"_id" : "testing_document"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
||||||
|
|
||||||
|
- do:
|
||||||
|
termvectors.get_multi_termvectors:
|
||||||
|
"term_statistics" : true
|
||||||
|
"index" : "testidx"
|
||||||
|
"type" : "testtype"
|
||||||
|
"ids" : ["testing_document"]
|
||||||
|
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
|
||||||
|
- match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
|
|
@ -0,0 +1,38 @@
|
||||||
|
setup:
|
||||||
|
- do:
|
||||||
|
indices.create:
|
||||||
|
index: testidx
|
||||||
|
body:
|
||||||
|
mappings:
|
||||||
|
testtype: {
|
||||||
|
"properties": {
|
||||||
|
"text": {
|
||||||
|
"type" : "string",
|
||||||
|
"term_vector" : "with_positions_offsets"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- do:
|
||||||
|
index:
|
||||||
|
index: testidx
|
||||||
|
type: testtype
|
||||||
|
id: testing_document
|
||||||
|
body:
|
||||||
|
{"text" : "The quick brown fox is brown."}
|
||||||
|
- do:
|
||||||
|
indices.refresh: {}
|
||||||
|
|
||||||
|
---
|
||||||
|
"Basic tests for termvectors get":
|
||||||
|
|
||||||
|
- do:
|
||||||
|
termvectors.get_termvector:
|
||||||
|
index: testidx
|
||||||
|
type: testtype
|
||||||
|
id: testing_document
|
||||||
|
"term_statistics" : true
|
||||||
|
|
||||||
|
|
||||||
|
- match: {term_vectors.text.field_statistics.sum_doc_freq: 5}
|
||||||
|
- match: {term_vectors.text.terms.brown.doc_freq: 1}
|
||||||
|
- match: {term_vectors.text.terms.brown.tokens.0.start_offset: 10}
|
|
@ -30,16 +30,21 @@ import org.elasticsearch.common.io.stream.StreamInput;
|
||||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.rest.RestRequest;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsRequest> {
|
public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsRequest> {
|
||||||
|
|
||||||
String preference;
|
String preference;
|
||||||
List<TermVectorRequest> requests = new ArrayList<TermVectorRequest>();
|
List<TermVectorRequest> requests = new ArrayList<TermVectorRequest>();
|
||||||
|
|
||||||
|
final Set<String> ids = new HashSet<String>();
|
||||||
|
|
||||||
public MultiTermVectorsRequest add(TermVectorRequest termVectorRequest) {
|
public MultiTermVectorsRequest add(TermVectorRequest termVectorRequest) {
|
||||||
requests.add(termVectorRequest);
|
requests.add(termVectorRequest);
|
||||||
return this;
|
return this;
|
||||||
|
@ -70,57 +75,57 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
|
||||||
|
|
||||||
public void add(TermVectorRequest template, BytesReference data)
|
public void add(TermVectorRequest template, BytesReference data)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
XContentParser parser = XContentFactory.xContent(data).createParser(data);
|
|
||||||
try {
|
|
||||||
XContentParser.Token token;
|
|
||||||
String currentFieldName = null;
|
|
||||||
List<String> ids = null;
|
|
||||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
|
||||||
if (token == XContentParser.Token.FIELD_NAME) {
|
|
||||||
currentFieldName = parser.currentName();
|
|
||||||
} else if (token == XContentParser.Token.START_ARRAY) {
|
|
||||||
|
|
||||||
if ("docs".equals(currentFieldName)) {
|
XContentParser.Token token;
|
||||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
String currentFieldName = null;
|
||||||
if (token != XContentParser.Token.START_OBJECT) {
|
if (data.length() > 0) {
|
||||||
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
|
XContentParser parser = XContentFactory.xContent(data).createParser(data);
|
||||||
|
try {
|
||||||
|
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||||
|
if (token == XContentParser.Token.FIELD_NAME) {
|
||||||
|
currentFieldName = parser.currentName();
|
||||||
|
} else if (token == XContentParser.Token.START_ARRAY) {
|
||||||
|
|
||||||
|
if ("docs".equals(currentFieldName)) {
|
||||||
|
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||||
|
if (token != XContentParser.Token.START_OBJECT) {
|
||||||
|
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
|
||||||
|
}
|
||||||
|
TermVectorRequest termVectorRequest = new TermVectorRequest(template);
|
||||||
|
TermVectorRequest.parseRequest(termVectorRequest, parser);
|
||||||
|
add(termVectorRequest);
|
||||||
}
|
}
|
||||||
TermVectorRequest termVectorRequest = new TermVectorRequest(template);
|
} else if ("ids".equals(currentFieldName)) {
|
||||||
TermVectorRequest.parseRequest(termVectorRequest, parser);
|
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||||
add(termVectorRequest);
|
if (!token.isValue()) {
|
||||||
}
|
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
|
||||||
} else if ("ids".equals(currentFieldName)) {
|
}
|
||||||
ids = new ArrayList<String>();
|
ids.add(parser.text());
|
||||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
|
||||||
if (!token.isValue()) {
|
|
||||||
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
|
|
||||||
}
|
}
|
||||||
ids.add(parser.text());
|
} else {
|
||||||
|
throw new ElasticsearchParseException(
|
||||||
|
"No parameter named " + currentFieldName + "and type ARRAY");
|
||||||
}
|
}
|
||||||
} else {
|
} else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) {
|
||||||
throw new ElasticsearchParseException(
|
if ("parameters".equals(currentFieldName)) {
|
||||||
"No parameter named " + currentFieldName + "and type ARRAY");
|
TermVectorRequest.parseRequest(template, parser);
|
||||||
|
} else {
|
||||||
|
throw new ElasticsearchParseException(
|
||||||
|
"No parameter named " + currentFieldName + "and type OBJECT");
|
||||||
|
}
|
||||||
|
} else if (currentFieldName != null) {
|
||||||
|
throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
|
||||||
}
|
}
|
||||||
} else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) {
|
|
||||||
if ("parameters".equals(currentFieldName)) {
|
|
||||||
TermVectorRequest.parseRequest(template, parser);
|
|
||||||
} else {
|
|
||||||
throw new ElasticsearchParseException(
|
|
||||||
"No parameter named " + currentFieldName + "and type OBJECT");
|
|
||||||
}
|
|
||||||
} else if (currentFieldName != null) {
|
|
||||||
throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ids != null) {
|
finally {
|
||||||
for (String id : ids) {
|
parser.close();
|
||||||
TermVectorRequest curRequest = new TermVectorRequest(template);
|
|
||||||
curRequest.id(id);
|
|
||||||
requests.add(curRequest);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} finally {
|
}
|
||||||
parser.close();
|
for (String id : ids) {
|
||||||
|
TermVectorRequest curRequest = new TermVectorRequest(template);
|
||||||
|
curRequest.id(id);
|
||||||
|
requests.add(curRequest);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -144,4 +149,10 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
|
||||||
termVectorRequest.writeTo(out);
|
termVectorRequest.writeTo(out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void ids(String[] ids) {
|
||||||
|
for (String id : ids) {
|
||||||
|
this.ids.add(id.replaceAll("\\s", ""));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
|
||||||
import org.elasticsearch.action.termvector.MultiTermVectorsResponse;
|
import org.elasticsearch.action.termvector.MultiTermVectorsResponse;
|
||||||
import org.elasticsearch.action.termvector.TermVectorRequest;
|
import org.elasticsearch.action.termvector.TermVectorRequest;
|
||||||
import org.elasticsearch.client.Client;
|
import org.elasticsearch.client.Client;
|
||||||
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
@ -53,8 +54,11 @@ public class RestMultiTermVectorsAction extends BaseRestHandler {
|
||||||
MultiTermVectorsRequest multiTermVectorsRequest = new MultiTermVectorsRequest();
|
MultiTermVectorsRequest multiTermVectorsRequest = new MultiTermVectorsRequest();
|
||||||
multiTermVectorsRequest.listenerThreaded(false);
|
multiTermVectorsRequest.listenerThreaded(false);
|
||||||
TermVectorRequest template = new TermVectorRequest();
|
TermVectorRequest template = new TermVectorRequest();
|
||||||
|
template.index(request.param("index"));
|
||||||
|
template.type(request.param("type"));
|
||||||
RestTermVectorAction.readURIParameters(template, request);
|
RestTermVectorAction.readURIParameters(template, request);
|
||||||
|
multiTermVectorsRequest.ids(Strings.commaDelimitedListToStringArray(request.param("ids")));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
multiTermVectorsRequest.add(template, request.content());
|
multiTermVectorsRequest.add(template, request.content());
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
|
|
Loading…
Reference in New Issue