diff --git a/rest-api-spec/api/termvectors.get_multi_termvectors.json b/rest-api-spec/api/termvectors.get_multi_termvectors.json new file mode 100644 index 00000000000..b61d90aae3d --- /dev/null +++ b/rest-api-spec/api/termvectors.get_multi_termvectors.json @@ -0,0 +1,89 @@ +{ + "termvectors.get_multi_termvectors" : { + "documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html", + "methods" : ["GET", "POST"], + "url" : { + "path" : "/_mtermvectors", + "paths" : ["/_mtermvectors", "/{index}/_mtermvectors", "/{index}/{type}/_mtermvectors"], + "parts" : { + "index" : { + "type" : "string", + "description" : "The index in which the document resides.", + "required" : true + }, + "type" : { + "type" : "string", + "description" : "The type of the document.", + "required" : true + }, + "id" : { + "type" : "string", + "description" : "The id of the document.", + "required" : true + } + }, + "params" : { + "ids" : { + "type" : "list", + "description" : "A comma-separated list of document ids. You must define ids as a parameter or set \"ids\" or \"docs\" in the request body.", + "required" : false + }, + "term_statistics" : { + "type" : "boolean", + "description" : "Specifies if total term frequency and document frequency should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".", + "default" : false, + "required" : false + }, + "field_statistics" : { + "type" : "boolean", + "description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".", + "default" : true, + "required" : false + }, + "fields" : { + "type" : "list", + "description" : "A comma-separated list of fields to return. 
Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".", + "required" : false + }, + "offsets" : { + "type" : "boolean", + "description" : "Specifies if term offsets should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".", + "default" : true, + "required" : false + }, + "positions" : { + "type" : "boolean", + "description" : "Specifies if term positions should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".", + "default" : true, + "required" : false + }, + "payloads" : { + "type" : "boolean", + "description" : "Specifies if term payloads should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".", + "default" : true, + "required" : false + }, + "preference" : { + "type" : "string", + "description" : "Specify the node or shard the operation should be performed on (default: random). Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".", + "required" : false + }, + "routing" : { + "type" : "string", + "description" : "Specific routing value. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".", + "required" : false + }, + "parent" : { + "type" : "string", + "description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".", + "required" : false + } + } + }, + "body" : { + "description" : "Define ids, parameters or a list of parameters per document here. You must at least provide a list of document ids. 
See documentation.", + "required" : false + + } + } +} \ No newline at end of file diff --git a/rest-api-spec/api/termvectors.get_termvectors.json b/rest-api-spec/api/termvectors.get_termvectors.json new file mode 100644 index 00000000000..0dfc1cd19d0 --- /dev/null +++ b/rest-api-spec/api/termvectors.get_termvectors.json @@ -0,0 +1,83 @@ +{ + "termvectors.get_termvector" : { + "documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html", + "methods" : ["GET", "POST"], + "url" : { + "path" : "/{index}/{type}/{id}/_termvector", + "paths" : ["/{index}/{type}/{id}/_termvector"], + "parts" : { + "index" : { + "type" : "string", + "description" : "The index in which the document resides.", + "required" : true + }, + "type" : { + "type" : "string", + "description" : "The type of the document.", + "required" : true + }, + "id" : { + "type" : "string", + "description" : "The id of the document.", + "required" : true + } + }, + "params": { + "term_statistics" : { + "type" : "boolean", + "description" : "Specifies if total term frequency and document frequency should be returned.", + "default" : false, + "required" : false + }, + "field_statistics" : { + "type" : "boolean", + "description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned.", + "default" : true, + "required" : false + }, + "fields" : { + "type" : "list", + "description" : "A comma-separated list of fields to return.", + "required" : false + }, + "offsets" : { + "type" : "boolean", + "description" : "Specifies if term offsets should be returned.", + "default" : true, + "required" : false + }, + "positions" : { + "type" : "boolean", + "description" : "Specifies if term positions should be returned.", + "default" : true, + "required" : false + }, + "payloads" : { + "type" : "boolean", + "description" : "Specifies if term payloads should be returned.", + "default" : true, + "required" : false + }, + 
"preference" : { + "type" : "string", + "description" : "Specify the node or shard the operation should be performed on (default: random).", + "required" : false + }, + "routing" : { + "type" : "string", + "description" : "Specific routing value.", + "required" : false + }, + "parent": { + "type" : "string", + "description" : "Parent id of documents.", + "required" : false + } + } + }, + "body": { + "description" : "Define parameters. See documentation.", + "required" : false + } + } +} diff --git a/rest-api-spec/test/termvectors.get_multi_termvectors/10_basic.yaml b/rest-api-spec/test/termvectors.get_multi_termvectors/10_basic.yaml new file mode 100644 index 00000000000..caf09533dad --- /dev/null +++ b/rest-api-spec/test/termvectors.get_multi_termvectors/10_basic.yaml @@ -0,0 +1,101 @@ +setup: + - do: + indices.create: + index: testidx + body: + mappings: + testtype: { + "properties": { + "text": { + "type" : "string", + "term_vector" : "with_positions_offsets" + } + } + } + - do: + index: + index: testidx + type: testtype + id: testing_document + body: + {"text" : "The quick brown fox is brown."} + + - do: + indices.refresh: {} + +--- +"Basic tests for multi termvector get": + + - do: + termvectors.get_multi_termvectors: + "term_statistics" : true + "body" : { + "docs": [ + { + "_index" : "testidx", + "_type" : "testtype", + "_id" : "testing_document" + } + ] + } + + - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2} + - match: {docs.0.term_vectors.text.terms.brown.ttf: 2} + + - do: + termvectors.get_multi_termvectors: + "term_statistics" : true + "body" : { + "docs": [ + { + "_index" : "testidx", + "_type" : "testtype", + "_id" : "testing_document" + } + ] + } + + - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2} + - match: {docs.0.term_vectors.text.terms.brown.ttf: 2} + + - do: + termvectors.get_multi_termvectors: + "term_statistics" : true + "index" : "testidx" + "body" : { + "docs": [ + { + "_type" : "testtype", + "_id" : 
"testing_document" + } + ] + } + + - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2} + - match: {docs.0.term_vectors.text.terms.brown.ttf: 2} + + - do: + termvectors.get_multi_termvectors: + "term_statistics" : true + "index" : "testidx" + "type" : "testtype" + "body" : { + "docs": [ + { + "_id" : "testing_document" + } + ] + } + + - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2} + - match: {docs.0.term_vectors.text.terms.brown.ttf: 2} + + - do: + termvectors.get_multi_termvectors: + "term_statistics" : true + "index" : "testidx" + "type" : "testtype" + "ids" : ["testing_document"] + + - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2} + - match: {docs.0.term_vectors.text.terms.brown.ttf: 2} \ No newline at end of file diff --git a/rest-api-spec/test/termvectors.get_termvectors/10_basic.yaml b/rest-api-spec/test/termvectors.get_termvectors/10_basic.yaml new file mode 100644 index 00000000000..c05c036e908 --- /dev/null +++ b/rest-api-spec/test/termvectors.get_termvectors/10_basic.yaml @@ -0,0 +1,38 @@ +setup: + - do: + indices.create: + index: testidx + body: + mappings: + testtype: { + "properties": { + "text": { + "type" : "string", + "term_vector" : "with_positions_offsets" + } + } + } + - do: + index: + index: testidx + type: testtype + id: testing_document + body: + {"text" : "The quick brown fox is brown."} + - do: + indices.refresh: {} + +--- +"Basic tests for termvectors get": + + - do: + termvectors.get_termvector: + index: testidx + type: testtype + id: testing_document + "term_statistics" : true + + + - match: {term_vectors.text.field_statistics.sum_doc_freq: 5} + - match: {term_vectors.text.terms.brown.doc_freq: 1} + - match: {term_vectors.text.terms.brown.tokens.0.start_offset: 10} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/action/termvector/MultiTermVectorsRequest.java b/src/main/java/org/elasticsearch/action/termvector/MultiTermVectorsRequest.java index 95453de6d07..c6af5fac6b1 100644 --- 
a/src/main/java/org/elasticsearch/action/termvector/MultiTermVectorsRequest.java +++ b/src/main/java/org/elasticsearch/action/termvector/MultiTermVectorsRequest.java @@ -30,16 +30,21 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.rest.RestRequest; import java.io.IOException; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; public class MultiTermVectorsRequest extends ActionRequest { String preference; List requests = new ArrayList(); + final Set ids = new HashSet(); + public MultiTermVectorsRequest add(TermVectorRequest termVectorRequest) { requests.add(termVectorRequest); return this; @@ -70,57 +75,57 @@ public class MultiTermVectorsRequest extends ActionRequest ids = null; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (token == XContentParser.Token.START_ARRAY) { - if ("docs".equals(currentFieldName)) { - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (token != XContentParser.Token.START_OBJECT) { - throw new ElasticsearchIllegalArgumentException("docs array element should include an object"); + XContentParser.Token token; + String currentFieldName = null; + if (data.length() > 0) { + XContentParser parser = XContentFactory.xContent(data).createParser(data); + try { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_ARRAY) { + + if ("docs".equals(currentFieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token != 
XContentParser.Token.START_OBJECT) { + throw new ElasticsearchIllegalArgumentException("docs array element should include an object"); + } + TermVectorRequest termVectorRequest = new TermVectorRequest(template); + TermVectorRequest.parseRequest(termVectorRequest, parser); + add(termVectorRequest); } - TermVectorRequest termVectorRequest = new TermVectorRequest(template); - TermVectorRequest.parseRequest(termVectorRequest, parser); - add(termVectorRequest); - } - } else if ("ids".equals(currentFieldName)) { - ids = new ArrayList(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (!token.isValue()) { - throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids"); + } else if ("ids".equals(currentFieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (!token.isValue()) { + throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids"); + } + ids.add(parser.text()); } - ids.add(parser.text()); + } else { + throw new ElasticsearchParseException( + "No parameter named " + currentFieldName + "and type ARRAY"); } - } else { - throw new ElasticsearchParseException( - "No parameter named " + currentFieldName + "and type ARRAY"); + } else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) { + if ("parameters".equals(currentFieldName)) { + TermVectorRequest.parseRequest(template, parser); + } else { + throw new ElasticsearchParseException( + "No parameter named " + currentFieldName + "and type OBJECT"); + } + } else if (currentFieldName != null) { + throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported"); } - } else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) { - if ("parameters".equals(currentFieldName)) { - TermVectorRequest.parseRequest(template, parser); - } else { - throw new ElasticsearchParseException( - "No parameter named " + 
currentFieldName + "and type OBJECT"); - } - } else if (currentFieldName != null) { - throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported"); } } - if (ids != null) { - for (String id : ids) { - TermVectorRequest curRequest = new TermVectorRequest(template); - curRequest.id(id); - requests.add(curRequest); - } + finally { + parser.close(); } - } finally { - parser.close(); + } + for (String id : ids) { + TermVectorRequest curRequest = new TermVectorRequest(template); + curRequest.id(id); + requests.add(curRequest); } } @@ -144,4 +149,10 @@ public class MultiTermVectorsRequest extends ActionRequest