[SPEC,TEST,FIX] add spec and tests for termvector api and fix inconsistencies

- index and type were not read from the uri with the _mtermvectors api
- ids were not read from the uri parameters with the _mtermvectors api
2014-01-20 13:23:27 +01:00 · 2014-01-20 13:23:27 +01:00 · cae5eb479a
parent f2fb114a3e
commit cae5eb479a
6 changed files with 370 additions and 44 deletions
--- a/rest-api-spec/api/termvectors.get_multi_termvectors.json
+++ b/rest-api-spec/api/termvectors.get_multi_termvectors.json
@ -0,0 +1,89 @@
+{
+  "termvectors.get_multi_termvectors" : {
+    "documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
+    "methods" : ["GET", "POST"],
+    "url" : {
+      "path" : "/_mtermvectors",
+      "paths" : ["/_mtermvectors", "/{index}/_mtermvectors", "/{index}/{type}/_mtermvectors"],
+      "parts" : {
+        "index" : {
+         "type" : "string",
+         "description" : "The index in which the document resides.",
+         "required" : true
+        },
+        "type" : {
+          "type" : "string",
+          "description" : "The type of the document.",
+          "required" : true
+        },
+        "id" : {
+           "type" : "string",
+           "description" : "The id of the document.",
+           "required" : true
+         }
+      },
+      "params" : {
+        "ids" : {
+          "type" : "list",
+          "description" : "A comma-separated list of document ids. You must define ids as a parameter or set \"ids\" or \"docs\" in the request body.",
+          "required" : false
+        },
+        "term_statistics" : {
+          "type" : "boolean",
+          "description" : "Specifies if total term frequency and document frequency should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
+          "default" : false,
+          "required" : false
+        },
+        "field_statistics" : {
+           "type" : "boolean",
+           "description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
+           "default" : true,
+           "required" : false
+        },
+        "fields" : {
+          "type" : "list",
+          "description" : "A comma-separated list of fields to return. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
+          "required" : false
+        },
+        "offsets" : {
+          "type" : "boolean",
+          "description" : "Specifies if term offsets should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
+          "default" : true,
+          "required" : false
+        },
+        "positions" : {
+          "type" : "boolean",
+          "description" : "Specifies if term positions should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
+          "default" : true,
+          "required" : false
+        },
+        "payloads" : {
+          "type" : "boolean",
+          "description" : "Specifies if term payloads should be returned. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
+          "default" : true,
+          "required" : false
+        },
+        "preference" : {
+          "type" : "string",
+          "description" : "Specify the node or shard the operation should be performed on (default: random). Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
+          "required" : false
+        },
+        "routing" : {
+          "type" : "string",
+          "description" : "Specific routing value. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
+          "required" : false
+        },
+        "parent" : {
+          "type" : "string",
+          "description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"parameters\" or \"docs\".",
+          "required" : false
+        }
+      }
+    },
+    "body" : {
+        "description" : "Define ids, parameters or a list of parameters per document here. You must at least provide a list of document ids. See documentation.",
+        "required" : false
+
+    }
+  }
+}
--- a/rest-api-spec/api/termvectors.get_termvectors.json
+++ b/rest-api-spec/api/termvectors.get_termvectors.json
@ -0,0 +1,83 @@
+{
+  "termvectors.get_termvector" : {
+    "documentation" : "http://www.elasticsearch.org/guide/en/elasticsearch/reference/master/search-termvectors.html",
+    "methods" : ["GET", "POST"],
+    "url" : {
+      "path" : "/{index}/{type}/{id}/_termvector",
+      "paths" : ["/{index}/{type}/{id}/_termvector"],
+      "parts" : {
+        "index" : {
+         "type" : "string",
+         "description" : "The index in which the document resides.",
+         "required" : true
+        },
+        "type" : {
+          "type" : "string",
+          "description" : "The type of the document.",
+          "required" : true
+        },
+        "id" : {
+           "type" : "string",
+           "description" : "The id of the document.",
+           "required" : true
+         }
+      },
+      "params": {
+        "term_statistics" : {
+          "type" : "boolean",
+          "description" : "Specifies if total term frequency and document frequency should be returned.",
+          "default" : false,
+          "required" : false
+        },
+        "field_statistics" : {
+           "type" : "boolean",
+           "description" : "Specifies if document count, sum of document frequencies and sum of total term frequencies should be returned.",
+           "default" : true,
+           "required" : false
+        },
+        "fields" : {
+          "type" : "list",
+          "description" : "A comma-separated list of fields to return.",
+          "required" : false
+        },
+        "offsets" : {
+          "type" : "boolean",
+          "description" : "Specifies if term offsets should be returned.",
+          "default" : true,
+          "required" : false
+        },
+        "positions" : {
+          "type" : "boolean",
+          "description" : "Specifies if term positions should be returned.",
+          "default" : true,
+          "required" : false
+        },
+        "payloads" : {
+          "type" : "boolean",
+          "description" : "Specifies if term payloads should be returned.",
+          "default" : true,
+          "required" : false
+        },
+        "preference" : {
+          "type" : "string",
+          "description" : "Specify the node or shard the operation should be performed on (default: random).",
+          "required" : false
+        },
+        "routing" : {
+          "type" : "string",
+          "description" : "Specific routing value.",
+          "required" : false
+        },
+        "parent": {
+          "type" : "string",
+          "description" : "Parent id of documents.",
+          "required" : false
+        }
+      }
+    },
+    "body": {
+      "description" : "Define parameters. See documentation.",
+      "required" : false
+    }
+  }
+}
--- a/rest-api-spec/test/termvectors.get_multi_termvectors/10_basic.yaml
+++ b/rest-api-spec/test/termvectors.get_multi_termvectors/10_basic.yaml
@ -0,0 +1,101 @@
+setup:
+  - do:
+        indices.create:
+          index: testidx
+          body:
+            mappings:
+              testtype: {
+                "properties": {
+                  "text": {
+                     "type" : "string",
+                     "term_vector" : "with_positions_offsets"
+                   }
+                }
+              }
+  - do:
+      index:
+        index: testidx
+        type:  testtype
+        id:    testing_document
+        body:
+          {"text" : "The quick brown fox is brown."}
+
+  - do:
+      indices.refresh: {}
+
+---
+"Basic tests for multi termvector get":
+
+  - do:
+      termvectors.get_multi_termvectors:
+        "term_statistics" : true
+        "body" : {
+          "docs": [
+          {
+            "_index" : "testidx",
+            "_type" : "testtype",
+            "_id" : "testing_document"
+          }
+          ]
+        }
+
+  - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
+  - match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
+
+  - do:
+        termvectors.get_multi_termvectors:
+          "term_statistics" : true
+          "body" : {
+            "docs": [
+            {
+              "_index" : "testidx",
+              "_type" : "testtype",
+              "_id" : "testing_document"
+            }
+            ]
+          }
+
+  - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
+  - match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
+
+  - do:
+        termvectors.get_multi_termvectors:
+          "term_statistics" : true
+          "index" : "testidx"
+          "body" : {
+            "docs": [
+            {
+              "_type" : "testtype",
+              "_id" : "testing_document"
+            }
+            ]
+          }
+
+  - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
+  - match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
+
+  - do:
+        termvectors.get_multi_termvectors:
+          "term_statistics" : true
+          "index" : "testidx"
+          "type" : "testtype"
+          "body" : {
+            "docs": [
+            {
+              "_id" : "testing_document"
+            }
+            ]
+          }
+
+  - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
+  - match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
+
+  - do:
+        termvectors.get_multi_termvectors:
+          "term_statistics" : true
+          "index" : "testidx"
+          "type" : "testtype"
+          "ids" : ["testing_document"]
+
+  - match: {docs.0.term_vectors.text.terms.brown.term_freq: 2}
+  - match: {docs.0.term_vectors.text.terms.brown.ttf: 2}
--- a/rest-api-spec/test/termvectors.get_termvectors/10_basic.yaml
+++ b/rest-api-spec/test/termvectors.get_termvectors/10_basic.yaml
@ -0,0 +1,38 @@
+setup:
+  - do:
+        indices.create:
+          index: testidx
+          body:
+            mappings:
+              testtype: {
+                "properties": {
+                  "text": {
+                     "type" : "string",
+                     "term_vector" : "with_positions_offsets"
+                   }
+                }
+              }
+  - do:
+      index:
+        index: testidx
+        type:  testtype
+        id:    testing_document
+        body:
+          {"text" : "The quick brown fox is brown."}
+  - do:
+      indices.refresh: {}
+
+---
+"Basic tests for termvectors get":
+
+  - do:
+      termvectors.get_termvector:
+        index: testidx
+        type:  testtype
+        id:    testing_document
+        "term_statistics" : true
+
+
+  - match: {term_vectors.text.field_statistics.sum_doc_freq: 5}
+  - match: {term_vectors.text.terms.brown.doc_freq: 1}
+  - match: {term_vectors.text.terms.brown.tokens.0.start_offset: 10}
--- a/src/main/java/org/elasticsearch/action/termvector/MultiTermVectorsRequest.java
+++ b/src/main/java/org/elasticsearch/action/termvector/MultiTermVectorsRequest.java
@ -30,16 +30,21 @@ import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.rest.RestRequest;

 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;

 public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsRequest> {

    String preference;
    List<TermVectorRequest> requests = new ArrayList<TermVectorRequest>();

+    final Set<String> ids = new HashSet<String>();
+
    public MultiTermVectorsRequest add(TermVectorRequest termVectorRequest) {
        requests.add(termVectorRequest);
        return this;
@ -70,57 +75,57 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque

    public void add(TermVectorRequest template, BytesReference data)
            throws Exception {
-        XContentParser parser = XContentFactory.xContent(data).createParser(data);
-        try {
-            XContentParser.Token token;
-            String currentFieldName = null;
-            List<String> ids = null;
-            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
-                if (token == XContentParser.Token.FIELD_NAME) {
-                    currentFieldName = parser.currentName();
-                } else if (token == XContentParser.Token.START_ARRAY) {

-                    if ("docs".equals(currentFieldName)) {
-                        while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                            if (token != XContentParser.Token.START_OBJECT) {
-                                throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
+        XContentParser.Token token;
+        String currentFieldName = null;
+        if (data.length() > 0) {
+            XContentParser parser = XContentFactory.xContent(data).createParser(data);
+            try {
+                while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+                    if (token == XContentParser.Token.FIELD_NAME) {
+                        currentFieldName = parser.currentName();
+                    } else if (token == XContentParser.Token.START_ARRAY) {
+
+                        if ("docs".equals(currentFieldName)) {
+                            while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                                if (token != XContentParser.Token.START_OBJECT) {
+                                    throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
+                                }
+                                TermVectorRequest termVectorRequest = new TermVectorRequest(template);
+                                TermVectorRequest.parseRequest(termVectorRequest, parser);
+                                add(termVectorRequest);
                            }
-                            TermVectorRequest termVectorRequest = new TermVectorRequest(template);
-                            TermVectorRequest.parseRequest(termVectorRequest, parser);
-                            add(termVectorRequest);
-                        }
-                    } else if ("ids".equals(currentFieldName)) {
-                        ids = new ArrayList<String>();
-                        while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                            if (!token.isValue()) {
-                                throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
+                        } else if ("ids".equals(currentFieldName)) {
+                            while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                                if (!token.isValue()) {
+                                    throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
+                                }
+                                ids.add(parser.text());
                            }
-                            ids.add(parser.text());
+                        } else {
+                            throw new ElasticsearchParseException(
+                                    "No parameter named " + currentFieldName + "and type ARRAY");
                        }
-                    } else {
-                        throw new ElasticsearchParseException(
-                                "No parameter named " + currentFieldName + "and type ARRAY");
+                    } else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) {
+                        if ("parameters".equals(currentFieldName)) {
+                            TermVectorRequest.parseRequest(template, parser);
+                        } else {
+                            throw new ElasticsearchParseException(
+                                    "No parameter named " + currentFieldName + "and type OBJECT");
+                        }
+                    } else if (currentFieldName != null) {
+                        throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
                    }
-                } else if (token == XContentParser.Token.START_OBJECT && currentFieldName != null) {
-                    if ("parameters".equals(currentFieldName)) {
-                        TermVectorRequest.parseRequest(template, parser);
-                    } else {
-                        throw new ElasticsearchParseException(
-                                "No parameter named " + currentFieldName + "and type OBJECT");
-                    }
-                } else if (currentFieldName != null) {
-                    throw new ElasticsearchParseException("_mtermvectors: Parameter " + currentFieldName + "not supported");
                }
            }
-            if (ids != null) {
-                for (String id : ids) {
-                    TermVectorRequest curRequest = new TermVectorRequest(template);
-                    curRequest.id(id);
-                    requests.add(curRequest);
-                }
+            finally {
+                parser.close();
            }
-        } finally {
-            parser.close();
+        }
+        for (String id : ids) {
+            TermVectorRequest curRequest = new TermVectorRequest(template);
+            curRequest.id(id);
+            requests.add(curRequest);
        }
    }

@ -144,4 +149,10 @@ public class MultiTermVectorsRequest extends ActionRequest<MultiTermVectorsReque
            termVectorRequest.writeTo(out);
        }
    }
+
+    public void ids(String[] ids) {
+        for (String id : ids) {
+            this.ids.add(id.replaceAll("\\s", ""));
+        }
+    }
 }
--- a/src/main/java/org/elasticsearch/rest/action/termvector/RestMultiTermVectorsAction.java
+++ b/src/main/java/org/elasticsearch/rest/action/termvector/RestMultiTermVectorsAction.java
@ -24,6 +24,7 @@ import org.elasticsearch.action.termvector.MultiTermVectorsRequest;
 import org.elasticsearch.action.termvector.MultiTermVectorsResponse;
 import org.elasticsearch.action.termvector.TermVectorRequest;
 import org.elasticsearch.client.Client;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
@ -53,8 +54,11 @@ public class RestMultiTermVectorsAction extends BaseRestHandler {
        MultiTermVectorsRequest multiTermVectorsRequest = new MultiTermVectorsRequest();
        multiTermVectorsRequest.listenerThreaded(false);
        TermVectorRequest template = new TermVectorRequest();
+        template.index(request.param("index"));
+        template.type(request.param("type"));
        RestTermVectorAction.readURIParameters(template, request);
-       
+        multiTermVectorsRequest.ids(Strings.commaDelimitedListToStringArray(request.param("ids")));
+
        try {
            multiTermVectorsRequest.add(template, request.content());
        } catch (Throwable t) {