Change response format of term vector endpoint

This commit changes the response format of the term vectors
to be consistent with the response format of the analyze endpoint.

```
{
   "_index": "test",
   "_type": "type1",
   "_id": "1",
   "_version": 1,
   "exists": true,
   "term_vectors": {
      "field_with_positions_offsets": {
         "field_statistics": {..},
         "terms": {
            "evil": {
               "term_freq": 2,
               "pos": [ 4 , 7 ],
               "start": [ 17, 40 ],
               "end": [ 21 , 44 ]
            },
            "orthodontist": {
               "term_freq": 1,
               "pos": [ 5 ]
               ],
               "start": [ 22 ],
               "end": [ 34]
            }
         }
      }
   }
}
```

becomes

```
{
   "_index": "test",
   "_type": "type1",
   "_id": "1",
   "_version": 1,
   "exists": true,
   "term_vectors": {
      "field_with_positions_offsets": {
         "field_statistics": {..},
         "terms": {
            "evil": {
               "term_freq": 2,
               "tokens": [
                   { "position": 4, "start_offset": 17, "end_offset" : 21 },
                   { "position": 7, "start_offset": 40, "end_offset" : 44 }
               ]
            },
            "orthodontist": {
               "term_freq": 1,
               "tokens" : [ { "position": 5 , "start_offset" : 22, "end_offset" : 34 } ]
            }
         }
      }
   }
}
```

Closes issue #3484
This commit is contained in:
Boaz Leskes 2013-08-09 16:42:15 +02:00 committed by Britta Weber
parent cd5ebac7dd
commit 28e867b5c1
2 changed files with 26 additions and 16 deletions

View File

@ -59,9 +59,10 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
public static final XContentBuilderString SUM_DOC_FREQ = new XContentBuilderString("sum_doc_freq");
public static final XContentBuilderString SUM_TTF = new XContentBuilderString("sum_ttf");
public static final XContentBuilderString POS = new XContentBuilderString("pos");
public static final XContentBuilderString START_OFFSET = new XContentBuilderString("start");
public static final XContentBuilderString END_OFFSET = new XContentBuilderString("end");
public static final XContentBuilderString TOKENS = new XContentBuilderString("tokens");
public static final XContentBuilderString POS = new XContentBuilderString("position");
public static final XContentBuilderString START_OFFSET = new XContentBuilderString("start_offset");
public static final XContentBuilderString END_OFFSET = new XContentBuilderString("end_offset");
public static final XContentBuilderString PAYLOAD = new XContentBuilderString("payload");
public static final XContentBuilderString _INDEX = new XContentBuilderString("_index");
public static final XContentBuilderString _TYPE = new XContentBuilderString("_type");
@ -231,16 +232,27 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
}
private void buildValues(XContentBuilder builder, Terms curTerms, int termFreq) throws IOException {
if (curTerms.hasPositions()) {
builder.field(FieldStrings.POS, 0, termFreq, curentPositions);
if (!(curTerms.hasPayloads() || curTerms.hasOffsets() || curTerms.hasPositions())) {
return;
}
if (curTerms.hasOffsets()) {
builder.field(FieldStrings.START_OFFSET, 0, termFreq, currentStartOffset);
builder.field(FieldStrings.END_OFFSET, 0, termFreq, currentEndOffset);
}
if (curTerms.hasPayloads()) {
builder.array(FieldStrings.PAYLOAD, (Object[]) currentPayloads);
builder.startArray(FieldStrings.TOKENS);
for (int i = 0; i < termFreq; i++) {
builder.startObject();
if (curTerms.hasPositions()) {
builder.field(FieldStrings.POS, curentPositions[i]);
}
if (curTerms.hasOffsets()) {
builder.field(FieldStrings.START_OFFSET, currentStartOffset[i]);
builder.field(FieldStrings.END_OFFSET, currentEndOffset[i]);
}
if (curTerms.hasPayloads()) {
builder.field(FieldStrings.PAYLOAD, currentPayloads[i]);
}
builder.endObject();
}
builder.endArray();
}
private void initValues(Terms curTerms, DocsAndPositionsEnum posEnum, int termFreq) throws IOException {

View File

@ -170,7 +170,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
String utf8 = bytesStream.bytes().toUtf8();
String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\""
+ i
+ "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[2],\"start\":[10],\"end\":[15],\"payload\":[\"d29yZA==\"]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[8],\"start\":[40],\"end\":[43],\"payload\":[\"d29yZA==\"]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[3],\"start\":[16],\"end\":[19],\"payload\":[\"d29yZA==\"]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[4],\"start\":[20],\"end\":[25],\"payload\":[\"d29yZA==\"]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[7],\"start\":[35],\"end\":[39],\"payload\":[\"d29yZA==\"]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[5],\"start\":[26],\"end\":[30],\"payload\":[\"d29yZA==\"]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[1],\"start\":[4],\"end\":[9],\"payload\":[\"d29yZA==\"]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"pos\":[0,6],\"start\":[0,31],\"end\":[3,34],\"payload\":[\"d29yZA==\",\"d29yZA==\"]}}}}}";
+ "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
assertThat(utf8, equalTo(expectedString));
}
@ -226,8 +226,7 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
String utf8 = bytesStream.bytes().toUtf8();
String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\""
+ i
+ "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"term_freq\":1,\"pos\":[2],\"start\":[10],\"end\":[15],\"payload\":[\"d29yZA==\"]},\"dog\":{\"term_freq\":1,\"pos\":[8],\"start\":[40],\"end\":[43],\"payload\":[\"d29yZA==\"]},\"fox\":{\"term_freq\":1,\"pos\":[3],\"start\":[16],\"end\":[19],\"payload\":[\"d29yZA==\"]},\"jumps\":{\"term_freq\":1,\"pos\":[4],\"start\":[20],\"end\":[25],\"payload\":[\"d29yZA==\"]},\"lazy\":{\"term_freq\":1,\"pos\":[7],\"start\":[35],\"end\":[39],\"payload\":[\"d29yZA==\"]},\"over\":{\"term_freq\":1,\"pos\":[5],\"start\":[26],\"end\":[30],\"payload\":[\"d29yZA==\"]},\"quick\":{\"term_freq\":1,\"pos\":[1],\"start\":[4],\"end\":[9],\"payload\":[\"d29yZA==\"]},\"the\":{\"term_freq\":2,\"pos\":[0,6],\"start\":[0,31],\"end\":[3,34],\"payload\":[\"d29yZA==\",\"d29yZA==\"]}}}}}";
+ "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
assertThat(utf8, equalTo(expectedString));
}
@ -286,9 +285,8 @@ public class GetTermVectorTestsCheckDocFreq extends AbstractSharedClusterTest {
String utf8 = bytesStream.bytes().toUtf8();
String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\""
+ i
+ "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[2],\"start\":[10],\"end\":[15],\"payload\":[\"d29yZA==\"]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[8],\"start\":[40],\"end\":[43],\"payload\":[\"d29yZA==\"]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[3],\"start\":[16],\"end\":[19],\"payload\":[\"d29yZA==\"]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[4],\"start\":[20],\"end\":[25],\"payload\":[\"d29yZA==\"]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[7],\"start\":[35],\"end\":[39],\"payload\":[\"d29yZA==\"]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[5],\"start\":[26],\"end\":[30],\"payload\":[\"d29yZA==\"]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[1],\"start\":[4],\"end\":[9],\"payload\":[\"d29yZA==\"]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"pos\":[0,6],\"start\":[0,31],\"end\":[3,34],\"payload\":[\"d29yZA==\",\"d29yZA==\"]}}}}}";
+ "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
assertThat(utf8, equalTo(expectedString));
}
}