From c4830cf8627501b494e6f0e160597c11ff018159 Mon Sep 17 00:00:00 2001 From: Alex Ksikes Date: Tue, 23 Sep 2014 23:21:42 +0200 Subject: [PATCH] Term Vectors: support for realtime By default term vectors are now realtime, as opposed to previously near realtime. If they are not found in the index, they will be generated on the fly. The document is fetched from the transaction log and treated as an artificial document. One can set `realtime` parameter to `false` in order to disable this functionality. This consequently makes the MLT query realtime in fetching documents, as it previously used to be before switching from using the multi get API to the mtv API. Closes #7846 --- docs/reference/docs/termvectors.asciidoc | 6 +-- rest-api-spec/api/mtermvectors.json | 5 +++ rest-api-spec/api/termvector.json | 5 +++ .../test/termvector/20_issue7121.yaml | 1 + .../test/termvector/30_realtime.yaml | 40 +++++++++++++++++ .../action/termvector/TermVectorRequest.java | 31 ++++++++++++- .../termvector/TermVectorRequestBuilder.java | 5 +++ .../termvectors/ShardTermVectorService.java | 45 ++++++++++--------- .../termvector/RestTermVectorAction.java | 1 + .../action/termvector/GetTermVectorTests.java | 2 - 10 files changed, 113 insertions(+), 28 deletions(-) create mode 100644 rest-api-spec/test/termvector/30_realtime.yaml diff --git a/docs/reference/docs/termvectors.asciidoc b/docs/reference/docs/termvectors.asciidoc index a9e7d855c0e..2a72956f6ae 100644 --- a/docs/reference/docs/termvectors.asciidoc +++ b/docs/reference/docs/termvectors.asciidoc @@ -3,9 +3,9 @@ Returns information and statistics on terms in the fields of a particular document. The document could be stored in the index or artificially provided -by the user Note that for documents stored in the index, this -is a near realtime API as the term vectors are not available until the next -refresh. +by the user coming[1.4.0]. Term vectors are now <<realtime,realtime>>, as opposed to +previously near realtime coming[1.5.0]. 
The functionality is disabled by setting +`realtime` parameter to `false`. [source,js] -------------------------------------------------- diff --git a/rest-api-spec/api/mtermvectors.json b/rest-api-spec/api/mtermvectors.json index 4cac036d784..12838caf2b5 100644 --- a/rest-api-spec/api/mtermvectors.json +++ b/rest-api-spec/api/mtermvectors.json @@ -74,6 +74,11 @@ "type" : "string", "description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".", "required" : false + }, + "realtime": { + "type" : "boolean", + "description" : "Specifies if requests are real-time as opposed to near-real-time (default: true).", + "required" : false } } }, diff --git a/rest-api-spec/api/termvector.json b/rest-api-spec/api/termvector.json index 617b1fa38ad..01a850f29fe 100644 --- a/rest-api-spec/api/termvector.json +++ b/rest-api-spec/api/termvector.json @@ -72,6 +72,11 @@ "type" : "string", "description" : "Parent id of documents.", "required" : false + }, + "realtime": { + "type" : "boolean", + "description" : "Specifies if request is real-time as opposed to near-real-time (default: true).", + "required" : false } } }, diff --git a/rest-api-spec/test/termvector/20_issue7121.yaml b/rest-api-spec/test/termvector/20_issue7121.yaml index be569385847..a75296aabce 100644 --- a/rest-api-spec/test/termvector/20_issue7121.yaml +++ b/rest-api-spec/test/termvector/20_issue7121.yaml @@ -29,6 +29,7 @@ setup: index: testidx type: doc id: 1 + realtime: 0 - match: { "_index": "testidx" } - match: { "_type": "doc" } diff --git a/rest-api-spec/test/termvector/30_realtime.yaml b/rest-api-spec/test/termvector/30_realtime.yaml new file mode 100644 index 00000000000..28d1b41e938 --- /dev/null +++ b/rest-api-spec/test/termvector/30_realtime.yaml @@ -0,0 +1,40 @@ +--- +"Realtime Term Vectors": + + - do: + indices.create: + index: test_1 + body: + settings: + index: + refresh_interval: -1 + number_of_replicas: 0 + + - do: + cluster.health: 
+ wait_for_status: green + + - do: + index: + index: test_1 + type: test + id: 1 + body: { foo: bar } + + - do: + termvector: + index: test_1 + type: test + id: 1 + realtime: 0 + + - is_false: found + + - do: + termvector: + index: test_1 + type: test + id: 1 + realtime: 1 + + - is_true: found diff --git a/src/main/java/org/elasticsearch/action/termvector/TermVectorRequest.java b/src/main/java/org/elasticsearch/action/termvector/TermVectorRequest.java index ffe9229123e..da80d286e6e 100644 --- a/src/main/java/org/elasticsearch/action/termvector/TermVectorRequest.java +++ b/src/main/java/org/elasticsearch/action/termvector/TermVectorRequest.java @@ -63,6 +63,8 @@ public class TermVectorRequest extends SingleShardOperationRequest selectedFields; + Boolean realtime; + private EnumSet flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads, Flag.FieldStatistics); @@ -95,6 +97,7 @@ public class TermVectorRequest extends SingleShardOperationRequest(other.selectedFields); } + this.realtime = other.realtime(); } public TermVectorRequest(MultiGetRequest.Item item) { @@ -150,9 +153,18 @@ public class TermVectorRequest extends SingleShardOperationRequest listener) { client.termVector(request, listener); diff --git a/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java b/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java index 09a3dfdd382..05aabfcc2f6 100644 --- a/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java +++ b/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java @@ -74,6 +74,16 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { IndexReader topLevelReader = searcher.reader(); final TermVectorResponse termVectorResponse = new TermVectorResponse(concreteIndex, request.type(), request.id()); + final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id())); + Engine.GetResult get = 
indexShard.get(new Engine.Get(request.realtime(), uidTerm)); + boolean docFromTranslog = get.source() != null; + + /* fetched from translog is treated as an artificial document */ + if (docFromTranslog) { + request.doc(get.source().source, false); + termVectorResponse.setDocVersion(get.version()); + } + /* handle potential wildcards in fields */ if (request.selectedFields() != null) { handleFieldWildcards(request); @@ -81,27 +91,25 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { try { Fields topLevelFields = MultiFields.getFields(topLevelReader); + Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion(); /* from an artificial document */ if (request.doc() != null) { - Fields termVectorsByField = generateTermVectorsFromDoc(request); + Fields termVectorsByField = generateTermVectorsFromDoc(request, !docFromTranslog); // if no document indexed in shard, take the queried document itself for stats if (topLevelFields == null) { topLevelFields = termVectorsByField; } termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields); termVectorResponse.setExists(true); - termVectorResponse.setArtificial(true); - return termVectorResponse; + termVectorResponse.setArtificial(!docFromTranslog); } /* or from an existing document */ - final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id())); - Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm); - if (docIdAndVersion != null) { + else if (docIdAndVersion != null) { // fields with stored term vectors Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId); // fields without term vectors if (request.selectedFields() != null) { - termVectorsByField = addGeneratedTermVectors(termVectorsByField, request, uidTerm, false); + termVectorsByField = addGeneratedTermVectors(get, termVectorsByField, request); } 
termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields); termVectorResponse.setDocVersion(docIdAndVersion.version); @@ -113,6 +121,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { throw new ElasticsearchException("failed to execute term vector request", ex); } finally { searcher.close(); + get.release(); } return termVectorResponse; } @@ -137,7 +146,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { return true; } - private Fields addGeneratedTermVectors(Fields termVectorsByField, TermVectorRequest request, Term uidTerm, boolean realTime) throws IOException { + private Fields addGeneratedTermVectors(Engine.GetResult get, Fields termVectorsByField, TermVectorRequest request) throws IOException { /* only keep valid fields */ Set validFields = new HashSet<>(); for (String field : request.selectedFields()) { @@ -157,18 +166,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { } /* generate term vectors from fetched document fields */ - Engine.GetResult get = indexShard.get(new Engine.Get(realTime, uidTerm)); - Fields generatedTermVectors; - try { - if (!get.exists()) { - return termVectorsByField; - } - GetResult getResult = indexShard.getService().get( - get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false); - generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets()); - } finally { - get.release(); - } + GetResult getResult = indexShard.getService().get( + get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false); + Fields generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets()); /* merge with existing Fields */ if (termVectorsByField == null) { @@ -195,7 +195,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { return 
MultiFields.getFields(index.createSearcher().getIndexReader()); } - private Fields generateTermVectorsFromDoc(TermVectorRequest request) throws IOException { + private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields) throws IOException { // parse the document, at the moment we do update the mapping, just like percolate ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc()); @@ -214,6 +214,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { if (!isValidField(fieldMapper)) { continue; } + if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) { + continue; + } if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) { continue; } diff --git a/src/main/java/org/elasticsearch/rest/action/termvector/RestTermVectorAction.java b/src/main/java/org/elasticsearch/rest/action/termvector/RestTermVectorAction.java index 435cec7f8db..88a2301d140 100644 --- a/src/main/java/org/elasticsearch/rest/action/termvector/RestTermVectorAction.java +++ b/src/main/java/org/elasticsearch/rest/action/termvector/RestTermVectorAction.java @@ -77,6 +77,7 @@ public class RestTermVectorAction extends BaseRestHandler { termVectorRequest.positions(request.paramAsBoolean("positions", termVectorRequest.positions())); termVectorRequest.payloads(request.paramAsBoolean("payloads", termVectorRequest.payloads())); termVectorRequest.routing(request.param("routing")); + termVectorRequest.realtime(request.paramAsBoolean("realtime", null)); termVectorRequest.parent(request.param("parent")); termVectorRequest.preference(request.param("preference")); termVectorRequest.termStatistics(request.paramAsBoolean("termStatistics", termVectorRequest.termStatistics())); diff --git a/src/test/java/org/elasticsearch/action/termvector/GetTermVectorTests.java b/src/test/java/org/elasticsearch/action/termvector/GetTermVectorTests.java 
index 8766a341dce..c4cd4ac204c 100644 --- a/src/test/java/org/elasticsearch/action/termvector/GetTermVectorTests.java +++ b/src/test/java/org/elasticsearch/action/termvector/GetTermVectorTests.java @@ -32,8 +32,6 @@ import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.core.AbstractFieldMapper; -import org.elasticsearch.index.service.IndexService; -import org.elasticsearch.indices.IndicesService; import org.junit.Test; import java.io.IOException;