diff --git a/docs/reference/docs/termvectors.asciidoc b/docs/reference/docs/termvectors.asciidoc index a9e7d855c0e..2a72956f6ae 100644 --- a/docs/reference/docs/termvectors.asciidoc +++ b/docs/reference/docs/termvectors.asciidoc @@ -3,9 +3,9 @@ Returns information and statistics on terms in the fields of a particular document. The document could be stored in the index or artificially provided -by the user Note that for documents stored in the index, this -is a near realtime API as the term vectors are not available until the next -refresh. +by the user coming[1.4.0]. Term vectors are now <<realtime,realtime>>, as opposed to +previously near realtime coming[1.5.0]. The functionality is disabled by setting the +`realtime` parameter to `false`. [source,js] -------------------------------------------------- diff --git a/rest-api-spec/api/mtermvectors.json b/rest-api-spec/api/mtermvectors.json index 4cac036d784..12838caf2b5 100644 --- a/rest-api-spec/api/mtermvectors.json +++ b/rest-api-spec/api/mtermvectors.json @@ -74,6 +74,11 @@ "type" : "string", "description" : "Parent id of documents. 
Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".", "required" : false + }, + "realtime": { + "type" : "boolean", + "description" : "Specifies if requests are real-time as opposed to near-real-time (default: true).", + "required" : false } } }, diff --git a/rest-api-spec/api/termvector.json b/rest-api-spec/api/termvector.json index 617b1fa38ad..01a850f29fe 100644 --- a/rest-api-spec/api/termvector.json +++ b/rest-api-spec/api/termvector.json @@ -72,6 +72,11 @@ "type" : "string", "description" : "Parent id of documents.", "required" : false + }, + "realtime": { + "type" : "boolean", + "description" : "Specifies if request is real-time as opposed to near-real-time (default: true).", + "required" : false } } }, diff --git a/rest-api-spec/test/termvector/20_issue7121.yaml b/rest-api-spec/test/termvector/20_issue7121.yaml index be569385847..a75296aabce 100644 --- a/rest-api-spec/test/termvector/20_issue7121.yaml +++ b/rest-api-spec/test/termvector/20_issue7121.yaml @@ -29,6 +29,7 @@ setup: index: testidx type: doc id: 1 + realtime: 0 - match: { "_index": "testidx" } - match: { "_type": "doc" } diff --git a/rest-api-spec/test/termvector/30_realtime.yaml b/rest-api-spec/test/termvector/30_realtime.yaml new file mode 100644 index 00000000000..28d1b41e938 --- /dev/null +++ b/rest-api-spec/test/termvector/30_realtime.yaml @@ -0,0 +1,40 @@ +--- +"Realtime Term Vectors": + + - do: + indices.create: + index: test_1 + body: + settings: + index: + refresh_interval: -1 + number_of_replicas: 0 + + - do: + cluster.health: + wait_for_status: green + + - do: + index: + index: test_1 + type: test + id: 1 + body: { foo: bar } + + - do: + termvector: + index: test_1 + type: test + id: 1 + realtime: 0 + + - is_false: found + + - do: + termvector: + index: test_1 + type: test + id: 1 + realtime: 1 + + - is_true: found diff --git a/src/main/java/org/elasticsearch/action/termvector/TermVectorRequest.java 
b/src/main/java/org/elasticsearch/action/termvector/TermVectorRequest.java index ffe9229123e..da80d286e6e 100644 --- a/src/main/java/org/elasticsearch/action/termvector/TermVectorRequest.java +++ b/src/main/java/org/elasticsearch/action/termvector/TermVectorRequest.java @@ -63,6 +63,8 @@ public class TermVectorRequest extends SingleShardOperationRequest selectedFields; + Boolean realtime; + private EnumSet flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads, Flag.FieldStatistics); @@ -95,6 +97,7 @@ public class TermVectorRequest extends SingleShardOperationRequest(other.selectedFields); } + this.realtime = other.realtime(); } public TermVectorRequest(MultiGetRequest.Item item) { @@ -150,9 +153,18 @@ public class TermVectorRequest extends SingleShardOperationRequest listener) { client.termVector(request, listener); diff --git a/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java b/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java index 09a3dfdd382..05aabfcc2f6 100644 --- a/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java +++ b/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java @@ -74,6 +74,16 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { IndexReader topLevelReader = searcher.reader(); final TermVectorResponse termVectorResponse = new TermVectorResponse(concreteIndex, request.type(), request.id()); + final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id())); + Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm)); + boolean docFromTranslog = get.source() != null; + + /* fetched from translog is treated as an artificial document */ + if (docFromTranslog) { + request.doc(get.source().source, false); + termVectorResponse.setDocVersion(get.version()); + } + /* handle potential wildcards in fields */ if (request.selectedFields() != null) { 
handleFieldWildcards(request); @@ -81,27 +91,25 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { try { Fields topLevelFields = MultiFields.getFields(topLevelReader); + Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion(); /* from an artificial document */ if (request.doc() != null) { - Fields termVectorsByField = generateTermVectorsFromDoc(request); + Fields termVectorsByField = generateTermVectorsFromDoc(request, !docFromTranslog); // if no document indexed in shard, take the queried document itself for stats if (topLevelFields == null) { topLevelFields = termVectorsByField; } termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields); termVectorResponse.setExists(true); - termVectorResponse.setArtificial(true); - return termVectorResponse; + termVectorResponse.setArtificial(!docFromTranslog); } /* or from an existing document */ - final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id())); - Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm); - if (docIdAndVersion != null) { + else if (docIdAndVersion != null) { // fields with stored term vectors Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId); // fields without term vectors if (request.selectedFields() != null) { - termVectorsByField = addGeneratedTermVectors(termVectorsByField, request, uidTerm, false); + termVectorsByField = addGeneratedTermVectors(get, termVectorsByField, request); } termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields); termVectorResponse.setDocVersion(docIdAndVersion.version); @@ -113,6 +121,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { throw new ElasticsearchException("failed to execute term vector request", ex); } finally { searcher.close(); + get.release(); } return 
termVectorResponse; } @@ -137,7 +146,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { return true; } - private Fields addGeneratedTermVectors(Fields termVectorsByField, TermVectorRequest request, Term uidTerm, boolean realTime) throws IOException { + private Fields addGeneratedTermVectors(Engine.GetResult get, Fields termVectorsByField, TermVectorRequest request) throws IOException { /* only keep valid fields */ Set validFields = new HashSet<>(); for (String field : request.selectedFields()) { @@ -157,18 +166,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { } /* generate term vectors from fetched document fields */ - Engine.GetResult get = indexShard.get(new Engine.Get(realTime, uidTerm)); - Fields generatedTermVectors; - try { - if (!get.exists()) { - return termVectorsByField; - } - GetResult getResult = indexShard.getService().get( - get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false); - generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets()); - } finally { - get.release(); - } + GetResult getResult = indexShard.getService().get( + get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false); + Fields generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets()); /* merge with existing Fields */ if (termVectorsByField == null) { @@ -195,7 +195,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { return MultiFields.getFields(index.createSearcher().getIndexReader()); } - private Fields generateTermVectorsFromDoc(TermVectorRequest request) throws IOException { + private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields) throws IOException { // parse the document, at the moment we do update the mapping, just like percolate ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), 
request.doc()); @@ -214,6 +214,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { if (!isValidField(fieldMapper)) { continue; } + if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) { + continue; + } if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) { continue; } diff --git a/src/main/java/org/elasticsearch/rest/action/termvector/RestTermVectorAction.java b/src/main/java/org/elasticsearch/rest/action/termvector/RestTermVectorAction.java index 435cec7f8db..88a2301d140 100644 --- a/src/main/java/org/elasticsearch/rest/action/termvector/RestTermVectorAction.java +++ b/src/main/java/org/elasticsearch/rest/action/termvector/RestTermVectorAction.java @@ -77,6 +77,7 @@ public class RestTermVectorAction extends BaseRestHandler { termVectorRequest.positions(request.paramAsBoolean("positions", termVectorRequest.positions())); termVectorRequest.payloads(request.paramAsBoolean("payloads", termVectorRequest.payloads())); termVectorRequest.routing(request.param("routing")); + termVectorRequest.realtime(request.paramAsBoolean("realtime", null)); termVectorRequest.parent(request.param("parent")); termVectorRequest.preference(request.param("preference")); termVectorRequest.termStatistics(request.paramAsBoolean("termStatistics", termVectorRequest.termStatistics())); diff --git a/src/test/java/org/elasticsearch/action/termvector/GetTermVectorTests.java b/src/test/java/org/elasticsearch/action/termvector/GetTermVectorTests.java index 8766a341dce..c4cd4ac204c 100644 --- a/src/test/java/org/elasticsearch/action/termvector/GetTermVectorTests.java +++ b/src/test/java/org/elasticsearch/action/termvector/GetTermVectorTests.java @@ -32,8 +32,6 @@ import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.core.AbstractFieldMapper; 
-import org.elasticsearch.index.service.IndexService; -import org.elasticsearch.indices.IndicesService; import org.junit.Test; import java.io.IOException;