Term Vectors: support for realtime
By default term vectors are now realtime, as opposed to previously near realtime. If they are not found in the index, they will be generated on the fly. The document is fetched from the transaction log and treated as an artificial document. One can set the `realtime` parameter to `false` in order to disable this functionality. This consequently makes the MLT query realtime in fetching documents, as it previously used to be before switching from using the multi get API to the mtv API. Closes #7846
This commit is contained in:
parent
1cc5da43b3
commit
c4830cf862
|
@ -3,9 +3,9 @@
|
||||||
|
|
||||||
Returns information and statistics on terms in the fields of a particular
|
Returns information and statistics on terms in the fields of a particular
|
||||||
document. The document could be stored in the index or artificially provided
|
document. The document could be stored in the index or artificially provided
|
||||||
by the user Note that for documents stored in the index, this
|
by the user coming[1.4.0]. Term vectors are now <<realtime,realtime>>, as opposed to
|
||||||
is a near realtime API as the term vectors are not available until the next
|
previously near realtime coming[1.5.0]. The functionality is disabled by setting
|
||||||
refresh.
|
the `realtime` parameter to `false`.
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
|
@ -74,6 +74,11 @@
|
||||||
"type" : "string",
|
"type" : "string",
|
||||||
"description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
"description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||||
"required" : false
|
"required" : false
|
||||||
|
},
|
||||||
|
"realtime": {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if requests are real-time as opposed to near-real-time (default: true).",
|
||||||
|
"required" : false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
|
@ -72,6 +72,11 @@
|
||||||
"type" : "string",
|
"type" : "string",
|
||||||
"description" : "Parent id of documents.",
|
"description" : "Parent id of documents.",
|
||||||
"required" : false
|
"required" : false
|
||||||
|
},
|
||||||
|
"realtime": {
|
||||||
|
"type" : "boolean",
|
||||||
|
"description" : "Specifies if request is real-time as opposed to near-real-time (default: true).",
|
||||||
|
"required" : false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
|
@ -29,6 +29,7 @@ setup:
|
||||||
index: testidx
|
index: testidx
|
||||||
type: doc
|
type: doc
|
||||||
id: 1
|
id: 1
|
||||||
|
realtime: 0
|
||||||
|
|
||||||
- match: { "_index": "testidx" }
|
- match: { "_index": "testidx" }
|
||||||
- match: { "_type": "doc" }
|
- match: { "_type": "doc" }
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
---
|
||||||
|
"Realtime Term Vectors":
|
||||||
|
|
||||||
|
- do:
|
||||||
|
indices.create:
|
||||||
|
index: test_1
|
||||||
|
body:
|
||||||
|
settings:
|
||||||
|
index:
|
||||||
|
refresh_interval: -1
|
||||||
|
number_of_replicas: 0
|
||||||
|
|
||||||
|
- do:
|
||||||
|
cluster.health:
|
||||||
|
wait_for_status: green
|
||||||
|
|
||||||
|
- do:
|
||||||
|
index:
|
||||||
|
index: test_1
|
||||||
|
type: test
|
||||||
|
id: 1
|
||||||
|
body: { foo: bar }
|
||||||
|
|
||||||
|
- do:
|
||||||
|
termvector:
|
||||||
|
index: test_1
|
||||||
|
type: test
|
||||||
|
id: 1
|
||||||
|
realtime: 0
|
||||||
|
|
||||||
|
- is_false: found
|
||||||
|
|
||||||
|
- do:
|
||||||
|
termvector:
|
||||||
|
index: test_1
|
||||||
|
type: test
|
||||||
|
id: 1
|
||||||
|
realtime: 1
|
||||||
|
|
||||||
|
- is_true: found
|
|
@ -63,6 +63,8 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
||||||
// TODO: change to String[]
|
// TODO: change to String[]
|
||||||
private Set<String> selectedFields;
|
private Set<String> selectedFields;
|
||||||
|
|
||||||
|
Boolean realtime;
|
||||||
|
|
||||||
private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads,
|
private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads,
|
||||||
Flag.FieldStatistics);
|
Flag.FieldStatistics);
|
||||||
|
|
||||||
|
@ -95,6 +97,7 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
||||||
if (other.selectedFields != null) {
|
if (other.selectedFields != null) {
|
||||||
this.selectedFields = new HashSet<>(other.selectedFields);
|
this.selectedFields = new HashSet<>(other.selectedFields);
|
||||||
}
|
}
|
||||||
|
this.realtime = other.realtime();
|
||||||
}
|
}
|
||||||
|
|
||||||
public TermVectorRequest(MultiGetRequest.Item item) {
|
public TermVectorRequest(MultiGetRequest.Item item) {
|
||||||
|
@ -150,9 +153,18 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
||||||
* Sets an artificial document from which term vectors are requested for.
|
* Sets an artificial document from which term vectors are requested for.
|
||||||
*/
|
*/
|
||||||
public TermVectorRequest doc(XContentBuilder documentBuilder) {
|
public TermVectorRequest doc(XContentBuilder documentBuilder) {
|
||||||
|
return this.doc(documentBuilder.bytes(), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets an artificial document from which term vectors are requested for.
|
||||||
|
*/
|
||||||
|
public TermVectorRequest doc(BytesReference doc, boolean generateRandomId) {
|
||||||
// assign a random id to this artificial document, for routing
|
// assign a random id to this artificial document, for routing
|
||||||
this.id(String.valueOf(randomInt.getAndAdd(1)));
|
if (generateRandomId) {
|
||||||
this.doc = documentBuilder.bytes();
|
this.id(String.valueOf(randomInt.getAndAdd(1)));
|
||||||
|
}
|
||||||
|
this.doc = doc;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,6 +305,15 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean realtime() {
|
||||||
|
return this.realtime == null ? true : this.realtime;
|
||||||
|
}
|
||||||
|
|
||||||
|
public TermVectorRequest realtime(Boolean realtime) {
|
||||||
|
this.realtime = realtime;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
private void setFlag(Flag flag, boolean set) {
|
private void setFlag(Flag flag, boolean set) {
|
||||||
if (set && !flagsEnum.contains(flag)) {
|
if (set && !flagsEnum.contains(flag)) {
|
||||||
flagsEnum.add(flag);
|
flagsEnum.add(flag);
|
||||||
|
@ -353,6 +374,9 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
||||||
selectedFields.add(in.readString());
|
selectedFields.add(in.readString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (in.getVersion().onOrAfter(Version.V_1_5_0)) {
|
||||||
|
this.realtime = in.readBoolean();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -386,6 +410,9 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
||||||
} else {
|
} else {
|
||||||
out.writeVInt(0);
|
out.writeVInt(0);
|
||||||
}
|
}
|
||||||
|
if (out.getVersion().onOrAfter(Version.V_1_5_0)) {
|
||||||
|
out.writeBoolean(realtime());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static enum Flag {
|
public static enum Flag {
|
||||||
|
|
|
@ -126,6 +126,11 @@ public class TermVectorRequestBuilder extends ActionRequestBuilder<TermVectorReq
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public TermVectorRequestBuilder setRealtime(Boolean realtime) {
|
||||||
|
request.realtime(realtime);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void doExecute(ActionListener<TermVectorResponse> listener) {
|
protected void doExecute(ActionListener<TermVectorResponse> listener) {
|
||||||
client.termVector(request, listener);
|
client.termVector(request, listener);
|
||||||
|
|
|
@ -74,6 +74,16 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
||||||
IndexReader topLevelReader = searcher.reader();
|
IndexReader topLevelReader = searcher.reader();
|
||||||
final TermVectorResponse termVectorResponse = new TermVectorResponse(concreteIndex, request.type(), request.id());
|
final TermVectorResponse termVectorResponse = new TermVectorResponse(concreteIndex, request.type(), request.id());
|
||||||
|
|
||||||
|
final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
|
||||||
|
Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm));
|
||||||
|
boolean docFromTranslog = get.source() != null;
|
||||||
|
|
||||||
|
/* fetched from translog is treated as an artificial document */
|
||||||
|
if (docFromTranslog) {
|
||||||
|
request.doc(get.source().source, false);
|
||||||
|
termVectorResponse.setDocVersion(get.version());
|
||||||
|
}
|
||||||
|
|
||||||
/* handle potential wildcards in fields */
|
/* handle potential wildcards in fields */
|
||||||
if (request.selectedFields() != null) {
|
if (request.selectedFields() != null) {
|
||||||
handleFieldWildcards(request);
|
handleFieldWildcards(request);
|
||||||
|
@ -81,27 +91,25 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Fields topLevelFields = MultiFields.getFields(topLevelReader);
|
Fields topLevelFields = MultiFields.getFields(topLevelReader);
|
||||||
|
Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
|
||||||
/* from an artificial document */
|
/* from an artificial document */
|
||||||
if (request.doc() != null) {
|
if (request.doc() != null) {
|
||||||
Fields termVectorsByField = generateTermVectorsFromDoc(request);
|
Fields termVectorsByField = generateTermVectorsFromDoc(request, !docFromTranslog);
|
||||||
// if no document indexed in shard, take the queried document itself for stats
|
// if no document indexed in shard, take the queried document itself for stats
|
||||||
if (topLevelFields == null) {
|
if (topLevelFields == null) {
|
||||||
topLevelFields = termVectorsByField;
|
topLevelFields = termVectorsByField;
|
||||||
}
|
}
|
||||||
termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
|
termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
|
||||||
termVectorResponse.setExists(true);
|
termVectorResponse.setExists(true);
|
||||||
termVectorResponse.setArtificial(true);
|
termVectorResponse.setArtificial(!docFromTranslog);
|
||||||
return termVectorResponse;
|
|
||||||
}
|
}
|
||||||
/* or from an existing document */
|
/* or from an existing document */
|
||||||
final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
|
else if (docIdAndVersion != null) {
|
||||||
Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm);
|
|
||||||
if (docIdAndVersion != null) {
|
|
||||||
// fields with stored term vectors
|
// fields with stored term vectors
|
||||||
Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
|
Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
|
||||||
// fields without term vectors
|
// fields without term vectors
|
||||||
if (request.selectedFields() != null) {
|
if (request.selectedFields() != null) {
|
||||||
termVectorsByField = addGeneratedTermVectors(termVectorsByField, request, uidTerm, false);
|
termVectorsByField = addGeneratedTermVectors(get, termVectorsByField, request);
|
||||||
}
|
}
|
||||||
termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
|
termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
|
||||||
termVectorResponse.setDocVersion(docIdAndVersion.version);
|
termVectorResponse.setDocVersion(docIdAndVersion.version);
|
||||||
|
@ -113,6 +121,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
||||||
throw new ElasticsearchException("failed to execute term vector request", ex);
|
throw new ElasticsearchException("failed to execute term vector request", ex);
|
||||||
} finally {
|
} finally {
|
||||||
searcher.close();
|
searcher.close();
|
||||||
|
get.release();
|
||||||
}
|
}
|
||||||
return termVectorResponse;
|
return termVectorResponse;
|
||||||
}
|
}
|
||||||
|
@ -137,7 +146,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Fields addGeneratedTermVectors(Fields termVectorsByField, TermVectorRequest request, Term uidTerm, boolean realTime) throws IOException {
|
private Fields addGeneratedTermVectors(Engine.GetResult get, Fields termVectorsByField, TermVectorRequest request) throws IOException {
|
||||||
/* only keep valid fields */
|
/* only keep valid fields */
|
||||||
Set<String> validFields = new HashSet<>();
|
Set<String> validFields = new HashSet<>();
|
||||||
for (String field : request.selectedFields()) {
|
for (String field : request.selectedFields()) {
|
||||||
|
@ -157,18 +166,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
||||||
}
|
}
|
||||||
|
|
||||||
/* generate term vectors from fetched document fields */
|
/* generate term vectors from fetched document fields */
|
||||||
Engine.GetResult get = indexShard.get(new Engine.Get(realTime, uidTerm));
|
GetResult getResult = indexShard.getService().get(
|
||||||
Fields generatedTermVectors;
|
get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false);
|
||||||
try {
|
Fields generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets());
|
||||||
if (!get.exists()) {
|
|
||||||
return termVectorsByField;
|
|
||||||
}
|
|
||||||
GetResult getResult = indexShard.getService().get(
|
|
||||||
get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false);
|
|
||||||
generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets());
|
|
||||||
} finally {
|
|
||||||
get.release();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* merge with existing Fields */
|
/* merge with existing Fields */
|
||||||
if (termVectorsByField == null) {
|
if (termVectorsByField == null) {
|
||||||
|
@ -195,7 +195,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
||||||
return MultiFields.getFields(index.createSearcher().getIndexReader());
|
return MultiFields.getFields(index.createSearcher().getIndexReader());
|
||||||
}
|
}
|
||||||
|
|
||||||
private Fields generateTermVectorsFromDoc(TermVectorRequest request) throws IOException {
|
private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields) throws IOException {
|
||||||
// parse the document, at the moment we do update the mapping, just like percolate
|
// parse the document, at the moment we do update the mapping, just like percolate
|
||||||
ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc());
|
ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc());
|
||||||
|
|
||||||
|
@ -214,6 +214,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
||||||
if (!isValidField(fieldMapper)) {
|
if (!isValidField(fieldMapper)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) {
|
if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,6 +77,7 @@ public class RestTermVectorAction extends BaseRestHandler {
|
||||||
termVectorRequest.positions(request.paramAsBoolean("positions", termVectorRequest.positions()));
|
termVectorRequest.positions(request.paramAsBoolean("positions", termVectorRequest.positions()));
|
||||||
termVectorRequest.payloads(request.paramAsBoolean("payloads", termVectorRequest.payloads()));
|
termVectorRequest.payloads(request.paramAsBoolean("payloads", termVectorRequest.payloads()));
|
||||||
termVectorRequest.routing(request.param("routing"));
|
termVectorRequest.routing(request.param("routing"));
|
||||||
|
termVectorRequest.realtime(request.paramAsBoolean("realtime", null));
|
||||||
termVectorRequest.parent(request.param("parent"));
|
termVectorRequest.parent(request.param("parent"));
|
||||||
termVectorRequest.preference(request.param("preference"));
|
termVectorRequest.preference(request.param("preference"));
|
||||||
termVectorRequest.termStatistics(request.paramAsBoolean("termStatistics", termVectorRequest.termStatistics()));
|
termVectorRequest.termStatistics(request.paramAsBoolean("termStatistics", termVectorRequest.termStatistics()));
|
||||||
|
|
|
@ -32,8 +32,6 @@ import org.elasticsearch.common.settings.ImmutableSettings;
|
||||||
import org.elasticsearch.common.xcontent.ToXContent;
|
import org.elasticsearch.common.xcontent.ToXContent;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
|
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
|
||||||
import org.elasticsearch.index.service.IndexService;
|
|
||||||
import org.elasticsearch.indices.IndicesService;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
Loading…
Reference in New Issue