Term Vectors: support for realtime
By default term vectors are now realtime, as opposed to previously near realtime. If they are not found in the index, they will be generated on the fly. The document is fetched from the transaction log and treated as an artificial document. One can set the `realtime` parameter to `false` in order to disable this functionality. This consequently makes the MLT query realtime in fetching documents, as it previously was before switching from the multi get API to the mtv API. Closes #7846
This commit is contained in:
parent
1cc5da43b3
commit
c4830cf862
|
@ -3,9 +3,9 @@
|
|||
|
||||
Returns information and statistics on terms in the fields of a particular
|
||||
document. The document could be stored in the index or artificially provided
|
||||
by the user Note that for documents stored in the index, this
|
||||
is a near realtime API as the term vectors are not available until the next
|
||||
refresh.
|
||||
by the user coming[1.4.0]. Term vectors are now <<realtime,realtime>>, as opposed to
|
||||
previously near realtime coming[1.5.0]. The functionality is disabled by setting
|
||||
`realtime` parameter to `false`.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
|
|
|
@ -74,6 +74,11 @@
|
|||
"type" : "string",
|
||||
"description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
|
||||
"required" : false
|
||||
},
|
||||
"realtime": {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if requests are real-time as opposed to near-real-time (default: true).",
|
||||
"required" : false
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
|
@ -72,6 +72,11 @@
|
|||
"type" : "string",
|
||||
"description" : "Parent id of documents.",
|
||||
"required" : false
|
||||
},
|
||||
"realtime": {
|
||||
"type" : "boolean",
|
||||
"description" : "Specifies if request is real-time as opposed to near-real-time (default: true).",
|
||||
"required" : false
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
|
@ -29,6 +29,7 @@ setup:
|
|||
index: testidx
|
||||
type: doc
|
||||
id: 1
|
||||
realtime: 0
|
||||
|
||||
- match: { "_index": "testidx" }
|
||||
- match: { "_type": "doc" }
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
---
|
||||
"Realtime Term Vectors":
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: test_1
|
||||
body:
|
||||
settings:
|
||||
index:
|
||||
refresh_interval: -1
|
||||
number_of_replicas: 0
|
||||
|
||||
- do:
|
||||
cluster.health:
|
||||
wait_for_status: green
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test_1
|
||||
type: test
|
||||
id: 1
|
||||
body: { foo: bar }
|
||||
|
||||
- do:
|
||||
termvector:
|
||||
index: test_1
|
||||
type: test
|
||||
id: 1
|
||||
realtime: 0
|
||||
|
||||
- is_false: found
|
||||
|
||||
- do:
|
||||
termvector:
|
||||
index: test_1
|
||||
type: test
|
||||
id: 1
|
||||
realtime: 1
|
||||
|
||||
- is_true: found
|
|
@ -63,6 +63,8 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
|||
// TODO: change to String[]
|
||||
private Set<String> selectedFields;
|
||||
|
||||
Boolean realtime;
|
||||
|
||||
private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads,
|
||||
Flag.FieldStatistics);
|
||||
|
||||
|
@ -95,6 +97,7 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
|||
if (other.selectedFields != null) {
|
||||
this.selectedFields = new HashSet<>(other.selectedFields);
|
||||
}
|
||||
this.realtime = other.realtime();
|
||||
}
|
||||
|
||||
public TermVectorRequest(MultiGetRequest.Item item) {
|
||||
|
@ -150,9 +153,18 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
|||
* Sets an artificial document from which term vectors are requested for.
|
||||
*/
|
||||
public TermVectorRequest doc(XContentBuilder documentBuilder) {
|
||||
return this.doc(documentBuilder.bytes(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets an artificial document from which term vectors are requested for.
|
||||
*/
|
||||
public TermVectorRequest doc(BytesReference doc, boolean generateRandomId) {
|
||||
// assign a random id to this artificial document, for routing
|
||||
this.id(String.valueOf(randomInt.getAndAdd(1)));
|
||||
this.doc = documentBuilder.bytes();
|
||||
if (generateRandomId) {
|
||||
this.id(String.valueOf(randomInt.getAndAdd(1)));
|
||||
}
|
||||
this.doc = doc;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -293,6 +305,15 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
|||
return this;
|
||||
}
|
||||
|
||||
public boolean realtime() {
|
||||
return this.realtime == null ? true : this.realtime;
|
||||
}
|
||||
|
||||
public TermVectorRequest realtime(Boolean realtime) {
|
||||
this.realtime = realtime;
|
||||
return this;
|
||||
}
|
||||
|
||||
private void setFlag(Flag flag, boolean set) {
|
||||
if (set && !flagsEnum.contains(flag)) {
|
||||
flagsEnum.add(flag);
|
||||
|
@ -353,6 +374,9 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
|||
selectedFields.add(in.readString());
|
||||
}
|
||||
}
|
||||
if (in.getVersion().onOrAfter(Version.V_1_5_0)) {
|
||||
this.realtime = in.readBoolean();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -386,6 +410,9 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
|
|||
} else {
|
||||
out.writeVInt(0);
|
||||
}
|
||||
if (out.getVersion().onOrAfter(Version.V_1_5_0)) {
|
||||
out.writeBoolean(realtime());
|
||||
}
|
||||
}
|
||||
|
||||
public static enum Flag {
|
||||
|
|
|
@ -126,6 +126,11 @@ public class TermVectorRequestBuilder extends ActionRequestBuilder<TermVectorReq
|
|||
return this;
|
||||
}
|
||||
|
||||
public TermVectorRequestBuilder setRealtime(Boolean realtime) {
|
||||
request.realtime(realtime);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doExecute(ActionListener<TermVectorResponse> listener) {
|
||||
client.termVector(request, listener);
|
||||
|
|
|
@ -74,6 +74,16 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
|||
IndexReader topLevelReader = searcher.reader();
|
||||
final TermVectorResponse termVectorResponse = new TermVectorResponse(concreteIndex, request.type(), request.id());
|
||||
|
||||
final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
|
||||
Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm));
|
||||
boolean docFromTranslog = get.source() != null;
|
||||
|
||||
/* fetched from translog is treated as an artificial document */
|
||||
if (docFromTranslog) {
|
||||
request.doc(get.source().source, false);
|
||||
termVectorResponse.setDocVersion(get.version());
|
||||
}
|
||||
|
||||
/* handle potential wildcards in fields */
|
||||
if (request.selectedFields() != null) {
|
||||
handleFieldWildcards(request);
|
||||
|
@ -81,27 +91,25 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
|||
|
||||
try {
|
||||
Fields topLevelFields = MultiFields.getFields(topLevelReader);
|
||||
Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
|
||||
/* from an artificial document */
|
||||
if (request.doc() != null) {
|
||||
Fields termVectorsByField = generateTermVectorsFromDoc(request);
|
||||
Fields termVectorsByField = generateTermVectorsFromDoc(request, !docFromTranslog);
|
||||
// if no document indexed in shard, take the queried document itself for stats
|
||||
if (topLevelFields == null) {
|
||||
topLevelFields = termVectorsByField;
|
||||
}
|
||||
termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
|
||||
termVectorResponse.setExists(true);
|
||||
termVectorResponse.setArtificial(true);
|
||||
return termVectorResponse;
|
||||
termVectorResponse.setArtificial(!docFromTranslog);
|
||||
}
|
||||
/* or from an existing document */
|
||||
final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
|
||||
Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm);
|
||||
if (docIdAndVersion != null) {
|
||||
else if (docIdAndVersion != null) {
|
||||
// fields with stored term vectors
|
||||
Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
|
||||
// fields without term vectors
|
||||
if (request.selectedFields() != null) {
|
||||
termVectorsByField = addGeneratedTermVectors(termVectorsByField, request, uidTerm, false);
|
||||
termVectorsByField = addGeneratedTermVectors(get, termVectorsByField, request);
|
||||
}
|
||||
termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
|
||||
termVectorResponse.setDocVersion(docIdAndVersion.version);
|
||||
|
@ -113,6 +121,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
|||
throw new ElasticsearchException("failed to execute term vector request", ex);
|
||||
} finally {
|
||||
searcher.close();
|
||||
get.release();
|
||||
}
|
||||
return termVectorResponse;
|
||||
}
|
||||
|
@ -137,7 +146,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
|||
return true;
|
||||
}
|
||||
|
||||
private Fields addGeneratedTermVectors(Fields termVectorsByField, TermVectorRequest request, Term uidTerm, boolean realTime) throws IOException {
|
||||
private Fields addGeneratedTermVectors(Engine.GetResult get, Fields termVectorsByField, TermVectorRequest request) throws IOException {
|
||||
/* only keep valid fields */
|
||||
Set<String> validFields = new HashSet<>();
|
||||
for (String field : request.selectedFields()) {
|
||||
|
@ -157,18 +166,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
|||
}
|
||||
|
||||
/* generate term vectors from fetched document fields */
|
||||
Engine.GetResult get = indexShard.get(new Engine.Get(realTime, uidTerm));
|
||||
Fields generatedTermVectors;
|
||||
try {
|
||||
if (!get.exists()) {
|
||||
return termVectorsByField;
|
||||
}
|
||||
GetResult getResult = indexShard.getService().get(
|
||||
get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false);
|
||||
generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets());
|
||||
} finally {
|
||||
get.release();
|
||||
}
|
||||
GetResult getResult = indexShard.getService().get(
|
||||
get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false);
|
||||
Fields generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets());
|
||||
|
||||
/* merge with existing Fields */
|
||||
if (termVectorsByField == null) {
|
||||
|
@ -195,7 +195,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
|||
return MultiFields.getFields(index.createSearcher().getIndexReader());
|
||||
}
|
||||
|
||||
private Fields generateTermVectorsFromDoc(TermVectorRequest request) throws IOException {
|
||||
private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields) throws IOException {
|
||||
// parse the document, at the moment we do update the mapping, just like percolate
|
||||
ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc());
|
||||
|
||||
|
@ -214,6 +214,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
|
|||
if (!isValidField(fieldMapper)) {
|
||||
continue;
|
||||
}
|
||||
if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) {
|
||||
continue;
|
||||
}
|
||||
if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -77,6 +77,7 @@ public class RestTermVectorAction extends BaseRestHandler {
|
|||
termVectorRequest.positions(request.paramAsBoolean("positions", termVectorRequest.positions()));
|
||||
termVectorRequest.payloads(request.paramAsBoolean("payloads", termVectorRequest.payloads()));
|
||||
termVectorRequest.routing(request.param("routing"));
|
||||
termVectorRequest.realtime(request.paramAsBoolean("realtime", null));
|
||||
termVectorRequest.parent(request.param("parent"));
|
||||
termVectorRequest.preference(request.param("preference"));
|
||||
termVectorRequest.termStatistics(request.paramAsBoolean("termStatistics", termVectorRequest.termStatistics()));
|
||||
|
|
|
@ -32,8 +32,6 @@ import org.elasticsearch.common.settings.ImmutableSettings;
|
|||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
|
||||
import org.elasticsearch.index.service.IndexService;
|
||||
import org.elasticsearch.indices.IndicesService;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
|
Loading…
Reference in New Issue