Term Vectors: support for realtime

By default term vectors are now realtime, as opposed to previously near
realtime. If they are not found in the index, they will be generated on the
fly. The document is fetched from the transaction log and treated as an
artificial document. One can set the `realtime` parameter to `false` in order to
disable this functionality. This consequently makes the MLT query realtime in
fetching documents, as it previously was before switching from using
the multi get API to the mtv API.

Closes #7846
This commit is contained in:
Alex Ksikes 2014-09-23 23:21:42 +02:00
parent 1cc5da43b3
commit c4830cf862
10 changed files with 113 additions and 28 deletions

View File

@ -3,9 +3,9 @@
Returns information and statistics on terms in the fields of a particular Returns information and statistics on terms in the fields of a particular
document. The document could be stored in the index or artificially provided document. The document could be stored in the index or artificially provided
by the user Note that for documents stored in the index, this by the user coming[1.4.0]. Term vectors are now <<realtime,realtime>>, as opposed to
is a near realtime API as the term vectors are not available until the next previously near realtime coming[1.5.0]. The functionality is disabled by setting
refresh. `realtime` parameter to `false`.
[source,js] [source,js]
-------------------------------------------------- --------------------------------------------------

View File

@ -74,6 +74,11 @@
"type" : "string", "type" : "string",
"description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".", "description" : "Parent id of documents. Applies to all returned documents unless otherwise specified in body \"params\" or \"docs\".",
"required" : false "required" : false
},
"realtime": {
"type" : "boolean",
"description" : "Specifies if requests are real-time as opposed to near-real-time (default: true).",
"required" : false
} }
} }
}, },

View File

@ -72,6 +72,11 @@
"type" : "string", "type" : "string",
"description" : "Parent id of documents.", "description" : "Parent id of documents.",
"required" : false "required" : false
},
"realtime": {
"type" : "boolean",
"description" : "Specifies if request is real-time as opposed to near-real-time (default: true).",
"required" : false
} }
} }
}, },

View File

@ -29,6 +29,7 @@ setup:
index: testidx index: testidx
type: doc type: doc
id: 1 id: 1
realtime: 0
- match: { "_index": "testidx" } - match: { "_index": "testidx" }
- match: { "_type": "doc" } - match: { "_type": "doc" }

View File

@ -0,0 +1,40 @@
---
"Realtime Term Vectors":
- do:
indices.create:
index: test_1
body:
settings:
index:
refresh_interval: -1
number_of_replicas: 0
- do:
cluster.health:
wait_for_status: green
- do:
index:
index: test_1
type: test
id: 1
body: { foo: bar }
- do:
termvector:
index: test_1
type: test
id: 1
realtime: 0
- is_false: found
- do:
termvector:
index: test_1
type: test
id: 1
realtime: 1
- is_true: found

View File

@ -63,6 +63,8 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
// TODO: change to String[] // TODO: change to String[]
private Set<String> selectedFields; private Set<String> selectedFields;
Boolean realtime;
private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads, private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads,
Flag.FieldStatistics); Flag.FieldStatistics);
@ -95,6 +97,7 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
if (other.selectedFields != null) { if (other.selectedFields != null) {
this.selectedFields = new HashSet<>(other.selectedFields); this.selectedFields = new HashSet<>(other.selectedFields);
} }
this.realtime = other.realtime();
} }
public TermVectorRequest(MultiGetRequest.Item item) { public TermVectorRequest(MultiGetRequest.Item item) {
@ -150,9 +153,18 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
* Sets an artificial document from which term vectors are requested for. * Sets an artificial document from which term vectors are requested for.
*/ */
public TermVectorRequest doc(XContentBuilder documentBuilder) { public TermVectorRequest doc(XContentBuilder documentBuilder) {
return this.doc(documentBuilder.bytes(), true);
}
/**
* Sets an artificial document from which term vectors are requested for.
*/
public TermVectorRequest doc(BytesReference doc, boolean generateRandomId) {
// assign a random id to this artificial document, for routing // assign a random id to this artificial document, for routing
this.id(String.valueOf(randomInt.getAndAdd(1))); if (generateRandomId) {
this.doc = documentBuilder.bytes(); this.id(String.valueOf(randomInt.getAndAdd(1)));
}
this.doc = doc;
return this; return this;
} }
@ -293,6 +305,15 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
return this; return this;
} }
public boolean realtime() {
return this.realtime == null ? true : this.realtime;
}
public TermVectorRequest realtime(Boolean realtime) {
this.realtime = realtime;
return this;
}
private void setFlag(Flag flag, boolean set) { private void setFlag(Flag flag, boolean set) {
if (set && !flagsEnum.contains(flag)) { if (set && !flagsEnum.contains(flag)) {
flagsEnum.add(flag); flagsEnum.add(flag);
@ -353,6 +374,9 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
selectedFields.add(in.readString()); selectedFields.add(in.readString());
} }
} }
if (in.getVersion().onOrAfter(Version.V_1_5_0)) {
this.realtime = in.readBoolean();
}
} }
@Override @Override
@ -386,6 +410,9 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
} else { } else {
out.writeVInt(0); out.writeVInt(0);
} }
if (out.getVersion().onOrAfter(Version.V_1_5_0)) {
out.writeBoolean(realtime());
}
} }
public static enum Flag { public static enum Flag {

View File

@ -126,6 +126,11 @@ public class TermVectorRequestBuilder extends ActionRequestBuilder<TermVectorReq
return this; return this;
} }
public TermVectorRequestBuilder setRealtime(Boolean realtime) {
request.realtime(realtime);
return this;
}
@Override @Override
protected void doExecute(ActionListener<TermVectorResponse> listener) { protected void doExecute(ActionListener<TermVectorResponse> listener) {
client.termVector(request, listener); client.termVector(request, listener);

View File

@ -74,6 +74,16 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
IndexReader topLevelReader = searcher.reader(); IndexReader topLevelReader = searcher.reader();
final TermVectorResponse termVectorResponse = new TermVectorResponse(concreteIndex, request.type(), request.id()); final TermVectorResponse termVectorResponse = new TermVectorResponse(concreteIndex, request.type(), request.id());
final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm));
boolean docFromTranslog = get.source() != null;
/* fetched from translog is treated as an artificial document */
if (docFromTranslog) {
request.doc(get.source().source, false);
termVectorResponse.setDocVersion(get.version());
}
/* handle potential wildcards in fields */ /* handle potential wildcards in fields */
if (request.selectedFields() != null) { if (request.selectedFields() != null) {
handleFieldWildcards(request); handleFieldWildcards(request);
@ -81,27 +91,25 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
try { try {
Fields topLevelFields = MultiFields.getFields(topLevelReader); Fields topLevelFields = MultiFields.getFields(topLevelReader);
Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
/* from an artificial document */ /* from an artificial document */
if (request.doc() != null) { if (request.doc() != null) {
Fields termVectorsByField = generateTermVectorsFromDoc(request); Fields termVectorsByField = generateTermVectorsFromDoc(request, !docFromTranslog);
// if no document indexed in shard, take the queried document itself for stats // if no document indexed in shard, take the queried document itself for stats
if (topLevelFields == null) { if (topLevelFields == null) {
topLevelFields = termVectorsByField; topLevelFields = termVectorsByField;
} }
termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields); termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
termVectorResponse.setExists(true); termVectorResponse.setExists(true);
termVectorResponse.setArtificial(true); termVectorResponse.setArtificial(!docFromTranslog);
return termVectorResponse;
} }
/* or from an existing document */ /* or from an existing document */
final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id())); else if (docIdAndVersion != null) {
Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm);
if (docIdAndVersion != null) {
// fields with stored term vectors // fields with stored term vectors
Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId); Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
// fields without term vectors // fields without term vectors
if (request.selectedFields() != null) { if (request.selectedFields() != null) {
termVectorsByField = addGeneratedTermVectors(termVectorsByField, request, uidTerm, false); termVectorsByField = addGeneratedTermVectors(get, termVectorsByField, request);
} }
termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields); termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
termVectorResponse.setDocVersion(docIdAndVersion.version); termVectorResponse.setDocVersion(docIdAndVersion.version);
@ -113,6 +121,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
throw new ElasticsearchException("failed to execute term vector request", ex); throw new ElasticsearchException("failed to execute term vector request", ex);
} finally { } finally {
searcher.close(); searcher.close();
get.release();
} }
return termVectorResponse; return termVectorResponse;
} }
@ -137,7 +146,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
return true; return true;
} }
private Fields addGeneratedTermVectors(Fields termVectorsByField, TermVectorRequest request, Term uidTerm, boolean realTime) throws IOException { private Fields addGeneratedTermVectors(Engine.GetResult get, Fields termVectorsByField, TermVectorRequest request) throws IOException {
/* only keep valid fields */ /* only keep valid fields */
Set<String> validFields = new HashSet<>(); Set<String> validFields = new HashSet<>();
for (String field : request.selectedFields()) { for (String field : request.selectedFields()) {
@ -157,18 +166,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
} }
/* generate term vectors from fetched document fields */ /* generate term vectors from fetched document fields */
Engine.GetResult get = indexShard.get(new Engine.Get(realTime, uidTerm)); GetResult getResult = indexShard.getService().get(
Fields generatedTermVectors; get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false);
try { Fields generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets());
if (!get.exists()) {
return termVectorsByField;
}
GetResult getResult = indexShard.getService().get(
get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false);
generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets());
} finally {
get.release();
}
/* merge with existing Fields */ /* merge with existing Fields */
if (termVectorsByField == null) { if (termVectorsByField == null) {
@ -195,7 +195,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
return MultiFields.getFields(index.createSearcher().getIndexReader()); return MultiFields.getFields(index.createSearcher().getIndexReader());
} }
private Fields generateTermVectorsFromDoc(TermVectorRequest request) throws IOException { private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields) throws IOException {
// parse the document, at the moment we do update the mapping, just like percolate // parse the document, at the moment we do update the mapping, just like percolate
ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc()); ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc());
@ -214,6 +214,9 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
if (!isValidField(fieldMapper)) { if (!isValidField(fieldMapper)) {
continue; continue;
} }
if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) {
continue;
}
if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) { if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) {
continue; continue;
} }

View File

@ -77,6 +77,7 @@ public class RestTermVectorAction extends BaseRestHandler {
termVectorRequest.positions(request.paramAsBoolean("positions", termVectorRequest.positions())); termVectorRequest.positions(request.paramAsBoolean("positions", termVectorRequest.positions()));
termVectorRequest.payloads(request.paramAsBoolean("payloads", termVectorRequest.payloads())); termVectorRequest.payloads(request.paramAsBoolean("payloads", termVectorRequest.payloads()));
termVectorRequest.routing(request.param("routing")); termVectorRequest.routing(request.param("routing"));
termVectorRequest.realtime(request.paramAsBoolean("realtime", null));
termVectorRequest.parent(request.param("parent")); termVectorRequest.parent(request.param("parent"));
termVectorRequest.preference(request.param("preference")); termVectorRequest.preference(request.param("preference"));
termVectorRequest.termStatistics(request.paramAsBoolean("termStatistics", termVectorRequest.termStatistics())); termVectorRequest.termStatistics(request.paramAsBoolean("termStatistics", termVectorRequest.termStatistics()));

View File

@ -32,8 +32,6 @@ import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.core.AbstractFieldMapper; import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
import org.elasticsearch.index.service.IndexService;
import org.elasticsearch.indices.IndicesService;
import org.junit.Test; import org.junit.Test;
import java.io.IOException; import java.io.IOException;