Term Vectors: support for distributed frequencies
Adds support for distributed frequencies to the Term Vectors API. A new parameter, `dfs`, is introduced and defaults to `false`. Closes #8144
parent 19514a2ef4
commit c13f5f21de
@@ -76,16 +76,23 @@ omit :
 * sum of total term frequencies (the sum of total term frequencies of
   each term in this field)
 
+[float]
+==== Distributed frequencies coming[1.5.0]
+
+Setting `dfs` to `true` (default is `false`) will return the term statistics
+or the field statistics of the entire index, and not just those of the shard. Use it
+with caution as distributed frequencies can have a serious performance impact.
+
 [float]
 === Behaviour
 
 The term and field statistics are not accurate. Deleted documents
 are not taken into account. The information is only retrieved for the
-shard the requested document resides in. The term and field statistics
-are therefore only useful as relative measures whereas the absolute
-numbers have no meaning in this context. By default, when requesting
-term vectors of artificial documents, a shard to get the statistics from
-is randomly selected. Use `routing` only to hit a particular shard.
+shard the requested document resides in, unless `dfs` is set to `true`.
+The term and field statistics are therefore only useful as relative measures
+whereas the absolute numbers have no meaning in this context. By default,
+when requesting term vectors of artificial documents, a shard to get the statistics
+from is randomly selected. Use `routing` only to hit a particular shard.
 
 [float]
 === Example 1
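To make the effect concrete with a small worked example: if an index holds 10 documents that all contain the term `cat`, spread over two shards, a plain term vectors request reports the `doc_freq` of `cat` as counted by the single shard that serves the request (say 5), whereas the same request with `dfs` set to `true` reports the index-wide value of 10. The `testDfs` test added at the end of this commit asserts exactly this relation: shard-local statistics are strictly less than the index-wide totals once there are at least two shards, and equal to them when `dfs` is enabled.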
@@ -35,6 +35,12 @@
       "default" : true,
       "required" : false
     },
+    "dfs" : {
+      "type" : "boolean",
+      "description" : "Specifies if distributed frequencies should be returned instead of shard frequencies.",
+      "default" : false,
+      "required" : false
+    },
     "fields" : {
       "type" : "list",
       "description" : "A comma-separated list of fields to return.",
@@ -160,6 +160,7 @@ import org.elasticsearch.action.support.ActionFilter;
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.TransportAction;
 import org.elasticsearch.action.termvector.*;
+import org.elasticsearch.action.termvector.dfs.TransportDfsOnlyAction;
 import org.elasticsearch.action.update.TransportUpdateAction;
 import org.elasticsearch.action.update.UpdateAction;
 import org.elasticsearch.common.inject.AbstractModule;

@@ -280,7 +281,8 @@ public class ActionModule extends AbstractModule {
 
         registerAction(IndexAction.INSTANCE, TransportIndexAction.class);
         registerAction(GetAction.INSTANCE, TransportGetAction.class);
-        registerAction(TermVectorAction.INSTANCE, TransportSingleShardTermVectorAction.class);
+        registerAction(TermVectorAction.INSTANCE, TransportSingleShardTermVectorAction.class,
+                TransportDfsOnlyAction.class);
         registerAction(MultiTermVectorsAction.INSTANCE, TransportMultiTermVectorsAction.class,
                 TransportSingleShardMultiTermsVectorAction.class);
         registerAction(DeleteAction.INSTANCE, TransportDeleteAction.class,
@@ -292,6 +292,22 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
         return this;
     }
 
+    /**
+     * @return <code>true</code> if distributed frequencies should be returned. Otherwise
+     *         <code>false</code>
+     */
+    public boolean dfs() {
+        return flagsEnum.contains(Flag.Dfs);
+    }
+
+    /**
+     * Use distributed frequencies instead of shard statistics.
+     */
+    public TermVectorRequest dfs(boolean dfs) {
+        setFlag(Flag.Dfs, dfs);
+        return this;
+    }
+
     /**
      * Return only term vectors for special selected fields. Returns for term
      * vectors for all fields if selectedFields == null
@@ -309,24 +325,30 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
         return this;
     }
 
+    /**
+     * Return whether term vectors should be generated real-time (defaults to true).
+     */
     public boolean realtime() {
         return this.realtime == null ? true : this.realtime;
     }
 
+    /**
+     * Choose whether term vectors should be generated real-time.
+     */
     public TermVectorRequest realtime(Boolean realtime) {
         this.realtime = realtime;
         return this;
     }
 
     /**
-     * Return the overridden analyzers at each field
+     * Return the overridden analyzers at each field.
      */
     public Map<String, String> perFieldAnalyzer() {
         return perFieldAnalyzer;
     }
 
     /**
-     * Override the analyzer used at each field when generating term vectors
+     * Override the analyzer used at each field when generating term vectors.
      */
     public TermVectorRequest perFieldAnalyzer(Map<String, String> perFieldAnalyzer) {
         this.perFieldAnalyzer = perFieldAnalyzer != null && perFieldAnalyzer.size() != 0 ? Maps.newHashMap(perFieldAnalyzer) : null;
@@ -444,7 +466,7 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
     public static enum Flag {
         // Do not change the order of these flags we use
         // the ordinal for encoding! Only append to the end!
-        Positions, Offsets, Payloads, FieldStatistics, TermStatistics
+        Positions, Offsets, Payloads, FieldStatistics, TermStatistics, Dfs
     }
 
     /**
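The encoding constraint called out in the comment is worth spelling out. A hedged sketch of how an `EnumSet<Flag>` can be packed by ordinal into a long bitset (illustrative only; the exact serialization code of this class may differ):

    // Pack an EnumSet<Flag> into a long, one bit per ordinal.
    static long encodeFlags(EnumSet<Flag> flags) {
        long bits = 0;
        for (Flag flag : flags) {
            bits |= 1L << flag.ordinal();
        }
        return bits;
    }

    // Unpack again. This is only safe if ordinals never change, which is
    // why new constants such as Dfs may only be appended at the end.
    static EnumSet<Flag> decodeFlags(long bits) {
        EnumSet<Flag> flags = EnumSet.noneOf(Flag.class);
        for (Flag flag : Flag.values()) {
            if ((bits & (1L << flag.ordinal())) != 0) {
                flags.add(flag);
            }
        }
        return flags;
    }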
@@ -477,6 +499,8 @@ public class TermVectorRequest extends SingleShardOperationRequest<TermVectorReq
                 termVectorRequest.termStatistics(parser.booleanValue());
             } else if (currentFieldName.equals("field_statistics") || currentFieldName.equals("fieldStatistics")) {
                 termVectorRequest.fieldStatistics(parser.booleanValue());
+            } else if (currentFieldName.equals("dfs")) {
+                termVectorRequest.dfs(parser.booleanValue());
             } else if (currentFieldName.equals("per_field_analyzer") || currentFieldName.equals("perFieldAnalyzer")) {
                 termVectorRequest.perFieldAnalyzer(readPerFieldAnalyzer(parser.map()));
             } else if ("_index".equals(currentFieldName)) { // the following is important for multi request parsing.
@@ -27,6 +27,11 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
 
 import java.util.Map;
 
 /**
+ * The builder class for a term vector request.
+ * Returns the term vector (doc frequency, positions, offsets) for a document.
+ * <p/>
+ * Note, the {@code index}, {@code type} and {@code id} are
+ * required.
  */
 public class TermVectorRequestBuilder extends ActionRequestBuilder<TermVectorRequest, TermVectorResponse, TermVectorRequestBuilder, Client> {
 

@@ -34,6 +39,11 @@ public class TermVectorRequestBuilder extends ActionRequestBuilder<TermVectorReq
         super(client, new TermVectorRequest());
     }
 
+    /**
+     * Constructs a new term vector request builder for a document that will be fetched
+     * from the provided index. Use {@code index}, {@code type} and
+     * {@code id} to specify the document to load.
+     */
     public TermVectorRequestBuilder(Client client, String index, String type, String id) {
         super(client, new TermVectorRequest(index, type, id));
     }
@@ -92,47 +102,81 @@ public class TermVectorRequestBuilder extends ActionRequestBuilder<TermVectorReq
      * <tt>_local</tt> to prefer local shards, <tt>_primary</tt> to execute only on primary shards, or
      * a custom value, which guarantees that the same order will be used across different requests.
      */
     public TermVectorRequestBuilder setPreference(String preference) {
         request.preference(preference);
         return this;
     }
 
+    /**
+     * Sets whether to return the start and stop offsets for each term if they were stored or
+     * skip offsets.
+     */
     public TermVectorRequestBuilder setOffsets(boolean offsets) {
         request.offsets(offsets);
         return this;
     }
 
+    /**
+     * Sets whether to return the positions for each term if stored or skip.
+     */
     public TermVectorRequestBuilder setPositions(boolean positions) {
         request.positions(positions);
         return this;
     }
 
+    /**
+     * Sets whether to return the payloads for each term or skip.
+     */
     public TermVectorRequestBuilder setPayloads(boolean payloads) {
         request.payloads(payloads);
         return this;
     }
 
+    /**
+     * Sets whether to return the term statistics for each term in the shard or skip.
+     */
     public TermVectorRequestBuilder setTermStatistics(boolean termStatistics) {
         request.termStatistics(termStatistics);
         return this;
     }
 
+    /**
+     * Sets whether to return the field statistics for each term in the shard or skip.
+     */
     public TermVectorRequestBuilder setFieldStatistics(boolean fieldStatistics) {
         request.fieldStatistics(fieldStatistics);
         return this;
     }
 
+    /**
+     * Sets whether to use distributed frequencies instead of shard statistics.
+     */
+    public TermVectorRequestBuilder setDfs(boolean dfs) {
+        request.dfs(dfs);
+        return this;
+    }
+
+    /**
+     * Sets whether to return only term vectors for special selected fields. Returns the term
+     * vectors for all fields if selectedFields == null
+     */
     public TermVectorRequestBuilder setSelectedFields(String... fields) {
         request.selectedFields(fields);
         return this;
     }
 
+    /**
+     * Sets whether term vectors are generated real-time.
+     */
     public TermVectorRequestBuilder setRealtime(Boolean realtime) {
         request.realtime(realtime);
         return this;
     }
 
+    /**
+     * Sets the analyzer used at each field when generating term vectors.
+     */
     public TermVectorRequestBuilder setPerFieldAnalyzer(Map<String, String> perFieldAnalyzer) {
         request.perFieldAnalyzer(perFieldAnalyzer);
         return this;
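With the builder change in place, opting into distributed frequencies is a one-line addition for callers. A minimal usage sketch (assuming `client` is a connected `Client`; the index, type, id and field names are illustrative):

    TermVectorResponse response = client.prepareTermVector("test", "type1", "1")
            .setSelectedFields("text")
            .setFieldStatistics(true)
            .setTermStatistics(true)
            .setDfs(true) // gather index-wide statistics instead of shard-local ones
            .get();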
@@ -30,6 +30,7 @@ import org.apache.lucene.util.CharsRefBuilder;
 import org.elasticsearch.ElasticsearchIllegalStateException;
 import org.elasticsearch.action.ActionResponse;
 import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.BytesStreamOutput;

@@ -38,6 +39,7 @@ import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentBuilderString;
+import org.elasticsearch.search.dfs.AggregatedDfs;
 
 import java.io.IOException;
 import java.util.EnumSet;
@@ -320,10 +322,14 @@ public class TermVectorResponse extends ActionResponse implements ToXContent {
     }
 
     public void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
+        setFields(termVectorsByField, selectedFields, flags, topLevelFields, null);
+    }
+
+    public void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields, @Nullable AggregatedDfs dfs) throws IOException {
         TermVectorWriter tvw = new TermVectorWriter(this);
 
         if (termVectorsByField != null) {
-            tvw.setFields(termVectorsByField, selectedFields, flags, topLevelFields);
+            tvw.setFields(termVectorsByField, selectedFields, flags, topLevelFields, dfs);
         }
 
     }
@@ -19,10 +19,14 @@
 package org.elasticsearch.action.termvector;
 
 import org.apache.lucene.index.*;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.action.termvector.TermVectorRequest.Flag;
+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.search.dfs.AggregatedDfs;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -45,7 +49,7 @@ final class TermVectorWriter {
         response = termVectorResponse;
     }
 
-    void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields) throws IOException {
+    void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields, @Nullable AggregatedDfs dfs) throws IOException {
         int numFieldsWritten = 0;
         TermsEnum iterator = null;
         DocsAndPositionsEnum docsAndPosEnum = null;
@@ -70,8 +74,12 @@ final class TermVectorWriter {
             boolean payloads = flags.contains(Flag.Payloads) && fieldTermVector.hasPayloads();
             startField(field, fieldTermVector.size(), positions, offsets, payloads);
             if (flags.contains(Flag.FieldStatistics)) {
+                if (dfs != null) {
+                    writeFieldStatistics(dfs.fieldStatistics().get(field));
+                } else {
                     writeFieldStatistics(topLevelTerms);
                 }
+            }
             iterator = fieldTermVector.iterator(iterator);
             final boolean useDocsAndPos = positions || offsets || payloads;
             while (iterator.next() != null) { // iterate all terms of the
@@ -81,8 +89,12 @@ final class TermVectorWriter {
             boolean foundTerm = topLevelIterator.seekExact(term);
             startTerm(term);
             if (flags.contains(Flag.TermStatistics)) {
+                if (dfs != null) {
+                    writeTermStatistics(dfs.termStatistics().get(new Term(field, term.utf8ToString())));
+                } else {
                     writeTermStatistics(topLevelIterator);
                 }
+            }
             if (useDocsAndPos) {
                 // given we have pos or offsets
                 docsAndPosEnum = writeTermWithDocsAndPos(iterator, docsAndPosEnum, positions, offsets, payloads);
@@ -161,7 +173,6 @@ final class TermVectorWriter {
     }
 
     private void writeFreq(int termFreq) throws IOException {
-
         writePotentiallyNegativeVInt(termFreq);
     }
 
@@ -205,7 +216,15 @@ final class TermVectorWriter {
         long ttf = topLevelIterator.totalTermFreq();
         assert (ttf >= -1);
         writePotentiallyNegativeVLong(ttf);
     }
 
+    private void writeTermStatistics(TermStatistics termStatistics) throws IOException {
+        int docFreq = (int) termStatistics.docFreq();
+        assert (docFreq >= -1);
+        writePotentiallyNegativeVInt(docFreq);
+        long ttf = termStatistics.totalTermFreq();
+        assert (ttf >= -1);
+        writePotentiallyNegativeVLong(ttf);
+    }
+
     private void writeFieldStatistics(Terms topLevelTerms) throws IOException {
@@ -218,7 +237,18 @@ final class TermVectorWriter {
         int dc = topLevelTerms.getDocCount();
         assert (dc >= -1);
         writePotentiallyNegativeVInt(dc);
     }
 
+    private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
+        long sttf = fieldStats.sumTotalTermFreq();
+        assert (sttf >= -1);
+        writePotentiallyNegativeVLong(sttf);
+        long sdf = fieldStats.sumDocFreq();
+        assert (sdf >= -1);
+        writePotentiallyNegativeVLong(sdf);
+        int dc = (int) fieldStats.docCount();
+        assert (dc >= -1);
+        writePotentiallyNegativeVInt(dc);
+    }
+
     private void writePotentiallyNegativeVInt(int value) throws IOException {
@@ -0,0 +1,121 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.action.termvector.dfs;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.support.broadcast.BroadcastOperationRequest;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;

import java.io.IOException;
import java.util.Arrays;
import java.util.Set;

import static org.elasticsearch.index.query.QueryBuilders.boolQuery;

public class DfsOnlyRequest extends BroadcastOperationRequest<DfsOnlyRequest> {

    private SearchRequest searchRequest = new SearchRequest();

    long nowInMillis;

    DfsOnlyRequest() {

    }

    public DfsOnlyRequest(Fields termVectorFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
        super(indices);

        // build a search request with a query of all the terms
        final BoolQueryBuilder boolBuilder = boolQuery();
        TermsEnum iterator = null;
        for (String fieldName : termVectorFields) {
            if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
                continue;
            }
            Terms terms = termVectorFields.terms(fieldName);
            iterator = terms.iterator(iterator);
            while (iterator.next() != null) {
                String text = iterator.term().utf8ToString();
                boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
            }
        }
        // wrap a search request object
        this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
    }

    public SearchRequest getSearchRequest() {
        return searchRequest;
    }

    @Override
    public ActionRequestValidationException validate() {
        return searchRequest.validate();
    }

    @Override
    protected void beforeStart() {
        searchRequest.beforeStart();
    }

    @Override
    public void readFrom(StreamInput in) throws IOException {
        super.readFrom(in);
        this.searchRequest.readFrom(in);
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        this.searchRequest.writeTo(out);
    }

    public String[] types() {
        return this.searchRequest.types();
    }

    public String routing() {
        return this.searchRequest.routing();
    }

    public String preference() {
        return this.searchRequest.preference();
    }

    @Override
    public String toString() {
        String sSource = "_na_";
        try {
            sSource = XContentHelper.convertToJson(searchRequest.source(), false);
        } catch (IOException e) {
            // ignore
        }
        return "[" + Arrays.toString(indices) + "]" + Arrays.toString(types()) + ", source[" + sSource + "]";
    }

}
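Design note: the dfs-only round trip reuses the ordinary search machinery rather than a bespoke transport. Every term found in the document's term vectors becomes a `should` clause of one bool query, so the dfs phase computes statistics for exactly the terms that will later be written into the term vectors response, and nothing more.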
@@ -0,0 +1,73 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.action.termvector.dfs;

import org.elasticsearch.action.ShardOperationFailedException;
import org.elasticsearch.action.support.broadcast.BroadcastOperationResponse;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.search.dfs.AggregatedDfs;

import java.io.IOException;
import java.util.List;

/**
 * A response of a dfs only request.
 */
public class DfsOnlyResponse extends BroadcastOperationResponse {

    private AggregatedDfs dfs;
    private long tookInMillis;

    DfsOnlyResponse(AggregatedDfs dfs, int totalShards, int successfulShards, int failedShards,
                    List<ShardOperationFailedException> shardFailures, long tookInMillis) {
        super(totalShards, successfulShards, failedShards, shardFailures);
        this.dfs = dfs;
        this.tookInMillis = tookInMillis;
    }

    public AggregatedDfs getDfs() {
        return dfs;
    }

    public TimeValue getTook() {
        return new TimeValue(tookInMillis);
    }

    public long getTookInMillis() {
        return tookInMillis;
    }

    @Override
    public void readFrom(StreamInput in) throws IOException {
        super.readFrom(in);
        dfs = AggregatedDfs.readAggregatedDfs(in);
        tookInMillis = in.readVLong();
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        dfs.writeTo(out);
        out.writeVLong(tookInMillis);
    }

}
@@ -0,0 +1,62 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.action.termvector.dfs;

import org.elasticsearch.action.support.broadcast.BroadcastShardOperationRequest;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.internal.ShardSearchRequest;
import org.elasticsearch.search.internal.ShardSearchTransportRequest;

import java.io.IOException;

class ShardDfsOnlyRequest extends BroadcastShardOperationRequest {

    private ShardSearchTransportRequest shardSearchRequest = new ShardSearchTransportRequest();

    ShardDfsOnlyRequest() {

    }

    ShardDfsOnlyRequest(ShardRouting shardRouting, int numberOfShards, @Nullable String[] filteringAliases, @Nullable long nowInMillis, DfsOnlyRequest request) {
        super(shardRouting.shardId(), request);
        this.shardSearchRequest = new ShardSearchTransportRequest(request.getSearchRequest(), shardRouting, numberOfShards, false,
                filteringAliases, nowInMillis);
    }

    public ShardSearchRequest getShardSearchRequest() {
        return shardSearchRequest;
    }

    @Override
    public void readFrom(StreamInput in) throws IOException {
        super.readFrom(in);
        shardSearchRequest.readFrom(in);
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        shardSearchRequest.writeTo(out);
    }

}
@@ -0,0 +1,62 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.action.termvector.dfs;

import org.elasticsearch.action.support.broadcast.BroadcastShardOperationResponse;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.search.dfs.DfsSearchResult;

import java.io.IOException;

/**
 *
 */
class ShardDfsOnlyResponse extends BroadcastShardOperationResponse {

    private DfsSearchResult dfsSearchResult = new DfsSearchResult();

    ShardDfsOnlyResponse() {

    }

    ShardDfsOnlyResponse(ShardId shardId, DfsSearchResult dfsSearchResult) {
        super(shardId);
        this.dfsSearchResult = dfsSearchResult;
    }

    public DfsSearchResult getDfsSearchResult() {
        return dfsSearchResult;
    }

    @Override
    public void readFrom(StreamInput in) throws IOException {
        super.readFrom(in);
        dfsSearchResult.readFrom(in);
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        dfsSearchResult.writeTo(out);
    }

}
@@ -0,0 +1,160 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.action.termvector.dfs;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ShardOperationFailedException;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.DefaultShardOperationFailedException;
import org.elasticsearch.action.support.broadcast.BroadcastShardOperationFailedException;
import org.elasticsearch.action.support.broadcast.TransportBroadcastOperationAction;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.routing.GroupShardsIterator;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.AtomicArray;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.controller.SearchPhaseController;
import org.elasticsearch.search.dfs.AggregatedDfs;
import org.elasticsearch.search.dfs.DfsSearchResult;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReferenceArray;

import static com.google.common.collect.Lists.newArrayList;

/**
 * Get the dfs only with no fetch phase. This is for internal use only.
 */
public class TransportDfsOnlyAction extends TransportBroadcastOperationAction<DfsOnlyRequest, DfsOnlyResponse, ShardDfsOnlyRequest, ShardDfsOnlyResponse> {

    public static final String NAME = "internal:index/termvectors/dfs";

    private final SearchService searchService;

    private final SearchPhaseController searchPhaseController;

    @Inject
    public TransportDfsOnlyAction(Settings settings, ThreadPool threadPool, ClusterService clusterService, TransportService transportService,
                                  ActionFilters actionFilters, SearchService searchService, SearchPhaseController searchPhaseController) {
        super(settings, NAME, threadPool, clusterService, transportService, actionFilters);
        this.searchService = searchService;
        this.searchPhaseController = searchPhaseController;
    }

    @Override
    protected void doExecute(DfsOnlyRequest request, ActionListener<DfsOnlyResponse> listener) {
        request.nowInMillis = System.currentTimeMillis();
        super.doExecute(request, listener);
    }

    @Override
    protected String executor() {
        return ThreadPool.Names.SEARCH;
    }

    @Override
    protected DfsOnlyRequest newRequest() {
        return new DfsOnlyRequest();
    }

    @Override
    protected ShardDfsOnlyRequest newShardRequest() {
        return new ShardDfsOnlyRequest();
    }

    @Override
    protected ShardDfsOnlyRequest newShardRequest(int numShards, ShardRouting shard, DfsOnlyRequest request) {
        String[] filteringAliases = clusterService.state().metaData().filteringAliases(shard.index(), request.indices());
        return new ShardDfsOnlyRequest(shard, numShards, filteringAliases, request.nowInMillis, request);
    }

    @Override
    protected ShardDfsOnlyResponse newShardResponse() {
        return new ShardDfsOnlyResponse();
    }

    @Override
    protected GroupShardsIterator shards(ClusterState clusterState, DfsOnlyRequest request, String[] concreteIndices) {
        Map<String, Set<String>> routingMap = clusterState.metaData().resolveSearchRouting(request.routing(), request.indices());
        return clusterService.operationRouting().searchShards(clusterState, request.indices(), concreteIndices, routingMap, request.preference());
    }

    @Override
    protected ClusterBlockException checkGlobalBlock(ClusterState state, DfsOnlyRequest request) {
        return state.blocks().globalBlockedException(ClusterBlockLevel.READ);
    }

    @Override
    protected ClusterBlockException checkRequestBlock(ClusterState state, DfsOnlyRequest countRequest, String[] concreteIndices) {
        return state.blocks().indicesBlockedException(ClusterBlockLevel.READ, concreteIndices);
    }

    @Override
    protected DfsOnlyResponse newResponse(DfsOnlyRequest request, AtomicReferenceArray shardsResponses, ClusterState clusterState) {
        int successfulShards = 0;
        int failedShards = 0;
        List<ShardOperationFailedException> shardFailures = null;
        AtomicArray<DfsSearchResult> dfsResults = new AtomicArray<>(shardsResponses.length());
        for (int i = 0; i < shardsResponses.length(); i++) {
            Object shardResponse = shardsResponses.get(i);
            if (shardResponse == null) {
                // simply ignore non active shards
            } else if (shardResponse instanceof BroadcastShardOperationFailedException) {
                failedShards++;
                if (shardFailures == null) {
                    shardFailures = newArrayList();
                }
                shardFailures.add(new DefaultShardOperationFailedException((BroadcastShardOperationFailedException) shardResponse));
            } else {
                dfsResults.set(i, ((ShardDfsOnlyResponse) shardResponse).getDfsSearchResult());
                successfulShards++;
            }
        }
        AggregatedDfs dfs = searchPhaseController.aggregateDfs(dfsResults);
        return new DfsOnlyResponse(dfs, shardsResponses.length(), successfulShards, failedShards, shardFailures, buildTookInMillis(request));
    }

    @Override
    protected ShardDfsOnlyResponse shardOperation(ShardDfsOnlyRequest request) throws ElasticsearchException {
        DfsSearchResult dfsSearchResult = searchService.executeDfsPhase(request.getShardSearchRequest());
        searchService.freeContext(dfsSearchResult.id());
        return new ShardDfsOnlyResponse(request.shardId(), dfsSearchResult);
    }

    /**
     * Builds how long it took to execute the dfs request.
     */
    protected final long buildTookInMillis(DfsOnlyRequest request) {
        // protect ourselves against time going backwards
        // negative values don't make sense and we want to be able to serialize that thing as a vLong
        return Math.max(1, System.currentTimeMillis() - request.nowInMillis);
    }

}
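The action name `internal:index/termvectors/dfs` declared here is the one registered in `ActionModule` above and whitelisted as a post-1.4 action in `ActionNamesTests` below, together with its per-shard variant (the same name with an `[s]` suffix).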
@@ -0,0 +1,23 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/**
 * Distributed frequencies.
 */
package org.elasticsearch.action.termvector.dfs;
@@ -25,6 +25,9 @@ import org.apache.lucene.index.memory.MemoryIndex;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.action.termvector.TermVectorRequest;
 import org.elasticsearch.action.termvector.TermVectorResponse;
+import org.elasticsearch.action.termvector.dfs.DfsOnlyRequest;
+import org.elasticsearch.action.termvector.dfs.DfsOnlyResponse;
+import org.elasticsearch.action.termvector.dfs.TransportDfsOnlyAction;
 import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Strings;

@@ -44,6 +47,7 @@ import org.elasticsearch.index.settings.IndexSettings;
 import org.elasticsearch.index.shard.AbstractIndexShardComponent;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.index.shard.service.IndexShard;
+import org.elasticsearch.search.dfs.AggregatedDfs;
 
 import java.io.IOException;
 import java.util.*;

@@ -57,11 +61,13 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
 
     private IndexShard indexShard;
     private final MappingUpdatedAction mappingUpdatedAction;
+    private final TransportDfsOnlyAction dfsAction;
 
     @Inject
-    public ShardTermVectorService(ShardId shardId, @IndexSettings Settings indexSettings, MappingUpdatedAction mappingUpdatedAction) {
+    public ShardTermVectorService(ShardId shardId, @IndexSettings Settings indexSettings, MappingUpdatedAction mappingUpdatedAction, TransportDfsOnlyAction dfsAction) {
         super(shardId, indexSettings);
         this.mappingUpdatedAction = mappingUpdatedAction;
+        this.dfsAction = dfsAction;
     }
 
     // sadly, to overcome cyclic dep, we need to do this and inject it ourselves...

@@ -78,6 +84,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
         final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
         Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm));
         boolean docFromTranslog = get.source() != null;
+        AggregatedDfs dfs = null;
 
         /* fetched from translog is treated as an artificial document */
         if (docFromTranslog) {

@@ -100,7 +107,10 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
             if (topLevelFields == null) {
                 topLevelFields = termVectorsByField;
             }
-            termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
+            if (useDfs(request)) {
+                dfs = getAggregatedDfs(termVectorsByField, request);
+            }
+            termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields, dfs);
             termVectorResponse.setExists(true);
             termVectorResponse.setArtificial(!docFromTranslog);
         }

@@ -117,7 +127,10 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
             if (selectedFields != null) {
                 termVectorsByField = addGeneratedTermVectors(get, termVectorsByField, request, selectedFields);
             }
-            termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields);
+            if (useDfs(request)) {
+                dfs = getAggregatedDfs(termVectorsByField, request);
+            }
+            termVectorResponse.setFields(termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields, dfs);
             termVectorResponse.setDocVersion(docIdAndVersion.version);
             termVectorResponse.setExists(true);
         } else {

@@ -315,4 +328,14 @@ public class ShardTermVectorService extends AbstractIndexShardComponent {
         }
     }
 
+    private boolean useDfs(TermVectorRequest request) {
+        return request.dfs() && (request.fieldStatistics() || request.termStatistics());
+    }
+
+    private AggregatedDfs getAggregatedDfs(Fields termVectorFields, TermVectorRequest request) throws IOException {
+        DfsOnlyRequest dfsOnlyRequest = new DfsOnlyRequest(termVectorFields, new String[]{request.index()},
+                new String[]{request.type()}, request.selectedFields());
+        DfsOnlyResponse response = dfsAction.execute(dfsOnlyRequest).actionGet();
+        return response.getDfs();
+    }
 }
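Note the guard in `useDfs`: setting `dfs` to `true` is deliberately a no-op unless field statistics or term statistics were requested, since those are the only parts of the response that the aggregated dfs round trip can improve.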
@@ -84,6 +84,7 @@ public class RestTermVectorAction extends BaseRestHandler {
             termVectorRequest.termStatistics(request.paramAsBoolean("term_statistics", termVectorRequest.termStatistics()));
             termVectorRequest.fieldStatistics(request.paramAsBoolean("fieldStatistics", termVectorRequest.fieldStatistics()));
             termVectorRequest.fieldStatistics(request.paramAsBoolean("field_statistics", termVectorRequest.fieldStatistics()));
+            termVectorRequest.dfs(request.paramAsBoolean("dfs", termVectorRequest.dfs()));
         }
 
     static public void addFieldStringsFromParameter(TermVectorRequest termVectorRequest, String fields) {
@@ -32,7 +32,9 @@ import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
 import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
+import org.hamcrest.Matcher;
 import org.junit.Test;
 
 import java.io.IOException;

@@ -403,7 +405,6 @@ public class GetTermVectorTests extends AbstractTermVectorTests {
 
     @Test
     public void testRandomPayloadWithDelimitedPayloadTokenFilter() throws ElasticsearchException, IOException {
-
         //create the test document
         int encoding = randomIntBetween(0, 2);
         String encodingString = "";

@@ -1018,4 +1019,110 @@ public class GetTermVectorTests extends AbstractTermVectorTests {
     private static String indexOrAlias() {
         return randomBoolean() ? "test" : "alias";
     }
 
+    @Test
+    public void testDfs() throws ElasticsearchException, ExecutionException, InterruptedException, IOException {
+        logger.info("Setting up the index ...");
+        ImmutableSettings.Builder settings = settingsBuilder()
+                .put(indexSettings())
+                .put("index.analysis.analyzer", "standard")
+                .put("index.number_of_shards", randomIntBetween(2, 10)); // we need at least 2 shards
+        assertAcked(prepareCreate("test")
+                .setSettings(settings)
+                .addMapping("type1", "text", "type=string"));
+        ensureGreen();
+
+        int numDocs = scaledRandomIntBetween(25, 100);
+        logger.info("Indexing {} documents...", numDocs);
+        List<IndexRequestBuilder> builders = new ArrayList<>();
+        for (int i = 0; i < numDocs; i++) {
+            builders.add(client().prepareIndex("test", "type1", i + "").setSource("text", "cat"));
+        }
+        indexRandom(true, builders);
+
+        XContentBuilder expectedStats = jsonBuilder()
+                .startObject()
+                    .startObject("text")
+                        .startObject("field_statistics")
+                            .field("sum_doc_freq", numDocs)
+                            .field("doc_count", numDocs)
+                            .field("sum_ttf", numDocs)
+                        .endObject()
+                        .startObject("terms")
+                            .startObject("cat")
+                                .field("doc_freq", numDocs)
+                                .field("ttf", numDocs)
+                            .endObject()
+                        .endObject()
+                    .endObject()
+                .endObject();
+
+        logger.info("Without dfs 'cat' should appear strictly less than {} times.", numDocs);
+        TermVectorResponse response = client().prepareTermVector("test", "type1", randomIntBetween(0, numDocs - 1) + "")
+                .setSelectedFields("text")
+                .setFieldStatistics(true)
+                .setTermStatistics(true)
+                .get();
+        checkStats(response.getFields(), expectedStats, false);
+
+        logger.info("With dfs 'cat' should appear exactly {} times.", numDocs);
+        response = client().prepareTermVector("test", "type1", randomIntBetween(0, numDocs - 1) + "")
+                .setSelectedFields("text")
+                .setFieldStatistics(true)
+                .setTermStatistics(true)
+                .setDfs(true)
+                .get();
+        checkStats(response.getFields(), expectedStats, true);
+    }
+
+    private void checkStats(Fields fields, XContentBuilder xContentBuilder, boolean isEqual) throws IOException {
+        Map<String, Object> stats = JsonXContent.jsonXContent.createParser(xContentBuilder.bytes()).map();
+        assertThat("number of fields expected:", fields.size(), equalTo(stats.size()));
+        for (String fieldName : fields) {
+            logger.info("Checking field statistics for field: {}", fieldName);
+            Terms terms = fields.terms(fieldName);
+            Map<String, Integer> fieldStatistics = getFieldStatistics(stats, fieldName);
+            String msg = "field: " + fieldName + " ";
+            assertThat(msg + "sum_doc_freq:",
+                    (int) terms.getSumDocFreq(),
+                    equalOrLessThanTo(fieldStatistics.get("sum_doc_freq"), isEqual));
+            assertThat(msg + "doc_count:",
+                    terms.getDocCount(),
+                    equalOrLessThanTo(fieldStatistics.get("doc_count"), isEqual));
+            assertThat(msg + "sum_ttf:",
+                    (int) terms.getSumTotalTermFreq(),
+                    equalOrLessThanTo(fieldStatistics.get("sum_ttf"), isEqual));
+
+            final TermsEnum termsEnum = terms.iterator(null);
+            BytesRef text;
+            while ((text = termsEnum.next()) != null) {
+                String term = text.utf8ToString();
+                logger.info("Checking term statistics for term: ({}, {})", fieldName, term);
+                Map<String, Integer> termStatistics = getTermStatistics(stats, fieldName, term);
+                msg = "term: (" + fieldName + "," + term + ") ";
+                assertThat(msg + "doc_freq:",
+                        termsEnum.docFreq(),
+                        equalOrLessThanTo(termStatistics.get("doc_freq"), isEqual));
+                assertThat(msg + "ttf:",
+                        (int) termsEnum.totalTermFreq(),
+                        equalOrLessThanTo(termStatistics.get("ttf"), isEqual));
+            }
+        }
+    }
+
+    private Map<String, Integer> getFieldStatistics(Map<String, Object> stats, String fieldName) throws IOException {
+        return (Map<String, Integer>) ((Map<String, Object>) stats.get(fieldName)).get("field_statistics");
+    }
+
+    private Map<String, Integer> getTermStatistics(Map<String, Object> stats, String fieldName, String term) {
+        return (Map<String, Integer>) ((Map<String, Object>) ((Map<String, Object>) stats.get(fieldName)).get("terms")).get(term);
+    }
+
+    private Matcher<Integer> equalOrLessThanTo(Integer value, boolean isEqual) {
+        if (isEqual) {
+            return equalTo(value);
+        }
+        return lessThan(value);
+    }
+
 }
@@ -28,6 +28,7 @@ import org.elasticsearch.action.bench.BenchmarkAction;
 import org.elasticsearch.action.bench.BenchmarkService;
 import org.elasticsearch.action.bench.BenchmarkStatusAction;
 import org.elasticsearch.action.exists.ExistsAction;
+import org.elasticsearch.action.termvector.dfs.TransportDfsOnlyAction;
 import org.elasticsearch.search.action.SearchServiceTransportAction;
 import org.elasticsearch.repositories.VerifyNodeRepositoryAction;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;

@@ -146,5 +147,7 @@ public class ActionNamesTests extends ElasticsearchIntegrationTest {
         post_1_4_actions.add(SearchServiceTransportAction.FETCH_ID_SCROLL_ACTION_NAME);
         post_1_4_actions.add(VerifyRepositoryAction.NAME);
         post_1_4_actions.add(VerifyNodeRepositoryAction.ACTION_NAME);
+        post_1_4_actions.add(TransportDfsOnlyAction.NAME);
+        post_1_4_actions.add(TransportDfsOnlyAction.NAME + "[s]");
     }
 }