HLRC API for _termvectors (#33447)
* HLRC API for _termvectors relates to #27205
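
For orientation, a minimal usage sketch of the API this commit adds (not part of the diff; the index name "authors", type "_doc", and field "user" are illustrative, mirroring the tests below):

    // Assuming an initialized RestHighLevelClient "client": build a request
    // for a stored document, execute it synchronously, and walk the
    // per-field term vectors in the parsed response.
    TermVectorsRequest request = new TermVectorsRequest("authors", "_doc", "1");
    request.setFields("user");
    TermVectorsResponse response = client.termvectors(request, RequestOptions.DEFAULT);
    if (response.getTermVectorsList() != null) {
        for (TermVectorsResponse.TermVector tv : response.getTermVectorsList()) {
            System.out.println(tv.getFieldName());
        }
    }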
@ -77,6 +77,7 @@ import org.elasticsearch.script.mustache.MultiSearchTemplateRequest;
import org.elasticsearch.script.mustache.SearchTemplateRequest;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.client.core.TermVectorsRequest;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
@ -578,6 +579,19 @@ final class RequestConverters {
        return req;
    }

    static Request termVectors(TermVectorsRequest tvrequest) throws IOException {
        String endpoint = new EndpointBuilder().addPathPart(
            tvrequest.getIndex(), tvrequest.getType(), tvrequest.getId()).addPathPartAsIs("_termvectors").build();
        Request request = new Request(HttpGet.METHOD_NAME, endpoint);
        Params params = new Params(request);
        params.withRouting(tvrequest.getRouting());
        params.withPreference(tvrequest.getPreference());
        params.withFields(tvrequest.getFields());
        params.withRealtime(tvrequest.getRealtime());
        request.setEntity(createEntity(tvrequest, REQUEST_BODY_CONTENT_TYPE));
        return request;
    }

    static Request getScript(GetStoredScriptRequest getStoredScriptRequest) {
        String endpoint = new EndpointBuilder().addPathPartAsIs("_scripts").addPathPart(getStoredScriptRequest.id()).build();
        Request request = new Request(HttpGet.METHOD_NAME, endpoint);
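A note for reviewers on the converter above: EndpointBuilder.addPathPart is assumed to skip null path parts, so the same converter serves both stored and artificial documents. A hedged sketch of the endpoints it produces (index and type names are illustrative):

    // Stored document: the id is set, so it appears in the path.
    Request stored = RequestConverters.termVectors(new TermVectorsRequest("authors", "_doc", "1"));
    // stored.getEndpoint() -> "/authors/_doc/1/_termvectors"

    // Artificial document: the id is null and is dropped from the path.
    Request artificial = RequestConverters.termVectors(new TermVectorsRequest("authors", "_doc"));
    // artificial.getEndpoint() -> "/authors/_doc/_termvectors"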
@ -56,6 +56,8 @@ import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.core.TermVectorsResponse;
import org.elasticsearch.client.core.TermVectorsRequest;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.CheckedFunction;
import org.elasticsearch.common.ParseField;
@ -1029,6 +1031,36 @@ public class RestHighLevelClient implements Closeable {
            listener, singleton(404));
    }

    /**
     * Calls the Term Vectors API
     *
     * See <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html">Term Vectors API on
     * elastic.co</a>
     *
     * @param request the request
     * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
     */
    public final TermVectorsResponse termvectors(TermVectorsRequest request, RequestOptions options) throws IOException {
        return performRequestAndParseEntity(request, RequestConverters::termVectors, options, TermVectorsResponse::fromXContent,
            emptySet());
    }

    /**
     * Asynchronously calls the Term Vectors API
     *
     * See <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html">Term Vectors API on
     * elastic.co</a>
     * @param request the request
     * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
     * @param listener the listener to be notified upon request completion
     */
    public final void termvectorsAsync(TermVectorsRequest request, RequestOptions options, ActionListener<TermVectorsResponse> listener) {
        performRequestAsyncAndParseEntity(request, RequestConverters::termVectors, options, TermVectorsResponse::fromXContent, listener,
            emptySet());
    }

    /**
     * Executes a request using the Ranking Evaluation API.
     * See <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/search-rank-eval.html">Ranking Evaluation API
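Worth noting: both methods pass emptySet() for ignored status codes, so a lookup against a missing index surfaces as an ElasticsearchException rather than a response with found=false; the CrudIT test below relies on exactly this. A brief hedged sketch:

    try {
        client.termvectors(new TermVectorsRequest("no-such-index", "_doc", "1"), RequestOptions.DEFAULT);
    } catch (ElasticsearchException e) {
        // no status codes are ignored, so the 404 propagates as an exception
        assert e.status() == RestStatus.NOT_FOUND;
    }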
@ -0,0 +1,228 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.client.core;

import org.elasticsearch.client.Validatable;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;

import java.io.IOException;
import java.io.InputStream;
import java.util.Map;

public class TermVectorsRequest implements ToXContentObject, Validatable {

    private final String index;
    private final String type;
    private String id = null;
    private String routing = null;
    private String preference = null;
    private boolean realtime = true;
    private String[] fields = null;
    private boolean requestPositions = true;
    private boolean requestPayloads = true;
    private boolean requestOffsets = true;
    private boolean requestFieldStatistics = true;
    private boolean requestTermStatistics = false;
    private Map<String, String> perFieldAnalyzer = null;
    private Map<String, Integer> filterSettings = null;
    private XContentBuilder docBuilder = null;

    /**
     * Constructs TermVectorsRequest for the given document
     * @param index - index of the document
     * @param type - type of the document
     * @param docId - id of the document
     */
    public TermVectorsRequest(String index, String type, String docId) {
        this(index, type);
        this.id = docId;
    }

    /**
     * Constructs TermVectorsRequest for an artificial document
     * @param index - index of the document
     * @param type - type of the document
     */
    public TermVectorsRequest(String index, String type) {
        this.index = index;
        this.type = type;
    }

    /**
     * Returns the index of the request
     */
    public String getIndex() {
        return index;
    }

    /**
     * Returns the type of the request
     */
    public String getType() {
        return type;
    }

    /**
     * Returns the id of the request
     * can be NULL if there is no document ID
     */
    public String getId() {
        return id;
    }

    /**
     * Sets the fields for which term vectors information should be retrieved
     */
    public void setFields(String... fields) {
        this.fields = fields;
    }

    public String[] getFields() {
        return fields;
    }

    /**
     * Sets whether to request term positions
     */
    public void setPositions(boolean requestPositions) {
        this.requestPositions = requestPositions;
    }

    /**
     * Sets whether to request term payloads
     */
    public void setPayloads(boolean requestPayloads) {
        this.requestPayloads = requestPayloads;
    }

    /**
     * Sets whether to request term offsets
     */
    public void setOffsets(boolean requestOffsets) {
        this.requestOffsets = requestOffsets;
    }

    /**
     * Sets whether to request field statistics
     */
    public void setFieldStatistics(boolean requestFieldStatistics) {
        this.requestFieldStatistics = requestFieldStatistics;
    }

    /**
     * Sets whether to request term statistics
     */
    public void setTermStatistics(boolean requestTermStatistics) {
        this.requestTermStatistics = requestTermStatistics;
    }

    /**
     * Sets analyzers to use per field, overriding the analyzers defined for the fields
     */
    public void setPerFieldAnalyzer(Map<String, String> perFieldAnalyzer) {
        this.perFieldAnalyzer = perFieldAnalyzer;
    }

    /**
     * Sets an artificial document on which to request _termvectors
     */
    public void setDoc(XContentBuilder docBuilder) {
        this.docBuilder = docBuilder;
    }

    /**
     * Sets conditions for terms filtering
     */
    public void setFilterSettings(Map<String, Integer> filterSettings) {
        this.filterSettings = filterSettings;
    }

    /**
     * Sets a routing to route a request to a particular shard
     */
    public void setRouting(String routing) {
        this.routing = routing;
    }

    public String getRouting() {
        return routing;
    }

    /**
     * Sets a preference of which shard copies to execute the request on
     */
    public void setPreference(String preference) {
        this.preference = preference;
    }

    public String getPreference() {
        return preference;
    }

    /**
     * Sets if the request should be realtime or near-realtime
     */
    public void setRealtime(boolean realtime) {
        this.realtime = realtime;
    }

    /**
     * Returns if the request is realtime (true) or near-realtime (false)
     */
    public boolean getRealtime() {
        return realtime;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        // set values only when different from defaults
        if (requestPositions == false) builder.field("positions", false);
        if (requestPayloads == false) builder.field("payloads", false);
        if (requestOffsets == false) builder.field("offsets", false);
        if (requestFieldStatistics == false) builder.field("field_statistics", false);
        if (requestTermStatistics) builder.field("term_statistics", true);
        if (perFieldAnalyzer != null) builder.field("per_field_analyzer", perFieldAnalyzer);

        if (docBuilder != null) {
            BytesReference doc = BytesReference.bytes(docBuilder);
            try (InputStream stream = doc.streamInput()) {
                builder.rawField("doc", stream, docBuilder.contentType());
            }
        }

        if (filterSettings != null) {
            builder.startObject("filter");
            String[] filterSettingNames =
                {"max_num_terms", "min_term_freq", "max_term_freq", "min_doc_freq", "max_doc_freq", "min_word_length", "max_word_length"};
            for (String settingName : filterSettingNames) {
                if (filterSettings.containsKey(settingName)) builder.field(settingName, filterSettings.get(settingName));
            }
            builder.endObject();
        }
        builder.endObject();
        return builder;
    }

}
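To illustrate the toXContent method above: flags are serialized only when they differ from the server-side defaults, so a freshly constructed request serializes to an empty JSON object. A hedged sketch of the body produced for a tweaked request (values are illustrative; index, type and id travel in the URL, not in the body):

    TermVectorsRequest request = new TermVectorsRequest("authors", "_doc", "1");
    request.setTermStatistics(true);   // non-default, so it is serialized
    request.setPositions(false);       // non-default, so it is serialized
    Map<String, Integer> filterSettings = new HashMap<>();
    filterSettings.put("max_num_terms", 3);
    request.setFilterSettings(filterSettings);
    // Expected request body:
    // {"positions":false,"term_statistics":true,"filter":{"max_num_terms":3}}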
@ -0,0 +1,486 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.client.core;

import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentParser;
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;

import java.util.Collections;
import java.util.List;
import java.util.Comparator;
import java.util.Objects;

public class TermVectorsResponse {
    private final String index;
    private final String type;
    private final String id;
    private final long docVersion;
    private final boolean found;
    private final long tookInMillis;
    private final List<TermVector> termVectorList;

    public TermVectorsResponse(
            String index, String type, String id, long version, boolean found, long tookInMillis, List<TermVector> termVectorList) {
        this.index = index;
        this.type = type;
        this.id = id;
        this.docVersion = version;
        this.found = found;
        this.tookInMillis = tookInMillis;
        this.termVectorList = termVectorList;
    }

    private static ConstructingObjectParser<TermVectorsResponse, Void> PARSER = new ConstructingObjectParser<>("term_vectors", true,
        args -> {
            // as the response comes from server, we are sure that args[6] will be a list of TermVector
            @SuppressWarnings("unchecked") List<TermVector> termVectorList = (List<TermVector>) args[6];
            if (termVectorList != null) {
                Collections.sort(termVectorList, Comparator.comparing(TermVector::getFieldName));
            }
            return new TermVectorsResponse(
                (String) args[0],
                (String) args[1],
                (String) args[2],
                (long) args[3],
                (boolean) args[4],
                (long) args[5],
                termVectorList
            );
        }
    );

    static {
        PARSER.declareString(constructorArg(), new ParseField("_index"));
        PARSER.declareString(constructorArg(), new ParseField("_type"));
        PARSER.declareString(optionalConstructorArg(), new ParseField("_id"));
        PARSER.declareLong(constructorArg(), new ParseField("_version"));
        PARSER.declareBoolean(constructorArg(), new ParseField("found"));
        PARSER.declareLong(constructorArg(), new ParseField("took"));
        PARSER.declareNamedObjects(optionalConstructorArg(),
            (p, c, fieldName) -> TermVector.fromXContent(p, fieldName), new ParseField("term_vectors"));
    }

    public static TermVectorsResponse fromXContent(XContentParser parser) {
        return PARSER.apply(parser, null);
    }

    /**
     * Returns the index for the response
     */
    public String getIndex() {
        return index;
    }

    /**
     * Returns the type for the response
     */
    public String getType() {
        return type;
    }

    /**
     * Returns the id of the request
     * can be NULL if there is no document ID
     */
    public String getId() {
        return id;
    }

    /**
     * Returns if the document is found
     * always <code>true</code> for artificial documents
     */
    public boolean getFound() {
        return found;
    }

    /**
     * Returns the document version
     */
    public long getDocVersion() {
        return docVersion;
    }

    /**
     * Returns the time that a request took in milliseconds
     */
    public long getTookInMillis() {
        return tookInMillis;
    }

    /**
     * Returns the list of term vectors
     */
    public List<TermVector> getTermVectorsList() {
        return termVectorList;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        if (!(obj instanceof TermVectorsResponse)) return false;
        TermVectorsResponse other = (TermVectorsResponse) obj;
        return index.equals(other.index)
            && type.equals(other.type)
            && Objects.equals(id, other.id)
            && docVersion == other.docVersion
            && found == other.found
            && tookInMillis == other.tookInMillis
            && Objects.equals(termVectorList, other.termVectorList);
    }

    @Override
    public int hashCode() {
        return Objects.hash(index, type, id, docVersion, found, tookInMillis, termVectorList);
    }


    public static final class TermVector {

        private static ConstructingObjectParser<TermVector, String> PARSER = new ConstructingObjectParser<>("term_vector", true,
            (args, ctxFieldName) -> {
                // as the response comes from server, we are sure that args[1] will be a list of Term
                @SuppressWarnings("unchecked") List<Term> terms = (List<Term>) args[1];
                if (terms != null) {
                    Collections.sort(terms, Comparator.comparing(Term::getTerm));
                }
                return new TermVector(ctxFieldName, (FieldStatistics) args[0], terms);
            }
        );

        static {
            PARSER.declareObject(optionalConstructorArg(),
                (p, c) -> FieldStatistics.fromXContent(p), new ParseField("field_statistics"));
            PARSER.declareNamedObjects(optionalConstructorArg(), (p, c, term) -> Term.fromXContent(p, term), new ParseField("terms"));
        }

        private final String fieldName;
        @Nullable
        private final FieldStatistics fieldStatistics;
        @Nullable
        private final List<Term> terms;

        public TermVector(String fieldName, FieldStatistics fieldStatistics, List<Term> terms) {
            this.fieldName = fieldName;
            this.fieldStatistics = fieldStatistics;
            this.terms = terms;
        }

        public static TermVector fromXContent(XContentParser parser, String fieldName) {
            return PARSER.apply(parser, fieldName);
        }

        /**
         * Returns the field name of the current term vector
         */
        public String getFieldName() {
            return fieldName;
        }

        /**
         * Returns the list of terms for the current term vector
         */
        public List<Term> getTerms() {
            return terms;
        }

        /**
         * Returns the field statistics for the current field
         */
        public FieldStatistics getFieldStatistics() {
            return fieldStatistics;
        }


        @Override
        public boolean equals(Object obj) {
            if (this == obj) return true;
            if (!(obj instanceof TermVector)) return false;
            TermVector other = (TermVector) obj;
            return fieldName.equals(other.fieldName)
                && Objects.equals(fieldStatistics, other.fieldStatistics)
                && Objects.equals(terms, other.terms);
        }

        @Override
        public int hashCode() {
            return Objects.hash(fieldName, fieldStatistics, terms);
        }

        // Class containing general field statistics for the field
        public static final class FieldStatistics {

            private static ConstructingObjectParser<FieldStatistics, Void> PARSER = new ConstructingObjectParser<>(
                "field_statistics", true,
                args -> {
                    return new FieldStatistics((long) args[0], (int) args[1], (long) args[2]);
                }
            );

            static {
                PARSER.declareLong(constructorArg(), new ParseField("sum_doc_freq"));
                PARSER.declareInt(constructorArg(), new ParseField("doc_count"));
                PARSER.declareLong(constructorArg(), new ParseField("sum_ttf"));
            }
            private final long sumDocFreq;
            private final int docCount;
            private final long sumTotalTermFreq;

            public FieldStatistics(long sumDocFreq, int docCount, long sumTotalTermFreq) {
                this.sumDocFreq = sumDocFreq;
                this.docCount = docCount;
                this.sumTotalTermFreq = sumTotalTermFreq;
            }

            public static FieldStatistics fromXContent(XContentParser parser) {
                return PARSER.apply(parser, null);
            }

            /**
             * Returns how many documents this field contains
             */
            public int getDocCount() {
                return docCount;
            }

            /**
             * Returns the sum of document frequencies for all terms in this field
             */
            public long getSumDocFreq() {
                return sumDocFreq;
            }

            /**
             * Returns the sum of total term frequencies of all terms in this field
             */
            public long getSumTotalTermFreq() {
                return sumTotalTermFreq;
            }

            @Override
            public boolean equals(Object obj) {
                if (this == obj) return true;
                if (!(obj instanceof FieldStatistics)) return false;
                FieldStatistics other = (FieldStatistics) obj;
                return docCount == other.docCount
                    && sumDocFreq == other.sumDocFreq
                    && sumTotalTermFreq == other.sumTotalTermFreq;
            }

            @Override
            public int hashCode() {
                return Objects.hash(docCount, sumDocFreq, sumTotalTermFreq);
            }
        }


        public static final class Term {
            private static ConstructingObjectParser<Term, String> PARSER = new ConstructingObjectParser<>("token", true,
                (args, ctxTerm) -> {
                    // as the response comes from server, we are sure that args[4] will be a list of Token
                    @SuppressWarnings("unchecked") List<Token> tokens = (List<Token>) args[4];
                    if (tokens != null) {
                        Collections.sort(
                            tokens,
                            Comparator.comparing(Token::getPosition, Comparator.nullsFirst(Integer::compareTo))
                                .thenComparing(Token::getStartOffset, Comparator.nullsFirst(Integer::compareTo))
                                .thenComparing(Token::getEndOffset, Comparator.nullsFirst(Integer::compareTo))
                        );
                    }
                    return new Term(ctxTerm, (int) args[0], (Integer) args[1], (Long) args[2], (Float) args[3], tokens);
                }
            );
            static {
                PARSER.declareInt(constructorArg(), new ParseField("term_freq"));
                PARSER.declareInt(optionalConstructorArg(), new ParseField("doc_freq"));
                PARSER.declareLong(optionalConstructorArg(), new ParseField("ttf"));
                PARSER.declareFloat(optionalConstructorArg(), new ParseField("score"));
                PARSER.declareObjectArray(optionalConstructorArg(), (p, c) -> Token.fromXContent(p), new ParseField("tokens"));
            }

            private final String term;
            private final int termFreq;
            @Nullable
            private final Integer docFreq;
            @Nullable
            private final Long totalTermFreq;
            @Nullable
            private final Float score;
            @Nullable
            private final List<Token> tokens;

            public Term(String term, int termFreq, Integer docFreq, Long totalTermFreq, Float score, List<Token> tokens) {
                this.term = term;
                this.termFreq = termFreq;
                this.docFreq = docFreq;
                this.totalTermFreq = totalTermFreq;
                this.score = score;
                this.tokens = tokens;
            }

            public static Term fromXContent(XContentParser parser, String term) {
                return PARSER.apply(parser, term);
            }

            /**
             * Returns the string representation of the term
             */
            public String getTerm() {
                return term;
            }

            /**
             * Returns term frequency - the number of times this term occurs in the current document
             */
            public int getTermFreq() {
                return termFreq;
            }

            /**
             * Returns document frequency - the number of documents in the index that contain this term
             */
            public Integer getDocFreq() {
                return docFreq;
            }

            /**
             * Returns total term frequency - the number of times this term occurs across all documents
             */
            public Long getTotalTermFreq() {
                return totalTermFreq;
            }

            /**
             * Returns tf-idf score, if the request used some form of terms filtering
             */
            public Float getScore() {
                return score;
            }

            /**
             * Returns a list of tokens for the term
             */
            public List<Token> getTokens() {
                return tokens;
            }

            @Override
            public boolean equals(Object obj) {
                if (this == obj) return true;
                if (!(obj instanceof Term)) return false;
                Term other = (Term) obj;
                return term.equals(other.term)
                    && termFreq == other.termFreq
                    && Objects.equals(docFreq, other.docFreq)
                    && Objects.equals(totalTermFreq, other.totalTermFreq)
                    && Objects.equals(score, other.score)
                    && Objects.equals(tokens, other.tokens);
            }

            @Override
            public int hashCode() {
                return Objects.hash(term, termFreq, docFreq, totalTermFreq, score, tokens);
            }
        }


        public static final class Token {

            private static ConstructingObjectParser<Token, Void> PARSER = new ConstructingObjectParser<>("token", true,
                args -> {
                    return new Token((Integer) args[0], (Integer) args[1], (Integer) args[2], (String) args[3]);
                });
            static {
                PARSER.declareInt(optionalConstructorArg(), new ParseField("start_offset"));
                PARSER.declareInt(optionalConstructorArg(), new ParseField("end_offset"));
                PARSER.declareInt(optionalConstructorArg(), new ParseField("position"));
                PARSER.declareString(optionalConstructorArg(), new ParseField("payload"));
            }

            @Nullable
            private final Integer startOffset;
            @Nullable
            private final Integer endOffset;
            @Nullable
            private final Integer position;
            @Nullable
            private final String payload;

            public Token(Integer startOffset, Integer endOffset, Integer position, String payload) {
                this.startOffset = startOffset;
                this.endOffset = endOffset;
                this.position = position;
                this.payload = payload;
            }

            public static Token fromXContent(XContentParser parser) {
                return PARSER.apply(parser, null);
            }

            /**
             * Returns the start offset of the token in the document's field
             */
            public Integer getStartOffset() {
                return startOffset;
            }

            /**
             * Returns the end offset of the token in the document's field
             */
            public Integer getEndOffset() {
                return endOffset;
            }

            /**
             * Returns the position of the token in the document's field
             */
            public Integer getPosition() {
                return position;
            }

            /**
             * Returns the payload of the token or <code>null</code> if the payload doesn't exist
             */
            public String getPayload() {
                return payload;
            }

            @Override
            public boolean equals(Object obj) {
                if (this == obj) return true;
                if (!(obj instanceof Token)) return false;
                Token other = (Token) obj;
                return Objects.equals(startOffset, other.startOffset)
                    && Objects.equals(endOffset, other.endOffset)
                    && Objects.equals(position, other.position)
                    && Objects.equals(payload, other.payload);
            }

            @Override
            public int hashCode() {
                return Objects.hash(startOffset, endOffset, position, payload);
            }
        }
    }
}
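A hedged sketch of driving fromXContent above directly from a raw JSON response body (parser construction follows the usual XContent pattern; the sample payload is illustrative):

    String json = "{\"_index\":\"authors\",\"_type\":\"_doc\",\"_id\":\"1\","
        + "\"_version\":1,\"found\":true,\"took\":2,"
        + "\"term_vectors\":{\"user\":{\"terms\":{\"kimchy\":{\"term_freq\":1}}}}}";
    try (XContentParser parser = XContentType.JSON.xContent().createParser(
            NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json)) {
        TermVectorsResponse response = TermVectorsResponse.fromXContent(parser);
        // term vectors are sorted by field name while parsing
        assert response.getFound();
        assert "user".equals(response.getTermVectorsList().get(0).getFieldName());
    }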
@ -44,12 +44,15 @@ import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.support.WriteRequest.RefreshPolicy;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.core.TermVectorsRequest;
import org.elasticsearch.client.core.TermVectorsResponse;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.get.GetResult;

@ -73,6 +76,7 @@ import org.joda.time.format.DateTimeFormat;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

@ -80,6 +84,7 @@ import java.util.concurrent.atomic.AtomicReference;

import static java.util.Collections.singletonMap;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.lessThan;

@ -1154,4 +1159,80 @@ public class CrudIT extends ESRestHighLevelClientTestCase {
            assertEquals(routing, getResponse.getField("_routing").getValue());
        }
    }

    // Not entirely sure if _termvectors belongs to CRUD, and in the absence of a better place, will have it here
    public void testTermvectors() throws IOException {
        final String sourceIndex = "index1";
        {
            // prepare : index docs
            Settings settings = Settings.builder()
                .put("number_of_shards", 1)
                .put("number_of_replicas", 0)
                .build();
            String mappings = "\"_doc\":{\"properties\":{\"field\":{\"type\":\"text\"}}}";
            createIndex(sourceIndex, settings, mappings);
            assertEquals(
                RestStatus.OK,
                highLevelClient().bulk(
                    new BulkRequest()
                        .add(new IndexRequest(sourceIndex, "_doc", "1")
                            .source(Collections.singletonMap("field", "value1"), XContentType.JSON))
                        .add(new IndexRequest(sourceIndex, "_doc", "2")
                            .source(Collections.singletonMap("field", "value2"), XContentType.JSON))
                        .setRefreshPolicy(RefreshPolicy.IMMEDIATE),
                    RequestOptions.DEFAULT
                ).status()
            );
        }
        {
            // test _termvectors on real documents
            TermVectorsRequest tvRequest = new TermVectorsRequest(sourceIndex, "_doc", "1");
            tvRequest.setFields("field");
            TermVectorsResponse tvResponse = execute(tvRequest, highLevelClient()::termvectors, highLevelClient()::termvectorsAsync);

            TermVectorsResponse.TermVector.Token expectedToken = new TermVectorsResponse.TermVector.Token(0, 6, 0, null);
            TermVectorsResponse.TermVector.Term expectedTerm = new TermVectorsResponse.TermVector.Term(
                "value1", 1, null, null, null, Collections.singletonList(expectedToken));
            TermVectorsResponse.TermVector.FieldStatistics expectedFieldStats =
                new TermVectorsResponse.TermVector.FieldStatistics(2, 2, 2);
            TermVectorsResponse.TermVector expectedTV =
                new TermVectorsResponse.TermVector("field", expectedFieldStats, Collections.singletonList(expectedTerm));
            List<TermVectorsResponse.TermVector> expectedTVlist = Collections.singletonList(expectedTV);

            assertThat(tvResponse.getIndex(), equalTo(sourceIndex));
            assertThat(Integer.valueOf(tvResponse.getId()), equalTo(1));
            assertTrue(tvResponse.getFound());
            assertEquals(expectedTVlist, tvResponse.getTermVectorsList());
        }
        {
            // test _termvectors on artificial documents
            TermVectorsRequest tvRequest = new TermVectorsRequest(sourceIndex, "_doc");
            XContentBuilder docBuilder = XContentFactory.jsonBuilder();
            docBuilder.startObject().field("field", "valuex").endObject();
            tvRequest.setDoc(docBuilder);
            TermVectorsResponse tvResponse = execute(tvRequest, highLevelClient()::termvectors, highLevelClient()::termvectorsAsync);

            TermVectorsResponse.TermVector.Token expectedToken = new TermVectorsResponse.TermVector.Token(0, 6, 0, null);
            TermVectorsResponse.TermVector.Term expectedTerm = new TermVectorsResponse.TermVector.Term(
                "valuex", 1, null, null, null, Collections.singletonList(expectedToken));
            TermVectorsResponse.TermVector.FieldStatistics expectedFieldStats =
                new TermVectorsResponse.TermVector.FieldStatistics(2, 2, 2);
            TermVectorsResponse.TermVector expectedTV =
                new TermVectorsResponse.TermVector("field", expectedFieldStats, Collections.singletonList(expectedTerm));
            List<TermVectorsResponse.TermVector> expectedTVlist = Collections.singletonList(expectedTV);

            assertThat(tvResponse.getIndex(), equalTo(sourceIndex));
            assertTrue(tvResponse.getFound());
            assertEquals(expectedTVlist, tvResponse.getTermVectorsList());
        }
    }

    // Not entirely sure if _termvectors belongs to CRUD, and in the absence of a better place, will have it here
    public void testTermvectorsWithNonExistentIndex() {
        TermVectorsRequest request = new TermVectorsRequest("non-existent", "non-existent", "non-existent");

        ElasticsearchException exception = expectThrows(ElasticsearchException.class,
            () -> execute(request, highLevelClient()::termvectors, highLevelClient()::termvectorsAsync));
        assertEquals(RestStatus.NOT_FOUND, exception.status());
    }
}
@ -53,6 +53,7 @@ import org.elasticsearch.action.support.master.AcknowledgedRequest;
import org.elasticsearch.action.support.master.MasterNodeReadRequest;
import org.elasticsearch.action.support.master.MasterNodeRequest;
import org.elasticsearch.action.support.replication.ReplicationRequest;
import org.elasticsearch.client.core.TermVectorsRequest;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.client.RequestConverters.EndpointBuilder;
import org.elasticsearch.common.CheckedBiConsumer;

@ -1177,6 +1178,46 @@ public class RequestConvertersTests extends ESTestCase {
        assertToXContentBody(explainRequest, request.getEntity());
    }

    public void testTermVectors() throws IOException {
        String index = randomAlphaOfLengthBetween(3, 10);
        String type = randomAlphaOfLengthBetween(3, 10);
        String id = randomAlphaOfLengthBetween(3, 10);
        TermVectorsRequest tvRequest = new TermVectorsRequest(index, type, id);
        Map<String, String> expectedParams = new HashMap<>();
        String[] fields;
        if (randomBoolean()) {
            String routing = randomAlphaOfLengthBetween(3, 10);
            tvRequest.setRouting(routing);
            expectedParams.put("routing", routing);
        }
        if (randomBoolean()) {
            tvRequest.setRealtime(false);
            expectedParams.put("realtime", "false");
        }

        boolean hasFields = randomBoolean();
        if (hasFields) {
            fields = generateRandomStringArray(10, 5, false, false);
            tvRequest.setFields(fields);
        }

        Request request = RequestConverters.termVectors(tvRequest);
        StringJoiner endpoint = new StringJoiner("/", "/", "");
        endpoint.add(index).add(type).add(id).add("_termvectors");

        assertEquals(HttpGet.METHOD_NAME, request.getMethod());
        assertEquals(endpoint.toString(), request.getEndpoint());
        if (hasFields) {
            assertThat(request.getParameters(), hasKey("fields"));
            String[] requestFields = Strings.splitStringByCommaToArray(request.getParameters().get("fields"));
            assertArrayEquals(tvRequest.getFields(), requestFields);
        }
        for (Map.Entry<String, String> param : expectedParams.entrySet()) {
            assertThat(request.getParameters(), hasEntry(param.getKey(), param.getValue()));
        }
        assertToXContentBody(tvRequest, request.getEntity());
    }

    public void testFieldCaps() {
        // Create a random request.
        String[] indices = randomIndicesNames(0, 5);
@ -661,8 +661,7 @@ public class RestHighLevelClientTests extends ESTestCase {
            "mtermvectors",
            "render_search_template",
            "scripts_painless_execute",
            "tasks.get"
        };
        //These API are not required for high-level client feature completeness
        String[] notRequiredApi = new String[] {
@ -0,0 +1,203 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.client.core;

import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.test.ESTestCase;

import java.util.ArrayList;
import java.util.List;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;

import static org.elasticsearch.test.AbstractXContentTestCase.xContentTester;

public class TermVectorsResponseTests extends ESTestCase {

    public void testFromXContent() throws IOException {
        xContentTester(
            this::createParser,
            this::createTestInstance,
            this::toXContent,
            TermVectorsResponse::fromXContent)
            .supportsUnknownFields(true)
            .randomFieldsExcludeFilter(field ->
                field.endsWith("term_vectors") || field.endsWith("terms") || field.endsWith("tokens"))
            .test();
    }

    private void toXContent(TermVectorsResponse response, XContentBuilder builder) throws IOException {
        builder.startObject();
        builder.field("_index", response.getIndex());
        builder.field("_type", response.getType());
        if (response.getId() != null) {
            builder.field("_id", response.getId());
        }
        builder.field("_version", response.getDocVersion());
        builder.field("found", response.getFound());
        builder.field("took", response.getTookInMillis());
        List<TermVectorsResponse.TermVector> termVectorList = response.getTermVectorsList();
        if (termVectorList != null) {
            Collections.sort(termVectorList, Comparator.comparing(TermVectorsResponse.TermVector::getFieldName));
            builder.startObject("term_vectors");
            for (TermVectorsResponse.TermVector tv : termVectorList) {
                toXContent(tv, builder);
            }
            builder.endObject();
        }
        builder.endObject();
    }

    private void toXContent(TermVectorsResponse.TermVector tv, XContentBuilder builder) throws IOException {
        builder.startObject(tv.getFieldName());
        // build fields_statistics
        if (tv.getFieldStatistics() != null) {
            builder.startObject("field_statistics");
            builder.field("sum_doc_freq", tv.getFieldStatistics().getSumDocFreq());
            builder.field("doc_count", tv.getFieldStatistics().getDocCount());
            builder.field("sum_ttf", tv.getFieldStatistics().getSumTotalTermFreq());
            builder.endObject();
        }
        // build terms
        List<TermVectorsResponse.TermVector.Term> terms = tv.getTerms();
        if (terms != null) {
            Collections.sort(terms, Comparator.comparing(TermVectorsResponse.TermVector.Term::getTerm));
            builder.startObject("terms");
            for (TermVectorsResponse.TermVector.Term term : terms) {
                builder.startObject(term.getTerm());
                // build term_statistics
                if (term.getDocFreq() != null) builder.field("doc_freq", term.getDocFreq());
                if (term.getTotalTermFreq() != null) builder.field("ttf", term.getTotalTermFreq());
                builder.field("term_freq", term.getTermFreq());

                // build tokens
                List<TermVectorsResponse.TermVector.Token> tokens = term.getTokens();
                if (tokens != null) {
                    Collections.sort(
                        tokens,
                        Comparator.comparing(TermVectorsResponse.TermVector.Token::getPosition, Comparator.nullsFirst(Integer::compareTo))
                            .thenComparing(TermVectorsResponse.TermVector.Token::getStartOffset, Comparator.nullsFirst(Integer::compareTo))
                            .thenComparing(TermVectorsResponse.TermVector.Token::getEndOffset, Comparator.nullsFirst(Integer::compareTo))
                    );
                    builder.startArray("tokens");
                    for (TermVectorsResponse.TermVector.Token token : tokens) {
                        builder.startObject();
                        if (token.getPosition() != null) builder.field("position", token.getPosition());
                        if (token.getStartOffset() != null) builder.field("start_offset", token.getStartOffset());
                        if (token.getEndOffset() != null) builder.field("end_offset", token.getEndOffset());
                        if (token.getPayload() != null) builder.field("payload", token.getPayload());
                        builder.endObject();
                    }
                    builder.endArray();
                }
                if (term.getScore() != null) builder.field("score", term.getScore());
                builder.endObject();
            }
            builder.endObject();
        }
        builder.endObject();
    }


    protected TermVectorsResponse createTestInstance() {
        String index = randomAlphaOfLength(5);
        String type = randomAlphaOfLength(5);
        String id = String.valueOf(randomIntBetween(1, 100));
        long version = randomNonNegativeLong();
        long tookInMillis = randomNonNegativeLong();
        boolean found = randomBoolean();
        List<TermVectorsResponse.TermVector> tvList = null;
        if (found == true) {
            boolean hasFieldStatistics = randomBoolean();
            boolean hasTermStatistics = randomBoolean();
            boolean hasScores = randomBoolean();
            boolean hasOffsets = randomBoolean();
            boolean hasPositions = randomBoolean();
            boolean hasPayloads = randomBoolean();
            int fieldsCount = randomIntBetween(1, 3);
            tvList = new ArrayList<>(fieldsCount);
            for (int i = 0; i < fieldsCount; i++) {
                tvList.add(randomTermVector(hasFieldStatistics, hasTermStatistics, hasScores, hasOffsets, hasPositions, hasPayloads));
            }
        }
        TermVectorsResponse tvresponse = new TermVectorsResponse(index, type, id, version, found, tookInMillis, tvList);
        return tvresponse;
    }

    private TermVectorsResponse.TermVector randomTermVector(boolean hasFieldStatistics, boolean hasTermStatistics, boolean hasScores,
            boolean hasOffsets, boolean hasPositions, boolean hasPayloads) {
        TermVectorsResponse.TermVector.FieldStatistics fs = null;
        if (hasFieldStatistics) {
            long sumDocFreq = randomNonNegativeLong();
            int docCount = randomInt(1000);
            long sumTotalTermFreq = randomNonNegativeLong();
            fs = new TermVectorsResponse.TermVector.FieldStatistics(sumDocFreq, docCount, sumTotalTermFreq);
        }

        int termsCount = randomIntBetween(1, 5);
        List<TermVectorsResponse.TermVector.Term> terms = new ArrayList<>(termsCount);
        for (int i = 0; i < termsCount; i++) {
            terms.add(randomTerm(hasTermStatistics, hasScores, hasOffsets, hasPositions, hasPayloads));
        }

        TermVectorsResponse.TermVector tv = new TermVectorsResponse.TermVector("field" + randomAlphaOfLength(2), fs, terms);
        return tv;
    }

    private TermVectorsResponse.TermVector.Term randomTerm(boolean hasTermStatistics, boolean hasScores,
            boolean hasOffsets, boolean hasPositions, boolean hasPayloads) {

        String termTxt = "term" + randomAlphaOfLength(2);
        int termFreq = randomInt(10000);
        Integer docFreq = null;
        Long totalTermFreq = null;
        Float score = null;
        List<TermVectorsResponse.TermVector.Token> tokens = null;
        if (hasTermStatistics) {
            docFreq = randomInt(1000);
            totalTermFreq = randomNonNegativeLong();
        }
        if (hasScores) score = randomFloat();
        if (hasOffsets || hasPositions || hasPayloads) {
            int tokensCount = randomIntBetween(1, 5);
            tokens = new ArrayList<>(tokensCount);
            for (int i = 0; i < tokensCount; i++) {
                Integer startOffset = null;
                Integer endOffset = null;
                Integer position = null;
                String payload = null;
                if (hasOffsets) {
                    startOffset = randomInt(1000);
                    endOffset = randomInt(2000);
                }
                if (hasPositions) position = randomInt(100);
                if (hasPayloads) payload = "payload" + randomAlphaOfLength(2);
                TermVectorsResponse.TermVector.Token token =
                    new TermVectorsResponse.TermVector.Token(startOffset, endOffset, position, payload);
                tokens.add(token);
            }
        }
        TermVectorsResponse.TermVector.Term term =
            new TermVectorsResponse.TermVector.Term(termTxt, termFreq, docFreq, totalTermFreq, score, tokens);
        return term;
    }

}
@ -25,6 +25,8 @@ import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.DocWriteResponse;
import org.elasticsearch.action.LatchedActionListener;
import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.bulk.BackoffPolicy;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkProcessor;

@ -52,6 +54,8 @@ import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.RethrottleRequest;
import org.elasticsearch.client.core.TermVectorsRequest;
import org.elasticsearch.client.core.TermVectorsResponse;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.settings.Settings;

@ -1503,6 +1507,125 @@ public class CRUDDocumentationIT extends ESRestHighLevelClientTestCase {
        }
    }

    // Not entirely sure if _termvectors belongs to CRUD, and in the absence of a better place, will have it here
    public void testTermVectors() throws Exception {
        RestHighLevelClient client = highLevelClient();
        CreateIndexRequest authorsRequest = new CreateIndexRequest("authors").mapping("doc", "user", "type=keyword");
        CreateIndexResponse authorsResponse = client.indices().create(authorsRequest, RequestOptions.DEFAULT);
        assertTrue(authorsResponse.isAcknowledged());
client.index(new IndexRequest("index", "doc", "1").source("user", "kimchy"), RequestOptions.DEFAULT);
        Response refreshResponse = client().performRequest(new Request("POST", "/authors/_refresh"));
        assertEquals(200, refreshResponse.getStatusLine().getStatusCode());

        {
            // tag::term-vectors-request
            TermVectorsRequest request = new TermVectorsRequest("authors", "doc", "1");
            request.setFields("user");
            // end::term-vectors-request
        }

        {
            // tag::term-vectors-request-artificial
            TermVectorsRequest request = new TermVectorsRequest("authors", "doc");
            XContentBuilder docBuilder = XContentFactory.jsonBuilder();
            docBuilder.startObject().field("user", "guest-user").endObject();
            request.setDoc(docBuilder); // <1>
            // end::term-vectors-request-artificial

            // tag::term-vectors-request-optional-arguments
            request.setFieldStatistics(false); // <1>
            request.setTermStatistics(true); // <2>
            request.setPositions(false); // <3>
            request.setOffsets(false); // <4>
            request.setPayloads(false); // <5>

            Map<String, Integer> filterSettings = new HashMap<>();
            filterSettings.put("max_num_terms", 3);
            filterSettings.put("min_term_freq", 1);
            filterSettings.put("max_term_freq", 10);
            filterSettings.put("min_doc_freq", 1);
            filterSettings.put("max_doc_freq", 100);
            filterSettings.put("min_word_length", 1);
            filterSettings.put("max_word_length", 10);

            request.setFilterSettings(filterSettings); // <6>

            Map<String, String> perFieldAnalyzer = new HashMap<>();
            perFieldAnalyzer.put("user", "keyword");
            request.setPerFieldAnalyzer(perFieldAnalyzer); // <7>

            request.setRealtime(false); // <8>
            request.setRouting("routing"); // <9>
            // end::term-vectors-request-optional-arguments
        }

        TermVectorsRequest request = new TermVectorsRequest("authors", "doc", "1");
        request.setFields("user");

        // tag::term-vectors-execute
        TermVectorsResponse response = client.termvectors(request, RequestOptions.DEFAULT);
        // end::term-vectors-execute


        // tag::term-vectors-response
        String index = response.getIndex(); // <1>
        String type = response.getType(); // <2>
        String id = response.getId(); // <3>
        boolean found = response.getFound(); // <4>
        // end::term-vectors-response

        // tag::term-vectors-term-vectors
        if (response.getTermVectorsList() != null) {
            List<TermVectorsResponse.TermVector> tvList = response.getTermVectorsList(); // <1>
            for (TermVectorsResponse.TermVector tv : tvList) {
                String fieldname = tv.getFieldName(); // <2>
                int docCount = tv.getFieldStatistics().getDocCount(); // <3>
                long sumTotalTermFreq = tv.getFieldStatistics().getSumTotalTermFreq(); // <4>
                long sumDocFreq = tv.getFieldStatistics().getSumDocFreq(); // <5>
                if (tv.getTerms() != null) {
                    List<TermVectorsResponse.TermVector.Term> terms = tv.getTerms(); // <6>
                    for (TermVectorsResponse.TermVector.Term term : terms) {
                        String termStr = term.getTerm(); // <7>
                        int termFreq = term.getTermFreq(); // <8>
                        int docFreq = term.getDocFreq(); // <9>
                        long totalTermFreq = term.getTotalTermFreq(); // <10>
                        float score = term.getScore(); // <11>
                        if (term.getTokens() != null) {
                            List<TermVectorsResponse.TermVector.Token> tokens = term.getTokens(); // <12>
                            for (TermVectorsResponse.TermVector.Token token : tokens) {
                                int position = token.getPosition(); // <13>
                                int startOffset = token.getStartOffset(); // <14>
                                int endOffset = token.getEndOffset(); // <15>
                                String payload = token.getPayload(); // <16>
                            }
                        }
                    }
                }
            }
        }
        // end::term-vectors-term-vectors

        // tag::term-vectors-execute-listener
        ActionListener<TermVectorsResponse> listener = new ActionListener<TermVectorsResponse>() {
            @Override
            public void onResponse(TermVectorsResponse termVectorsResponse) {
                // <1>
            }
            @Override
            public void onFailure(Exception e) {
                // <2>
            }
        };
        // end::term-vectors-execute-listener
        CountDownLatch latch = new CountDownLatch(1);
        listener = new LatchedActionListener<>(listener, latch);
        // tag::term-vectors-execute-async
        client.termvectorsAsync(request, RequestOptions.DEFAULT, listener); // <1>
        // end::term-vectors-execute-async
        assertTrue(latch.await(30L, TimeUnit.SECONDS));

    }

    @SuppressWarnings("unused")
    public void testMultiGet() throws Exception {
        RestHighLevelClient client = highLevelClient();
@ -0,0 +1,134 @@
--
:api: term-vectors
:request: TermVectorsRequest
:response: TermVectorsResponse
--

[id="{upid}-{api}"]
=== Term Vectors API

The Term Vectors API returns information and statistics on terms in the fields
of a particular document. The document can be stored in the index or
artificially provided by the user.


[id="{upid}-{api}-request"]
==== Term Vectors Request

A +{request}+ expects an `index`, a `type` and an `id` to specify
a certain document, and the fields for which the information should be retrieved.

["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-request]
--------------------------------------------------

Term vectors can also be generated for artificial documents, that is for
documents not present in the index:

["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-request-artificial]
--------------------------------------------------
<1> An artificial document is provided as an `XContentBuilder` object,
the Elasticsearch built-in helper to generate JSON content.

===== Optional arguments

["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-request-optional-arguments]
--------------------------------------------------
<1> Set `fieldStatistics` to `false` (default is `true`) to omit document count,
sum of document frequencies, sum of total term frequencies.
<2> Set `termStatistics` to `true` (default is `false`) to display
total term frequency and document frequency.
<3> Set `positions` to `false` (default is `true`) to omit the output of
positions.
<4> Set `offsets` to `false` (default is `true`) to omit the output of
offsets.
<5> Set `payloads` to `false` (default is `true`) to omit the output of
payloads.
<6> Set `filterSettings` to filter the terms that can be returned based
on their tf-idf scores.
<7> Set `perFieldAnalyzer` to specify a different analyzer than
the one that the field has.
<8> Set `realtime` to `false` (default is `true`) to retrieve term vectors
near realtime.
<9> Set a routing parameter


include::../execution.asciidoc[]


[id="{upid}-{api}-response"]
==== TermVectorsResponse

The `TermVectorsResponse` contains the following information:

["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-response]
--------------------------------------------------
<1> The index name of the document.
<2> The type name of the document.
<3> The id of the document.
<4> Indicates whether or not the document was found.


===== Inspecting Term Vectors
If `TermVectorsResponse` contains a non-null list of term vectors,
more information about them can be obtained as follows:

["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-term-vectors]
--------------------------------------------------
<1> The list of `TermVector` for the document
<2> The name of the current field
<3> Field statistics for the current field - document count
<4> Field statistics for the current field - sum of total term frequencies
<5> Field statistics for the current field - sum of document frequencies
<6> Terms for the current field
<7> The name of the term
<8> Term frequency of the term
<9> Document frequency of the term
<10> Total term frequency of the term
<11> Score of the term
<12> Tokens of the term
<13> Position of the token
<14> Start offset of the token
<15> End offset of the token
<16> Payload of the token
@ -14,6 +14,7 @@ Single document APIs::
* <<{upid}-exists>>
* <<{upid}-delete>>
* <<{upid}-update>>
* <<{upid}-term-vectors>>

[[multi-doc]]
Multi-document APIs::

@ -29,6 +30,7 @@ include::document/get.asciidoc[]
include::document/exists.asciidoc[]
include::document/delete.asciidoc[]
include::document/update.asciidoc[]
include::document/term-vectors.asciidoc[]
include::document/bulk.asciidoc[]
include::document/multi-get.asciidoc[]
include::document/reindex.asciidoc[]

@ -372,4 +374,4 @@ don't leak into the rest of the documentation.
:response!:
:doc-tests-file!:
:upid!:
--
@ -51,7 +51,7 @@ import static org.elasticsearch.action.ValidateActions.addValidationError;
 * </ul>
 */
public class DeleteByQueryRequest extends AbstractBulkByScrollRequest<DeleteByQueryRequest>
    implements IndicesRequest.Replaceable, ToXContentObject {

    public DeleteByQueryRequest() {
        this(new SearchRequest());