From 450ee700388f87d2f552c75d702e6f171aedf0b3 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 4 Feb 2016 16:20:24 +0100 Subject: [PATCH] Remove DFS support from TermVector API Retrieving distributed DF for TermVectors is, besides its esoteric justification, a very slow process and can cause serious load on the cluster. We also don't have nearly enough testing for this stuff and given the complexity we should remove it rather than carrying it around. --- .../elasticsearch/action/ActionModule.java | 6 +- .../termvectors/TermVectorsRequest.java | 20 +-- .../TermVectorsRequestBuilder.java | 8 - .../termvectors/dfs/DfsOnlyRequest.java | 112 -------------- .../termvectors/dfs/DfsOnlyResponse.java | 73 --------- .../termvectors/dfs/ShardDfsOnlyRequest.java | 62 -------- .../termvectors/dfs/ShardDfsOnlyResponse.java | 62 -------- .../dfs/TransportDfsOnlyAction.java | 146 ------------------ .../action/termvectors/dfs/package-info.java | 23 --- .../index/query/MoreLikeThisQueryBuilder.java | 3 +- .../index/termvectors/TermVectorsService.java | 27 ---- .../termvectors/RestTermVectorsAction.java | 1 - .../action/termvectors/GetTermVectorsIT.java | 92 +---------- .../rest-api-spec/api/termvectors.json | 6 - 14 files changed, 5 insertions(+), 636 deletions(-) delete mode 100644 core/src/main/java/org/elasticsearch/action/termvectors/dfs/DfsOnlyRequest.java delete mode 100644 core/src/main/java/org/elasticsearch/action/termvectors/dfs/DfsOnlyResponse.java delete mode 100644 core/src/main/java/org/elasticsearch/action/termvectors/dfs/ShardDfsOnlyRequest.java delete mode 100644 core/src/main/java/org/elasticsearch/action/termvectors/dfs/ShardDfsOnlyResponse.java delete mode 100644 core/src/main/java/org/elasticsearch/action/termvectors/dfs/TransportDfsOnlyAction.java delete mode 100644 core/src/main/java/org/elasticsearch/action/termvectors/dfs/package-info.java diff --git a/core/src/main/java/org/elasticsearch/action/ActionModule.java 
b/core/src/main/java/org/elasticsearch/action/ActionModule.java index 39aa4b7a2ba..55651e42628 100644 --- a/core/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/core/src/main/java/org/elasticsearch/action/ActionModule.java @@ -190,14 +190,11 @@ import org.elasticsearch.action.termvectors.TermVectorsAction; import org.elasticsearch.action.termvectors.TransportMultiTermVectorsAction; import org.elasticsearch.action.termvectors.TransportShardMultiTermsVectorAction; import org.elasticsearch.action.termvectors.TransportTermVectorsAction; -import org.elasticsearch.action.termvectors.dfs.TransportDfsOnlyAction; import org.elasticsearch.action.update.TransportUpdateAction; import org.elasticsearch.action.update.UpdateAction; import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.inject.multibindings.MapBinder; import org.elasticsearch.common.inject.multibindings.Multibinder; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.node.NodeModule; import java.util.ArrayList; import java.util.HashMap; @@ -323,8 +320,7 @@ public class ActionModule extends AbstractModule { registerAction(IndexAction.INSTANCE, TransportIndexAction.class); registerAction(GetAction.INSTANCE, TransportGetAction.class); - registerAction(TermVectorsAction.INSTANCE, TransportTermVectorsAction.class, - TransportDfsOnlyAction.class); + registerAction(TermVectorsAction.INSTANCE, TransportTermVectorsAction.class); registerAction(MultiTermVectorsAction.INSTANCE, TransportMultiTermVectorsAction.class, TransportShardMultiTermsVectorAction.class); registerAction(DeleteAction.INSTANCE, TransportDeleteAction.class); diff --git a/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java b/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java index 7a97a242401..bb153885d2d 100644 --- a/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java +++ 
b/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java @@ -373,22 +373,6 @@ public class TermVectorsRequest extends SingleShardRequest i return this; } - /** - * @return true if distributed frequencies should be returned. Otherwise - * false - */ - public boolean dfs() { - return flagsEnum.contains(Flag.Dfs); - } - - /** - * Use distributed frequencies instead of shard statistics. - */ - public TermVectorsRequest dfs(boolean dfs) { - setFlag(Flag.Dfs, dfs); - return this; - } - /** * Return only term vectors for special selected fields. Returns for term * vectors for all fields if selectedFields == null @@ -583,7 +567,7 @@ public class TermVectorsRequest extends SingleShardRequest i public static enum Flag { // Do not change the order of these flags we use // the ordinal for encoding! Only append to the end! - Positions, Offsets, Payloads, FieldStatistics, TermStatistics, Dfs + Positions, Offsets, Payloads, FieldStatistics, TermStatistics } /** @@ -616,7 +600,7 @@ public class TermVectorsRequest extends SingleShardRequest i } else if (currentFieldName.equals("field_statistics") || currentFieldName.equals("fieldStatistics")) { termVectorsRequest.fieldStatistics(parser.booleanValue()); } else if (currentFieldName.equals("dfs")) { - termVectorsRequest.dfs(parser.booleanValue()); + throw new IllegalArgumentException("distributed frequencies is not supported anymore for term vectors"); } else if (currentFieldName.equals("per_field_analyzer") || currentFieldName.equals("perFieldAnalyzer")) { termVectorsRequest.perFieldAnalyzer(readPerFieldAnalyzer(parser.map())); } else if (currentFieldName.equals("filter")) { diff --git a/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequestBuilder.java index c3a474cd21e..ae4bbc63f1d 100644 --- a/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequestBuilder.java +++ 
b/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequestBuilder.java @@ -149,14 +149,6 @@ public class TermVectorsRequestBuilder extends ActionRequestBuilder { - - private SearchRequest searchRequest = new SearchRequest(); - - long nowInMillis; - - public DfsOnlyRequest() { - - } - - public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set selectedFields) throws IOException { - super(indices); - - // build a search request with a query of all the terms - final BoolQueryBuilder boolBuilder = boolQuery(); - for (String fieldName : termVectorsFields) { - if ((selectedFields != null) && (!selectedFields.contains(fieldName))) { - continue; - } - Terms terms = termVectorsFields.terms(fieldName); - TermsEnum iterator = terms.iterator(); - while (iterator.next() != null) { - String text = iterator.term().utf8ToString(); - boolBuilder.should(QueryBuilders.termQuery(fieldName, text)); - } - } - // wrap a search request object - this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder)); - } - - public SearchRequest getSearchRequest() { - return searchRequest; - } - - @Override - public ActionRequestValidationException validate() { - return searchRequest.validate(); - } - - @Override - public void readFrom(StreamInput in) throws IOException { - super.readFrom(in); - this.searchRequest.readFrom(in); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - this.searchRequest.writeTo(out); - } - - public String[] types() { - return this.searchRequest.types(); - } - - public String routing() { - return this.searchRequest.routing(); - } - - public String preference() { - return this.searchRequest.preference(); - } - - @Override - public String toString() { - String sSource = "_na_"; - if (searchRequest.source() != null) { - sSource = searchRequest.source().toString(); - } - return "[" + Arrays.toString(indices) + "]" + 
Arrays.toString(types()) + ", source[" + sSource + "]"; - } - -} diff --git a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/DfsOnlyResponse.java b/core/src/main/java/org/elasticsearch/action/termvectors/dfs/DfsOnlyResponse.java deleted file mode 100644 index db1cddff046..00000000000 --- a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/DfsOnlyResponse.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.action.termvectors.dfs; - -import org.elasticsearch.action.ShardOperationFailedException; -import org.elasticsearch.action.support.broadcast.BroadcastResponse; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.search.dfs.AggregatedDfs; - -import java.io.IOException; -import java.util.List; - -/** - * A response of a dfs only request. 
- */ -public class DfsOnlyResponse extends BroadcastResponse { - - private AggregatedDfs dfs; - private long tookInMillis; - - DfsOnlyResponse(AggregatedDfs dfs, int totalShards, int successfulShards, int failedShards, - List shardFailures, long tookInMillis) { - super(totalShards, successfulShards, failedShards, shardFailures); - this.dfs = dfs; - this.tookInMillis = tookInMillis; - } - - public AggregatedDfs getDfs() { - return dfs; - } - - public TimeValue getTook() { - return new TimeValue(tookInMillis); - } - - public long getTookInMillis() { - return tookInMillis; - } - - @Override - public void readFrom(StreamInput in) throws IOException { - super.readFrom(in); - AggregatedDfs.readAggregatedDfs(in); - tookInMillis = in.readVLong(); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - dfs.writeTo(out); - out.writeVLong(tookInMillis); - } - -} diff --git a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/ShardDfsOnlyRequest.java b/core/src/main/java/org/elasticsearch/action/termvectors/dfs/ShardDfsOnlyRequest.java deleted file mode 100644 index 95a9a821ad0..00000000000 --- a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/ShardDfsOnlyRequest.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.action.termvectors.dfs; - -import org.elasticsearch.action.support.broadcast.BroadcastShardRequest; -import org.elasticsearch.cluster.routing.ShardRouting; -import org.elasticsearch.common.Nullable; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.search.internal.ShardSearchRequest; -import org.elasticsearch.search.internal.ShardSearchTransportRequest; - -import java.io.IOException; - -public class ShardDfsOnlyRequest extends BroadcastShardRequest { - - private ShardSearchTransportRequest shardSearchRequest = new ShardSearchTransportRequest(); - - public ShardDfsOnlyRequest() { - - } - - ShardDfsOnlyRequest(ShardRouting shardRouting, int numberOfShards, @Nullable String[] filteringAliases, long nowInMillis, DfsOnlyRequest request) { - super(shardRouting.shardId(), request); - this.shardSearchRequest = new ShardSearchTransportRequest(request.getSearchRequest(), shardRouting, numberOfShards, - filteringAliases, nowInMillis); - } - - public ShardSearchRequest getShardSearchRequest() { - return shardSearchRequest; - } - - @Override - public void readFrom(StreamInput in) throws IOException { - super.readFrom(in); - shardSearchRequest.readFrom(in); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - shardSearchRequest.writeTo(out); - } - -} diff --git a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/ShardDfsOnlyResponse.java b/core/src/main/java/org/elasticsearch/action/termvectors/dfs/ShardDfsOnlyResponse.java deleted file mode 100644 index 688a475ea64..00000000000 --- a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/ShardDfsOnlyResponse.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.action.termvectors.dfs; - -import org.elasticsearch.action.support.broadcast.BroadcastShardResponse; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.index.shard.ShardId; -import org.elasticsearch.search.dfs.DfsSearchResult; - -import java.io.IOException; - -/** - * - */ -class ShardDfsOnlyResponse extends BroadcastShardResponse { - - private DfsSearchResult dfsSearchResult = new DfsSearchResult(); - - ShardDfsOnlyResponse() { - - } - - ShardDfsOnlyResponse(ShardId shardId, DfsSearchResult dfsSearchResult) { - super(shardId); - this.dfsSearchResult = dfsSearchResult; - } - - public DfsSearchResult getDfsSearchResult() { - return dfsSearchResult; - } - - @Override - public void readFrom(StreamInput in) throws IOException { - super.readFrom(in); - dfsSearchResult.readFrom(in); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - dfsSearchResult.writeTo(out); - } - -} diff --git a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/TransportDfsOnlyAction.java b/core/src/main/java/org/elasticsearch/action/termvectors/dfs/TransportDfsOnlyAction.java deleted file mode 100644 index 
647e3cc7546..00000000000 --- a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/TransportDfsOnlyAction.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.action.termvectors.dfs; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ShardOperationFailedException; -import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.DefaultShardOperationFailedException; -import org.elasticsearch.action.support.broadcast.BroadcastShardOperationFailedException; -import org.elasticsearch.action.support.broadcast.TransportBroadcastAction; -import org.elasticsearch.cluster.ClusterService; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.block.ClusterBlockException; -import org.elasticsearch.cluster.block.ClusterBlockLevel; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; -import org.elasticsearch.cluster.routing.GroupShardsIterator; -import org.elasticsearch.cluster.routing.ShardRouting; -import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.util.concurrent.AtomicArray; 
-import org.elasticsearch.search.SearchService; -import org.elasticsearch.search.controller.SearchPhaseController; -import org.elasticsearch.search.dfs.AggregatedDfs; -import org.elasticsearch.search.dfs.DfsSearchResult; -import org.elasticsearch.tasks.Task; -import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.transport.TransportService; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicReferenceArray; - -/** - * Get the dfs only with no fetch phase. This is for internal use only. - */ -public class TransportDfsOnlyAction extends TransportBroadcastAction { - - public static final String NAME = "internal:index/termvectors/dfs"; - - private final SearchService searchService; - - private final SearchPhaseController searchPhaseController; - - @Inject - public TransportDfsOnlyAction(Settings settings, ThreadPool threadPool, ClusterService clusterService, TransportService transportService, - ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver, SearchService searchService, SearchPhaseController searchPhaseController) { - super(settings, NAME, threadPool, clusterService, transportService, actionFilters, indexNameExpressionResolver, - DfsOnlyRequest::new, ShardDfsOnlyRequest::new, ThreadPool.Names.SEARCH); - this.searchService = searchService; - this.searchPhaseController = searchPhaseController; - } - - @Override - protected void doExecute(Task task, DfsOnlyRequest request, ActionListener listener) { - request.nowInMillis = System.currentTimeMillis(); - super.doExecute(task, request, listener); - } - - @Override - protected ShardDfsOnlyRequest newShardRequest(int numShards, ShardRouting shard, DfsOnlyRequest request) { - String[] filteringAliases = indexNameExpressionResolver.filteringAliases(clusterService.state(), shard.index().getName(), request.indices()); - return new ShardDfsOnlyRequest(shard, numShards, filteringAliases, 
request.nowInMillis, request); - } - - @Override - protected ShardDfsOnlyResponse newShardResponse() { - return new ShardDfsOnlyResponse(); - } - - @Override - protected GroupShardsIterator shards(ClusterState clusterState, DfsOnlyRequest request, String[] concreteIndices) { - Map> routingMap = indexNameExpressionResolver.resolveSearchRouting(clusterState, request.routing(), request.indices()); - return clusterService.operationRouting().searchShards(clusterState, concreteIndices, routingMap, request.preference()); - } - - @Override - protected ClusterBlockException checkGlobalBlock(ClusterState state, DfsOnlyRequest request) { - return state.blocks().globalBlockedException(ClusterBlockLevel.READ); - } - - @Override - protected ClusterBlockException checkRequestBlock(ClusterState state, DfsOnlyRequest countRequest, String[] concreteIndices) { - return state.blocks().indicesBlockedException(ClusterBlockLevel.READ, concreteIndices); - } - - @Override - protected DfsOnlyResponse newResponse(DfsOnlyRequest request, AtomicReferenceArray shardsResponses, ClusterState clusterState) { - int successfulShards = 0; - int failedShards = 0; - List shardFailures = null; - AtomicArray dfsResults = new AtomicArray<>(shardsResponses.length()); - for (int i = 0; i < shardsResponses.length(); i++) { - Object shardResponse = shardsResponses.get(i); - if (shardResponse == null) { - // simply ignore non active shards - } else if (shardResponse instanceof BroadcastShardOperationFailedException) { - failedShards++; - if (shardFailures == null) { - shardFailures = new ArrayList<>(); - } - shardFailures.add(new DefaultShardOperationFailedException((BroadcastShardOperationFailedException) shardResponse)); - } else { - dfsResults.set(i, ((ShardDfsOnlyResponse) shardResponse).getDfsSearchResult()); - successfulShards++; - } - } - AggregatedDfs dfs = searchPhaseController.aggregateDfs(dfsResults); - return new DfsOnlyResponse(dfs, shardsResponses.length(), successfulShards, failedShards, 
shardFailures, buildTookInMillis(request)); - } - - @Override - protected ShardDfsOnlyResponse shardOperation(ShardDfsOnlyRequest request) { - DfsSearchResult dfsSearchResult = searchService.executeDfsPhase(request.getShardSearchRequest()); - searchService.freeContext(dfsSearchResult.id()); - return new ShardDfsOnlyResponse(request.shardId(), dfsSearchResult); - } - - /** - * Builds how long it took to execute the dfs request. - */ - protected final long buildTookInMillis(DfsOnlyRequest request) { - // protect ourselves against time going backwards - // negative values don't make sense and we want to be able to serialize that thing as a vLong - return Math.max(1, System.currentTimeMillis() - request.nowInMillis); - } - -} diff --git a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/package-info.java b/core/src/main/java/org/elasticsearch/action/termvectors/dfs/package-info.java deleted file mode 100644 index 8ff53c5f9b5..00000000000 --- a/core/src/main/java/org/elasticsearch/action/termvectors/dfs/package-info.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/** - * Distributed frequencies. 
- */ -package org.elasticsearch.action.termvectors.dfs; \ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java index 49842a652f2..8b4af09c2b7 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java @@ -287,8 +287,7 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder termVectors = client().termVectors(new TermVectorsRequest(indexOrAlias(), "type1", "0") .selectedFields(randomBoolean() ? new String[]{"existingfield"} : null) .termStatistics(true) - .fieldStatistics(true) - .dfs(true)); + .fieldStatistics(true)); // lets see if the null term vectors are caught... TermVectorsResponse actionGet = termVectors.actionGet(); @@ -966,95 +965,6 @@ public class GetTermVectorsIT extends AbstractTermVectorsTestCase { return randomBoolean() ? 
"test" : "alias"; } - public void testDfs() throws ExecutionException, InterruptedException, IOException { - logger.info("Setting up the index ..."); - Settings.Builder settings = settingsBuilder() - .put(indexSettings()) - .put("index.analysis.analyzer", "standard") - .put("index.number_of_shards", randomIntBetween(2, 10)); // we need at least 2 shards - assertAcked(prepareCreate("test") - .setSettings(settings) - .addMapping("type1", "text", "type=string")); - ensureGreen(); - - int numDocs = scaledRandomIntBetween(25, 100); - logger.info("Indexing {} documents...", numDocs); - List builders = new ArrayList<>(); - for (int i = 0; i < numDocs; i++) { - builders.add(client().prepareIndex("test", "type1", i + "").setSource("text", "cat")); - } - indexRandom(true, builders); - - XContentBuilder expectedStats = jsonBuilder() - .startObject() - .startObject("text") - .startObject("field_statistics") - .field("sum_doc_freq", numDocs) - .field("doc_count", numDocs) - .field("sum_ttf", numDocs) - .endObject() - .startObject("terms") - .startObject("cat") - .field("doc_freq", numDocs) - .field("ttf", numDocs) - .endObject() - .endObject() - .endObject() - .endObject(); - - logger.info("Without dfs 'cat' should appear strictly less than {} times.", numDocs); - TermVectorsResponse response = client().prepareTermVectors("test", "type1", randomIntBetween(0, numDocs - 1) + "") - .setSelectedFields("text") - .setFieldStatistics(true) - .setTermStatistics(true) - .get(); - checkStats(response.getFields(), expectedStats, false); - - logger.info("With dfs 'cat' should appear exactly {} times.", numDocs); - response = client().prepareTermVectors("test", "type1", randomIntBetween(0, numDocs - 1) + "") - .setSelectedFields("text") - .setFieldStatistics(true) - .setTermStatistics(true) - .setDfs(true) - .get(); - checkStats(response.getFields(), expectedStats, true); - } - - private void checkStats(Fields fields, XContentBuilder xContentBuilder, boolean isEqual) throws IOException { - 
Map stats = JsonXContent.jsonXContent.createParser(xContentBuilder.bytes()).map(); - assertThat("number of fields expected:", fields.size(), equalTo(stats.size())); - for (String fieldName : fields) { - logger.info("Checking field statistics for field: {}", fieldName); - Terms terms = fields.terms(fieldName); - Map fieldStatistics = getFieldStatistics(stats, fieldName); - String msg = "field: " + fieldName + " "; - assertThat(msg + "sum_doc_freq:", - (int) terms.getSumDocFreq(), - equalOrLessThanTo(fieldStatistics.get("sum_doc_freq"), isEqual)); - assertThat(msg + "doc_count:", - terms.getDocCount(), - equalOrLessThanTo(fieldStatistics.get("doc_count"), isEqual)); - assertThat(msg + "sum_ttf:", - (int) terms.getSumTotalTermFreq(), - equalOrLessThanTo(fieldStatistics.get("sum_ttf"), isEqual)); - - final TermsEnum termsEnum = terms.iterator(); - BytesRef text; - while((text = termsEnum.next()) != null) { - String term = text.utf8ToString(); - logger.info("Checking term statistics for term: ({}, {})", fieldName, term); - Map termStatistics = getTermStatistics(stats, fieldName, term); - msg = "term: (" + fieldName + "," + term + ") "; - assertThat(msg + "doc_freq:", - termsEnum.docFreq(), - equalOrLessThanTo(termStatistics.get("doc_freq"), isEqual)); - assertThat(msg + "ttf:", - (int) termsEnum.totalTermFreq(), - equalOrLessThanTo(termStatistics.get("ttf"), isEqual)); - } - } - } - private Map getFieldStatistics(Map stats, String fieldName) throws IOException { return (Map) ((Map) stats.get(fieldName)).get("field_statistics"); } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/termvectors.json b/rest-api-spec/src/main/resources/rest-api-spec/api/termvectors.json index 147d7971c9c..f5d8b6bd08d 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/termvectors.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/termvectors.json @@ -34,12 +34,6 @@ "default" : true, "required" : false }, - "dfs" : { - "type" : "boolean", - "description" : 
"Specifies if distributed frequencies should be returned instead shard frequencies.", - "default" : false, - "required" : false - }, "fields" : { "type" : "list", "description" : "A comma-separated list of fields to return.",